File: jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp
Warning: line 847, column 21: Value stored to 'saved_state' during its initialization is never read
1 | /* |
2 | * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. |
8 | * |
9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
12 | * version 2 for more details (a copy is included in the LICENSE file that |
13 | * accompanied this code). |
14 | * |
15 | * You should have received a copy of the GNU General Public License version |
16 | * 2 along with this work; if not, write to the Free Software Foundation, |
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | * |
19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
20 | * or visit www.oracle.com if you need additional information or have any |
21 | * questions. |
22 | * |
23 | */ |
24 | |
25 | #include "precompiled.hpp" |
26 | #include "jvm.h" |
27 | #include "asm/assembler.hpp" |
28 | #include "asm/assembler.inline.hpp" |
29 | #include "compiler/compiler_globals.hpp" |
30 | #include "compiler/disassembler.hpp" |
31 | #include "gc/shared/barrierSet.hpp" |
32 | #include "gc/shared/barrierSetAssembler.hpp" |
33 | #include "gc/shared/collectedHeap.inline.hpp" |
34 | #include "gc/shared/tlab_globals.hpp" |
35 | #include "interpreter/bytecodeHistogram.hpp" |
36 | #include "interpreter/interpreter.hpp" |
37 | #include "memory/resourceArea.hpp" |
38 | #include "memory/universe.hpp" |
39 | #include "oops/accessDecorators.hpp" |
40 | #include "oops/compressedOops.inline.hpp" |
41 | #include "oops/klass.inline.hpp" |
42 | #include "prims/methodHandles.hpp" |
43 | #include "runtime/flags/flagSetting.hpp" |
44 | #include "runtime/interfaceSupport.inline.hpp" |
45 | #include "runtime/jniHandles.hpp" |
46 | #include "runtime/objectMonitor.hpp" |
47 | #include "runtime/os.hpp" |
48 | #include "runtime/safepoint.hpp" |
49 | #include "runtime/safepointMechanism.hpp" |
50 | #include "runtime/sharedRuntime.hpp" |
51 | #include "runtime/stubRoutines.hpp" |
52 | #include "runtime/thread.hpp" |
53 | #include "utilities/macros.hpp" |
54 | #include "crc32c.h" |
55 | |
56 | #ifdef PRODUCT |
57 | #define BLOCK_COMMENT(str) /* nothing */ |
58 | #define STOP(error) stop(error) |
59 | #else |
60 | #define BLOCK_COMMENT(str) block_comment(str) |
61 | #define STOP(error) block_comment(error); stop(error) |
62 | #endif |
63 | |
64 | #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") |
65 | |
66 | #ifdef ASSERT |
67 | bool AbstractAssembler::pd_check_instruction_mark() { return true; } |
68 | #endif |
69 | |
70 | static Assembler::Condition reverse[] = { |
71 | Assembler::noOverflow /* overflow = 0x0 */ , |
72 | Assembler::overflow /* noOverflow = 0x1 */ , |
73 | Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ , |
74 | Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ , |
75 | Assembler::notZero /* zero = 0x4, equal = 0x4 */ , |
76 | Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ , |
77 | Assembler::above /* belowEqual = 0x6 */ , |
78 | Assembler::belowEqual /* above = 0x7 */ , |
79 | Assembler::positive /* negative = 0x8 */ , |
80 | Assembler::negative /* positive = 0x9 */ , |
81 | Assembler::noParity /* parity = 0xa */ , |
82 | Assembler::parity /* noParity = 0xb */ , |
83 | Assembler::greaterEqual /* less = 0xc */ , |
84 | Assembler::less /* greaterEqual = 0xd */ , |
85 | Assembler::greater /* lessEqual = 0xe */ , |
86 | Assembler::lessEqual /* greater = 0xf, */ |
87 | |
88 | }; |
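The table above gives, for each x86 condition code, the condition that tests the opposite outcome. Since the hardware encodes a condition and its negation as a pair that differs only in the least significant bit, the whole table is equivalent to flipping that bit. A minimal standalone sketch of that check; the Condition enum below is a local stand-in mirroring the encodings in the comments, not the real Assembler type:

    #include <cassert>

    // Local stand-in for the x86 condition encodings named in the comments above
    // (overflow = 0x0 ... greater = 0xf); not the real Assembler::Condition type.
    enum Condition {
      overflow = 0x0, noOverflow = 0x1, below = 0x2, aboveEqual = 0x3,
      zero = 0x4, notZero = 0x5, belowEqual = 0x6, above = 0x7,
      negative = 0x8, positive = 0x9, parity = 0xa, noParity = 0xb,
      less = 0xc, greaterEqual = 0xd, lessEqual = 0xe, greater = 0xf
    };

    // Same pairing as the reverse[] table in the source.
    static const Condition reverse_sketch[16] = {
      noOverflow, overflow, aboveEqual, below, notZero, zero, above, belowEqual,
      positive, negative, noParity, parity, greaterEqual, less, greater, lessEqual
    };

    int main() {
      for (int cc = 0; cc < 16; cc++) {
        assert(reverse_sketch[cc] == (cc ^ 1));   // negation just flips the low bit
      }
      return 0;
    }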
89 | |
90 | |
91 | // Implementation of MacroAssembler |
92 | |
93 | // First all the versions that have distinct versions depending on 32/64 bit |
94 | // Unless the difference is trivial (1 line or so). |
95 | |
96 | #ifndef _LP64 |
97 | |
98 | // 32bit versions |
99 | |
100 | Address MacroAssembler::as_Address(AddressLiteral adr) { |
101 | return Address(adr.target(), adr.rspec()); |
102 | } |
103 | |
104 | Address MacroAssembler::as_Address(ArrayAddress adr) { |
105 | return Address::make_array(adr); |
106 | } |
107 | |
108 | void MacroAssembler::call_VM_leaf_base(address entry_point, |
109 | int number_of_arguments) { |
110 | call(RuntimeAddress(entry_point)); |
111 | increment(rsp, number_of_arguments * wordSize); |
112 | } |
113 | |
114 | void MacroAssembler::cmpklass(Address src1, Metadata* obj) { |
115 | cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); |
116 | } |
117 | |
118 | |
119 | void MacroAssembler::cmpklass(Register src1, Metadata* obj) { |
120 | cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); |
121 | } |
122 | |
123 | void MacroAssembler::cmpoop(Address src1, jobject obj) { |
124 | cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); |
125 | } |
126 | |
127 | void MacroAssembler::cmpoop(Register src1, jobject obj) { |
128 | cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); |
129 | } |
130 | |
131 | void MacroAssembler::extend_sign(Register hi, Register lo) { |
132 | // According to Intel Doc. AP-526, "Integer Divide", p.18. |
133 | if (VM_Version::is_P6() && hi == rdx && lo == rax) { |
134 | cdql(); |
135 | } else { |
136 | movl(hi, lo); |
137 | sarl(hi, 31); |
138 | } |
139 | } |
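extend_sign() widens the 32-bit value in lo into the 64-bit pair hi:lo, either with cdq (on P6 when the pair is rdx:rax) or by copying lo and arithmetically shifting right by 31. A small standalone check, assuming arithmetic right shift of signed values (which C++20 guarantees and which the generated sarl performs), that both views agree:

    #include <cassert>
    #include <cstdint>

    // What extend_sign computes: the high word of the widened value is the low
    // word arithmetically shifted right by 31 (all zero bits or all one bits).
    static int32_t high_word_by_sar(int32_t lo) { return lo >> 31; }

    int main() {
      const int32_t samples[] = { 0, 1, -1, 123456789, INT32_MIN, INT32_MAX };
      for (int32_t lo : samples) {
        int64_t widened = (int64_t)lo;                // what cdq produces in edx:eax
        int32_t hi = (int32_t)(widened >> 32);
        assert(hi == high_word_by_sar(lo));
      }
      return 0;
    }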
140 | |
141 | void MacroAssembler::jC2(Register tmp, Label& L) { |
142 | // set parity bit if FPU flag C2 is set (via rax) |
143 | save_rax(tmp); |
144 | fwait(); fnstsw_ax(); |
145 | sahf(); |
146 | restore_rax(tmp); |
147 | // branch |
148 | jcc(Assembler::parity, L); |
149 | } |
150 | |
151 | void MacroAssembler::jnC2(Register tmp, Label& L) { |
152 | // set parity bit if FPU flag C2 is set (via rax) |
153 | save_rax(tmp); |
154 | fwait(); fnstsw_ax(); |
155 | sahf(); |
156 | restore_rax(tmp); |
157 | // branch |
158 | jcc(Assembler::noParity, L); |
159 | } |
160 | |
161 | // 32bit can do a case table jump in one instruction but we no longer allow the base |
162 | // to be installed in the Address class |
163 | void MacroAssembler::jump(ArrayAddress entry) { |
164 | jmp(as_Address(entry)); |
165 | } |
166 | |
167 | // Note: y_lo will be destroyed |
168 | void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { |
169 | // Long compare for Java (semantics as described in JVM spec.) |
170 | Label high, low, done; |
171 | |
172 | cmpl(x_hi, y_hi); |
173 | jcc(Assembler::less, low); |
174 | jcc(Assembler::greater, high); |
175 | // x_hi is the return register |
176 | xorl(x_hi, x_hi); |
177 | cmpl(x_lo, y_lo); |
178 | jcc(Assembler::below, low); |
179 | jcc(Assembler::equal, done); |
180 | |
181 | bind(high); |
182 | xorl(x_hi, x_hi); |
183 | increment(x_hi); |
184 | jmp(done); |
185 | |
186 | bind(low); |
187 | xorl(x_hi, x_hi); |
188 | decrementl(x_hi); |
189 | |
190 | bind(done); |
191 | } |
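lcmp2int() implements the Java lcmp semantics: the result is -1, 0, or +1; the high words are compared as signed values, and only on a tie are the low words compared as unsigned values. A reference sketch of the same comparison in plain C++, illustrative and not part of this file:

    #include <cassert>
    #include <cstdint>

    // Reference for the lcmp semantics above: compare high words signed; only on
    // a tie compare low words unsigned; result is -1, 0 or +1.
    static int lcmp_sketch(int32_t x_hi, uint32_t x_lo, int32_t y_hi, uint32_t y_lo) {
      if (x_hi != y_hi) return x_hi < y_hi ? -1 : 1;
      if (x_lo != y_lo) return x_lo < y_lo ? -1 : 1;
      return 0;
    }

    int main() {
      assert(lcmp_sketch(-1, 0xffffffffu, 0, 1u) == -1);  // -1L < 1L
      assert(lcmp_sketch(1, 0u, 0, 0xffffffffu) == 1);    // decided by the high words
      assert(lcmp_sketch(0, 5u, 0, 5u) == 0);             // equal
      return 0;
    }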
192 | |
193 | void MacroAssembler::lea(Register dst, AddressLiteral src) { |
194 | mov_literal32(dst, (int32_t)src.target(), src.rspec()); |
195 | } |
196 | |
197 | void MacroAssembler::lea(Address dst, AddressLiteral adr) { |
198 | // leal(dst, as_Address(adr)); |
199 | // see note in movl as to why we must use a move |
200 | mov_literal32(dst, (int32_t) adr.target(), adr.rspec()); |
201 | } |
202 | |
203 | void MacroAssembler::leave() { |
204 | mov(rsp, rbp); |
205 | pop(rbp); |
206 | } |
207 | |
208 | void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) { |
209 | // Multiplication of two Java long values stored on the stack |
210 | // as illustrated below. Result is in rdx:rax. |
211 | // |
212 | // rsp ---> [ ?? ] \ \ |
213 | // .... | y_rsp_offset | |
214 | // [ y_lo ] / (in bytes) | x_rsp_offset |
215 | // [ y_hi ] | (in bytes) |
216 | // .... | |
217 | // [ x_lo ] / |
218 | // [ x_hi ] |
219 | // .... |
220 | // |
221 | // Basic idea: lo(result) = lo(x_lo * y_lo) |
222 | // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) |
223 | Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset); |
224 | Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset); |
225 | Label quick; |
226 | // load x_hi, y_hi and check if quick |
227 | // multiplication is possible |
228 | movl(rbx, x_hi); |
229 | movl(rcx, y_hi); |
230 | movl(rax, rbx); |
231 | orl(rbx, rcx); // rbx, = 0 <=> x_hi = 0 and y_hi = 0 |
232 | jcc(Assembler::zero, quick); // if rbx, = 0 do quick multiply |
233 | // do full multiplication |
234 | // 1st step |
235 | mull(y_lo); // x_hi * y_lo |
236 | movl(rbx, rax); // save lo(x_hi * y_lo) in rbx, |
237 | // 2nd step |
238 | movl(rax, x_lo); |
239 | mull(rcx); // x_lo * y_hi |
240 | addl(rbx, rax); // add lo(x_lo * y_hi) to rbx, |
241 | // 3rd step |
242 | bind(quick); // note: rbx, = 0 if quick multiply! |
243 | movl(rax, x_lo); |
244 | mull(y_lo); // x_lo * y_lo |
245 | addl(rdx, rbx); // correct hi(x_lo * y_lo) |
246 | } |
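The decomposition in the comment, lo(result) = lo(x_lo * y_lo) and hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi), is the standard 64x64->64 multiply built from 32x32->64 partial products; the x_hi * y_hi term can be dropped because it only affects bits above 63. A standalone check of the formula against the native multiply, for illustration only:

    #include <cassert>
    #include <cstdint>

    // 64x64->64 multiply built from 32x32->64 partial products, as the generated
    // code does with mull.
    static uint64_t lmul64(uint64_t x, uint64_t y) {
      uint32_t x_lo = (uint32_t)x, x_hi = (uint32_t)(x >> 32);
      uint32_t y_lo = (uint32_t)y, y_hi = (uint32_t)(y >> 32);
      uint64_t p  = (uint64_t)x_lo * y_lo;                 // x_lo * y_lo (full 64 bits)
      uint32_t hi = (uint32_t)(p >> 32)
                  + (uint32_t)((uint64_t)x_hi * y_lo)      // lo(x_hi * y_lo)
                  + (uint32_t)((uint64_t)x_lo * y_hi);     // lo(x_lo * y_hi)
      return ((uint64_t)hi << 32) | (uint32_t)p;
    }

    int main() {
      uint64_t samples[] = { 0, 1, 0xffffffffULL, 0x123456789abcdefULL, ~0ULL };
      for (uint64_t x : samples)
        for (uint64_t y : samples)
          assert(lmul64(x, y) == x * y);
      return 0;
    }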
247 | |
248 | void MacroAssembler::lneg(Register hi, Register lo) { |
249 | negl(lo); |
250 | adcl(hi, 0); |
251 | negl(hi); |
252 | } |
253 | |
254 | void MacroAssembler::lshl(Register hi, Register lo) { |
255 | // Java shift left long support (semantics as described in JVM spec., p.305) |
256 | // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n)) |
257 | // shift value is in rcx ! |
258 | assert(hi != rcx, "must not use rcx"); |
259 | assert(lo != rcx, "must not use rcx"); |
260 | const Register s = rcx; // shift count |
261 | const int n = BitsPerWord; |
262 | Label L; |
263 | andl(s, 0x3f); // s := s & 0x3f (s < 0x40) |
264 | cmpl(s, n); // if (s < n) |
265 | jcc(Assembler::less, L); // else (s >= n) |
266 | movl(hi, lo); // x := x << n |
267 | xorl(lo, lo); |
268 | // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! |
269 | bind(L); // s (mod n) < n |
270 | shldl(hi, lo); // x := x << s |
271 | shll(lo); |
272 | } |
273 | |
274 | |
275 | void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) { |
276 | // Java shift right long support (semantics as described in JVM spec., p.306 & p.310) |
277 | // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n)) |
278 | assert(hi != rcx, "must not use rcx"); |
279 | assert(lo != rcx, "must not use rcx"); |
280 | const Register s = rcx; // shift count |
281 | const int n = BitsPerWord; |
282 | Label L; |
283 | andl(s, 0x3f); // s := s & 0x3f (s < 0x40) |
284 | cmpl(s, n); // if (s < n) |
285 | jcc(Assembler::less, L); // else (s >= n) |
286 | movl(lo, hi); // x := x >> n |
287 | if (sign_extension) sarl(hi, 31); |
288 | else xorl(hi, hi); |
289 | // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! |
290 | bind(L); // s (mod n) < n |
291 | shrdl(lo, hi); // x := x >> s |
292 | if (sign_extension) sarl(hi); |
293 | else shrl(hi); |
294 | } |
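Both lshl() and lshr() first mask the shift count with 0x3f because Java long shifts use only the low six bits of the count (per the JVM spec pages cited above), then split into the s < 32 and s >= 32 cases. A tiny sketch of the masking rule, illustrative and not HotSpot code:

    #include <cassert>
    #include <cstdint>

    // Java long shifts use only the low six bits of the count, hence the
    // andl(s, 0x3f) above before handling the s < 32 and s >= 32 cases.
    static int64_t jshl(int64_t x, int32_t s) { return x << (s & 0x3f); }

    int main() {
      assert(jshl(1, 65) == 2);            // 65 & 0x3f == 1
      assert(jshl(1, 64) == 1);            // 64 & 0x3f == 0
      assert(jshl(1, 40) == (1LL << 40));  // ordinary in-range shift
      return 0;
    }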
295 | |
296 | void MacroAssembler::movoop(Register dst, jobject obj) { |
297 | mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); |
298 | } |
299 | |
300 | void MacroAssembler::movoop(Address dst, jobject obj) { |
301 | mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); |
302 | } |
303 | |
304 | void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { |
305 | mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); |
306 | } |
307 | |
308 | void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { |
309 | mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); |
310 | } |
311 | |
312 | void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) { |
313 | // scratch register is not used, |
314 | // it is defined to match parameters of 64-bit version of this method. |
315 | if (src.is_lval()) { |
316 | mov_literal32(dst, (intptr_t)src.target(), src.rspec()); |
317 | } else { |
318 | movl(dst, as_Address(src)); |
319 | } |
320 | } |
321 | |
322 | void MacroAssembler::movptr(ArrayAddress dst, Register src) { |
323 | movl(as_Address(dst), src); |
324 | } |
325 | |
326 | void MacroAssembler::movptr(Register dst, ArrayAddress src) { |
327 | movl(dst, as_Address(src)); |
328 | } |
329 | |
330 | // src should NEVER be a real pointer. Use AddressLiteral for true pointers |
331 | void MacroAssembler::movptr(Address dst, intptr_t src) { |
332 | movl(dst, src); |
333 | } |
334 | |
335 | |
336 | void MacroAssembler::pop_callee_saved_registers() { |
337 | pop(rcx); |
338 | pop(rdx); |
339 | pop(rdi); |
340 | pop(rsi); |
341 | } |
342 | |
343 | void MacroAssembler::push_callee_saved_registers() { |
344 | push(rsi); |
345 | push(rdi); |
346 | push(rdx); |
347 | push(rcx); |
348 | } |
349 | |
350 | void MacroAssembler::pushoop(jobject obj) { |
351 | push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate()); |
352 | } |
353 | |
354 | void MacroAssembler::pushklass(Metadata* obj) { |
355 | push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate()); |
356 | } |
357 | |
358 | void MacroAssembler::pushptr(AddressLiteral src) { |
359 | if (src.is_lval()) { |
360 | push_literal32((int32_t)src.target(), src.rspec()); |
361 | } else { |
362 | pushl(as_Address(src)); |
363 | } |
364 | } |
365 | |
366 | static void pass_arg0(MacroAssembler* masm, Register arg) { |
367 | masm->push(arg); |
368 | } |
369 | |
370 | static void pass_arg1(MacroAssembler* masm, Register arg) { |
371 | masm->push(arg); |
372 | } |
373 | |
374 | static void pass_arg2(MacroAssembler* masm, Register arg) { |
375 | masm->push(arg); |
376 | } |
377 | |
378 | static void pass_arg3(MacroAssembler* masm, Register arg) { |
379 | masm->push(arg); |
380 | } |
381 | |
382 | #ifndef PRODUCT |
383 | extern "C" void findpc(intptr_t x); |
384 | #endif |
385 | |
386 | void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) { |
387 | // In order to get locks to work, we need to fake an in_VM state |
388 | JavaThread* thread = JavaThread::current(); |
389 | JavaThreadState saved_state = thread->thread_state(); |
390 | thread->set_thread_state(_thread_in_vm); |
391 | if (ShowMessageBoxOnError) { |
392 | JavaThread* thread = JavaThread::current(); |
393 | JavaThreadState saved_state = thread->thread_state(); |
394 | thread->set_thread_state(_thread_in_vm); |
395 | if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { |
396 | ttyLocker ttyl; |
397 | BytecodeCounter::print(); |
398 | } |
399 | // To see where a verify_oop failed, get $ebx+40/X for this frame. |
400 | // This is the value of eip which points to where verify_oop will return. |
401 | if (os::message_box(msg, "Execution stopped, print registers?")) { |
402 | print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip); |
403 | BREAKPOINT; |
404 | } |
405 | } |
406 | fatal("DEBUG MESSAGE: %s", msg)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 406, "DEBUG MESSAGE: %s", msg); ::breakpoint(); } while (0); |
407 | } |
408 | |
409 | void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) { |
410 | ttyLocker ttyl; |
411 | FlagSetting fs(Debugging, true); |
412 | tty->print_cr("eip = 0x%08x", eip); |
413 | #ifndef PRODUCT |
414 | if ((WizardMode || Verbose) && PrintMiscellaneous) { |
415 | tty->cr(); |
416 | findpc(eip); |
417 | tty->cr(); |
418 | } |
419 | #endif |
420 | #define PRINT_REG(rax) \ |
421 | { tty->print("%s = ", #rax); os::print_location(tty, rax); } |
422 | PRINT_REG(rax); |
423 | PRINT_REG(rbx); |
424 | PRINT_REG(rcx); |
425 | PRINT_REG(rdx); |
426 | PRINT_REG(rdi); |
427 | PRINT_REG(rsi); |
428 | PRINT_REG(rbp); |
429 | PRINT_REG(rsp); |
430 | #undef PRINT_REG |
431 | // Print some words near top of stack. |
432 | int* dump_sp = (int*) rsp; |
433 | for (int col1 = 0; col1 < 8; col1++) { |
434 | tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); |
435 | os::print_location(tty, *dump_sp++); |
436 | } |
437 | for (int row = 0; row < 16; row++) { |
438 | tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); |
439 | for (int col = 0; col < 8; col++) { |
440 | tty->print(" 0x%08x", *dump_sp++); |
441 | } |
442 | tty->cr(); |
443 | } |
444 | // Print some instructions around pc: |
445 | Disassembler::decode((address)eip-64, (address)eip); |
446 | tty->print_cr("--------"); |
447 | Disassembler::decode((address)eip, (address)eip+32); |
448 | } |
449 | |
450 | void MacroAssembler::stop(const char* msg) { |
451 | ExternalAddress message((address)msg); |
452 | // push address of message |
453 | pushptr(message.addr()); |
454 | { Label L; call(L, relocInfo::none); bind(L); } // push eip |
455 | pusha(); // push registers |
456 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32))); |
457 | hlt(); |
458 | } |
459 | |
460 | void MacroAssembler::warn(const char* msg) { |
461 | push_CPU_state(); |
462 | |
463 | ExternalAddress message((address) msg); |
464 | // push address of message |
465 | pushptr(message.addr()); |
466 | |
467 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning))); |
468 | addl(rsp, wordSize); // discard argument |
469 | pop_CPU_state(); |
470 | } |
471 | |
472 | void MacroAssembler::print_state() { |
473 | { Label L; call(L, relocInfo::none); bind(L); } // push eip |
474 | pusha(); // push registers |
475 | |
476 | push_CPU_state(); |
477 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32))); |
478 | pop_CPU_state(); |
479 | |
480 | popa(); |
481 | addl(rsp, wordSize); |
482 | } |
483 | |
484 | #else // _LP64 |
485 | |
486 | // 64 bit versions |
487 | |
488 | Address MacroAssembler::as_Address(AddressLiteral adr) { |
489 | // amd64 always does this as a pc-rel |
490 | // we can be absolute or disp based on the instruction type |
491 | // jmp/call are displacements others are absolute |
492 | assert(!adr.is_lval(), "must be rval"); |
493 | assert(reachable(adr), "must be"); |
494 | return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc()); |
495 | |
496 | } |
497 | |
498 | Address MacroAssembler::as_Address(ArrayAddress adr) { |
499 | AddressLiteral base = adr.base(); |
500 | lea(rscratch1, base); |
501 | Address index = adr.index(); |
502 | assert(index._disp == 0, "must not have disp"); // maybe it can? |
503 | Address array(rscratch1, index._index, index._scale, index._disp); |
504 | return array; |
505 | } |
506 | |
507 | void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { |
508 | Label L, E; |
509 | |
510 | #ifdef _WIN64 |
511 | // Windows always allocates space for its register args |
512 | assert(num_args <= 4, "only register arguments supported"); |
513 | subq(rsp, frame::arg_reg_save_area_bytes); |
514 | #endif |
515 | |
516 | // Align stack if necessary |
517 | testl(rsp, 15); |
518 | jcc(Assembler::zero, L); |
519 | |
520 | subq(rsp, 8); |
521 | { |
522 | call(RuntimeAddress(entry_point)); |
523 | } |
524 | addq(rsp, 8); |
525 | jmp(E); |
526 | |
527 | bind(L); |
528 | { |
529 | call(RuntimeAddress(entry_point)); |
530 | } |
531 | |
532 | bind(E); |
533 | |
534 | #ifdef _WIN64 |
535 | // restore stack pointer |
536 | addq(rsp, frame::arg_reg_save_area_bytes); |
537 | #endif |
538 | |
539 | } |
540 | |
541 | void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { |
542 | assert(!src2.is_lval(), "should use cmpptr"); |
543 | |
544 | if (reachable(src2)) { |
545 | cmpq(src1, as_Address(src2)); |
546 | } else { |
547 | lea(rscratch1, src2); |
548 | Assembler::cmpq(src1, Address(rscratch1, 0)); |
549 | } |
550 | } |
551 | |
552 | int MacroAssembler::corrected_idivq(Register reg) { |
553 | // Full implementation of Java ldiv and lrem; checks for special |
554 | // case as described in JVM spec., p.243 & p.271. The function |
555 | // returns the (pc) offset of the idivq instruction - may be needed |
556 | // for implicit exceptions. |
557 | // |
558 | // normal case special case |
559 | // |
560 | // input : rax: dividend min_long |
561 | // reg: divisor (may not be eax/edx) -1 |
562 | // |
563 | // output: rax: quotient (= rax idiv reg) min_long |
564 | // rdx: remainder (= rax irem reg) 0 |
565 | assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register"); |
566 | static const int64_t min_long = 0x8000000000000000; |
567 | Label normal_case, special_case; |
568 | |
569 | // check for special case |
570 | cmp64(rax, ExternalAddress((address) &min_long)); |
571 | jcc(Assembler::notEqual, normal_case); |
572 | xorl(rdx, rdx); // prepare rdx for possible special case (where |
573 | // remainder = 0) |
574 | cmpq(reg, -1); |
575 | jcc(Assembler::equal, special_case); |
576 | |
577 | // handle normal case |
578 | bind(normal_case); |
579 | cdqq(); |
580 | int idivq_offset = offset(); |
581 | idivq(reg); |
582 | |
583 | // normal and special case exit |
584 | bind(special_case); |
585 | |
586 | return idivq_offset; |
587 | } |
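The special case filtered out before idivq is min_long / -1: the hardware divide would fault on overflow, while the JVM defines the quotient as min_long and the remainder as 0. A standalone sketch of the semantics the stub implements, for illustration only:

    #include <cassert>
    #include <cstdint>

    // Dividing the most negative long by -1 overflows the hardware divide, but
    // the JVM defines the result as min_long with remainder 0.
    static void ldiv_lrem(int64_t dividend, int64_t divisor, int64_t* q, int64_t* r) {
      if (dividend == INT64_MIN && divisor == -1) {   // special case, would trap in idivq
        *q = INT64_MIN;
        *r = 0;
        return;
      }
      *q = dividend / divisor;                        // normal case: plain hardware divide
      *r = dividend % divisor;
    }

    int main() {
      int64_t q, r;
      ldiv_lrem(INT64_MIN, -1, &q, &r);
      assert(q == INT64_MIN && r == 0);
      ldiv_lrem(-7, 2, &q, &r);
      assert(q == -3 && r == -1);                     // rounds toward zero, as in Java
      return 0;
    }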
588 | |
589 | void MacroAssembler::decrementq(Register reg, int value) { |
590 | if (value == min_jint) { subq(reg, value); return; } |
591 | if (value < 0) { incrementq(reg, -value); return; } |
592 | if (value == 0) { ; return; } |
593 | if (value == 1 && UseIncDec) { decq(reg) ; return; } |
594 | /* else */ { subq(reg, value) ; return; } |
595 | } |
596 | |
597 | void MacroAssembler::decrementq(Address dst, int value) { |
598 | if (value == min_jint) { subq(dst, value); return; } |
599 | if (value < 0) { incrementq(dst, -value); return; } |
600 | if (value == 0) { ; return; } |
601 | if (value == 1 && UseIncDec) { decq(dst) ; return; } |
602 | /* else */ { subq(dst, value) ; return; } |
603 | } |
604 | |
605 | void MacroAssembler::incrementq(AddressLiteral dst) { |
606 | if (reachable(dst)) { |
607 | incrementq(as_Address(dst)); |
608 | } else { |
609 | lea(rscratch1, dst); |
610 | incrementq(Address(rscratch1, 0)); |
611 | } |
612 | } |
613 | |
614 | void MacroAssembler::incrementq(Register reg, int value) { |
615 | if (value == min_jint) { addq(reg, value); return; } |
616 | if (value < 0) { decrementq(reg, -value); return; } |
617 | if (value == 0) { ; return; } |
618 | if (value == 1 && UseIncDec) { incq(reg) ; return; } |
619 | /* else */ { addq(reg, value) ; return; } |
620 | } |
621 | |
622 | void MacroAssembler::incrementq(Address dst, int value) { |
623 | if (value == min_jint) { addq(dst, value); return; } |
624 | if (value < 0) { decrementq(dst, -value); return; } |
625 | if (value == 0) { ; return; } |
626 | if (value == 1 && UseIncDec) { incq(dst) ; return; } |
627 | /* else */ { addq(dst, value) ; return; } |
628 | } |
629 | |
630 | // 32bit can do a case table jump in one instruction but we no longer allow the base |
631 | // to be installed in the Address class |
632 | void MacroAssembler::jump(ArrayAddress entry) { |
633 | lea(rscratch1, entry.base()); |
634 | Address dispatch = entry.index(); |
635 | assert(dispatch._base == noreg, "must be"); |
636 | dispatch._base = rscratch1; |
637 | jmp(dispatch); |
638 | } |
639 | |
640 | void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { |
641 | ShouldNotReachHere(); // 64bit doesn't use two regs |
642 | cmpq(x_lo, y_lo); |
643 | } |
644 | |
645 | void MacroAssembler::lea(Register dst, AddressLiteral src) { |
646 | mov_literal64(dst, (intptr_t)src.target(), src.rspec()); |
647 | } |
648 | |
649 | void MacroAssembler::lea(Address dst, AddressLiteral adr) { |
650 | mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec()); |
651 | movptr(dst, rscratch1); |
652 | } |
653 | |
654 | void MacroAssembler::leave() { |
655 | // %%% is this really better? Why not on 32bit too? |
656 | emit_int8((unsigned char)0xC9); // LEAVE |
657 | } |
658 | |
659 | void MacroAssembler::lneg(Register hi, Register lo) { |
660 | ShouldNotReachHere(); // 64bit doesn't use two regs |
661 | negq(lo); |
662 | } |
663 | |
664 | void MacroAssembler::movoop(Register dst, jobject obj) { |
665 | mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate()); |
666 | } |
667 | |
668 | void MacroAssembler::movoop(Address dst, jobject obj) { |
669 | mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate()); |
670 | movq(dst, rscratch1); |
671 | } |
672 | |
673 | void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { |
674 | mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); |
675 | } |
676 | |
677 | void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { |
678 | mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); |
679 | movq(dst, rscratch1); |
680 | } |
681 | |
682 | void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) { |
683 | if (src.is_lval()) { |
684 | mov_literal64(dst, (intptr_t)src.target(), src.rspec()); |
685 | } else { |
686 | if (reachable(src)) { |
687 | movq(dst, as_Address(src)); |
688 | } else { |
689 | lea(scratch, src); |
690 | movq(dst, Address(scratch, 0)); |
691 | } |
692 | } |
693 | } |
694 | |
695 | void MacroAssembler::movptr(ArrayAddress dst, Register src) { |
696 | movq(as_Address(dst), src); |
697 | } |
698 | |
699 | void MacroAssembler::movptr(Register dst, ArrayAddress src) { |
700 | movq(dst, as_Address(src)); |
701 | } |
702 | |
703 | // src should NEVER be a real pointer. Use AddressLiteral for true pointers |
704 | void MacroAssembler::movptr(Address dst, intptr_t src) { |
705 | if (is_simm32(src)) { |
706 | movptr(dst, checked_cast<int32_t>(src)); |
707 | } else { |
708 | mov64(rscratch1, src); |
709 | movq(dst, rscratch1); |
710 | } |
711 | } |
712 | |
713 | // These are mostly for initializing NULL |
714 | void MacroAssembler::movptr(Address dst, int32_t src) { |
715 | movslq(dst, src); |
716 | } |
717 | |
718 | void MacroAssembler::movptr(Register dst, int32_t src) { |
719 | mov64(dst, (intptr_t)src); |
720 | } |
721 | |
722 | void MacroAssembler::pushoop(jobject obj) { |
723 | movoop(rscratch1, obj); |
724 | push(rscratch1); |
725 | } |
726 | |
727 | void MacroAssembler::pushklass(Metadata* obj) { |
728 | mov_metadata(rscratch1, obj); |
729 | push(rscratch1); |
730 | } |
731 | |
732 | void MacroAssembler::pushptr(AddressLiteral src) { |
733 | lea(rscratch1, src); |
734 | if (src.is_lval()) { |
735 | push(rscratch1); |
736 | } else { |
737 | pushq(Address(rscratch1, 0)); |
738 | } |
739 | } |
740 | |
741 | void MacroAssembler::reset_last_Java_frame(bool clear_fp) { |
742 | reset_last_Java_frame(r15_thread, clear_fp); |
743 | } |
744 | |
745 | void MacroAssembler::set_last_Java_frame(Register last_java_sp, |
746 | Register last_java_fp, |
747 | address last_java_pc) { |
748 | vzeroupper(); |
749 | // determine last_java_sp register |
750 | if (!last_java_sp->is_valid()) { |
751 | last_java_sp = rsp; |
752 | } |
753 | |
754 | // last_java_fp is optional |
755 | if (last_java_fp->is_valid()) { |
756 | movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), |
757 | last_java_fp); |
758 | } |
759 | |
760 | // last_java_pc is optional |
761 | if (last_java_pc != NULL) { |
762 | Address java_pc(r15_thread, |
763 | JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()); |
764 | lea(rscratch1, InternalAddress(last_java_pc)); |
765 | movptr(java_pc, rscratch1); |
766 | } |
767 | |
768 | movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp); |
769 | } |
770 | |
771 | static void pass_arg0(MacroAssembler* masm, Register arg) { |
772 | if (c_rarg0 != arg ) { |
773 | masm->mov(c_rarg0, arg); |
774 | } |
775 | } |
776 | |
777 | static void pass_arg1(MacroAssembler* masm, Register arg) { |
778 | if (c_rarg1 != arg ) { |
779 | masm->mov(c_rarg1, arg); |
780 | } |
781 | } |
782 | |
783 | static void pass_arg2(MacroAssembler* masm, Register arg) { |
784 | if (c_rarg2 != arg ) { |
785 | masm->mov(c_rarg2, arg); |
786 | } |
787 | } |
788 | |
789 | static void pass_arg3(MacroAssembler* masm, Register arg) { |
790 | if (c_rarg3 != arg ) { |
791 | masm->mov(c_rarg3, arg); |
792 | } |
793 | } |
794 | |
795 | void MacroAssembler::stop(const char* msg) { |
796 | if (ShowMessageBoxOnError) { |
797 | address rip = pc(); |
798 | pusha(); // get regs on stack |
799 | lea(c_rarg1, InternalAddress(rip)); |
800 | movq(c_rarg2, rsp); // pass pointer to regs array |
801 | } |
802 | lea(c_rarg0, ExternalAddress((address) msg)); |
803 | andq(rsp, -16); // align stack as required by ABI |
804 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64))); |
805 | hlt(); |
806 | } |
807 | |
808 | void MacroAssembler::warn(const char* msg) { |
809 | push(rbp); |
810 | movq(rbp, rsp); |
811 | andq(rsp, -16); // align stack as required by push_CPU_state and call |
812 | push_CPU_state(); // keeps alignment at 16 bytes |
813 | lea(c_rarg0, ExternalAddress((address) msg)); |
814 | lea(rax, ExternalAddress(CAST_FROM_FN_PTR(address, warning))); |
815 | call(rax); |
816 | pop_CPU_state(); |
817 | mov(rsp, rbp); |
818 | pop(rbp); |
819 | } |
820 | |
821 | void MacroAssembler::print_state() { |
822 | address rip = pc(); |
823 | pusha(); // get regs on stack |
824 | push(rbp); |
825 | movq(rbp, rsp); |
826 | andq(rsp, -16); // align stack as required by push_CPU_state and call |
827 | push_CPU_state(); // keeps alignment at 16 bytes |
828 | |
829 | lea(c_rarg0, InternalAddress(rip)); |
830 | lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array |
831 | call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1); |
832 | |
833 | pop_CPU_state(); |
834 | mov(rsp, rbp); |
835 | pop(rbp); |
836 | popa(); |
837 | } |
838 | |
839 | #ifndef PRODUCT |
840 | extern "C" void findpc(intptr_t x); |
841 | #endif |
842 | |
843 | void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) { |
844 | // In order to get locks to work, we need to fake an in_VM state |
845 | if (ShowMessageBoxOnError) { |
846 | JavaThread* thread = JavaThread::current(); |
847 | JavaThreadState saved_state = thread->thread_state(); |
Value stored to 'saved_state' during its initialization is never read
848 | thread->set_thread_state(_thread_in_vm); |
849 | #ifndef PRODUCT |
850 | if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { |
851 | ttyLocker ttyl; |
852 | BytecodeCounter::print(); |
853 | } |
854 | #endif |
855 | // To see where a verify_oop failed, get $ebx+40/X for this frame. |
856 | // XXX correct this offset for amd64 |
857 | // This is the value of eip which points to where verify_oop will return. |
858 | if (os::message_box(msg, "Execution stopped, print registers?")) { |
859 | print_state64(pc, regs); |
860 | BREAKPOINT; |
861 | } |
862 | } |
863 | fatal("DEBUG MESSAGE: %s", msg)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 863, "DEBUG MESSAGE: %s", msg); ::breakpoint(); } while (0); |
864 | } |
865 | |
866 | void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) { |
867 | ttyLocker ttyl; |
868 | FlagSetting fs(Debugging, true); |
869 | tty->print_cr("rip = 0x%016lx", (intptr_t)pc); |
870 | #ifndef PRODUCT |
871 | tty->cr(); |
872 | findpc(pc); |
873 | tty->cr(); |
874 | #endif |
875 | #define PRINT_REG(rax, value) \ |
876 | { tty->print("%s = ", #rax); os::print_location(tty, value); } |
877 | PRINT_REG(rax, regs[15]); |
878 | PRINT_REG(rbx, regs[12]); |
879 | PRINT_REG(rcx, regs[14]); |
880 | PRINT_REG(rdx, regs[13]); |
881 | PRINT_REG(rdi, regs[8]); |
882 | PRINT_REG(rsi, regs[9]); |
883 | PRINT_REG(rbp, regs[10]); |
884 | // rsp is actually not stored by pusha(), compute the old rsp from regs (rsp after pusha): regs + 16 = old rsp |
885 | PRINT_REG(rsp, (intptr_t)(®s[16])); |
886 | PRINT_REG(r8 , regs[7]); |
887 | PRINT_REG(r9 , regs[6]); |
888 | PRINT_REG(r10, regs[5]); |
889 | PRINT_REG(r11, regs[4]); |
890 | PRINT_REG(r12, regs[3]); |
891 | PRINT_REG(r13, regs[2]); |
892 | PRINT_REG(r14, regs[1]); |
893 | PRINT_REG(r15, regs[0]); |
894 | #undef PRINT_REG |
895 | // Print some words near the top of the stack. |
896 | int64_t* rsp = ®s[16]; |
897 | int64_t* dump_sp = rsp; |
898 | for (int col1 = 0; col1 < 8; col1++) { |
899 | tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); |
900 | os::print_location(tty, *dump_sp++); |
901 | } |
902 | for (int row = 0; row < 25; row++) { |
903 | tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); |
904 | for (int col = 0; col < 4; col++) { |
905 | tty->print(" 0x%016lx", (intptr_t)*dump_sp++); |
906 | } |
907 | tty->cr(); |
908 | } |
909 | // Print some instructions around pc: |
910 | Disassembler::decode((address)pc-64, (address)pc); |
911 | tty->print_cr("--------"); |
912 | Disassembler::decode((address)pc, (address)pc+32); |
913 | } |
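The regs[] indices printed above follow from the order in which the stop()/print_state() paths push the sixteen general-purpose registers: the first register pushed (rax) ends up at the highest address, regs[15], the last one (r15) at regs[0], and the pre-push rsp is &regs[16]; index 11 is the slot reserved for rsp itself and is not printed. A small sketch of that index arithmetic; the push order below is inferred from the indices in the code and should be treated as an assumption:

    #include <cassert>
    #include <cstring>

    // Assumed push order: rax first, r15 last, with an unused rsp slot in between,
    // so regs[] index = 15 - (position in push order).
    int main() {
      const char* push_order[16] = {
        "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
        "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15"
      };
      assert(std::strcmp(push_order[15 - 15], "rax") == 0);  // regs[15]
      assert(std::strcmp(push_order[15 - 12], "rbx") == 0);  // regs[12]
      assert(std::strcmp(push_order[15 - 8],  "rdi") == 0);  // regs[8]
      assert(std::strcmp(push_order[15 - 0],  "r15") == 0);  // regs[0]
      return 0;
    }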
914 | |
915 | // The java_calling_convention describes stack locations as ideal slots on |
916 | // a frame with no abi restrictions. Since we must observe abi restrictions |
917 | // (like the placement of the register window) the slots must be biased by |
918 | // the following value. |
919 | static int reg2offset_in(VMReg r) { |
920 | // Account for saved rbp and return address |
921 | // This should really be in_preserve_stack_slots |
922 | return (r->reg2stack() + 4) * VMRegImpl::stack_slot_size; |
923 | } |
924 | |
925 | static int reg2offset_out(VMReg r) { |
926 | return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; |
927 | } |
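reg2offset_in() and reg2offset_out() convert a VMReg stack-slot number into a byte offset; incoming arguments are biased by four 4-byte slots to skip the saved rbp and the return address. A worked sketch of the arithmetic; the slot size and the zero out_preserve value below are assumptions used only for this illustration:

    #include <cassert>

    // Slot-to-byte arithmetic from the two helpers above.
    static const int stack_slot_size = 4;   // stands in for VMRegImpl::stack_slot_size

    static int reg2offset_in_sketch(int slot)  { return (slot + 4) * stack_slot_size; }
    static int reg2offset_out_sketch(int slot, int out_preserve_slots) {
      return (slot + out_preserve_slots) * stack_slot_size;
    }

    int main() {
      // Incoming slot 0 sits just above the saved rbp and return address (16 bytes).
      assert(reg2offset_in_sketch(0) == 16);
      // Outgoing slot 2 with nothing preserved is 8 bytes above rsp.
      assert(reg2offset_out_sketch(2, 0) == 8);
      return 0;
    }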
928 | |
929 | // A long move |
930 | void MacroAssembler::long_move(VMRegPair src, VMRegPair dst) { |
931 | |
932 | // The calling convention assures us that each VMRegPair is either |
933 | // all really one physical register or adjacent stack slots. |
934 | |
935 | if (src.is_single_phys_reg() ) { |
936 | if (dst.is_single_phys_reg()) { |
937 | if (dst.first() != src.first()) { |
938 | mov(dst.first()->as_Register(), src.first()->as_Register()); |
939 | } |
940 | } else { |
941 | assert(dst.is_single_reg(), "not a stack pair"); |
942 | movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register()); |
943 | } |
944 | } else if (dst.is_single_phys_reg()) { |
945 | assert(src.is_single_reg(), "not a stack pair"); |
946 | movq(dst.first()->as_Register(), Address(rbp, reg2offset_out(src.first()))); |
947 | } else { |
948 | assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs"); |
949 | movq(rax, Address(rbp, reg2offset_in(src.first()))); |
950 | movq(Address(rsp, reg2offset_out(dst.first())), rax); |
951 | } |
952 | } |
953 | |
954 | // A double move |
955 | void MacroAssembler::double_move(VMRegPair src, VMRegPair dst) { |
956 | |
957 | // The calling convention assures us that each VMRegPair is either |
958 | // all really one physical register or adjacent stack slots. |
959 | |
960 | if (src.is_single_phys_reg() ) { |
961 | if (dst.is_single_phys_reg()) { |
962 | // In theory these overlap but the ordering is such that this is likely a nop |
963 | if ( src.first() != dst.first()) { |
964 | movdbl(dst.first()->as_XMMRegister(), src.first()->as_XMMRegister()); |
965 | } |
966 | } else { |
967 | assert(dst.is_single_reg(), "not a stack pair"); |
968 | movdbl(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister()); |
969 | } |
970 | } else if (dst.is_single_phys_reg()) { |
971 | assert(src.is_single_reg(), "not a stack pair"); |
972 | movdbl(dst.first()->as_XMMRegister(), Address(rbp, reg2offset_out(src.first()))); |
973 | } else { |
974 | assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs"); |
975 | movq(rax, Address(rbp, reg2offset_in(src.first()))); |
976 | movq(Address(rsp, reg2offset_out(dst.first())), rax); |
977 | } |
978 | } |
979 | |
980 | |
981 | // A float arg may have to do float reg int reg conversion |
982 | void MacroAssembler::float_move(VMRegPair src, VMRegPair dst) { |
983 | assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); |
984 | |
985 | // The calling convention assures us that each VMRegPair is either |
986 | // all really one physical register or adjacent stack slots. |
987 | |
988 | if (src.first()->is_stack()) { |
989 | if (dst.first()->is_stack()) { |
990 | movl(rax, Address(rbp, reg2offset_in(src.first()))); |
991 | movptr(Address(rsp, reg2offset_out(dst.first())), rax); |
992 | } else { |
993 | // stack to reg |
994 | assert(dst.first()->is_XMMRegister(), "only expect xmm registers as parameters"); |
995 | movflt(dst.first()->as_XMMRegister(), Address(rbp, reg2offset_in(src.first()))); |
996 | } |
997 | } else if (dst.first()->is_stack()) { |
998 | // reg to stack |
999 | assert(src.first()->is_XMMRegister(), "only expect xmm registers as parameters"); |
1000 | movflt(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister()); |
1001 | } else { |
1002 | // reg to reg |
1003 | // In theory these overlap but the ordering is such that this is likely a nop |
1004 | if ( src.first() != dst.first()) { |
1005 | movdbl(dst.first()->as_XMMRegister(), src.first()->as_XMMRegister()); |
1006 | } |
1007 | } |
1008 | } |
1009 | |
1010 | // On 64 bit we will store integer-like items to the stack as |
1011 | // 64-bit items (x86_32/64 ABI) even though Java would only store |
1012 | // 32 bits for a parameter. On 32 bit it will simply be 32 bits, |
1013 | // so this routine does 32->32 on 32 bit and 32->64 on 64 bit. |
1014 | void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst) { |
1015 | if (src.first()->is_stack()) { |
1016 | if (dst.first()->is_stack()) { |
1017 | // stack to stack |
1018 | movslq(rax, Address(rbp, reg2offset_in(src.first()))); |
1019 | movq(Address(rsp, reg2offset_out(dst.first())), rax); |
1020 | } else { |
1021 | // stack to reg |
1022 | movslq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first()))); |
1023 | } |
1024 | } else if (dst.first()->is_stack()) { |
1025 | // reg to stack |
1026 | // Do we really have to sign extend??? |
1027 | // __ movslq(src.first()->as_Register(), src.first()->as_Register()); |
1028 | movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register()); |
1029 | } else { |
1030 | // Do we really have to sign extend??? |
1031 | // __ movslq(dst.first()->as_Register(), src.first()->as_Register()); |
1032 | if (dst.first() != src.first()) { |
1033 | movq(dst.first()->as_Register(), src.first()->as_Register()); |
1034 | } |
1035 | } |
1036 | } |
1037 | |
1038 | void MacroAssembler::move_ptr(VMRegPair src, VMRegPair dst) { |
1039 | if (src.first()->is_stack()) { |
1040 | if (dst.first()->is_stack()) { |
1041 | // stack to stack |
1042 | movq(rax, Address(rbp, reg2offset_in(src.first()))); |
1043 | movq(Address(rsp, reg2offset_out(dst.first())), rax); |
1044 | } else { |
1045 | // stack to reg |
1046 | movq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first()))); |
1047 | } |
1048 | } else if (dst.first()->is_stack()) { |
1049 | // reg to stack |
1050 | movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register()); |
1051 | } else { |
1052 | if (dst.first() != src.first()) { |
1053 | movq(dst.first()->as_Register(), src.first()->as_Register()); |
1054 | } |
1055 | } |
1056 | } |
1057 | |
1058 | // An oop arg. Must pass a handle not the oop itself |
1059 | void MacroAssembler::object_move(OopMap* map, |
1060 | int oop_handle_offset, |
1061 | int framesize_in_slots, |
1062 | VMRegPair src, |
1063 | VMRegPair dst, |
1064 | bool is_receiver, |
1065 | int* receiver_offset) { |
1066 | |
1067 | // must pass a handle. First figure out the location we use as a handle |
1068 | |
1069 | Register rHandle = dst.first()->is_stack() ? rax : dst.first()->as_Register(); |
1070 | |
1071 | // See if oop is NULL; if it is, we need no handle |
1072 | |
1073 | if (src.first()->is_stack()) { |
1074 | |
1075 | // Oop is already on the stack as an argument |
1076 | int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); |
1077 | map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); |
1078 | if (is_receiver) { |
1079 | *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; |
1080 | } |
1081 | |
1082 | cmpptr(Address(rbp, reg2offset_in(src.first())), (int32_t)NULL_WORD); |
1083 | lea(rHandle, Address(rbp, reg2offset_in(src.first()))); |
1084 | // conditionally move a NULL |
1085 | cmovptr(Assembler::equal, rHandle, Address(rbp, reg2offset_in(src.first()))); |
1086 | } else { |
1087 | |
1088 | // Oop is in a register; we must store it to the space we reserve |
1089 | // on the stack for oop_handles and pass a handle if oop is non-NULL |
1090 | |
1091 | const Register rOop = src.first()->as_Register(); |
1092 | int oop_slot; |
1093 | if (rOop == j_rarg0) |
1094 | oop_slot = 0; |
1095 | else if (rOop == j_rarg1) |
1096 | oop_slot = 1; |
1097 | else if (rOop == j_rarg2) |
1098 | oop_slot = 2; |
1099 | else if (rOop == j_rarg3) |
1100 | oop_slot = 3; |
1101 | else if (rOop == j_rarg4) |
1102 | oop_slot = 4; |
1103 | else { |
1104 | assert(rOop == j_rarg5, "wrong register"); |
1105 | oop_slot = 5; |
1106 | } |
1107 | |
1108 | oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; |
1109 | int offset = oop_slot*VMRegImpl::stack_slot_size; |
1110 | |
1111 | map->set_oop(VMRegImpl::stack2reg(oop_slot)); |
1112 | // Store oop in handle area, may be NULL |
1113 | movptr(Address(rsp, offset), rOop); |
1114 | if (is_receiver) { |
1115 | *receiver_offset = offset; |
1116 | } |
1117 | |
1118 | cmpptr(rOop, (int32_t)NULL_WORD); |
1119 | lea(rHandle, Address(rsp, offset)); |
1120 | // conditionally move a NULL from the handle area where it was just stored |
1121 | cmovptr(Assembler::equal, rHandle, Address(rsp, offset)); |
1122 | } |
1123 | |
1124 | // If arg is on the stack then place it, otherwise it is already in the correct reg. |
1125 | if (dst.first()->is_stack()) { |
1126 | movptr(Address(rsp, reg2offset_out(dst.first())), rHandle); |
1127 | } |
1128 | } |
1129 | |
1130 | #endif // _LP64 |
1131 | |
1132 | // Now versions that are common to 32/64 bit |
1133 | |
1134 | void MacroAssembler::addptr(Register dst, int32_t imm32) { |
1135 | LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32)); |
1136 | } |
1137 | |
1138 | void MacroAssembler::addptr(Register dst, Register src) { |
1139 | LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); |
1140 | } |
1141 | |
1142 | void MacroAssembler::addptr(Address dst, Register src) { |
1143 | LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); |
1144 | } |
1145 | |
1146 | void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) { |
1147 | if (reachable(src)) { |
1148 | Assembler::addsd(dst, as_Address(src)); |
1149 | } else { |
1150 | lea(rscratch1, src); |
1151 | Assembler::addsd(dst, Address(rscratch1, 0)); |
1152 | } |
1153 | } |
1154 | |
1155 | void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) { |
1156 | if (reachable(src)) { |
1157 | addss(dst, as_Address(src)); |
1158 | } else { |
1159 | lea(rscratch1, src); |
1160 | addss(dst, Address(rscratch1, 0)); |
1161 | } |
1162 | } |
1163 | |
1164 | void MacroAssembler::addpd(XMMRegister dst, AddressLiteral src) { |
1165 | if (reachable(src)) { |
1166 | Assembler::addpd(dst, as_Address(src)); |
1167 | } else { |
1168 | lea(rscratch1, src); |
1169 | Assembler::addpd(dst, Address(rscratch1, 0)); |
1170 | } |
1171 | } |
1172 | |
1173 | // See 8273459. Function for ensuring 64-byte alignment, intended for stubs only. |
1174 | // Stub code is generated once and never copied. |
1175 | // NMethods can't use this because they get copied and we can't force alignment > 32 bytes. |
1176 | void MacroAssembler::align64() { |
1177 | align(64, (unsigned long long) pc()); |
1178 | } |
1179 | |
1180 | void MacroAssembler::align32() { |
1181 | align(32, (unsigned long long) pc()); |
1182 | } |
1183 | |
1184 | void MacroAssembler::align(int modulus) { |
1185 | // 8273459: Ensure alignment is possible with current segment alignment |
1186 | assert(modulus <= CodeEntryAlignment, "Alignment must be <= CodeEntryAlignment"); |
1187 | align(modulus, offset()); |
1188 | } |
1189 | |
1190 | void MacroAssembler::align(int modulus, int target) { |
1191 | if (target % modulus != 0) { |
1192 | nop(modulus - (target % modulus)); |
1193 | } |
1194 | } |
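align(modulus, target) pads with nops until target reaches the next multiple of modulus, emitting nothing when it is already aligned. The padding amount can be sanity-checked with a standalone sketch, illustrative only:

    #include <cassert>

    // Padding rule used above: bring `target` up to the next multiple of
    // `modulus` by emitting that many one-byte nops (zero when already aligned).
    static int padding(int modulus, int target) {
      return (target % modulus == 0) ? 0 : modulus - (target % modulus);
    }

    int main() {
      assert(padding(16, 32) == 0);    // already 16-byte aligned
      assert(padding(16, 33) == 15);   // 33 + 15 == 48
      assert(padding(64, 100) == 28);  // 100 + 28 == 128
      return 0;
    }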
1195 | |
1196 | void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src, Register scratch_reg) { |
1197 | // Used in sign-masking with aligned address. |
1198 | assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); |
1199 | if (reachable(src)) { |
1200 | Assembler::andpd(dst, as_Address(src)); |
1201 | } else { |
1202 | lea(scratch_reg, src); |
1203 | Assembler::andpd(dst, Address(scratch_reg, 0)); |
1204 | } |
1205 | } |
1206 | |
1207 | void MacroAssembler::andps(XMMRegister dst, AddressLiteral src, Register scratch_reg) { |
1208 | // Used in sign-masking with aligned address. |
1209 | assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); |
1210 | if (reachable(src)) { |
1211 | Assembler::andps(dst, as_Address(src)); |
1212 | } else { |
1213 | lea(scratch_reg, src); |
1214 | Assembler::andps(dst, Address(scratch_reg, 0)); |
1215 | } |
1216 | } |
1217 | |
1218 | void MacroAssembler::andptr(Register dst, int32_t imm32) { |
1219 | LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32)); |
1220 | } |
1221 | |
1222 | void MacroAssembler::atomic_incl(Address counter_addr) { |
1223 | lock(); |
1224 | incrementl(counter_addr); |
1225 | } |
1226 | |
1227 | void MacroAssembler::atomic_incl(AddressLiteral counter_addr, Register scr) { |
1228 | if (reachable(counter_addr)) { |
1229 | atomic_incl(as_Address(counter_addr)); |
1230 | } else { |
1231 | lea(scr, counter_addr); |
1232 | atomic_incl(Address(scr, 0)); |
1233 | } |
1234 | } |
1235 | |
1236 | #ifdef _LP64 |
1237 | void MacroAssembler::atomic_incq(Address counter_addr) { |
1238 | lock(); |
1239 | incrementq(counter_addr); |
1240 | } |
1241 | |
1242 | void MacroAssembler::atomic_incq(AddressLiteral counter_addr, Register scr) { |
1243 | if (reachable(counter_addr)) { |
1244 | atomic_incq(as_Address(counter_addr)); |
1245 | } else { |
1246 | lea(scr, counter_addr); |
1247 | atomic_incq(Address(scr, 0)); |
1248 | } |
1249 | } |
1250 | #endif |
1251 | |
1252 | // Writes to stack successive pages until offset reached to check for |
1253 | // stack overflow + shadow pages. This clobbers tmp. |
1254 | void MacroAssembler::bang_stack_size(Register size, Register tmp) { |
1255 | movptr(tmp, rsp); |
1256 | // Bang stack for total size given plus shadow page size. |
1257 | // Bang one page at a time because large size can bang beyond yellow and |
1258 | // red zones. |
1259 | Label loop; |
1260 | bind(loop); |
1261 | movl(Address(tmp, (-os::vm_page_size())), size ); |
1262 | subptr(tmp, os::vm_page_size()); |
1263 | subl(size, os::vm_page_size()); |
1264 | jcc(Assembler::greater, loop); |
1265 | |
1266 | // Bang down shadow pages too. |
1267 | // At this point, (tmp-0) is the last address touched, so don't |
1268 | // touch it again. (It was touched as (tmp-pagesize) but then tmp |
1269 | // was post-decremented.) Skip this address by starting at i=1, and |
1270 | // touch a few more pages below. N.B. It is important to touch all |
1271 | // the way down including all pages in the shadow zone. |
1272 | for (int i = 1; i < ((int)StackOverflow::stack_shadow_zone_size() / os::vm_page_size()); i++) { |
1273 | // this could be any sized move but this can be a debugging crumb |
1274 | // so the bigger the better. |
1275 | movptr(Address(tmp, (-i*os::vm_page_size())), size ); |
1276 | } |
1277 | } |
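bang_stack_size() touches one page per loop iteration until size is consumed, then touches the shadow-zone pages starting at i = 1 because the final loop iteration already banged the page at tmp. A rough standalone model of how many pages end up written; the page and shadow sizes below are illustrative, not the VM's configured values:

    #include <cassert>

    // Rough model of the banging loop above: one store per page of `size`, then
    // one store per remaining shadow-zone page (starting at i = 1).
    static int pages_banged(int size, int page_size, int shadow_zone_size) {
      int n = 0;
      while (size > 0) { size -= page_size; n++; }   // the jcc(greater, loop) part
      n += shadow_zone_size / page_size - 1;         // i = 1 .. shadow_pages-1
      return n;
    }

    int main() {
      // 10 pages of frame plus an 8-page shadow zone: 10 + 7 additional touches.
      assert(pages_banged(10 * 4096, 4096, 8 * 4096) == 17);
      return 0;
    }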
1278 | |
1279 | void MacroAssembler::reserved_stack_check() { |
1280 | // testing if reserved zone needs to be enabled |
1281 | Label no_reserved_zone_enabling; |
1282 | Register thread = NOT_LP64(rsi) LP64_ONLY(r15_thread); |
1283 | NOT_LP64(get_thread(rsi);) |
1284 | |
1285 | cmpptr(rsp, Address(thread, JavaThread::reserved_stack_activation_offset())); |
1286 | jcc(Assembler::below, no_reserved_zone_enabling); |
1287 | |
1288 | call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); |
1289 | jump(RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry())); |
1290 | should_not_reach_here(); |
1291 | |
1292 | bind(no_reserved_zone_enabling); |
1293 | } |
1294 | |
1295 | void MacroAssembler::c2bool(Register x) { |
1296 | // implements x == 0 ? 0 : 1 |
1297 | // note: must only look at least-significant byte of x |
1298 | // since C-style booleans are stored in one byte |
1299 | // only! (was bug) |
1300 | andl(x, 0xFF); |
1301 | setb(Assembler::notZero, x); |
1302 | } |
1303 | |
1304 | // Wouldn't need if AddressLiteral version had new name |
1305 | void MacroAssembler::call(Label& L, relocInfo::relocType rtype) { |
1306 | Assembler::call(L, rtype); |
1307 | } |
1308 | |
1309 | void MacroAssembler::call(Register entry) { |
1310 | Assembler::call(entry); |
1311 | } |
1312 | |
1313 | void MacroAssembler::call(AddressLiteral entry) { |
1314 | if (reachable(entry)) { |
1315 | Assembler::call_literal(entry.target(), entry.rspec()); |
1316 | } else { |
1317 | lea(rscratch1, entry); |
1318 | Assembler::call(rscratch1); |
1319 | } |
1320 | } |
1321 | |
1322 | void MacroAssembler::ic_call(address entry, jint method_index) { |
1323 | RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); |
1324 | movptr(rax, (intptr_t)Universe::non_oop_word()); |
1325 | call(AddressLiteral(entry, rh)); |
1326 | } |
1327 | |
1328 | // Implementation of call_VM versions |
1329 | |
1330 | void MacroAssembler::call_VM(Register oop_result, |
1331 | address entry_point, |
1332 | bool check_exceptions) { |
1333 | Label C, E; |
1334 | call(C, relocInfo::none); |
1335 | jmp(E); |
1336 | |
1337 | bind(C); |
1338 | call_VM_helper(oop_result, entry_point, 0, check_exceptions); |
1339 | ret(0); |
1340 | |
1341 | bind(E); |
1342 | } |
1343 | |
1344 | void MacroAssembler::call_VM(Register oop_result, |
1345 | address entry_point, |
1346 | Register arg_1, |
1347 | bool check_exceptions) { |
1348 | Label C, E; |
1349 | call(C, relocInfo::none); |
1350 | jmp(E); |
1351 | |
1352 | bind(C); |
1353 | pass_arg1(this, arg_1); |
1354 | call_VM_helper(oop_result, entry_point, 1, check_exceptions); |
1355 | ret(0); |
1356 | |
1357 | bind(E); |
1358 | } |
1359 | |
1360 | void MacroAssembler::call_VM(Register oop_result, |
1361 | address entry_point, |
1362 | Register arg_1, |
1363 | Register arg_2, |
1364 | bool check_exceptions) { |
1365 | Label C, E; |
1366 | call(C, relocInfo::none); |
1367 | jmp(E); |
1368 | |
1369 | bind(C); |
1370 | |
1371 | LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
1372 | |
1373 | pass_arg2(this, arg_2); |
1374 | pass_arg1(this, arg_1); |
1375 | call_VM_helper(oop_result, entry_point, 2, check_exceptions); |
1376 | ret(0); |
1377 | |
1378 | bind(E); |
1379 | } |
1380 | |
1381 | void MacroAssembler::call_VM(Register oop_result, |
1382 | address entry_point, |
1383 | Register arg_1, |
1384 | Register arg_2, |
1385 | Register arg_3, |
1386 | bool check_exceptions) { |
1387 | Label C, E; |
1388 | call(C, relocInfo::none); |
1389 | jmp(E); |
1390 | |
1391 | bind(C); |
1392 | |
1393 | LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
1394 | LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
1395 | pass_arg3(this, arg_3);
1396 |
1397 | LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
1398 | pass_arg2(this, arg_2); |
1399 | |
1400 | pass_arg1(this, arg_1); |
1401 | call_VM_helper(oop_result, entry_point, 3, check_exceptions); |
1402 | ret(0); |
1403 | |
1404 | bind(E); |
1405 | } |
1406 | |
1407 | void MacroAssembler::call_VM(Register oop_result, |
1408 | Register last_java_sp, |
1409 | address entry_point, |
1410 | int number_of_arguments, |
1411 | bool check_exceptions) { |
1412 | Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
1413 | call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); |
1414 | } |
1415 | |
1416 | void MacroAssembler::call_VM(Register oop_result, |
1417 | Register last_java_sp, |
1418 | address entry_point, |
1419 | Register arg_1, |
1420 | bool check_exceptions) { |
1421 | pass_arg1(this, arg_1); |
1422 | call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); |
1423 | } |
1424 | |
1425 | void MacroAssembler::call_VM(Register oop_result, |
1426 | Register last_java_sp, |
1427 | address entry_point, |
1428 | Register arg_1, |
1429 | Register arg_2, |
1430 | bool check_exceptions) { |
1431 | |
1432 | LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
1433 | pass_arg2(this, arg_2); |
1434 | pass_arg1(this, arg_1); |
1435 | call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); |
1436 | } |
1437 | |
1438 | void MacroAssembler::call_VM(Register oop_result, |
1439 | Register last_java_sp, |
1440 | address entry_point, |
1441 | Register arg_1, |
1442 | Register arg_2, |
1443 | Register arg_3, |
1444 | bool check_exceptions) { |
1445 | LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
1446 | LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
1447 | pass_arg3(this, arg_3);
1448 | LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
1449 | pass_arg2(this, arg_2); |
1450 | pass_arg1(this, arg_1); |
1451 | call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); |
1452 | } |
1453 | |
1454 | void MacroAssembler::super_call_VM(Register oop_result, |
1455 | Register last_java_sp, |
1456 | address entry_point, |
1457 | int number_of_arguments, |
1458 | bool check_exceptions) { |
1459 | Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
1460 | MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); |
1461 | } |
1462 | |
1463 | void MacroAssembler::super_call_VM(Register oop_result, |
1464 | Register last_java_sp, |
1465 | address entry_point, |
1466 | Register arg_1, |
1467 | bool check_exceptions) { |
1468 | pass_arg1(this, arg_1); |
1469 | super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); |
1470 | } |
1471 | |
1472 | void MacroAssembler::super_call_VM(Register oop_result, |
1473 | Register last_java_sp, |
1474 | address entry_point, |
1475 | Register arg_1, |
1476 | Register arg_2, |
1477 | bool check_exceptions) { |
1478 | |
1479 | LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
1480 | pass_arg2(this, arg_2); |
1481 | pass_arg1(this, arg_1); |
1482 | super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); |
1483 | } |
1484 | |
1485 | void MacroAssembler::super_call_VM(Register oop_result, |
1486 | Register last_java_sp, |
1487 | address entry_point, |
1488 | Register arg_1, |
1489 | Register arg_2, |
1490 | Register arg_3, |
1491 | bool check_exceptions) { |
1492 | LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
1493 | LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
1494 | pass_arg3(this, arg_3);
1495 | LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
1496 | pass_arg2(this, arg_2); |
1497 | pass_arg1(this, arg_1); |
1498 | super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); |
1499 | } |
1500 | |
1501 | void MacroAssembler::call_VM_base(Register oop_result, |
1502 | Register java_thread, |
1503 | Register last_java_sp, |
1504 | address entry_point, |
1505 | int number_of_arguments, |
1506 | bool check_exceptions) { |
1507 | // determine java_thread register |
1508 | if (!java_thread->is_valid()) { |
1509 | #ifdef _LP64
1510 | java_thread = r15_thread; |
1511 | #else |
1512 | java_thread = rdi; |
1513 | get_thread(java_thread); |
1514 | #endif // LP64 |
1515 | } |
1516 | // determine last_java_sp register |
1517 | if (!last_java_sp->is_valid()) { |
1518 | last_java_sp = rsp; |
1519 | } |
1520 | // debugging support |
1521 | assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
1522 | LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
1523 | #ifdef ASSERT
1524 | // TraceBytecodes does not use r12 but saves it over the call, so don't verify |
1525 | // r12 is the heapbase. |
1526 | LP64_ONLY(if (UseCompressedOops && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");)
1527 | #endif // ASSERT |
1528 | |
1529 | assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
1530 | assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
1531 | |
1532 | // push java thread (becomes first argument of C function) |
1533 | |
1534 | NOT_LP64(push(java_thread); number_of_arguments++); |
1535 | LP64_ONLY(mov(c_rarg0, r15_thread));
1536 | |
1537 | // set last Java frame before call |
1538 | assert(last_java_sp != rbp, "can't use ebp/rbp");
1539 | |
1540 | // Only interpreter should have to set fp |
1541 | set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);
1542 | |
1543 | // do the call, remove parameters |
1544 | MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); |
1545 | |
1546 | // restore the thread (cannot use the pushed argument since arguments |
1547 | // may be overwritten by C code generated by an optimizing compiler); |
1548 | // however can use the register value directly if it is callee saved. |
1549 | if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
1550 | // rdi & rsi (also r15) are callee saved -> nothing to do |
1551 | #ifdef ASSERT
1552 | guarantee(java_thread != rax, "change this code");
1553 | push(rax); |
1554 | { Label L; |
1555 | get_thread(rax); |
1556 | cmpptr(java_thread, rax); |
1557 | jcc(Assembler::equal, L); |
1558 | STOP("MacroAssembler::call_VM_base: rdi not callee saved?");
1559 | bind(L); |
1560 | } |
1561 | pop(rax); |
1562 | #endif |
1563 | } else { |
1564 | get_thread(java_thread); |
1565 | } |
1566 | // reset last Java frame |
1567 | // Only interpreter should have to clear fp |
1568 | reset_last_Java_frame(java_thread, true); |
1569 | |
1570 | // C++ interp handles this in the interpreter |
1571 | check_and_handle_popframe(java_thread); |
1572 | check_and_handle_earlyret(java_thread); |
1573 | |
1574 | if (check_exceptions) { |
1575 | // check for pending exceptions (java_thread is set upon return) |
1576 | cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
1577 | #ifndef _LP64
1578 | jump_cc(Assembler::notEqual, |
1579 | RuntimeAddress(StubRoutines::forward_exception_entry())); |
1580 | #else |
1581 | // This used to conditionally jump to forward_exception however it is |
1582 | // possible if we relocate that the branch will not reach. So we must jump |
1583 | // around so we can always reach |
1584 | |
1585 | Label ok; |
1586 | jcc(Assembler::equal, ok); |
1587 | jump(RuntimeAddress(StubRoutines::forward_exception_entry())); |
1588 | bind(ok); |
1589 | #endif // LP64 |
1590 | } |
1591 | |
1592 | // get oop result if there is one and reset the value in the thread |
1593 | if (oop_result->is_valid()) { |
1594 | get_vm_result(oop_result, java_thread); |
1595 | } |
1596 | } |
1597 | |
1598 | void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { |
1599 | |
1600 | // Calculate the value for last_Java_sp |
1601 | // somewhat subtle. call_VM does an intermediate call |
1602 | // which places a return address on the stack just under the |
1603 | // stack pointer as the user finished with it. This allows
1604 | // us to retrieve last_Java_pc from last_Java_sp[-1].
1605 | // On 32bit we then have to push additional args on the stack to accomplish |
1606 | // the actual requested call. On 64bit call_VM only can use register args |
1607 | // so the only extra space is the return address that call_VM created. |
1608 | // This hopefully explains the calculations here. |
1609 | |
1610 | #ifdef _LP64
1611 | // We've pushed one address, correct last_Java_sp |
1612 | lea(rax, Address(rsp, wordSize)); |
1613 | #else |
1614 | lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize)); |
1615 | #endif // LP64 |
1616 | |
1617 | call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions); |
1618 | |
1619 | } |
1620 | |
1621 | // Use this method when MacroAssembler version of call_VM_leaf_base() should be called from Interpreter. |
1622 | void MacroAssembler::call_VM_leaf0(address entry_point) { |
1623 | MacroAssembler::call_VM_leaf_base(entry_point, 0); |
1624 | } |
1625 | |
1626 | void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { |
1627 | call_VM_leaf_base(entry_point, number_of_arguments); |
1628 | } |
1629 | |
1630 | void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { |
1631 | pass_arg0(this, arg_0); |
1632 | call_VM_leaf(entry_point, 1); |
1633 | } |
1634 | |
1635 | void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { |
1636 | |
1637 | LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
1638 | pass_arg1(this, arg_1); |
1639 | pass_arg0(this, arg_0); |
1640 | call_VM_leaf(entry_point, 2); |
1641 | } |
1642 | |
1643 | void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { |
1644 | LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
1645 | LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
1646 | pass_arg2(this, arg_2);
1647 | LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
1648 | pass_arg1(this, arg_1); |
1649 | pass_arg0(this, arg_0); |
1650 | call_VM_leaf(entry_point, 3); |
1651 | } |
1652 | |
1653 | void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { |
1654 | pass_arg0(this, arg_0); |
1655 | MacroAssembler::call_VM_leaf_base(entry_point, 1); |
1656 | } |
1657 | |
1658 | void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { |
1659 | |
1660 | LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
1661 | pass_arg1(this, arg_1); |
1662 | pass_arg0(this, arg_0); |
1663 | MacroAssembler::call_VM_leaf_base(entry_point, 2); |
1664 | } |
1665 | |
1666 | void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { |
1667 | LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
1668 | LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
1669 | pass_arg2(this, arg_2);
1670 | LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
1671 | pass_arg1(this, arg_1); |
1672 | pass_arg0(this, arg_0); |
1673 | MacroAssembler::call_VM_leaf_base(entry_point, 3); |
1674 | } |
1675 | |
1676 | void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { |
1677 | LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
1678 | LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
1679 | LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
1680 | pass_arg3(this, arg_3);
1681 | LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
1682 | LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
1683 | pass_arg2(this, arg_2);
1684 | LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
1685 | pass_arg1(this, arg_1); |
1686 | pass_arg0(this, arg_0); |
1687 | MacroAssembler::call_VM_leaf_base(entry_point, 4); |
1688 | } |
1689 | |
1690 | void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { |
1691 | movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset())); |
1692 | movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
1693 | verify_oop_msg(oop_result, "broken oop in call_VM_base");
1694 | } |
1695 | |
1696 | void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { |
1697 | movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); |
1698 | movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD);
1699 | } |
1700 | |
1701 | void MacroAssembler::check_and_handle_earlyret(Register java_thread) { |
1702 | } |
1703 | |
1704 | void MacroAssembler::check_and_handle_popframe(Register java_thread) { |
1705 | } |
1706 | |
1707 | void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) { |
1708 | if (reachable(src1)) { |
1709 | cmpl(as_Address(src1), imm); |
1710 | } else { |
1711 | lea(rscratch1, src1); |
1712 | cmpl(Address(rscratch1, 0), imm); |
1713 | } |
1714 | } |
1715 | |
1716 | void MacroAssembler::cmp32(Register src1, AddressLiteral src2) { |
1717 | assert(!src2.is_lval(), "use cmpptr");
1718 | if (reachable(src2)) { |
1719 | cmpl(src1, as_Address(src2)); |
1720 | } else { |
1721 | lea(rscratch1, src2); |
1722 | cmpl(src1, Address(rscratch1, 0)); |
1723 | } |
1724 | } |
1725 | |
1726 | void MacroAssembler::cmp32(Register src1, int32_t imm) { |
1727 | Assembler::cmpl(src1, imm); |
1728 | } |
1729 | |
1730 | void MacroAssembler::cmp32(Register src1, Address src2) { |
1731 | Assembler::cmpl(src1, src2); |
1732 | } |
1733 | |
1734 | void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { |
1735 | ucomisd(opr1, opr2); |
1736 | |
1737 | Label L; |
1738 | if (unordered_is_less) { |
1739 | movl(dst, -1); |
1740 | jcc(Assembler::parity, L); |
1741 | jcc(Assembler::below , L); |
1742 | movl(dst, 0); |
1743 | jcc(Assembler::equal , L); |
1744 | increment(dst); |
1745 | } else { // unordered is greater |
1746 | movl(dst, 1); |
1747 | jcc(Assembler::parity, L); |
1748 | jcc(Assembler::above , L); |
1749 | movl(dst, 0); |
1750 | jcc(Assembler::equal , L); |
1751 | decrementl(dst); |
1752 | } |
1753 | bind(L); |
1754 | } |
1755 | |
1756 | void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { |
1757 | ucomiss(opr1, opr2); |
1758 | |
1759 | Label L; |
1760 | if (unordered_is_less) { |
1761 | movl(dst, -1); |
1762 | jcc(Assembler::parity, L); |
1763 | jcc(Assembler::below , L); |
1764 | movl(dst, 0); |
1765 | jcc(Assembler::equal , L); |
1766 | increment(dst); |
1767 | } else { // unordered is greater |
1768 | movl(dst, 1); |
1769 | jcc(Assembler::parity, L); |
1770 | jcc(Assembler::above , L); |
1771 | movl(dst, 0); |
1772 | jcc(Assembler::equal , L); |
1773 | decrementl(dst); |
1774 | } |
1775 | bind(L); |
1776 | } |
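Both cmpsd2int and cmpss2int above map a floating-point compare to -1/0/+1; the parity check handles the unordered (NaN) outcome of ucomisd/ucomiss. A standalone sketch of that mapping (illustrative names, not HotSpot code):

#include <cassert>
#include <cmath>

// Compare two doubles and produce -1, 0 or +1; NaN operands ("unordered")
// collapse to either less or greater, depending on the flag.
static int cmp2int_model(double a, double b, bool unordered_is_less) {
  if (std::isnan(a) || std::isnan(b)) {
    return unordered_is_less ? -1 : 1;
  }
  if (a < b) return -1;
  if (a > b) return  1;
  return 0;
}

int main() {
  assert(cmp2int_model(1.0, 2.0, true)  == -1);
  assert(cmp2int_model(2.0, 1.0, true)  ==  1);
  assert(cmp2int_model(1.0, 1.0, true)  ==  0);
  assert(cmp2int_model(NAN, 1.0, true)  == -1);  // unordered treated as "less"
  assert(cmp2int_model(NAN, 1.0, false) ==  1);  // ...or as "greater"
  return 0;
}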
1777 | |
1778 | |
1779 | void MacroAssembler::cmp8(AddressLiteral src1, int imm) { |
1780 | if (reachable(src1)) { |
1781 | cmpb(as_Address(src1), imm); |
1782 | } else { |
1783 | lea(rscratch1, src1); |
1784 | cmpb(Address(rscratch1, 0), imm); |
1785 | } |
1786 | } |
1787 | |
1788 | void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) { |
1789 | #ifdef _LP64
1790 | if (src2.is_lval()) { |
1791 | movptr(rscratch1, src2); |
1792 | Assembler::cmpq(src1, rscratch1); |
1793 | } else if (reachable(src2)) { |
1794 | cmpq(src1, as_Address(src2)); |
1795 | } else { |
1796 | lea(rscratch1, src2); |
1797 | Assembler::cmpq(src1, Address(rscratch1, 0)); |
1798 | } |
1799 | #else |
1800 | if (src2.is_lval()) { |
1801 | cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); |
1802 | } else { |
1803 | cmpl(src1, as_Address(src2)); |
1804 | } |
1805 | #endif // _LP64 |
1806 | } |
1807 | |
1808 | void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) { |
1809 | assert(src2.is_lval(), "not a mem-mem compare");
1810 | #ifdef _LP64
1811 | // moves src2's literal address |
1812 | movptr(rscratch1, src2); |
1813 | Assembler::cmpq(src1, rscratch1); |
1814 | #else |
1815 | cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); |
1816 | #endif // _LP64 |
1817 | } |
1818 | |
1819 | void MacroAssembler::cmpoop(Register src1, Register src2) { |
1820 | cmpptr(src1, src2); |
1821 | } |
1822 | |
1823 | void MacroAssembler::cmpoop(Register src1, Address src2) { |
1824 | cmpptr(src1, src2); |
1825 | } |
1826 | |
1827 | #ifdef _LP64
1828 | void MacroAssembler::cmpoop(Register src1, jobject src2) { |
1829 | movoop(rscratch1, src2); |
1830 | cmpptr(src1, rscratch1); |
1831 | } |
1832 | #endif |
1833 | |
1834 | void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) { |
1835 | if (reachable(adr)) { |
1836 | lock(); |
1837 | cmpxchgptr(reg, as_Address(adr)); |
1838 | } else { |
1839 | lea(rscratch1, adr); |
1840 | lock(); |
1841 | cmpxchgptr(reg, Address(rscratch1, 0)); |
1842 | } |
1843 | } |
1844 | |
1845 | void MacroAssembler::cmpxchgptr(Register reg, Address adr) { |
1846 | LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
1847 | } |
1848 | |
1849 | void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) { |
1850 | if (reachable(src)) { |
1851 | Assembler::comisd(dst, as_Address(src)); |
1852 | } else { |
1853 | lea(rscratch1, src); |
1854 | Assembler::comisd(dst, Address(rscratch1, 0)); |
1855 | } |
1856 | } |
1857 | |
1858 | void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { |
1859 | if (reachable(src)) { |
1860 | Assembler::comiss(dst, as_Address(src)); |
1861 | } else { |
1862 | lea(rscratch1, src); |
1863 | Assembler::comiss(dst, Address(rscratch1, 0)); |
1864 | } |
1865 | } |
1866 | |
1867 | |
1868 | void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) { |
1869 | Condition negated_cond = negate_condition(cond); |
1870 | Label L; |
1871 | jcc(negated_cond, L); |
1872 | pushf(); // Preserve flags |
1873 | atomic_incl(counter_addr); |
1874 | popf(); |
1875 | bind(L); |
1876 | } |
1877 | |
1878 | int MacroAssembler::corrected_idivl(Register reg) { |
1879 | // Full implementation of Java idiv and irem; checks for |
1880 | // special case as described in JVM spec., p.243 & p.271. |
1881 | // The function returns the (pc) offset of the idivl |
1882 | // instruction - may be needed for implicit exceptions. |
1883 | // |
1884 | // normal case special case |
1885 | // |
1886 | // input : rax,: dividend min_int |
1887 | // reg: divisor (may not be rax,/rdx) -1 |
1888 | // |
1889 | // output: rax,: quotient (= rax, idiv reg) min_int |
1890 | // rdx: remainder (= rax, irem reg) 0 |
1891 | assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
1892 | const int min_int = 0x80000000; |
1893 | Label normal_case, special_case; |
1894 | |
1895 | // check for special case |
1896 | cmpl(rax, min_int); |
1897 | jcc(Assembler::notEqual, normal_case); |
1898 | xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0) |
1899 | cmpl(reg, -1); |
1900 | jcc(Assembler::equal, special_case); |
1901 | |
1902 | // handle normal case |
1903 | bind(normal_case); |
1904 | cdql(); |
1905 | int idivl_offset = offset(); |
1906 | idivl(reg); |
1907 | |
1908 | // normal and special case exit |
1909 | bind(special_case); |
1910 | |
1911 | return idivl_offset; |
1912 | } |
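The special case handled above exists because a bare idiv faults when the quotient overflows: min_int / -1 would be 2^31, which is unrepresentable, so the JVM-specified result (quotient min_int, remainder 0) is produced without executing the instruction. A standalone sketch of those semantics (not HotSpot code, names are illustrative):

#include <cassert>
#include <climits>

// Java idiv/irem semantics per JVM spec: INT_MIN / -1 wraps to INT_MIN with
// remainder 0 instead of trapping like the bare x86 idiv instruction would.
static void corrected_idiv_model(int dividend, int divisor, int* q, int* r) {
  if (dividend == INT_MIN && divisor == -1) {   // special case checked above
    *q = INT_MIN;
    *r = 0;
  } else {                                      // normal case: plain idiv
    *q = dividend / divisor;
    *r = dividend % divisor;
  }
}

int main() {
  int q, r;
  corrected_idiv_model(INT_MIN, -1, &q, &r);
  assert(q == INT_MIN && r == 0);
  corrected_idiv_model(7, -2, &q, &r);
  assert(q == -3 && r == 1);
  return 0;
}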
1913 | |
1914 | |
1915 | |
1916 | void MacroAssembler::decrementl(Register reg, int value) { |
1917 | if (value == min_jint) {subl(reg, value) ; return; } |
1918 | if (value < 0) { incrementl(reg, -value); return; } |
1919 | if (value == 0) { ; return; } |
1920 | if (value == 1 && UseIncDec) { decl(reg) ; return; } |
1921 | /* else */ { subl(reg, value) ; return; } |
1922 | } |
1923 | |
1924 | void MacroAssembler::decrementl(Address dst, int value) { |
1925 | if (value == min_jint) {subl(dst, value) ; return; } |
1926 | if (value < 0) { incrementl(dst, -value); return; } |
1927 | if (value == 0) { ; return; } |
1928 | if (value == 1 && UseIncDec) { decl(dst) ; return; } |
1929 | /* else */ { subl(dst, value) ; return; } |
1930 | } |
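The min_jint test in both decrementl variants (and in the incrementl counterparts further down) is needed because the negative branch would otherwise negate the value, and -min_jint does not fit in a 32-bit int. A tiny standalone illustration (not HotSpot code):

#include <cassert>
#include <climits>

int main() {
  // decrementl(reg, value) forwards negative values to incrementl(reg, -value),
  // but -INT_MIN is not representable as an int, so INT_MIN is handled first
  // with a direct subl instead of being negated.
  long long negated = -static_cast<long long>(INT_MIN);
  assert(negated == 2147483648LL);   // one past INT_MAX
  assert(negated > INT_MAX);
  return 0;
}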
1931 | |
1932 | void MacroAssembler::division_with_shift (Register reg, int shift_value) { |
1933 | assert (shift_value > 0, "illegal shift value");
1934 | Label _is_positive; |
1935 | testl (reg, reg); |
1936 | jcc (Assembler::positive, _is_positive); |
1937 | int offset = (1 << shift_value) - 1 ; |
1938 | |
1939 | if (offset == 1) { |
1940 | incrementl(reg); |
1941 | } else { |
1942 | addl(reg, offset); |
1943 | } |
1944 | |
1945 | bind (_is_positive); |
1946 | sarl(reg, shift_value); |
1947 | } |
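division_with_shift adds 2^shift - 1 to negative dividends before the arithmetic shift because a plain sar rounds toward negative infinity, while division by a power of two must round toward zero. A standalone sketch (not HotSpot code; assumes the usual arithmetic right shift for negative ints):

#include <cassert>

// Signed division by 2^k implemented with a shift, rounding toward zero.
static int div_by_pow2_model(int x, int k) {
  if (x < 0) {
    x += (1 << k) - 1;   // bias negative values before the arithmetic shift
  }
  return x >> k;
}

int main() {
  assert(div_by_pow2_model(-7, 1) == -3);   // a bare -7 >> 1 would give -4
  assert(div_by_pow2_model( 7, 1) ==  3);
  assert(div_by_pow2_model(-8, 2) == -2);
  return 0;
}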
1948 | |
1949 | void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) { |
1950 | if (reachable(src)) { |
1951 | Assembler::divsd(dst, as_Address(src)); |
1952 | } else { |
1953 | lea(rscratch1, src); |
1954 | Assembler::divsd(dst, Address(rscratch1, 0)); |
1955 | } |
1956 | } |
1957 | |
1958 | void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) { |
1959 | if (reachable(src)) { |
1960 | Assembler::divss(dst, as_Address(src)); |
1961 | } else { |
1962 | lea(rscratch1, src); |
1963 | Assembler::divss(dst, Address(rscratch1, 0)); |
1964 | } |
1965 | } |
1966 | |
1967 | void MacroAssembler::enter() { |
1968 | push(rbp); |
1969 | mov(rbp, rsp); |
1970 | } |
1971 | |
1972 | // A 5 byte nop that is safe for patching (see patch_verified_entry) |
1973 | void MacroAssembler::fat_nop() { |
1974 | if (UseAddressNop) { |
1975 | addr_nop_5(); |
1976 | } else { |
1977 | emit_int8(0x26); // es: |
1978 | emit_int8(0x2e); // cs: |
1979 | emit_int8(0x64); // fs: |
1980 | emit_int8(0x65); // gs: |
1981 | emit_int8((unsigned char)0x90); |
1982 | } |
1983 | } |
1984 | |
1985 | #ifndef _LP64
1986 | void MacroAssembler::fcmp(Register tmp) { |
1987 | fcmp(tmp, 1, true, true); |
1988 | } |
1989 | |
1990 | void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { |
1991 | assert(!pop_right || pop_left, "usage error");
1992 | if (VM_Version::supports_cmov()) { |
1993 | assert(tmp == noreg, "unneeded temp");
1994 | if (pop_left) { |
1995 | fucomip(index); |
1996 | } else { |
1997 | fucomi(index); |
1998 | } |
1999 | if (pop_right) { |
2000 | fpop(); |
2001 | } |
2002 | } else { |
2003 | assert(tmp != noreg, "need temp");
2004 | if (pop_left) { |
2005 | if (pop_right) { |
2006 | fcompp(); |
2007 | } else { |
2008 | fcomp(index); |
2009 | } |
2010 | } else { |
2011 | fcom(index); |
2012 | } |
2013 | // convert FPU condition into eflags condition via rax, |
2014 | save_rax(tmp); |
2015 | fwait(); fnstsw_ax(); |
2016 | sahf(); |
2017 | restore_rax(tmp); |
2018 | } |
2019 | // condition codes set as follows: |
2020 | // |
2021 | // CF (corresponds to C0) if x < y |
2022 | // PF (corresponds to C2) if unordered |
2023 | // ZF (corresponds to C3) if x = y |
2024 | } |
2025 | |
2026 | void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) { |
2027 | fcmp2int(dst, unordered_is_less, 1, true, true); |
2028 | } |
2029 | |
2030 | void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) { |
2031 | fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right); |
2032 | Label L; |
2033 | if (unordered_is_less) { |
2034 | movl(dst, -1); |
2035 | jcc(Assembler::parity, L); |
2036 | jcc(Assembler::below , L); |
2037 | movl(dst, 0); |
2038 | jcc(Assembler::equal , L); |
2039 | increment(dst); |
2040 | } else { // unordered is greater |
2041 | movl(dst, 1); |
2042 | jcc(Assembler::parity, L); |
2043 | jcc(Assembler::above , L); |
2044 | movl(dst, 0); |
2045 | jcc(Assembler::equal , L); |
2046 | decrementl(dst); |
2047 | } |
2048 | bind(L); |
2049 | } |
2050 | |
2051 | void MacroAssembler::fld_d(AddressLiteral src) { |
2052 | fld_d(as_Address(src)); |
2053 | } |
2054 | |
2055 | void MacroAssembler::fld_s(AddressLiteral src) { |
2056 | fld_s(as_Address(src)); |
2057 | } |
2058 | |
2059 | void MacroAssembler::fldcw(AddressLiteral src) { |
2060 | Assembler::fldcw(as_Address(src)); |
2061 | } |
2062 | |
2063 | void MacroAssembler::fpop() { |
2064 | ffree(); |
2065 | fincstp(); |
2066 | } |
2067 | |
2068 | void MacroAssembler::fremr(Register tmp) { |
2069 | save_rax(tmp); |
2070 | { Label L; |
2071 | bind(L); |
2072 | fprem(); |
2073 | fwait(); fnstsw_ax(); |
2074 | sahf(); |
2075 | jcc(Assembler::parity, L); |
2076 | } |
2077 | restore_rax(tmp); |
2078 | // Result is in ST0. |
2079 | // Note: fxch & fpop to get rid of ST1 |
2080 | // (otherwise FPU stack could overflow eventually) |
2081 | fxch(1); |
2082 | fpop(); |
2083 | } |
2084 | |
2085 | void MacroAssembler::empty_FPU_stack() { |
2086 | if (VM_Version::supports_mmx()) { |
2087 | emms(); |
2088 | } else { |
2089 | for (int i = 8; i-- > 0; ) ffree(i); |
2090 | } |
2091 | } |
2092 | #endif // !LP64 |
2093 | |
2094 | void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) { |
2095 | if (reachable(src)) { |
2096 | Assembler::mulpd(dst, as_Address(src)); |
2097 | } else { |
2098 | lea(rscratch1, src); |
2099 | Assembler::mulpd(dst, Address(rscratch1, 0)); |
2100 | } |
2101 | } |
2102 | |
2103 | void MacroAssembler::load_float(Address src) { |
2104 | #ifdef _LP64
2105 | movflt(xmm0, src); |
2106 | #else |
2107 | if (UseSSE >= 1) { |
2108 | movflt(xmm0, src); |
2109 | } else { |
2110 | fld_s(src); |
2111 | } |
2112 | #endif // LP64 |
2113 | } |
2114 | |
2115 | void MacroAssembler::store_float(Address dst) { |
2116 | #ifdef _LP64
2117 | movflt(dst, xmm0); |
2118 | #else |
2119 | if (UseSSE >= 1) { |
2120 | movflt(dst, xmm0); |
2121 | } else { |
2122 | fstp_s(dst); |
2123 | } |
2124 | #endif // LP64 |
2125 | } |
2126 | |
2127 | void MacroAssembler::load_double(Address src) { |
2128 | #ifdef _LP64
2129 | movdbl(xmm0, src); |
2130 | #else |
2131 | if (UseSSE >= 2) { |
2132 | movdbl(xmm0, src); |
2133 | } else { |
2134 | fld_d(src); |
2135 | } |
2136 | #endif // LP64 |
2137 | } |
2138 | |
2139 | void MacroAssembler::store_double(Address dst) { |
2140 | #ifdef _LP64
2141 | movdbl(dst, xmm0); |
2142 | #else |
2143 | if (UseSSE >= 2) { |
2144 | movdbl(dst, xmm0); |
2145 | } else { |
2146 | fstp_d(dst); |
2147 | } |
2148 | #endif // LP64 |
2149 | } |
2150 | |
2151 | // dst = c = a * b + c |
2152 | void MacroAssembler::fmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c) { |
2153 | Assembler::vfmadd231sd(c, a, b); |
2154 | if (dst != c) { |
2155 | movdbl(dst, c); |
2156 | } |
2157 | } |
2158 | |
2159 | // dst = c = a * b + c |
2160 | void MacroAssembler::fmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c) { |
2161 | Assembler::vfmadd231ss(c, a, b); |
2162 | if (dst != c) { |
2163 | movflt(dst, c); |
2164 | } |
2165 | } |
2166 | |
2167 | // dst = c = a * b + c |
2168 | void MacroAssembler::vfmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len) { |
2169 | Assembler::vfmadd231pd(c, a, b, vector_len); |
2170 | if (dst != c) { |
2171 | vmovdqu(dst, c); |
2172 | } |
2173 | } |
2174 | |
2175 | // dst = c = a * b + c |
2176 | void MacroAssembler::vfmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len) { |
2177 | Assembler::vfmadd231ps(c, a, b, vector_len); |
2178 | if (dst != c) { |
2179 | vmovdqu(dst, c); |
2180 | } |
2181 | } |
2182 | |
2183 | // dst = c = a * b + c |
2184 | void MacroAssembler::vfmad(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len) { |
2185 | Assembler::vfmadd231pd(c, a, b, vector_len); |
2186 | if (dst != c) { |
2187 | vmovdqu(dst, c); |
2188 | } |
2189 | } |
2190 | |
2191 | // dst = c = a * b + c |
2192 | void MacroAssembler::vfmaf(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len) { |
2193 | Assembler::vfmadd231ps(c, a, b, vector_len); |
2194 | if (dst != c) { |
2195 | vmovdqu(dst, c); |
2196 | } |
2197 | } |
2198 | |
2199 | void MacroAssembler::incrementl(AddressLiteral dst) { |
2200 | if (reachable(dst)) { |
2201 | incrementl(as_Address(dst)); |
2202 | } else { |
2203 | lea(rscratch1, dst); |
2204 | incrementl(Address(rscratch1, 0)); |
2205 | } |
2206 | } |
2207 | |
2208 | void MacroAssembler::incrementl(ArrayAddress dst) { |
2209 | incrementl(as_Address(dst)); |
2210 | } |
2211 | |
2212 | void MacroAssembler::incrementl(Register reg, int value) { |
2213 | if (value == min_jint) {addl(reg, value) ; return; } |
2214 | if (value < 0) { decrementl(reg, -value); return; } |
2215 | if (value == 0) { ; return; } |
2216 | if (value == 1 && UseIncDec) { incl(reg) ; return; } |
2217 | /* else */ { addl(reg, value) ; return; } |
2218 | } |
2219 | |
2220 | void MacroAssembler::incrementl(Address dst, int value) { |
2221 | if (value == min_jint) {addl(dst, value) ; return; } |
2222 | if (value < 0) { decrementl(dst, -value); return; } |
2223 | if (value == 0) { ; return; } |
2224 | if (value == 1 && UseIncDec) { incl(dst) ; return; } |
2225 | /* else */ { addl(dst, value) ; return; } |
2226 | } |
2227 | |
2228 | void MacroAssembler::jump(AddressLiteral dst) { |
2229 | if (reachable(dst)) { |
2230 | jmp_literal(dst.target(), dst.rspec()); |
2231 | } else { |
2232 | lea(rscratch1, dst); |
2233 | jmp(rscratch1); |
2234 | } |
2235 | } |
2236 | |
2237 | void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) { |
2238 | if (reachable(dst)) { |
2239 | InstructionMark im(this); |
2240 | relocate(dst.reloc()); |
2241 | const int short_size = 2; |
2242 | const int long_size = 6; |
2243 | int offs = (intptr_t)dst.target() - ((intptr_t)pc()); |
2244 | if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) { |
2245 | // 0111 tttn #8-bit disp |
2246 | emit_int8(0x70 | cc); |
2247 | emit_int8((offs - short_size) & 0xFF); |
2248 | } else { |
2249 | // 0000 1111 1000 tttn #32-bit disp |
2250 | emit_int8(0x0F); |
2251 | emit_int8((unsigned char)(0x80 | cc)); |
2252 | emit_int32(offs - long_size); |
2253 | } |
2254 | } else { |
2255 | #ifdef ASSERT
2256 | warning("reversing conditional branch"); |
2257 | #endif /* ASSERT */ |
2258 | Label skip; |
2259 | jccb(reverse[cc], skip);
2260 | lea(rscratch1, dst); |
2261 | Assembler::jmp(rscratch1); |
2262 | bind(skip); |
2263 | } |
2264 | } |
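In the reachable branch above, jump_cc picks between the two jcc encodings by displacement size. A byte-level sketch of that choice (not HotSpot code; the helper below is hypothetical): the 2-byte short form 0x70|cc with an 8-bit displacement when it fits, otherwise the 6-byte form 0x0F 0x80|cc with a 32-bit displacement, both measured from the end of the instruction.

#include <cstdint>
#include <vector>

// Emit a conditional jump to 'target' (an offset within 'code'), choosing
// the short rel8 form when the displacement fits in a signed byte.
static void emit_jcc_model(std::vector<uint8_t>& code, int cc, int32_t target) {
  const int short_size = 2, long_size = 6;
  int32_t pc = static_cast<int32_t>(code.size());
  int32_t short_disp = target - (pc + short_size);
  if (short_disp >= -128 && short_disp <= 127) {
    code.push_back(static_cast<uint8_t>(0x70 | cc));       // 0111 tttn
    code.push_back(static_cast<uint8_t>(short_disp));      // 8-bit disp
  } else {
    int32_t long_disp = target - (pc + long_size);
    code.push_back(0x0F);                                   // 0000 1111
    code.push_back(static_cast<uint8_t>(0x80 | cc));        // 1000 tttn
    for (int i = 0; i < 4; i++) {                           // 32-bit disp
      code.push_back(static_cast<uint8_t>(long_disp >> (8 * i)));
    }
  }
}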
2265 | |
2266 | void MacroAssembler::fld_x(AddressLiteral src) { |
2267 | Assembler::fld_x(as_Address(src)); |
2268 | } |
2269 | |
2270 | void MacroAssembler::ldmxcsr(AddressLiteral src) { |
2271 | if (reachable(src)) { |
2272 | Assembler::ldmxcsr(as_Address(src)); |
2273 | } else { |
2274 | lea(rscratch1, src); |
2275 | Assembler::ldmxcsr(Address(rscratch1, 0)); |
2276 | } |
2277 | } |
2278 | |
2279 | int MacroAssembler::load_signed_byte(Register dst, Address src) { |
2280 | int off; |
2281 | if (LP64_ONLY(true ||) VM_Version::is_P6()) {
2282 | off = offset(); |
2283 | movsbl(dst, src); // movsxb |
2284 | } else { |
2285 | off = load_unsigned_byte(dst, src); |
2286 | shll(dst, 24); |
2287 | sarl(dst, 24); |
2288 | } |
2289 | return off; |
2290 | } |
2291 | |
2292 | // Note: load_signed_short used to be called load_signed_word. |
2293 | // Although the 'w' in x86 opcodes refers to the term "word" in the assembler |
2294 | // manual, which means 16 bits, that usage is found nowhere in HotSpot code. |
2295 | // The term "word" in HotSpot means a 32- or 64-bit machine word. |
2296 | int MacroAssembler::load_signed_short(Register dst, Address src) { |
2297 | int off; |
2298 | if (LP64_ONLY(true ||) VM_Version::is_P6()) {
2299 | // This is dubious to me since it seems safe to do a signed 16 => 64 bit |
2300 | // version but this is what 64bit has always done. This seems to imply |
2301 | // that users are only using 32bits worth. |
2302 | off = offset(); |
2303 | movswl(dst, src); // movsxw |
2304 | } else { |
2305 | off = load_unsigned_short(dst, src); |
2306 | shll(dst, 16); |
2307 | sarl(dst, 16); |
2308 | } |
2309 | return off; |
2310 | } |
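The pre-P6 fallback in load_signed_byte/load_signed_short sign-extends with a shift pair instead of movsx: shift the loaded value to the top of the register, then shift it back arithmetically so the sign bit fills the upper half. A standalone illustration for the 16-bit case (not HotSpot code; assumes arithmetic >> on signed ints):

#include <cassert>
#include <cstdint>

// Sign-extend a 16-bit value the way the shll/sarl fallback does.
static int32_t sign_extend16_model(uint16_t v) {
  int32_t x = static_cast<int32_t>(static_cast<uint32_t>(v) << 16);  // shll 16
  return x >> 16;                                                    // sarl 16
}

int main() {
  assert(sign_extend16_model(0x7FFF) ==  32767);
  assert(sign_extend16_model(0x8000) == -32768);
  assert(sign_extend16_model(0xFFFF) == -1);
  return 0;
}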
2311 | |
2312 | int MacroAssembler::load_unsigned_byte(Register dst, Address src) { |
2313 | // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, |
2314 | // and "3.9 Partial Register Penalties", p. 22). |
2315 | int off; |
2316 | if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
2317 | off = offset(); |
2318 | movzbl(dst, src); // movzxb |
2319 | } else { |
2320 | xorl(dst, dst); |
2321 | off = offset(); |
2322 | movb(dst, src); |
2323 | } |
2324 | return off; |
2325 | } |
2326 | |
2327 | // Note: load_unsigned_short used to be called load_unsigned_word. |
2328 | int MacroAssembler::load_unsigned_short(Register dst, Address src) { |
2329 | // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, |
2330 | // and "3.9 Partial Register Penalties", p. 22). |
2331 | int off; |
2332 | if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
2333 | off = offset(); |
2334 | movzwl(dst, src); // movzxw |
2335 | } else { |
2336 | xorl(dst, dst); |
2337 | off = offset(); |
2338 | movw(dst, src); |
2339 | } |
2340 | return off; |
2341 | } |
2342 | |
2343 | void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { |
2344 | switch (size_in_bytes) { |
2345 | #ifndef _LP64
2346 | case 8:
2347 | assert(dst2 != noreg, "second dest register required");
2348 | movl(dst, src); |
2349 | movl(dst2, src.plus_disp(BytesPerInt)); |
2350 | break; |
2351 | #else |
2352 | case 8: movq(dst, src); break; |
2353 | #endif |
2354 | case 4: movl(dst, src); break; |
2355 | case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; |
2356 | case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; |
2357 | default: ShouldNotReachHere();
2358 | } |
2359 | } |
2360 | |
2361 | void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { |
2362 | switch (size_in_bytes) { |
2363 | #ifndef _LP64
2364 | case 8:
2365 | assert(src2 != noreg, "second source register required");
2366 | movl(dst, src); |
2367 | movl(dst.plus_disp(BytesPerInt), src2); |
2368 | break; |
2369 | #else |
2370 | case 8: movq(dst, src); break; |
2371 | #endif |
2372 | case 4: movl(dst, src); break; |
2373 | case 2: movw(dst, src); break; |
2374 | case 1: movb(dst, src); break; |
2375 | default: ShouldNotReachHere();
2376 | } |
2377 | } |
2378 | |
2379 | void MacroAssembler::mov32(AddressLiteral dst, Register src) { |
2380 | if (reachable(dst)) { |
2381 | movl(as_Address(dst), src); |
2382 | } else { |
2383 | lea(rscratch1, dst); |
2384 | movl(Address(rscratch1, 0), src); |
2385 | } |
2386 | } |
2387 | |
2388 | void MacroAssembler::mov32(Register dst, AddressLiteral src) { |
2389 | if (reachable(src)) { |
2390 | movl(dst, as_Address(src)); |
2391 | } else { |
2392 | lea(rscratch1, src); |
2393 | movl(dst, Address(rscratch1, 0)); |
2394 | } |
2395 | } |
2396 | |
2397 | // C++ bool manipulation |
2398 | |
2399 | void MacroAssembler::movbool(Register dst, Address src) { |
2400 | if(sizeof(bool) == 1) |
2401 | movb(dst, src); |
2402 | else if(sizeof(bool) == 2) |
2403 | movw(dst, src); |
2404 | else if(sizeof(bool) == 4) |
2405 | movl(dst, src); |
2406 | else |
2407 | // unsupported |
2408 | ShouldNotReachHere();
2409 | } |
2410 | |
2411 | void MacroAssembler::movbool(Address dst, bool boolconst) { |
2412 | if(sizeof(bool) == 1) |
2413 | movb(dst, (int) boolconst); |
2414 | else if(sizeof(bool) == 2) |
2415 | movw(dst, (int) boolconst); |
2416 | else if(sizeof(bool) == 4) |
2417 | movl(dst, (int) boolconst); |
2418 | else |
2419 | // unsupported |
2420 | ShouldNotReachHere();
2421 | } |
2422 | |
2423 | void MacroAssembler::movbool(Address dst, Register src) { |
2424 | if(sizeof(bool) == 1) |
2425 | movb(dst, src); |
2426 | else if(sizeof(bool) == 2) |
2427 | movw(dst, src); |
2428 | else if(sizeof(bool) == 4) |
2429 | movl(dst, src); |
2430 | else |
2431 | // unsupported |
2432 | ShouldNotReachHere();
2433 | } |
2434 | |
2435 | void MacroAssembler::movbyte(ArrayAddress dst, int src) { |
2436 | movb(as_Address(dst), src); |
2437 | } |
2438 | |
2439 | void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) { |
2440 | if (reachable(src)) { |
2441 | movdl(dst, as_Address(src)); |
2442 | } else { |
2443 | lea(rscratch1, src); |
2444 | movdl(dst, Address(rscratch1, 0)); |
2445 | } |
2446 | } |
2447 | |
2448 | void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) { |
2449 | if (reachable(src)) { |
2450 | movq(dst, as_Address(src)); |
2451 | } else { |
2452 | lea(rscratch1, src); |
2453 | movq(dst, Address(rscratch1, 0)); |
2454 | } |
2455 | } |
2456 | |
2457 | void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) { |
2458 | if (reachable(src)) { |
2459 | if (UseXmmLoadAndClearUpper) { |
2460 | movsd (dst, as_Address(src)); |
2461 | } else { |
2462 | movlpd(dst, as_Address(src)); |
2463 | } |
2464 | } else { |
2465 | lea(rscratch1, src); |
2466 | if (UseXmmLoadAndClearUpper) { |
2467 | movsd (dst, Address(rscratch1, 0)); |
2468 | } else { |
2469 | movlpd(dst, Address(rscratch1, 0)); |
2470 | } |
2471 | } |
2472 | } |
2473 | |
2474 | void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) { |
2475 | if (reachable(src)) { |
2476 | movss(dst, as_Address(src)); |
2477 | } else { |
2478 | lea(rscratch1, src); |
2479 | movss(dst, Address(rscratch1, 0)); |
2480 | } |
2481 | } |
2482 | |
2483 | void MacroAssembler::movptr(Register dst, Register src) { |
2484 | LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2485 | }
2486 |
2487 | void MacroAssembler::movptr(Register dst, Address src) {
2488 | LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2489 | }
2490 |
2491 | // src should NEVER be a real pointer. Use AddressLiteral for true pointers
2492 | void MacroAssembler::movptr(Register dst, intptr_t src) {
2493 | LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
2494 | }
2495 |
2496 | void MacroAssembler::movptr(Address dst, Register src) {
2497 | LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2498 | } |
2499 | |
2500 | void MacroAssembler::movdqu(Address dst, XMMRegister src) { |
2501 | assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2502 | Assembler::movdqu(dst, src);
2503 | }
2504 |
2505 | void MacroAssembler::movdqu(XMMRegister dst, Address src) {
2506 | assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2507 | Assembler::movdqu(dst, src);
2508 | }
2509 |
2510 | void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
2511 | assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2512 | Assembler::movdqu(dst, src); |
2513 | } |
2514 | |
2515 | void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg) { |
2516 | if (reachable(src)) { |
2517 | movdqu(dst, as_Address(src)); |
2518 | } else { |
2519 | lea(scratchReg, src); |
2520 | movdqu(dst, Address(scratchReg, 0)); |
2521 | } |
2522 | } |
2523 | |
2524 | void MacroAssembler::vmovdqu(Address dst, XMMRegister src) { |
2525 | assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2526 | Assembler::vmovdqu(dst, src);
2527 | }
2528 |
2529 | void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
2530 | assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2531 | Assembler::vmovdqu(dst, src);
2532 | }
2533 |
2534 | void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2535 | assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2536 | Assembler::vmovdqu(dst, src); |
2537 | } |
2538 | |
2539 | void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg) { |
2540 | if (reachable(src)) { |
2541 | vmovdqu(dst, as_Address(src)); |
2542 | } |
2543 | else { |
2544 | lea(scratch_reg, src); |
2545 | vmovdqu(dst, Address(scratch_reg, 0)); |
2546 | } |
2547 | } |
2548 | |
2549 | void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg, int vector_len) { |
2550 | assert(vector_len <= AVX_256bit, "AVX2 vector length");
2551 | if (vector_len == AVX_256bit) { |
2552 | vmovdqu(dst, src, scratch_reg); |
2553 | } else { |
2554 | movdqu(dst, src, scratch_reg); |
2555 | } |
2556 | } |
2557 | |
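// kmov: with AVX-512BW the full 64-bit opmask is moved via kmovql; without BW only the
// 16-bit kmovwl form is available, which still requires base EVEX support (asserted below).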
2558 | void MacroAssembler::kmov(KRegister dst, Address src) { |
2559 | if (VM_Version::supports_avx512bw()) { |
2560 | kmovql(dst, src); |
2561 | } else { |
2562 | assert(VM_Version::supports_evex(), "");
2563 | kmovwl(dst, src); |
2564 | } |
2565 | } |
2566 | |
2567 | void MacroAssembler::kmov(Address dst, KRegister src) { |
2568 | if (VM_Version::supports_avx512bw()) { |
2569 | kmovql(dst, src); |
2570 | } else { |
2571 | assert(VM_Version::supports_evex(), "");
2572 | kmovwl(dst, src); |
2573 | } |
2574 | } |
2575 | |
2576 | void MacroAssembler::kmov(KRegister dst, KRegister src) { |
2577 | if (VM_Version::supports_avx512bw()) { |
2578 | kmovql(dst, src); |
2579 | } else { |
2580 | assert(VM_Version::supports_evex(), "");
2581 | kmovwl(dst, src); |
2582 | } |
2583 | } |
2584 | |
2585 | void MacroAssembler::kmov(Register dst, KRegister src) { |
2586 | if (VM_Version::supports_avx512bw()) { |
2587 | kmovql(dst, src); |
2588 | } else { |
2589 | assert(VM_Version::supports_evex(), "");
2590 | kmovwl(dst, src); |
2591 | } |
2592 | } |
2593 | |
2594 | void MacroAssembler::kmov(KRegister dst, Register src) { |
2595 | if (VM_Version::supports_avx512bw()) { |
2596 | kmovql(dst, src); |
2597 | } else { |
2598 | assert(VM_Version::supports_evex(), "");
2599 | kmovwl(dst, src); |
2600 | } |
2601 | } |
2602 | |
2603 | void MacroAssembler::kmovql(KRegister dst, AddressLiteral src, Register scratch_reg) { |
2604 | if (reachable(src)) { |
2605 | kmovql(dst, as_Address(src)); |
2606 | } else { |
2607 | lea(scratch_reg, src); |
2608 | kmovql(dst, Address(scratch_reg, 0)); |
2609 | } |
2610 | } |
2611 | |
2612 | void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) { |
2613 | if (reachable(src)) { |
2614 | kmovwl(dst, as_Address(src)); |
2615 | } else { |
2616 | lea(scratch_reg, src); |
2617 | kmovwl(dst, Address(scratch_reg, 0)); |
2618 | } |
2619 | } |
2620 | |
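// In evmovdqub below, k0 denotes "no mask": when mask == k0 the unmasked encoding is
// emitted instead of a merge/zero-masked one.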
2621 | void MacroAssembler::evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, |
2622 | int vector_len, Register scratch_reg) { |
2623 | if (reachable(src)) { |
2624 | if (mask == k0) { |
2625 | Assembler::evmovdqub(dst, as_Address(src), merge, vector_len); |
2626 | } else { |
2627 | Assembler::evmovdqub(dst, mask, as_Address(src), merge, vector_len); |
2628 | } |
2629 | } else { |
2630 | lea(scratch_reg, src); |
2631 | if (mask == k0) { |
2632 | Assembler::evmovdqub(dst, Address(scratch_reg, 0), merge, vector_len); |
2633 | } else { |
2634 | Assembler::evmovdqub(dst, mask, Address(scratch_reg, 0), merge, vector_len); |
2635 | } |
2636 | } |
2637 | } |
2638 | |
2639 | void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, |
2640 | int vector_len, Register scratch_reg) { |
2641 | if (reachable(src)) { |
2642 | Assembler::evmovdquw(dst, mask, as_Address(src), merge, vector_len); |
2643 | } else { |
2644 | lea(scratch_reg, src); |
2645 | Assembler::evmovdquw(dst, mask, Address(scratch_reg, 0), merge, vector_len); |
2646 | } |
2647 | } |
2648 | |
2649 | void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, |
2650 | int vector_len, Register scratch_reg) { |
2651 | if (reachable(src)) { |
2652 | Assembler::evmovdqul(dst, mask, as_Address(src), merge, vector_len); |
2653 | } else { |
2654 | lea(scratch_reg, src); |
2655 | Assembler::evmovdqul(dst, mask, Address(scratch_reg, 0), merge, vector_len); |
2656 | } |
2657 | } |
2658 | |
2659 | void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, |
2660 | int vector_len, Register scratch_reg) { |
2661 | if (reachable(src)) { |
2662 | Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len); |
2663 | } else { |
2664 | lea(scratch_reg, src); |
2665 | Assembler::evmovdquq(dst, mask, Address(scratch_reg, 0), merge, vector_len); |
2666 | } |
2667 | } |
2668 | |
2669 | void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) { |
2670 | if (reachable(src)) { |
2671 | Assembler::evmovdquq(dst, as_Address(src), vector_len); |
2672 | } else { |
2673 | lea(rscratch, src); |
2674 | Assembler::evmovdquq(dst, Address(rscratch, 0), vector_len); |
2675 | } |
2676 | } |
2677 | |
2678 | void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src) { |
2679 | if (reachable(src)) { |
2680 | Assembler::movdqa(dst, as_Address(src)); |
2681 | } else { |
2682 | lea(rscratch1, src); |
2683 | Assembler::movdqa(dst, Address(rscratch1, 0)); |
2684 | } |
2685 | } |
2686 | |
2687 | void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) { |
2688 | if (reachable(src)) { |
2689 | Assembler::movsd(dst, as_Address(src)); |
2690 | } else { |
2691 | lea(rscratch1, src); |
2692 | Assembler::movsd(dst, Address(rscratch1, 0)); |
2693 | } |
2694 | } |
2695 | |
2696 | void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { |
2697 | if (reachable(src)) { |
2698 | Assembler::movss(dst, as_Address(src)); |
2699 | } else { |
2700 | lea(rscratch1, src); |
2701 | Assembler::movss(dst, Address(rscratch1, 0)); |
2702 | } |
2703 | } |
2704 | |
2705 | void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) { |
2706 | if (reachable(src)) { |
2707 | Assembler::mulsd(dst, as_Address(src)); |
2708 | } else { |
2709 | lea(rscratch1, src); |
2710 | Assembler::mulsd(dst, Address(rscratch1, 0)); |
2711 | } |
2712 | } |
2713 | |
2714 | void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) { |
2715 | if (reachable(src)) { |
2716 | Assembler::mulss(dst, as_Address(src)); |
2717 | } else { |
2718 | lea(rscratch1, src); |
2719 | Assembler::mulss(dst, Address(rscratch1, 0)); |
2720 | } |
2721 | } |
2722 | |
2723 | void MacroAssembler::null_check(Register reg, int offset) { |
2724 | if (needs_explicit_null_check(offset)) { |
2725 | // provoke OS NULL exception if reg = NULL by |
2726 | // accessing M[reg] w/o changing any (non-CC) registers |
2727 | // NOTE: cmpl is plenty here to provoke a segv |
2728 | cmpptr(rax, Address(reg, 0)); |
2729 | // Note: should probably use testl(rax, Address(reg, 0)); |
2730 | // may be shorter code (however, this version of |
2731 | // testl needs to be implemented first) |
2732 | } else { |
2733 | // nothing to do, (later) access of M[reg + offset] |
2734 | // will provoke OS NULL exception if reg = NULL |
2735 | } |
2736 | } |
2737 | |
2738 | void MacroAssembler::os_breakpoint() { |
2739 | // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
2740 | // (e.g., MSVC can't call ps() otherwise) |
2741 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
2742 | } |
2743 | |
2744 | void MacroAssembler::unimplemented(const char* what) { |
2745 | const char* buf = NULL;
2746 | { |
2747 | ResourceMark rm; |
2748 | stringStream ss; |
2749 | ss.print("unimplemented: %s", what); |
2750 | buf = code_string(ss.as_string()); |
2751 | } |
2752 | stop(buf); |
2753 | } |
2754 | |
2755 | #ifdef _LP64
2756 | #define XSTATE_BV 0x200
2757 | #endif |
2758 | |
2759 | void MacroAssembler::pop_CPU_state() { |
2760 | pop_FPU_state(); |
2761 | pop_IU_state(); |
2762 | } |
2763 | |
2764 | void MacroAssembler::pop_FPU_state() { |
2765 | #ifndef _LP64
2766 | frstor(Address(rsp, 0)); |
2767 | #else |
2768 | fxrstor(Address(rsp, 0)); |
2769 | #endif |
2770 | addptr(rsp, FPUStateSizeInWords * wordSize); |
2771 | } |
2772 | |
2773 | void MacroAssembler::pop_IU_state() { |
2774 | popa(); |
2775 | LP64_ONLY(addq(rsp, 8));
2776 | popf(); |
2777 | } |
2778 | |
2779 | // Save Integer and Float state |
2780 | // Warning: Stack must be 16 byte aligned (64bit) |
2781 | void MacroAssembler::push_CPU_state() { |
2782 | push_IU_state(); |
2783 | push_FPU_state(); |
2784 | } |
2785 | |
2786 | void MacroAssembler::push_FPU_state() { |
2787 | subptr(rsp, FPUStateSizeInWords * wordSize); |
2788 | #ifndef _LP64
2789 | fnsave(Address(rsp, 0)); |
2790 | fwait(); |
2791 | #else |
2792 | fxsave(Address(rsp, 0)); |
2793 | #endif // LP64 |
2794 | } |
2795 | |
2796 | void MacroAssembler::push_IU_state() { |
2797 | // Push flags first because pusha kills them |
2798 | pushf(); |
2799 | // Make sure rsp stays 16-byte aligned |
2800 | LP64_ONLY(subq(rsp, 8));
2801 | pusha(); |
2802 | } |
2803 | |
2804 | void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { // determine java_thread register |
2805 | if (!java_thread->is_valid()) { |
2806 | java_thread = rdi; |
2807 | get_thread(java_thread); |
2808 | } |
2809 | // we must set sp to zero to clear frame |
2810 | movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
2811 | // must clear fp, so that compiled frames are not confused; it is |
2812 | // possible that we need it only for debugging |
2813 | if (clear_fp) { |
2814 | movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
2815 | } |
2816 | // Always clear the pc because it could have been set by make_walkable() |
2817 | movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
2818 | vzeroupper(); |
2819 | } |
2820 | |
2821 | void MacroAssembler::restore_rax(Register tmp) { |
2822 | if (tmp == noreg) pop(rax); |
2823 | else if (tmp != rax) mov(rax, tmp); |
2824 | } |
2825 | |
2826 | void MacroAssembler::round_to(Register reg, int modulus) { |
2827 | addptr(reg, modulus - 1); |
2828 | andptr(reg, -modulus); |
2829 | } |
2830 | |
2831 | void MacroAssembler::save_rax(Register tmp) { |
2832 | if (tmp == noreg) push(rax); |
2833 | else if (tmp != rax) mov(tmp, rax); |
2834 | } |
2835 | |
2836 | void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool in_nmethod) { |
2837 | if (at_return) { |
2838 | // Note that when in_nmethod is set, the stack pointer is incremented before the poll. Therefore, |
2839 | // we may safely use rsp instead to perform the stack watermark check. |
2840 | cmpptr(in_nmethod ? rsp : rbp, Address(thread_reg, JavaThread::polling_word_offset())); |
2841 | jcc(Assembler::above, slow_path); |
2842 | return; |
2843 | } |
2844 | testb(Address(thread_reg, JavaThread::polling_word_offset()), SafepointMechanism::poll_bit()); |
2845 | jcc(Assembler::notZero, slow_path); // handshake bit set implies poll |
2846 | } |
2847 | |
2848 | // Calls to C land |
2849 | // |
2850 | // When entering C land, the rbp, & rsp of the last Java frame have to be recorded |
2851 | // in the (thread-local) JavaThread object. When leaving C land, the last Java fp |
2852 | // has to be reset to 0. This is required to allow proper stack traversal. |
2853 | void MacroAssembler::set_last_Java_frame(Register java_thread, |
2854 | Register last_java_sp, |
2855 | Register last_java_fp, |
2856 | address last_java_pc) { |
2857 | vzeroupper(); |
2858 | // determine java_thread register |
2859 | if (!java_thread->is_valid()) { |
2860 | java_thread = rdi; |
2861 | get_thread(java_thread); |
2862 | } |
2863 | // determine last_java_sp register |
2864 | if (!last_java_sp->is_valid()) { |
2865 | last_java_sp = rsp; |
2866 | } |
2867 | |
2868 | // last_java_fp is optional |
2869 | |
2870 | if (last_java_fp->is_valid()) { |
2871 | movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp); |
2872 | } |
2873 | |
2874 | // last_java_pc is optional |
2875 | |
2876 | if (last_java_pc != NULL) {
2877 | lea(Address(java_thread, |
2878 | JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()), |
2879 | InternalAddress(last_java_pc)); |
2880 | |
2881 | } |
2882 | movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp); |
2883 | } |
2884 | |
2885 | void MacroAssembler::shlptr(Register dst, int imm8) { |
2886 | LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
2887 | } |
2888 | |
2889 | void MacroAssembler::shrptr(Register dst, int imm8) { |
2890 | LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
2891 | } |
2892 | |
2893 | void MacroAssembler::sign_extend_byte(Register reg) { |
2894 | if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
2895 | movsbl(reg, reg); // movsxb |
2896 | } else { |
2897 | shll(reg, 24); |
2898 | sarl(reg, 24); |
2899 | } |
2900 | } |
2901 | |
2902 | void MacroAssembler::sign_extend_short(Register reg) { |
2903 | if (LP64_ONLY(true ||) VM_Version::is_P6()) {
2904 | movswl(reg, reg); // movsxw |
2905 | } else { |
2906 | shll(reg, 16); |
2907 | sarl(reg, 16); |
2908 | } |
2909 | } |
2910 | |
2911 | void MacroAssembler::testl(Register dst, AddressLiteral src) { |
2912 | assert(reachable(src), "Address should be reachable");
2913 | testl(dst, as_Address(src)); |
2914 | } |
2915 | |
2916 | void MacroAssembler::pcmpeqb(XMMRegister dst, XMMRegister src) { |
2917 | assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
2918 | Assembler::pcmpeqb(dst, src); |
2919 | } |
2920 | |
2921 | void MacroAssembler::pcmpeqw(XMMRegister dst, XMMRegister src) { |
2922 | assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
2923 | Assembler::pcmpeqw(dst, src); |
2924 | } |
2925 | |
2926 | void MacroAssembler::pcmpestri(XMMRegister dst, Address src, int imm8) { |
2927 | assert((dst->encoding() < 16),"XMM register should be 0-15");
2928 | Assembler::pcmpestri(dst, src, imm8); |
2929 | } |
2930 | |
2931 | void MacroAssembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { |
2932 | assert((dst->encoding() < 16 && src->encoding() < 16),"XMM register should be 0-15");
2933 | Assembler::pcmpestri(dst, src, imm8); |
2934 | } |
2935 | |
2936 | void MacroAssembler::pmovzxbw(XMMRegister dst, XMMRegister src) { |
2937 | assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
2938 | Assembler::pmovzxbw(dst, src); |
2939 | } |
2940 | |
2941 | void MacroAssembler::pmovzxbw(XMMRegister dst, Address src) { |
2942 | assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
2943 | Assembler::pmovzxbw(dst, src); |
2944 | } |
2945 | |
2946 | void MacroAssembler::pmovmskb(Register dst, XMMRegister src) { |
2947 | assert((src->encoding() < 16),"XMM register should be 0-15");
2948 | Assembler::pmovmskb(dst, src); |
2949 | } |
2950 | |
2951 | void MacroAssembler::ptest(XMMRegister dst, XMMRegister src) { |
2952 | assert((dst->encoding() < 16 && src->encoding() < 16),"XMM register should be 0-15");
2953 | Assembler::ptest(dst, src); |
2954 | } |
2955 | |
2956 | void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) { |
2957 | if (reachable(src)) { |
2958 | Assembler::sqrtsd(dst, as_Address(src)); |
2959 | } else { |
2960 | lea(rscratch1, src); |
2961 | Assembler::sqrtsd(dst, Address(rscratch1, 0)); |
2962 | } |
2963 | } |
2964 | |
2965 | void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) { |
2966 | if (reachable(src)) { |
2967 | Assembler::sqrtss(dst, as_Address(src)); |
2968 | } else { |
2969 | lea(rscratch1, src); |
2970 | Assembler::sqrtss(dst, Address(rscratch1, 0)); |
2971 | } |
2972 | } |
2973 | |
2974 | void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) { |
2975 | if (reachable(src)) { |
2976 | Assembler::subsd(dst, as_Address(src)); |
2977 | } else { |
2978 | lea(rscratch1, src); |
2979 | Assembler::subsd(dst, Address(rscratch1, 0)); |
2980 | } |
2981 | } |
2982 | |
2983 | void MacroAssembler::roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register scratch_reg) { |
2984 | if (reachable(src)) { |
2985 | Assembler::roundsd(dst, as_Address(src), rmode); |
2986 | } else { |
2987 | lea(scratch_reg, src); |
2988 | Assembler::roundsd(dst, Address(scratch_reg, 0), rmode); |
2989 | } |
2990 | } |
2991 | |
2992 | void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) { |
2993 | if (reachable(src)) { |
2994 | Assembler::subss(dst, as_Address(src)); |
2995 | } else { |
2996 | lea(rscratch1, src); |
2997 | Assembler::subss(dst, Address(rscratch1, 0)); |
2998 | } |
2999 | } |
3000 | |
3001 | void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { |
3002 | if (reachable(src)) { |
3003 | Assembler::ucomisd(dst, as_Address(src)); |
3004 | } else { |
3005 | lea(rscratch1, src); |
3006 | Assembler::ucomisd(dst, Address(rscratch1, 0)); |
3007 | } |
3008 | } |
3009 | |
3010 | void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { |
3011 | if (reachable(src)) { |
3012 | Assembler::ucomiss(dst, as_Address(src)); |
3013 | } else { |
3014 | lea(rscratch1, src); |
3015 | Assembler::ucomiss(dst, Address(rscratch1, 0)); |
3016 | } |
3017 | } |
3018 | |
3019 | void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg) { |
3020 | // Used in sign-bit flipping with aligned address. |
3021 | assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
3022 | if (reachable(src)) { |
3023 | Assembler::xorpd(dst, as_Address(src)); |
3024 | } else { |
3025 | lea(scratch_reg, src); |
3026 | Assembler::xorpd(dst, Address(scratch_reg, 0)); |
3027 | } |
3028 | } |
3029 | |
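// xorpd/xorps on registers: with UseAVX > 2 but no AVX512DQ there is no EVEX form of the
// floating-point xor, so the dst == src case (a zeroing idiom) is rewritten below as a
// 512-bit vpxor, which only needs AVX512F.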
3030 | void MacroAssembler::xorpd(XMMRegister dst, XMMRegister src) { |
3031 | if (UseAVX > 2 && !VM_Version::supports_avx512dq() && (dst->encoding() == src->encoding())) { |
3032 | Assembler::vpxor(dst, dst, src, Assembler::AVX_512bit); |
3033 | } |
3034 | else { |
3035 | Assembler::xorpd(dst, src); |
3036 | } |
3037 | } |
3038 | |
3039 | void MacroAssembler::xorps(XMMRegister dst, XMMRegister src) { |
3040 | if (UseAVX > 2 && !VM_Version::supports_avx512dq() && (dst->encoding() == src->encoding())) { |
3041 | Assembler::vpxor(dst, dst, src, Assembler::AVX_512bit); |
3042 | } else { |
3043 | Assembler::xorps(dst, src); |
3044 | } |
3045 | } |
3046 | |
3047 | void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg) { |
3048 | // Used in sign-bit flipping with aligned address. |
3049 | assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
3050 | if (reachable(src)) { |
3051 | Assembler::xorps(dst, as_Address(src)); |
3052 | } else { |
3053 | lea(scratch_reg, src); |
3054 | Assembler::xorps(dst, Address(scratch_reg, 0)); |
3055 | } |
3056 | } |
3057 | |
3058 | void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) { |
3059 | // Used in sign-bit flipping with aligned address. |
3060 | bool aligned_adr = (((intptr_t)src.target() & 15) == 0); |
3061 | assert((UseAVX > 0) || aligned_adr, "SSE mode requires address alignment 16 bytes");
3062 | if (reachable(src)) { |
3063 | Assembler::pshufb(dst, as_Address(src)); |
3064 | } else { |
3065 | lea(rscratch1, src); |
3066 | Assembler::pshufb(dst, Address(rscratch1, 0)); |
3067 | } |
3068 | } |
3069 | |
3070 | // AVX 3-operands instructions |
3071 | |
3072 | void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { |
3073 | if (reachable(src)) { |
3074 | vaddsd(dst, nds, as_Address(src)); |
3075 | } else { |
3076 | lea(rscratch1, src); |
3077 | vaddsd(dst, nds, Address(rscratch1, 0)); |
3078 | } |
3079 | } |
3080 | |
3081 | void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { |
3082 | if (reachable(src)) { |
3083 | vaddss(dst, nds, as_Address(src)); |
3084 | } else { |
3085 | lea(rscratch1, src); |
3086 | vaddss(dst, nds, Address(rscratch1, 0)); |
3087 | } |
3088 | } |
3089 | |
3090 | void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch) { |
3091 | assert(UseAVX > 0, "requires some form of AVX");
3092 | if (reachable(src)) { |
3093 | Assembler::vpaddb(dst, nds, as_Address(src), vector_len); |
3094 | } else { |
3095 | lea(rscratch, src); |
3096 | Assembler::vpaddb(dst, nds, Address(rscratch, 0), vector_len); |
3097 | } |
3098 | } |
3099 | |
3100 | void MacroAssembler::vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch) { |
3101 | assert(UseAVX > 0, "requires some form of AVX");
3102 | if (reachable(src)) { |
3103 | Assembler::vpaddd(dst, nds, as_Address(src), vector_len); |
3104 | } else { |
3105 | lea(rscratch, src); |
3106 | Assembler::vpaddd(dst, nds, Address(rscratch, 0), vector_len); |
3107 | } |
3108 | } |
3109 | |
3110 | void MacroAssembler::vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) { |
3111 | assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15");
3112 | vandps(dst, nds, negate_field, vector_len); |
3113 | } |
3114 | |
3115 | void MacroAssembler::vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) { |
3116 | assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15");
3117 | vandpd(dst, nds, negate_field, vector_len); |
3118 | } |
3119 | |
3120 | void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { |
3121 | assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3122 | Assembler::vpaddb(dst, nds, src, vector_len); |
3123 | } |
3124 | |
3125 | void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { |
3126 | assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3127 | Assembler::vpaddb(dst, nds, src, vector_len); |
3128 | } |
3129 | |
3130 | void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { |
3131 | assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3132 | Assembler::vpaddw(dst, nds, src, vector_len); |
3133 | } |
3134 | |
3135 | void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { |
3136 | assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3137 | Assembler::vpaddw(dst, nds, src, vector_len); |
3138 | } |
3139 | |
3140 | void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { |
3141 | if (reachable(src)) { |
3142 | Assembler::vpand(dst, nds, as_Address(src), vector_len); |
3143 | } else { |
3144 | lea(scratch_reg, src); |
3145 | Assembler::vpand(dst, nds, Address(scratch_reg, 0), vector_len); |
3146 | } |
3147 | } |
3148 | |
3149 | void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) { |
3150 | assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3151 | Assembler::vpbroadcastw(dst, src, vector_len); |
3152 | } |
3153 | |
3154 | void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { |
3155 | assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3156 | Assembler::vpcmpeqb(dst, nds, src, vector_len); |
3157 | } |
3158 | |
3159 | void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { |
3160 | assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3161 | Assembler::vpcmpeqw(dst, nds, src, vector_len); |
3162 | } |
3163 | |
3164 | void MacroAssembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, |
3165 | AddressLiteral src, int vector_len, Register scratch_reg) { |
3166 | if (reachable(src)) { |
3167 | Assembler::evpcmpeqd(kdst, mask, nds, as_Address(src), vector_len); |
3168 | } else { |
3169 | lea(scratch_reg, src); |
3170 | Assembler::evpcmpeqd(kdst, mask, nds, Address(scratch_reg, 0), vector_len); |
3171 | } |
3172 | } |
3173 | |
3174 | void MacroAssembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, |
3175 | int comparison, bool is_signed, int vector_len, Register scratch_reg) { |
3176 | if (reachable(src)) { |
3177 | Assembler::evpcmpd(kdst, mask, nds, as_Address(src), comparison, is_signed, vector_len); |
3178 | } else { |
3179 | lea(scratch_reg, src); |
3180 | Assembler::evpcmpd(kdst, mask, nds, Address(scratch_reg, 0), comparison, is_signed, vector_len); |
3181 | } |
3182 | } |
3183 | |
3184 | void MacroAssembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, |
3185 | int comparison, bool is_signed, int vector_len, Register scratch_reg) { |
3186 | if (reachable(src)) { |
3187 | Assembler::evpcmpq(kdst, mask, nds, as_Address(src), comparison, is_signed, vector_len); |
3188 | } else { |
3189 | lea(scratch_reg, src); |
3190 | Assembler::evpcmpq(kdst, mask, nds, Address(scratch_reg, 0), comparison, is_signed, vector_len); |
3191 | } |
3192 | } |
3193 | |
3194 | void MacroAssembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, |
3195 | int comparison, bool is_signed, int vector_len, Register scratch_reg) { |
3196 | if (reachable(src)) { |
3197 | Assembler::evpcmpb(kdst, mask, nds, as_Address(src), comparison, is_signed, vector_len); |
3198 | } else { |
3199 | lea(scratch_reg, src); |
3200 | Assembler::evpcmpb(kdst, mask, nds, Address(scratch_reg, 0), comparison, is_signed, vector_len); |
3201 | } |
3202 | } |
3203 | |
3204 | void MacroAssembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, |
3205 | int comparison, bool is_signed, int vector_len, Register scratch_reg) { |
3206 | if (reachable(src)) { |
3207 | Assembler::evpcmpw(kdst, mask, nds, as_Address(src), comparison, is_signed, vector_len); |
3208 | } else { |
3209 | lea(scratch_reg, src); |
3210 | Assembler::evpcmpw(kdst, mask, nds, Address(scratch_reg, 0), comparison, is_signed, vector_len); |
3211 | } |
3212 | } |
3213 | |
3214 | void MacroAssembler::vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len) { |
3215 | if (width == Assembler::Q) { |
3216 | Assembler::vpcmpCCq(dst, nds, src, cond_encoding, vector_len); |
3217 | } else { |
3218 | Assembler::vpcmpCCbwd(dst, nds, src, cond_encoding, vector_len); |
3219 | } |
3220 | } |
3221 | |
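// vpcmpCCW emulates predicates that lack a direct SSE/AVX encoding: neq and le are built
// from the eq/gt encodings and inverted by XOR-ing with an all-ones vector, nlt swaps the
// operands of gt and inverts, lt swaps the operands of gt, and nle is gt directly.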
3222 | void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg) { |
3223 | int eq_cond_enc = 0x29; |
3224 | int gt_cond_enc = 0x37; |
3225 | if (width != Assembler::Q) { |
3226 | eq_cond_enc = 0x74 + width; |
3227 | gt_cond_enc = 0x64 + width; |
3228 | } |
3229 | switch (cond) { |
3230 | case eq: |
3231 | vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len); |
3232 | break; |
3233 | case neq: |
3234 | vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len); |
3235 | vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg); |
3236 | break; |
3237 | case le: |
3238 | vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len); |
3239 | vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg); |
3240 | break; |
3241 | case nlt: |
3242 | vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len); |
3243 | vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg); |
3244 | break; |
3245 | case lt: |
3246 | vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len); |
3247 | break; |
3248 | case nle: |
3249 | vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len); |
3250 | break; |
3251 | default: |
3252 | assert(false, "Should not reach here");
3253 | } |
3254 | } |
3255 | |
3256 | void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) { |
3257 | assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3258 | Assembler::vpmovzxbw(dst, src, vector_len); |
3259 | } |
3260 | |
3261 | void MacroAssembler::vpmovmskb(Register dst, XMMRegister src, int vector_len) { |
3262 | assert((src->encoding() < 16),"XMM register should be 0-15");
3263 | Assembler::vpmovmskb(dst, src, vector_len); |
3264 | } |
3265 | |
3266 | void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { |
3267 | assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3268 | Assembler::vpmullw(dst, nds, src, vector_len); |
3269 | } |
3270 | |
3271 | void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { |
3272 | assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3273 | Assembler::vpmullw(dst, nds, src, vector_len); |
3274 | } |
3275 | |
3276 | void MacroAssembler::vpmulld(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { |
3277 | assert((UseAVX > 0), "AVX support is needed");
3278 | if (reachable(src)) { |
3279 | Assembler::vpmulld(dst, nds, as_Address(src), vector_len); |
3280 | } else { |
3281 | lea(scratch_reg, src); |
3282 | Assembler::vpmulld(dst, nds, Address(scratch_reg, 0), vector_len); |
3283 | } |
3284 | } |
3285 | |
3286 | void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { |
3287 | assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3288 | Assembler::vpsubb(dst, nds, src, vector_len); |
3289 | } |
3290 | |
3291 | void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { |
3292 | assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3293 | Assembler::vpsubb(dst, nds, src, vector_len); |
3294 | } |
3295 | |
3296 | void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { |
3297 | assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3298 | Assembler::vpsubw(dst, nds, src, vector_len); |
3299 | } |
3300 | |
3301 | void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { |
3302 | assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3303 | Assembler::vpsubw(dst, nds, src, vector_len); |
3304 | } |
3305 | |
3306 | void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) { |
3307 | assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3308 | Assembler::vpsraw(dst, nds, shift, vector_len); |
3309 | } |
3310 | |
3311 | void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) { |
3312 | assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3313 | Assembler::vpsraw(dst, nds, shift, vector_len); |
3314 | } |
3315 | |
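// evpsraq: without AVX512VL the EVEX quadword arithmetic shift is only encodable at
// 512-bit length, so narrower vector_len requests are widened to AVX_512bit below.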
3316 | void MacroAssembler::evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) { |
3317 | assert(UseAVX > 2,"");
3318 | if (!VM_Version::supports_avx512vl() && vector_len < 2) { |
3319 | vector_len = 2; |
3320 | } |
3321 | Assembler::evpsraq(dst, nds, shift, vector_len); |
3322 | } |
3323 | |
3324 | void MacroAssembler::evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len) { |
3325 | assert(UseAVX > 2,"");
3326 | if (!VM_Version::supports_avx512vl() && vector_len < 2) { |
3327 | vector_len = 2; |
3328 | } |
3329 | Assembler::evpsraq(dst, nds, shift, vector_len); |
3330 | } |
3331 | |
3332 | void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) { |
3333 | assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3334 | Assembler::vpsrlw(dst, nds, shift, vector_len); |
3335 | } |
3336 | |
3337 | void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) { |
3338 | assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3339 | Assembler::vpsrlw(dst, nds, shift, vector_len); |
3340 | } |
3341 | |
3342 | void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) { |
3343 | assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3344 | Assembler::vpsllw(dst, nds, shift, vector_len); |
3345 | } |
3346 | |
3347 | void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) { |
3348 | assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3349 | Assembler::vpsllw(dst, nds, shift, vector_len); |
3350 | } |
3351 | |
3352 | void MacroAssembler::vptest(XMMRegister dst, XMMRegister src) { |
3353 | assert((dst->encoding() < 16 && src->encoding() < 16),"XMM register should be 0-15");
3354 | Assembler::vptest(dst, src); |
3355 | } |
3356 | |
3357 | void MacroAssembler::punpcklbw(XMMRegister dst, XMMRegister src) { |
3358 | assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3359 | Assembler::punpcklbw(dst, src); |
3360 | } |
3361 | |
3362 | void MacroAssembler::pshufd(XMMRegister dst, Address src, int mode) { |
3363 | assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
3364 | Assembler::pshufd(dst, src, mode); |
3365 | } |
3366 | |
3367 | void MacroAssembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { |
3368 | assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3369 | Assembler::pshuflw(dst, src, mode); |
3370 | } |
3371 | |
3372 | void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { |
3373 | if (reachable(src)) { |
3374 | vandpd(dst, nds, as_Address(src), vector_len); |
3375 | } else { |
3376 | lea(scratch_reg, src); |
3377 | vandpd(dst, nds, Address(scratch_reg, 0), vector_len); |
3378 | } |
3379 | } |
3380 | |
3381 | void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { |
3382 | if (reachable(src)) { |
3383 | vandps(dst, nds, as_Address(src), vector_len); |
3384 | } else { |
3385 | lea(scratch_reg, src); |
3386 | vandps(dst, nds, Address(scratch_reg, 0), vector_len); |
3387 | } |
3388 | } |
3389 | |
3390 | void MacroAssembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src, |
3391 | bool merge, int vector_len, Register scratch_reg) { |
3392 | if (reachable(src)) { |
3393 | Assembler::evpord(dst, mask, nds, as_Address(src), merge, vector_len); |
3394 | } else { |
3395 | lea(scratch_reg, src); |
3396 | Assembler::evpord(dst, mask, nds, Address(scratch_reg, 0), merge, vector_len); |
3397 | } |
3398 | } |
3399 | |
3400 | void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { |
3401 | if (reachable(src)) { |
3402 | vdivsd(dst, nds, as_Address(src)); |
3403 | } else { |
3404 | lea(rscratch1, src); |
3405 | vdivsd(dst, nds, Address(rscratch1, 0)); |
3406 | } |
3407 | } |
3408 | |
3409 | void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { |
3410 | if (reachable(src)) { |
3411 | vdivss(dst, nds, as_Address(src)); |
3412 | } else { |
3413 | lea(rscratch1, src); |
3414 | vdivss(dst, nds, Address(rscratch1, 0)); |
3415 | } |
3416 | } |
3417 | |
3418 | void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { |
3419 | if (reachable(src)) { |
3420 | vmulsd(dst, nds, as_Address(src)); |
3421 | } else { |
3422 | lea(rscratch1, src); |
3423 | vmulsd(dst, nds, Address(rscratch1, 0)); |
3424 | } |
3425 | } |
3426 | |
3427 | void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { |
3428 | if (reachable(src)) { |
3429 | vmulss(dst, nds, as_Address(src)); |
3430 | } else { |
3431 | lea(rscratch1, src); |
3432 | vmulss(dst, nds, Address(rscratch1, 0)); |
3433 | } |
3434 | } |
3435 | |
3436 | void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { |
3437 | if (reachable(src)) { |
3438 | vsubsd(dst, nds, as_Address(src)); |
3439 | } else { |
3440 | lea(rscratch1, src); |
3441 | vsubsd(dst, nds, Address(rscratch1, 0)); |
3442 | } |
3443 | } |
3444 | |
3445 | void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { |
3446 | if (reachable(src)) { |
3447 | vsubss(dst, nds, as_Address(src)); |
3448 | } else { |
3449 | lea(rscratch1, src); |
3450 | vsubss(dst, nds, Address(rscratch1, 0)); |
3451 | } |
3452 | } |
3453 | |
3454 | void MacroAssembler::vnegatess(XMMRegister dst, XMMRegister nds, AddressLiteral src) { |
3455 | assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15");
3456 | vxorps(dst, nds, src, Assembler::AVX_128bit); |
3457 | } |
3458 | |
3459 | void MacroAssembler::vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { |
3460 | assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15");
3461 | vxorpd(dst, nds, src, Assembler::AVX_128bit); |
3462 | } |
3463 | |
3464 | void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { |
3465 | if (reachable(src)) { |
3466 | vxorpd(dst, nds, as_Address(src), vector_len); |
3467 | } else { |
3468 | lea(scratch_reg, src); |
3469 | vxorpd(dst, nds, Address(scratch_reg, 0), vector_len); |
3470 | } |
3471 | } |
3472 | |
3473 | void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { |
3474 | if (reachable(src)) { |
3475 | vxorps(dst, nds, as_Address(src), vector_len); |
3476 | } else { |
3477 | lea(scratch_reg, src); |
3478 | vxorps(dst, nds, Address(scratch_reg, 0), vector_len); |
3479 | } |
3480 | } |
3481 | |
3482 | void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { |
3483 | if (UseAVX > 1 || (vector_len < 1)) { |
3484 | if (reachable(src)) { |
3485 | Assembler::vpxor(dst, nds, as_Address(src), vector_len); |
3486 | } else { |
3487 | lea(scratch_reg, src); |
3488 | Assembler::vpxor(dst, nds, Address(scratch_reg, 0), vector_len); |
3489 | } |
3490 | } |
3491 | else { |
3492 | MacroAssembler::vxorpd(dst, nds, src, vector_len, scratch_reg); |
3493 | } |
3494 | } |
3495 | |
3496 | void MacroAssembler::vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { |
3497 | if (reachable(src)) { |
3498 | Assembler::vpermd(dst, nds, as_Address(src), vector_len); |
3499 | } else { |
3500 | lea(scratch_reg, src); |
3501 | Assembler::vpermd(dst, nds, Address(scratch_reg, 0), vector_len); |
3502 | } |
3503 | } |
3504 | |
3505 | void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { |
3506 | const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask); |
3507 | STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
3508 | // The inverted mask is sign-extended |
3509 | andptr(possibly_jweak, inverted_jweak_mask); |
3510 | } |
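     | // Illustrative note on clear_jweak_tag (sketch, not generated code): the static assert
     | // above implies weak_tag_mask == 1, so the and with -2 (...11110 in two's complement)
     | // simply clears the low tag bit, e.g. 0x7f001235 & ~1 == 0x7f001234, leaving an
     | // untagged jobject. The sign-extended 32-bit immediate keeps the upper bits of a
     | // 64-bit pointer intact.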
3511 | |
3512 | void MacroAssembler::resolve_jobject(Register value, |
3513 | Register thread, |
3514 | Register tmp) { |
3515 | assert_different_registers(value, thread, tmp); |
3516 | Label done, not_weak; |
3517 | testptr(value, value); |
3518 | jcc(Assembler::zero, done); // Use NULL as-is. |
3519 | testptr(value, JNIHandles::weak_tag_mask); // Test for jweak tag. |
3520 | jcc(Assembler::zero, not_weak); |
3521 | // Resolve jweak. |
3522 | access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, |
3523 | value, Address(value, -JNIHandles::weak_tag_value), tmp, thread); |
3524 | verify_oop(value);
3525 | jmp(done); |
3526 | bind(not_weak); |
3527 | // Resolve (untagged) jobject. |
3528 | access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); |
3529 | verify_oop(value);
3530 | bind(done); |
3531 | } |
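     | // Rough C-level equivalent of resolve_jobject above (sketch only):
     | //   if (value == NULL)             return NULL;                              // use NULL as-is
     | //   if (value & weak_tag_mask)     return *(oop*)(value - weak_tag_value);   // jweak: phantom-ref load
     | //   else                           return *(oop*)value;                      // plain native load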
3532 | |
3533 | void MacroAssembler::subptr(Register dst, int32_t imm32) { |
3534 | LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
3535 | } |
3536 | |
3537 | // Force generation of a 4 byte immediate value even if it fits into 8bit |
3538 | void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) { |
3539 | LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32));
3540 | } |
3541 | |
3542 | void MacroAssembler::subptr(Register dst, Register src) { |
3543 | LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
3544 | } |
3545 | |
3546 | // C++ bool manipulation |
3547 | void MacroAssembler::testbool(Register dst) { |
3548 | if(sizeof(bool) == 1) |
3549 | testb(dst, 0xff); |
3550 | else if(sizeof(bool) == 2) { |
3551 | // testw implementation needed for two byte bools |
3552 | ShouldNotReachHere();
3553 | } else if(sizeof(bool) == 4) |
3554 | testl(dst, dst); |
3555 | else |
3556 | // unsupported |
3557 | ShouldNotReachHere();
3558 | } |
3559 | |
3560 | void MacroAssembler::testptr(Register dst, Register src) { |
3561 | LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
3562 | } |
3563 | |
3564 | // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. |
3565 | void MacroAssembler::tlab_allocate(Register thread, Register obj, |
3566 | Register var_size_in_bytes, |
3567 | int con_size_in_bytes, |
3568 | Register t1, |
3569 | Register t2, |
3570 | Label& slow_case) { |
3571 | BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); |
3572 | bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); |
3573 | } |
3574 | |
3575 | // Defines obj, preserves var_size_in_bytes |
3576 | void MacroAssembler::eden_allocate(Register thread, Register obj, |
3577 | Register var_size_in_bytes, |
3578 | int con_size_in_bytes, |
3579 | Register t1, |
3580 | Label& slow_case) { |
3581 | BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); |
3582 | bs->eden_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); |
3583 | } |
3584 | |
3585 | // Preserves the contents of address, destroys the contents length_in_bytes and temp. |
3586 | void MacroAssembler::zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp) { |
3587 | assert(address != length_in_bytes && address != temp && temp != length_in_bytes, "registers must be different");
3588 | assert((offset_in_bytes & (BytesPerWord - 1)) == 0, "offset must be a multiple of BytesPerWord");
3589 | Label done; |
3590 | |
3591 | testptr(length_in_bytes, length_in_bytes); |
3592 | jcc(Assembler::zero, done); |
3593 | |
3594 | // initialize topmost word, divide index by 2, check if odd and test if zero |
3595 | // note: for the remaining code to work, index must be a multiple of BytesPerWord |
3596 | #ifdef ASSERT
3597 | { |
3598 | Label L; |
3599 | testptr(length_in_bytes, BytesPerWord - 1); |
3600 | jcc(Assembler::zero, L); |
3601 | stop("length must be a multiple of BytesPerWord"); |
3602 | bind(L); |
3603 | } |
3604 | #endif |
3605 | Register index = length_in_bytes; |
3606 | xorptr(temp, temp); // use _zero reg to clear memory (shorter code) |
3607 | if (UseIncDec) { |
3608 | shrptr(index, 3); // divide by 8/16 and set carry flag if bit 2 was set |
3609 | } else { |
3610 | shrptr(index, 2); // use 2 instructions to avoid partial flag stall |
3611 | shrptr(index, 1); |
3612 | } |
3613 | #ifndef _LP64
3614 | // index could have not been a multiple of 8 (i.e., bit 2 was set) |
3615 | { |
3616 | Label even; |
3617 | // note: if index was a multiple of 8, then it cannot |
3618 | // be 0 now otherwise it must have been 0 before |
3619 | // => if it is even, we don't need to check for 0 again |
3620 | jcc(Assembler::carryClear, even); |
3621 | // clear topmost word (no jump would be needed if conditional assignment worked here) |
3622 | movptr(Address(address, index, Address::times_8, offset_in_bytes - 0*BytesPerWord), temp); |
3623 | // index could be 0 now, must check again |
3624 | jcc(Assembler::zero, done); |
3625 | bind(even); |
3626 | } |
3627 | #endif // !_LP64 |
3628 | // initialize remaining object fields: index is a multiple of 2 now |
3629 | { |
3630 | Label loop; |
3631 | bind(loop); |
3632 | movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp); |
3633 | NOT_LP64(movptr(Address(address, index, Address::times_8, offset_in_bytes - 2*BytesPerWord), temp);) |
3634 | decrement(index); |
3635 | jcc(Assembler::notZero, loop); |
3636 | } |
3637 | |
3638 | bind(done); |
3639 | } |
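     | // Worked example for zero_memory (64-bit, sketch): with offset_in_bytes == o and
     | // length_in_bytes == 32, index becomes 32 >> 3 == 4 and the loop stores the zero word at
     | //   o + 4*8 - 8, o + 3*8 - 8, o + 2*8 - 8, o + 1*8 - 8  ==  o+24, o+16, o+8, o+0,
     | // i.e. exactly the 32 requested bytes, highest word first.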
3640 | |
3641 | // Look up the method for a megamorphic invokeinterface call. |
3642 | // The target method is determined by <intf_klass, itable_index>. |
3643 | // The receiver klass is in recv_klass. |
3644 | // On success, the result will be in method_result, and execution falls through. |
3645 | // On failure, execution transfers to the given label. |
3646 | void MacroAssembler::lookup_interface_method(Register recv_klass, |
3647 | Register intf_klass, |
3648 | RegisterOrConstant itable_index, |
3649 | Register method_result, |
3650 | Register scan_temp, |
3651 | Label& L_no_such_interface, |
3652 | bool return_method) { |
3653 | assert_different_registers(recv_klass, intf_klass, scan_temp); |
3654 | assert_different_registers(method_result, intf_klass, scan_temp); |
3655 | assert(recv_klass != method_result || !return_method,
3656 | "recv_klass can be destroyed when method isn't needed");
3657 | |
3658 | assert(itable_index.is_constant() || itable_index.as_register() == method_result,
3659 | "caller must use same register for non-constant itable index as for method");
3660 | |
3661 | // Compute start of first itableOffsetEntry (which is at the end of the vtable) |
3662 | int vtable_base = in_bytes(Klass::vtable_start_offset()); |
3663 | int itentry_off = itableMethodEntry::method_offset_in_bytes(); |
3664 | int scan_step = itableOffsetEntry::size() * wordSize; |
3665 | int vte_size = vtableEntry::size_in_bytes(); |
3666 | Address::ScaleFactor times_vte_scale = Address::times_ptr; |
3667 | assert(vte_size == wordSize, "else adjust times_vte_scale");
3668 | |
3669 | movl(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); |
3670 | |
3671 | // %%% Could store the aligned, prescaled offset in the klassoop. |
3672 | lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base)); |
3673 | |
3674 | if (return_method) { |
3675 | // Adjust recv_klass by scaled itable_index, so we can free itable_index. |
3676 | assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
3677 | lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off)); |
3678 | } |
3679 | |
3680 | // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { |
3681 | // if (scan->interface() == intf) { |
3682 | // result = (klass + scan->offset() + itable_index); |
3683 | // } |
3684 | // } |
3685 | Label search, found_method; |
3686 | |
3687 | for (int peel = 1; peel >= 0; peel--) { |
3688 | movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); |
3689 | cmpptr(intf_klass, method_result); |
3690 | |
3691 | if (peel) { |
3692 | jccb(Assembler::equal, found_method);
3693 | } else { |
3694 | jccb(Assembler::notEqual, search);
3695 | // (invert the test to fall through to found_method...) |
3696 | } |
3697 | |
3698 | if (!peel) break; |
3699 | |
3700 | bind(search); |
3701 | |
3702 | // Check that the previous entry is non-null. A null entry means that |
3703 | // the receiver class doesn't implement the interface, and wasn't the |
3704 | // same as when the caller was compiled. |
3705 | testptr(method_result, method_result); |
3706 | jcc(Assembler::zero, L_no_such_interface); |
3707 | addptr(scan_temp, scan_step); |
3708 | } |
3709 | |
3710 | bind(found_method); |
3711 | |
3712 | if (return_method) { |
3713 | // Got a hit. |
3714 | movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); |
3715 | movptr(method_result, Address(recv_klass, scan_temp, Address::times_1)); |
3716 | } |
3717 | } |
3718 | |
3719 | |
3720 | // virtual method calling |
3721 | void MacroAssembler::lookup_virtual_method(Register recv_klass, |
3722 | RegisterOrConstant vtable_index, |
3723 | Register method_result) { |
3724 | const int base = in_bytes(Klass::vtable_start_offset()); |
3725 | assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
3726 | Address vtable_entry_addr(recv_klass, |
3727 | vtable_index, Address::times_ptr, |
3728 | base + vtableEntry::method_offset_in_bytes()); |
3729 | movptr(method_result, vtable_entry_addr); |
3730 | } |
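     | // In effect (sketch of the single load above):
     | //   method_result = *(Method**)((address)recv_klass
     | //       + in_bytes(Klass::vtable_start_offset())
     | //       + vtable_index * wordSize
     | //       + vtableEntry::method_offset_in_bytes());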
3731 | |
3732 | |
3733 | void MacroAssembler::check_klass_subtype(Register sub_klass, |
3734 | Register super_klass, |
3735 | Register temp_reg, |
3736 | Label& L_success) { |
3737 | Label L_failure; |
3738 | check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
3739 | check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
3740 | bind(L_failure); |
3741 | } |
3742 | |
3743 | |
3744 | void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, |
3745 | Register super_klass, |
3746 | Register temp_reg, |
3747 | Label* L_success, |
3748 | Label* L_failure, |
3749 | Label* L_slow_path, |
3750 | RegisterOrConstant super_check_offset) { |
3751 | assert_different_registers(sub_klass, super_klass, temp_reg); |
3752 | bool must_load_sco = (super_check_offset.constant_or_zero() == -1); |
3753 | if (super_check_offset.is_register()) { |
3754 | assert_different_registers(sub_klass, super_klass, |
3755 | super_check_offset.as_register()); |
3756 | } else if (must_load_sco) { |
3757 | assert(temp_reg != noreg, "supply either a temp or a register offset");
3758 | } |
3759 | |
3760 | Label L_fallthrough; |
3761 | int label_nulls = 0; |
3762 | if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
3763 | if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
3764 | if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
3765 | assert(label_nulls <= 1, "at most one NULL in the batch");
3766 | |
3767 | int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); |
3768 | int sco_offset = in_bytes(Klass::super_check_offset_offset()); |
3769 | Address super_check_offset_addr(super_klass, sco_offset); |
3770 | |
3771 | // Hacked jcc, which "knows" that L_fallthrough, at least, is in |
3772 | // range of a jccb. If this routine grows larger, reconsider at |
3773 | // least some of these. |
3774 | #define local_jcc(assembler_cond, label) \ |
3775 | if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \
3776 | else jcc( assembler_cond, label) /*omit semi*/ |
3777 | |
3778 | // Hacked jmp, which may only be used just before L_fallthrough. |
3779 | #define final_jmp(label) \ |
3780 | if (&(label) == &L_fallthrough) { /*do nothing*/ } \ |
3781 | else jmp(label) /*omit semi*/ |
3782 | |
3783 | // If the pointers are equal, we are done (e.g., String[] elements). |
3784 | // This self-check enables sharing of secondary supertype arrays among |
3785 | // non-primary types such as array-of-interface. Otherwise, each such |
3786 | // type would need its own customized SSA. |
3787 | // We move this check to the front of the fast path because many |
3788 | // type checks are in fact trivially successful in this manner, |
3789 | // so we get a nicely predicted branch right at the start of the check. |
3790 | cmpptr(sub_klass, super_klass); |
3791 | local_jcc(Assembler::equal, *L_success); |
3792 | |
3793 | // Check the supertype display: |
3794 | if (must_load_sco) { |
3795 | // Positive movl does right thing on LP64. |
3796 | movl(temp_reg, super_check_offset_addr); |
3797 | super_check_offset = RegisterOrConstant(temp_reg); |
3798 | } |
3799 | Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); |
3800 | cmpptr(super_klass, super_check_addr); // load displayed supertype |
3801 | |
3802 | // This check has worked decisively for primary supers. |
3803 | // Secondary supers are sought in the super_cache ('super_cache_addr'). |
3804 | // (Secondary supers are interfaces and very deeply nested subtypes.) |
3805 | // This works in the same check above because of a tricky aliasing |
3806 | // between the super_cache and the primary super display elements. |
3807 | // (The 'super_check_addr' can address either, as the case requires.) |
3808 | // Note that the cache is updated below if it does not help us find |
3809 | // what we need immediately. |
3810 | // So if it was a primary super, we can just fail immediately. |
3811 | // Otherwise, it's the slow path for us (no success at this point). |
3812 | |
3813 | if (super_check_offset.is_register()) { |
3814 | local_jcc(Assembler::equal, *L_success); |
3815 | cmpl(super_check_offset.as_register(), sc_offset); |
3816 | if (L_failure == &L_fallthrough) { |
3817 | local_jcc(Assembler::equal, *L_slow_path); |
3818 | } else { |
3819 | local_jcc(Assembler::notEqual, *L_failure); |
3820 | final_jmp(*L_slow_path); |
3821 | } |
3822 | } else if (super_check_offset.as_constant() == sc_offset) { |
3823 | // Need a slow path; fast failure is impossible. |
3824 | if (L_slow_path == &L_fallthrough) { |
3825 | local_jcc(Assembler::equal, *L_success); |
3826 | } else { |
3827 | local_jcc(Assembler::notEqual, *L_slow_path); |
3828 | final_jmp(*L_success); |
3829 | } |
3830 | } else { |
3831 | // No slow path; it's a fast decision. |
3832 | if (L_failure == &L_fallthrough) { |
3833 | local_jcc(Assembler::equal, *L_success); |
3834 | } else { |
3835 | local_jcc(Assembler::notEqual, *L_failure); |
3836 | final_jmp(*L_success); |
3837 | } |
3838 | } |
3839 | |
3840 | bind(L_fallthrough); |
3841 | |
3842 | #undef local_jcc |
3843 | #undef final_jmp |
3844 | } |
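     | // Fast-path logic above, restated as a sketch:
     | //   if (sub_klass == super_klass)                              goto success;
     | //   off = super check offset (loaded from super_klass into temp_reg if not supplied);
     | //   if (*(Klass**)((address)sub_klass + off) == super_klass)   goto success;
     | //   if (off == secondary_super_cache_offset)                   goto slow_path;  // cache miss, must scan secondaries
     | //   else                                                       goto failure;    // primary display slot did not match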
3845 | |
3846 | |
3847 | void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, |
3848 | Register super_klass, |
3849 | Register temp_reg, |
3850 | Register temp2_reg, |
3851 | Label* L_success, |
3852 | Label* L_failure, |
3853 | bool set_cond_codes) { |
3854 | assert_different_registers(sub_klass, super_klass, temp_reg); |
3855 | if (temp2_reg != noreg) |
3856 | assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); |
3857 | #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) |
3858 | |
3859 | Label L_fallthrough; |
3860 | int label_nulls = 0; |
3861 | if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
3862 | if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
3863 | assert(label_nulls <= 1, "at most one NULL in the batch");
3864 | |
3865 | // a couple of useful fields in sub_klass: |
3866 | int ss_offset = in_bytes(Klass::secondary_supers_offset()); |
3867 | int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); |
3868 | Address secondary_supers_addr(sub_klass, ss_offset); |
3869 | Address super_cache_addr( sub_klass, sc_offset); |
3870 | |
3871 | // Do a linear scan of the secondary super-klass chain. |
3872 | // This code is rarely used, so simplicity is a virtue here. |
3873 | // The repne_scan instruction uses fixed registers, which we must spill. |
3874 | // Don't worry too much about pre-existing connections with the input regs. |
3875 | |
3876 | assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
3877 | assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)
3878 | |
3879 | // Get super_klass value into rax (even if it was in rdi or rcx). |
3880 | bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false; |
3881 | if (super_klass != rax || UseCompressedOops) { |
3882 | if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; } |
3883 | mov(rax, super_klass); |
3884 | } |
3885 | if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; } |
3886 | if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; } |
3887 | |
3888 | #ifndef PRODUCT |
3889 | int* pst_counter = &SharedRuntime::_partial_subtype_ctr; |
3890 | ExternalAddress pst_counter_addr((address) pst_counter); |
3891 | NOT_LP64( incrementl(pst_counter_addr) ); |
3892 | LP64_ONLY( lea(rcx, pst_counter_addr) );
3893 | LP64_ONLY( incrementl(Address(rcx, 0)) );
3894 | #endif //PRODUCT |
3895 | |
3896 | // We will consult the secondary-super array. |
3897 | movptr(rdi, secondary_supers_addr); |
3898 | // Load the array length. (Positive movl does right thing on LP64.) |
3899 | movl(rcx, Address(rdi, Array<Klass*>::length_offset_in_bytes())); |
3900 | // Skip to start of data. |
3901 | addptr(rdi, Array<Klass*>::base_offset_in_bytes()); |
3902 | |
3903 | // Scan RCX words at [RDI] for an occurrence of RAX. |
3904 | // Set NZ/Z based on last compare. |
3905 | // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does |
3906 | // not change flags (only scas instruction which is repeated sets flags). |
3907 | // Set Z = 0 (not equal) before 'repne' to indicate that class was not found. |
3908 | |
3909 | testptr(rax,rax); // Set Z = 0 |
3910 | repne_scan(); |
3911 | |
3912 | // Unspill the temp. registers: |
3913 | if (pushed_rdi) pop(rdi); |
3914 | if (pushed_rcx) pop(rcx); |
3915 | if (pushed_rax) pop(rax); |
3916 | |
3917 | if (set_cond_codes) { |
3918 | // Special hack for the AD files: rdi is guaranteed non-zero. |
3919 | assert(!pushed_rdi, "rdi must be left non-NULL");
3920 | // Also, the condition codes are properly set Z/NZ on succeed/failure. |
3921 | } |
3922 | |
3923 | if (L_failure == &L_fallthrough) |
3924 | jccb(Assembler::notEqual, *L_failure);
3925 | else jcc(Assembler::notEqual, *L_failure); |
3926 | |
3927 | // Success. Cache the super we found and proceed in triumph. |
3928 | movptr(super_cache_addr, super_klass); |
3929 | |
3930 | if (L_success != &L_fallthrough) { |
3931 | jmp(*L_success); |
3932 | } |
3933 | |
3934 | #undef IS_A_TEMP |
3935 | |
3936 | bind(L_fallthrough); |
3937 | } |
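     | // The repne_scan over the secondary supers above is essentially (sketch only):
     | //   Array<Klass*>* ss = secondary supers of sub_klass;
     | //   for (int i = 0; i < ss->length(); i++) {
     | //     if (ss->at(i) == super_klass) {              // Z set by the matching scas
     | //       store super_klass into the secondary super cache; goto success;
     | //     }
     | //   }
     | //   goto failure;                                  // Z clear: not a subtype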
3938 | |
3939 | void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fast_path, Label* L_slow_path) { |
3940 | assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required");
3941 | |
3942 | Label L_fallthrough; |
3943 | if (L_fast_path == NULL) {
3944 | L_fast_path = &L_fallthrough; |
3945 | } else if (L_slow_path == NULL) {
3946 | L_slow_path = &L_fallthrough; |
3947 | } |
3948 | |
3949 | // Fast path check: class is fully initialized |
3950 | cmpb(Address(klass, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized); |
3951 | jcc(Assembler::equal, *L_fast_path); |
3952 | |
3953 | // Fast path check: current thread is initializer thread |
3954 | cmpptr(thread, Address(klass, InstanceKlass::init_thread_offset())); |
3955 | if (L_slow_path == &L_fallthrough) { |
3956 | jcc(Assembler::equal, *L_fast_path); |
3957 | bind(*L_slow_path); |
3958 | } else if (L_fast_path == &L_fallthrough) { |
3959 | jcc(Assembler::notEqual, *L_slow_path); |
3960 | bind(*L_fast_path); |
3961 | } else { |
3962 | Unimplemented();
3963 | } |
3964 | } |
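     | // Equivalent check (sketch): a klass passes the barrier, i.e. reaches L_fast_path, iff
     | //   klass->init_state == fully_initialized || klass->init_thread == current thread;
     | // otherwise control goes to L_slow_path.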
3965 | |
3966 | void MacroAssembler::cmov32(Condition cc, Register dst, Address src) { |
3967 | if (VM_Version::supports_cmov()) { |
3968 | cmovl(cc, dst, src); |
3969 | } else { |
3970 | Label L; |
3971 | jccb(negate_condition(cc), L);
3972 | movl(dst, src); |
3973 | bind(L); |
3974 | } |
3975 | } |
3976 | |
3977 | void MacroAssembler::cmov32(Condition cc, Register dst, Register src) { |
3978 | if (VM_Version::supports_cmov()) { |
3979 | cmovl(cc, dst, src); |
3980 | } else { |
3981 | Label L; |
3982 | jccb(negate_condition(cc), L);
3983 | movl(dst, src); |
3984 | bind(L); |
3985 | } |
3986 | } |
3987 | |
3988 | void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) { |
3989 | if (!VerifyOops) return; |
3990 | |
3991 | // Pass register number to verify_oop_subroutine |
3992 | const char* b = NULL;
3993 | { |
3994 | ResourceMark rm; |
3995 | stringStream ss; |
3996 | ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line); |
3997 | b = code_string(ss.as_string()); |
3998 | } |
3999 | BLOCK_COMMENT("verify_oop {"); |
4000 | #ifdef _LP64
4001 | push(rscratch1); // save r10, trashed by movptr() |
4002 | #endif |
4003 | push(rax); // save rax, |
4004 | push(reg); // pass register argument |
4005 | ExternalAddress buffer((address) b); |
4006 | // avoid using pushptr, as it modifies scratch registers |
4007 | // and our contract is not to modify anything |
4008 | movptr(rax, buffer.addr()); |
4009 | push(rax); |
4010 | // call indirectly to solve generation ordering problem |
4011 | movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); |
4012 | call(rax); |
4013 | // Caller pops the arguments (oop, message) and restores rax, r10 |
4014 | BLOCK_COMMENT("} verify_oop"); |
4015 | } |
4016 | |
4017 | void MacroAssembler::vallones(XMMRegister dst, int vector_len) { |
4018 | if (UseAVX > 2 && (vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl())) { |
4019 | vpternlogd(dst, 0xFF, dst, dst, vector_len); |
4020 | } else { |
4021 | assert(UseAVX > 0, "");
4022 | vpcmpeqb(dst, dst, dst, vector_len); |
4023 | } |
4024 | } |
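     | // Why this works (sketch): vpternlogd's immediate is a 3-input truth table; 0xFF maps
     | // every input combination to 1, so dst becomes all-ones regardless of its prior value.
     | // The AVX fallback vpcmpeqb compares a register with itself; equal bytes produce 0xFF,
     | // giving the same all-ones result.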
4025 | |
4026 | Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, |
4027 | int extra_slot_offset) { |
4028 | // cf. TemplateTable::prepare_invoke(), if (load_receiver). |
4029 | int stackElementSize = Interpreter::stackElementSize; |
4030 | int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); |
4031 | #ifdef ASSERT
4032 | int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); |
4033 | assert(offset1 - offset == stackElementSize, "correct arithmetic");
4034 | #endif |
4035 | Register scale_reg = noreg; |
4036 | Address::ScaleFactor scale_factor = Address::no_scale; |
4037 | if (arg_slot.is_constant()) { |
4038 | offset += arg_slot.as_constant() * stackElementSize; |
4039 | } else { |
4040 | scale_reg = arg_slot.as_register(); |
4041 | scale_factor = Address::times(stackElementSize); |
4042 | } |
4043 | offset += wordSize; // return PC is on stack |
4044 | return Address(rsp, scale_reg, scale_factor, offset); |
4045 | } |
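     | // Example (sketch): for a constant arg_slot n the returned address folds to
     | //   Address(rsp, Interpreter::expr_offset_in_bytes(extra_slot_offset) + n * stackElementSize + wordSize)
     | // where the trailing wordSize skips over the return PC sitting on top of the stack.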
4046 | |
4047 | void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) { |
4048 | if (!VerifyOops) return; |
4049 | |
4050 | // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord); |
4051 | // Pass register number to verify_oop_subroutine |
4052 | const char* b = NULL;
4053 | { |
4054 | ResourceMark rm; |
4055 | stringStream ss; |
4056 | ss.print("verify_oop_addr: %s (%s:%d)", s, file, line); |
4057 | b = code_string(ss.as_string()); |
4058 | } |
4059 | #ifdef _LP64
4060 | push(rscratch1); // save r10, trashed by movptr() |
4061 | #endif |
4062 | push(rax); // save rax, |
4063 | // addr may contain rsp so we will have to adjust it based on the push |
4064 | // we just did (and on 64 bit we do two pushes) |
4065 | // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which |
4066 | // stores rax into addr which is backwards of what was intended. |
4067 | if (addr.uses(rsp)) { |
4068 | lea(rax, addr); |
4069 | pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));
4070 | } else { |
4071 | pushptr(addr); |
4072 | } |
4073 | |
4074 | ExternalAddress buffer((address) b); |
4075 | // pass msg argument |
4076 | // avoid using pushptr, as it modifies scratch registers |
4077 | // and our contract is not to modify anything |
4078 | movptr(rax, buffer.addr()); |
4079 | push(rax); |
4080 | |
4081 | // call indirectly to solve generation ordering problem |
4082 | movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); |
4083 | call(rax); |
4084 | // Caller pops the arguments (addr, message) and restores rax, r10. |
4085 | } |
4086 | |
4087 | void MacroAssembler::verify_tlab() { |
4088 | #ifdef ASSERT
4089 | if (UseTLAB && VerifyOops) { |
4090 | Label next, ok; |
4091 | Register t1 = rsi; |
4092 | Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);
4093 | |
4094 | push(t1); |
4095 | NOT_LP64(push(thread_reg)); |
4096 | NOT_LP64(get_thread(thread_reg)); |
4097 | |
4098 | movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); |
4099 | cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); |
4100 | jcc(Assembler::aboveEqual, next); |
4101 | STOP("assert(top >= start)")block_comment("assert(top >= start)"); stop("assert(top >= start)" ); |
4102 | should_not_reach_here(); |
4103 | |
4104 | bind(next); |
4105 | movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); |
4106 | cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); |
4107 | jcc(Assembler::aboveEqual, ok); |
4108 | STOP("assert(top <= end)")block_comment("assert(top <= end)"); stop("assert(top <= end)" ); |
4109 | should_not_reach_here(); |
4110 | |
4111 | bind(ok); |
4112 | NOT_LP64(pop(thread_reg)); |
4113 | pop(t1); |
4114 | } |
4115 | #endif |
4116 | } |
4117 | |
4118 | class ControlWord { |
4119 | public: |
4120 | int32_t _value; |
4121 | |
4122 | int rounding_control() const { return (_value >> 10) & 3 ; } |
4123 | int precision_control() const { return (_value >> 8) & 3 ; } |
4124 | bool precision() const { return ((_value >> 5) & 1) != 0; } |
4125 | bool underflow() const { return ((_value >> 4) & 1) != 0; } |
4126 | bool overflow() const { return ((_value >> 3) & 1) != 0; } |
4127 | bool zero_divide() const { return ((_value >> 2) & 1) != 0; } |
4128 | bool denormalized() const { return ((_value >> 1) & 1) != 0; } |
4129 | bool invalid() const { return ((_value >> 0) & 1) != 0; } |
4130 | |
4131 | void print() const { |
4132 | // rounding control |
4133 | const char* rc; |
4134 | switch (rounding_control()) { |
4135 | case 0: rc = "round near"; break; |
4136 | case 1: rc = "round down"; break; |
4137 | case 2: rc = "round up "; break; |
4138 | case 3: rc = "chop "; break; |
4139 | default: |
4140 | rc = NULL; // silence compiler warnings
4141 | fatal("Unknown rounding control: %d", rounding_control());
4142 | }; |
4143 | // precision control |
4144 | const char* pc; |
4145 | switch (precision_control()) { |
4146 | case 0: pc = "24 bits "; break; |
4147 | case 1: pc = "reserved"; break; |
4148 | case 2: pc = "53 bits "; break; |
4149 | case 3: pc = "64 bits "; break; |
4150 | default: |
4151 | pc = NULL; // silence compiler warnings
4152 | fatal("Unknown precision control: %d", precision_control());
4153 | }; |
4154 | // flags |
4155 | char f[9]; |
4156 | f[0] = ' '; |
4157 | f[1] = ' '; |
4158 | f[2] = (precision ()) ? 'P' : 'p'; |
4159 | f[3] = (underflow ()) ? 'U' : 'u'; |
4160 | f[4] = (overflow ()) ? 'O' : 'o'; |
4161 | f[5] = (zero_divide ()) ? 'Z' : 'z'; |
4162 | f[6] = (denormalized()) ? 'D' : 'd'; |
4163 | f[7] = (invalid ()) ? 'I' : 'i'; |
4164 | f[8] = '\x0'; |
4165 | // output |
4166 | printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc); |
4167 | } |
4168 | |
4169 | }; |
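     | // Example decode (illustrative value only): for _value == 0x037f the accessors above give
     | //   rounding_control() == 0 ("round near"), precision_control() == 3 ("64 bits"),
     | // and all six low exception-mask bits set, so print() emits
     | //   "037f masks =   PUOZDI, round near, 64 bits ".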
4170 | |
4171 | class StatusWord { |
4172 | public: |
4173 | int32_t _value; |
4174 | |
4175 | bool busy() const { return ((_value >> 15) & 1) != 0; } |
4176 | bool C3() const { return ((_value >> 14) & 1) != 0; } |
4177 | bool C2() const { return ((_value >> 10) & 1) != 0; } |
4178 | bool C1() const { return ((_value >> 9) & 1) != 0; } |
4179 | bool C0() const { return ((_value >> 8) & 1) != 0; } |
4180 | int top() const { return (_value >> 11) & 7 ; } |
4181 | bool error_status() const { return ((_value >> 7) & 1) != 0; } |
4182 | bool stack_fault() const { return ((_value >> 6) & 1) != 0; } |
4183 | bool precision() const { return ((_value >> 5) & 1) != 0; } |
4184 | bool underflow() const { return ((_value >> 4) & 1) != 0; } |
4185 | bool overflow() const { return ((_value >> 3) & 1) != 0; } |
4186 | bool zero_divide() const { return ((_value >> 2) & 1) != 0; } |
4187 | bool denormalized() const { return ((_value >> 1) & 1) != 0; } |
4188 | bool invalid() const { return ((_value >> 0) & 1) != 0; } |
4189 | |
4190 | void print() const { |
4191 | // condition codes |
4192 | char c[5]; |
4193 | c[0] = (C3()) ? '3' : '-'; |
4194 | c[1] = (C2()) ? '2' : '-'; |
4195 | c[2] = (C1()) ? '1' : '-'; |
4196 | c[3] = (C0()) ? '0' : '-'; |
4197 | c[4] = '\x0'; |
4198 | // flags |
4199 | char f[9]; |
4200 | f[0] = (error_status()) ? 'E' : '-'; |
4201 | f[1] = (stack_fault ()) ? 'S' : '-'; |
4202 | f[2] = (precision ()) ? 'P' : '-'; |
4203 | f[3] = (underflow ()) ? 'U' : '-'; |
4204 | f[4] = (overflow ()) ? 'O' : '-'; |
4205 | f[5] = (zero_divide ()) ? 'Z' : '-'; |
4206 | f[6] = (denormalized()) ? 'D' : '-'; |
4207 | f[7] = (invalid ()) ? 'I' : '-'; |
4208 | f[8] = '\x0'; |
4209 | // output |
4210 | printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top()); |
4211 | } |
4212 | |
4213 | }; |
4214 | |
4215 | class TagWord { |
4216 | public: |
4217 | int32_t _value; |
4218 | |
4219 | int tag_at(int i) const { return (_value >> (i*2)) & 3; } |
4220 | |
4221 | void print() const { |
4222 | printf("%04x", _value & 0xFFFF); |
4223 | } |
4224 | |
4225 | }; |
4226 | |
4227 | class FPU_Register { |
4228 | public: |
4229 | int32_t _m0; |
4230 | int32_t _m1; |
4231 | int16_t _ex; |
4232 | |
4233 | bool is_indefinite() const { |
4234 | return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0; |
4235 | } |
4236 | |
4237 | void print() const { |
4238 | char sign = (_ex < 0) ? '-' : '+'; |
4239 | const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : " "; |
4240 | printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind); |
4241 | }; |
4242 | |
4243 | }; |
4244 | |
4245 | class FPU_State { |
4246 | public: |
4247 | enum { |
4248 | register_size = 10, |
4249 | number_of_registers = 8, |
4250 | register_mask = 7 |
4251 | }; |
4252 | |
4253 | ControlWord _control_word; |
4254 | StatusWord _status_word; |
4255 | TagWord _tag_word; |
4256 | int32_t _error_offset; |
4257 | int32_t _error_selector; |
4258 | int32_t _data_offset; |
4259 | int32_t _data_selector; |
4260 | int8_t _register[register_size * number_of_registers]; |
4261 | |
4262 | int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); } |
4263 | FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; } |
4264 | |
4265 | const char* tag_as_string(int tag) const { |
4266 | switch (tag) { |
4267 | case 0: return "valid"; |
4268 | case 1: return "zero"; |
4269 | case 2: return "special"; |
4270 | case 3: return "empty"; |
4271 | } |
4272 | ShouldNotReachHere();
4273 | return NULL;
4274 | } |
4275 | |
4276 | void print() const { |
4277 | // print computation registers |
4278 | { int t = _status_word.top(); |
4279 | for (int i = 0; i < number_of_registers; i++) { |
4280 | int j = (i - t) & register_mask; |
4281 | printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j); |
4282 | st(j)->print(); |
4283 | printf(" %s\n", tag_as_string(_tag_word.tag_at(i))); |
4284 | } |
4285 | } |
4286 | printf("\n"); |
4287 | // print control registers |
4288 | printf("ctrl = "); _control_word.print(); printf("\n"); |
4289 | printf("stat = "); _status_word .print(); printf("\n"); |
4290 | printf("tags = "); _tag_word .print(); printf("\n"); |
4291 | } |
4292 | |
4293 | }; |
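     | // Example (sketch): with _status_word.top() == 5 the stack-relative ST(0) lives in
     | // physical register 5, so tag_for_st(0) == _tag_word.tag_at(5), and tag_for_st(3)
     | // wraps around to _tag_word.tag_at(0) because of the & register_mask.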
4294 | |
4295 | class Flag_Register { |
4296 | public: |
4297 | int32_t _value; |
4298 | |
4299 | bool overflow() const { return ((_value >> 11) & 1) != 0; } |
4300 | bool direction() const { return ((_value >> 10) & 1) != 0; } |
4301 | bool sign() const { return ((_value >> 7) & 1) != 0; } |
4302 | bool zero() const { return ((_value >> 6) & 1) != 0; } |
4303 | bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; } |
4304 | bool parity() const { return ((_value >> 2) & 1) != 0; } |
4305 | bool carry() const { return ((_value >> 0) & 1) != 0; } |
4306 | |
4307 | void print() const { |
4308 | // flags |
4309 | char f[8]; |
4310 | f[0] = (overflow ()) ? 'O' : '-'; |
4311 | f[1] = (direction ()) ? 'D' : '-'; |
4312 | f[2] = (sign ()) ? 'S' : '-'; |
4313 | f[3] = (zero ()) ? 'Z' : '-'; |
4314 | f[4] = (auxiliary_carry()) ? 'A' : '-'; |
4315 | f[5] = (parity ()) ? 'P' : '-'; |
4316 | f[6] = (carry ()) ? 'C' : '-'; |
4317 | f[7] = '\x0'; |
4318 | // output |
4319 | printf("%08x flags = %s", _value, f); |
4320 | } |
4321 | |
4322 | }; |
4323 | |
4324 | class IU_Register { |
4325 | public: |
4326 | int32_t _value; |
4327 | |
4328 | void print() const { |
4329 | printf("%08x %11d", _value, _value); |
4330 | } |
4331 | |
4332 | }; |
4333 | |
4334 | class IU_State { |
4335 | public: |
4336 | Flag_Register _eflags; |
4337 | IU_Register _rdi; |
4338 | IU_Register _rsi; |
4339 | IU_Register _rbp; |
4340 | IU_Register _rsp; |
4341 | IU_Register _rbx; |
4342 | IU_Register _rdx; |
4343 | IU_Register _rcx; |
4344 | IU_Register _rax; |
4345 | |
4346 | void print() const { |
4347 | // computation registers |
4348 | printf("rax, = "); _rax.print(); printf("\n"); |
4349 | printf("rbx, = "); _rbx.print(); printf("\n"); |
4350 | printf("rcx = "); _rcx.print(); printf("\n"); |
4351 | printf("rdx = "); _rdx.print(); printf("\n"); |
4352 | printf("rdi = "); _rdi.print(); printf("\n"); |
4353 | printf("rsi = "); _rsi.print(); printf("\n"); |
4354 | printf("rbp, = "); _rbp.print(); printf("\n"); |
4355 | printf("rsp = "); _rsp.print(); printf("\n"); |
4356 | printf("\n"); |
4357 | // control registers |
4358 | printf("flgs = "); _eflags.print(); printf("\n"); |
4359 | } |
4360 | }; |
4361 | |
4362 | |
4363 | class CPU_State { |
4364 | public: |
4365 | FPU_State _fpu_state; |
4366 | IU_State _iu_state; |
4367 | |
4368 | void print() const { |
4369 | printf("--------------------------------------------------\n"); |
4370 | _iu_state .print(); |
4371 | printf("\n"); |
4372 | _fpu_state.print(); |
4373 | printf("--------------------------------------------------\n"); |
4374 | } |
4375 | |
4376 | }; |
4377 | |
4378 | |
4379 | static void _print_CPU_state(CPU_State* state) { |
4380 | state->print(); |
4381 | }; |
4382 | |
4383 | |
4384 | void MacroAssembler::print_CPU_state() { |
4385 | push_CPU_state(); |
4386 | push(rsp); // pass CPU state |
4387 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
4388 | addptr(rsp, wordSize); // discard argument |
4389 | pop_CPU_state(); |
4390 | } |
4391 | |
4392 | |
4393 | #ifndef _LP64
4394 | static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) { |
4395 | static int counter = 0; |
4396 | FPU_State* fs = &state->_fpu_state; |
4397 | counter++; |
4398 | // For leaf calls, only verify that the top few elements remain empty. |
4399 | // We only need 1 empty at the top for C2 code. |
4400 | if( stack_depth < 0 ) { |
4401 | if( fs->tag_for_st(7) != 3 ) { |
4402 | printf("FPR7 not empty\n"); |
4403 | state->print(); |
4404 | assert(false, "error")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4404, "assert(" "false" ") failed", "error"); ::breakpoint( ); } } while (0); |
4405 | return false; |
4406 | } |
4407 | return true; // All other stack states do not matter |
4408 | } |
4409 | |
4410 | assert((fs->_control_word._value & 0xffff) == StubRoutines::x86::fpu_cntrl_wrd_std(),
4411 | "bad FPU control word");
4412 | |
4413 | // compute stack depth |
4414 | int i = 0; |
4415 | while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++; |
4416 | int d = i; |
4417 | while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++; |
4418 | // verify findings |
4419 | if (i != FPU_State::number_of_registers) { |
4420 | // stack not contiguous |
4421 | printf("%s: stack not contiguous at ST%d\n", s, i); |
4422 | state->print(); |
4423 | assert(false, "error")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4423, "assert(" "false" ") failed", "error"); ::breakpoint( ); } } while (0); |
4424 | return false; |
4425 | } |
4426 | // check if computed stack depth corresponds to expected stack depth |
4427 | if (stack_depth < 0) { |
4428 | // expected stack depth is -stack_depth or less |
4429 | if (d > -stack_depth) { |
4430 | // too many elements on the stack |
4431 | printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d); |
4432 | state->print(); |
4433 | assert(false, "error")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4433, "assert(" "false" ") failed", "error"); ::breakpoint( ); } } while (0); |
4434 | return false; |
4435 | } |
4436 | } else { |
4437 | // expected stack depth is stack_depth |
4438 | if (d != stack_depth) { |
4439 | // wrong stack depth |
4440 | printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d); |
4441 | state->print(); |
4442 | assert(false, "error")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4442, "assert(" "false" ") failed", "error"); ::breakpoint( ); } } while (0); |
4443 | return false; |
4444 | } |
4445 | } |
4446 | // everything is cool |
4447 | return true; |
4448 | } |
4449 | |
4450 | void MacroAssembler::verify_FPU(int stack_depth, const char* s) { |
4451 | if (!VerifyFPU) return; |
4452 | push_CPU_state(); |
4453 | push(rsp); // pass CPU state |
4454 | ExternalAddress msg((address) s); |
4455 | // pass message string s |
4456 | pushptr(msg.addr()); |
4457 | push(stack_depth); // pass stack depth |
4458 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
4459 | addptr(rsp, 3 * wordSize); // discard arguments |
4460 | // check for error |
4461 | { Label L; |
4462 | testl(rax, rax); |
4463 | jcc(Assembler::notZero, L); |
4464 | int3(); // break if error condition |
4465 | bind(L); |
4466 | } |
4467 | pop_CPU_state(); |
4468 | } |
4469 | #endif // _LP64 |
4470 | |
4471 | void MacroAssembler::restore_cpu_control_state_after_jni() { |
4472 | // Either restore the MXCSR register after returning from the JNI Call |
4473 | // or verify that it wasn't changed (with -Xcheck:jni flag). |
4474 | if (VM_Version::supports_sse()) { |
4475 | if (RestoreMXCSROnJNICalls) { |
4476 | ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std())); |
4477 | } else if (CheckJNICalls) { |
4478 | call(RuntimeAddress(StubRoutines::x86::verify_mxcsr_entry())); |
4479 | } |
4480 | } |
4481 | // Clear upper bits of YMM registers to avoid SSE <-> AVX transition penalty. |
4482 | vzeroupper(); |
4483 | // Reset k1 to 0xffff. |
4484 | |
4485 | #ifdef COMPILER2
4486 | if (PostLoopMultiversioning && VM_Version::supports_evex()) { |
4487 | push(rcx); |
4488 | movl(rcx, 0xffff); |
4489 | kmovwl(k1, rcx); |
4490 | pop(rcx); |
4491 | } |
4492 | #endif // COMPILER2 |
4493 | |
4494 | #ifndef _LP64
4495 | // Either restore the x87 floating pointer control word after returning |
4496 | // from the JNI call or verify that it wasn't changed. |
4497 | if (CheckJNICalls) { |
4498 | call(RuntimeAddress(StubRoutines::x86::verify_fpu_cntrl_wrd_entry())); |
4499 | } |
4500 | #endif // _LP64 |
4501 | } |
4502 | |
4503 | // ((OopHandle)result).resolve(); |
4504 | void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { |
4505 | assert_different_registers(result, tmp); |
4506 | |
4507 | // Only 64 bit platforms support GCs that require a tmp register |
4508 | // Only IN_HEAP loads require a thread_tmp register |
4509 | // OopHandle::resolve is an indirection like jobject. |
4510 | access_load_at(T_OBJECT, IN_NATIVE, |
4511 | result, Address(result, 0), tmp, /*tmp_thread*/noreg); |
4512 | } |
4513 | |
4514 | // ((WeakHandle)result).resolve(); |
4515 | void MacroAssembler::resolve_weak_handle(Register rresult, Register rtmp) { |
4516 | assert_different_registers(rresult, rtmp); |
4517 | Label resolved; |
4518 | |
4519 | // A null weak handle resolves to null. |
4520 | cmpptr(rresult, 0); |
4521 | jcc(Assembler::equal, resolved); |
4522 | |
4523 | // Only 64 bit platforms support GCs that require a tmp register |
4524 | // Only IN_HEAP loads require a thread_tmp register |
4525 | // WeakHandle::resolve is an indirection like jweak. |
4526 | access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, |
4527 | rresult, Address(rresult, 0), rtmp, /*tmp_thread*/noreg); |
4528 | bind(resolved); |
4529 | } |
4530 | |
4531 | void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { |
4532 | // get mirror |
4533 | const int mirror_offset = in_bytes(Klass::java_mirror_offset()); |
4534 | load_method_holder(mirror, method); |
4535 | movptr(mirror, Address(mirror, mirror_offset)); |
4536 | resolve_oop_handle(mirror, tmp); |
4537 | } |
4538 | |
4539 | void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) { |
4540 | load_method_holder(rresult, rmethod); |
4541 | movptr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset())); |
4542 | } |
4543 | |
4544 | void MacroAssembler::load_method_holder(Register holder, Register method) { |
4545 | movptr(holder, Address(method, Method::const_offset())); // ConstMethod* |
4546 | movptr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* |
4547 | movptr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* |
4548 | } |
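     | // The three loads above chase, roughly (sketch):
     | //   holder = method->constMethod()->constants()->pool_holder();
     | // i.e. Method* -> ConstMethod* -> ConstantPool* -> the InstanceKlass* that declares the method.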
4549 | |
4550 | void MacroAssembler::load_klass(Register dst, Register src, Register tmp) { |
4551 | assert_different_registers(src, tmp); |
4552 | assert_different_registers(dst, tmp); |
4553 | #ifdef _LP64
4554 | if (UseCompressedClassPointers) { |
4555 | movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); |
4556 | decode_klass_not_null(dst, tmp); |
4557 | } else |
4558 | #endif |
4559 | movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); |
4560 | } |
4561 | |
4562 | void MacroAssembler::store_klass(Register dst, Register src, Register tmp) { |
4563 | assert_different_registers(src, tmp); |
4564 | assert_different_registers(dst, tmp); |
4565 | #ifdef _LP64
4566 | if (UseCompressedClassPointers) { |
4567 | encode_klass_not_null(src, tmp); |
4568 | movl(Address(dst, oopDesc::klass_offset_in_bytes()), src); |
4569 | } else |
4570 | #endif |
4571 | movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src); |
4572 | } |
4573 | |
4574 | void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, |
4575 | Register tmp1, Register thread_tmp) { |
4576 | BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); |
4577 | decorators = AccessInternal::decorator_fixup(decorators); |
4578 | bool as_raw = (decorators & AS_RAW) != 0; |
4579 | if (as_raw) { |
4580 | bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); |
4581 | } else { |
4582 | bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); |
4583 | } |
4584 | } |
4585 | |
4586 | void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, |
4587 | Register tmp1, Register tmp2) { |
4588 | BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); |
4589 | decorators = AccessInternal::decorator_fixup(decorators); |
4590 | bool as_raw = (decorators & AS_RAW) != 0; |
4591 | if (as_raw) { |
4592 | bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2); |
4593 | } else { |
4594 | bs->store_at(this, decorators, type, dst, src, tmp1, tmp2); |
4595 | } |
4596 | } |
4597 | |
4598 | void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, |
4599 | Register thread_tmp, DecoratorSet decorators) { |
4600 | access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); |
4601 | } |
4602 | |
4603 | // Doesn't do verification, generates fixed size code
4604 | void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, |
4605 | Register thread_tmp, DecoratorSet decorators) { |
4606 | access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); |
4607 | } |
4608 | |
4609 | void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, |
4610 | Register tmp2, DecoratorSet decorators) { |
4611 | access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); |
4612 | } |
4613 | |
4614 | // Used for storing NULLs. |
4615 | void MacroAssembler::store_heap_oop_null(Address dst) { |
4616 | access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); |
4617 | } |
4618 | |
4619 | #ifdef _LP64
4620 | void MacroAssembler::store_klass_gap(Register dst, Register src) { |
4621 | if (UseCompressedClassPointers) { |
4622 | // Store to klass gap in destination |
4623 | movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src); |
4624 | } |
4625 | } |
4626 | |
4627 | #ifdef ASSERT
4628 | void MacroAssembler::verify_heapbase(const char* msg) { |
4629 | assert (UseCompressedOops, "should be compressed");
4630 | assert (Universe::heap() != NULL, "java heap should be initialized");
4631 | if (CheckCompressedOops) { |
4632 | Label ok; |
4633 | push(rscratch1); // cmpptr trashes rscratch1 |
4634 | cmpptr(r12_heapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr())); |
4635 | jcc(Assembler::equal, ok); |
4636 | STOP(msg);
4637 | bind(ok); |
4638 | pop(rscratch1); |
4639 | } |
4640 | } |
4641 | #endif |
4642 | |
4643 | // Algorithm must match oop.inline.hpp encode_heap_oop. |
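     | // Conceptually (a sketch; the flag combinations are handled instruction by instruction below):
     | //   narrow = (oop == NULL) ? 0
     | //          : (narrowOop)((oop - CompressedOops::base()) >> CompressedOops::shift());
     | // With a NULL base only the shift is applied, and a zero shift makes this a no-op.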
4644 | void MacroAssembler::encode_heap_oop(Register r) { |
4645 | #ifdef ASSERT
4646 | verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); |
4647 | #endif |
4648 | verify_oop_msg(r, "broken oop in encode_heap_oop");
4649 | if (CompressedOops::base() == NULL) {
4650 | if (CompressedOops::shift() != 0) {
4651 | assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
4652 | shrq(r, LogMinObjAlignmentInBytes); |
4653 | } |
4654 | return; |
4655 | } |
4656 | testq(r, r); |
4657 | cmovq(Assembler::equal, r, r12_heapbase); |
4658 | subq(r, r12_heapbase); |
4659 | shrq(r, LogMinObjAlignmentInBytes); |
4660 | } |
4661 | |
4662 | void MacroAssembler::encode_heap_oop_not_null(Register r) { |
4663 | #ifdef ASSERT
4664 | verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); |
4665 | if (CheckCompressedOops) { |
4666 | Label ok; |
4667 | testq(r, r); |
4668 | jcc(Assembler::notEqual, ok); |
4669 | STOP("null oop passed to encode_heap_oop_not_null");
4670 | bind(ok); |
4671 | } |
4672 | #endif |
4673 | verify_oop_msg(r, "broken oop in encode_heap_oop_not_null");
4674 | if (CompressedOops::base() != NULL) {
4675 | subq(r, r12_heapbase); |
4676 | } |
4677 | if (CompressedOops::shift() != 0) { |
4678 | assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
4679 | shrq(r, LogMinObjAlignmentInBytes); |
4680 | } |
4681 | } |
4682 | |
4683 | void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { |
4684 | #ifdef ASSERT
4685 | verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); |
4686 | if (CheckCompressedOops) { |
4687 | Label ok; |
4688 | testq(src, src); |
4689 | jcc(Assembler::notEqual, ok); |
4690 | STOP("null oop passed to encode_heap_oop_not_null2");
4691 | bind(ok); |
4692 | } |
4693 | #endif |
4694 | verify_oop_msg(src, "broken oop in encode_heap_oop_not_null2");
4695 | if (dst != src) { |
4696 | movq(dst, src); |
4697 | } |
4698 | if (CompressedOops::base() != NULL) {
4699 | subq(dst, r12_heapbase); |
4700 | } |
4701 | if (CompressedOops::shift() != 0) { |
4702 | assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
4703 | shrq(dst, LogMinObjAlignmentInBytes); |
4704 | } |
4705 | } |
4706 | |
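     | // Inverse of encode_heap_oop (sketch):
     | //   oop = (narrow == 0) ? NULL
     | //       : CompressedOops::base() + ((uint64_t)narrow << CompressedOops::shift());
     | // A NULL base reduces this to a plain shift, or to nothing when the shift is 0 as well.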
4707 | void MacroAssembler::decode_heap_oop(Register r) { |
4708 | #ifdef ASSERT
4709 | verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); |
4710 | #endif |
4711 | if (CompressedOops::base() == NULL) {
4712 | if (CompressedOops::shift() != 0) { |
4713 | assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
4714 | shlq(r, LogMinObjAlignmentInBytes); |
4715 | } |
4716 | } else { |
4717 | Label done; |
4718 | shlq(r, LogMinObjAlignmentInBytes); |
4719 | jccb(Assembler::equal, done);
4720 | addq(r, r12_heapbase); |
4721 | bind(done); |
4722 | } |
4723 | verify_oop_msg(r, "broken oop in decode_heap_oop");
4724 | } |
4725 | |
4726 | void MacroAssembler::decode_heap_oop_not_null(Register r) { |
4727 | // Note: it will change flags |
4728 | assert (UseCompressedOops, "should only be used for compressed headers");
4729 | assert (Universe::heap() != NULL, "java heap should be initialized");
4730 | // Cannot assert, unverified entry point counts instructions (see .ad file) |
4731 | // vtableStubs also counts instructions in pd_code_size_limit. |
4732 | // Also do not verify_oop as this is called by verify_oop. |
4733 | if (CompressedOops::shift() != 0) { |
4734 | assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
4735 | shlq(r, LogMinObjAlignmentInBytes); |
4736 | if (CompressedOops::base() != NULL) {
4737 | addq(r, r12_heapbase); |
4738 | } |
4739 | } else { |
4740 | assert (CompressedOops::base() == NULL, "sanity");
4741 | } |
4742 | } |
4743 | |
4744 | void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { |
4745 | // Note: it will change flags |
4746 | assert (UseCompressedOops, "should only be used for compressed headers");
4747 | assert (Universe::heap() != NULL, "java heap should be initialized");
4748 | // Cannot assert, unverified entry point counts instructions (see .ad file) |
4749 | // vtableStubs also counts instructions in pd_code_size_limit. |
4750 | // Also do not verify_oop as this is called by verify_oop. |
4751 | if (CompressedOops::shift() != 0) { |
4752 | assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
4753 | if (LogMinObjAlignmentInBytes == Address::times_8) { |
4754 | leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); |
4755 | } else { |
4756 | if (dst != src) { |
4757 | movq(dst, src); |
4758 | } |
4759 | shlq(dst, LogMinObjAlignmentInBytes); |
4760 | if (CompressedOops::base() != NULL) {
4761 | addq(dst, r12_heapbase); |
4762 | } |
4763 | } |
4764 | } else { |
4765 | assert (CompressedOops::base() == NULL, "sanity");
4766 | if (dst != src) { |
4767 | movq(dst, src); |
4768 | } |
4769 | } |
4770 | } |
4771 | |
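     | // Narrow klass pointers use the same base+shift scheme as compressed oops, but with
     | // CompressedKlassPointers::base()/shift() and without a NULL check (the klass is never
     | // NULL here). Sketch: encode: narrow = (klass - base) >> shift;
     | //                     decode: klass  = base + (narrow << shift).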
4772 | void MacroAssembler::encode_klass_not_null(Register r, Register tmp) { |
4773 | assert_different_registers(r, tmp); |
4774 | if (CompressedKlassPointers::base() != NULL) {
4775 | mov64(tmp, (int64_t)CompressedKlassPointers::base()); |
4776 | subq(r, tmp); |
4777 | } |
4778 | if (CompressedKlassPointers::shift() != 0) { |
4779 | assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
4780 | shrq(r, LogKlassAlignmentInBytes); |
4781 | } |
4782 | } |
4783 | |
4784 | void MacroAssembler::encode_and_move_klass_not_null(Register dst, Register src) { |
4785 | assert_different_registers(src, dst); |
4786 | if (CompressedKlassPointers::base() != NULL) {
4787 | mov64(dst, -(int64_t)CompressedKlassPointers::base()); |
4788 | addq(dst, src); |
4789 | } else { |
4790 | movptr(dst, src); |
4791 | } |
4792 | if (CompressedKlassPointers::shift() != 0) { |
4793 | assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
4794 | shrq(dst, LogKlassAlignmentInBytes); |
4795 | } |
4796 | } |
4797 | |
4798 | void MacroAssembler::decode_klass_not_null(Register r, Register tmp) { |
4799 | assert_different_registers(r, tmp); |
4800 | // Note: it will change flags |
4801 | assert(UseCompressedClassPointers, "should only be used for compressed headers");
4802 | // Cannot assert, unverified entry point counts instructions (see .ad file) |
4803 | // vtableStubs also counts instructions in pd_code_size_limit. |
4804 | // Also do not verify_oop as this is called by verify_oop. |
4805 | if (CompressedKlassPointers::shift() != 0) { |
4806 | assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
4807 | shlq(r, LogKlassAlignmentInBytes); |
4808 | } |
4809 | if (CompressedKlassPointers::base() != NULL) {
4810 | mov64(tmp, (int64_t)CompressedKlassPointers::base()); |
4811 | addq(r, tmp); |
4812 | } |
4813 | } |
4814 | |
4815 | void MacroAssembler::decode_and_move_klass_not_null(Register dst, Register src) { |
4816 | assert_different_registers(src, dst); |
4817 | // Note: it will change flags |
4818 | assert (UseCompressedClassPointers, "should only be used for compressed headers");
4819 | // Cannot assert, unverified entry point counts instructions (see .ad file) |
4820 | // vtableStubs also counts instructions in pd_code_size_limit. |
4821 | // Also do not verify_oop as this is called by verify_oop. |
4822 | |
4823 | if (CompressedKlassPointers::base() == NULL &&
4824 | CompressedKlassPointers::shift() == 0) { |
4825 | // The best case scenario is that there is no base or shift. Then it is already |
4826 | // a pointer that needs nothing but a register rename. |
4827 | movl(dst, src); |
4828 | } else { |
4829 | if (CompressedKlassPointers::base() != NULL) {
4830 | mov64(dst, (int64_t)CompressedKlassPointers::base()); |
4831 | } else { |
4832 | xorq(dst, dst); |
4833 | } |
4834 | if (CompressedKlassPointers::shift() != 0) { |
4835 | assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
4836 | assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
4837 | leaq(dst, Address(dst, src, Address::times_8, 0)); |
4838 | } else { |
4839 | addq(dst, src); |
4840 | } |
4841 | } |
4842 | } |
4843 | |
4844 | void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { |
4845 | assert (UseCompressedOops, "should only be used for compressed headers");
4846 | assert (Universe::heap() != NULL, "java heap should be initialized");
4847 | assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
4848 | int oop_index = oop_recorder()->find_index(obj); |
4849 | RelocationHolder rspec = oop_Relocation::spec(oop_index); |
4850 | mov_narrow_oop(dst, oop_index, rspec); |
4851 | } |
4852 | |
4853 | void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { |
4854 | assert (UseCompressedOops, "should only be used for compressed headers");
4855 | assert (Universe::heap() != NULL, "java heap should be initialized");
4856 | assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
4857 | int oop_index = oop_recorder()->find_index(obj); |
4858 | RelocationHolder rspec = oop_Relocation::spec(oop_index); |
4859 | mov_narrow_oop(dst, oop_index, rspec); |
4860 | } |
4861 | |
4862 | void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { |
4863 | assert (UseCompressedClassPointers, "should only be used for compressed headers");
4864 | assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
4865 | int klass_index = oop_recorder()->find_index(k); |
4866 | RelocationHolder rspec = metadata_Relocation::spec(klass_index); |
4867 | mov_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec); |
4868 | } |
4869 | |
4870 | void MacroAssembler::set_narrow_klass(Address dst, Klass* k) { |
4871 | assert (UseCompressedClassPointers, "should only be used for compressed headers");
4872 | assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
4873 | int klass_index = oop_recorder()->find_index(k); |
4874 | RelocationHolder rspec = metadata_Relocation::spec(klass_index); |
4875 | mov_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec); |
4876 | } |
4877 | |
4878 | void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) { |
4879 | assert (UseCompressedOops, "should only be used for compressed headers");
4880 | assert (Universe::heap() != NULL, "java heap should be initialized");
4881 | assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
4882 | int oop_index = oop_recorder()->find_index(obj); |
4883 | RelocationHolder rspec = oop_Relocation::spec(oop_index); |
4884 | Assembler::cmp_narrow_oop(dst, oop_index, rspec); |
4885 | } |
4886 | |
4887 | void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { |
4888 | assert (UseCompressedOops, "should only be used for compressed headers");
4889 | assert (Universe::heap() != NULL, "java heap should be initialized");
4890 | assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
4891 | int oop_index = oop_recorder()->find_index(obj); |
4892 | RelocationHolder rspec = oop_Relocation::spec(oop_index); |
4893 | Assembler::cmp_narrow_oop(dst, oop_index, rspec); |
4894 | } |
4895 | |
4896 | void MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) { |
4897 | assert (UseCompressedClassPointers, "should only be used for compressed headers");
4898 | assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
4899 | int klass_index = oop_recorder()->find_index(k); |
4900 | RelocationHolder rspec = metadata_Relocation::spec(klass_index); |
4901 | Assembler::cmp_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec); |
4902 | } |
4903 | |
4904 | void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) { |
4905 | assert (UseCompressedClassPointers, "should only be used for compressed headers");
4906 | assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
4907 | int klass_index = oop_recorder()->find_index(k); |
4908 | RelocationHolder rspec = metadata_Relocation::spec(klass_index); |
4909 | Assembler::cmp_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec); |
4910 | } |
4911 | |
4912 | void MacroAssembler::reinit_heapbase() { |
4913 | if (UseCompressedOops) { |
4914 | if (Universe::heap() != NULL) {
4915 | if (CompressedOops::base() == NULL) {
4916 | MacroAssembler::xorptr(r12_heapbase, r12_heapbase); |
4917 | } else { |
4918 | mov64(r12_heapbase, (int64_t)CompressedOops::ptrs_base()); |
4919 | } |
4920 | } else { |
4921 | movptr(r12_heapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr())); |
4922 | } |
4923 | } |
4924 | } |
4925 | |
4926 | #endif // _LP64 |
4927 | |
4928 | // C2 compiled method's prolog code. |
4929 | void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub) { |
4930 | |
4931 | // WARNING: Initial instruction MUST be 5 bytes or longer so that |
4932 | // NativeJump::patch_verified_entry will be able to patch out the entry |
4933 | // code safely. The push to verify stack depth is ok at 5 bytes, |
4934 | // the frame allocation can be either 3 or 6 bytes. So if we don't do |
4935 | // stack bang then we must use the 6 byte frame allocation even if |
4936 | // we have no frame. :-( |
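     | // Two frame shapes are emitted below (a sketch; exact layout depends on the flags):
     | //   with a stack bang:    bang; push rbp; [mov rbp, rsp]; sub rsp, #framesize
     | //   without a stack bang: sub rsp, #framesize (forced 32-bit immediate), then rbp is
     | //                         stored into its frame slot
     | // The forced 32-bit immediate keeps the first instruction at least 5 bytes long.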
4937 | assert(stack_bang_size >= framesize || stack_bang_size <= 0, "stack bang size incorrect");
4938 | |
4939 | assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
4940 | // Remove word for return addr |
4941 | framesize -= wordSize; |
4942 | stack_bang_size -= wordSize; |
4943 | |
4944 | // Calls to C2R adapters often do not accept exceptional returns. |
4945 | // We require that their callers must bang for them. But be careful, because |
4946 | // some VM calls (such as call site linkage) can use several kilobytes of |
4947 | // stack. But the stack safety zone should account for that. |
4948 | // See bugs 4446381, 4468289, 4497237. |
4949 | if (stack_bang_size > 0) { |
4950 | generate_stack_overflow_check(stack_bang_size); |
4951 | |
4952 | // We always push rbp, so that on return to interpreter rbp, will be |
4953 | // restored correctly and we can correct the stack. |
4954 | push(rbp); |
4955 | // Save caller's stack pointer into RBP if the frame pointer is preserved. |
4956 | if (PreserveFramePointer) { |
4957 | mov(rbp, rsp); |
4958 | } |
4959 | // Remove word for ebp |
4960 | framesize -= wordSize; |
4961 | |
4962 | // Create frame |
4963 | if (framesize) { |
4964 | subptr(rsp, framesize); |
4965 | } |
4966 | } else { |
4967 | // Create frame (force generation of a 4 byte immediate value) |
4968 | subptr_imm32(rsp, framesize); |
4969 | |
4970 | // Save RBP register now. |
4971 | framesize -= wordSize; |
4972 | movptr(Address(rsp, framesize), rbp); |
4973 | // Save caller's stack pointer into RBP if the frame pointer is preserved. |
4974 | if (PreserveFramePointer) { |
4975 | movptr(rbp, rsp); |
4976 | if (framesize > 0) { |
4977 | addptr(rbp, framesize); |
4978 | } |
4979 | } |
4980 | } |
4981 | |
4982 | if (VerifyStackAtCalls) { // Majik cookie to verify stack depth |
4983 | framesize -= wordSize; |
4984 | movptr(Address(rsp, framesize), (int32_t)0xbadb100d); |
4985 | } |
4986 | |
4987 | #ifndef _LP64
4988 | // If method sets FPU control word do it now |
4989 | if (fp_mode_24b) { |
4990 | fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); |
4991 | } |
4992 | if (UseSSE >= 2 && VerifyFPU) { |
4993 | verify_FPU(0, "FPU stack must be clean on entry"); |
4994 | } |
4995 | #endif |
4996 | |
4997 | #ifdef ASSERT
4998 | if (VerifyStackAtCalls) { |
4999 | Label L; |
5000 | push(rax); |
5001 | mov(rax, rsp); |
5002 | andptr(rax, StackAlignmentInBytes-1); |
5003 | cmpptr(rax, StackAlignmentInBytes-wordSize); |
5004 | pop(rax); |
5005 | jcc(Assembler::equal, L); |
5006 | STOP("Stack is not properly aligned!");
5007 | bind(L); |
5008 | } |
5009 | #endif |
5010 | |
5011 | if (!is_stub) { |
5012 | BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); |
5013 | bs->nmethod_entry_barrier(this); |
5014 | } |
5015 | } |
5016 | |
5017 | #if COMPILER2_OR_JVMCI
5018 | |
5019 | // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM/ZMM registers |
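     | // Strategy (sketch): zero one vector register, then
     | //   while (cnt >= 8 qwords) { store 64 zero bytes at base; base += 64; cnt -= 8; }
     | // and clear the remaining tail with 32/8-byte stores or an AVX-512 masked store.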
5020 | void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask) { |
5021 | // cnt - number of qwords (8-byte words). |
5022 | // base - start address, qword aligned. |
5023 | Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end; |
5024 | bool use64byteVector = (MaxVectorSize == 64) && (VM_Version::avx3_threshold() == 0); |
5025 | if (use64byteVector) { |
5026 | vpxor(xtmp, xtmp, xtmp, AVX_512bit); |
5027 | } else if (MaxVectorSize >= 32) { |
5028 | vpxor(xtmp, xtmp, xtmp, AVX_256bit); |
5029 | } else { |
5030 | pxor(xtmp, xtmp); |
5031 | } |
5032 | jmp(L_zero_64_bytes); |
5033 | |
5034 | BIND(L_loop); |
5035 | if (MaxVectorSize >= 32) { |
5036 | fill64(base, 0, xtmp, use64byteVector); |
5037 | } else { |
5038 | movdqu(Address(base, 0), xtmp); |
5039 | movdqu(Address(base, 16), xtmp); |
5040 | movdqu(Address(base, 32), xtmp); |
5041 | movdqu(Address(base, 48), xtmp); |
5042 | } |
5043 | addptr(base, 64); |
5044 | |
5045 | BIND(L_zero_64_bytes); |
5046 | subptr(cnt, 8); |
5047 | jccb(Assembler::greaterEqual, L_loop);
5048 | |
5049 | // Copy trailing 64 bytes |
5050 | if (use64byteVector) { |
5051 | addptr(cnt, 8); |
5052 | jccb(Assembler::equal, L_end);
5053 | fill64_masked(3, base, 0, xtmp, mask, cnt, rtmp, true); |
5054 | jmp(L_end); |
5055 | } else { |
5056 | addptr(cnt, 4); |
5057 | jccb(Assembler::less, L_tail);
5058 | if (MaxVectorSize >= 32) { |
5059 | vmovdqu(Address(base, 0), xtmp); |
5060 | } else { |
5061 | movdqu(Address(base, 0), xtmp); |
5062 | movdqu(Address(base, 16), xtmp); |
5063 | } |
5064 | } |
5065 | addptr(base, 32); |
5066 | subptr(cnt, 4); |
5067 | |
5068 | BIND(L_tail); |
5069 | addptr(cnt, 4); |
5070 | jccb(Assembler::lessEqual, L_end);
5071 | if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) { |
5072 | fill32_masked(3, base, 0, xtmp, mask, cnt, rtmp); |
5073 | } else { |
5074 | decrement(cnt); |
5075 | |
5076 | BIND(L_sloop); |
5077 | movq(Address(base, 0), xtmp); |
5078 | addptr(base, 8); |
5079 | decrement(cnt); |
5080 | jccb(Assembler::greaterEqual, L_sloop);
5081 | } |
5082 | BIND(L_end); |
5083 | } |
5084 | |
5085 | // Clearing constant sized memory using YMM/ZMM registers. |
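     | // Sketch: cnt qwords are split into (cnt / 8) full 64-byte stores plus a 0..7 qword
     | // tail. The tail uses the narrowest store that fits; where a masked evmovdqu is needed
     | // the opmask is (1 << tail_qwords) - 1, e.g. 0x7 for a 3-qword tail.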
5086 | void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) { |
5087 | assert(UseAVX > 2 && VM_Version::supports_avx512vlbw(), "");
5088 | bool use64byteVector = (MaxVectorSize > 32) && (VM_Version::avx3_threshold() == 0); |
5089 | |
5090 | int vector64_count = (cnt & (~0x7)) >> 3; |
5091 | cnt = cnt & 0x7; |
5092 | |
5093 | // 64 byte initialization loop. |
5094 | vpxor(xtmp, xtmp, xtmp, use64byteVector ? AVX_512bit : AVX_256bit); |
5095 | for (int i = 0; i < vector64_count; i++) { |
5096 | fill64(base, i * 64, xtmp, use64byteVector); |
5097 | } |
5098 | |
5099 | // Clear remaining 64 byte tail. |
5100 | int disp = vector64_count * 64; |
5101 | if (cnt) { |
5102 | switch (cnt) { |
5103 | case 1: |
5104 | movq(Address(base, disp), xtmp); |
5105 | break; |
5106 | case 2: |
5107 | evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_128bit); |
5108 | break; |
5109 | case 3: |
5110 | movl(rtmp, 0x7); |
5111 | kmovwl(mask, rtmp); |
5112 | evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_256bit); |
5113 | break; |
5114 | case 4: |
5115 | evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit); |
5116 | break; |
5117 | case 5: |
5118 | if (use64byteVector) { |
5119 | movl(rtmp, 0x1F); |
5120 | kmovwl(mask, rtmp); |
5121 | evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit); |
5122 | } else { |
5123 | evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit); |
5124 | movq(Address(base, disp + 32), xtmp); |
5125 | } |
5126 | break; |
5127 | case 6: |
5128 | if (use64byteVector) { |
5129 | movl(rtmp, 0x3F); |
5130 | kmovwl(mask, rtmp); |
5131 | evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit); |
5132 | } else { |
5133 | evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit); |
5134 | evmovdqu(T_LONG, k0, Address(base, disp + 32), xtmp, Assembler::AVX_128bit); |
5135 | } |
5136 | break; |
5137 | case 7: |
5138 | if (use64byteVector) { |
5139 | movl(rtmp, 0x7F); |
5140 | kmovwl(mask, rtmp); |
5141 | evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit); |
5142 | } else { |
5143 | evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit); |
5144 | movl(rtmp, 0x7); |
5145 | kmovwl(mask, rtmp); |
5146 | evmovdqu(T_LONG, mask, Address(base, disp + 32), xtmp, Assembler::AVX_256bit); |
5147 | } |
5148 | break; |
5149 | default: |
5150 | fatal("Unexpected length : %d\n",cnt);
5151 | break; |
5152 | } |
5153 | } |
5154 | } |
5155 | |
5156 | void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMRegister xtmp, |
5157 | bool is_large, KRegister mask) { |
5158 | // cnt - number of qwords (8-byte words). |
5159 | // base - start address, qword aligned. |
5160 | // is_large - if optimizers know cnt is larger than InitArrayShortSize |
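     | // Small counts (at most InitArrayShortSize / BytesPerLong) are cleared with a simple
     | // store loop; larger counts fall through to rep stosb/stos or the XMM path below.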
5161 | assert(base==rdi, "base register must be edi for rep stos");
5162 | assert(tmp==rax, "tmp register must be eax for rep stos");
5163 | assert(cnt==rcx, "cnt register must be ecx for rep stos");
5164 | assert(InitArrayShortSize % BytesPerLong == 0,
5165 | "InitArrayShortSize should be the multiple of BytesPerLong");
5166 | |
5167 | Label DONE; |
5168 | if (!is_large || !UseXMMForObjInit) { |
5169 | xorptr(tmp, tmp); |
5170 | } |
5171 | |
5172 | if (!is_large) { |
5173 | Label LOOP, LONG; |
5174 | cmpptr(cnt, InitArrayShortSize/BytesPerLong); |
5175 | jccb(Assembler::greater, LONG);
5176 | |
5177 | NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM |
5178 | |
5179 | decrement(cnt); |
5180 | jccb(Assembler::negative, DONE); // Zero length
5181 | |
5182 | // Use individual pointer-sized stores for small counts: |
5183 | BIND(LOOP); |
5184 | movptr(Address(base, cnt, Address::times_ptr), tmp); |
5185 | decrement(cnt); |
5186 | jccb(Assembler::greaterEqual, LOOP);
5187 | jmpb(DONE);
5188 | |
5189 | BIND(LONG); |
5190 | } |
5191 | |
5192 | // Use longer rep-prefixed ops for non-small counts: |
5193 | if (UseFastStosb) { |
5194 | shlptr(cnt, 3); // convert to number of bytes |
5195 | rep_stosb(); |
5196 | } else if (UseXMMForObjInit) { |
5197 | xmm_clear_mem(base, cnt, tmp, xtmp, mask); |
5198 | } else { |
5199 | NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM |
5200 | rep_stos(); |
5201 | } |
5202 | |
5203 | BIND(DONE); |
5204 | } |
5205 | |
5206 | #endif //COMPILER2_OR_JVMCI |
5207 | |
5208 | |
5209 | void MacroAssembler::generate_fill(BasicType t, bool aligned, |
5210 | Register to, Register value, Register count, |
5211 | Register rtmp, XMMRegister xtmp) { |
5212 | ShortBranchVerifier sbv(this); |
5213 | assert_different_registers(to, value, count, rtmp); |
5214 | Label L_exit; |
5215 | Label L_fill_2_bytes, L_fill_4_bytes; |
5216 | |
5217 | #if defined(COMPILER2) && defined(_LP64)
5218 | if(MaxVectorSize >=32 && |
5219 | VM_Version::supports_avx512vlbw() && |
5220 | VM_Version::supports_bmi2()) { |
5221 | generate_fill_avx3(t, to, value, count, rtmp, xtmp); |
5222 | return; |
5223 | } |
5224 | #endif |
5225 | |
5226 | int shift = -1; |
5227 | switch (t) { |
5228 | case T_BYTE: |
5229 | shift = 2; |
5230 | break; |
5231 | case T_SHORT: |
5232 | shift = 1; |
5233 | break; |
5234 | case T_INT: |
5235 | shift = 0; |
5236 | break; |
5237 | default: ShouldNotReachHere();
5238 | } |
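     | // 'shift' is log2(elements per 32-bit word), so (n << shift) elements always cover
     | // n*4 bytes regardless of whether t is T_BYTE, T_SHORT or T_INT.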
5239 | |
5240 | if (t == T_BYTE) { |
5241 | andl(value, 0xff); |
5242 | movl(rtmp, value); |
5243 | shll(rtmp, 8); |
5244 | orl(value, rtmp); |
5245 | } |
5246 | if (t == T_SHORT) { |
5247 | andl(value, 0xffff); |
5248 | } |
5249 | if (t == T_BYTE || t == T_SHORT) { |
5250 | movl(rtmp, value); |
5251 | shll(rtmp, 16); |
5252 | orl(value, rtmp); |
5253 | } |
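     | // At this point 'value' holds the fill pattern replicated to 32 bits, e.g. a byte
     | // value 0xAB has become 0xABABABAB and a short value 0xABCD has become 0xABCDABCD.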
5254 | |
5255 | cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element |
5256 | jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp |
5257 | if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) { |
5258 | Label L_skip_align2; |
5259 | // align source address at 4 bytes address boundary |
5260 | if (t == T_BYTE) { |
5261 | Label L_skip_align1; |
5262 | // One byte misalignment happens only for byte arrays |
5263 | testptr(to, 1); |
5264 | jccb(Assembler::zero, L_skip_align1);
5265 | movb(Address(to, 0), value); |
5266 | increment(to); |
5267 | decrement(count); |
5268 | BIND(L_skip_align1); |
5269 | } |
5270 | // Two bytes misalignment happens only for byte and short (char) arrays |
5271 | testptr(to, 2); |
5272 | jccb(Assembler::zero, L_skip_align2);
5273 | movw(Address(to, 0), value); |
5274 | addptr(to, 2); |
5275 | subl(count, 1<<(shift-1)); |
5276 | BIND(L_skip_align2); |
5277 | } |
5278 | if (UseSSE < 2) { |
5279 | Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; |
5280 | // Fill 32-byte chunks |
5281 | subl(count, 8 << shift); |
5282 | jcc(Assembler::less, L_check_fill_8_bytes); |
5283 | align(16); |
5284 | |
5285 | BIND(L_fill_32_bytes_loop); |
5286 | |
5287 | for (int i = 0; i < 32; i += 4) { |
5288 | movl(Address(to, i), value); |
5289 | } |
5290 | |
5291 | addptr(to, 32); |
5292 | subl(count, 8 << shift); |
5293 | jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); |
5294 | BIND(L_check_fill_8_bytes); |
5295 | addl(count, 8 << shift); |
5296 | jccb(Assembler::zero, L_exit);
5297 | jmpb(L_fill_8_bytes);
5298 | |
5299 | // |
5300 | // length is too short, just fill qwords |
5301 | // |
5302 | BIND(L_fill_8_bytes_loop); |
5303 | movl(Address(to, 0), value); |
5304 | movl(Address(to, 4), value); |
5305 | addptr(to, 8); |
5306 | BIND(L_fill_8_bytes); |
5307 | subl(count, 1 << (shift + 1)); |
5308 | jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); |
5309 | // fall through to fill 4 bytes |
5310 | } else { |
5311 | Label L_fill_32_bytes; |
5312 | if (!UseUnalignedLoadStores) { |
5313 | // align to 8 bytes, we know we are 4 byte aligned to start |
5314 | testptr(to, 4); |
5315 | jccb(Assembler::zero, L_fill_32_bytes);
5316 | movl(Address(to, 0), value); |
5317 | addptr(to, 4); |
5318 | subl(count, 1<<shift); |
5319 | } |
5320 | BIND(L_fill_32_bytes); |
5321 | { |
5322 | assert( UseSSE >= 2, "supported cpu only" );
5323 | Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; |
5324 | movdl(xtmp, value); |
5325 | if (UseAVX >= 2 && UseUnalignedLoadStores) { |
5326 | Label L_check_fill_32_bytes; |
5327 | if (UseAVX > 2) { |
5328 | // Fill 64-byte chunks |
5329 | Label L_fill_64_bytes_loop_avx3, L_check_fill_64_bytes_avx2; |
5330 | |
5331 | // If number of bytes to fill < VM_Version::avx3_threshold(), perform fill using AVX2 |
5332 | cmpl(count, VM_Version::avx3_threshold()); |
5333 | jccb(Assembler::below, L_check_fill_64_bytes_avx2);
5334 | |
5335 | vpbroadcastd(xtmp, xtmp, Assembler::AVX_512bit); |
5336 | |
5337 | subl(count, 16 << shift); |
5338 | jccb(Assembler::less, L_check_fill_32_bytes);
5339 | align(16); |
5340 | |
5341 | BIND(L_fill_64_bytes_loop_avx3); |
5342 | evmovdqul(Address(to, 0), xtmp, Assembler::AVX_512bit); |
5343 | addptr(to, 64); |
5344 | subl(count, 16 << shift); |
5345 | jcc(Assembler::greaterEqual, L_fill_64_bytes_loop_avx3); |
5346 | jmpb(L_check_fill_32_bytes);
5347 | |
5348 | BIND(L_check_fill_64_bytes_avx2); |
5349 | } |
5350 | // Fill 64-byte chunks |
5351 | Label L_fill_64_bytes_loop; |
5352 | vpbroadcastd(xtmp, xtmp, Assembler::AVX_256bit); |
5353 | |
5354 | subl(count, 16 << shift); |
5355 | jcc(Assembler::less, L_check_fill_32_bytes); |
5356 | align(16); |
5357 | |
5358 | BIND(L_fill_64_bytes_loop); |
5359 | vmovdqu(Address(to, 0), xtmp); |
5360 | vmovdqu(Address(to, 32), xtmp); |
5361 | addptr(to, 64); |
5362 | subl(count, 16 << shift); |
5363 | jcc(Assembler::greaterEqual, L_fill_64_bytes_loop); |
5364 | |
5365 | BIND(L_check_fill_32_bytes); |
5366 | addl(count, 8 << shift); |
5367 | jccb(Assembler::less, L_check_fill_8_bytes);
5368 | vmovdqu(Address(to, 0), xtmp); |
5369 | addptr(to, 32); |
5370 | subl(count, 8 << shift); |
5371 | |
5372 | BIND(L_check_fill_8_bytes); |
5373 | // clean upper bits of YMM registers |
5374 | movdl(xtmp, value); |
5375 | pshufd(xtmp, xtmp, 0); |
5376 | } else { |
5377 | // Fill 32-byte chunks |
5378 | pshufd(xtmp, xtmp, 0); |
5379 | |
5380 | subl(count, 8 << shift); |
5381 | jcc(Assembler::less, L_check_fill_8_bytes); |
5382 | align(16); |
5383 | |
5384 | BIND(L_fill_32_bytes_loop); |
5385 | |
5386 | if (UseUnalignedLoadStores) { |
5387 | movdqu(Address(to, 0), xtmp); |
5388 | movdqu(Address(to, 16), xtmp); |
5389 | } else { |
5390 | movq(Address(to, 0), xtmp); |
5391 | movq(Address(to, 8), xtmp); |
5392 | movq(Address(to, 16), xtmp); |
5393 | movq(Address(to, 24), xtmp); |
5394 | } |
5395 | |
5396 | addptr(to, 32); |
5397 | subl(count, 8 << shift); |
5398 | jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); |
5399 | |
5400 | BIND(L_check_fill_8_bytes); |
5401 | } |
5402 | addl(count, 8 << shift); |
5403 | jccb(Assembler::zero, L_exit);
5404 | jmpb(L_fill_8_bytes);
5405 | |
5406 | // |
5407 | // length is too short, just fill qwords |
5408 | // |
5409 | BIND(L_fill_8_bytes_loop); |
5410 | movq(Address(to, 0), xtmp); |
5411 | addptr(to, 8); |
5412 | BIND(L_fill_8_bytes); |
5413 | subl(count, 1 << (shift + 1)); |
5414 | jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); |
5415 | } |
5416 | } |
5417 | // fill trailing 4 bytes |
5418 | BIND(L_fill_4_bytes); |
5419 | testl(count, 1<<shift); |
5420 | jccb(Assembler::zero, L_fill_2_bytes);
5421 | movl(Address(to, 0), value); |
5422 | if (t == T_BYTE || t == T_SHORT) { |
5423 | Label L_fill_byte; |
5424 | addptr(to, 4); |
5425 | BIND(L_fill_2_bytes); |
5426 | // fill trailing 2 bytes |
5427 | testl(count, 1<<(shift-1)); |
5428 | jccb(Assembler::zero, L_fill_byte);
5429 | movw(Address(to, 0), value); |
5430 | if (t == T_BYTE) { |
5431 | addptr(to, 2); |
5432 | BIND(L_fill_byte); |
5433 | // fill trailing byte |
5434 | testl(count, 1); |
5435 | jccb(Assembler::zero, L_exit);
5436 | movb(Address(to, 0), value); |
5437 | } else { |
5438 | BIND(L_fill_byte); |
5439 | } |
5440 | } else { |
5441 | BIND(L_fill_2_bytes); |
5442 | } |
5443 | BIND(L_exit); |
5444 | } |
5445 | |
5446 | void MacroAssembler::evpbroadcast(BasicType type, XMMRegister dst, Register src, int vector_len) { |
5447 | switch(type) { |
5448 | case T_BYTE: |
5449 | case T_BOOLEAN: |
5450 | evpbroadcastb(dst, src, vector_len); |
5451 | break; |
5452 | case T_SHORT: |
5453 | case T_CHAR: |
5454 | evpbroadcastw(dst, src, vector_len); |
5455 | break; |
5456 | case T_INT: |
5457 | case T_FLOAT: |
5458 | evpbroadcastd(dst, src, vector_len); |
5459 | break; |
5460 | case T_LONG: |
5461 | case T_DOUBLE: |
5462 | evpbroadcastq(dst, src, vector_len); |
5463 | break; |
5464 | default: |
5465 | fatal("Unhandled type : %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5465, "Unhandled type : %s", type2name(type)); ::breakpoint (); } while (0); |
5466 | break; |
5467 | } |
5468 | } |
5469 | |
5470 | // encode char[] to byte[] in ISO_8859_1 or ASCII |
5471 | //@IntrinsicCandidate |
5472 | //private static int implEncodeISOArray(byte[] sa, int sp, |
5473 | //byte[] da, int dp, int len) { |
5474 | // int i = 0; |
5475 | // for (; i < len; i++) { |
5476 | // char c = StringUTF16.getChar(sa, sp++); |
5477 | // if (c > '\u00FF') |
5478 | // break; |
5479 | // da[dp++] = (byte)c; |
5480 | // } |
5481 | // return i; |
5482 | //} |
5483 | // |
5484 | //@IntrinsicCandidate |
5485 | //private static int implEncodeAsciiArray(char[] sa, int sp, |
5486 | // byte[] da, int dp, int len) { |
5487 | // int i = 0; |
5488 | // for (; i < len; i++) { |
5489 | // char c = sa[sp++]; |
5490 | // if (c >= '\u0080') |
5491 | // break; |
5492 | // da[dp++] = (byte)c; |
5493 | // } |
5494 | // return i; |
5495 | //} |
5496 | void MacroAssembler::encode_iso_array(Register src, Register dst, Register len, |
5497 | XMMRegister tmp1Reg, XMMRegister tmp2Reg, |
5498 | XMMRegister tmp3Reg, XMMRegister tmp4Reg, |
5499 | Register tmp5, Register result, bool ascii) { |
5500 | |
5501 | // rsi: src |
5502 | // rdi: dst |
5503 | // rdx: len |
5504 | // rcx: tmp5 |
5505 | // rax: result |
5506 | ShortBranchVerifier sbv(this); |
5507 | assert_different_registers(src, dst, len, tmp5, result); |
5508 | Label L_done, L_copy_1_char, L_copy_1_char_exit; |
5509 | |
5510 | int mask = ascii ? 0xff80ff80 : 0xff00ff00; |
5511 | int short_mask = ascii ? 0xff80 : 0xff00; |
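     | // A char can be encoded iff (c & short_mask) == 0: 0xff00 keeps chars <= 0xFF
     | // (ISO-8859-1), 0xff80 keeps chars <= 0x7F (ASCII). 'mask' is the same test replicated
     | // to two chars per 32-bit lane so whole vectors can be checked with ptest/vptest.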
5512 | |
5513 | // set result |
5514 | xorl(result, result); |
5515 | // check for zero length |
5516 | testl(len, len); |
5517 | jcc(Assembler::zero, L_done); |
5518 | |
5519 | movl(result, len); |
5520 | |
5521 | // Setup pointers |
5522 | lea(src, Address(src, len, Address::times_2)); // char[] |
5523 | lea(dst, Address(dst, len, Address::times_1)); // byte[] |
5524 | negptr(len); |
5525 | |
5526 | if (UseSSE42Intrinsics || UseAVX >= 2) { |
5527 | Label L_copy_8_chars, L_copy_8_chars_exit; |
5528 | Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit; |
5529 | |
5530 | if (UseAVX >= 2) { |
5531 | Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit; |
5532 | movl(tmp5, mask); // create mask to test for Unicode or non-ASCII chars in vector |
5533 | movdl(tmp1Reg, tmp5); |
5534 | vpbroadcastd(tmp1Reg, tmp1Reg, Assembler::AVX_256bit); |
5535 | jmp(L_chars_32_check); |
5536 | |
5537 | bind(L_copy_32_chars); |
5538 | vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64)); |
5539 | vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32)); |
5540 | vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1); |
5541 | vptest(tmp2Reg, tmp1Reg); // check for Unicode or non-ASCII chars in vector |
5542 | jccb(Assembler::notZero, L_copy_32_chars_exit);
5543 | vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1); |
5544 | vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector_len */ 1); |
5545 | vmovdqu(Address(dst, len, Address::times_1, -32), tmp4Reg); |
5546 | |
5547 | bind(L_chars_32_check); |
5548 | addptr(len, 32); |
5549 | jcc(Assembler::lessEqual, L_copy_32_chars); |
5550 | |
5551 | bind(L_copy_32_chars_exit); |
5552 | subptr(len, 16); |
5553 | jccb(Assembler::greater, L_copy_16_chars_exit);
5554 | |
5555 | } else if (UseSSE42Intrinsics) { |
5556 | movl(tmp5, mask); // create mask to test for Unicode or non-ASCII chars in vector |
5557 | movdl(tmp1Reg, tmp5); |
5558 | pshufd(tmp1Reg, tmp1Reg, 0); |
5559 | jmpb(L_chars_16_check);
5560 | } |
5561 | |
5562 | bind(L_copy_16_chars); |
5563 | if (UseAVX >= 2) { |
5564 | vmovdqu(tmp2Reg, Address(src, len, Address::times_2, -32)); |
5565 | vptest(tmp2Reg, tmp1Reg); |
5566 | jcc(Assembler::notZero, L_copy_16_chars_exit); |
5567 | vpackuswb(tmp2Reg, tmp2Reg, tmp1Reg, /* vector_len */ 1); |
5568 | vpermq(tmp3Reg, tmp2Reg, 0xD8, /* vector_len */ 1); |
5569 | } else { |
5570 | if (UseAVX > 0) { |
5571 | movdqu(tmp3Reg, Address(src, len, Address::times_2, -32)); |
5572 | movdqu(tmp4Reg, Address(src, len, Address::times_2, -16)); |
5573 | vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector_len */ 0); |
5574 | } else { |
5575 | movdqu(tmp3Reg, Address(src, len, Address::times_2, -32)); |
5576 | por(tmp2Reg, tmp3Reg); |
5577 | movdqu(tmp4Reg, Address(src, len, Address::times_2, -16)); |
5578 | por(tmp2Reg, tmp4Reg); |
5579 | } |
5580 | ptest(tmp2Reg, tmp1Reg); // check for Unicode or non-ASCII chars in vector |
5581 | jccb(Assembler::notZero, L_copy_16_chars_exit);
5582 | packuswb(tmp3Reg, tmp4Reg); |
5583 | } |
5584 | movdqu(Address(dst, len, Address::times_1, -16), tmp3Reg); |
5585 | |
5586 | bind(L_chars_16_check); |
5587 | addptr(len, 16); |
5588 | jcc(Assembler::lessEqual, L_copy_16_chars); |
5589 | |
5590 | bind(L_copy_16_chars_exit); |
5591 | if (UseAVX >= 2) { |
5592 | // clean upper bits of YMM registers |
5593 | vpxor(tmp2Reg, tmp2Reg); |
5594 | vpxor(tmp3Reg, tmp3Reg); |
5595 | vpxor(tmp4Reg, tmp4Reg); |
5596 | movdl(tmp1Reg, tmp5); |
5597 | pshufd(tmp1Reg, tmp1Reg, 0); |
5598 | } |
5599 | subptr(len, 8); |
5600 | jccb(Assembler::greater, L_copy_8_chars_exit);
5601 | |
5602 | bind(L_copy_8_chars); |
5603 | movdqu(tmp3Reg, Address(src, len, Address::times_2, -16)); |
5604 | ptest(tmp3Reg, tmp1Reg); |
5605 | jccb(Assembler::notZero, L_copy_8_chars_exit);
5606 | packuswb(tmp3Reg, tmp1Reg); |
5607 | movq(Address(dst, len, Address::times_1, -8), tmp3Reg); |
5608 | addptr(len, 8); |
5609 | jccb(Assembler::lessEqual, L_copy_8_chars);
5610 | |
5611 | bind(L_copy_8_chars_exit); |
5612 | subptr(len, 8); |
5613 | jccb(Assembler::zero, L_done);
5614 | } |
5615 | |
5616 | bind(L_copy_1_char); |
5617 | load_unsigned_short(tmp5, Address(src, len, Address::times_2, 0)); |
5618 | testl(tmp5, short_mask); // check if Unicode or non-ASCII char |
5619 | jccb(Assembler::notZero, L_copy_1_char_exit);
5620 | movb(Address(dst, len, Address::times_1, 0), tmp5); |
5621 | addptr(len, 1); |
5622 | jccb(Assembler::less, L_copy_1_char);
5623 | |
5624 | bind(L_copy_1_char_exit); |
5625 | addptr(result, len); // len is the negative count of unprocessed elements
5626 | |
5627 | bind(L_done); |
5628 | } |
5629 | |
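// Editorial sketch (not part of the original source): the SSE4.2/AVX2 loops
// above are an unrolled form of the scalar char-to-byte compression below.
// The mask is assumed to select the bits a char must not have in order to fit
// in a byte (e.g. 0xff00); the name and signature are illustrative only.
static int compress_chars_sketch(const uint16_t* src, uint8_t* dst,
                                 int len, uint16_t short_mask) {
  for (int i = 0; i < len; i++) {
    if (src[i] & short_mask) {   // char does not fit in a byte
      return i;                  // number of chars successfully compressed
    }
    dst[i] = (uint8_t)src[i];
  }
  return len;
}
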
5630 | #ifdef _LP64
5631 | /** |
5632 | * Helper for multiply_to_len(). |
5633 | */ |
5634 | void MacroAssembler::add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2) { |
5635 | addq(dest_lo, src1); |
5636 | adcq(dest_hi, 0); |
5637 | addq(dest_lo, src2); |
5638 | adcq(dest_hi, 0); |
5639 | } |
5640 | |
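// Editorial sketch (not part of the original source): add2_with_carry()
// performs the 128-bit accumulation dest_hi:dest_lo += src1 + src2, folding
// each 64-bit carry-out into dest_hi. Equivalent C++, assuming a compiler
// that provides unsigned __int128 (gcc/clang):
static inline void add2_with_carry_sketch(uint64_t& dest_hi, uint64_t& dest_lo,
                                          uint64_t src1, uint64_t src2) {
  unsigned __int128 sum = ((unsigned __int128)dest_hi << 64) | dest_lo;
  sum += src1;                       // first addq/adcq pair
  sum += src2;                       // second addq/adcq pair
  dest_lo = (uint64_t)sum;
  dest_hi = (uint64_t)(sum >> 64);
}
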
5641 | /** |
5642 | * Multiply 64 bit by 64 bit first loop. |
5643 | */ |
5644 | void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, |
5645 | Register y, Register y_idx, Register z, |
5646 | Register carry, Register product, |
5647 | Register idx, Register kdx) { |
5648 | // |
5649 | // jlong carry, x[], y[], z[]; |
5650 | // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
5651 | // huge_128 product = y[idx] * x[xstart] + carry; |
5652 | // z[kdx] = (jlong)product; |
5653 | // carry = (jlong)(product >>> 64); |
5654 | // } |
5655 | // z[xstart] = carry; |
5656 | // |
5657 | |
5658 | Label L_first_loop, L_first_loop_exit; |
5659 | Label L_one_x, L_one_y, L_multiply; |
5660 | |
5661 | decrementl(xstart); |
5662 | jcc(Assembler::negative, L_one_x); |
5663 | |
5664 | movq(x_xstart, Address(x, xstart, Address::times_4, 0)); |
5665 | rorq(x_xstart, 32); // convert big-endian to little-endian |
5666 | |
5667 | bind(L_first_loop); |
5668 | decrementl(idx); |
5669 | jcc(Assembler::negative, L_first_loop_exit); |
5670 | decrementl(idx); |
5671 | jcc(Assembler::negative, L_one_y); |
5672 | movq(y_idx, Address(y, idx, Address::times_4, 0)); |
5673 | rorq(y_idx, 32); // convert big-endian to little-endian |
5674 | bind(L_multiply); |
5675 | movq(product, x_xstart); |
5676 | mulq(y_idx); // product(rax) * y_idx -> rdx:rax |
5677 | addq(product, carry); |
5678 | adcq(rdx, 0); |
5679 | subl(kdx, 2); |
5680 | movl(Address(z, kdx, Address::times_4, 4), product); |
5681 | shrq(product, 32); |
5682 | movl(Address(z, kdx, Address::times_4, 0), product); |
5683 | movq(carry, rdx); |
5684 | jmp(L_first_loop); |
5685 | |
5686 | bind(L_one_y); |
5687 | movl(y_idx, Address(y, 0)); |
5688 | jmp(L_multiply); |
5689 | |
5690 | bind(L_one_x); |
5691 | movl(x_xstart, Address(x, 0)); |
5692 | jmp(L_first_loop); |
5693 | |
5694 | bind(L_first_loop_exit); |
5695 | } |
5696 | |
5697 | /** |
5698 | * Multiply 64 bit by 64 bit and add 128 bit. |
5699 | */ |
5700 | void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y, Register z, |
5701 | Register yz_idx, Register idx, |
5702 | Register carry, Register product, int offset) { |
5703 | // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry; |
5704 | // z[kdx] = (jlong)product; |
5705 | |
5706 | movq(yz_idx, Address(y, idx, Address::times_4, offset)); |
5707 | rorq(yz_idx, 32); // convert big-endian to little-endian |
5708 | movq(product, x_xstart); |
5709 | mulq(yz_idx); // product(rax) * yz_idx -> rdx:product(rax) |
5710 | movq(yz_idx, Address(z, idx, Address::times_4, offset)); |
5711 | rorq(yz_idx, 32); // convert big-endian to little-endian |
5712 | |
5713 | add2_with_carry(rdx, product, carry, yz_idx); |
5714 | |
5715 | movl(Address(z, idx, Address::times_4, offset+4), product); |
5716 | shrq(product, 32); |
5717 | movl(Address(z, idx, Address::times_4, offset), product); |
5718 | |
5719 | } |
5720 | |
5721 | /** |
5722 | * Multiply 128 bit by 128 bit. Unrolled inner loop. |
5723 | */ |
5724 | void MacroAssembler::multiply_128_x_128_loop(Register x_xstart, Register y, Register z, |
5725 | Register yz_idx, Register idx, Register jdx, |
5726 | Register carry, Register product, |
5727 | Register carry2) { |
5728 | // jlong carry, x[], y[], z[]; |
5729 | // int kdx = ystart+1; |
5730 | // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop |
5731 | // huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry; |
5732 | // z[kdx+idx+1] = (jlong)product; |
5733 | // jlong carry2 = (jlong)(product >>> 64); |
5734 | // product = (y[idx] * x_xstart) + z[kdx+idx] + carry2; |
5735 | // z[kdx+idx] = (jlong)product; |
5736 | // carry = (jlong)(product >>> 64); |
5737 | // } |
5738 | // idx += 2; |
5739 | // if (idx > 0) { |
5740 | // product = (y[idx] * x_xstart) + z[kdx+idx] + carry; |
5741 | // z[kdx+idx] = (jlong)product; |
5742 | // carry = (jlong)(product >>> 64); |
5743 | // } |
5744 | // |
5745 | |
5746 | Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; |
5747 | |
5748 | movl(jdx, idx); |
5749 | andl(jdx, 0xFFFFFFFC); |
5750 | shrl(jdx, 2); |
5751 | |
5752 | bind(L_third_loop); |
5753 | subl(jdx, 1); |
5754 | jcc(Assembler::negative, L_third_loop_exit); |
5755 | subl(idx, 4); |
5756 | |
5757 | multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 8); |
5758 | movq(carry2, rdx); |
5759 | |
5760 | multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product, 0); |
5761 | movq(carry, rdx); |
5762 | jmp(L_third_loop); |
5763 | |
5764 | bind (L_third_loop_exit); |
5765 | |
5766 | andl (idx, 0x3); |
5767 | jcc(Assembler::zero, L_post_third_loop_done); |
5768 | |
5769 | Label L_check_1; |
5770 | subl(idx, 2); |
5771 | jcc(Assembler::negative, L_check_1); |
5772 | |
5773 | multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 0); |
5774 | movq(carry, rdx); |
5775 | |
5776 | bind (L_check_1); |
5777 | addl (idx, 0x2); |
5778 | andl (idx, 0x1); |
5779 | subl(idx, 1); |
5780 | jcc(Assembler::negative, L_post_third_loop_done); |
5781 | |
5782 | movl(yz_idx, Address(y, idx, Address::times_4, 0)); |
5783 | movq(product, x_xstart); |
5784 | mulq(yz_idx); // product(rax) * yz_idx -> rdx:product(rax) |
5785 | movl(yz_idx, Address(z, idx, Address::times_4, 0)); |
5786 | |
5787 | add2_with_carry(rdx, product, yz_idx, carry); |
5788 | |
5789 | movl(Address(z, idx, Address::times_4, 0), product); |
5790 | shrq(product, 32); |
5791 | |
5792 | shlq(rdx, 32); |
5793 | orq(product, rdx); |
5794 | movq(carry, product); |
5795 | |
5796 | bind(L_post_third_loop_done); |
5797 | } |
5798 | |
5799 | /** |
5800 | * Multiply 128 bit by 128 bit using BMI2. Unrolled inner loop. |
5801 | * |
5802 | */ |
5803 | void MacroAssembler::multiply_128_x_128_bmi2_loop(Register y, Register z, |
5804 | Register carry, Register carry2, |
5805 | Register idx, Register jdx, |
5806 | Register yz_idx1, Register yz_idx2, |
5807 | Register tmp, Register tmp3, Register tmp4) { |
5808 | assert(UseBMI2Instructions, "should be used only when BMI2 is available");
5809 | |
5810 | // jlong carry, x[], y[], z[]; |
5811 | // int kdx = ystart+1; |
5812 | // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop |
5813 | // huge_128 tmp3 = (y[idx+1] * rdx) + z[kdx+idx+1] + carry; |
5814 | // jlong carry2 = (jlong)(tmp3 >>> 64); |
5815 | // huge_128 tmp4 = (y[idx] * rdx) + z[kdx+idx] + carry2; |
5816 | // carry = (jlong)(tmp4 >>> 64); |
5817 | // z[kdx+idx+1] = (jlong)tmp3; |
5818 | // z[kdx+idx] = (jlong)tmp4; |
5819 | // } |
5820 | // idx += 2; |
5821 | // if (idx > 0) { |
5822 | // yz_idx1 = (y[idx] * rdx) + z[kdx+idx] + carry; |
5823 | // z[kdx+idx] = (jlong)yz_idx1; |
5824 | // carry = (jlong)(yz_idx1 >>> 64); |
5825 | // } |
5826 | // |
5827 | |
5828 | Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; |
5829 | |
5830 | movl(jdx, idx); |
5831 | andl(jdx, 0xFFFFFFFC); |
5832 | shrl(jdx, 2); |
5833 | |
5834 | bind(L_third_loop); |
5835 | subl(jdx, 1); |
5836 | jcc(Assembler::negative, L_third_loop_exit); |
5837 | subl(idx, 4); |
5838 | |
5839 | movq(yz_idx1, Address(y, idx, Address::times_4, 8)); |
5840 | rorxq(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian |
5841 | movq(yz_idx2, Address(y, idx, Address::times_4, 0)); |
5842 | rorxq(yz_idx2, yz_idx2, 32); |
5843 | |
5844 | mulxq(tmp4, tmp3, yz_idx1); // yz_idx1 * rdx -> tmp4:tmp3 |
5845 | mulxq(carry2, tmp, yz_idx2); // yz_idx2 * rdx -> carry2:tmp |
5846 | |
5847 | movq(yz_idx1, Address(z, idx, Address::times_4, 8)); |
5848 | rorxq(yz_idx1, yz_idx1, 32); |
5849 | movq(yz_idx2, Address(z, idx, Address::times_4, 0)); |
5850 | rorxq(yz_idx2, yz_idx2, 32); |
5851 | |
5852 | if (VM_Version::supports_adx()) { |
5853 | adcxq(tmp3, carry); |
5854 | adoxq(tmp3, yz_idx1); |
5855 | |
5856 | adcxq(tmp4, tmp); |
5857 | adoxq(tmp4, yz_idx2); |
5858 | |
5859 | movl(carry, 0); // does not affect flags |
5860 | adcxq(carry2, carry); |
5861 | adoxq(carry2, carry); |
5862 | } else { |
5863 | add2_with_carry(tmp4, tmp3, carry, yz_idx1); |
5864 | add2_with_carry(carry2, tmp4, tmp, yz_idx2); |
5865 | } |
5866 | movq(carry, carry2); |
5867 | |
5868 | movl(Address(z, idx, Address::times_4, 12), tmp3); |
5869 | shrq(tmp3, 32); |
5870 | movl(Address(z, idx, Address::times_4, 8), tmp3); |
5871 | |
5872 | movl(Address(z, idx, Address::times_4, 4), tmp4); |
5873 | shrq(tmp4, 32); |
5874 | movl(Address(z, idx, Address::times_4, 0), tmp4); |
5875 | |
5876 | jmp(L_third_loop); |
5877 | |
5878 | bind (L_third_loop_exit); |
5879 | |
5880 | andl (idx, 0x3); |
5881 | jcc(Assembler::zero, L_post_third_loop_done); |
5882 | |
5883 | Label L_check_1; |
5884 | subl(idx, 2); |
5885 | jcc(Assembler::negative, L_check_1); |
5886 | |
5887 | movq(yz_idx1, Address(y, idx, Address::times_4, 0)); |
5888 | rorxq(yz_idx1, yz_idx1, 32); |
5889 | mulxq(tmp4, tmp3, yz_idx1); // yz_idx1 * rdx -> tmp4:tmp3 |
5890 | movq(yz_idx2, Address(z, idx, Address::times_4, 0)); |
5891 | rorxq(yz_idx2, yz_idx2, 32); |
5892 | |
5893 | add2_with_carry(tmp4, tmp3, carry, yz_idx2); |
5894 | |
5895 | movl(Address(z, idx, Address::times_4, 4), tmp3); |
5896 | shrq(tmp3, 32); |
5897 | movl(Address(z, idx, Address::times_4, 0), tmp3); |
5898 | movq(carry, tmp4); |
5899 | |
5900 | bind (L_check_1); |
5901 | addl (idx, 0x2); |
5902 | andl (idx, 0x1); |
5903 | subl(idx, 1); |
5904 | jcc(Assembler::negative, L_post_third_loop_done); |
5905 | movl(tmp4, Address(y, idx, Address::times_4, 0)); |
5906 | mulxq(carry2, tmp3, tmp4); // tmp4 * rdx -> carry2:tmp3 |
5907 | movl(tmp4, Address(z, idx, Address::times_4, 0)); |
5908 | |
5909 | add2_with_carry(carry2, tmp3, tmp4, carry); |
5910 | |
5911 | movl(Address(z, idx, Address::times_4, 0), tmp3); |
5912 | shrq(tmp3, 32); |
5913 | |
5914 | shlq(carry2, 32); |
5915 | orq(tmp3, carry2); |
5916 | movq(carry, tmp3); |
5917 | |
5918 | bind(L_post_third_loop_done); |
5919 | } |
5920 | |
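// Editorial sketch (not part of the original source): one unrolled step of the
// BMI2 loop above, i.e. the commented pseudocode in compilable form, assuming
// unsigned __int128. On ADX hardware the two additions per limb run on
// independent carry chains (adcx uses only CF, adox uses only OF), which is
// why the loop can interleave them without a serializing flag dependency.
static inline void bmi2_two_limb_step_sketch(uint64_t x, uint64_t y1, uint64_t y0,
                                             uint64_t& z1, uint64_t& z0,
                                             uint64_t& carry) {
  unsigned __int128 t1 = (unsigned __int128)y1 * x + z1 + carry;
  unsigned __int128 t0 = (unsigned __int128)y0 * x + z0 + (uint64_t)(t1 >> 64);
  z1    = (uint64_t)t1;
  z0    = (uint64_t)t0;
  carry = (uint64_t)(t0 >> 64);
}
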
5921 | /** |
5922 | * Code for BigInteger::multiplyToLen() intrinsic.
5923 | * |
5924 | * rdi: x |
5925 | * rax: xlen |
5926 | * rsi: y |
5927 | * rcx: ylen |
5928 | * r8: z |
5929 | * r11: zlen |
5930 | * r12: tmp1 |
5931 | * r13: tmp2 |
5932 | * r14: tmp3 |
5933 | * r15: tmp4 |
5934 | * rbx: tmp5 |
5935 | * |
5936 | */ |
5937 | void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen, |
5938 | Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) { |
5939 | ShortBranchVerifier sbv(this); |
5940 | assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx); |
5941 | |
5942 | push(tmp1); |
5943 | push(tmp2); |
5944 | push(tmp3); |
5945 | push(tmp4); |
5946 | push(tmp5); |
5947 | |
5948 | push(xlen); |
5949 | push(zlen); |
5950 | |
5951 | const Register idx = tmp1; |
5952 | const Register kdx = tmp2; |
5953 | const Register xstart = tmp3; |
5954 | |
5955 | const Register y_idx = tmp4; |
5956 | const Register carry = tmp5; |
5957 | const Register product = xlen; |
5958 | const Register x_xstart = zlen; // reuse register |
5959 | |
5960 | // First Loop. |
5961 | // |
5962 | // final static long LONG_MASK = 0xffffffffL; |
5963 | // int xstart = xlen - 1; |
5964 | // int ystart = ylen - 1; |
5965 | // long carry = 0; |
5966 | // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
5967 | // long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry; |
5968 | // z[kdx] = (int)product; |
5969 | // carry = product >>> 32; |
5970 | // } |
5971 | // z[xstart] = (int)carry; |
5972 | // |
5973 | |
5974 | movl(idx, ylen); // idx = ylen; |
5975 | movl(kdx, zlen); // kdx = xlen+ylen; |
5976 | xorq(carry, carry); // carry = 0; |
5977 | |
5978 | Label L_done; |
5979 | |
5980 | movl(xstart, xlen); |
5981 | decrementl(xstart); |
5982 | jcc(Assembler::negative, L_done); |
5983 | |
5984 | multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); |
5985 | |
5986 | Label L_second_loop; |
5987 | testl(kdx, kdx); |
5988 | jcc(Assembler::zero, L_second_loop); |
5989 | |
5990 | Label L_carry; |
5991 | subl(kdx, 1); |
5992 | jcc(Assembler::zero, L_carry); |
5993 | |
5994 | movl(Address(z, kdx, Address::times_4, 0), carry); |
5995 | shrq(carry, 32); |
5996 | subl(kdx, 1); |
5997 | |
5998 | bind(L_carry); |
5999 | movl(Address(z, kdx, Address::times_4, 0), carry); |
6000 | |
6001 | // Second and third (nested) loops. |
6002 | // |
6003 | // for (int i = xstart-1; i >= 0; i--) { // Second loop |
6004 | // carry = 0; |
6005 | // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop |
6006 | // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + |
6007 | // (z[k] & LONG_MASK) + carry; |
6008 | // z[k] = (int)product; |
6009 | // carry = product >>> 32; |
6010 | // } |
6011 | // z[i] = (int)carry; |
6012 | // } |
6013 | // |
6014 | // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = rdx |
6015 | |
6016 | const Register jdx = tmp1; |
6017 | |
6018 | bind(L_second_loop); |
6019 | xorl(carry, carry); // carry = 0; |
6020 | movl(jdx, ylen); // j = ystart+1 |
6021 | |
6022 | subl(xstart, 1); // i = xstart-1; |
6023 | jcc(Assembler::negative, L_done); |
6024 | |
6025 | push (z); |
6026 | |
6027 | Label L_last_x; |
6028 | lea(z, Address(z, xstart, Address::times_4, 4)); // z = z + k - j |
6029 | subl(xstart, 1); // i = xstart-1; |
6030 | jcc(Assembler::negative, L_last_x); |
6031 | |
6032 | if (UseBMI2Instructions) { |
6033 | movq(rdx, Address(x, xstart, Address::times_4, 0)); |
6034 | rorxq(rdx, rdx, 32); // convert big-endian to little-endian |
6035 | } else { |
6036 | movq(x_xstart, Address(x, xstart, Address::times_4, 0)); |
6037 | rorq(x_xstart, 32); // convert big-endian to little-endian |
6038 | } |
6039 | |
6040 | Label L_third_loop_prologue; |
6041 | bind(L_third_loop_prologue); |
6042 | |
6043 | push (x); |
6044 | push (xstart); |
6045 | push (ylen); |
6046 | |
6047 | |
6048 | if (UseBMI2Instructions) { |
6049 | multiply_128_x_128_bmi2_loop(y, z, carry, x, jdx, ylen, product, tmp2, x_xstart, tmp3, tmp4); |
6050 | } else { // !UseBMI2Instructions |
6051 | multiply_128_x_128_loop(x_xstart, y, z, y_idx, jdx, ylen, carry, product, x); |
6052 | } |
6053 | |
6054 | pop(ylen); |
6055 | pop(xlen); |
6056 | pop(x); |
6057 | pop(z); |
6058 | |
6059 | movl(tmp3, xlen); |
6060 | addl(tmp3, 1); |
6061 | movl(Address(z, tmp3, Address::times_4, 0), carry); |
6062 | subl(tmp3, 1); |
6063 | jccb(Assembler::negative, L_done);
6064 | |
6065 | shrq(carry, 32); |
6066 | movl(Address(z, tmp3, Address::times_4, 0), carry); |
6067 | jmp(L_second_loop); |
6068 | |
6069 | // Next infrequent code is moved outside loops. |
6070 | bind(L_last_x); |
6071 | if (UseBMI2Instructions) { |
6072 | movl(rdx, Address(x, 0)); |
6073 | } else { |
6074 | movl(x_xstart, Address(x, 0)); |
6075 | } |
6076 | jmp(L_third_loop_prologue); |
6077 | |
6078 | bind(L_done); |
6079 | |
6080 | pop(zlen); |
6081 | pop(xlen); |
6082 | |
6083 | pop(tmp5); |
6084 | pop(tmp4); |
6085 | pop(tmp3); |
6086 | pop(tmp2); |
6087 | pop(tmp1); |
6088 | } |
6089 | |
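// Editorial sketch (not part of the original source): the schoolbook multiply
// that multiply_to_len() implements, written over big-endian 32-bit words as
// BigInteger stores them (most significant word at index 0). z must have room
// for xlen + ylen words; the name is illustrative.
static void multiply_to_len_sketch(const uint32_t* x, int xlen,
                                   const uint32_t* y, int ylen, uint32_t* z) {
  int xstart = xlen - 1;
  int ystart = ylen - 1;

  // First loop: z = x[xstart] * y
  uint64_t carry = 0;
  for (int j = ystart, k = ystart + 1 + xstart; j >= 0; j--, k--) {
    uint64_t product = (uint64_t)y[j] * x[xstart] + carry;
    z[k] = (uint32_t)product;
    carry = product >> 32;
  }
  z[xstart] = (uint32_t)carry;

  // Second and third (nested) loops: accumulate the remaining partial products.
  for (int i = xstart - 1; i >= 0; i--) {
    carry = 0;
    for (int j = ystart, k = ystart + 1 + i; j >= 0; j--, k--) {
      uint64_t product = (uint64_t)y[j] * x[i] + z[k] + carry;
      z[k] = (uint32_t)product;
      carry = product >> 32;
    }
    z[i] = (uint32_t)carry;
  }
}
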
6090 | void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale, |
6091 | Register result, Register tmp1, Register tmp2, XMMRegister rymm0, XMMRegister rymm1, XMMRegister rymm2){ |
6092 | assert(UseSSE42Intrinsics, "SSE4.2 must be enabled.");
6093 | Label VECTOR16_LOOP, VECTOR8_LOOP, VECTOR4_LOOP; |
6094 | Label VECTOR8_TAIL, VECTOR4_TAIL; |
6095 | Label VECTOR32_NOT_EQUAL, VECTOR16_NOT_EQUAL, VECTOR8_NOT_EQUAL, VECTOR4_NOT_EQUAL; |
6096 | Label SAME_TILL_END, DONE; |
6097 | Label BYTES_LOOP, BYTES_TAIL, BYTES_NOT_EQUAL; |
6098 | |
6099 | //scale is in rcx in both Win64 and Unix |
6100 | ShortBranchVerifier sbv(this); |
6101 | |
6102 | shlq(length); |
6103 | xorq(result, result); |
6104 | |
6105 | if ((AVX3Threshold == 0) && (UseAVX > 2) && |
6106 | VM_Version::supports_avx512vlbw()) { |
6107 | Label VECTOR64_LOOP, VECTOR64_NOT_EQUAL, VECTOR32_TAIL; |
6108 | |
6109 | cmpq(length, 64); |
6110 | jcc(Assembler::less, VECTOR32_TAIL); |
6111 | |
6112 | movq(tmp1, length); |
6113 | andq(tmp1, 0x3F); // tail count |
6114 | andq(length, ~(0x3F)); //vector count |
6115 | |
6116 | bind(VECTOR64_LOOP); |
6117 | // AVX512 code to compare 64 byte vectors. |
6118 | evmovdqub(rymm0, Address(obja, result), false, Assembler::AVX_512bit); |
6119 | evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit); |
6120 | kortestql(k7, k7); |
6121 | jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch |
6122 | addq(result, 64); |
6123 | subq(length, 64); |
6124 | jccb(Assembler::notZero, VECTOR64_LOOP);
6125 | |
6126 | //bind(VECTOR64_TAIL); |
6127 | testq(tmp1, tmp1); |
6128 | jcc(Assembler::zero, SAME_TILL_END); |
6129 | |
6130 | //bind(VECTOR64_TAIL); |
6131 | // AVX512 code to compare up to 63 byte vectors.
6132 | mov64(tmp2, 0xFFFFFFFFFFFFFFFF); |
6133 | shlxq(tmp2, tmp2, tmp1); |
6134 | notq(tmp2); |
6135 | kmovql(k3, tmp2); |
6136 | |
6137 | evmovdqub(rymm0, k3, Address(obja, result), false, Assembler::AVX_512bit); |
6138 | evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit); |
6139 | |
6140 | ktestql(k7, k3); |
6141 | jcc(Assembler::below, SAME_TILL_END); // not mismatch |
6142 | |
6143 | bind(VECTOR64_NOT_EQUAL); |
6144 | kmovql(tmp1, k7); |
6145 | notq(tmp1); |
6146 | tzcntq(tmp1, tmp1); |
6147 | addq(result, tmp1); |
6148 | shrq(result); |
6149 | jmp(DONE); |
6150 | bind(VECTOR32_TAIL); |
6151 | } |
6152 | |
6153 | cmpq(length, 8); |
6154 | jcc(Assembler::equal, VECTOR8_LOOP); |
6155 | jcc(Assembler::less, VECTOR4_TAIL); |
6156 | |
6157 | if (UseAVX >= 2) { |
6158 | Label VECTOR16_TAIL, VECTOR32_LOOP; |
6159 | |
6160 | cmpq(length, 16); |
6161 | jcc(Assembler::equal, VECTOR16_LOOP); |
6162 | jcc(Assembler::less, VECTOR8_LOOP); |
6163 | |
6164 | cmpq(length, 32); |
6165 | jccb(Assembler::less, VECTOR16_TAIL);
6166 | |
6167 | subq(length, 32); |
6168 | bind(VECTOR32_LOOP); |
6169 | vmovdqu(rymm0, Address(obja, result)); |
6170 | vmovdqu(rymm1, Address(objb, result)); |
6171 | vpxor(rymm2, rymm0, rymm1, Assembler::AVX_256bit); |
6172 | vptest(rymm2, rymm2); |
6173 | jcc(Assembler::notZero, VECTOR32_NOT_EQUAL);//mismatch found |
6174 | addq(result, 32); |
6175 | subq(length, 32); |
6176 | jcc(Assembler::greaterEqual, VECTOR32_LOOP); |
6177 | addq(length, 32); |
6178 | jcc(Assembler::equal, SAME_TILL_END); |
6179 | //falling through if less than 32 bytes left //close the branch here. |
6180 | |
6181 | bind(VECTOR16_TAIL); |
6182 | cmpq(length, 16); |
6183 | jccb(Assembler::less, VECTOR8_TAIL);
6184 | bind(VECTOR16_LOOP); |
6185 | movdqu(rymm0, Address(obja, result)); |
6186 | movdqu(rymm1, Address(objb, result)); |
6187 | vpxor(rymm2, rymm0, rymm1, Assembler::AVX_128bit); |
6188 | ptest(rymm2, rymm2); |
6189 | jcc(Assembler::notZero, VECTOR16_NOT_EQUAL);//mismatch found |
6190 | addq(result, 16); |
6191 | subq(length, 16); |
6192 | jcc(Assembler::equal, SAME_TILL_END); |
6193 | //falling through if less than 16 bytes left |
6194 | } else {//regular intrinsics |
6195 | |
6196 | cmpq(length, 16); |
6197 | jccb(Assembler::less, VECTOR8_TAIL);
6198 | |
6199 | subq(length, 16); |
6200 | bind(VECTOR16_LOOP); |
6201 | movdqu(rymm0, Address(obja, result)); |
6202 | movdqu(rymm1, Address(objb, result)); |
6203 | pxor(rymm0, rymm1); |
6204 | ptest(rymm0, rymm0); |
6205 | jcc(Assembler::notZero, VECTOR16_NOT_EQUAL);//mismatch found |
6206 | addq(result, 16); |
6207 | subq(length, 16); |
6208 | jccb(Assembler::greaterEqual, VECTOR16_LOOP);
6209 | addq(length, 16); |
6210 | jcc(Assembler::equal, SAME_TILL_END); |
6211 | //falling through if less than 16 bytes left |
6212 | } |
6213 | |
6214 | bind(VECTOR8_TAIL); |
6215 | cmpq(length, 8); |
6216 | jccb(Assembler::less, VECTOR4_TAIL);
6217 | bind(VECTOR8_LOOP); |
6218 | movq(tmp1, Address(obja, result)); |
6219 | movq(tmp2, Address(objb, result)); |
6220 | xorq(tmp1, tmp2); |
6221 | testq(tmp1, tmp1); |
6222 | jcc(Assembler::notZero, VECTOR8_NOT_EQUAL);//mismatch found |
6223 | addq(result, 8); |
6224 | subq(length, 8); |
6225 | jcc(Assembler::equal, SAME_TILL_END); |
6226 | //falling through if less than 8 bytes left |
6227 | |
6228 | bind(VECTOR4_TAIL); |
6229 | cmpq(length, 4); |
6230 | jccb(Assembler::less, BYTES_TAIL);
6231 | bind(VECTOR4_LOOP); |
6232 | movl(tmp1, Address(obja, result)); |
6233 | xorl(tmp1, Address(objb, result)); |
6234 | testl(tmp1, tmp1); |
6235 | jcc(Assembler::notZero, VECTOR4_NOT_EQUAL);//mismatch found |
6236 | addq(result, 4); |
6237 | subq(length, 4); |
6238 | jcc(Assembler::equal, SAME_TILL_END); |
6239 | //falling through if less than 4 bytes left |
6240 | |
6241 | bind(BYTES_TAIL); |
6242 | bind(BYTES_LOOP); |
6243 | load_unsigned_byte(tmp1, Address(obja, result)); |
6244 | load_unsigned_byte(tmp2, Address(objb, result)); |
6245 | xorl(tmp1, tmp2); |
6246 | testl(tmp1, tmp1); |
6247 | jcc(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found |
6248 | decq(length); |
6249 | jcc(Assembler::zero, SAME_TILL_END); |
6250 | incq(result); |
6251 | load_unsigned_byte(tmp1, Address(obja, result)); |
6252 | load_unsigned_byte(tmp2, Address(objb, result)); |
6253 | xorl(tmp1, tmp2); |
6254 | testl(tmp1, tmp1); |
6255 | jcc(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found |
6256 | decq(length); |
6257 | jcc(Assembler::zero, SAME_TILL_END); |
6258 | incq(result); |
6259 | load_unsigned_byte(tmp1, Address(obja, result)); |
6260 | load_unsigned_byte(tmp2, Address(objb, result)); |
6261 | xorl(tmp1, tmp2); |
6262 | testl(tmp1, tmp1); |
6263 | jcc(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found |
6264 | jmp(SAME_TILL_END); |
6265 | |
6266 | if (UseAVX >= 2) { |
6267 | bind(VECTOR32_NOT_EQUAL); |
6268 | vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_256bit); |
6269 | vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_256bit); |
6270 | vpxor(rymm0, rymm0, rymm2, Assembler::AVX_256bit); |
6271 | vpmovmskb(tmp1, rymm0); |
6272 | bsfq(tmp1, tmp1); |
6273 | addq(result, tmp1); |
6274 | shrq(result); |
6275 | jmp(DONE); |
6276 | } |
6277 | |
6278 | bind(VECTOR16_NOT_EQUAL); |
6279 | if (UseAVX >= 2) { |
6280 | vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_128bit); |
6281 | vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_128bit); |
6282 | pxor(rymm0, rymm2); |
6283 | } else { |
6284 | pcmpeqb(rymm2, rymm2); |
6285 | pxor(rymm0, rymm1); |
6286 | pcmpeqb(rymm0, rymm1); |
6287 | pxor(rymm0, rymm2); |
6288 | } |
6289 | pmovmskb(tmp1, rymm0); |
6290 | bsfq(tmp1, tmp1); |
6291 | addq(result, tmp1); |
6292 | shrq(result); |
6293 | jmpb(DONE);
6294 | |
6295 | bind(VECTOR8_NOT_EQUAL); |
6296 | bind(VECTOR4_NOT_EQUAL); |
6297 | bsfq(tmp1, tmp1); |
6298 | shrq(tmp1, 3); |
6299 | addq(result, tmp1); |
6300 | bind(BYTES_NOT_EQUAL); |
6301 | shrq(result); |
6302 | jmpb(DONE);
6303 | |
6304 | bind(SAME_TILL_END); |
6305 | mov64(result, -1); |
6306 | |
6307 | bind(DONE); |
6308 | } |
6309 | |
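// Editorial sketch (not part of the original source): the scalar contract of
// vectorized_mismatch() as implemented above -- compare 'length' elements of
// (1 << log2_scale) bytes each and return the element index of the first
// difference, or -1 when the two ranges are identical.
static long mismatch_sketch(const uint8_t* obja, const uint8_t* objb,
                            long length, int log2_scale) {
  long nbytes = length << log2_scale;      // shlq(length) converts elements -> bytes
  for (long i = 0; i < nbytes; i++) {
    if (obja[i] != objb[i]) {
      return i >> log2_scale;              // shrq(result) converts bytes -> elements
    }
  }
  return -1;                               // SAME_TILL_END
}
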
6310 | //Helper functions for square_to_len() |
6311 | |
6312 | /** |
6313 | * Store the squares of x[], right shifted one bit (divided by 2) into z[] |
6314 | * Preserves x and z and modifies rest of the registers. |
6315 | */ |
6316 | void MacroAssembler::square_rshift(Register x, Register xlen, Register z, Register tmp1, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) { |
6317 | // Perform square and right shift by 1 |
6318 | // Handle odd xlen case first, then for even xlen do the following |
6319 | // jlong carry = 0; |
6320 | // for (int j=0, i=0; j < xlen; j+=2, i+=4) { |
6321 | // huge_128 product = x[j:j+1] * x[j:j+1]; |
6322 | // z[i:i+1] = (carry << 63) | (jlong)(product >>> 65); |
6323 | // z[i+2:i+3] = (jlong)(product >>> 1); |
6324 | // carry = (jlong)product; |
6325 | // } |
6326 | |
6327 | xorq(tmp5, tmp5); // carry |
6328 | xorq(rdxReg, rdxReg); |
6329 | xorl(tmp1, tmp1); // index for x |
6330 | xorl(tmp4, tmp4); // index for z |
6331 | |
6332 | Label L_first_loop, L_first_loop_exit; |
6333 | |
6334 | testl(xlen, 1); |
6335 | jccb(Assembler::zero, L_first_loop); //jump if xlen is even
6336 | |
6337 | // Square and right shift by 1 the odd element using 32 bit multiply |
6338 | movl(raxReg, Address(x, tmp1, Address::times_4, 0)); |
6339 | imulq(raxReg, raxReg); |
6340 | shrq(raxReg, 1); |
6341 | adcq(tmp5, 0); |
6342 | movq(Address(z, tmp4, Address::times_4, 0), raxReg); |
6343 | incrementl(tmp1); |
6344 | addl(tmp4, 2); |
6345 | |
6346 | // Square and right shift by 1 the rest using 64 bit multiply |
6347 | bind(L_first_loop); |
6348 | cmpptr(tmp1, xlen); |
6349 | jccb(Assembler::equal, L_first_loop_exit);
6350 | |
6351 | // Square |
6352 | movq(raxReg, Address(x, tmp1, Address::times_4, 0)); |
6353 | rorq(raxReg, 32); // convert big-endian to little-endian |
6354 | mulq(raxReg); // 64-bit multiply rax * rax -> rdx:rax |
6355 | |
6356 | // Right shift by 1 and save carry |
6357 | shrq(tmp5, 1); // rdx:rax:tmp5 = (tmp5:rdx:rax) >>> 1 |
6358 | rcrq(rdxReg, 1); |
6359 | rcrq(raxReg, 1); |
6360 | adcq(tmp5, 0); |
6361 | |
6362 | // Store result in z |
6363 | movq(Address(z, tmp4, Address::times_4, 0), rdxReg); |
6364 | movq(Address(z, tmp4, Address::times_4, 8), raxReg); |
6365 | |
6366 | // Update indices for x and z |
6367 | addl(tmp1, 2); |
6368 | addl(tmp4, 4); |
6369 | jmp(L_first_loop); |
6370 | |
6371 | bind(L_first_loop_exit); |
6372 | } |
6373 | |
6374 | |
6375 | /** |
6376 | * Perform the following multiply add operation using BMI2 instructions |
6377 | * carry:sum = sum + op1*op2 + carry |
6378 | * op2 should be in rdx |
6379 | * op2 is preserved, all other registers are modified |
6380 | */ |
6381 | void MacroAssembler::multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry, Register tmp2) { |
6382 | // assert op2 is rdx |
6383 | mulxq(tmp2, op1, op1); // op1 * op2 -> tmp2:op1 |
6384 | addq(sum, carry); |
6385 | adcq(tmp2, 0); |
6386 | addq(sum, op1); |
6387 | adcq(tmp2, 0); |
6388 | movq(carry, tmp2); |
6389 | } |
6390 | |
6391 | /** |
6392 | * Perform the following multiply add operation: |
6393 | * carry:sum = sum + op1*op2 + carry |
6394 | * Preserves op1, op2 and modifies rest of registers |
6395 | */ |
6396 | void MacroAssembler::multiply_add_64(Register sum, Register op1, Register op2, Register carry, Register rdxReg, Register raxReg) { |
6397 | // rdx:rax = op1 * op2 |
6398 | movq(raxReg, op2); |
6399 | mulq(op1); |
6400 | |
6401 | // rdx:rax = sum + carry + rdx:rax |
6402 | addq(sum, carry); |
6403 | adcq(rdxReg, 0); |
6404 | addq(sum, raxReg); |
6405 | adcq(rdxReg, 0); |
6406 | |
6407 | // carry:sum = rdx:sum |
6408 | movq(carry, rdxReg); |
6409 | } |
6410 | |
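// Editorial sketch (not part of the original source): both multiply_add_64()
// and multiply_add_64_bmi2() compute carry:sum = sum + op1 * op2 + carry.
// In C++, assuming unsigned __int128:
static inline void multiply_add_64_sketch(uint64_t& sum, uint64_t op1, uint64_t op2,
                                          uint64_t& carry) {
  unsigned __int128 t = (unsigned __int128)op1 * op2 + sum + carry;
  sum   = (uint64_t)t;                // low 64 bits
  carry = (uint64_t)(t >> 64);        // high 64 bits become the new carry
}
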
6411 | /** |
6412 | * Add 64 bit long carry into z[] with carry propagation.
6413 | * Preserves z and carry register values and modifies rest of registers. |
6414 | * |
6415 | */ |
6416 | void MacroAssembler::add_one_64(Register z, Register zlen, Register carry, Register tmp1) { |
6417 | Label L_fourth_loop, L_fourth_loop_exit; |
6418 | |
6419 | movl(tmp1, 1); |
6420 | subl(zlen, 2); |
6421 | addq(Address(z, zlen, Address::times_4, 0), carry); |
6422 | |
6423 | bind(L_fourth_loop); |
6424 | jccb(Assembler::carryClear, L_fourth_loop_exit);
6425 | subl(zlen, 2); |
6426 | jccb(Assembler::negative, L_fourth_loop_exit);
6427 | addq(Address(z, zlen, Address::times_4, 0), tmp1); |
6428 | jmp(L_fourth_loop); |
6429 | bind(L_fourth_loop_exit); |
6430 | } |
6431 | |
6432 | /** |
6433 | * Shift z[] left by 1 bit. |
6434 | * Preserves x, len, z and zlen registers and modifies rest of the registers. |
6435 | * |
6436 | */ |
6437 | void MacroAssembler::lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, Register tmp3, Register tmp4) { |
6438 | |
6439 | Label L_fifth_loop, L_fifth_loop_exit; |
6440 | |
6441 | // Fifth loop |
6442 | // Perform primitiveLeftShift(z, zlen, 1) |
6443 | |
6444 | const Register prev_carry = tmp1; |
6445 | const Register new_carry = tmp4; |
6446 | const Register value = tmp2; |
6447 | const Register zidx = tmp3; |
6448 | |
6449 | // int zidx, carry; |
6450 | // long value; |
6451 | // carry = 0; |
6452 | // for (zidx = zlen-2; zidx >=0; zidx -= 2) { |
6453 | // (carry:value) = (z[i] << 1) | carry ; |
6454 | // z[i] = value; |
6455 | // } |
6456 | |
6457 | movl(zidx, zlen); |
6458 | xorl(prev_carry, prev_carry); // clear carry flag and prev_carry register |
6459 | |
6460 | bind(L_fifth_loop); |
6461 | decl(zidx); // Use decl to preserve carry flag |
6462 | decl(zidx); |
6463 | jccb(Assembler::negative, L_fifth_loop_exit);
6464 | |
6465 | if (UseBMI2Instructions) { |
6466 | movq(value, Address(z, zidx, Address::times_4, 0)); |
6467 | rclq(value, 1); |
6468 | rorxq(value, value, 32); |
6469 | movq(Address(z, zidx, Address::times_4, 0), value); // Store back in big endian form |
6470 | } |
6471 | else { |
6472 | // clear new_carry |
6473 | xorl(new_carry, new_carry); |
6474 | |
6475 | // Shift z[i] by 1, or in previous carry and save new carry |
6476 | movq(value, Address(z, zidx, Address::times_4, 0)); |
6477 | shlq(value, 1); |
6478 | adcl(new_carry, 0); |
6479 | |
6480 | orq(value, prev_carry); |
6481 | rorq(value, 0x20); |
6482 | movq(Address(z, zidx, Address::times_4, 0), value); // Store back in big endian form |
6483 | |
6484 | // Set previous carry = new carry |
6485 | movl(prev_carry, new_carry); |
6486 | } |
6487 | jmp(L_fifth_loop); |
6488 | |
6489 | bind(L_fifth_loop_exit); |
6490 | } |
6491 | |
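// Editorial sketch (not part of the original source): the fifth loop doubles z
// in place, i.e. primitiveLeftShift(z, zlen, 1) over big-endian 32-bit words
// (least significant word at the highest index):
static void lshift_by_1_sketch(uint32_t* z, int zlen) {
  uint32_t carry = 0;
  for (int i = zlen - 1; i >= 0; i--) {
    uint32_t next_carry = z[i] >> 31;    // bit shifted out of this word
    z[i] = (z[i] << 1) | carry;          // shift in the carry from the word below
    carry = next_carry;
  }
}
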
6492 | |
6493 | /** |
6494 | * Code for BigInteger::squareToLen() intrinsic |
6495 | * |
6496 | * rdi: x |
6497 | * rsi: len |
6498 | * r8: z |
6499 | * rcx: zlen |
6500 | * r12: tmp1 |
6501 | * r13: tmp2 |
6502 | * r14: tmp3 |
6503 | * r15: tmp4 |
6504 | * rbx: tmp5 |
6505 | * |
6506 | */ |
6507 | void MacroAssembler::square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) { |
6508 | |
6509 | Label L_second_loop, L_second_loop_exit, L_third_loop, L_third_loop_exit, L_last_x, L_multiply; |
6510 | push(tmp1); |
6511 | push(tmp2); |
6512 | push(tmp3); |
6513 | push(tmp4); |
6514 | push(tmp5); |
6515 | |
6516 | // First loop |
6517 | // Store the squares, right shifted one bit (i.e., divided by 2). |
6518 | square_rshift(x, len, z, tmp1, tmp3, tmp4, tmp5, rdxReg, raxReg); |
6519 | |
6520 | // Add in off-diagonal sums. |
6521 | // |
6522 | // Second, third (nested) and fourth loops. |
6523 | // zlen +=2; |
6524 | // for (int xidx=len-2,zidx=zlen-4; xidx > 0; xidx-=2,zidx-=4) { |
6525 | // carry = 0; |
6526 | // long op2 = x[xidx:xidx+1]; |
6527 | // for (int j=xidx-2,k=zidx; j >= 0; j-=2) { |
6528 | // k -= 2; |
6529 | // long op1 = x[j:j+1]; |
6530 | // long sum = z[k:k+1]; |
6531 | // carry:sum = multiply_add_64(sum, op1, op2, carry, tmp_regs); |
6532 | // z[k:k+1] = sum; |
6533 | // } |
6534 | // add_one_64(z, k, carry, tmp_regs); |
6535 | // } |
6536 | |
6537 | const Register carry = tmp5; |
6538 | const Register sum = tmp3; |
6539 | const Register op1 = tmp4; |
6540 | Register op2 = tmp2; |
6541 | |
6542 | push(zlen); |
6543 | push(len); |
6544 | addl(zlen,2); |
6545 | bind(L_second_loop); |
6546 | xorq(carry, carry); |
6547 | subl(zlen, 4); |
6548 | subl(len, 2); |
6549 | push(zlen); |
6550 | push(len); |
6551 | cmpl(len, 0); |
6552 | jccb(Assembler::lessEqual, L_second_loop_exit);
6553 | |
6554 | // Multiply an array by one 64 bit long. |
6555 | if (UseBMI2Instructions) { |
6556 | op2 = rdxReg; |
6557 | movq(op2, Address(x, len, Address::times_4, 0)); |
6558 | rorxq(op2, op2, 32); |
6559 | } |
6560 | else { |
6561 | movq(op2, Address(x, len, Address::times_4, 0)); |
6562 | rorq(op2, 32); |
6563 | } |
6564 | |
6565 | bind(L_third_loop); |
6566 | decrementl(len); |
6567 | jccb(Assembler::negative, L_third_loop_exit);
6568 | decrementl(len); |
6569 | jccb(Assembler::negative, L_last_x);
6570 | |
6571 | movq(op1, Address(x, len, Address::times_4, 0)); |
6572 | rorq(op1, 32); |
6573 | |
6574 | bind(L_multiply); |
6575 | subl(zlen, 2); |
6576 | movq(sum, Address(z, zlen, Address::times_4, 0)); |
6577 | |
6578 | // Multiply 64 bit by 64 bit and add 64 bits lower half and upper 64 bits as carry. |
6579 | if (UseBMI2Instructions) { |
6580 | multiply_add_64_bmi2(sum, op1, op2, carry, tmp2); |
6581 | } |
6582 | else { |
6583 | multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg); |
6584 | } |
6585 | |
6586 | movq(Address(z, zlen, Address::times_4, 0), sum); |
6587 | |
6588 | jmp(L_third_loop); |
6589 | bind(L_third_loop_exit); |
6590 | |
6591 | // Fourth loop |
6592 | // Add 64 bit long carry into z with carry propagation.
6593 | // Uses offsetted zlen. |
6594 | add_one_64(z, zlen, carry, tmp1); |
6595 | |
6596 | pop(len); |
6597 | pop(zlen); |
6598 | jmp(L_second_loop); |
6599 | |
6600 | // Next infrequent code is moved outside loops. |
6601 | bind(L_last_x); |
6602 | movl(op1, Address(x, 0)); |
6603 | jmp(L_multiply); |
6604 | |
6605 | bind(L_second_loop_exit); |
6606 | pop(len); |
6607 | pop(zlen); |
6608 | pop(len); |
6609 | pop(zlen); |
6610 | |
6611 | // Fifth loop |
6612 | // Shift z left 1 bit. |
6613 | lshift_by_1(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4); |
6614 | |
6615 | // z[zlen-1] |= x[len-1] & 1; |
6616 | movl(tmp3, Address(x, len, Address::times_4, -4)); |
6617 | andl(tmp3, 1); |
6618 | orl(Address(z, zlen, Address::times_4, -4), tmp3); |
6619 | |
6620 | pop(tmp5); |
6621 | pop(tmp4); |
6622 | pop(tmp3); |
6623 | pop(tmp2); |
6624 | pop(tmp1); |
6625 | } |
6626 | |
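// Editorial note (not part of the original source): the structure above relies
// on the identity, for x = sum_i x_i * 2^(32*i),
//   x^2 = sum_i x_i^2 * 2^(64*i)  +  2 * sum_{i<j} x_i * x_j * 2^(32*(i+j)).
// square_rshift() stores the diagonal squares already divided by 2, the second
// and third loops add each off-diagonal product x_i * x_j once, and
// lshift_by_1() then doubles the whole accumulator, so the off-diagonal terms
// end up counted twice while the squares are restored. The final
// "z[zlen-1] |= x[len-1] & 1" puts back the low bit of x^2 (equal to the low
// bit of x) that was lost when the first square was halved.
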
6627 | /** |
6628 | * Helper function for mul_add() |
6629 | * Multiply the in[] by int k and add to out[] starting at offset offs using |
6630 | * 128 bit by 32 bit multiply and return the carry in tmp5. |
6631 | * Only quad int aligned length of in[] is operated on in this function. |
6632 | * k is in rdxReg for BMI2Instructions, for others it is in tmp2. |
6633 | * This function preserves out, in and k registers. |
6634 | * len and offset point to the appropriate index in "in" and "out" respectively.
6635 | * tmp5 has the carry. |
6636 | * other registers are temporary and are modified. |
6637 | * |
6638 | */ |
6639 | void MacroAssembler::mul_add_128_x_32_loop(Register out, Register in, |
6640 | Register offset, Register len, Register tmp1, Register tmp2, Register tmp3, |
6641 | Register tmp4, Register tmp5, Register rdxReg, Register raxReg) { |
6642 | |
6643 | Label L_first_loop, L_first_loop_exit; |
6644 | |
6645 | movl(tmp1, len); |
6646 | shrl(tmp1, 2); |
6647 | |
6648 | bind(L_first_loop); |
6649 | subl(tmp1, 1); |
6650 | jccb(Assembler::negative, L_first_loop_exit);
6651 | |
6652 | subl(len, 4); |
6653 | subl(offset, 4); |
6654 | |
6655 | Register op2 = tmp2; |
6656 | const Register sum = tmp3; |
6657 | const Register op1 = tmp4; |
6658 | const Register carry = tmp5; |
6659 | |
6660 | if (UseBMI2Instructions) { |
6661 | op2 = rdxReg; |
6662 | } |
6663 | |
6664 | movq(op1, Address(in, len, Address::times_4, 8)); |
6665 | rorq(op1, 32); |
6666 | movq(sum, Address(out, offset, Address::times_4, 8)); |
6667 | rorq(sum, 32); |
6668 | if (UseBMI2Instructions) { |
6669 | multiply_add_64_bmi2(sum, op1, op2, carry, raxReg); |
6670 | } |
6671 | else { |
6672 | multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg); |
6673 | } |
6674 | // Store back in big endian from little endian |
6675 | rorq(sum, 0x20); |
6676 | movq(Address(out, offset, Address::times_4, 8), sum); |
6677 | |
6678 | movq(op1, Address(in, len, Address::times_4, 0)); |
6679 | rorq(op1, 32); |
6680 | movq(sum, Address(out, offset, Address::times_4, 0)); |
6681 | rorq(sum, 32); |
6682 | if (UseBMI2Instructions) { |
6683 | multiply_add_64_bmi2(sum, op1, op2, carry, raxReg); |
6684 | } |
6685 | else { |
6686 | multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg); |
6687 | } |
6688 | // Store back in big endian from little endian |
6689 | rorq(sum, 0x20); |
6690 | movq(Address(out, offset, Address::times_4, 0), sum); |
6691 | |
6692 | jmp(L_first_loop); |
6693 | bind(L_first_loop_exit); |
6694 | } |
6695 | |
6696 | /** |
6697 | * Code for BigInteger::mulAdd() intrinsic |
6698 | * |
6699 | * rdi: out |
6700 | * rsi: in |
6701 | * r11: offs (out.length - offset) |
6702 | * rcx: len |
6703 | * r8: k |
6704 | * r12: tmp1 |
6705 | * r13: tmp2 |
6706 | * r14: tmp3 |
6707 | * r15: tmp4 |
6708 | * rbx: tmp5 |
6709 | * Multiply the in[] by word k and add to out[], return the carry in rax |
6710 | */ |
6711 | void MacroAssembler::mul_add(Register out, Register in, Register offs, |
6712 | Register len, Register k, Register tmp1, Register tmp2, Register tmp3, |
6713 | Register tmp4, Register tmp5, Register rdxReg, Register raxReg) { |
6714 | |
6715 | Label L_carry, L_last_in, L_done; |
6716 | |
6717 | // carry = 0; |
6718 | // for (int j=len-1; j >= 0; j--) { |
6719 | // long product = (in[j] & LONG_MASK) * kLong + |
6720 | // (out[offs] & LONG_MASK) + carry; |
6721 | // out[offs--] = (int)product; |
6722 | // carry = product >>> 32; |
6723 | // } |
6724 | // |
6725 | push(tmp1); |
6726 | push(tmp2); |
6727 | push(tmp3); |
6728 | push(tmp4); |
6729 | push(tmp5); |
6730 | |
6731 | Register op2 = tmp2; |
6732 | const Register sum = tmp3; |
6733 | const Register op1 = tmp4; |
6734 | const Register carry = tmp5; |
6735 | |
6736 | if (UseBMI2Instructions) { |
6737 | op2 = rdxReg; |
6738 | movl(op2, k); |
6739 | } |
6740 | else { |
6741 | movl(op2, k); |
6742 | } |
6743 | |
6744 | xorq(carry, carry); |
6745 | |
6746 | //First loop |
6747 | |
6748 | //Multiply in[] by k in a 4 way unrolled loop using 128 bit by 32 bit multiply |
6749 | //The carry is in tmp5 |
6750 | mul_add_128_x_32_loop(out, in, offs, len, tmp1, tmp2, tmp3, tmp4, tmp5, rdxReg, raxReg); |
6751 | |
6752 | //Multiply the trailing in[] entry using 64 bit by 32 bit, if any |
6753 | decrementl(len); |
6754 | jccb(Assembler::negative, L_carry);
6755 | decrementl(len); |
6756 | jccb(Assembler::negative, L_last_in);
6757 | |
6758 | movq(op1, Address(in, len, Address::times_4, 0)); |
6759 | rorq(op1, 32); |
6760 | |
6761 | subl(offs, 2); |
6762 | movq(sum, Address(out, offs, Address::times_4, 0)); |
6763 | rorq(sum, 32); |
6764 | |
6765 | if (UseBMI2Instructions) { |
6766 | multiply_add_64_bmi2(sum, op1, op2, carry, raxReg); |
6767 | } |
6768 | else { |
6769 | multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg); |
6770 | } |
6771 | |
6772 | // Store back in big endian from little endian |
6773 | rorq(sum, 0x20); |
6774 | movq(Address(out, offs, Address::times_4, 0), sum); |
6775 | |
6776 | testl(len, len); |
6777 | jccb(Assembler::zero, L_carry);
6778 | |
6779 | //Multiply the last in[] entry, if any |
6780 | bind(L_last_in); |
6781 | movl(op1, Address(in, 0)); |
6782 | movl(sum, Address(out, offs, Address::times_4, -4)); |
6783 | |
6784 | movl(raxReg, k); |
6785 | mull(op1); //tmp4 * eax -> edx:eax |
6786 | addl(sum, carry); |
6787 | adcl(rdxReg, 0); |
6788 | addl(sum, raxReg); |
6789 | adcl(rdxReg, 0); |
6790 | movl(carry, rdxReg); |
6791 | |
6792 | movl(Address(out, offs, Address::times_4, -4), sum); |
6793 | |
6794 | bind(L_carry); |
6795 | //return tmp5/carry as carry in rax |
6796 | movl(rax, carry); |
6797 | |
6798 | bind(L_done); |
6799 | pop(tmp5); |
6800 | pop(tmp4); |
6801 | pop(tmp3); |
6802 | pop(tmp2); |
6803 | pop(tmp1); |
6804 | } |
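// Editorial sketch (not part of the original source): the scalar loop that
// mul_add() implements, mirroring the commented pseudocode above. 'offs'
// indexes the least significant word of the output region and decrements
// toward more significant words (big-endian word order); the final carry is
// what the stub returns in rax.
static uint32_t mul_add_sketch(uint32_t* out, const uint32_t* in,
                               int offs, int len, uint32_t k) {
  uint64_t carry = 0;
  for (int j = len - 1; j >= 0; j--) {
    uint64_t product = (uint64_t)in[j] * k + out[offs] + carry;
    out[offs--] = (uint32_t)product;
    carry = product >> 32;
  }
  return (uint32_t)carry;
}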
6805 | #endif |
6806 | |
6807 | /** |
6808 | * Emits code to update CRC-32 with a byte value according to constants in table |
6809 | * |
6810 | * @param [in,out]crc Register containing the crc. |
6811 | * @param [in]val Register containing the byte to fold into the CRC. |
6812 | * @param [in]table Register containing the table of crc constants. |
6813 | * |
6814 | * uint32_t crc; |
6815 | * val = crc_table[(val ^ crc) & 0xFF]; |
6816 | * crc = val ^ (crc >> 8); |
6817 | * |
6818 | */ |
6819 | void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { |
6820 | xorl(val, crc); |
6821 | andl(val, 0xFF); |
6822 | shrl(crc, 8); // unsigned shift |
6823 | xorl(crc, Address(table, val, Address::times_4, 0)); |
6824 | } |
6825 | |
6826 | /** |
6827 | * Fold 128-bit data chunk |
6828 | */ |
6829 | void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) { |
6830 | if (UseAVX > 0) { |
6831 | vpclmulhdq(xtmp, xK, xcrc); // [123:64] |
6832 | vpclmulldq(xcrc, xK, xcrc); // [63:0] |
6833 | vpxor(xcrc, xcrc, Address(buf, offset), 0 /* vector_len */); |
6834 | pxor(xcrc, xtmp); |
6835 | } else { |
6836 | movdqa(xtmp, xcrc); |
6837 | pclmulhdq(xtmp, xK); // [123:64] |
6838 | pclmulldq(xcrc, xK); // [63:0] |
6839 | pxor(xcrc, xtmp); |
6840 | movdqu(xtmp, Address(buf, offset)); |
6841 | pxor(xcrc, xtmp); |
6842 | } |
6843 | } |
6844 | |
6845 | void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) { |
6846 | if (UseAVX > 0) { |
6847 | vpclmulhdq(xtmp, xK, xcrc); |
6848 | vpclmulldq(xcrc, xK, xcrc); |
6849 | pxor(xcrc, xbuf); |
6850 | pxor(xcrc, xtmp); |
6851 | } else { |
6852 | movdqa(xtmp, xcrc); |
6853 | pclmulhdq(xtmp, xK); |
6854 | pclmulldq(xcrc, xK); |
6855 | pxor(xcrc, xbuf); |
6856 | pxor(xcrc, xtmp); |
6857 | } |
6858 | } |
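// Editorial note (not part of the original source): in GF(2) polynomial
// arithmetic each 128-bit fold computes
//   xcrc' = clmul(xcrc_hi, K_hi) ^ clmul(xcrc_lo, K_lo) ^ next_128_bits,
// where K_hi/K_lo are precomputed constants of the form x^N mod P(x) taken
// from StubRoutines::x86::crc_by128_masks_addr(). Folding keeps the running
// value congruent (mod P) to the CRC of everything consumed so far while only
// ever touching 128 bits at a time; vpclmulhdq/vpclmulldq (or their SSE
// counterparts) supply the 64x64->128 carry-less multiplies of the high and
// low halves.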
6859 | |
6860 | /** |
6861 | * 8-bit folds to compute 32-bit CRC |
6862 | * |
6863 | * uint64_t xcrc; |
6864 | * timesXtoThe32[xcrc & 0xFF] ^ (xcrc >> 8); |
6865 | */ |
6866 | void MacroAssembler::fold_8bit_crc32(XMMRegister xcrc, Register table, XMMRegister xtmp, Register tmp) { |
6867 | movdl(tmp, xcrc); |
6868 | andl(tmp, 0xFF); |
6869 | movdl(xtmp, Address(table, tmp, Address::times_4, 0)); |
6870 | psrldq(xcrc, 1); // unsigned shift one byte |
6871 | pxor(xcrc, xtmp); |
6872 | } |
6873 | |
6874 | /** |
6875 | * uint32_t crc; |
6876 | * timesXtoThe32[crc & 0xFF] ^ (crc >> 8); |
6877 | */ |
6878 | void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) { |
6879 | movl(tmp, crc); |
6880 | andl(tmp, 0xFF); |
6881 | shrl(crc, 8); |
6882 | xorl(crc, Address(table, tmp, Address::times_4, 0)); |
6883 | } |
6884 | |
6885 | /** |
6886 | * @param crc register containing existing CRC (32-bit) |
6887 | * @param buf register pointing to input byte buffer (byte*) |
6888 | * @param len register containing number of bytes |
6889 | * @param table register that will contain address of CRC table |
6890 | * @param tmp scratch register |
6891 | */ |
6892 | void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp) { |
6893 | assert_different_registers(crc, buf, len, table, tmp, rax); |
6894 | |
6895 | Label L_tail, L_tail_restore, L_tail_loop, L_exit, L_align_loop, L_aligned; |
6896 | Label L_fold_tail, L_fold_128b, L_fold_512b, L_fold_512b_loop, L_fold_tail_loop; |
6897 | |
6898 | // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge |
6899 | // context for the registers used, where all instructions below are using 128-bit mode |
6900 | // On EVEX without VL and BW, these instructions will all be AVX. |
6901 | lea(table, ExternalAddress(StubRoutines::crc_table_addr())); |
6902 | notl(crc); // ~crc |
6903 | cmpl(len, 16); |
6904 | jcc(Assembler::less, L_tail); |
6905 | |
6906 | // Align buffer to 16 bytes |
6907 | movl(tmp, buf); |
6908 | andl(tmp, 0xF); |
6909 | jccb(Assembler::zero, L_aligned);
6910 | subl(tmp, 16); |
6911 | addl(len, tmp); |
6912 | |
6913 | align(4); |
6914 | BIND(L_align_loop); |
6915 | movsbl(rax, Address(buf, 0)); // load byte with sign extension |
6916 | update_byte_crc32(crc, rax, table); |
6917 | increment(buf); |
6918 | incrementl(tmp); |
6919 | jccb(Assembler::less, L_align_loop);
6920 | |
6921 | BIND(L_aligned); |
6922 | movl(tmp, len); // save |
6923 | shrl(len, 4); |
6924 | jcc(Assembler::zero, L_tail_restore); |
6925 | |
6926 | // Fold crc into first bytes of vector |
6927 | movdqa(xmm1, Address(buf, 0)); |
6928 | movdl(rax, xmm1); |
6929 | xorl(crc, rax); |
6930 | if (VM_Version::supports_sse4_1()) { |
6931 | pinsrd(xmm1, crc, 0); |
6932 | } else { |
6933 | pinsrw(xmm1, crc, 0); |
6934 | shrl(crc, 16); |
6935 | pinsrw(xmm1, crc, 1); |
6936 | } |
6937 | addptr(buf, 16); |
6938 | subl(len, 4); // len > 0 |
6939 | jcc(Assembler::less, L_fold_tail); |
6940 | |
6941 | movdqa(xmm2, Address(buf, 0)); |
6942 | movdqa(xmm3, Address(buf, 16)); |
6943 | movdqa(xmm4, Address(buf, 32)); |
6944 | addptr(buf, 48); |
6945 | subl(len, 3); |
6946 | jcc(Assembler::lessEqual, L_fold_512b); |
6947 | |
6948 | // Fold total 512 bits of polynomial on each iteration, |
6949 | // 128 bits per each of 4 parallel streams. |
6950 | movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 32)); |
6951 | |
6952 | align32(); |
6953 | BIND(L_fold_512b_loop); |
6954 | fold_128bit_crc32(xmm1, xmm0, xmm5, buf, 0); |
6955 | fold_128bit_crc32(xmm2, xmm0, xmm5, buf, 16); |
6956 | fold_128bit_crc32(xmm3, xmm0, xmm5, buf, 32); |
6957 | fold_128bit_crc32(xmm4, xmm0, xmm5, buf, 48); |
6958 | addptr(buf, 64); |
6959 | subl(len, 4); |
6960 | jcc(Assembler::greater, L_fold_512b_loop); |
6961 | |
6962 | // Fold 512 bits to 128 bits. |
6963 | BIND(L_fold_512b); |
6964 | movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16)); |
6965 | fold_128bit_crc32(xmm1, xmm0, xmm5, xmm2); |
6966 | fold_128bit_crc32(xmm1, xmm0, xmm5, xmm3); |
6967 | fold_128bit_crc32(xmm1, xmm0, xmm5, xmm4); |
6968 | |
6969 | // Fold the rest of 128 bits data chunks |
6970 | BIND(L_fold_tail); |
6971 | addl(len, 3); |
6972 | jccb(Assembler::lessEqual, L_fold_128b);
6973 | movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16)); |
6974 | |
6975 | BIND(L_fold_tail_loop); |
6976 | fold_128bit_crc32(xmm1, xmm0, xmm5, buf, 0); |
6977 | addptr(buf, 16); |
6978 | decrementl(len); |
6979 | jccb(Assembler::greater, L_fold_tail_loop);
6980 | |
6981 | // Fold 128 bits in xmm1 down into 32 bits in crc register. |
6982 | BIND(L_fold_128b); |
6983 | movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr())); |
6984 | if (UseAVX > 0) { |
6985 | vpclmulqdq(xmm2, xmm0, xmm1, 0x1); |
6986 | vpand(xmm3, xmm0, xmm2, 0 /* vector_len */); |
6987 | vpclmulqdq(xmm0, xmm0, xmm3, 0x1); |
6988 | } else { |
6989 | movdqa(xmm2, xmm0); |
6990 | pclmulqdq(xmm2, xmm1, 0x1); |
6991 | movdqa(xmm3, xmm0); |
6992 | pand(xmm3, xmm2); |
6993 | pclmulqdq(xmm0, xmm3, 0x1); |
6994 | } |
6995 | psrldq(xmm1, 8); |
6996 | psrldq(xmm2, 4); |
6997 | pxor(xmm0, xmm1); |
6998 | pxor(xmm0, xmm2); |
6999 | |
7000 | // 8 8-bit folds to compute 32-bit CRC. |
7001 | for (int j = 0; j < 4; j++) { |
7002 | fold_8bit_crc32(xmm0, table, xmm1, rax); |
7003 | } |
7004 | movdl(crc, xmm0); // mov 32 bits to general register |
7005 | for (int j = 0; j < 4; j++) { |
7006 | fold_8bit_crc32(crc, table, rax); |
7007 | } |
7008 | |
7009 | BIND(L_tail_restore); |
7010 | movl(len, tmp); // restore |
7011 | BIND(L_tail); |
7012 | andl(len, 0xf); |
7013 | jccb(Assembler::zero, L_exit);
7014 | |
7015 | // Fold the rest of bytes |
7016 | align(4); |
7017 | BIND(L_tail_loop); |
7018 | movsbl(rax, Address(buf, 0)); // load byte with sign extension |
7019 | update_byte_crc32(crc, rax, table); |
7020 | increment(buf); |
7021 | decrementl(len); |
7022 | jccb(Assembler::greater, L_tail_loop);
7023 | |
7024 | BIND(L_exit); |
7025 | notl(crc); // ~c |
7026 | } |
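// Editorial sketch (not part of the original source): the byte-at-a-time
// CRC-32 that kernel_crc32() accelerates. 'table' is the 256-entry lookup
// table the alignment/tail loops above use through update_byte_crc32(); the
// leading and trailing notl(crc) correspond to the ~crc pre/post conditioning.
static uint32_t crc32_reference_sketch(uint32_t crc, const uint8_t* buf,
                                       int len, const uint32_t table[256]) {
  crc = ~crc;
  for (int i = 0; i < len; i++) {
    crc = table[(crc ^ buf[i]) & 0xFF] ^ (crc >> 8);
  }
  return ~crc;
}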
7027 | |
7028 | #ifdef _LP64
7029 | // Helper function for AVX 512 CRC32 |
7030 | // Fold 512-bit data chunks |
7031 | void MacroAssembler::fold512bit_crc32_avx512(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, |
7032 | Register pos, int offset) { |
7033 | evmovdquq(xmm3, Address(buf, pos, Address::times_1, offset), Assembler::AVX_512bit); |
7034 | evpclmulqdq(xtmp, xcrc, xK, 0x10, Assembler::AVX_512bit); // [123:64] |
7035 | evpclmulqdq(xmm2, xcrc, xK, 0x01, Assembler::AVX_512bit); // [63:0] |
7036 | evpxorq(xcrc, xtmp, xmm2, Assembler::AVX_512bit /* vector_len */); |
7037 | evpxorq(xcrc, xcrc, xmm3, Assembler::AVX_512bit /* vector_len */); |
7038 | } |
7039 | |
7040 | // Helper function for AVX 512 CRC32 |
7041 | // Compute CRC32 for < 256B buffers |
7042 | void MacroAssembler::kernel_crc32_avx512_256B(Register crc, Register buf, Register len, Register table, Register pos, |
7043 | Register tmp1, Register tmp2, Label& L_barrett, Label& L_16B_reduction_loop, |
7044 | Label& L_get_last_two_xmms, Label& L_128_done, Label& L_cleanup) { |
7045 | |
7046 | Label L_less_than_32, L_exact_16_left, L_less_than_16_left; |
7047 | Label L_less_than_8_left, L_less_than_4_left, L_less_than_2_left, L_zero_left; |
7048 | Label L_only_less_than_4, L_only_less_than_3, L_only_less_than_2; |
7049 | |
7050 | // check if there is enough buffer to be able to fold 16B at a time |
7051 | cmpl(len, 32); |
7052 | jcc(Assembler::less, L_less_than_32); |
7053 | |
7054 | // if there is, load the constants |
7055 | movdqu(xmm10, Address(table, 1 * 16)); //rk1 and rk2 in xmm10 |
7056 | movdl(xmm0, crc); // get the initial crc value |
7057 | movdqu(xmm7, Address(buf, pos, Address::times_1, 0 * 16)); //load the plaintext |
7058 | pxor(xmm7, xmm0); |
7059 | |
7060 | // update the buffer pointer |
7061 | addl(pos, 16); |
7062 | //update the counter.subtract 32 instead of 16 to save one instruction from the loop |
7063 | subl(len, 32); |
7064 | jmp(L_16B_reduction_loop); |
7065 | |
7066 | bind(L_less_than_32); |
7067 | //mov initial crc to the return value. this is necessary for zero - length buffers. |
7068 | movl(rax, crc); |
7069 | testl(len, len); |
7070 | jcc(Assembler::equal, L_cleanup); |
7071 | |
7072 | movdl(xmm0, crc); //get the initial crc value |
7073 | |
7074 | cmpl(len, 16); |
7075 | jcc(Assembler::equal, L_exact_16_left); |
7076 | jcc(Assembler::less, L_less_than_16_left); |
7077 | |
7078 | movdqu(xmm7, Address(buf, pos, Address::times_1, 0 * 16)); //load the plaintext |
7079 | pxor(xmm7, xmm0); //xor the initial crc value |
7080 | addl(pos, 16); |
7081 | subl(len, 16); |
7082 | movdqu(xmm10, Address(table, 1 * 16)); // rk1 and rk2 in xmm10 |
7083 | jmp(L_get_last_two_xmms); |
7084 | |
7085 | bind(L_less_than_16_left); |
7086 | //use stack space to load data less than 16 bytes, zero - out the 16B in memory first. |
7087 | pxor(xmm1, xmm1); |
7088 | movptr(tmp1, rsp); |
7089 | movdqu(Address(tmp1, 0 * 16), xmm1); |
7090 | |
7091 | cmpl(len, 4); |
7092 | jcc(Assembler::less, L_only_less_than_4); |
7093 | |
7094 | //backup the counter value |
7095 | movl(tmp2, len); |
7096 | cmpl(len, 8); |
7097 | jcc(Assembler::less, L_less_than_8_left); |
7098 | |
7099 | //load 8 Bytes |
7100 | movq(rax, Address(buf, pos, Address::times_1, 0 * 16)); |
7101 | movq(Address(tmp1, 0 * 16), rax); |
7102 | addptr(tmp1, 8); |
7103 | subl(len, 8); |
7104 | addl(pos, 8); |
7105 | |
7106 | bind(L_less_than_8_left); |
7107 | cmpl(len, 4); |
7108 | jcc(Assembler::less, L_less_than_4_left); |
7109 | |
7110 | //load 4 Bytes |
7111 | movl(rax, Address(buf, pos, Address::times_1, 0)); |
7112 | movl(Address(tmp1, 0 * 16), rax); |
7113 | addptr(tmp1, 4); |
7114 | subl(len, 4); |
7115 | addl(pos, 4); |
7116 | |
7117 | bind(L_less_than_4_left); |
7118 | cmpl(len, 2); |
7119 | jcc(Assembler::less, L_less_than_2_left); |
7120 | |
7121 | // load 2 Bytes |
7122 | movw(rax, Address(buf, pos, Address::times_1, 0)); |
7123 | movl(Address(tmp1, 0 * 16), rax); |
7124 | addptr(tmp1, 2); |
7125 | subl(len, 2); |
7126 | addl(pos, 2); |
7127 | |
7128 | bind(L_less_than_2_left); |
7129 | cmpl(len, 1); |
7130 | jcc(Assembler::less, L_zero_left); |
7131 | |
7132 | // load 1 Byte |
7133 | movb(rax, Address(buf, pos, Address::times_1, 0)); |
7134 | movb(Address(tmp1, 0 * 16), rax); |
7135 | |
7136 | bind(L_zero_left); |
7137 | movdqu(xmm7, Address(rsp, 0)); |
7138 | pxor(xmm7, xmm0); //xor the initial crc value |
7139 | |
7140 | lea(rax, ExternalAddress(StubRoutines::x86::shuf_table_crc32_avx512_addr())); |
7141 | movdqu(xmm0, Address(rax, tmp2)); |
7142 | pshufb(xmm7, xmm0); |
7143 | jmp(L_128_done); |
7144 | |
7145 | bind(L_exact_16_left); |
7146 | movdqu(xmm7, Address(buf, pos, Address::times_1, 0)); |
7147 | pxor(xmm7, xmm0); //xor the initial crc value |
7148 | jmp(L_128_done); |
7149 | |
7150 | bind(L_only_less_than_4); |
7151 | cmpl(len, 3); |
7152 | jcc(Assembler::less, L_only_less_than_3); |
7153 | |
7154 | // load 3 Bytes |
7155 | movb(rax, Address(buf, pos, Address::times_1, 0)); |
7156 | movb(Address(tmp1, 0), rax); |
7157 | |
7158 | movb(rax, Address(buf, pos, Address::times_1, 1)); |
7159 | movb(Address(tmp1, 1), rax); |
7160 | |
7161 | movb(rax, Address(buf, pos, Address::times_1, 2)); |
7162 | movb(Address(tmp1, 2), rax); |
7163 | |
7164 | movdqu(xmm7, Address(rsp, 0)); |
7165 | pxor(xmm7, xmm0); //xor the initial crc value |
7166 | |
7167 | pslldq(xmm7, 0x5); |
7168 | jmp(L_barrett); |
7169 | bind(L_only_less_than_3); |
7170 | cmpl(len, 2); |
7171 | jcc(Assembler::less, L_only_less_than_2); |
7172 | |
7173 | // load 2 Bytes |
7174 | movb(rax, Address(buf, pos, Address::times_1, 0)); |
7175 | movb(Address(tmp1, 0), rax); |
7176 | |
7177 | movb(rax, Address(buf, pos, Address::times_1, 1)); |
7178 | movb(Address(tmp1, 1), rax); |
7179 | |
7180 | movdqu(xmm7, Address(rsp, 0)); |
7181 | pxor(xmm7, xmm0); //xor the initial crc value |
7182 | |
7183 | pslldq(xmm7, 0x6); |
7184 | jmp(L_barrett); |
7185 | |
7186 | bind(L_only_less_than_2); |
7187 | //load 1 Byte |
7188 | movb(rax, Address(buf, pos, Address::times_1, 0)); |
7189 | movb(Address(tmp1, 0), rax); |
7190 | |
7191 | movdqu(xmm7, Address(rsp, 0)); |
7192 | pxor(xmm7, xmm0); //xor the initial crc value |
7193 | |
7194 | pslldq(xmm7, 0x7); |
7195 | } |
7196 | |
7197 | /** |
7198 | * Compute CRC32 using AVX512 instructions |
7199 | * @param crc register containing existing CRC (32-bit)
7200 | * @param buf register pointing to input byte buffer (byte*)
7201 | * @param len register containing number of bytes
7202 | * @param table address of crc or crc32c table
7203 | * @param tmp1 scratch register
7204 | * @param tmp2 scratch register
7205 | * @return rax result register
7206 | * |
7207 | * This routine is identical for crc32c with the exception of the precomputed constant |
7208 | * table which will be passed as the table argument. The calculation steps are |
7209 | * the same for both variants. |
7210 | */ |
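// Rough control flow, matching the labels below: large buffers are folded
// 256 bytes per iteration across four 512-bit accumulators, medium ones
// 128 bytes per iteration across two, after which the accumulated state is
// reduced 16 bytes at a time and finally collapsed to 32 bits with a Barrett
// reduction; inputs under 256 bytes are handed to kernel_crc32_avx512_256B.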
7211 | void MacroAssembler::kernel_crc32_avx512(Register crc, Register buf, Register len, Register table, Register tmp1, Register tmp2) { |
7212 | assert_different_registers(crc, buf, len, table, tmp1, tmp2, rax, r12); |
7213 | |
7214 | Label L_tail, L_tail_restore, L_tail_loop, L_exit, L_align_loop, L_aligned; |
7215 | Label L_fold_tail, L_fold_128b, L_fold_512b, L_fold_512b_loop, L_fold_tail_loop; |
7216 | Label L_less_than_256, L_fold_128_B_loop, L_fold_256_B_loop; |
7217 | Label L_fold_128_B_register, L_final_reduction_for_128, L_16B_reduction_loop; |
7218 | Label L_128_done, L_get_last_two_xmms, L_barrett, L_cleanup; |
7219 | |
7220 | const Register pos = r12; |
7221 | push(r12); |
7222 | subptr(rsp, 16 * 2 + 8); |
7223 | |
7224 | // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge |
7225 | // context for the registers used, where all instructions below are using 128-bit mode |
7226 | // On EVEX without VL and BW, these instructions will all be AVX. |
7227 | movl(pos, 0); |
7228 | |
7229 | // check if smaller than 256B |
7230 | cmpl(len, 256); |
7231 | jcc(Assembler::less, L_less_than_256); |
7232 | |
7233 | // load the initial crc value |
7234 | movdl(xmm10, crc); |
7235 | |
7236 | // receive the initial 64B data, xor the initial crc value |
7237 | evmovdquq(xmm0, Address(buf, pos, Address::times_1, 0 * 64), Assembler::AVX_512bit); |
7238 | evmovdquq(xmm4, Address(buf, pos, Address::times_1, 1 * 64), Assembler::AVX_512bit); |
7239 | evpxorq(xmm0, xmm0, xmm10, Assembler::AVX_512bit); |
7240 | evbroadcasti32x4(xmm10, Address(table, 2 * 16), Assembler::AVX_512bit); //zmm10 has rk3 and rk4 |
7241 | |
7242 | subl(len, 256); |
7243 | cmpl(len, 256); |
7244 | jcc(Assembler::less, L_fold_128_B_loop); |
7245 | |
7246 | evmovdquq(xmm7, Address(buf, pos, Address::times_1, 2 * 64), Assembler::AVX_512bit); |
7247 | evmovdquq(xmm8, Address(buf, pos, Address::times_1, 3 * 64), Assembler::AVX_512bit); |
7248 | evbroadcasti32x4(xmm16, Address(table, 0 * 16), Assembler::AVX_512bit); //zmm16 has rk-1 and rk-2 |
7249 | subl(len, 256); |
7250 | |
7251 | bind(L_fold_256_B_loop); |
7252 | addl(pos, 256); |
7253 | fold512bit_crc32_avx512(xmm0, xmm16, xmm1, buf, pos, 0 * 64); |
7254 | fold512bit_crc32_avx512(xmm4, xmm16, xmm1, buf, pos, 1 * 64); |
7255 | fold512bit_crc32_avx512(xmm7, xmm16, xmm1, buf, pos, 2 * 64); |
7256 | fold512bit_crc32_avx512(xmm8, xmm16, xmm1, buf, pos, 3 * 64); |
7257 | |
7258 | subl(len, 256); |
7259 | jcc(Assembler::greaterEqual, L_fold_256_B_loop); |
7260 | |
7261 | // Fold 256 into 128 |
7262 | addl(pos, 256); |
7263 | evpclmulqdq(xmm1, xmm0, xmm10, 0x01, Assembler::AVX_512bit); |
7264 | evpclmulqdq(xmm2, xmm0, xmm10, 0x10, Assembler::AVX_512bit); |
7265 | vpternlogq(xmm7, 0x96, xmm1, xmm2, Assembler::AVX_512bit); // xor ABC |
7266 | |
7267 | evpclmulqdq(xmm5, xmm4, xmm10, 0x01, Assembler::AVX_512bit); |
7268 | evpclmulqdq(xmm6, xmm4, xmm10, 0x10, Assembler::AVX_512bit); |
7269 | vpternlogq(xmm8, 0x96, xmm5, xmm6, Assembler::AVX_512bit); // xor ABC |
7270 | |
7271 | evmovdquq(xmm0, xmm7, Assembler::AVX_512bit); |
7272 | evmovdquq(xmm4, xmm8, Assembler::AVX_512bit); |
7273 | |
7274 | addl(len, 128); |
7275 | jmp(L_fold_128_B_register); |
7276 | |
7277 | // at this section of the code, there are 128 * x + y (0 <= y < 128) bytes of buffer. The fold_128_B_loop
7278 | // loop will fold 128B at a time until we have 128 + y Bytes of buffer |
7279 | |
7280 | // fold 128B at a time. This section of the code folds 8 xmm registers in parallel
7281 | bind(L_fold_128_B_loop); |
7282 | addl(pos, 128); |
7283 | fold512bit_crc32_avx512(xmm0, xmm10, xmm1, buf, pos, 0 * 64); |
7284 | fold512bit_crc32_avx512(xmm4, xmm10, xmm1, buf, pos, 1 * 64); |
7285 | |
7286 | subl(len, 128); |
7287 | jcc(Assembler::greaterEqual, L_fold_128_B_loop); |
7288 | |
7289 | addl(pos, 128); |
7290 | |
7291 | // at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128 |
7292 | // the 128B of folded data is in 8 of the xmm registers : xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 |
7293 | bind(L_fold_128_B_register); |
7294 | evmovdquq(xmm16, Address(table, 5 * 16), Assembler::AVX_512bit); // multiply by rk9-rk16 |
7295 | evmovdquq(xmm11, Address(table, 9 * 16), Assembler::AVX_512bit); // multiply by rk17-rk20, rk1,rk2, 0,0 |
7296 | evpclmulqdq(xmm1, xmm0, xmm16, 0x01, Assembler::AVX_512bit); |
7297 | evpclmulqdq(xmm2, xmm0, xmm16, 0x10, Assembler::AVX_512bit); |
7298 | // save last that has no multiplicand |
7299 | vextracti64x2(xmm7, xmm4, 3); |
7300 | |
7301 | evpclmulqdq(xmm5, xmm4, xmm11, 0x01, Assembler::AVX_512bit); |
7302 | evpclmulqdq(xmm6, xmm4, xmm11, 0x10, Assembler::AVX_512bit); |
7303 | // Needed later in reduction loop |
7304 | movdqu(xmm10, Address(table, 1 * 16)); |
7305 | vpternlogq(xmm1, 0x96, xmm2, xmm5, Assembler::AVX_512bit); // xor ABC |
7306 | vpternlogq(xmm1, 0x96, xmm6, xmm7, Assembler::AVX_512bit); // xor ABC |
7307 | |
7308 | // Swap 1,0,3,2 - 01 00 11 10 |
7309 | evshufi64x2(xmm8, xmm1, xmm1, 0x4e, Assembler::AVX_512bit); |
7310 | evpxorq(xmm8, xmm8, xmm1, Assembler::AVX_256bit); |
7311 | vextracti128(xmm5, xmm8, 1); |
7312 | evpxorq(xmm7, xmm5, xmm8, Assembler::AVX_128bit); |
7313 | |
7314 | // instead of 128, we add 128 - 16 to the loop counter to save 1 instruction from the loop |
7315 | // instead of a cmp instruction, we use the negative flag with the jl instruction |
7316 | addl(len, 128 - 16); |
7317 | jcc(Assembler::less, L_final_reduction_for_128); |
7318 | |
7319 | bind(L_16B_reduction_loop); |
7320 | vpclmulqdq(xmm8, xmm7, xmm10, 0x01); |
7321 | vpclmulqdq(xmm7, xmm7, xmm10, 0x10); |
7322 | vpxor(xmm7, xmm7, xmm8, Assembler::AVX_128bit); |
7323 | movdqu(xmm0, Address(buf, pos, Address::times_1, 0 * 16)); |
7324 | vpxor(xmm7, xmm7, xmm0, Assembler::AVX_128bit); |
7325 | addl(pos, 16); |
7326 | subl(len, 16); |
7327 | jcc(Assembler::greaterEqual, L_16B_reduction_loop); |
7328 | |
7329 | bind(L_final_reduction_for_128); |
7330 | addl(len, 16); |
7331 | jcc(Assembler::equal, L_128_done); |
7332 | |
7333 | bind(L_get_last_two_xmms); |
7334 | movdqu(xmm2, xmm7); |
7335 | addl(pos, len); |
7336 | movdqu(xmm1, Address(buf, pos, Address::times_1, -16)); |
7337 | subl(pos, len); |
7338 | |
7339 | // get rid of the extra data that was loaded before |
7340 | // load the shift constant |
7341 | lea(rax, ExternalAddress(StubRoutines::x86::shuf_table_crc32_avx512_addr())); |
7342 | movdqu(xmm0, Address(rax, len)); |
7343 | addl(rax, len); |
7344 | |
7345 | vpshufb(xmm7, xmm7, xmm0, Assembler::AVX_128bit); |
7346 | //Change mask to 512 |
7347 | vpxor(xmm0, xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_avx512_addr() + 2 * 16), Assembler::AVX_128bit, tmp2); |
7348 | vpshufb(xmm2, xmm2, xmm0, Assembler::AVX_128bit); |
7349 | |
7350 | blendvpb(xmm2, xmm2, xmm1, xmm0, Assembler::AVX_128bit); |
7351 | vpclmulqdq(xmm8, xmm7, xmm10, 0x01); |
7352 | vpclmulqdq(xmm7, xmm7, xmm10, 0x10); |
7353 | vpxor(xmm7, xmm7, xmm8, Assembler::AVX_128bit); |
7354 | vpxor(xmm7, xmm7, xmm2, Assembler::AVX_128bit); |
7355 | |
7356 | bind(L_128_done); |
7357 | // compute crc of a 128-bit value |
7358 | movdqu(xmm10, Address(table, 3 * 16)); |
7359 | movdqu(xmm0, xmm7); |
7360 | |
7361 | // 64b fold |
7362 | vpclmulqdq(xmm7, xmm7, xmm10, 0x0); |
7363 | vpsrldq(xmm0, xmm0, 0x8, Assembler::AVX_128bit); |
7364 | vpxor(xmm7, xmm7, xmm0, Assembler::AVX_128bit); |
7365 | |
7366 | // 32b fold |
7367 | movdqu(xmm0, xmm7); |
7368 | vpslldq(xmm7, xmm7, 0x4, Assembler::AVX_128bit); |
7369 | vpclmulqdq(xmm7, xmm7, xmm10, 0x10); |
7370 | vpxor(xmm7, xmm7, xmm0, Assembler::AVX_128bit); |
7371 | jmp(L_barrett); |
7372 | |
7373 | bind(L_less_than_256); |
7374 | kernel_crc32_avx512_256B(crc, buf, len, table, pos, tmp1, tmp2, L_barrett, L_16B_reduction_loop, L_get_last_two_xmms, L_128_done, L_cleanup); |
7375 | |
7376 | //barrett reduction |
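// The 128-bit remainder in xmm7 is reduced to the final 32-bit CRC with
// Barrett's method: two carry-less multiplications against the constant pair
// stored at table offset 4 * 16 (commonly mu = floor(x^64 / P) and the
// polynomial P itself), interleaved with masking and XORs, after which the
// result is extracted from dword 2 of xmm7.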
7377 | bind(L_barrett); |
7378 | vpand(xmm7, xmm7, ExternalAddress(StubRoutines::x86::crc_by128_masks_avx512_addr() + 1 * 16), Assembler::AVX_128bit, tmp2); |
7379 | movdqu(xmm1, xmm7); |
7380 | movdqu(xmm2, xmm7); |
7381 | movdqu(xmm10, Address(table, 4 * 16)); |
7382 | |
7383 | pclmulqdq(xmm7, xmm10, 0x0); |
7384 | pxor(xmm7, xmm2); |
7385 | vpand(xmm7, xmm7, ExternalAddress(StubRoutines::x86::crc_by128_masks_avx512_addr()), Assembler::AVX_128bit, tmp2); |
7386 | movdqu(xmm2, xmm7); |
7387 | pclmulqdq(xmm7, xmm10, 0x10); |
7388 | pxor(xmm7, xmm2); |
7389 | pxor(xmm7, xmm1); |
7390 | pextrd(crc, xmm7, 2); |
7391 | |
7392 | bind(L_cleanup); |
7393 | addptr(rsp, 16 * 2 + 8); |
7394 | pop(r12); |
7395 | } |
7396 | |
7397 | // S. Gueron / Information Processing Letters 112 (2012) 184 |
7398 | // Algorithm 4: Computing carry-less multiplication using a precomputed lookup table. |
7399 | // Input: A 32 bit value B = [byte3, byte2, byte1, byte0]. |
7400 | // Output: the 64-bit carry-less product of B * CONST |
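// In scalar form the lookup below computes (a sketch of the same arithmetic,
// not generated code; T is the table of 8-byte entries at
// StubRoutines::crc32c_table_addr(), offset by n * 256 entries):
//
//   uint64_t q1 = T[ B        & 0xFF];
//   uint64_t q2 = T[(B >>  8) & 0xFF] <<  8;
//   uint64_t q3 = T[(B >> 16) & 0xFF] << 16;
//   uint64_t q4 = T[(B >> 24) & 0xFF] << 24;
//   uint64_t result = q1 ^ q2 ^ q3 ^ q4;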
7401 | void MacroAssembler::crc32c_ipl_alg4(Register in, uint32_t n, |
7402 | Register tmp1, Register tmp2, Register tmp3) { |
7403 | lea(tmp3, ExternalAddress(StubRoutines::crc32c_table_addr())); |
7404 | if (n > 0) { |
7405 | addq(tmp3, n * 256 * 8); |
7406 | } |
7407 | // Q1 = TABLEExt[n][B & 0xFF]; |
7408 | movl(tmp1, in); |
7409 | andl(tmp1, 0x000000FF); |
7410 | shll(tmp1, 3); |
7411 | addq(tmp1, tmp3); |
7412 | movq(tmp1, Address(tmp1, 0)); |
7413 | |
7414 | // Q2 = TABLEExt[n][B >> 8 & 0xFF]; |
7415 | movl(tmp2, in); |
7416 | shrl(tmp2, 8); |
7417 | andl(tmp2, 0x000000FF); |
7418 | shll(tmp2, 3); |
7419 | addq(tmp2, tmp3); |
7420 | movq(tmp2, Address(tmp2, 0)); |
7421 | |
7422 | shlq(tmp2, 8); |
7423 | xorq(tmp1, tmp2); |
7424 | |
7425 | // Q3 = TABLEExt[n][B >> 16 & 0xFF]; |
7426 | movl(tmp2, in); |
7427 | shrl(tmp2, 16); |
7428 | andl(tmp2, 0x000000FF); |
7429 | shll(tmp2, 3); |
7430 | addq(tmp2, tmp3); |
7431 | movq(tmp2, Address(tmp2, 0)); |
7432 | |
7433 | shlq(tmp2, 16); |
7434 | xorq(tmp1, tmp2); |
7435 | |
7436 | // Q4 = TABLEExt[n][B >> 24 & 0xFF]; |
7437 | shrl(in, 24); |
7438 | andl(in, 0x000000FF); |
7439 | shll(in, 3); |
7440 | addq(in, tmp3); |
7441 | movq(in, Address(in, 0)); |
7442 | |
7443 | shlq(in, 24); |
7444 | xorq(in, tmp1); |
7445 | // return Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24; |
7446 | } |
7447 | |
7448 | void MacroAssembler::crc32c_pclmulqdq(XMMRegister w_xtmp1, |
7449 | Register in_out, |
7450 | uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported, |
7451 | XMMRegister w_xtmp2, |
7452 | Register tmp1, |
7453 | Register n_tmp2, Register n_tmp3) { |
7454 | if (is_pclmulqdq_supported) { |
7455 | movdl(w_xtmp1, in_out); // modified blindly |
7456 | |
7457 | movl(tmp1, const_or_pre_comp_const_index); |
7458 | movdl(w_xtmp2, tmp1); |
7459 | pclmulqdq(w_xtmp1, w_xtmp2, 0); |
7460 | |
7461 | movdq(in_out, w_xtmp1); |
7462 | } else { |
7463 | crc32c_ipl_alg4(in_out, const_or_pre_comp_const_index, tmp1, n_tmp2, n_tmp3); |
7464 | } |
7465 | } |
7466 | |
7467 | // Recombination Alternative 2: No bit-reflections |
7468 | // T1 = (CRC_A * U1) << 1 |
7469 | // T2 = (CRC_B * U2) << 1 |
7470 | // C1 = T1 >> 32 |
7471 | // C2 = T2 >> 32 |
7472 | // T1 = T1 & 0xFFFFFFFF |
7473 | // T2 = T2 & 0xFFFFFFFF |
7474 | // T1 = CRC32(0, T1) |
7475 | // T2 = CRC32(0, T2) |
7476 | // C1 = C1 ^ T1 |
7477 | // C2 = C2 ^ T2 |
7478 | // CRC = C1 ^ C2 ^ CRC_C |
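// The pseudocode above maps directly onto the body below: the two CRC_x * Ux
// carry-less products come from crc32c_pclmulqdq(), the shift by one is the
// shlq, the 32-bit CRC32(0, Tx) reductions use the crc32 instruction on the
// low halves, and the final XORs merge C1, C2 and CRC_C into a single value.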
7479 | void MacroAssembler::crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2, |
7480 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, |
7481 | Register tmp1, Register tmp2, |
7482 | Register n_tmp3) { |
7483 | crc32c_pclmulqdq(w_xtmp1, in_out, const_or_pre_comp_const_index_u1, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3); |
7484 | crc32c_pclmulqdq(w_xtmp2, in1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3); |
7485 | shlq(in_out, 1); |
7486 | movl(tmp1, in_out); |
7487 | shrq(in_out, 32); |
7488 | xorl(tmp2, tmp2); |
7489 | crc32(tmp2, tmp1, 4); |
7490 | xorl(in_out, tmp2); // we don't care about upper 32 bit contents here |
7491 | shlq(in1, 1); |
7492 | movl(tmp1, in1); |
7493 | shrq(in1, 32); |
7494 | xorl(tmp2, tmp2); |
7495 | crc32(tmp2, tmp1, 4); |
7496 | xorl(in1, tmp2); |
7497 | xorl(in_out, in1); |
7498 | xorl(in_out, in2); |
7499 | } |
7500 | |
7501 | // Set N to predefined value |
7502 | // Subtract from the length of the buffer
7503 | // execute in a loop: |
7504 | // CRC_A = 0xFFFFFFFF, CRC_B = 0, CRC_C = 0 |
7505 | // for i = 1 to N do |
7506 | // CRC_A = CRC32(CRC_A, A[i]) |
7507 | // CRC_B = CRC32(CRC_B, B[i]) |
7508 | // CRC_C = CRC32(CRC_C, C[i]) |
7509 | // end for |
7510 | // Recombine |
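// Splitting the buffer into three equal partitions and keeping three
// independent crc32 dependency chains in flight (8 bytes per partition per
// iteration) lets the loop below hide the latency of the crc32 instruction;
// once a partition triple has been consumed, the three partial CRCs are merged
// with crc32c_rec_alt2().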
7511 | void MacroAssembler::crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, |
7512 | Register in_out1, Register in_out2, Register in_out3, |
7513 | Register tmp1, Register tmp2, Register tmp3, |
7514 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, |
7515 | Register tmp4, Register tmp5, |
7516 | Register n_tmp6) { |
7517 | Label L_processPartitions; |
7518 | Label L_processPartition; |
7519 | Label L_exit; |
7520 | |
7521 | bind(L_processPartitions); |
7522 | cmpl(in_out1, 3 * size); |
7523 | jcc(Assembler::less, L_exit); |
7524 | xorl(tmp1, tmp1); |
7525 | xorl(tmp2, tmp2); |
7526 | movq(tmp3, in_out2); |
7527 | addq(tmp3, size); |
7528 | |
7529 | bind(L_processPartition); |
7530 | crc32(in_out3, Address(in_out2, 0), 8); |
7531 | crc32(tmp1, Address(in_out2, size), 8); |
7532 | crc32(tmp2, Address(in_out2, size * 2), 8); |
7533 | addq(in_out2, 8); |
7534 | cmpq(in_out2, tmp3); |
7535 | jcc(Assembler::less, L_processPartition); |
7536 | crc32c_rec_alt2(const_or_pre_comp_const_index_u1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, in_out3, tmp1, tmp2, |
7537 | w_xtmp1, w_xtmp2, w_xtmp3, |
7538 | tmp4, tmp5, |
7539 | n_tmp6); |
7540 | addq(in_out2, 2 * size); |
7541 | subl(in_out1, 3 * size); |
7542 | jmp(L_processPartitions); |
7543 | |
7544 | bind(L_exit); |
7545 | } |
7546 | #else |
7547 | void MacroAssembler::crc32c_ipl_alg4(Register in_out, uint32_t n, |
7548 | Register tmp1, Register tmp2, Register tmp3, |
7549 | XMMRegister xtmp1, XMMRegister xtmp2) { |
7550 | lea(tmp3, ExternalAddress(StubRoutines::crc32c_table_addr())); |
7551 | if (n > 0) { |
7552 | addl(tmp3, n * 256 * 8); |
7553 | } |
7554 | // Q1 = TABLEExt[n][B & 0xFF]; |
7555 | movl(tmp1, in_out); |
7556 | andl(tmp1, 0x000000FF); |
7557 | shll(tmp1, 3); |
7558 | addl(tmp1, tmp3); |
7559 | movq(xtmp1, Address(tmp1, 0)); |
7560 | |
7561 | // Q2 = TABLEExt[n][B >> 8 & 0xFF]; |
7562 | movl(tmp2, in_out); |
7563 | shrl(tmp2, 8); |
7564 | andl(tmp2, 0x000000FF); |
7565 | shll(tmp2, 3); |
7566 | addl(tmp2, tmp3); |
7567 | movq(xtmp2, Address(tmp2, 0)); |
7568 | |
7569 | psllq(xtmp2, 8); |
7570 | pxor(xtmp1, xtmp2); |
7571 | |
7572 | // Q3 = TABLEExt[n][B >> 16 & 0xFF]; |
7573 | movl(tmp2, in_out); |
7574 | shrl(tmp2, 16); |
7575 | andl(tmp2, 0x000000FF); |
7576 | shll(tmp2, 3); |
7577 | addl(tmp2, tmp3); |
7578 | movq(xtmp2, Address(tmp2, 0)); |
7579 | |
7580 | psllq(xtmp2, 16); |
7581 | pxor(xtmp1, xtmp2); |
7582 | |
7583 | // Q4 = TABLEExt[n][B >> 24 & 0xFF]; |
7584 | shrl(in_out, 24); |
7585 | andl(in_out, 0x000000FF); |
7586 | shll(in_out, 3); |
7587 | addl(in_out, tmp3); |
7588 | movq(xtmp2, Address(in_out, 0)); |
7589 | |
7590 | psllq(xtmp2, 24); |
7591 | pxor(xtmp1, xtmp2); // Result in CXMM |
7592 | // return Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24; |
7593 | } |
7594 | |
7595 | void MacroAssembler::crc32c_pclmulqdq(XMMRegister w_xtmp1, |
7596 | Register in_out, |
7597 | uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported, |
7598 | XMMRegister w_xtmp2, |
7599 | Register tmp1, |
7600 | Register n_tmp2, Register n_tmp3) { |
7601 | if (is_pclmulqdq_supported) { |
7602 | movdl(w_xtmp1, in_out); |
7603 | |
7604 | movl(tmp1, const_or_pre_comp_const_index); |
7605 | movdl(w_xtmp2, tmp1); |
7606 | pclmulqdq(w_xtmp1, w_xtmp2, 0); |
7607 | // Keep result in XMM since GPR is 32 bit in length |
7608 | } else { |
7609 | crc32c_ipl_alg4(in_out, const_or_pre_comp_const_index, tmp1, n_tmp2, n_tmp3, w_xtmp1, w_xtmp2); |
7610 | } |
7611 | } |
7612 | |
7613 | void MacroAssembler::crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2, |
7614 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, |
7615 | Register tmp1, Register tmp2, |
7616 | Register n_tmp3) { |
7617 | crc32c_pclmulqdq(w_xtmp1, in_out, const_or_pre_comp_const_index_u1, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3); |
7618 | crc32c_pclmulqdq(w_xtmp2, in1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3); |
7619 | |
7620 | psllq(w_xtmp1, 1); |
7621 | movdl(tmp1, w_xtmp1); |
7622 | psrlq(w_xtmp1, 32); |
7623 | movdl(in_out, w_xtmp1); |
7624 | |
7625 | xorl(tmp2, tmp2); |
7626 | crc32(tmp2, tmp1, 4); |
7627 | xorl(in_out, tmp2); |
7628 | |
7629 | psllq(w_xtmp2, 1); |
7630 | movdl(tmp1, w_xtmp2); |
7631 | psrlq(w_xtmp2, 32); |
7632 | movdl(in1, w_xtmp2); |
7633 | |
7634 | xorl(tmp2, tmp2); |
7635 | crc32(tmp2, tmp1, 4); |
7636 | xorl(in1, tmp2); |
7637 | xorl(in_out, in1); |
7638 | xorl(in_out, in2); |
7639 | } |
7640 | |
7641 | void MacroAssembler::crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, |
7642 | Register in_out1, Register in_out2, Register in_out3, |
7643 | Register tmp1, Register tmp2, Register tmp3, |
7644 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, |
7645 | Register tmp4, Register tmp5, |
7646 | Register n_tmp6) { |
7647 | Label L_processPartitions; |
7648 | Label L_processPartition; |
7649 | Label L_exit; |
7650 | |
7651 | bind(L_processPartitions); |
7652 | cmpl(in_out1, 3 * size); |
7653 | jcc(Assembler::less, L_exit); |
7654 | xorl(tmp1, tmp1); |
7655 | xorl(tmp2, tmp2); |
7656 | movl(tmp3, in_out2); |
7657 | addl(tmp3, size); |
7658 | |
7659 | bind(L_processPartition); |
7660 | crc32(in_out3, Address(in_out2, 0), 4); |
7661 | crc32(tmp1, Address(in_out2, size), 4); |
7662 | crc32(tmp2, Address(in_out2, size*2), 4); |
7663 | crc32(in_out3, Address(in_out2, 0+4), 4); |
7664 | crc32(tmp1, Address(in_out2, size+4), 4); |
7665 | crc32(tmp2, Address(in_out2, size*2+4), 4); |
7666 | addl(in_out2, 8); |
7667 | cmpl(in_out2, tmp3); |
7668 | jcc(Assembler::less, L_processPartition); |
7669 | |
7670 | push(tmp3); |
7671 | push(in_out1); |
7672 | push(in_out2); |
7673 | tmp4 = tmp3; |
7674 | tmp5 = in_out1; |
7675 | n_tmp6 = in_out2; |
7676 | |
7677 | crc32c_rec_alt2(const_or_pre_comp_const_index_u1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, in_out3, tmp1, tmp2, |
7678 | w_xtmp1, w_xtmp2, w_xtmp3, |
7679 | tmp4, tmp5, |
7680 | n_tmp6); |
7681 | |
7682 | pop(in_out2); |
7683 | pop(in_out1); |
7684 | pop(tmp3); |
7685 | |
7686 | addl(in_out2, 2 * size); |
7687 | subl(in_out1, 3 * size); |
7688 | jmp(L_processPartitions); |
7689 | |
7690 | bind(L_exit); |
7691 | } |
7692 | #endif //LP64 |
7693 | |
7694 | #ifdef _LP64
7695 | // Algorithm 2: Pipelined usage of the CRC32 instruction. |
7696 | // Input: A buffer I of L bytes. |
7697 | // Output: the CRC32C value of the buffer. |
7698 | // Notations: |
7699 | // Write L = 24N + r, with N = floor (L/24). |
7700 | // r = L mod 24 (0 <= r < 24). |
7701 | // Consider I as the concatenation of A|B|C|R, where A, B, C each consist of
7702 | // N quadwords, and R consists of r bytes. |
7703 | // A[j] = I [8j+7:8j], j= 0, 1, ..., N-1 |
7704 | // B[j] = I [N + 8j+7:N + 8j], j= 0, 1, ..., N-1 |
7705 | // C[j] = I [2N + 8j+7:2N + 8j], j= 0, 1, ..., N-1 |
7706 | // if r > 0 R[j] = I [3N +j], j= 0, 1, ...,r-1 |
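// Example of the notation: for L = 100 bytes, N = floor(100 / 24) = 4 and
// r = 100 mod 24 = 4, so A, B and C each cover 4 quadwords (32 bytes) and the
// trailing r bytes fall through to the word-by-word / byte-by-byte tail loops.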
7707 | void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2, |
7708 | Register tmp1, Register tmp2, Register tmp3, |
7709 | Register tmp4, Register tmp5, Register tmp6, |
7710 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, |
7711 | bool is_pclmulqdq_supported) { |
7712 | uint32_t const_or_pre_comp_const_index[CRC32C_NUM_PRECOMPUTED_CONSTANTS]; |
7713 | Label L_wordByWord; |
7714 | Label L_byteByByteProlog; |
7715 | Label L_byteByByte; |
7716 | Label L_exit; |
7717 | |
7718 | if (is_pclmulqdq_supported ) { |
7719 | const_or_pre_comp_const_index[1] = *(uint32_t *)StubRoutines::_crc32c_table_addr; |
7720 | const_or_pre_comp_const_index[0] = *((uint32_t *)StubRoutines::_crc32c_table_addr+1); |
7721 | |
7722 | const_or_pre_comp_const_index[3] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 2); |
7723 | const_or_pre_comp_const_index[2] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 3); |
7724 | |
7725 | const_or_pre_comp_const_index[5] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 4); |
7726 | const_or_pre_comp_const_index[4] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 5); |
7727 | assert((CRC32C_NUM_PRECOMPUTED_CONSTANTS - 1 ) == 5, "Checking whether you declared all of the constants based on the number of \"chunks\"");
7728 | } else { |
7729 | const_or_pre_comp_const_index[0] = 1; |
7730 | const_or_pre_comp_const_index[1] = 0; |
7731 | |
7732 | const_or_pre_comp_const_index[2] = 3; |
7733 | const_or_pre_comp_const_index[3] = 2; |
7734 | |
7735 | const_or_pre_comp_const_index[4] = 5; |
7736 | const_or_pre_comp_const_index[5] = 4; |
7737 | } |
7738 | crc32c_proc_chunk(CRC32C_HIGH, const_or_pre_comp_const_index[0], const_or_pre_comp_const_index[1], is_pclmulqdq_supported, |
7739 | in2, in1, in_out, |
7740 | tmp1, tmp2, tmp3, |
7741 | w_xtmp1, w_xtmp2, w_xtmp3, |
7742 | tmp4, tmp5, |
7743 | tmp6); |
7744 | crc32c_proc_chunk(CRC32C_MIDDLE, const_or_pre_comp_const_index[2], const_or_pre_comp_const_index[3], is_pclmulqdq_supported, |
7745 | in2, in1, in_out, |
7746 | tmp1, tmp2, tmp3, |
7747 | w_xtmp1, w_xtmp2, w_xtmp3, |
7748 | tmp4, tmp5, |
7749 | tmp6); |
7750 | crc32c_proc_chunk(CRC32C_LOW, const_or_pre_comp_const_index[4], const_or_pre_comp_const_index[5], is_pclmulqdq_supported, |
7751 | in2, in1, in_out, |
7752 | tmp1, tmp2, tmp3, |
7753 | w_xtmp1, w_xtmp2, w_xtmp3, |
7754 | tmp4, tmp5, |
7755 | tmp6); |
7756 | movl(tmp1, in2); |
7757 | andl(tmp1, 0x00000007); |
7758 | negl(tmp1); |
7759 | addl(tmp1, in2); |
7760 | addq(tmp1, in1); |
7761 | |
7762 | BIND(L_wordByWord); |
7763 | cmpq(in1, tmp1); |
7764 | jcc(Assembler::greaterEqual, L_byteByByteProlog); |
7765 | crc32(in_out, Address(in1, 0), 4); |
7766 | addq(in1, 4); |
7767 | jmp(L_wordByWord); |
7768 | |
7769 | BIND(L_byteByByteProlog); |
7770 | andl(in2, 0x00000007); |
7771 | movl(tmp2, 1); |
7772 | |
7773 | BIND(L_byteByByte); |
7774 | cmpl(tmp2, in2); |
7775 | jccb(Assembler::greater, L_exit);
7776 | crc32(in_out, Address(in1, 0), 1); |
7777 | incq(in1); |
7778 | incl(tmp2); |
7779 | jmp(L_byteByByte); |
7780 | |
7781 | BIND(L_exit); |
7782 | } |
7783 | #else |
7784 | void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2, |
7785 | Register tmp1, Register tmp2, Register tmp3, |
7786 | Register tmp4, Register tmp5, Register tmp6, |
7787 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, |
7788 | bool is_pclmulqdq_supported) { |
7789 | uint32_t const_or_pre_comp_const_index[CRC32C_NUM_PRECOMPUTED_CONSTANTS]; |
7790 | Label L_wordByWord; |
7791 | Label L_byteByByteProlog; |
7792 | Label L_byteByByte; |
7793 | Label L_exit; |
7794 | |
7795 | if (is_pclmulqdq_supported) { |
7796 | const_or_pre_comp_const_index[1] = *(uint32_t *)StubRoutines::_crc32c_table_addr; |
7797 | const_or_pre_comp_const_index[0] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 1); |
7798 | |
7799 | const_or_pre_comp_const_index[3] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 2); |
7800 | const_or_pre_comp_const_index[2] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 3); |
7801 | |
7802 | const_or_pre_comp_const_index[5] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 4); |
7803 | const_or_pre_comp_const_index[4] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 5); |
7804 | } else { |
7805 | const_or_pre_comp_const_index[0] = 1; |
7806 | const_or_pre_comp_const_index[1] = 0; |
7807 | |
7808 | const_or_pre_comp_const_index[2] = 3; |
7809 | const_or_pre_comp_const_index[3] = 2; |
7810 | |
7811 | const_or_pre_comp_const_index[4] = 5; |
7812 | const_or_pre_comp_const_index[5] = 4; |
7813 | } |
7814 | crc32c_proc_chunk(CRC32C_HIGH, const_or_pre_comp_const_index[0], const_or_pre_comp_const_index[1], is_pclmulqdq_supported, |
7815 | in2, in1, in_out, |
7816 | tmp1, tmp2, tmp3, |
7817 | w_xtmp1, w_xtmp2, w_xtmp3, |
7818 | tmp4, tmp5, |
7819 | tmp6); |
7820 | crc32c_proc_chunk(CRC32C_MIDDLE, const_or_pre_comp_const_index[2], const_or_pre_comp_const_index[3], is_pclmulqdq_supported, |
7821 | in2, in1, in_out, |
7822 | tmp1, tmp2, tmp3, |
7823 | w_xtmp1, w_xtmp2, w_xtmp3, |
7824 | tmp4, tmp5, |
7825 | tmp6); |
7826 | crc32c_proc_chunk(CRC32C_LOW, const_or_pre_comp_const_index[4], const_or_pre_comp_const_index[5], is_pclmulqdq_supported, |
7827 | in2, in1, in_out, |
7828 | tmp1, tmp2, tmp3, |
7829 | w_xtmp1, w_xtmp2, w_xtmp3, |
7830 | tmp4, tmp5, |
7831 | tmp6); |
7832 | movl(tmp1, in2); |
7833 | andl(tmp1, 0x00000007); |
7834 | negl(tmp1); |
7835 | addl(tmp1, in2); |
7836 | addl(tmp1, in1); |
7837 | |
7838 | BIND(L_wordByWord); |
7839 | cmpl(in1, tmp1); |
7840 | jcc(Assembler::greaterEqual, L_byteByByteProlog); |
7841 | crc32(in_out, Address(in1,0), 4); |
7842 | addl(in1, 4); |
7843 | jmp(L_wordByWord); |
7844 | |
7845 | BIND(L_byteByByteProlog); |
7846 | andl(in2, 0x00000007); |
7847 | movl(tmp2, 1); |
7848 | |
7849 | BIND(L_byteByByte); |
7850 | cmpl(tmp2, in2); |
7851 | jccb(Assembler::greater, L_exit);
7852 | movb(tmp1, Address(in1, 0)); |
7853 | crc32(in_out, tmp1, 1); |
7854 | incl(in1); |
7855 | incl(tmp2); |
7856 | jmp(L_byteByByte); |
7857 | |
7858 | BIND(L_exit); |
7859 | } |
7860 | #endif // LP64 |
7861 | #undef BIND |
7862 | #undef BLOCK_COMMENT |
7863 | |
7864 | // Compress char[] array to byte[]. |
7865 | // ..\jdk\src\java.base\share\classes\java\lang\StringUTF16.java |
7866 | // @IntrinsicCandidate |
7867 | // private static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) { |
7868 | // for (int i = 0; i < len; i++) { |
7869 | // int c = src[srcOff++]; |
7870 | // if (c >>> 8 != 0) { |
7871 | // return 0; |
7872 | // } |
7873 | // dst[dstOff++] = (byte)c; |
7874 | // } |
7875 | // return len; |
7876 | // } |
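// For example, compressing { 0x0041 'A', 0x0062 'b' } stores the bytes
// { 0x41, 0x62 } and returns 2, while any input containing a char such as
// 0x0100 aborts and returns 0; the vectorized paths below implement the same
// contract by jumping to return_zero as soon as a high byte is seen.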
7877 | void MacroAssembler::char_array_compress(Register src, Register dst, Register len, |
7878 | XMMRegister tmp1Reg, XMMRegister tmp2Reg, |
7879 | XMMRegister tmp3Reg, XMMRegister tmp4Reg, |
7880 | Register tmp5, Register result, KRegister mask1, KRegister mask2) { |
7881 | Label copy_chars_loop, return_length, return_zero, done; |
7882 | |
7883 | // rsi: src |
7884 | // rdi: dst |
7885 | // rdx: len |
7886 | // rcx: tmp5 |
7887 | // rax: result |
7888 | |
7889 | // rsi holds start addr of source char[] to be compressed |
7890 | // rdi holds start addr of destination byte[] |
7891 | // rdx holds length |
7892 | |
7893 | assert(len != result, "");
7894 | |
7895 | // save length for return |
7896 | push(len); |
7897 | |
7898 | if ((AVX3Threshold == 0) && (UseAVX > 2) && // AVX512 |
7899 | VM_Version::supports_avx512vlbw() && |
7900 | VM_Version::supports_bmi2()) { |
7901 | |
7902 | Label copy_32_loop, copy_loop_tail, below_threshold; |
7903 | |
7904 | // alignment |
7905 | Label post_alignment; |
7906 | |
7907 | // if length of the string is less than 32, handle it in an old-fashioned way
7908 | testl(len, -32); |
7909 | jcc(Assembler::zero, below_threshold); |
7910 | |
7911 | // First check whether a character is compressible (<= 0xFF).
7912 | // Create mask to test for Unicode chars inside zmm vector |
7913 | movl(result, 0x00FF); |
7914 | evpbroadcastw(tmp2Reg, result, Assembler::AVX_512bit); |
7915 | |
7916 | testl(len, -64); |
7917 | jcc(Assembler::zero, post_alignment); |
7918 | |
7919 | movl(tmp5, dst); |
7920 | andl(tmp5, (32 - 1)); |
7921 | negl(tmp5); |
7922 | andl(tmp5, (32 - 1)); |
7923 | |
7924 | // bail out when there is nothing to be done |
7925 | testl(tmp5, 0xFFFFFFFF); |
7926 | jcc(Assembler::zero, post_alignment); |
7927 | |
7928 | // ~(~0 << len), where len is the # of remaining elements to process |
7929 | movl(result, 0xFFFFFFFF); |
7930 | shlxl(result, result, tmp5); |
7931 | notl(result); |
7932 | kmovdl(mask2, result); |
7933 | |
7934 | evmovdquw(tmp1Reg, mask2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit); |
7935 | evpcmpw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, /*signed*/ false, Assembler::AVX_512bit); |
7936 | ktestd(mask1, mask2); |
7937 | jcc(Assembler::carryClear, return_zero); |
7938 | |
7939 | evpmovwb(Address(dst, 0), mask2, tmp1Reg, Assembler::AVX_512bit); |
7940 | |
7941 | addptr(src, tmp5); |
7942 | addptr(src, tmp5); |
7943 | addptr(dst, tmp5); |
7944 | subl(len, tmp5); |
7945 | |
7946 | bind(post_alignment); |
7947 | // end of alignment |
7948 | |
7949 | movl(tmp5, len); |
7950 | andl(tmp5, (32 - 1)); // tail count (in chars) |
7951 | andl(len, ~(32 - 1)); // vector count (in chars) |
7952 | jcc(Assembler::zero, copy_loop_tail); |
7953 | |
7954 | lea(src, Address(src, len, Address::times_2)); |
7955 | lea(dst, Address(dst, len, Address::times_1)); |
7956 | negptr(len); |
7957 | |
7958 | bind(copy_32_loop); |
7959 | evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit); |
7960 | evpcmpuw(mask1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit); |
7961 | kortestdl(mask1, mask1); |
7962 | jcc(Assembler::carryClear, return_zero); |
7963 | |
7964 | // All elements in current processed chunk are valid candidates for |
7965 | // compression. Write truncated byte elements to memory.
7966 | evpmovwb(Address(dst, len, Address::times_1), tmp1Reg, Assembler::AVX_512bit); |
7967 | addptr(len, 32); |
7968 | jcc(Assembler::notZero, copy_32_loop); |
7969 | |
7970 | bind(copy_loop_tail); |
7971 | // bail out when there is nothing to be done |
7972 | testl(tmp5, 0xFFFFFFFF); |
7973 | jcc(Assembler::zero, return_length); |
7974 | |
7975 | movl(len, tmp5); |
7976 | |
7977 | // ~(~0 << len), where len is the # of remaining elements to process |
7978 | movl(result, 0xFFFFFFFF); |
7979 | shlxl(result, result, len); |
7980 | notl(result); |
7981 | |
7982 | kmovdl(mask2, result); |
7983 | |
7984 | evmovdquw(tmp1Reg, mask2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit); |
7985 | evpcmpw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, /*signed*/ false, Assembler::AVX_512bit); |
7986 | ktestd(mask1, mask2); |
7987 | jcc(Assembler::carryClear, return_zero); |
7988 | |
7989 | evpmovwb(Address(dst, 0), mask2, tmp1Reg, Assembler::AVX_512bit); |
7990 | jmp(return_length); |
7991 | |
7992 | bind(below_threshold); |
7993 | } |
7994 | |
7995 | if (UseSSE42Intrinsics) { |
7996 | Label copy_32_loop, copy_16, copy_tail; |
7997 | |
7998 | movl(result, len); |
7999 | |
8000 | movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vectors |
8001 | |
8002 | // vectored compression |
8003 | andl(len, 0xfffffff0); // vector count (in chars) |
8004 | andl(result, 0x0000000f); // tail count (in chars) |
8005 | testl(len, len); |
8006 | jcc(Assembler::zero, copy_16); |
8007 | |
8008 | // compress 16 chars per iter |
8009 | movdl(tmp1Reg, tmp5); |
8010 | pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg |
8011 | pxor(tmp4Reg, tmp4Reg); |
8012 | |
8013 | lea(src, Address(src, len, Address::times_2)); |
8014 | lea(dst, Address(dst, len, Address::times_1)); |
8015 | negptr(len); |
8016 | |
8017 | bind(copy_32_loop); |
8018 | movdqu(tmp2Reg, Address(src, len, Address::times_2)); // load 1st 8 characters |
8019 | por(tmp4Reg, tmp2Reg); |
8020 | movdqu(tmp3Reg, Address(src, len, Address::times_2, 16)); // load next 8 characters |
8021 | por(tmp4Reg, tmp3Reg); |
8022 | ptest(tmp4Reg, tmp1Reg); // check for Unicode chars in next vector |
8023 | jcc(Assembler::notZero, return_zero); |
8024 | packuswb(tmp2Reg, tmp3Reg); // only ASCII chars; compress each to 1 byte |
8025 | movdqu(Address(dst, len, Address::times_1), tmp2Reg); |
8026 | addptr(len, 16); |
8027 | jcc(Assembler::notZero, copy_32_loop); |
8028 | |
8029 | // compress next vector of 8 chars (if any) |
8030 | bind(copy_16); |
8031 | movl(len, result); |
8032 | andl(len, 0xfffffff8); // vector count (in chars) |
8033 | andl(result, 0x00000007); // tail count (in chars) |
8034 | testl(len, len); |
8035 | jccb(Assembler::zero, copy_tail);
8036 | |
8037 | movdl(tmp1Reg, tmp5); |
8038 | pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg |
8039 | pxor(tmp3Reg, tmp3Reg); |
8040 | |
8041 | movdqu(tmp2Reg, Address(src, 0)); |
8042 | ptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector |
8043 | jccb(Assembler::notZero, return_zero);
8044 | packuswb(tmp2Reg, tmp3Reg); // only LATIN1 chars; compress each to 1 byte |
8045 | movq(Address(dst, 0), tmp2Reg); |
8046 | addptr(src, 16); |
8047 | addptr(dst, 8); |
8048 | |
8049 | bind(copy_tail); |
8050 | movl(len, result); |
8051 | } |
8052 | // compress 1 char per iter |
8053 | testl(len, len); |
8054 | jccb(Assembler::zero, return_length);
8055 | lea(src, Address(src, len, Address::times_2)); |
8056 | lea(dst, Address(dst, len, Address::times_1)); |
8057 | negptr(len); |
8058 | |
8059 | bind(copy_chars_loop); |
8060 | load_unsigned_short(result, Address(src, len, Address::times_2)); |
8061 | testl(result, 0xff00); // check if Unicode char |
8062 | jccb(Assembler::notZero, return_zero);
8063 | movb(Address(dst, len, Address::times_1), result); // ASCII char; compress to 1 byte |
8064 | increment(len); |
8065 | jcc(Assembler::notZero, copy_chars_loop); |
8066 | |
8067 | // if compression succeeded, return length |
8068 | bind(return_length); |
8069 | pop(result); |
8070 | jmpb(done);
8071 | |
8072 | // if compression failed, return 0 |
8073 | bind(return_zero); |
8074 | xorl(result, result); |
8075 | addptr(rsp, wordSize); |
8076 | |
8077 | bind(done); |
8078 | } |
8079 | |
8080 | // Inflate byte[] array to char[]. |
8081 | // ..\jdk\src\java.base\share\classes\java\lang\StringLatin1.java |
8082 | // @IntrinsicCandidate |
8083 | // private static void inflate(byte[] src, int srcOff, char[] dst, int dstOff, int len) { |
8084 | // for (int i = 0; i < len; i++) { |
8085 | // dst[dstOff++] = (char)(src[srcOff++] & 0xff); |
8086 | // } |
8087 | // } |
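// Inflation is a plain zero-extension: the Latin-1 byte 0xE9 ('é'), for
// example, becomes the char 0x00E9, so the vectorized paths below only need
// widening loads (pmovzxbw / vpmovzxbw) and never have to inspect the data.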
8088 | void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len, |
8089 | XMMRegister tmp1, Register tmp2, KRegister mask) { |
8090 | Label copy_chars_loop, done, below_threshold, avx3_threshold; |
8091 | // rsi: src |
8092 | // rdi: dst |
8093 | // rdx: len |
8094 | // rcx: tmp2 |
8095 | |
8096 | // rsi holds start addr of source byte[] to be inflated |
8097 | // rdi holds start addr of destination char[] |
8098 | // rdx holds length |
8099 | assert_different_registers(src, dst, len, tmp2); |
8100 | movl(tmp2, len); |
8101 | if ((UseAVX > 2) && // AVX512 |
8102 | VM_Version::supports_avx512vlbw() && |
8103 | VM_Version::supports_bmi2()) { |
8104 | |
8105 | Label copy_32_loop, copy_tail; |
8106 | Register tmp3_aliased = len; |
8107 | |
8108 | // if length of the string is less than 16, handle it in an old fashioned way |
8109 | testl(len, -16); |
8110 | jcc(Assembler::zero, below_threshold); |
8111 | |
8112 | testl(len, -1 * AVX3Threshold); |
8113 | jcc(Assembler::zero, avx3_threshold); |
8114 | |
8115 | // In order to use only one arithmetic operation for the main loop we use |
8116 | // this pre-calculation |
8117 | andl(tmp2, (32 - 1)); // tail count (in chars), 32 element wide loop |
8118 | andl(len, -32); // vector count |
8119 | jccb(Assembler::zero, copy_tail);
8120 | |
8121 | lea(src, Address(src, len, Address::times_1)); |
8122 | lea(dst, Address(dst, len, Address::times_2)); |
8123 | negptr(len); |
8124 | |
8125 | |
8126 | // inflate 32 chars per iter |
8127 | bind(copy_32_loop); |
8128 | vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit); |
8129 | evmovdquw(Address(dst, len, Address::times_2), tmp1, /*merge*/ false, Assembler::AVX_512bit); |
8130 | addptr(len, 32); |
8131 | jcc(Assembler::notZero, copy_32_loop); |
8132 | |
8133 | bind(copy_tail); |
8134 | // bail out when there is nothing to be done |
8135 | testl(tmp2, -1); // we don't destroy the contents of tmp2 here |
8136 | jcc(Assembler::zero, done); |
8137 | |
8138 | // ~(~0 << length), where length is the # of remaining elements to process |
8139 | movl(tmp3_aliased, -1); |
8140 | shlxl(tmp3_aliased, tmp3_aliased, tmp2); |
8141 | notl(tmp3_aliased); |
8142 | kmovdl(mask, tmp3_aliased); |
8143 | evpmovzxbw(tmp1, mask, Address(src, 0), Assembler::AVX_512bit); |
8144 | evmovdquw(Address(dst, 0), mask, tmp1, /*merge*/ true, Assembler::AVX_512bit); |
8145 | |
8146 | jmp(done); |
8147 | bind(avx3_threshold); |
8148 | } |
8149 | if (UseSSE42Intrinsics) { |
8150 | Label copy_16_loop, copy_8_loop, copy_bytes, copy_new_tail, copy_tail; |
8151 | |
8152 | if (UseAVX > 1) { |
8153 | andl(tmp2, (16 - 1)); |
8154 | andl(len, -16); |
8155 | jccb(Assembler::zero, copy_new_tail);
8156 | } else { |
8157 | andl(tmp2, 0x00000007); // tail count (in chars) |
8158 | andl(len, 0xfffffff8); // vector count (in chars) |
8159 | jccb(Assembler::zero, copy_tail);
8160 | } |
8161 | |
8162 | // vectored inflation |
8163 | lea(src, Address(src, len, Address::times_1)); |
8164 | lea(dst, Address(dst, len, Address::times_2)); |
8165 | negptr(len); |
8166 | |
8167 | if (UseAVX > 1) { |
8168 | bind(copy_16_loop); |
8169 | vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_256bit); |
8170 | vmovdqu(Address(dst, len, Address::times_2), tmp1); |
8171 | addptr(len, 16); |
8172 | jcc(Assembler::notZero, copy_16_loop); |
8173 | |
8174 | bind(below_threshold); |
8175 | bind(copy_new_tail); |
8176 | movl(len, tmp2); |
8177 | andl(tmp2, 0x00000007); |
8178 | andl(len, 0xFFFFFFF8); |
8179 | jccb(Assembler::zero, copy_tail);
8180 | |
8181 | pmovzxbw(tmp1, Address(src, 0)); |
8182 | movdqu(Address(dst, 0), tmp1); |
8183 | addptr(src, 8); |
8184 | addptr(dst, 2 * 8); |
8185 | |
8186 | jmp(copy_tail, true); |
8187 | } |
8188 | |
8189 | // inflate 8 chars per iter |
8190 | bind(copy_8_loop); |
8191 | pmovzxbw(tmp1, Address(src, len, Address::times_1)); // unpack to 8 words |
8192 | movdqu(Address(dst, len, Address::times_2), tmp1); |
8193 | addptr(len, 8); |
8194 | jcc(Assembler::notZero, copy_8_loop); |
8195 | |
8196 | bind(copy_tail); |
8197 | movl(len, tmp2); |
8198 | |
8199 | cmpl(len, 4); |
8200 | jccb(Assembler::less, copy_bytes);
8201 | |
8202 | movdl(tmp1, Address(src, 0)); // load 4 byte chars |
8203 | pmovzxbw(tmp1, tmp1); |
8204 | movq(Address(dst, 0), tmp1); |
8205 | subptr(len, 4); |
8206 | addptr(src, 4); |
8207 | addptr(dst, 8); |
8208 | |
8209 | bind(copy_bytes); |
8210 | } else { |
8211 | bind(below_threshold); |
8212 | } |
8213 | |
8214 | testl(len, len); |
8215 | jccb(Assembler::zero, done);
8216 | lea(src, Address(src, len, Address::times_1)); |
8217 | lea(dst, Address(dst, len, Address::times_2)); |
8218 | negptr(len); |
8219 | |
8220 | // inflate 1 char per iter |
8221 | bind(copy_chars_loop); |
8222 | load_unsigned_byte(tmp2, Address(src, len, Address::times_1)); // load byte char |
8223 | movw(Address(dst, len, Address::times_2), tmp2); // inflate byte char to word |
8224 | increment(len); |
8225 | jcc(Assembler::notZero, copy_chars_loop); |
8226 | |
8227 | bind(done); |
8228 | } |
8229 | |
8230 | |
8231 | void MacroAssembler::evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len) { |
8232 | switch(type) { |
8233 | case T_BYTE: |
8234 | case T_BOOLEAN: |
8235 | evmovdqub(dst, kmask, src, false, vector_len); |
8236 | break; |
8237 | case T_CHAR: |
8238 | case T_SHORT: |
8239 | evmovdquw(dst, kmask, src, false, vector_len); |
8240 | break; |
8241 | case T_INT: |
8242 | case T_FLOAT: |
8243 | evmovdqul(dst, kmask, src, false, vector_len); |
8244 | break; |
8245 | case T_LONG: |
8246 | case T_DOUBLE: |
8247 | evmovdquq(dst, kmask, src, false, vector_len); |
8248 | break; |
8249 | default: |
8250 | fatal("Unexpected type argument %s", type2name(type));
8251 | break; |
8252 | } |
8253 | } |
8254 | |
8255 | void MacroAssembler::evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len) { |
8256 | switch(type) { |
8257 | case T_BYTE: |
8258 | case T_BOOLEAN: |
8259 | evmovdqub(dst, kmask, src, true, vector_len); |
8260 | break; |
8261 | case T_CHAR: |
8262 | case T_SHORT: |
8263 | evmovdquw(dst, kmask, src, true, vector_len); |
8264 | break; |
8265 | case T_INT: |
8266 | case T_FLOAT: |
8267 | evmovdqul(dst, kmask, src, true, vector_len); |
8268 | break; |
8269 | case T_LONG: |
8270 | case T_DOUBLE: |
8271 | evmovdquq(dst, kmask, src, true, vector_len); |
8272 | break; |
8273 | default: |
8274 | fatal("Unexpected type argument %s", type2name(type));
8275 | break; |
8276 | } |
8277 | } |
8278 | |
8279 | void MacroAssembler::knot(uint masklen, KRegister dst, KRegister src, KRegister ktmp, Register rtmp) { |
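// For mask lengths below 8 the byte-wide knot also flips the unused upper
// bits, so the result is ANDed with 0x3 or 0xF to keep only the lanes that
// actually exist.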
8280 | switch(masklen) { |
8281 | case 2: |
8282 | knotbl(dst, src); |
8283 | movl(rtmp, 3); |
8284 | kmovbl(ktmp, rtmp); |
8285 | kandbl(dst, ktmp, dst); |
8286 | break; |
8287 | case 4: |
8288 | knotbl(dst, src); |
8289 | movl(rtmp, 15); |
8290 | kmovbl(ktmp, rtmp); |
8291 | kandbl(dst, ktmp, dst); |
8292 | break; |
8293 | case 8: |
8294 | knotbl(dst, src); |
8295 | break; |
8296 | case 16: |
8297 | knotwl(dst, src); |
8298 | break; |
8299 | case 32: |
8300 | knotdl(dst, src); |
8301 | break; |
8302 | case 64: |
8303 | knotql(dst, src); |
8304 | break; |
8305 | default: |
8306 | fatal("Unexpected vector length %d", masklen);
8307 | break; |
8308 | } |
8309 | } |
8310 | |
8311 | void MacroAssembler::kand(BasicType type, KRegister dst, KRegister src1, KRegister src2) { |
8312 | switch(type) { |
8313 | case T_BOOLEAN: |
8314 | case T_BYTE: |
8315 | kandbl(dst, src1, src2); |
8316 | break; |
8317 | case T_CHAR: |
8318 | case T_SHORT: |
8319 | kandwl(dst, src1, src2); |
8320 | break; |
8321 | case T_INT: |
8322 | case T_FLOAT: |
8323 | kanddl(dst, src1, src2); |
8324 | break; |
8325 | case T_LONG: |
8326 | case T_DOUBLE: |
8327 | kandql(dst, src1, src2); |
8328 | break; |
8329 | default: |
8330 | fatal("Unexpected type argument %s", type2name(type));
8331 | break; |
8332 | } |
8333 | } |
8334 | |
8335 | void MacroAssembler::kor(BasicType type, KRegister dst, KRegister src1, KRegister src2) { |
8336 | switch(type) { |
8337 | case T_BOOLEAN: |
8338 | case T_BYTE: |
8339 | korbl(dst, src1, src2); |
8340 | break; |
8341 | case T_CHAR: |
8342 | case T_SHORT: |
8343 | korwl(dst, src1, src2); |
8344 | break; |
8345 | case T_INT: |
8346 | case T_FLOAT: |
8347 | kordl(dst, src1, src2); |
8348 | break; |
8349 | case T_LONG: |
8350 | case T_DOUBLE: |
8351 | korql(dst, src1, src2); |
8352 | break; |
8353 | default: |
8354 | fatal("Unexpected type argument %s", type2name(type));
8355 | break; |
8356 | } |
8357 | } |
8358 | |
8359 | void MacroAssembler::kxor(BasicType type, KRegister dst, KRegister src1, KRegister src2) { |
8360 | switch(type) { |
8361 | case T_BOOLEAN: |
8362 | case T_BYTE: |
8363 | kxorbl(dst, src1, src2); |
8364 | break; |
8365 | case T_CHAR: |
8366 | case T_SHORT: |
8367 | kxorwl(dst, src1, src2); |
8368 | break; |
8369 | case T_INT: |
8370 | case T_FLOAT: |
8371 | kxordl(dst, src1, src2); |
8372 | break; |
8373 | case T_LONG: |
8374 | case T_DOUBLE: |
8375 | kxorql(dst, src1, src2); |
8376 | break; |
8377 | default: |
8378 | fatal("Unexpected type argument %s", type2name(type));
8379 | break; |
8380 | } |
8381 | } |
8382 | |
8383 | void MacroAssembler::evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { |
8384 | switch(type) { |
8385 | case T_BOOLEAN: |
8386 | case T_BYTE: |
8387 | evpermb(dst, mask, nds, src, merge, vector_len); break; |
8388 | case T_CHAR: |
8389 | case T_SHORT: |
8390 | evpermw(dst, mask, nds, src, merge, vector_len); break; |
8391 | case T_INT: |
8392 | case T_FLOAT: |
8393 | evpermd(dst, mask, nds, src, merge, vector_len); break; |
8394 | case T_LONG: |
8395 | case T_DOUBLE: |
8396 | evpermq(dst, mask, nds, src, merge, vector_len); break; |
8397 | default: |
8398 | fatal("Unexpected type argument %s", type2name(type)); break;
8399 | } |
8400 | } |
8401 | |
8402 | void MacroAssembler::evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { |
8403 | switch(type) { |
8404 | case T_BOOLEAN: |
8405 | case T_BYTE: |
8406 | evpermb(dst, mask, nds, src, merge, vector_len); break; |
8407 | case T_CHAR: |
8408 | case T_SHORT: |
8409 | evpermw(dst, mask, nds, src, merge, vector_len); break; |
8410 | case T_INT: |
8411 | case T_FLOAT: |
8412 | evpermd(dst, mask, nds, src, merge, vector_len); break; |
8413 | case T_LONG: |
8414 | case T_DOUBLE: |
8415 | evpermq(dst, mask, nds, src, merge, vector_len); break; |
8416 | default: |
8417 | fatal("Unexpected type argument %s", type2name(type)); break;
8418 | } |
8419 | } |
8420 | |
8421 | void MacroAssembler::evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { |
8422 | switch(type) { |
8423 | case T_BYTE: |
8424 | evpminsb(dst, mask, nds, src, merge, vector_len); break; |
8425 | case T_SHORT: |
8426 | evpminsw(dst, mask, nds, src, merge, vector_len); break; |
8427 | case T_INT: |
8428 | evpminsd(dst, mask, nds, src, merge, vector_len); break; |
8429 | case T_LONG: |
8430 | evpminsq(dst, mask, nds, src, merge, vector_len); break; |
8431 | default: |
8432 | fatal("Unexpected type argument %s", type2name(type)); break;
8433 | } |
8434 | } |
8435 | |
8436 | void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { |
8437 | switch(type) { |
8438 | case T_BYTE: |
8439 | evpmaxsb(dst, mask, nds, src, merge, vector_len); break; |
8440 | case T_SHORT: |
8441 | evpmaxsw(dst, mask, nds, src, merge, vector_len); break; |
8442 | case T_INT: |
8443 | evpmaxsd(dst, mask, nds, src, merge, vector_len); break; |
8444 | case T_LONG: |
8445 | evpmaxsq(dst, mask, nds, src, merge, vector_len); break; |
8446 | default: |
8447 | fatal("Unexpected type argument %s", type2name(type)); break;
8448 | } |
8449 | } |
8450 | |
8451 | void MacroAssembler::evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { |
8452 | switch(type) { |
8453 | case T_BYTE: |
8454 | evpminsb(dst, mask, nds, src, merge, vector_len); break; |
8455 | case T_SHORT: |
8456 | evpminsw(dst, mask, nds, src, merge, vector_len); break; |
8457 | case T_INT: |
8458 | evpminsd(dst, mask, nds, src, merge, vector_len); break; |
8459 | case T_LONG: |
8460 | evpminsq(dst, mask, nds, src, merge, vector_len); break; |
8461 | default: |
8462 | fatal("Unexpected type argument %s", type2name(type)); break;
8463 | } |
8464 | } |
8465 | |
8466 | void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { |
8467 | switch(type) { |
8468 | case T_BYTE: |
8469 | evpmaxsb(dst, mask, nds, src, merge, vector_len); break; |
8470 | case T_SHORT: |
8471 | evpmaxsw(dst, mask, nds, src, merge, vector_len); break; |
8472 | case T_INT: |
8473 | evpmaxsd(dst, mask, nds, src, merge, vector_len); break; |
8474 | case T_LONG: |
8475 | evpmaxsq(dst, mask, nds, src, merge, vector_len); break; |
8476 | default: |
8477 | fatal("Unexpected type argument %s", type2name(type)); break;
8478 | } |
8479 | } |
8480 | |
8481 | void MacroAssembler::evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { |
8482 | switch(type) { |
8483 | case T_INT: |
8484 | evpxord(dst, mask, nds, src, merge, vector_len); break; |
8485 | case T_LONG: |
8486 | evpxorq(dst, mask, nds, src, merge, vector_len); break; |
8487 | default: |
8488 | fatal("Unexpected type argument %s", type2name(type)); break;
8489 | } |
8490 | } |
8491 | |
8492 | void MacroAssembler::evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { |
8493 | switch(type) { |
8494 | case T_INT: |
8495 | evpxord(dst, mask, nds, src, merge, vector_len); break; |
8496 | case T_LONG: |
8497 | evpxorq(dst, mask, nds, src, merge, vector_len); break; |
8498 | default: |
8499 | fatal("Unexpected type argument %s", type2name(type)); break;
8500 | } |
8501 | } |
8502 | |
8503 | void MacroAssembler::evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { |
8504 | switch(type) { |
8505 | case T_INT: |
8506 | Assembler::evpord(dst, mask, nds, src, merge, vector_len); break; |
8507 | case T_LONG: |
8508 | evporq(dst, mask, nds, src, merge, vector_len); break; |
8509 | default: |
8510 | fatal("Unexpected type argument %s", type2name(type)); break;
8511 | } |
8512 | } |
8513 | |
8514 | void MacroAssembler::evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { |
8515 | switch(type) { |
8516 | case T_INT: |
8517 | Assembler::evpord(dst, mask, nds, src, merge, vector_len); break; |
8518 | case T_LONG: |
8519 | evporq(dst, mask, nds, src, merge, vector_len); break; |
8520 | default: |
8521 | fatal("Unexpected type argument %s", type2name(type)); break;
8522 | } |
8523 | } |
8524 | |
8525 | void MacroAssembler::evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { |
8526 | switch(type) { |
8527 | case T_INT: |
8528 | evpandd(dst, mask, nds, src, merge, vector_len); break; |
8529 | case T_LONG: |
8530 | evpandq(dst, mask, nds, src, merge, vector_len); break; |
8531 | default: |
8532 | fatal("Unexpected type argument %s", type2name(type)); break;
8533 | } |
8534 | } |
8535 | |
8536 | void MacroAssembler::evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { |
8537 | switch(type) { |
8538 | case T_INT: |
8539 | evpandd(dst, mask, nds, src, merge, vector_len); break; |
8540 | case T_LONG: |
8541 | evpandq(dst, mask, nds, src, merge, vector_len); break; |
8542 | default: |
8543 | fatal("Unexpected type argument %s", type2name(type)); break;
8544 | } |
8545 | } |
8546 | |
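     | // Any-true test producing a 0/1 result in dst: 1 if (src1 AND src2) has any
     | // set bit (ktest leaves ZF clear), 0 otherwise.  Mask lengths below 8 bits
     | // use the smallest (8-bit) ktest form.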
8547 | void MacroAssembler::anytrue(Register dst, uint masklen, KRegister src1, KRegister src2) { |
8548 | masklen = masklen < 8 ? 8 : masklen; |
8549 | ktest(masklen, src1, src2); |
8550 | setb(Assembler::notZero, dst); |
8551 | movzbl(dst, dst); |
8552 | } |
8553 | |
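     | // All-true test producing a 0/1 result in dst.  For masks shorter than 8
     | // bits the unused high bits are neutralized with knot before kortest; wider
     | // masks use ktest directly and read the carry flag.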
8554 | void MacroAssembler::alltrue(Register dst, uint masklen, KRegister src1, KRegister src2, KRegister kscratch) { |
8555 | if (masklen < 8) { |
8556 | knotbl(kscratch, src2); |
8557 | kortestbl(src1, kscratch); |
8558 | setb(Assembler::carrySet, dst); |
8559 | movzbl(dst, dst); |
8560 | } else { |
8561 | ktest(masklen, src1, src2); |
8562 | setb(Assembler::carrySet, dst); |
8563 | movzbl(dst, dst); |
8564 | } |
8565 | } |
8566 | |
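     | // Width dispatchers: select the kortest/ktest flavour (8/16/32/64-bit mask
     | // operands) that matches the given mask length in bits.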
8567 | void MacroAssembler::kortest(uint masklen, KRegister src1, KRegister src2) { |
8568 | switch(masklen) { |
8569 | case 8: |
8570 | kortestbl(src1, src2); |
8571 | break; |
8572 | case 16: |
8573 | kortestwl(src1, src2); |
8574 | break; |
8575 | case 32: |
8576 | kortestdl(src1, src2); |
8577 | break; |
8578 | case 64: |
8579 | kortestql(src1, src2); |
8580 | break; |
8581 | default: |
8582 | fatal("Unexpected mask length %d", masklen)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8582, "Unexpected mask length %d", masklen); ::breakpoint() ; } while (0); |
8583 | break; |
8584 | } |
8585 | } |
8586 | |
8587 | |
8588 | void MacroAssembler::ktest(uint masklen, KRegister src1, KRegister src2) { |
8589 | switch(masklen) { |
8590 | case 8: |
8591 | ktestbl(src1, src2); |
8592 | break; |
8593 | case 16: |
8594 | ktestwl(src1, src2); |
8595 | break; |
8596 | case 32: |
8597 | ktestdl(src1, src2); |
8598 | break; |
8599 | case 64: |
8600 | ktestql(src1, src2); |
8601 | break; |
8602 | default: |
8603 | fatal("Unexpected mask length %d", masklen);
8604 | break; |
8605 | } |
8606 | } |
8607 | |
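     | // Masked rotates: the (dst, mask, src, shift) forms rotate by an immediate,
     | // the (dst, mask, src1, src2) forms rotate each lane by a per-lane count;
     | // both dispatch on the element type (int vs. long).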
8608 | void MacroAssembler::evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc) { |
8609 | switch(type) { |
8610 | case T_INT: |
8611 | evprold(dst, mask, src, shift, merge, vlen_enc); break; |
8612 | case T_LONG: |
8613 | evprolq(dst, mask, src, shift, merge, vlen_enc); break; |
8614 | default: |
8615 | fatal("Unexpected type argument %s", type2name(type)); break;
8616 | break; |
8617 | } |
8618 | } |
8619 | |
8620 | void MacroAssembler::evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc) { |
8621 | switch(type) { |
8622 | case T_INT: |
8623 | evprord(dst, mask, src, shift, merge, vlen_enc); break; |
8624 | case T_LONG: |
8625 | evprorq(dst, mask, src, shift, merge, vlen_enc); break; |
8626 | default: |
8627 | fatal("Unexpected type argument %s", type2name(type)); break;
8628 | } |
8629 | } |
8630 | |
8631 | void MacroAssembler::evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc) { |
8632 | switch(type) { |
8633 | case T_INT: |
8634 | evprolvd(dst, mask, src1, src2, merge, vlen_enc); break; |
8635 | case T_LONG: |
8636 | evprolvq(dst, mask, src1, src2, merge, vlen_enc); break; |
8637 | default: |
8638 | fatal("Unexpected type argument %s", type2name(type)); break;
8639 | } |
8640 | } |
8641 | |
8642 | void MacroAssembler::evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc) { |
8643 | switch(type) { |
8644 | case T_INT: |
8645 | evprorvd(dst, mask, src1, src2, merge, vlen_enc); break; |
8646 | case T_LONG: |
8647 | evprorvq(dst, mask, src1, src2, merge, vlen_enc); break; |
8648 | default: |
8649 | fatal("Unexpected type argument %s", type2name(type)); break;
8650 | } |
8651 | } |
8652 | #if COMPILER2_OR_JVMCI
8653 | |
8654 | void MacroAssembler::fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRegister mask, |
8655 | Register length, Register temp, int vec_enc) { |
8656 | // Computing mask for predicated vector store. |
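     | // -1 run through bzhi keeps only the low `length` bits set, giving a
     | // k-register mask that limits the store below to `length` elements.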
8657 | movptr(temp, -1); |
8658 | bzhiq(temp, temp, length); |
8659 | kmov(mask, temp); |
8660 | evmovdqu(bt, mask, dst, xmm, vec_enc); |
8661 | } |
8662 | |
8663 | // Set memory operation for length "less than" 64 bytes. |
8664 | void MacroAssembler::fill64_masked(uint shift, Register dst, int disp, |
8665 | XMMRegister xmm, KRegister mask, Register length, |
8666 | Register temp, bool use64byteVector) { |
8667 | assert(MaxVectorSize >= 32, "vector length should be >= 32");
8668 | BasicType type[] = { T_BYTE, T_SHORT, T_INT, T_LONG}; |
8669 | if (!use64byteVector) { |
8670 | fill32(dst, disp, xmm); |
8671 | subptr(length, 32 >> shift); |
8672 | fill32_masked(shift, dst, disp + 32, xmm, mask, length, temp); |
8673 | } else { |
8674 | assert(MaxVectorSize == 64, "vector length != 64");
8675 | fill_masked(type[shift], Address(dst, disp), xmm, mask, length, temp, Assembler::AVX_512bit); |
8676 | } |
8677 | } |
8678 | |
8679 | |
8680 | void MacroAssembler::fill32_masked(uint shift, Register dst, int disp, |
8681 | XMMRegister xmm, KRegister mask, Register length, |
8682 | Register temp) { |
8683 | assert(MaxVectorSize >= 32, "vector length should be >= 32");
8684 | BasicType type[] = { T_BYTE, T_SHORT, T_INT, T_LONG}; |
8685 | fill_masked(type[shift], Address(dst, disp), xmm, mask, length, temp, Assembler::AVX_256bit); |
8686 | } |
8687 | |
8688 | |
8689 | void MacroAssembler::fill32(Register dst, int disp, XMMRegister xmm) { |
8690 | assert(MaxVectorSize >= 32, "vector length should be >= 32");
8691 | vmovdqu(Address(dst, disp), xmm); |
8692 | } |
8693 | |
8694 | void MacroAssembler::fill64(Register dst, int disp, XMMRegister xmm, bool use64byteVector) { |
8695 | assert(MaxVectorSize >= 32, "vector length should be >= 32");
8696 | BasicType type[] = {T_BYTE, T_SHORT, T_INT, T_LONG}; |
8697 | if (!use64byteVector) { |
8698 | fill32(dst, disp, xmm); |
8699 | fill32(dst, disp + 32, xmm); |
8700 | } else { |
8701 | evmovdquq(Address(dst, disp), xmm, Assembler::AVX_512bit); |
8702 | } |
8703 | } |
8704 | |
8705 | #ifdef _LP64
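     | // AVX3 fill stub: broadcast `value` into xtmp and store it `count` elements
     | // at a time.  Short lengths (up to 128 bytes) are handled with masked
     | // 32/64-byte stores; longer lengths align the destination and then loop over
     | // 128-byte (YMM) or 192-byte (ZMM) blocks, finishing with a masked tail.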
8706 | void MacroAssembler::generate_fill_avx3(BasicType type, Register to, Register value, |
8707 | Register count, Register rtmp, XMMRegister xtmp) { |
8708 | Label L_exit; |
8709 | Label L_fill_start; |
8710 | Label L_fill_64_bytes; |
8711 | Label L_fill_96_bytes; |
8712 | Label L_fill_128_bytes; |
8713 | Label L_fill_128_bytes_loop; |
8714 | Label L_fill_128_loop_header; |
8715 | Label L_fill_128_bytes_loop_header; |
8716 | Label L_fill_128_bytes_loop_pre_header; |
8717 | Label L_fill_zmm_sequence; |
8718 | |
8719 | int shift = -1; |
8720 | int avx3threshold = VM_Version::avx3_threshold(); |
8721 | switch(type) { |
8722 | case T_BYTE: shift = 0; |
8723 | break; |
8724 | case T_SHORT: shift = 1; |
8725 | break; |
8726 | case T_INT: shift = 2; |
8727 | break; |
8728 | /* Uncomment when LONG fill stubs are supported. |
8729 | case T_LONG: shift = 3; |
8730 | break; |
8731 | */ |
8732 | default: |
8733 | fatal("Unhandled type: %s\n", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8733, "Unhandled type: %s\n", type2name(type)); ::breakpoint (); } while (0); |
8734 | } |
8735 | |
8736 | if ((avx3threshold != 0) || (MaxVectorSize == 32)) { |
8737 | |
8738 | if (MaxVectorSize == 64) { |
8739 | cmpq(count, avx3threshold >> shift); |
8740 | jcc(Assembler::greater, L_fill_zmm_sequence); |
8741 | } |
8742 | |
8743 | evpbroadcast(type, xtmp, value, Assembler::AVX_256bit); |
8744 | |
8745 | bind(L_fill_start); |
8746 | |
8747 | cmpq(count, 32 >> shift); |
8748 | jccb(Assembler::greater, L_fill_64_bytes);
8749 | fill32_masked(shift, to, 0, xtmp, k2, count, rtmp); |
8750 | jmp(L_exit); |
8751 | |
8752 | bind(L_fill_64_bytes); |
8753 | cmpq(count, 64 >> shift); |
8754 | jccb(Assembler::greater, L_fill_96_bytes);
8755 | fill64_masked(shift, to, 0, xtmp, k2, count, rtmp); |
8756 | jmp(L_exit); |
8757 | |
8758 | bind(L_fill_96_bytes); |
8759 | cmpq(count, 96 >> shift); |
8760 | jccb(Assembler::greater, L_fill_128_bytes);
8761 | fill64(to, 0, xtmp); |
8762 | subq(count, 64 >> shift); |
8763 | fill32_masked(shift, to, 64, xtmp, k2, count, rtmp); |
8764 | jmp(L_exit); |
8765 | |
8766 | bind(L_fill_128_bytes); |
8767 | cmpq(count, 128 >> shift); |
8768 | jccb(Assembler::greater, L_fill_128_bytes_loop_pre_header);
8769 | fill64(to, 0, xtmp); |
8770 | fill32(to, 64, xtmp); |
8771 | subq(count, 96 >> shift); |
8772 | fill32_masked(shift, to, 96, xtmp, k2, count, rtmp); |
8773 | jmp(L_exit); |
8774 | |
8775 | bind(L_fill_128_bytes_loop_pre_header); |
8776 | { |
8777 | mov(rtmp, to); |
8778 | andq(rtmp, 31); |
8779 | jccb(Assembler::zero, L_fill_128_bytes_loop_header);
8780 | negq(rtmp); |
8781 | addq(rtmp, 32); |
8782 | mov64(r8, -1L); |
8783 | bzhiq(r8, r8, rtmp); |
8784 | kmovql(k2, r8); |
8785 | evmovdqu(T_BYTE, k2, Address(to, 0), xtmp, Assembler::AVX_256bit); |
8786 | addq(to, rtmp); |
8787 | shrq(rtmp, shift); |
8788 | subq(count, rtmp); |
8789 | } |
8790 | |
8791 | cmpq(count, 128 >> shift); |
8792 | jcc(Assembler::less, L_fill_start); |
8793 | |
8794 | bind(L_fill_128_bytes_loop_header); |
8795 | subq(count, 128 >> shift); |
8796 | |
8797 | align32(); |
8798 | bind(L_fill_128_bytes_loop); |
8799 | fill64(to, 0, xtmp); |
8800 | fill64(to, 64, xtmp); |
8801 | addq(to, 128); |
8802 | subq(count, 128 >> shift); |
8803 | jccb(Assembler::greaterEqual, L_fill_128_bytes_loop);
8804 | |
8805 | addq(count, 128 >> shift); |
8806 | jcc(Assembler::zero, L_exit); |
8807 | jmp(L_fill_start); |
8808 | } |
8809 | |
8810 | if (MaxVectorSize == 64) { |
8811 | // Sequence using 64 byte ZMM register. |
8812 | Label L_fill_128_bytes_zmm; |
8813 | Label L_fill_192_bytes_zmm; |
8814 | Label L_fill_192_bytes_loop_zmm; |
8815 | Label L_fill_192_bytes_loop_header_zmm; |
8816 | Label L_fill_192_bytes_loop_pre_header_zmm; |
8817 | Label L_fill_start_zmm_sequence; |
8818 | |
8819 | bind(L_fill_zmm_sequence); |
8820 | evpbroadcast(type, xtmp, value, Assembler::AVX_512bit); |
8821 | |
8822 | bind(L_fill_start_zmm_sequence); |
8823 | cmpq(count, 64 >> shift); |
8824 | jccb(Assembler::greater, L_fill_128_bytes_zmm);
8825 | fill64_masked(shift, to, 0, xtmp, k2, count, rtmp, true); |
8826 | jmp(L_exit); |
8827 | |
8828 | bind(L_fill_128_bytes_zmm); |
8829 | cmpq(count, 128 >> shift); |
8830 | jccb(Assembler::greater, L_fill_192_bytes_zmm);
8831 | fill64(to, 0, xtmp, true); |
8832 | subq(count, 64 >> shift); |
8833 | fill64_masked(shift, to, 64, xtmp, k2, count, rtmp, true); |
8834 | jmp(L_exit); |
8835 | |
8836 | bind(L_fill_192_bytes_zmm); |
8837 | cmpq(count, 192 >> shift); |
8838 | jccb(Assembler::greater, L_fill_192_bytes_loop_pre_header_zmm);
8839 | fill64(to, 0, xtmp, true); |
8840 | fill64(to, 64, xtmp, true); |
8841 | subq(count, 128 >> shift); |
8842 | fill64_masked(shift, to, 128, xtmp, k2, count, rtmp, true); |
8843 | jmp(L_exit); |
8844 | |
8845 | bind(L_fill_192_bytes_loop_pre_header_zmm); |
8846 | { |
8847 | movq(rtmp, to); |
8848 | andq(rtmp, 63); |
8849 | jccb(Assembler::zero, L_fill_192_bytes_loop_header_zmm);
8850 | negq(rtmp); |
8851 | addq(rtmp, 64); |
8852 | mov64(r8, -1L); |
8853 | bzhiq(r8, r8, rtmp); |
8854 | kmovql(k2, r8); |
8855 | evmovdqu(T_BYTE, k2, Address(to, 0), xtmp, Assembler::AVX_512bit); |
8856 | addq(to, rtmp); |
8857 | shrq(rtmp, shift); |
8858 | subq(count, rtmp); |
8859 | } |
8860 | |
8861 | cmpq(count, 192 >> shift); |
8862 | jcc(Assembler::less, L_fill_start_zmm_sequence); |
8863 | |
8864 | bind(L_fill_192_bytes_loop_header_zmm); |
8865 | subq(count, 192 >> shift); |
8866 | |
8867 | align32(); |
8868 | bind(L_fill_192_bytes_loop_zmm); |
8869 | fill64(to, 0, xtmp, true); |
8870 | fill64(to, 64, xtmp, true); |
8871 | fill64(to, 128, xtmp, true); |
8872 | addq(to, 192); |
8873 | subq(count, 192 >> shift); |
8874 | jccb(Assembler::greaterEqual, L_fill_192_bytes_loop_zmm);
8875 | |
8876 | addq(count, 192 >> shift); |
8877 | jcc(Assembler::zero, L_exit); |
8878 | jmp(L_fill_start_zmm_sequence); |
8879 | } |
8880 | bind(L_exit); |
8881 | } |
8882 | #endif |
8883 | #endif //COMPILER2_OR_JVMCI |
8884 | |
8885 | |
8886 | #ifdef _LP64
8887 | void MacroAssembler::convert_f2i(Register dst, XMMRegister src) { |
8888 | Label done; |
8889 | cvttss2sil(dst, src); |
8890 | // Conversion instructions do not match JLS for overflow, underflow and NaN -> fixup in stub |
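     | // cvttss2si returns the "integer indefinite" value 0x80000000 on overflow or
     | // NaN, so comparing against it selects the slow-path fixup stub below.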
8891 | cmpl(dst, 0x80000000); // float_sign_flip |
8892 | jccb(Assembler::notEqual, done);
8893 | subptr(rsp, 8); |
8894 | movflt(Address(rsp, 0), src); |
8895 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())));
8896 | pop(dst); |
8897 | bind(done); |
8898 | } |
8899 | |
8900 | void MacroAssembler::convert_d2i(Register dst, XMMRegister src) { |
8901 | Label done; |
8902 | cvttsd2sil(dst, src); |
8903 | // Conversion instructions do not match JLS for overflow, underflow and NaN -> fixup in stub |
8904 | cmpl(dst, 0x80000000); // float_sign_flip |
8905 | jccb(Assembler::notEqual, done);
8906 | subptr(rsp, 8); |
8907 | movdbl(Address(rsp, 0), src); |
8908 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())));
8909 | pop(dst); |
8910 | bind(done); |
8911 | } |
8912 | |
8913 | void MacroAssembler::convert_f2l(Register dst, XMMRegister src) { |
8914 | Label done; |
8915 | cvttss2siq(dst, src); |
8916 | cmp64(dst, ExternalAddress((address) StubRoutines::x86::double_sign_flip())); |
8917 | jccb(Assembler::notEqual, done);
8918 | subptr(rsp, 8); |
8919 | movflt(Address(rsp, 0), src); |
8920 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())));
8921 | pop(dst); |
8922 | bind(done); |
8923 | } |
8924 | |
8925 | void MacroAssembler::convert_d2l(Register dst, XMMRegister src) { |
8926 | Label done; |
8927 | cvttsd2siq(dst, src); |
8928 | cmp64(dst, ExternalAddress((address) StubRoutines::x86::double_sign_flip())); |
8929 | jccb(Assembler::notEqual, done);
8930 | subptr(rsp, 8); |
8931 | movdbl(Address(rsp, 0), src); |
8932 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())));
8933 | pop(dst); |
8934 | bind(done); |
8935 | } |
8936 | |
8937 | void MacroAssembler::cache_wb(Address line) |
8938 | { |
8939 | // 64 bit cpus always support clflush |
8940 | assert(VM_Version::supports_clflush(), "clflush should be available");
8941 | bool optimized = VM_Version::supports_clflushopt(); |
8942 | bool no_evict = VM_Version::supports_clwb(); |
8943 | |
8944 | // prefer clwb (writeback without evict), otherwise
8945 | // prefer clflushopt (potentially parallel writeback with evict),
8946 | // otherwise fall back on clflush (serial writeback with evict)
8947 | |
8948 | if (optimized) { |
8949 | if (no_evict) { |
8950 | clwb(line); |
8951 | } else { |
8952 | clflushopt(line); |
8953 | } |
8954 | } else { |
8955 | // no need for fence when using CLFLUSH |
8956 | clflush(line); |
8957 | } |
8958 | } |
8959 | |
8960 | void MacroAssembler::cache_wbsync(bool is_pre) |
8961 | { |
8962 | assert(VM_Version::supports_clflush(), "clflush should be available");
8963 | bool optimized = VM_Version::supports_clflushopt(); |
8964 | bool no_evict = VM_Version::supports_clwb(); |
8965 | |
8966 | // pick the correct implementation |
8967 | |
8968 | if (!is_pre && (optimized || no_evict)) { |
8969 | // need an sfence for post flush when using clflushopt or clwb |
8970 | // otherwise no need for any synchronization
8971 | |
8972 | sfence(); |
8973 | } |
8974 | } |
8975 | |
8976 | #endif // _LP64 |
8977 | |
8978 | Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { |
8979 | switch (cond) { |
8980 | // Note some conditions are synonyms for others |
8981 | case Assembler::zero: return Assembler::notZero; |
8982 | case Assembler::notZero: return Assembler::zero; |
8983 | case Assembler::less: return Assembler::greaterEqual; |
8984 | case Assembler::lessEqual: return Assembler::greater; |
8985 | case Assembler::greater: return Assembler::lessEqual; |
8986 | case Assembler::greaterEqual: return Assembler::less; |
8987 | case Assembler::below: return Assembler::aboveEqual; |
8988 | case Assembler::belowEqual: return Assembler::above; |
8989 | case Assembler::above: return Assembler::belowEqual; |
8990 | case Assembler::aboveEqual: return Assembler::below; |
8991 | case Assembler::overflow: return Assembler::noOverflow; |
8992 | case Assembler::noOverflow: return Assembler::overflow; |
8993 | case Assembler::negative: return Assembler::positive; |
8994 | case Assembler::positive: return Assembler::negative; |
8995 | case Assembler::parity: return Assembler::noParity; |
8996 | case Assembler::noParity: return Assembler::parity; |
8997 | } |
8998 | ShouldNotReachHere(); return Assembler::overflow;
8999 | } |
9000 | |
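     | // Emits a compare against the byte flag at flag_addr and a conditional jump
     | // over everything generated between construction and destruction when the
     | // flag equals `value`; the destructor binds the skip target.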
9001 | SkipIfEqual::SkipIfEqual( |
9002 | MacroAssembler* masm, const bool* flag_addr, bool value) { |
9003 | _masm = masm; |
9004 | _masm->cmp8(ExternalAddress((address)flag_addr), value); |
9005 | _masm->jcc(Assembler::equal, _label); |
9006 | } |
9007 | |
9008 | SkipIfEqual::~SkipIfEqual() { |
9009 | _masm->bind(_label); |
9010 | } |
9011 | |
9012 | // 32-bit Windows has its own fast-path implementation |
9013 | // of get_thread |
9014 | #if !defined(WIN32) || defined(_LP64)
9015 | |
9016 | // This is simply a call to Thread::current() |
9017 | void MacroAssembler::get_thread(Register thread) { |
9018 | if (thread != rax) { |
9019 | push(rax); |
9020 | } |
9021 | LP64_ONLY(push(rdi);)
9022 | LP64_ONLY(push(rsi);)
9023 | push(rdx); |
9024 | push(rcx); |
9025 | #ifdef _LP64
9026 | push(r8); |
9027 | push(r9); |
9028 | push(r10); |
9029 | push(r11); |
9030 | #endif |
9031 | |
9032 | MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, Thread::current), 0);
9033 | |
9034 | #ifdef _LP64
9035 | pop(r11); |
9036 | pop(r10); |
9037 | pop(r9); |
9038 | pop(r8); |
9039 | #endif |
9040 | pop(rcx); |
9041 | pop(rdx); |
9042 | LP64_ONLY(pop(rsi);)
9043 | LP64_ONLY(pop(rdi);)
9044 | if (thread != rax) { |
9045 | mov(thread, rax); |
9046 | pop(rax); |
9047 | } |
9048 | } |
9049 | |
9050 | |
9051 | #endif // !WIN32 || _LP64 |