File: jdk/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
Warning: line 1158, column 9: Called C++ object pointer is null
/*
 * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "ci/ciUtilities.hpp"
#include "compiler/oopMap.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/barrierSetNMethod.hpp"
#include "gc/shared/gc_globals.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/universe.hpp"
#include "nativeInst_x86.hpp"
#include "oops/instanceOop.hpp"
#include "oops/method.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/arguments.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.inline.hpp"
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
#if INCLUDE_JVMCI
#include "jvmci/jvmci_globals.hpp"
#endif
#if INCLUDE_ZGC
#include "gc/z/zThreadLocalData.hpp"
#endif

// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp

#define __ _masm->
#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
#define a__ ((Assembler*)_masm)->
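
// Note on the "__" shorthand above: a statement such as "__ enter();"
// simply expands to "_masm->enter();", i.e. every "__" line in this file
// emits code through the current MacroAssembler.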

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
const int MXCSR_MASK = 0xFFC0;  // Mask out any pending exceptions
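// (MXCSR bits 0-5 are the sticky exception-status flags; ANDing with 0xFFC0
// clears them, so the comparisons below look only at the control bits:
// DAZ, the exception masks, rounding control, and FTZ.)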

// Stub Code definitions

class StubGenerator: public StubCodeGenerator {
 private:

#ifdef PRODUCT
#define inc_counter_np(counter) ((void)0)
#else
  void inc_counter_np_(int& counter) {
    // This can destroy rscratch1 if counter is far from the code cache
    __ incrementl(ExternalAddress((address)&counter));
  }
#define inc_counter_np(counter)                   \
  BLOCK_COMMENT("inc_counter " #counter);         \
  inc_counter_np_(counter);
#endif

  // Call stubs are used to call Java from C
  //
  // Linux Arguments:
  //    c_rarg0:   call wrapper address                   address
  //    c_rarg1:   result                                 address
  //    c_rarg2:   result type                            BasicType
  //    c_rarg3:   method                                 Method*
  //    c_rarg4:   (interpreter) entry point              address
  //    c_rarg5:   parameters                             intptr_t*
  //    16(rbp):   parameter size (in words)              int
  //    24(rbp):   thread                                 Thread*
  //
  //     [ return_from_Java     ] <--- rsp
  //     [ argument word n      ]
  //      ...
  // -12 [ argument word 1      ]
  // -11 [ saved r15            ] <--- rsp_after_call
  // -10 [ saved r14            ]
  //  -9 [ saved r13            ]
  //  -8 [ saved r12            ]
  //  -7 [ saved rbx            ]
  //  -6 [ call wrapper         ]
  //  -5 [ result               ]
  //  -4 [ result type          ]
  //  -3 [ method               ]
  //  -2 [ entry point          ]
  //  -1 [ parameters           ]
  //   0 [ saved rbp            ] <--- rbp
  //   1 [ return address       ]
  //   2 [ parameter size       ]
  //   3 [ thread               ]
  //
  // Windows Arguments:
  //    c_rarg0:   call wrapper address                   address
  //    c_rarg1:   result                                 address
  //    c_rarg2:   result type                            BasicType
  //    c_rarg3:   method                                 Method*
  //    48(rbp):   (interpreter) entry point              address
  //    56(rbp):   parameters                             intptr_t*
  //    64(rbp):   parameter size (in words)              int
  //    72(rbp):   thread                                 Thread*
  //
  //     [ return_from_Java     ] <--- rsp
  //     [ argument word n      ]
  //      ...
  // -60 [ argument word 1      ]
  // -59 [ saved xmm31          ] <--- rsp after_call
  //     [ saved xmm16-xmm30    ] (EVEX enabled, else the space is blank)
  // -27 [ saved xmm15          ]
  //     [ saved xmm7-xmm14     ]
  //  -9 [ saved xmm6           ] (each xmm register takes 2 slots)
  //  -7 [ saved r15            ]
  //  -6 [ saved r14            ]
  //  -5 [ saved r13            ]
  //  -4 [ saved r12            ]
  //  -3 [ saved rdi            ]
  //  -2 [ saved rsi            ]
  //  -1 [ saved rbx            ]
  //   0 [ saved rbp            ] <--- rbp
  //   1 [ return address       ]
  //   2 [ call wrapper         ]
  //   3 [ result               ]
  //   4 [ result type          ]
  //   5 [ method               ]
  //   6 [ entry point          ]
  //   7 [ parameters           ]
  //   8 [ parameter size       ]
  //   9 [ thread               ]
  //
  //    Windows reserves the caller's stack space for arguments 1-4.
  //    We spill c_rarg0-c_rarg3 to this space.
  // Call stub stack layout word offsets from rbp
  enum call_stub_layout {
#ifdef _WIN64
    xmm_save_first     = 6,  // save from xmm6
    xmm_save_last      = 31, // to xmm31
    xmm_save_base      = -9,
    rsp_after_call_off = xmm_save_base - 2 * (xmm_save_last - xmm_save_first), // -59
    r15_off            = -7,
    r14_off            = -6,
    r13_off            = -5,
    r12_off            = -4,
    rdi_off            = -3,
    rsi_off            = -2,
    rbx_off            = -1,
    rbp_off            = 0,
    retaddr_off        = 1,
    call_wrapper_off   = 2,
    result_off         = 3,
    result_type_off    = 4,
    method_off         = 5,
    entry_point_off    = 6,
    parameters_off     = 7,
    parameter_size_off = 8,
    thread_off         = 9
#else
    rsp_after_call_off = -12,
    mxcsr_off          = rsp_after_call_off,
    r15_off            = -11,
    r14_off            = -10,
    r13_off            = -9,
    r12_off            = -8,
    rbx_off            = -7,
    call_wrapper_off   = -6,
    result_off         = -5,
    result_type_off    = -4,
    method_off         = -3,
    entry_point_off    = -2,
    parameters_off     = -1,
    rbp_off            = 0,
    retaddr_off        = 1,
    parameter_size_off = 2,
    thread_off         = 3
#endif
  };

#ifdef _WIN64
  Address xmm_save(int reg) {
    assert(reg >= xmm_save_first && reg <= xmm_save_last, "XMM register number out of range");
    return Address(rbp, (xmm_save_base - (reg - xmm_save_first) * 2) * wordSize);
  }
#endif
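
  // For orientation: xmm_save(6) == Address(rbp, -9 * wordSize), and each
  // further register moves two words down, e.g. xmm_save(15) is at offset
  // -27 and xmm_save(31) at -59 -- matching the layout diagram above.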

  address generate_call_stub(address& return_address) {
    assert((int)frame::entry_frame_after_call_words == -(int)rsp_after_call_off + 1 &&
           (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off,
           "adjust this code");
    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();

    // same as in generate_catch_exception()!
    const Address rsp_after_call(rbp, rsp_after_call_off * wordSize);

    const Address call_wrapper  (rbp, call_wrapper_off   * wordSize);
    const Address result        (rbp, result_off         * wordSize);
    const Address result_type   (rbp, result_type_off    * wordSize);
    const Address method        (rbp, method_off         * wordSize);
    const Address entry_point   (rbp, entry_point_off    * wordSize);
    const Address parameters    (rbp, parameters_off     * wordSize);
    const Address parameter_size(rbp, parameter_size_off * wordSize);

    // same as in generate_catch_exception()!
    const Address thread        (rbp, thread_off         * wordSize);

    const Address r15_save(rbp, r15_off * wordSize);
    const Address r14_save(rbp, r14_off * wordSize);
    const Address r13_save(rbp, r13_off * wordSize);
    const Address r12_save(rbp, r12_off * wordSize);
    const Address rbx_save(rbp, rbx_off * wordSize);

    // stub code
    __ enter();
    __ subptr(rsp, -rsp_after_call_off * wordSize);

    // save register parameters
#ifndef _WIN64
    __ movptr(parameters,   c_rarg5); // parameters
    __ movptr(entry_point,  c_rarg4); // entry_point
#endif

    __ movptr(method,       c_rarg3); // method
    __ movl(result_type,    c_rarg2); // result type
    __ movptr(result,       c_rarg1); // result
    __ movptr(call_wrapper, c_rarg0); // call wrapper

    // save regs belonging to calling function
    __ movptr(rbx_save, rbx);
    __ movptr(r12_save, r12);
    __ movptr(r13_save, r13);
    __ movptr(r14_save, r14);
    __ movptr(r15_save, r15);

#ifdef _WIN64
    int last_reg = 15;
    if (UseAVX > 2) {
      last_reg = 31;
    }
    if (VM_Version::supports_evex()) {
      for (int i = xmm_save_first; i <= last_reg; i++) {
        __ vextractf32x4(xmm_save(i), as_XMMRegister(i), 0);
      }
    } else {
      for (int i = xmm_save_first; i <= last_reg; i++) {
        __ movdqu(xmm_save(i), as_XMMRegister(i));
      }
    }

    const Address rdi_save(rbp, rdi_off * wordSize);
    const Address rsi_save(rbp, rsi_off * wordSize);

    __ movptr(rsi_save, rsi);
    __ movptr(rdi_save, rdi);
#else
    const Address mxcsr_save(rbp, mxcsr_off * wordSize);
    {
      Label skip_ldmx;
      __ stmxcsr(mxcsr_save);
      __ movl(rax, mxcsr_save);
      __ andl(rax, MXCSR_MASK);    // Only check control and mask bits
      ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std());
      __ cmp32(rax, mxcsr_std);
      __ jcc(Assembler::equal, skip_ldmx);
      __ ldmxcsr(mxcsr_std);
      __ bind(skip_ldmx);
    }
#endif

    // Load up thread register
    __ movptr(r15_thread, thread);
    __ reinit_heapbase();

#ifdef ASSERT
    // make sure we have no pending exceptions
    {
      Label L;
      __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
      __ jcc(Assembler::equal, L);
      __ stop("StubRoutines::call_stub: entered with pending exception");
      __ bind(L);
    }
#endif

    // pass parameters if any
    BLOCK_COMMENT("pass parameters if any");
    Label parameters_done;
    __ movl(c_rarg3, parameter_size);
    __ testl(c_rarg3, c_rarg3);
    __ jcc(Assembler::zero, parameters_done);

    Label loop;
    __ movptr(c_rarg2, parameters);       // parameter pointer
    __ movl(c_rarg1, c_rarg3);            // parameter counter is in c_rarg1
    __ BIND(loop);
    __ movptr(rax, Address(c_rarg2, 0));  // get parameter
    __ addptr(c_rarg2, wordSize);         // advance to next parameter
    __ decrementl(c_rarg1);               // decrement counter
    __ push(rax);                         // pass parameter
    __ jcc(Assembler::notZero, loop);

    // call Java function
    __ BIND(parameters_done);
    __ movptr(rbx, method);               // get Method*
    __ movptr(c_rarg1, entry_point);      // get entry_point
    __ mov(r13, rsp);                     // set sender sp
    BLOCK_COMMENT("call Java function");
    __ call(c_rarg1);

    BLOCK_COMMENT("call_stub_return_address:");
    return_address = __ pc();

    // store result depending on type (everything that is not
    // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
    __ movptr(c_rarg0, result);
    Label is_long, is_float, is_double, exit;
    __ movl(c_rarg1, result_type);
    __ cmpl(c_rarg1, T_OBJECT);
    __ jcc(Assembler::equal, is_long);
    __ cmpl(c_rarg1, T_LONG);
    __ jcc(Assembler::equal, is_long);
    __ cmpl(c_rarg1, T_FLOAT);
    __ jcc(Assembler::equal, is_float);
    __ cmpl(c_rarg1, T_DOUBLE);
    __ jcc(Assembler::equal, is_double);

    // handle T_INT case
    __ movl(Address(c_rarg0, 0), rax);

    __ BIND(exit);

    // pop parameters
    __ lea(rsp, rsp_after_call);

#ifdef ASSERT
    // verify that threads correspond
    {
      Label L1, L2, L3;
      __ cmpptr(r15_thread, thread);
      __ jcc(Assembler::equal, L1);
      __ stop("StubRoutines::call_stub: r15_thread is corrupted");
      __ bind(L1);
      __ get_thread(rbx);
      __ cmpptr(r15_thread, thread);
      __ jcc(Assembler::equal, L2);
      __ stop("StubRoutines::call_stub: r15_thread is modified by call");
      __ bind(L2);
      __ cmpptr(r15_thread, rbx);
      __ jcc(Assembler::equal, L3);
      __ stop("StubRoutines::call_stub: threads must correspond");
      __ bind(L3);
    }
#endif

    // restore regs belonging to calling function
#ifdef _WIN64
    // emit the restores for xmm regs
    if (VM_Version::supports_evex()) {
      for (int i = xmm_save_first; i <= last_reg; i++) {
        __ vinsertf32x4(as_XMMRegister(i), as_XMMRegister(i), xmm_save(i), 0);
      }
    } else {
      for (int i = xmm_save_first; i <= last_reg; i++) {
        __ movdqu(as_XMMRegister(i), xmm_save(i));
      }
    }
#endif
    __ movptr(r15, r15_save);
    __ movptr(r14, r14_save);
    __ movptr(r13, r13_save);
    __ movptr(r12, r12_save);
    __ movptr(rbx, rbx_save);

#ifdef _WIN64
    __ movptr(rdi, rdi_save);
    __ movptr(rsi, rsi_save);
#else
    __ ldmxcsr(mxcsr_save);
#endif

    // restore rsp
    __ addptr(rsp, -rsp_after_call_off * wordSize);

    // return
    __ vzeroupper();
    __ pop(rbp);
    __ ret(0);

    // handle return types different from T_INT
    __ BIND(is_long);
    __ movq(Address(c_rarg0, 0), rax);
    __ jmp(exit);

    __ BIND(is_float);
    __ movflt(Address(c_rarg0, 0), xmm0);
    __ jmp(exit);

    __ BIND(is_double);
    __ movdbl(Address(c_rarg0, 0), xmm0);
    __ jmp(exit);

    return start;
  }
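
  // For orientation (not part of the generated code): the VM reaches this
  // stub through the CallStub function-pointer type declared in
  // stubRoutines.hpp. A rough sketch of the call site, loosely following
  // javaCalls.cpp (variable names here are illustrative):
  //
  //   StubRoutines::call_stub()((address)&link,     // call wrapper
  //                             result_val_address, // where to store result
  //                             result_type,        // BasicType of result
  //                             method(),           // Method* to invoke
  //                             entry_point,        // interpreter entry
  //                             args->parameters(), // intptr_t* arg words
  //                             args->size_of_parameters(),
  //                             thread);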

  // Return point for a Java call if there's an exception thrown in
  // Java code.  The exception is caught and transformed into a
  // pending exception stored in JavaThread that can be tested from
  // within the VM.
  //
  // Note: Usually the parameters are removed by the callee. In case
  // of an exception crossing an activation frame boundary, that is
  // not the case if the callee is compiled code => need to setup the
  // rsp.
  //
  // rax: exception oop

  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");
    address start = __ pc();

    // same as in generate_call_stub():
    const Address rsp_after_call(rbp, rsp_after_call_off * wordSize);
    const Address thread        (rbp, thread_off         * wordSize);

#ifdef ASSERT
    // verify that threads correspond
    {
      Label L1, L2, L3;
      __ cmpptr(r15_thread, thread);
      __ jcc(Assembler::equal, L1);
      __ stop("StubRoutines::catch_exception: r15_thread is corrupted");
      __ bind(L1);
      __ get_thread(rbx);
      __ cmpptr(r15_thread, thread);
      __ jcc(Assembler::equal, L2);
      __ stop("StubRoutines::catch_exception: r15_thread is modified by call");
      __ bind(L2);
      __ cmpptr(r15_thread, rbx);
      __ jcc(Assembler::equal, L3);
      __ stop("StubRoutines::catch_exception: threads must correspond");
      __ bind(L3);
    }
#endif

    // set pending exception
    __ verify_oop(rax);

    __ movptr(Address(r15_thread, Thread::pending_exception_offset()), rax);
    __ lea(rscratch1, ExternalAddress((address)__FILE__));
    __ movptr(Address(r15_thread, Thread::exception_file_offset()), rscratch1);
    __ movl(Address(r15_thread, Thread::exception_line_offset()), (int) __LINE__);

    // complete return to VM
    assert(StubRoutines::_call_stub_return_address != NULL,
           "_call_stub_return_address must have been generated before");
    __ jump(RuntimeAddress(StubRoutines::_call_stub_return_address));

    return start;
  }

  // Continuation point for runtime calls returning with a pending
  // exception.  The pending exception check happened in the runtime
  // or native call stub.  The pending exception in Thread is
  // converted into a Java-level exception.
  //
  // Contract with Java-level exception handlers:
  // rax: exception
  // rdx: throwing pc
  //
  // NOTE: At entry of this stub, exception-pc must be on stack !!

  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward exception");
    address start = __ pc();

    // Upon entry, the sp points to the return address returning into
    // Java (interpreted or compiled) code; i.e., the return address
    // becomes the throwing pc.
    //
    // Arguments pushed before the runtime call are still on the stack
    // but the exception handler will reset the stack pointer ->
    // ignore them.  A potential result in registers can be ignored as
    // well.

#ifdef ASSERT
    // make sure this code is only executed if there is a pending exception
    {
      Label L;
      __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t) NULL);
      __ jcc(Assembler::notEqual, L);
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }
#endif

    // compute exception handler into rbx
    __ movptr(c_rarg0, Address(rsp, 0));
    BLOCK_COMMENT("call exception_handler_for_return_address");
    __ call_VM_leaf(CAST_FROM_FN_PTR(address,
                         SharedRuntime::exception_handler_for_return_address),
                    r15_thread, c_rarg0);
    __ mov(rbx, rax);

    // setup rax & rdx, remove return address & clear pending exception
    __ pop(rdx);
    __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
    __ movptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);

#ifdef ASSERT
    // make sure exception is set
    {
      Label L;
      __ testptr(rax, rax);
      __ jcc(Assembler::notEqual, L);
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif

    // continue at exception handler (return address removed)
    // rax: exception
    // rbx: exception handler
    // rdx: throwing pc
    __ verify_oop(rax);
    __ jmp(rbx);

    return start;
  }

  // Support for intptr_t OrderAccess::fence()
  //
  // Arguments :
  //
  // Result:
  address generate_orderaccess_fence() {
    StubCodeMark mark(this, "StubRoutines", "orderaccess_fence");
    address start = __ pc();
    __ membar(Assembler::StoreLoad);
    __ ret(0);

    return start;
  }


  // Support for intptr_t get_previous_sp()
  //
  // This routine is used to find the previous stack pointer for the
  // caller.
  address generate_get_previous_sp() {
    StubCodeMark mark(this, "StubRoutines", "get_previous_sp");
    address start = __ pc();

    __ movptr(rax, rsp);
    __ addptr(rax, 8); // return address is at the top of the stack.
    __ ret(0);

    return start;
  }

  //----------------------------------------------------------------------------------------------------
  // Support for void verify_mxcsr()
  //
  // This routine is used with -Xcheck:jni to verify that native
  // JNI code does not return to Java code without restoring the
  // MXCSR register to our expected state.

  address generate_verify_mxcsr() {
    StubCodeMark mark(this, "StubRoutines", "verify_mxcsr");
    address start = __ pc();

    const Address mxcsr_save(rsp, 0);

    if (CheckJNICalls) {
      Label ok_ret;
      ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std());
      __ push(rax);
      __ subptr(rsp, wordSize);    // allocate a temp location
      __ stmxcsr(mxcsr_save);
      __ movl(rax, mxcsr_save);
      __ andl(rax, MXCSR_MASK);    // Only check control and mask bits
      __ cmp32(rax, mxcsr_std);
      __ jcc(Assembler::equal, ok_ret);

      __ warn("MXCSR changed by native JNI code, use -XX:+RestoreMXCSROnJNICall");

      __ ldmxcsr(mxcsr_std);

      __ bind(ok_ret);
      __ addptr(rsp, wordSize);
      __ pop(rax);
    }

    __ ret(0);

    return start;
  }

  address generate_f2i_fixup() {
    StubCodeMark mark(this, "StubRoutines", "f2i_fixup");
    Address inout(rsp, 5 * wordSize); // return address + 4 saves

    address start = __ pc();

    Label L;

    __ push(rax);
    __ push(c_rarg3);
    __ push(c_rarg2);
    __ push(c_rarg1);

    __ movl(rax, 0x7f800000);
    __ xorl(c_rarg3, c_rarg3);
    __ movl(c_rarg2, inout);
    __ movl(c_rarg1, c_rarg2);
    __ andl(c_rarg1, 0x7fffffff);
    __ cmpl(rax, c_rarg1); // NaN? -> 0
    __ jcc(Assembler::negative, L);
    __ testl(c_rarg2, c_rarg2); // signed ? min_jint : max_jint
    __ movl(c_rarg3, 0x80000000);
    __ movl(rax, 0x7fffffff);
    __ cmovl(Assembler::positive, c_rarg3, rax);

    __ bind(L);
    __ movptr(inout, c_rarg3);

    __ pop(c_rarg1);
    __ pop(c_rarg2);
    __ pop(c_rarg3);
    __ pop(rax);

    __ ret(0);

    return start;
  }
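
  // The fixup above supplies the Java semantics that cvttss2si cannot:
  // when the instruction reports an invalid conversion (returning the
  // indefinite value 0x80000000), NaN must become 0, positive overflow
  // max_jint, and negative overflow min_jint.  The |x| > 0x7f800000 test
  // detects NaN; otherwise the sign bit selects the saturated value.
  // The f2l/d2i/d2l fixups below follow the same pattern at their widths.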

  address generate_f2l_fixup() {
    StubCodeMark mark(this, "StubRoutines", "f2l_fixup");
    Address inout(rsp, 5 * wordSize); // return address + 4 saves
    address start = __ pc();

    Label L;

    __ push(rax);
    __ push(c_rarg3);
    __ push(c_rarg2);
    __ push(c_rarg1);

    __ movl(rax, 0x7f800000);
    __ xorl(c_rarg3, c_rarg3);
    __ movl(c_rarg2, inout);
    __ movl(c_rarg1, c_rarg2);
    __ andl(c_rarg1, 0x7fffffff);
    __ cmpl(rax, c_rarg1); // NaN? -> 0
    __ jcc(Assembler::negative, L);
    __ testl(c_rarg2, c_rarg2); // signed ? min_jlong : max_jlong
    __ mov64(c_rarg3, 0x8000000000000000);
    __ mov64(rax, 0x7fffffffffffffff);
    __ cmov(Assembler::positive, c_rarg3, rax);

    __ bind(L);
    __ movptr(inout, c_rarg3);

    __ pop(c_rarg1);
    __ pop(c_rarg2);
    __ pop(c_rarg3);
    __ pop(rax);

    __ ret(0);

    return start;
  }

  address generate_d2i_fixup() {
    StubCodeMark mark(this, "StubRoutines", "d2i_fixup");
    Address inout(rsp, 6 * wordSize); // return address + 5 saves

    address start = __ pc();

    Label L;

    __ push(rax);
    __ push(c_rarg3);
    __ push(c_rarg2);
    __ push(c_rarg1);
    __ push(c_rarg0);

    __ movl(rax, 0x7ff00000);
    __ movq(c_rarg2, inout);
    __ movl(c_rarg3, c_rarg2);
    __ mov(c_rarg1, c_rarg2);
    __ mov(c_rarg0, c_rarg2);
    __ negl(c_rarg3);
    __ shrptr(c_rarg1, 0x20);
    __ orl(c_rarg3, c_rarg2);
    __ andl(c_rarg1, 0x7fffffff);
    __ xorl(c_rarg2, c_rarg2);
    __ shrl(c_rarg3, 0x1f);
    __ orl(c_rarg1, c_rarg3);
    __ cmpl(rax, c_rarg1);
    __ jcc(Assembler::negative, L); // NaN -> 0
    __ testptr(c_rarg0, c_rarg0); // signed ? min_jint : max_jint
    __ movl(c_rarg2, 0x80000000);
    __ movl(rax, 0x7fffffff);
    __ cmov(Assembler::positive, c_rarg2, rax);

    __ bind(L);
    __ movptr(inout, c_rarg2);

    __ pop(c_rarg0);
    __ pop(c_rarg1);
    __ pop(c_rarg2);
    __ pop(c_rarg3);
    __ pop(rax);

    __ ret(0);

    return start;
  }

  address generate_d2l_fixup() {
    StubCodeMark mark(this, "StubRoutines", "d2l_fixup");
    Address inout(rsp, 6 * wordSize); // return address + 5 saves

    address start = __ pc();

    Label L;

    __ push(rax);
    __ push(c_rarg3);
    __ push(c_rarg2);
    __ push(c_rarg1);
    __ push(c_rarg0);

    __ movl(rax, 0x7ff00000);
    __ movq(c_rarg2, inout);
    __ movl(c_rarg3, c_rarg2);
    __ mov(c_rarg1, c_rarg2);
    __ mov(c_rarg0, c_rarg2);
    __ negl(c_rarg3);
    __ shrptr(c_rarg1, 0x20);
    __ orl(c_rarg3, c_rarg2);
    __ andl(c_rarg1, 0x7fffffff);
    __ xorl(c_rarg2, c_rarg2);
    __ shrl(c_rarg3, 0x1f);
    __ orl(c_rarg1, c_rarg3);
    __ cmpl(rax, c_rarg1);
    __ jcc(Assembler::negative, L); // NaN -> 0
    __ testq(c_rarg0, c_rarg0); // signed ? min_jlong : max_jlong
    __ mov64(c_rarg2, 0x8000000000000000);
    __ mov64(rax, 0x7fffffffffffffff);
    __ cmovq(Assembler::positive, c_rarg2, rax);

    __ bind(L);
    __ movq(inout, c_rarg2);

    __ pop(c_rarg0);
    __ pop(c_rarg1);
    __ pop(c_rarg2);
    __ pop(c_rarg3);
    __ pop(rax);

    __ ret(0);

    return start;
  }

  address generate_iota_indices(const char *stub_name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", stub_name);
    address start = __ pc();
    __ emit_data64(0x0706050403020100, relocInfo::none);
    __ emit_data64(0x0F0E0D0C0B0A0908, relocInfo::none);
    __ emit_data64(0x1716151413121110, relocInfo::none);
    __ emit_data64(0x1F1E1D1C1B1A1918, relocInfo::none);
    __ emit_data64(0x2726252423222120, relocInfo::none);
    __ emit_data64(0x2F2E2D2C2B2A2928, relocInfo::none);
    __ emit_data64(0x3736353433323130, relocInfo::none);
    __ emit_data64(0x3F3E3D3C3B3A3938, relocInfo::none);
    return start;
  }
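
  // (The eight qwords above are simply the byte sequence 0x00, 0x01, ...,
  // 0x3F in little-endian order: "iota" indices that serve as an identity
  // input for vector shuffle/permute stubs.)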

  address generate_vector_byte_shuffle_mask(const char *stub_name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", stub_name);
    address start = __ pc();
    __ emit_data64(0x7070707070707070, relocInfo::none);
    __ emit_data64(0x7070707070707070, relocInfo::none);
    __ emit_data64(0xF0F0F0F0F0F0F0F0, relocInfo::none);
    __ emit_data64(0xF0F0F0F0F0F0F0F0, relocInfo::none);
    return start;
  }

  address generate_fp_mask(const char *stub_name, int64_t mask) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", stub_name);
    address start = __ pc();

    __ emit_data64( mask, relocInfo::none );
    __ emit_data64( mask, relocInfo::none );

    return start;
  }

  address generate_vector_mask(const char *stub_name, int64_t mask) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", stub_name);
    address start = __ pc();

    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);

    return start;
  }

  address generate_vector_byte_perm_mask(const char *stub_name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", stub_name);
    address start = __ pc();

    __ emit_data64(0x0000000000000001, relocInfo::none);
    __ emit_data64(0x0000000000000003, relocInfo::none);
    __ emit_data64(0x0000000000000005, relocInfo::none);
    __ emit_data64(0x0000000000000007, relocInfo::none);
    __ emit_data64(0x0000000000000000, relocInfo::none);
    __ emit_data64(0x0000000000000002, relocInfo::none);
    __ emit_data64(0x0000000000000004, relocInfo::none);
    __ emit_data64(0x0000000000000006, relocInfo::none);

    return start;
  }

  address generate_vector_fp_mask(const char *stub_name, int64_t mask) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", stub_name);
    address start = __ pc();

    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);

    return start;
  }

  address generate_vector_custom_i32(const char *stub_name, Assembler::AvxVectorLen len,
                                     int32_t val0, int32_t val1, int32_t val2, int32_t val3,
                                     int32_t val4 = 0, int32_t val5 = 0, int32_t val6 = 0, int32_t val7 = 0,
                                     int32_t val8 = 0, int32_t val9 = 0, int32_t val10 = 0, int32_t val11 = 0,
                                     int32_t val12 = 0, int32_t val13 = 0, int32_t val14 = 0, int32_t val15 = 0) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", stub_name);
    address start = __ pc();

    assert(len != Assembler::AVX_NoVec, "vector len must be specified");
    __ emit_data(val0, relocInfo::none, 0);
    __ emit_data(val1, relocInfo::none, 0);
    __ emit_data(val2, relocInfo::none, 0);
    __ emit_data(val3, relocInfo::none, 0);
    if (len >= Assembler::AVX_256bit) {
      __ emit_data(val4, relocInfo::none, 0);
      __ emit_data(val5, relocInfo::none, 0);
      __ emit_data(val6, relocInfo::none, 0);
      __ emit_data(val7, relocInfo::none, 0);
      if (len >= Assembler::AVX_512bit) {
        __ emit_data(val8, relocInfo::none, 0);
        __ emit_data(val9, relocInfo::none, 0);
        __ emit_data(val10, relocInfo::none, 0);
        __ emit_data(val11, relocInfo::none, 0);
        __ emit_data(val12, relocInfo::none, 0);
        __ emit_data(val13, relocInfo::none, 0);
        __ emit_data(val14, relocInfo::none, 0);
        __ emit_data(val15, relocInfo::none, 0);
      }
    }

    return start;
  }

  // Non-destructive plausibility checks for oops
  //
  // Arguments:
  //    all args on stack!
  //
  // Stack after saving c_rarg3:
  //    [tos + 0]: saved c_rarg3
  //    [tos + 1]: saved c_rarg2
  //    [tos + 2]: saved r12 (several TemplateTable methods use it)
  //    [tos + 3]: saved flags
  //    [tos + 4]: return address
  //  * [tos + 5]: error message (char*)
  //  * [tos + 6]: object to verify (oop)
  //  * [tos + 7]: saved rax - saved by caller and bashed
  //  * [tos + 8]: saved r10 (rscratch1) - saved by caller
  //  * = popped on exit
  address generate_verify_oop() {
    StubCodeMark mark(this, "StubRoutines", "verify_oop");
    address start = __ pc();

    Label exit, error;

    __ pushf();
    __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr()));

    __ push(r12);

    // save c_rarg2 and c_rarg3
    __ push(c_rarg2);
    __ push(c_rarg3);

    enum {
      // After previous pushes.
      oop_to_verify = 6 * wordSize,
      saved_rax     = 7 * wordSize,
      saved_r10     = 8 * wordSize,

      // Before the call to MacroAssembler::debug(), see below.
      return_addr   = 16 * wordSize,
      error_msg     = 17 * wordSize
    };

    // get object
    __ movptr(rax, Address(rsp, oop_to_verify));

    // make sure object is 'reasonable'
    __ testptr(rax, rax);
    __ jcc(Assembler::zero, exit); // if obj is NULL it is OK

#if INCLUDE_ZGC
    if (UseZGC) {
      // Check if metadata bits indicate a bad oop
      __ testptr(rax, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
      __ jcc(Assembler::notZero, error);
    }
#endif

    // Check if the oop is in the right area of memory
    __ movptr(c_rarg2, rax);
    __ movptr(c_rarg3, (intptr_t) Universe::verify_oop_mask());
    __ andptr(c_rarg2, c_rarg3);
    __ movptr(c_rarg3, (intptr_t) Universe::verify_oop_bits());
    __ cmpptr(c_rarg2, c_rarg3);
    __ jcc(Assembler::notZero, error);

    // make sure klass is 'reasonable', which is not zero.
    __ load_klass(rax, rax, rscratch1);  // get klass
    __ testptr(rax, rax);
    __ jcc(Assembler::zero, error); // if klass is NULL it is broken

    // return if everything seems ok
    __ bind(exit);
    __ movptr(rax, Address(rsp, saved_rax));       // get saved rax back
    __ movptr(rscratch1, Address(rsp, saved_r10)); // get saved r10 back
    __ pop(c_rarg3);                               // restore c_rarg3
    __ pop(c_rarg2);                               // restore c_rarg2
    __ pop(r12);                                   // restore r12
    __ popf();                                     // restore flags
    __ ret(4 * wordSize);                          // pop caller saved stuff

    // handle errors
    __ bind(error);
    __ movptr(rax, Address(rsp, saved_rax));       // get saved rax back
    __ movptr(rscratch1, Address(rsp, saved_r10)); // get saved r10 back
    __ pop(c_rarg3);                               // get saved c_rarg3 back
    __ pop(c_rarg2);                               // get saved c_rarg2 back
    __ pop(r12);                                   // get saved r12 back
    __ popf();                                     // get saved flags off stack --
                                                   // will be ignored

    __ pusha();                                    // push registers
                                                   // (rip is already pushed)
    // debug(char* msg, int64_t pc, int64_t regs[])
    // We've popped the registers we'd saved (c_rarg3, c_rarg2 and flags), and
    // pushed all the registers, so now the stack looks like:
    //     [tos +  0] 16 saved registers
    //     [tos + 16] return address
    //   * [tos + 17] error message (char*)
    //   * [tos + 18] object to verify (oop)
    //   * [tos + 19] saved rax - saved by caller and bashed
    //   * [tos + 20] saved r10 (rscratch1) - saved by caller
    //   * = popped on exit

    __ movptr(c_rarg0, Address(rsp, error_msg));    // pass address of error message
    __ movptr(c_rarg1, Address(rsp, return_addr));  // pass return address
    __ movq(c_rarg2, rsp);                          // pass address of regs on stack
    __ mov(r12, rsp);                               // remember rsp
    __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
    __ andptr(rsp, -16);                            // align stack as required by ABI
    BLOCK_COMMENT("call MacroAssembler::debug");
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
    __ hlt();
    return start;
  }

  //
  // Verify that a register contains a clean 32-bit positive value
  // (high 32 bits are 0) so it can be used in 64-bit shifts.
  //
  //  Input:
  //    Rint  -  32-bit value
  //    Rtmp  -  scratch
  //
  void assert_clean_int(Register Rint, Register Rtmp) {
#ifdef ASSERT
    Label L;
    assert_different_registers(Rtmp, Rint);
    __ movslq(Rtmp, Rint);
    __ cmpq(Rtmp, Rint);
    __ jcc(Assembler::equal, L);
    __ stop("high 32-bits of int value are not 0");
    __ bind(L);
#endif
  }

  // Generate overlap test for array copy stubs
  //
  // Input:
  //     c_rarg0 - from
  //     c_rarg1 - to
  //     c_rarg2 - element count
  //
  // Output:
  //     rax   - &from[element count - 1]
  //
  void array_overlap_test(address no_overlap_target, Address::ScaleFactor sf) {
    assert(no_overlap_target != NULL, "must be generated");
    array_overlap_test(no_overlap_target, NULL, sf);
  }
  void array_overlap_test(Label& L_no_overlap, Address::ScaleFactor sf) {
    array_overlap_test(NULL, &L_no_overlap, sf);
  }
  void array_overlap_test(address no_overlap_target, Label* NOLp, Address::ScaleFactor sf) {
    const Register from     = c_rarg0;
    const Register to       = c_rarg1;
    const Register count    = c_rarg2;
    const Register end_from = rax;

    __ cmpptr(to, from);
    __ lea(end_from, Address(from, count, sf, 0));
    if (NOLp == NULL) {
      ExternalAddress no_overlap(no_overlap_target);
      __ jump_cc(Assembler::belowEqual, no_overlap);
      __ cmpptr(to, end_from);
      __ jump_cc(Assembler::aboveEqual, no_overlap);
    } else {
      __ jcc(Assembler::belowEqual, (*NOLp));
      __ cmpptr(to, end_from);
      __ jcc(Assembler::aboveEqual, (*NOLp));
    }
  }
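
  // Worked example of the test above: a forward (disjoint-style) copy is
  // safe when to <= from, or when to >= from + count * element_size
  // (i.e. to is at or past end_from). The two compare-and-jump pairs test
  // exactly those conditions; only if both fail must the conjoint
  // (backward) copy path be used.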

  // Shuffle first three arg regs on Windows into Linux/Solaris locations.
  //
  // Outputs:
  //    rdi - rcx
  //    rsi - rdx
  //    rdx - r8
  //    rcx - r9
  //
  // Registers r9 and r10 are used on Windows to save rdi and rsi, since the
  // latter are non-volatile there.  r9 and r10 should not be used by the caller.
  //
1104 | DEBUG_ONLY(bool regs_in_thread;)bool regs_in_thread; | |||
1105 | ||||
1106 | void setup_arg_regs(int nargs = 3) { | |||
1107 | const Register saved_rdi = r9; | |||
1108 | const Register saved_rsi = r10; | |||
1109 | assert(nargs == 3 || nargs == 4, "else fix")do { if (!(nargs == 3 || nargs == 4)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp" , 1109, "assert(" "nargs == 3 || nargs == 4" ") failed", "else fix" ); ::breakpoint(); } } while (0); | |||
1110 | #ifdef _WIN64 | |||
1111 | assert(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9,do { if (!(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp" , 1112, "assert(" "c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9" ") failed", "unexpected argument registers"); ::breakpoint() ; } } while (0) | |||
1112 | "unexpected argument registers")do { if (!(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp" , 1112, "assert(" "c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9" ") failed", "unexpected argument registers"); ::breakpoint() ; } } while (0); | |||
1113 | if (nargs >= 4) | |||
1114 | __masm-> mov(rax, r9); // r9 is also saved_rdi | |||
1115 | __masm-> movptr(saved_rdi, rdi); | |||
1116 | __masm-> movptr(saved_rsi, rsi); | |||
1117 | __masm-> mov(rdi, rcx); // c_rarg0 | |||
1118 | __masm-> mov(rsi, rdx); // c_rarg1 | |||
1119 | __masm-> mov(rdx, r8); // c_rarg2 | |||
1120 | if (nargs >= 4) | |||
1121 | __masm-> mov(rcx, rax); // c_rarg3 (via rax) | |||
1122 | #else | |||
1123 | assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,do { if (!(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp" , 1124, "assert(" "c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx" ") failed", "unexpected argument registers"); ::breakpoint() ; } } while (0) | |||
1124 | "unexpected argument registers")do { if (!(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp" , 1124, "assert(" "c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx" ") failed", "unexpected argument registers"); ::breakpoint() ; } } while (0); | |||
1125 | #endif | |||
1126 | DEBUG_ONLY(regs_in_thread = false;)regs_in_thread = false; | |||
1127 | } | |||
1128 | ||||
1129 | void restore_arg_regs() { | |||
    assert(!regs_in_thread, "wrong call to restore_arg_regs");
    const Register saved_rdi = r9;
    const Register saved_rsi = r10;
#ifdef _WIN64
    __ movptr(rdi, saved_rdi);
    __ movptr(rsi, saved_rsi);
#endif
  }

  // This is used in places where r10 is a scratch register, and can
  // be adapted if r9 is needed also.
  void setup_arg_regs_using_thread() {
    const Register saved_r15 = r9;
#ifdef _WIN64
    __ mov(saved_r15, r15);  // r15 is callee saved and needs to be restored
    __ get_thread(r15_thread);
    assert(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9,
           "unexpected argument registers");
    __ movptr(Address(r15_thread, in_bytes(JavaThread::windows_saved_rdi_offset())), rdi);
    __ movptr(Address(r15_thread, in_bytes(JavaThread::windows_saved_rsi_offset())), rsi);

    __ mov(rdi, rcx); // c_rarg0
    __ mov(rsi, rdx); // c_rarg1
    __ mov(rdx, r8);  // c_rarg2
#else
    assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
           "unexpected argument registers");
#endif
    DEBUG_ONLY(regs_in_thread = true;)
  }

  void restore_arg_regs_using_thread() {
    assert(regs_in_thread, "wrong call to restore_arg_regs");
    const Register saved_r15 = r9;
#ifdef _WIN64
    __ get_thread(r15_thread);
    __ movptr(rsi, Address(r15_thread, in_bytes(JavaThread::windows_saved_rsi_offset())));
    __ movptr(rdi, Address(r15_thread, in_bytes(JavaThread::windows_saved_rdi_offset())));
    __ mov(r15, saved_r15);  // r15 is callee saved and needs to be restored
#endif
  }

  // Copy big chunks forward
  //
  // Inputs:
  //   end_from     - source array end address
  //   end_to       - destination array end address
  //   qword_count  - 64-bit element count, negative
  //   to           - scratch
  //   L_copy_bytes - entry label
  //   L_copy_8_bytes  - exit label
  //
  void copy_bytes_forward(Register end_from, Register end_to,
                          Register qword_count, Register to,
                          Label& L_copy_bytes, Label& L_copy_8_bytes) {
    DEBUG_ONLY(__ stop("enter at entry label, not here"));
    Label L_loop;
    __ align(OptoLoopAlignment);
    if (UseUnalignedLoadStores) {
      Label L_end;
      __ BIND(L_loop);
      if (UseAVX >= 2) {
        __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
        __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
        __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
        __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
      } else {
        __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
        __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
        __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
        __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
        __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
        __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
        __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
        __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
      }

      __ BIND(L_copy_bytes);
      __ addptr(qword_count, 8);
      __ jcc(Assembler::lessEqual, L_loop);
      __ subptr(qword_count, 4);  // sub(8) and add(4)
      __ jccb(Assembler::greater, L_end);
      // Copy trailing 32 bytes
      if (UseAVX >= 2) {
        __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
        __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
      } else {
        __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
        __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
        __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
        __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
      }
      __ addptr(qword_count, 4);
      __ BIND(L_end);
      if (UseAVX >= 2) {
        // clean upper bits of YMM registers
        __ vpxor(xmm0, xmm0);
        __ vpxor(xmm1, xmm1);
      }
    } else {
      // Copy 32 bytes per iteration
      __ BIND(L_loop);
      __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
      __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
      __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
      __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
      __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
      __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
      __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
      __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);

      __ BIND(L_copy_bytes);
      __ addptr(qword_count, 4);
      __ jcc(Assembler::lessEqual, L_loop);
    }
    __ subptr(qword_count, 4);
    __ jcc(Assembler::less, L_copy_8_bytes);  // Copy trailing qwords
  }
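
  // [Editor's illustrative sketch -- not part of the original
  // stubGenerator_x86_64.cpp] Scalar model of the forward copy's indexing
  // scheme: the callers pass end pointers to the last qword of each array and
  // a negated qword count, so the loop index rises through negative offsets
  // toward zero and no separate limit register is needed.
  static void model_forward_qword_copy(const int64_t* end_from, int64_t* end_to, int64_t n) {
    for (int64_t q = -n + 1; q <= 0; q++) {  // q mirrors the rising (negative) qword_count
      end_to[q] = end_from[q];               // the stub scales q by Address::times_8
    }
  }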

  // Copy big chunks backward
  //
  // Inputs:
  //   from         - source array address
  //   dest         - destination array address
  //   qword_count  - 64-bit element count
  //   to           - scratch
  //   L_copy_bytes - entry label
  //   L_copy_8_bytes  - exit label
  //
  void copy_bytes_backward(Register from, Register dest,
                           Register qword_count, Register to,
                           Label& L_copy_bytes, Label& L_copy_8_bytes) {
    DEBUG_ONLY(__ stop("enter at entry label, not here"));
    Label L_loop;
    __ align(OptoLoopAlignment);
    if (UseUnalignedLoadStores) {
      Label L_end;
      __ BIND(L_loop);
      if (UseAVX >= 2) {
        __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
        __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
        __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
        __ vmovdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
      } else {
        __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
        __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
        __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
        __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
        __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
        __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
        __ movdqu(xmm3, Address(from, qword_count, Address::times_8,  0));
        __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm3);
      }

      __ BIND(L_copy_bytes);
      __ subptr(qword_count, 8);
      __ jcc(Assembler::greaterEqual, L_loop);

      __ addptr(qword_count, 4);  // add(8) and sub(4)
      __ jccb(Assembler::less, L_end);
      // Copy trailing 32 bytes
      if (UseAVX >= 2) {
        __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0));
        __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0);
      } else {
        __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
        __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
        __ movdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
        __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
      }
      __ subptr(qword_count, 4);
      __ BIND(L_end);
      if (UseAVX >= 2) {
        // clean upper bits of YMM registers
        __ vpxor(xmm0, xmm0);
        __ vpxor(xmm1, xmm1);
      }
    } else {
      // Copy 32 bytes per iteration
      __ BIND(L_loop);
      __ movq(to, Address(from, qword_count, Address::times_8, 24));
      __ movq(Address(dest, qword_count, Address::times_8, 24), to);
      __ movq(to, Address(from, qword_count, Address::times_8, 16));
      __ movq(Address(dest, qword_count, Address::times_8, 16), to);
      __ movq(to, Address(from, qword_count, Address::times_8,  8));
      __ movq(Address(dest, qword_count, Address::times_8,  8), to);
      __ movq(to, Address(from, qword_count, Address::times_8,  0));
      __ movq(Address(dest, qword_count, Address::times_8,  0), to);

      __ BIND(L_copy_bytes);
      __ subptr(qword_count, 4);
      __ jcc(Assembler::greaterEqual, L_loop);
    }
    __ addptr(qword_count, 4);
    __ jcc(Assembler::greater, L_copy_8_bytes);  // Copy trailing qwords
  }
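
  // [Editor's illustrative sketch -- not part of the original
  // stubGenerator_x86_64.cpp] Scalar model of the backward copy: qword_count
  // stays positive and falls toward zero, so the highest-addressed qwords are
  // copied first, which is what makes an overlapping copy with dest above
  // from safe.
  static void model_backward_qword_copy(const int64_t* from, int64_t* dest, int64_t n) {
    for (int64_t q = n - 1; q >= 0; q--) {   // q mirrors the falling qword_count
      dest[q] = from[q];
    }
  }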

#ifndef PRODUCT
  int& get_profile_ctr(int shift) {
    if (0 == shift)
      return SharedRuntime::_jbyte_array_copy_ctr;
    else if (1 == shift)
      return SharedRuntime::_jshort_array_copy_ctr;
    else if (2 == shift)
      return SharedRuntime::_jint_array_copy_ctr;
    else
      return SharedRuntime::_jlong_array_copy_ctr;
  }
#endif
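
  // [Editor's note] Throughout these stubs, "shift" is log2 of the element
  // size in bytes (0 = byte, 1 = short, 2 = int, 3 = long), so "count << shift"
  // is the byte count and (Address::ScaleFactor)(shift) is the matching
  // addressing-mode scale.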

  void setup_argument_regs(BasicType type) {
    if (type == T_BYTE || type == T_SHORT) {
      setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                        // r9 and r10 may be used to save non-volatile registers
    } else {
      setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
                                     // r9 is used to save r15_thread
    }
  }

  void restore_argument_regs(BasicType type) {
    if (type == T_BYTE || type == T_SHORT) {
      restore_arg_regs();
    } else {
      restore_arg_regs_using_thread();
    }
  }

#if COMPILER2_OR_JVMCI
  // Note: the following rules apply to the AVX3-optimized arraycopy stubs:
  // - If the target supports AVX3 features (BW+VL+F), the implementation uses 32-byte
  //   vectors (YMMs) both for the special cases (various small block sizes) and for the
  //   aligned copy loop. This is the default configuration.
  // - If the copy length is above AVX3Threshold, the implementation uses 64-byte vectors
  //   (ZMMs) for the main copy loop (and the subsequent tail), since the bulk of the
  //   cycles will be consumed there.
  // - If the user forces MaxVectorSize=32, then above 4096 bytes REP MOVS shows better
  //   performance for disjoint copies. For conjoint/backward copies, vector-based copy
  //   performs better.
  // - If the user sets AVX3Threshold=0, the special cases for small block sizes operate
  //   on 64-byte vector registers (ZMMs).

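  // [Editor's illustrative note -- not part of the original file] The rules
  // above reduce to the flag computation repeated verbatim in both AVX3
  // generators below:
  //
  //   int  avx3threshold   = VM_Version::avx3_threshold();
  //   bool use64byteVector = (MaxVectorSize > 32) && (avx3threshold == 0);
  //
  // With a non-zero threshold, copies of at least threshold[shift] elements
  // (4096 bytes at every element size) take the ZMM path (MaxVectorSize == 64)
  // or the REP MOVS path (MaxVectorSize < 64).
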
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  //
  // Side Effects:
  //   disjoint_copy_avx3_masked is set to the no-overlap entry point
  //   used by generate_conjoint_[byte/int/short/long]_copy().
  //

  address generate_disjoint_copy_avx3_masked(address* entry, const char *name, int shift,
                                             bool aligned, bool is_oop, bool dest_uninitialized) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();
    int avx3threshold = VM_Version::avx3_threshold();
    bool use64byteVector = (MaxVectorSize > 32) && (avx3threshold == 0);
    Label L_main_loop, L_main_loop_64bytes, L_tail, L_tail64, L_exit, L_entry;
    Label L_repmovs, L_main_pre_loop, L_main_pre_loop_64bytes, L_pre_main_post_64;
    const Register from  = rdi;  // source array address
    const Register to    = rsi;  // destination array address
    const Register count = rdx;  // elements count
    const Register temp1 = r8;
    const Register temp2 = r11;
    const Register temp3 = rax;
    const Register temp4 = rcx;
    // End pointers are inclusive, and if count is not zero they point
    // to the last unit copied: end_to[0] := end_from[0]

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    BasicType type_vec[] = { T_BYTE, T_SHORT, T_INT, T_LONG };
    BasicType type = is_oop ? T_OBJECT : type_vec[shift];

    setup_argument_regs(type);

    DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
    if (dest_uninitialized) {
      decorators |= IS_DEST_UNINITIALIZED;
    }
    if (aligned) {
      decorators |= ARRAYCOPY_ALIGNED;
    }
    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
    bs->arraycopy_prologue(_masm, decorators, type, from, to, count);

    {
      // Type(shift)      byte(0), short(1), int(2), long(3)
      int loop_size[] = { 192,     96,       48,     24 };
      int threshold[] = { 4096,    2048,     1024,   512 };

      // UnsafeCopyMemory page error: continue after ucm
      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
      // 'from', 'to' and 'count' are now valid

      // temp1 holds the remaining count; temp4 holds the running count used to
      // compute the next address offset of the to/from addresses (temp4 * scale).
      __ mov64(temp4, 0);
      __ movq(temp1, count);

      // Zero length check.
      __ BIND(L_tail);
      __ cmpq(temp1, 0);
      __ jcc(Assembler::lessEqual, L_exit);

      // Special cases using 32 byte [masked] vector copy operations.
      __ arraycopy_avx3_special_cases(xmm1, k2, from, to, temp1, shift,
                                      temp4, temp3, use64byteVector, L_entry, L_exit);

      // PRE-MAIN-POST loop for aligned copy.
      __ BIND(L_entry);

      if (avx3threshold != 0) {
        __ cmpq(count, threshold[shift]);
        if (MaxVectorSize == 64) {
          // Copy using 64 byte vectors.
          __ jcc(Assembler::greaterEqual, L_pre_main_post_64);
        } else {
          assert(MaxVectorSize < 64, "vector size should be < 64 bytes");
          // REP MOVS offers a faster copy path.
          __ jcc(Assembler::greaterEqual, L_repmovs);
        }
      }

      if ((MaxVectorSize < 64) || (avx3threshold != 0)) {
        // Partial copy to make dst address 32 byte aligned.
        __ movq(temp2, to);
        __ andq(temp2, 31);
        __ jcc(Assembler::equal, L_main_pre_loop);

        __ negptr(temp2);
        __ addq(temp2, 32);
        if (shift) {
          __ shrq(temp2, shift);
        }
        __ movq(temp3, temp2);
        __ copy32_masked_avx(to, from, xmm1, k2, temp3, temp4, temp1, shift);
        __ movq(temp4, temp2);
        __ movq(temp1, count);
        __ subq(temp1, temp2);

        __ cmpq(temp1, loop_size[shift]);
        __ jcc(Assembler::less, L_tail);

        __ BIND(L_main_pre_loop);
        __ subq(temp1, loop_size[shift]);

        // Main loop with aligned copy block size of 192 bytes at 32 byte granularity.
        __ align32();
        __ BIND(L_main_loop);
        __ copy64_avx(to, from, temp4, xmm1, false, shift, 0);
        __ copy64_avx(to, from, temp4, xmm1, false, shift, 64);
        __ copy64_avx(to, from, temp4, xmm1, false, shift, 128);
        __ addptr(temp4, loop_size[shift]);
        __ subq(temp1, loop_size[shift]);
        __ jcc(Assembler::greater, L_main_loop);

        __ addq(temp1, loop_size[shift]);

        // Tail loop.
        __ jmp(L_tail);

        __ BIND(L_repmovs);
        __ movq(temp2, temp1);
        // Swap to(RSI) and from(RDI) addresses to comply with REP MOVS semantics.
        __ movq(temp3, to);
        __ movq(to, from);
        __ movq(from, temp3);
        // Save to/from for restoration post rep_mov.
        __ movq(temp1, to);
        __ movq(temp3, from);
        if (shift < 3) {
          __ shrq(temp2, 3 - shift); // quad word count
        }
        __ movq(temp4, temp2);       // move quad word count into temp4(RCX).
        __ rep_mov();
        __ shlq(temp2, 3);           // convert quad words into byte count.
        if (shift) {
          __ shrq(temp2, shift);     // type specific count.
        }
        // Restore original addresses in to/from.
        __ movq(to, temp3);
        __ movq(from, temp1);
        __ movq(temp4, temp2);
        __ movq(temp1, count);
        __ subq(temp1, temp2);       // trailing part (less than a quad word size).
        __ jmp(L_tail);
      }

      if (MaxVectorSize > 32) {
        __ BIND(L_pre_main_post_64);
        // Partial copy to make dst address 64 byte aligned.
        __ movq(temp2, to);
        __ andq(temp2, 63);
        __ jcc(Assembler::equal, L_main_pre_loop_64bytes);

        __ negptr(temp2);
        __ addq(temp2, 64);
        if (shift) {
          __ shrq(temp2, shift);
        }
        __ movq(temp3, temp2);
        __ copy64_masked_avx(to, from, xmm1, k2, temp3, temp4, temp1, shift, 0, true);
        __ movq(temp4, temp2);
        __ movq(temp1, count);
        __ subq(temp1, temp2);

        __ cmpq(temp1, loop_size[shift]);
        __ jcc(Assembler::less, L_tail64);

        __ BIND(L_main_pre_loop_64bytes);
        __ subq(temp1, loop_size[shift]);

        // Main loop with aligned copy block size of 192 bytes at
        // 64 byte copy granularity.
        __ align32();
        __ BIND(L_main_loop_64bytes);
        __ copy64_avx(to, from, temp4, xmm1, false, shift, 0, true);
        __ copy64_avx(to, from, temp4, xmm1, false, shift, 64, true);
        __ copy64_avx(to, from, temp4, xmm1, false, shift, 128, true);
        __ addptr(temp4, loop_size[shift]);
        __ subq(temp1, loop_size[shift]);
        __ jcc(Assembler::greater, L_main_loop_64bytes);

        __ addq(temp1, loop_size[shift]);
        // Zero length check.
        __ jcc(Assembler::lessEqual, L_exit);

        __ BIND(L_tail64);

        // Tail handling using 64 byte [masked] vector copy operations.
        use64byteVector = true;
        __ arraycopy_avx3_special_cases(xmm1, k2, from, to, temp1, shift,
                                        temp4, temp3, use64byteVector, L_entry, L_exit);
      }
      __ BIND(L_exit);
    }

    address ucme_exit_pc = __ pc();
    // When called from generic_arraycopy, r11 contains specific values
    // used during the arraycopy epilogue; re-initialize r11 here.
    if (is_oop) {
      __ movq(r11, shift == 3 ? count : to);
    }
    bs->arraycopy_epilogue(_masm, decorators, type, from, to, count);
    restore_argument_regs(type);
    inc_counter_np(get_profile_ctr(shift)); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ vzeroupper();
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);
    return start;
  }
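
  // [Editor's illustrative sketch -- not part of the original file] How the
  // 'entry' out-parameter ties the disjoint and conjoint stubs together; a
  // hypothetical wiring, modeled on the byte-copy generators further below
  // (nooverlap_entry/conjoint_entry are made-up locals):
  //
  //   address nooverlap_entry = NULL;
  //   address conjoint_entry  = NULL;
  //   generate_disjoint_copy_avx3_masked(&nooverlap_entry,
  //                                      "jbyte_disjoint_arraycopy_avx3", 0,
  //                                      false, false, false);
  //   // The conjoint stub's array_overlap_test() branches to nooverlap_entry
  //   // whenever a forward (disjoint-style) copy is safe:
  //   generate_conjoint_copy_avx3_masked(&conjoint_entry,
  //                                      "jbyte_conjoint_arraycopy_avx3", 0,
  //                                      nooverlap_entry, false, false, false);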

  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  //
  address generate_conjoint_copy_avx3_masked(address* entry, const char *name, int shift,
                                             address nooverlap_target, bool aligned, bool is_oop,
                                             bool dest_uninitialized) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    int avx3threshold = VM_Version::avx3_threshold();
    bool use64byteVector = (MaxVectorSize > 32) && (avx3threshold == 0);

    Label L_main_pre_loop, L_main_pre_loop_64bytes, L_pre_main_post_64;
    Label L_main_loop, L_main_loop_64bytes, L_tail, L_tail64, L_exit, L_entry;
    const Register from  = rdi;  // source array address
    const Register to    = rsi;  // destination array address
    const Register count = rdx;  // elements count
    const Register temp1 = r8;
    const Register temp2 = rcx;
    const Register temp3 = r11;
    const Register temp4 = rax;
    // End pointers are inclusive, and if count is not zero they point
    // to the last unit copied: end_to[0] := end_from[0]

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    array_overlap_test(nooverlap_target, (Address::ScaleFactor)(shift));

    BasicType type_vec[] = { T_BYTE, T_SHORT, T_INT, T_LONG };
    BasicType type = is_oop ? T_OBJECT : type_vec[shift];

    setup_argument_regs(type);

    DecoratorSet decorators = IN_HEAP | IS_ARRAY;
    if (dest_uninitialized) {
      decorators |= IS_DEST_UNINITIALIZED;
    }
    if (aligned) {
      decorators |= ARRAYCOPY_ALIGNED;
    }
    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
    bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
    {
      // Type(shift)      byte(0), short(1), int(2), long(3)
      int loop_size[] = { 192,     96,       48,     24 };
      int threshold[] = { 4096,    2048,     1024,   512 };

      // UnsafeCopyMemory page error: continue after ucm
      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
      // 'from', 'to' and 'count' are now valid

      // temp1 holds the remaining count.
      __ movq(temp1, count);

      // Zero length check.
      __ BIND(L_tail);
      __ cmpq(temp1, 0);
      __ jcc(Assembler::lessEqual, L_exit);

      __ mov64(temp2, 0);
      __ movq(temp3, temp1);
      // Special cases using 32 byte [masked] vector copy operations.
      __ arraycopy_avx3_special_cases_conjoint(xmm1, k2, from, to, temp2, temp3, temp1, shift,
                                               temp4, use64byteVector, L_entry, L_exit);

      // PRE-MAIN-POST loop for aligned copy.
      __ BIND(L_entry);

      if ((MaxVectorSize > 32) && (avx3threshold != 0)) {
        __ cmpq(temp1, threshold[shift]);
        __ jcc(Assembler::greaterEqual, L_pre_main_post_64);
      }

      if ((MaxVectorSize < 64) || (avx3threshold != 0)) {
        // Partial copy to make dst address 32 byte aligned.
        __ leaq(temp2, Address(to, temp1, (Address::ScaleFactor)(shift), 0));
        __ andq(temp2, 31);
        __ jcc(Assembler::equal, L_main_pre_loop);

        if (shift) {
          __ shrq(temp2, shift);
        }
        __ subq(temp1, temp2);
        __ copy32_masked_avx(to, from, xmm1, k2, temp2, temp1, temp3, shift);

        __ cmpq(temp1, loop_size[shift]);
        __ jcc(Assembler::less, L_tail);

        __ BIND(L_main_pre_loop);

        // Main loop with aligned copy block size of 192 bytes at 32 byte granularity.
        __ align32();
        __ BIND(L_main_loop);
        __ copy64_avx(to, from, temp1, xmm1, true, shift, -64);
        __ copy64_avx(to, from, temp1, xmm1, true, shift, -128);
        __ copy64_avx(to, from, temp1, xmm1, true, shift, -192);
        __ subptr(temp1, loop_size[shift]);
        __ cmpq(temp1, loop_size[shift]);
        __ jcc(Assembler::greater, L_main_loop);

        // Tail loop.
        __ jmp(L_tail);
      }

      if (MaxVectorSize > 32) {
        __ BIND(L_pre_main_post_64);
        // Partial copy to make dst address 64 byte aligned.
        __ leaq(temp2, Address(to, temp1, (Address::ScaleFactor)(shift), 0));
        __ andq(temp2, 63);
        __ jcc(Assembler::equal, L_main_pre_loop_64bytes);

        if (shift) {
          __ shrq(temp2, shift);
        }
        __ subq(temp1, temp2);
        __ copy64_masked_avx(to, from, xmm1, k2, temp2, temp1, temp3, shift, 0, true);

        __ cmpq(temp1, loop_size[shift]);
        __ jcc(Assembler::less, L_tail64);

        __ BIND(L_main_pre_loop_64bytes);

        // Main loop with aligned copy block size of 192 bytes at
        // 64 byte copy granularity.
        __ align32();
        __ BIND(L_main_loop_64bytes);
        __ copy64_avx(to, from, temp1, xmm1, true, shift, -64, true);
        __ copy64_avx(to, from, temp1, xmm1, true, shift, -128, true);
        __ copy64_avx(to, from, temp1, xmm1, true, shift, -192, true);
        __ subq(temp1, loop_size[shift]);
        __ cmpq(temp1, loop_size[shift]);
        __ jcc(Assembler::greater, L_main_loop_64bytes);

        // Zero length check.
        __ cmpq(temp1, 0);
        __ jcc(Assembler::lessEqual, L_exit);

        __ BIND(L_tail64);

        // Tail handling using 64 byte [masked] vector copy operations.
        use64byteVector = true;
        __ mov64(temp2, 0);
        __ movq(temp3, temp1);
        __ arraycopy_avx3_special_cases_conjoint(xmm1, k2, from, to, temp2, temp3, temp1, shift,
                                                 temp4, use64byteVector, L_entry, L_exit);
      }
      __ BIND(L_exit);
    }
    address ucme_exit_pc = __ pc();
    // When called from generic_arraycopy, r11 contains specific values
    // used during the arraycopy epilogue; re-initialize r11 here.
    if (is_oop) {
      __ movq(r11, count);
    }
    bs->arraycopy_epilogue(_masm, decorators, type, from, to, count);
    restore_argument_regs(type);
    inc_counter_np(get_profile_ctr(shift)); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ vzeroupper();
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);
    return start;
  }
#endif // COMPILER2_OR_JVMCI
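
  // [Editor's illustrative sketch -- not part of the original file] Scalar
  // model of the choice array_overlap_test() makes, assuming the usual
  // unsigned-difference form of the check:
  static bool model_takes_backward_path(uintptr_t from, uintptr_t to, size_t byte_count) {
    // Unsigned compare: true iff from <= to < from + byte_count, i.e. the
    // destination starts inside (or at) the source range, so the stub
    // conservatively copies backward instead of jumping to the no-overlap entry.
    return (to - from) < byte_count;
  }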


  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
  // we let the hardware handle it.  The one to eight bytes within words,
  // dwords or qwords that span cache line boundaries will still be loaded
  // and stored atomically.
  //
  // Side Effects:
  //   disjoint_byte_copy_entry is set to the no-overlap entry point
  //   used by generate_conjoint_byte_copy().
  //
  address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name) {
#if COMPILER2_OR_JVMCI
    if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
      return generate_disjoint_copy_avx3_masked(entry, "jbyte_disjoint_arraycopy_avx3", 0,
                                                aligned, false, false);
    }
#endif
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
    Label L_copy_byte, L_exit;
    const Register from        = rdi;  // source array address
    const Register to          = rsi;  // destination array address
    const Register count       = rdx;  // elements count
    const Register byte_count  = rcx;
    const Register qword_count = count;
    const Register end_from    = from; // source array end address
    const Register end_to      = to;   // destination array end address
    // End pointers are inclusive, and if count is not zero they point
    // to the last unit copied: end_to[0] := end_from[0]

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                      // r9 and r10 may be used to save non-volatile registers

    {
      // UnsafeCopyMemory page error: continue after ucm
      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
      // 'from', 'to' and 'count' are now valid
      __ movptr(byte_count, count);
      __ shrptr(count, 3); // count => qword_count

      // Copy from low to high addresses.  Use 'to' as scratch.
      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
      __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
      __ negptr(qword_count); // make the count negative
      __ jmp(L_copy_bytes);

      // Copy trailing qwords
      __ BIND(L_copy_8_bytes);
      __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
      __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
      __ increment(qword_count);
      __ jcc(Assembler::notZero, L_copy_8_bytes);

      // Check for and copy trailing dword
      __ BIND(L_copy_4_bytes);
      __ testl(byte_count, 4);
      __ jccb(Assembler::zero, L_copy_2_bytes);
      __ movl(rax, Address(end_from, 8));
      __ movl(Address(end_to, 8), rax);

      __ addptr(end_from, 4);
      __ addptr(end_to, 4);

      // Check for and copy trailing word
      __ BIND(L_copy_2_bytes);
      __ testl(byte_count, 2);
      __ jccb(Assembler::zero, L_copy_byte);
      __ movw(rax, Address(end_from, 8));
      __ movw(Address(end_to, 8), rax);

      __ addptr(end_from, 2);
      __ addptr(end_to, 2);

      // Check for and copy trailing byte
      __ BIND(L_copy_byte);
      __ testl(byte_count, 1);
      __ jccb(Assembler::zero, L_exit);
      __ movb(rax, Address(end_from, 8));
      __ movb(Address(end_to, 8), rax);
    }
    __ BIND(L_exit);
    address ucme_exit_pc = __ pc();
    restore_arg_regs();
    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ vzeroupper();
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    {
      UnsafeCopyMemoryMark ucmm(this, !aligned, false, ucme_exit_pc);
      // Copy in multi-byte chunks
      copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
      __ jmp(L_copy_4_bytes);
    }
    return start;
  }
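
  // [Editor's worked example -- not part of the original file] Tail handling
  // above: for byte_count = 13, qword_count = 13 >> 3 = 1 qword (8 bytes) goes
  // through the bulk path, and the low bits 13 & 7 = 0b101 select the trailing
  // copies: a dword (4 bytes), no word, then a byte -- 8 + 4 + 1 = 13 bytes.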

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
  // we let the hardware handle it.  The one to eight bytes within words,
  // dwords or qwords that span cache line boundaries will still be loaded
  // and stored atomically.
  //
  address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
                                      address* entry, const char *name) {
#if COMPILER2_OR_JVMCI
    if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
      return generate_conjoint_copy_avx3_masked(entry, "jbyte_conjoint_arraycopy_avx3", 0,
                                                nooverlap_target, aligned, false, false);
    }
#endif
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
    const Register from        = rdi;  // source array address
    const Register to          = rsi;  // destination array address
    const Register count       = rdx;  // elements count
    const Register byte_count  = rcx;
    const Register qword_count = count;

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    array_overlap_test(nooverlap_target, Address::times_1);
    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                      // r9 and r10 may be used to save non-volatile registers

    {
      // UnsafeCopyMemory page error: continue after ucm
      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
      // 'from', 'to' and 'count' are now valid
      __ movptr(byte_count, count);
      __ shrptr(count, 3); // count => qword_count

      // Copy from high to low addresses.

      // Check for and copy trailing byte
      __ testl(byte_count, 1);
      __ jcc(Assembler::zero, L_copy_2_bytes);
      __ movb(rax, Address(from, byte_count, Address::times_1, -1));
      __ movb(Address(to, byte_count, Address::times_1, -1), rax);
      __ decrement(byte_count); // Adjust for possible trailing word

      // Check for and copy trailing word
      __ BIND(L_copy_2_bytes);
      __ testl(byte_count, 2);
      __ jcc(Assembler::zero, L_copy_4_bytes);
      __ movw(rax, Address(from, byte_count, Address::times_1, -2));
      __ movw(Address(to, byte_count, Address::times_1, -2), rax);

      // Check for and copy trailing dword
      __ BIND(L_copy_4_bytes);
      __ testl(byte_count, 4);
      __ jcc(Assembler::zero, L_copy_bytes);
      __ movl(rax, Address(from, qword_count, Address::times_8));
      __ movl(Address(to, qword_count, Address::times_8), rax);
      __ jmp(L_copy_bytes);

      // Copy trailing qwords
      __ BIND(L_copy_8_bytes);
      __ movq(rax, Address(from, qword_count, Address::times_8, -8));
      __ movq(Address(to, qword_count, Address::times_8, -8), rax);
      __ decrement(qword_count);
      __ jcc(Assembler::notZero, L_copy_8_bytes);
    }
    restore_arg_regs();
    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ vzeroupper();
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    {
      // UnsafeCopyMemory page error: continue after ucm
      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
      // Copy in multi-byte chunks
      copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
    }
    restore_arg_regs();
    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ vzeroupper();
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
  // let the hardware handle it.  The two or four words within dwords
  // or qwords that span cache line boundaries will still be loaded
  // and stored atomically.
  //
  // Side Effects:
  //   disjoint_short_copy_entry is set to the no-overlap entry point
  //   used by generate_conjoint_short_copy().
  //
  address generate_disjoint_short_copy(bool aligned, address *entry, const char *name) {
#if COMPILER2_OR_JVMCI
    if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
      return generate_disjoint_copy_avx3_masked(entry, "jshort_disjoint_arraycopy_avx3", 1,
                                                aligned, false, false);
    }
#endif

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes, L_exit;
    const Register from        = rdi;  // source array address
    const Register to          = rsi;  // destination array address
    const Register count       = rdx;  // elements count
    const Register word_count  = rcx;
    const Register qword_count = count;
    const Register end_from    = from; // source array end address
    const Register end_to      = to;   // destination array end address
    // End pointers are inclusive, and if count is not zero they point
    // to the last unit copied: end_to[0] := end_from[0]

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                      // r9 and r10 may be used to save non-volatile registers

    {
      // UnsafeCopyMemory page error: continue after ucm
      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
      // 'from', 'to' and 'count' are now valid
      __ movptr(word_count, count);
      __ shrptr(count, 2); // count => qword_count

      // Copy from low to high addresses.  Use 'to' as scratch.
      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
      __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
      __ negptr(qword_count);
      __ jmp(L_copy_bytes);

      // Copy trailing qwords
      __ BIND(L_copy_8_bytes);
      __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
      __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
      __ increment(qword_count);
      __ jcc(Assembler::notZero, L_copy_8_bytes);

      // Original 'dest' is trashed, so we can't use it as a
      // base register for a possible trailing word copy

      // Check for and copy trailing dword
      __ BIND(L_copy_4_bytes);
      __ testl(word_count, 2);
      __ jccb(Assembler::zero, L_copy_2_bytes);
      __ movl(rax, Address(end_from, 8));
      __ movl(Address(end_to, 8), rax);

      __ addptr(end_from, 4);
      __ addptr(end_to, 4);

      // Check for and copy trailing word
      __ BIND(L_copy_2_bytes);
      __ testl(word_count, 1);
      __ jccb(Assembler::zero, L_exit);
      __ movw(rax, Address(end_from, 8));
      __ movw(Address(end_to, 8), rax);
    }
    __ BIND(L_exit);
    address ucme_exit_pc = __ pc();
    restore_arg_regs();
    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ vzeroupper();
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    {
      UnsafeCopyMemoryMark ucmm(this, !aligned, false, ucme_exit_pc);
      // Copy in multi-byte chunks
      copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
      __ jmp(L_copy_4_bytes);
    }

    return start;
  }

  address generate_fill(BasicType t, bool aligned, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    BLOCK_COMMENT("Entry:");

    const Register to    = c_rarg0;  // destination array address
    const Register value = c_rarg1;  // value
    const Register count = c_rarg2;  // elements count
    __ mov(r11, count);

    __ enter(); // required for proper stackwalking of RuntimeStub frame

    __ generate_fill(t, aligned, to, value, r11, rax, xmm0);

    __ vzeroupper();
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);
    return start;
  }
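
  // [Editor's illustrative sketch -- not part of the original file] Scalar
  // model of what the generated fill stub computes, shown for T_INT; the stub
  // itself vectorizes this loop via MacroAssembler::generate_fill().
  static void model_fill_int(jint* to, jint value, size_t count) {
    for (size_t i = 0; i < count; i++) {
      to[i] = value;   // every element receives the same 32-bit value
    }
  }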

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
  // let the hardware handle it.  The two or four words within dwords
  // or qwords that span cache line boundaries will still be loaded
  // and stored atomically.
  //
  address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
                                       address *entry, const char *name) {
#if COMPILER2_OR_JVMCI
    if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
      return generate_conjoint_copy_avx3_masked(entry, "jshort_conjoint_arraycopy_avx3", 1,
                                                nooverlap_target, aligned, false, false);
    }
#endif
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes;
    const Register from        = rdi;  // source array address
    const Register to          = rsi;  // destination array address
    const Register count       = rdx;  // elements count
    const Register word_count  = rcx;
    const Register qword_count = count;

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    array_overlap_test(nooverlap_target, Address::times_2);
    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                      // r9 and r10 may be used to save non-volatile registers

    {
      // UnsafeCopyMemory page error: continue after ucm
      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
      // 'from', 'to' and 'count' are now valid
      __ movptr(word_count, count);
      __ shrptr(count, 2); // count => qword_count

      // Copy from high to low addresses.  Use 'to' as scratch.

      // Check for and copy trailing word
      __ testl(word_count, 1);
      __ jccb(Assembler::zero, L_copy_4_bytes);
      __ movw(rax, Address(from, word_count, Address::times_2, -2));
      __ movw(Address(to, word_count, Address::times_2, -2), rax);

      // Check for and copy trailing dword
      __ BIND(L_copy_4_bytes);
      __ testl(word_count, 2);
      __ jcc(Assembler::zero, L_copy_bytes);
      __ movl(rax, Address(from, qword_count, Address::times_8));
      __ movl(Address(to, qword_count, Address::times_8), rax);
      __ jmp(L_copy_bytes);

      // Copy trailing qwords
      __ BIND(L_copy_8_bytes);
      __ movq(rax, Address(from, qword_count, Address::times_8, -8));
      __ movq(Address(to, qword_count, Address::times_8, -8), rax);
      __ decrement(qword_count);
      __ jcc(Assembler::notZero, L_copy_8_bytes);
    }
    restore_arg_regs();
    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ vzeroupper();
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    {
      // UnsafeCopyMemory page error: continue after ucm
      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
      // Copy in multi-byte chunks
      copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
    }
    restore_arg_regs();
    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ vzeroupper();
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }
2231 | ||||
  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   is_oop  - true => oop array, so generate store check code
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  // the hardware handle it.  The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomically.
  //
  // Side Effects:
  //   disjoint_int_copy_entry is set to the no-overlap entry point
  //   used by generate_conjoint_int_oop_copy().
  //
  address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
                                         const char *name, bool dest_uninitialized = false) {
#if COMPILER2_OR_JVMCI
    if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
      return generate_disjoint_copy_avx3_masked(entry, "jint_disjoint_arraycopy_avx3", 2,
                                                aligned, is_oop, dest_uninitialized);
    }
#endif

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
    const Register from        = rdi;  // source array address
    const Register to          = rsi;  // destination array address
    const Register count       = rdx;  // elements count
    const Register dword_count = rcx;
    const Register qword_count = count;
    const Register end_from    = from; // source array end address
    const Register end_to      = to;   // destination array end address
    // End pointers are inclusive, and if count is not zero they point
    // to the last unit copied:  end_to[0] := end_from[0]

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
                                   // r9 is used to save r15_thread

    DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
    if (dest_uninitialized) {
      decorators |= IS_DEST_UNINITIALIZED;
    }
    if (aligned) {
      decorators |= ARRAYCOPY_ALIGNED;
    }

    BasicType type = is_oop ? T_OBJECT : T_INT;
    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
    bs->arraycopy_prologue(_masm, decorators, type, from, to, count);

    {
      // UnsafeCopyMemory page error: continue after ucm
      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
      // 'from', 'to' and 'count' are now valid
      __ movptr(dword_count, count);
      __ shrptr(count, 1); // count => qword_count

      // Copy from low to high addresses.  Use 'to' as scratch.
      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
      __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
      __ negptr(qword_count);
      __ jmp(L_copy_bytes);

      // Copy trailing qwords
      __ BIND(L_copy_8_bytes);
      __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
      __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
      __ increment(qword_count);
      __ jcc(Assembler::notZero, L_copy_8_bytes);

      // Check for and copy trailing dword
      __ BIND(L_copy_4_bytes);
      __ testl(dword_count, 1); // Only byte test since the value is 0 or 1
      __ jccb(Assembler::zero, L_exit);
      __ movl(rax, Address(end_from, 8));
      __ movl(Address(end_to, 8), rax);
    }
    __ BIND(L_exit);
    address ucme_exit_pc = __ pc();
    bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
    restore_arg_regs_using_thread();
    inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
    __ vzeroupper();
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    {
      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, false, ucme_exit_pc);
      // Copy in multi-bytes chunks
      copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
      __ jmp(L_copy_4_bytes);
    }

    return start;
  }

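  // Illustrative sketch (not generated code): the disjoint loops above bias
  // both pointers to the last qword and run a negated index up toward zero,
  // so the increment doubles as the loop-exit test. Equivalent C++ for n > 0:
  //
  //   int64_t* end_from = from + n - 1;   // inclusive end pointers
  //   int64_t* end_to   = to   + n - 1;
  //   for (intptr_t i = -(intptr_t)n + 1; i <= 0; i++) {
  //     end_to[i] = end_from[i];          // i == 0 copies the last qword
  //   }
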
  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   is_oop  - true => oop array, so generate store check code
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  // the hardware handle it.  The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomically.
  //
  address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
                                         address *entry, const char *name,
                                         bool dest_uninitialized = false) {
#if COMPILER2_OR_JVMCI
    if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
      return generate_conjoint_copy_avx3_masked(entry, "jint_conjoint_arraycopy_avx3", 2,
                                                nooverlap_target, aligned, is_oop, dest_uninitialized);
    }
#endif
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_bytes, L_copy_8_bytes, L_exit;
    const Register from        = rdi;  // source array address
    const Register to          = rsi;  // destination array address
    const Register count       = rdx;  // elements count
    const Register dword_count = rcx;
    const Register qword_count = count;

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    array_overlap_test(nooverlap_target, Address::times_4);
    setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
                                   // r9 is used to save r15_thread

    DecoratorSet decorators = IN_HEAP | IS_ARRAY;
    if (dest_uninitialized) {
      decorators |= IS_DEST_UNINITIALIZED;
    }
    if (aligned) {
      decorators |= ARRAYCOPY_ALIGNED;
    }

    BasicType type = is_oop ? T_OBJECT : T_INT;
    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
    // no registers are destroyed by this call
    bs->arraycopy_prologue(_masm, decorators, type, from, to, count);

    assert_clean_int(count, rax); // Make sure 'count' is clean int.
    {
      // UnsafeCopyMemory page error: continue after ucm
      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
      // 'from', 'to' and 'count' are now valid
      __ movptr(dword_count, count);
      __ shrptr(count, 1); // count => qword_count

      // Copy from high to low addresses.  Use 'to' as scratch.

      // Check for and copy trailing dword
      __ testl(dword_count, 1);
      __ jcc(Assembler::zero, L_copy_bytes);
      __ movl(rax, Address(from, dword_count, Address::times_4, -4));
      __ movl(Address(to, dword_count, Address::times_4, -4), rax);
      __ jmp(L_copy_bytes);

      // Copy trailing qwords
      __ BIND(L_copy_8_bytes);
      __ movq(rax, Address(from, qword_count, Address::times_8, -8));
      __ movq(Address(to, qword_count, Address::times_8, -8), rax);
      __ decrement(qword_count);
      __ jcc(Assembler::notZero, L_copy_8_bytes);
    }
    if (is_oop) {
      __ jmp(L_exit);
    }
    restore_arg_regs_using_thread();
    inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ vzeroupper();
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    {
      // UnsafeCopyMemory page error: continue after ucm
      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
      // Copy in multi-bytes chunks
      copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
    }

    __ BIND(L_exit);
    bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
    restore_arg_regs_using_thread();
    inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ vzeroupper();
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }

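  // Illustrative sketch (not generated code): array_overlap_test dispatches to
  // the disjoint stub whenever a forward (ascending) copy cannot clobber
  // unread source elements, so the descending path above only runs when the
  // destination overlaps the source tail. Approximately, in C++:
  //
  //   if ((uintptr_t)(to - from) >= byte_count) {
  //     // no harmful overlap: tail-call the disjoint (ascending) stub
  //   } else {
  //     // to lies inside [from, from + byte_count): copy descending
  //   }
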
  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
  //             ignored
  //   is_oop  - true => oop array, so generate store check code
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // Side Effects:
  //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
  //   no-overlap entry point used by generate_conjoint_long_oop_copy().
  //
  address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
                                          const char *name, bool dest_uninitialized = false) {
#if COMPILER2_OR_JVMCI
    if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
      return generate_disjoint_copy_avx3_masked(entry, "jlong_disjoint_arraycopy_avx3", 3,
                                                aligned, is_oop, dest_uninitialized);
    }
#endif
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_bytes, L_copy_8_bytes, L_exit;
    const Register from        = rdi;  // source array address
    const Register to          = rsi;  // destination array address
    const Register qword_count = rdx;  // elements count
    const Register end_from    = from; // source array end address
    const Register end_to      = rcx;  // destination array end address
    const Register saved_count = r11;
    // End pointers are inclusive, and if count is not zero they point
    // to the last unit copied:  end_to[0] := end_from[0]

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    // Save no-overlap entry point for generate_conjoint_long_oop_copy()
    assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
                                   // r9 is used to save r15_thread
    // 'from', 'to' and 'qword_count' are now valid

    DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
    if (dest_uninitialized) {
      decorators |= IS_DEST_UNINITIALIZED;
    }
    if (aligned) {
      decorators |= ARRAYCOPY_ALIGNED;
    }

    BasicType type = is_oop ? T_OBJECT : T_LONG;
    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
    bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
    {
      // UnsafeCopyMemory page error: continue after ucm
      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);

      // Copy from low to high addresses.  Use 'to' as scratch.
      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
      __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
      __ negptr(qword_count);
      __ jmp(L_copy_bytes);

      // Copy trailing qwords
      __ BIND(L_copy_8_bytes);
      __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
      __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
      __ increment(qword_count);
      __ jcc(Assembler::notZero, L_copy_8_bytes);
    }
    if (is_oop) {
      __ jmp(L_exit);
    } else {
      restore_arg_regs_using_thread();
      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
      __ xorptr(rax, rax); // return 0
      __ vzeroupper();
      __ leave(); // required for proper stackwalking of RuntimeStub frame
      __ ret(0);
    }

    {
      // UnsafeCopyMemory page error: continue after ucm
      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
      // Copy in multi-bytes chunks
      copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
    }

    __ BIND(L_exit);
    bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
    restore_arg_regs_using_thread();
    if (is_oop) {
      inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
    } else {
      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
    }
    __ vzeroupper();
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
  //             ignored
  //   is_oop  - true => oop array, so generate store check code
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  address generate_conjoint_long_oop_copy(bool aligned, bool is_oop,
                                          address nooverlap_target, address *entry,
                                          const char *name, bool dest_uninitialized = false) {
#if COMPILER2_OR_JVMCI
    if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
      return generate_conjoint_copy_avx3_masked(entry, "jlong_conjoint_arraycopy_avx3", 3,
                                                nooverlap_target, aligned, is_oop, dest_uninitialized);
    }
#endif
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_bytes, L_copy_8_bytes, L_exit;
    const Register from        = rdi;  // source array address
    const Register to          = rsi;  // destination array address
    const Register qword_count = rdx;  // elements count
    const Register saved_count = rcx;

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    array_overlap_test(nooverlap_target, Address::times_8);
    setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
                                   // r9 is used to save r15_thread
    // 'from', 'to' and 'qword_count' are now valid

    DecoratorSet decorators = IN_HEAP | IS_ARRAY;
    if (dest_uninitialized) {
      decorators |= IS_DEST_UNINITIALIZED;
    }
    if (aligned) {
      decorators |= ARRAYCOPY_ALIGNED;
    }

    BasicType type = is_oop ? T_OBJECT : T_LONG;
    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
    bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
    {
      // UnsafeCopyMemory page error: continue after ucm
      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);

      __ jmp(L_copy_bytes);

      // Copy trailing qwords
      __ BIND(L_copy_8_bytes);
      __ movq(rax, Address(from, qword_count, Address::times_8, -8));
      __ movq(Address(to, qword_count, Address::times_8, -8), rax);
      __ decrement(qword_count);
      __ jcc(Assembler::notZero, L_copy_8_bytes);
    }
    if (is_oop) {
      __ jmp(L_exit);
    } else {
      restore_arg_regs_using_thread();
      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
      __ xorptr(rax, rax); // return 0
      __ vzeroupper();
      __ leave(); // required for proper stackwalking of RuntimeStub frame
      __ ret(0);
    }
    {
      // UnsafeCopyMemory page error: continue after ucm
      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);

      // Copy in multi-bytes chunks
      copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
    }
    __ BIND(L_exit);
    bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
    restore_arg_regs_using_thread();
    if (is_oop) {
      inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
    } else {
      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
    }
    __ vzeroupper();
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }


  // Helper for generating a dynamic type check.
  // Smashes no registers.
  void generate_type_check(Register sub_klass,
                           Register super_check_offset,
                           Register super_klass,
                           Label& L_success) {
    assert_different_registers(sub_klass, super_check_offset, super_klass);

    BLOCK_COMMENT("type_check:");

    Label L_miss;

    __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL,
                                     super_check_offset);
    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);

    // Fall through on failure!
    __ BIND(L_miss);
  }

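  // Illustrative sketch (not generated code): the fast path of the check above
  // loads the word at sub_klass + super_check_offset and compares it with
  // super_klass; the slow path only runs when that probe is inconclusive.
  // Schematic C++ of the fast-path probe (offset handling simplified):
  //
  //   bool fast_path_hit(Klass* sub, Klass* super, int super_check_offset) {
  //     return *(Klass**)((char*)sub + super_check_offset) == super;
  //   }
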
  //
  //  Generate checkcasting array copy stub
  //
  //  Input:
  //    c_rarg0   - source array address
  //    c_rarg1   - destination array address
  //    c_rarg2   - element count, treated as ssize_t, can be zero
  //    c_rarg3   - size_t ckoff (super_check_offset)
  // not Win64
  //    c_rarg4   - oop ckval (super_klass)
  // Win64
  //    rsp+40    - oop ckval (super_klass)
  //
  //  Output:
  //    rax ==  0  -  success
  //    rax == -1^K - failure, where K is partial transfer count
  //
  address generate_checkcast_copy(const char *name, address *entry,
                                  bool dest_uninitialized = false) {

    Label L_load_element, L_store_element, L_do_card_marks, L_done;

    // Input registers (after setup_arg_regs)
    const Register from        = rdi;   // source array address
    const Register to          = rsi;   // destination array address
    const Register length      = rdx;   // elements count
    const Register ckoff       = rcx;   // super_check_offset
    const Register ckval       = r8;    // super_klass

    // Registers used as temps (r13, r14 are save-on-entry)
    const Register end_from    = from;  // source array end address
    const Register end_to      = r13;   // destination array end address
    const Register count       = rdx;   // -(count_remaining)
    const Register r14_length  = r14;   // saved copy of length
    // End pointers are inclusive, and if length is not zero they point
    // to the last unit copied:  end_to[0] := end_from[0]

    const Register rax_oop    = rax;    // actual oop copied
    const Register r11_klass  = r11;    // oop._klass

    //---------------------------------------------------------------
    // Assembler stub will be used for this call to arraycopy
    // if the two arrays are subtypes of Object[] but the
    // destination array type is not equal to or a supertype
    // of the source type.  Each element must be separately
    // checked.

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    __ enter(); // required for proper stackwalking of RuntimeStub frame

#ifdef ASSERT
    // caller guarantees that the arrays really are different
    // otherwise, we would have to make conjoint checks
    { Label L;
      array_overlap_test(L, TIMES_OOP);
      __ stop("checkcast_copy within a single array");
      __ bind(L);
    }
#endif //ASSERT

    setup_arg_regs(4); // from => rdi, to => rsi, length => rdx
                       // ckoff => rcx, ckval => r8
                       // r9 and r10 may be used to save non-volatile registers
#ifdef _WIN64
    // last argument (#4) is on stack on Win64
    __ movptr(ckval, Address(rsp, 6 * wordSize));
#endif

    // Caller of this entry point must set up the argument registers.
    if (entry != NULL) {
      *entry = __ pc();
      BLOCK_COMMENT("Entry:");
    }

    // allocate spill slots for r13, r14
    enum {
      saved_r13_offset,
      saved_r14_offset,
      saved_r10_offset,
      saved_rbp_offset
    };
    __ subptr(rsp, saved_rbp_offset * wordSize);
    __ movptr(Address(rsp, saved_r13_offset * wordSize), r13);
    __ movptr(Address(rsp, saved_r14_offset * wordSize), r14);
    __ movptr(Address(rsp, saved_r10_offset * wordSize), r10);

#ifdef ASSERT
    Label L2;
    __ get_thread(r14);
    __ cmpptr(r15_thread, r14);
    __ jcc(Assembler::equal, L2);
    __ stop("StubRoutines::call_stub: r15_thread is modified by call");
    __ bind(L2);
#endif // ASSERT

    // check that int operands are properly extended to size_t
    assert_clean_int(length, rax);
    assert_clean_int(ckoff, rax);

#ifdef ASSERT
    BLOCK_COMMENT("assert consistent ckoff/ckval");
    // The ckoff and ckval must be mutually consistent,
    // even though caller generates both.
    { Label L;
      int sco_offset = in_bytes(Klass::super_check_offset_offset());
      __ cmpl(ckoff, Address(ckval, sco_offset));
      __ jcc(Assembler::equal, L);
      __ stop("super_check_offset inconsistent");
      __ bind(L);
    }
#endif //ASSERT

    // Loop-invariant addresses.  They are exclusive end pointers.
    Address end_from_addr(from, length, TIMES_OOP, 0);
    Address   end_to_addr(to,   length, TIMES_OOP, 0);
    // Loop-variant addresses.  They assume post-incremented count < 0.
    Address from_element_addr(end_from, count, TIMES_OOP, 0);
    Address   to_element_addr(end_to,   count, TIMES_OOP, 0);

    DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT;
    if (dest_uninitialized) {
      decorators |= IS_DEST_UNINITIALIZED;
    }

    BasicType type = T_OBJECT;
    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
    bs->arraycopy_prologue(_masm, decorators, type, from, to, count);

    // Copy from low to high addresses, indexed from the end of each array.
    __ lea(end_from, end_from_addr);
    __ lea(end_to,   end_to_addr);
    __ movptr(r14_length, length);        // save a copy of the length
    assert(length == count, "");          // else fix next line:
    __ negptr(count);                     // negate and test the length
    __ jcc(Assembler::notZero, L_load_element);

    // Empty array:  Nothing to do.
    __ xorptr(rax, rax);                  // return 0 on (trivial) success
    __ jmp(L_done);

    // ======== begin loop ========
    // (Loop is rotated; its entry is L_load_element.)
    // Loop control:
    //   for (count = -count; count != 0; count++)
    // Base pointers src, dst are biased by 8*(count-1), to last element.
    __ align(OptoLoopAlignment);

    __ BIND(L_store_element);
    __ store_heap_oop(to_element_addr, rax_oop, noreg, noreg, AS_RAW);  // store the oop
    __ increment(count);                  // increment the count toward zero
    __ jcc(Assembler::zero, L_do_card_marks);

    // ======== loop entry is here ========
    __ BIND(L_load_element);
    __ load_heap_oop(rax_oop, from_element_addr, noreg, noreg, AS_RAW); // load the oop
    __ testptr(rax_oop, rax_oop);
    __ jcc(Assembler::zero, L_store_element);

    __ load_klass(r11_klass, rax_oop, rscratch1); // query the object klass
    generate_type_check(r11_klass, ckoff, ckval, L_store_element);
    // ======== end loop ========

    // It was a real error; we must depend on the caller to finish the job.
    // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
    // Emit GC store barriers for the oops we have copied (r14 + rdx),
    // and report their number to the caller.
    assert_different_registers(rax, r14_length, count, to, end_to, rcx, rscratch1);
    Label L_post_barrier;
    __ addptr(r14_length, count);     // K = (original - remaining) oops
    __ movptr(rax, r14_length);       // save the value
    __ notptr(rax);                   // report (-1^K) to caller (does not affect flags)
    __ jccb(Assembler::notZero, L_post_barrier);
    __ jmp(L_done); // K == 0, nothing was copied, skip post barrier

    // Come here on success only.
    __ BIND(L_do_card_marks);
    __ xorptr(rax, rax); // return 0 on success

    __ BIND(L_post_barrier);
    bs->arraycopy_epilogue(_masm, decorators, type, from, to, r14_length);

    // Common exit point (success or failure).
    __ BIND(L_done);
    __ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
    __ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
    __ movptr(r10, Address(rsp, saved_r10_offset * wordSize));
    restore_arg_regs();
    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); // Update counter after rscratch1 is free
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }

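  // Illustrative sketch (not generated code): the -1^K failure code above is a
  // bitwise NOT, so it can never collide with the 0 success code. Plain C++:
  //
  //   size_t   K       = 3;                 // oops copied before the miss
  //   intptr_t encoded = ~(intptr_t)K;      // what the stub leaves in rax (-1 - K)
  //   size_t   decoded = ~(size_t)encoded;  // caller recovers K
  //   // K == 0 still encodes to -1, which is distinct from success (rax == 0)
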
  //
  //  Generate 'unsafe' array copy stub
  //  Though just as safe as the other stubs, it takes an unscaled
  //  size_t argument instead of an element count.
  //
  //  Input:
  //    c_rarg0   - source array address
  //    c_rarg1   - destination array address
  //    c_rarg2   - byte count, treated as ssize_t, can be zero
  //
  // Examines the alignment of the operands and dispatches
  // to a long, int, short, or byte copy loop.
  //
  address generate_unsafe_copy(const char *name,
                               address byte_copy_entry, address short_copy_entry,
                               address int_copy_entry, address long_copy_entry) {

    Label L_long_aligned, L_int_aligned, L_short_aligned;

    // Input registers (before setup_arg_regs)
    const Register from        = c_rarg0;  // source array address
    const Register to          = c_rarg1;  // destination array address
    const Register size        = c_rarg2;  // byte count (size_t)

    // Register used as a temp
    const Register bits        = rax;      // test copy of low bits

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    __ enter(); // required for proper stackwalking of RuntimeStub frame

    // bump this on entry, not on exit:
    inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);

    __ mov(bits, from);
    __ orptr(bits, to);
    __ orptr(bits, size);

    __ testb(bits, BytesPerLong-1);
    __ jccb(Assembler::zero, L_long_aligned);

    __ testb(bits, BytesPerInt-1);
    __ jccb(Assembler::zero, L_int_aligned);

    __ testb(bits, BytesPerShort-1);
    __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry));

    __ BIND(L_short_aligned);
    __ shrptr(size, LogBytesPerShort); // size => short_count
    __ jump(RuntimeAddress(short_copy_entry));

    __ BIND(L_int_aligned);
    __ shrptr(size, LogBytesPerInt); // size => int_count
    __ jump(RuntimeAddress(int_copy_entry));

    __ BIND(L_long_aligned);
    __ shrptr(size, LogBytesPerLong); // size => qword_count
    __ jump(RuntimeAddress(long_copy_entry));

    return start;
  }

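  // Illustrative sketch (not generated code): OR-ing both addresses with the
  // byte count lets one test per granularity decide the dispatch, because a
  // low bit is clear in the OR only if it is clear in all three operands.
  // Equivalent C++:
  //
  //   uintptr_t bits = (uintptr_t)from | (uintptr_t)to | size;
  //   if      ((bits & 7) == 0) { /* long copy,  count = size >> 3 */ }
  //   else if ((bits & 3) == 0) { /* int copy,   count = size >> 2 */ }
  //   else if ((bits & 1) == 0) { /* short copy, count = size >> 1 */ }
  //   else                      { /* byte copy,  count = size      */ }
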
  // Perform range checks on the proposed arraycopy.
  // Kills temp, but nothing else.
  // Also, clean the sign bits of src_pos and dst_pos.
  void arraycopy_range_checks(Register src,     // source array oop (c_rarg0)
                              Register src_pos, // source position (c_rarg1)
                              Register dst,     // destination array oop (c_rarg2)
                              Register dst_pos, // destination position (c_rarg3)
                              Register length,
                              Register temp,
                              Label& L_failed) {
    BLOCK_COMMENT("arraycopy_range_checks:");

    //  if (src_pos + length > arrayOop(src)->length())  FAIL;
    __ movl(temp, length);
    __ addl(temp, src_pos);             // src_pos + length
    __ cmpl(temp, Address(src, arrayOopDesc::length_offset_in_bytes()));
    __ jcc(Assembler::above, L_failed);

    //  if (dst_pos + length > arrayOop(dst)->length())  FAIL;
    __ movl(temp, length);
    __ addl(temp, dst_pos);             // dst_pos + length
    __ cmpl(temp, Address(dst, arrayOopDesc::length_offset_in_bytes()));
    __ jcc(Assembler::above, L_failed);

    // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'.
    // Move with sign extension can be used since they are positive.
    __ movslq(src_pos, src_pos);
    __ movslq(dst_pos, dst_pos);

    BLOCK_COMMENT("arraycopy_range_checks done");
  }

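  // Illustrative sketch (not generated code): the 'above' (unsigned) compare
  // in the checks makes a 32-bit sum that overflows int look enormous rather
  // than negative, so it still fails the bound. Equivalent C++, assuming pos
  // and len were already verified non-negative:
  //
  //   bool out_of_range(int32_t pos, int32_t len, int32_t array_len) {
  //     uint32_t end = (uint32_t)pos + (uint32_t)len;  // may exceed INT_MAX
  //     return end > (uint32_t)array_len;              // unsigned compare
  //   }
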
  //
  //  Generate generic array copy stubs
  //
  //  Input:
  //    c_rarg0    -  src oop
  //    c_rarg1    -  src_pos (32-bits)
  //    c_rarg2    -  dst oop
  //    c_rarg3    -  dst_pos (32-bits)
  // not Win64
  //    c_rarg4    -  element count (32-bits)
  // Win64
  //    rsp+40     -  element count (32-bits)
  //
  //  Output:
  //    rax ==  0  -  success
  //    rax == -1^K - failure, where K is partial transfer count
  //
  address generate_generic_copy(const char *name,
                                address byte_copy_entry, address short_copy_entry,
                                address int_copy_entry, address oop_copy_entry,
                                address long_copy_entry, address checkcast_copy_entry) {

    Label L_failed, L_failed_0, L_objArray;
    Label L_copy_shorts, L_copy_ints, L_copy_longs;

    // Input registers
    const Register src        = c_rarg0;  // source array oop
    const Register src_pos    = c_rarg1;  // source position
    const Register dst        = c_rarg2;  // destination array oop
    const Register dst_pos    = c_rarg3;  // destination position
#ifndef _WIN64
    const Register length     = c_rarg4;
    const Register rklass_tmp = r9;       // load_klass
#else
    const Address  length(rsp, 7 * wordSize); // elements count is on stack on Win64
    const Register rklass_tmp = rdi;      // load_klass
#endif

    { int modulus = CodeEntryAlignment;
      int target  = modulus - 5; // 5 = sizeof jmp(L_failed)
      int advance = target - (__ offset() % modulus);
      if (advance < 0)  advance += modulus;
      if (advance > 0)  __ nop(advance);
    }
    StubCodeMark mark(this, "StubRoutines", name);

    // Short-hop target to L_failed.  Makes for denser prologue code.
    __ BIND(L_failed_0);
    __ jmp(L_failed);
    assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed");

    __ align(CodeEntryAlignment);
    address start = __ pc();

    __ enter(); // required for proper stackwalking of RuntimeStub frame

#ifdef _WIN64
    __ push(rklass_tmp); // rdi is callee-save on Windows
#endif

    // bump this on entry, not on exit:
    inc_counter_np(SharedRuntime::_generic_array_copy_ctr);

    //-----------------------------------------------------------------------
    // Assembler stub will be used for this call to arraycopy
    // if the following conditions are met:
    //
    // (1) src and dst must not be null.
    // (2) src_pos must not be negative.
    // (3) dst_pos must not be negative.
    // (4) length  must not be negative.
    // (5) src klass and dst klass should be the same and not NULL.
    // (6) src and dst should be arrays.
    // (7) src_pos + length must not exceed length of src.
    // (8) dst_pos + length must not exceed length of dst.
    //

    //  if (src == NULL) return -1;
    __ testptr(src, src);         // src oop
    size_t j1off = __ offset();
    __ jccb(Assembler::zero, L_failed_0);

    //  if (src_pos < 0) return -1;
    __ testl(src_pos, src_pos); // src_pos (32-bits)
    __ jccb(Assembler::negative, L_failed_0);

    //  if (dst == NULL) return -1;
    __ testptr(dst, dst);         // dst oop
    __ jccb(Assembler::zero, L_failed_0);

    //  if (dst_pos < 0) return -1;
    __ testl(dst_pos, dst_pos); // dst_pos (32-bits)
    size_t j4off = __ offset();
    __ jccb(Assembler::negative, L_failed_0);

    // The first four tests are very dense code,
    // but not quite dense enough to put four
    // jumps in a 16-byte instruction fetch buffer.
    // That's good, because some branch predictors
    // do not like jumps so close together.
    // Make sure of this.
    guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps");

    // registers used as temp
    const Register r11_length    = r11; // elements count to copy
    const Register r10_src_klass = r10; // array klass

    //  if (length < 0) return -1;
    __ movl(r11_length, length);   // length (elements count, 32-bits value)
    __ testl(r11_length, r11_length);
    __ jccb(Assembler::negative, L_failed_0);

    __ load_klass(r10_src_klass, src, rklass_tmp);
#ifdef ASSERT
    //  assert(src->klass() != NULL);
    {
      BLOCK_COMMENT("assert klasses not null {");
      Label L1, L2;
      __ testptr(r10_src_klass, r10_src_klass);
      __ jcc(Assembler::notZero, L2);   // it is broken if klass is NULL
      __ bind(L1);
      __ stop("broken null klass");
      __ bind(L2);
      __ load_klass(rax, dst, rklass_tmp);
      __ cmpq(rax, 0);
      __ jcc(Assembler::equal, L1);     // this would be broken also
      BLOCK_COMMENT("} assert klasses not null done");
    }
#endif

    // Load layout helper (32-bits)
    //
    //  |array_tag|     | header_size | element_type |     |log2_element_size|
    // 32        30    24            16              8     2                 0
    //
    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
    //

    const int lh_offset = in_bytes(Klass::layout_helper_offset());

    // Handle objArrays completely differently...
    const jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
    __ cmpl(Address(r10_src_klass, lh_offset), objArray_lh);
    __ jcc(Assembler::equal, L_objArray);

    //  if (src->klass() != dst->klass()) return -1;
    __ load_klass(rax, dst, rklass_tmp);
    __ cmpq(r10_src_klass, rax);
    __ jcc(Assembler::notEqual, L_failed);

    const Register rax_lh = rax;  // layout helper
    __ movl(rax_lh, Address(r10_src_klass, lh_offset));

    //  if (!src->is_Array()) return -1;
    __ cmpl(rax_lh, Klass::_lh_neutral_value);
    __ jcc(Assembler::greaterEqual, L_failed);

    // At this point, it is known to be a typeArray (array_tag 0x3).
#ifdef ASSERT
    {
      BLOCK_COMMENT("assert primitive array {");
      Label L;
      __ cmpl(rax_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift));
      __ jcc(Assembler::greaterEqual, L);
      __ stop("must be a primitive array");
      __ bind(L);
      BLOCK_COMMENT("} assert primitive array done");
    }
#endif

3158 | arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, | |||
3159 | r10, L_failed); | |||
3160 | ||||
3161 | // TypeArrayKlass | |||
3162 | // | |||
3163 | // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); | |||
3164 | // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); | |||
3165 | // | |||
3166 | ||||
3167 | const Register r10_offset = r10; // array offset | |||
3168 | const Register rax_elsize = rax_lh; // element size | |||
3169 | ||||
3170 | __masm-> movl(r10_offset, rax_lh); | |||
3171 | __masm-> shrl(r10_offset, Klass::_lh_header_size_shift); | |||
3172 | __masm-> andptr(r10_offset, Klass::_lh_header_size_mask); // array_offset | |||
3173 | __masm-> addptr(src, r10_offset); // src array offset | |||
3174 | __masm-> addptr(dst, r10_offset); // dst array offset | |||
3175 | BLOCK_COMMENT("choose copy loop based on element size")masm-> block_comment("choose copy loop based on element size" ); | |||
3176 | __masm-> andl(rax_lh, Klass::_lh_log2_element_size_mask); // rax_lh -> rax_elsize | |||
3177 | ||||
3178 | #ifdef _WIN64 | |||
3179 | __masm-> pop(rklass_tmp); // Restore callee-save rdi | |||
3180 | #endif | |||
3181 | ||||
3182 | // next registers should be set before the jump to corresponding stub | |||
3183 | const Register from = c_rarg0; // source array address | |||
3184 | const Register to = c_rarg1; // destination array address | |||
3185 | const Register count = c_rarg2; // elements count | |||
3186 | ||||
3187 | // 'from', 'to', 'count' registers should be set in such order | |||
3188 | // since they are the same as 'src', 'src_pos', 'dst'. | |||
3189 | ||||
3190 | __masm-> cmpl(rax_elsize, 0); | |||
3191 | __masm-> jccb(Assembler::notEqual, L_copy_shorts)jccb_0(Assembler::notEqual, L_copy_shorts, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp" , 3191); | |||
3192 | __masm-> lea(from, Address(src, src_pos, Address::times_1, 0));// src_addr | |||
3193 | __masm-> lea(to, Address(dst, dst_pos, Address::times_1, 0));// dst_addr | |||
3194 | __masm-> movl2ptr(count, r11_length); // length | |||
3195 | __masm-> jump(RuntimeAddress(byte_copy_entry)); | |||
3196 | ||||
3197 | __masm-> BIND(L_copy_shorts)bind(L_copy_shorts); masm-> block_comment("L_copy_shorts" ":" ); | |||
3198 | __masm-> cmpl(rax_elsize, LogBytesPerShort); | |||
3199 | __masm-> jccb(Assembler::notEqual, L_copy_ints)jccb_0(Assembler::notEqual, L_copy_ints, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp" , 3199); | |||
3200 | __masm-> lea(from, Address(src, src_pos, Address::times_2, 0));// src_addr | |||
3201 | __masm-> lea(to, Address(dst, dst_pos, Address::times_2, 0));// dst_addr | |||
3202 | __masm-> movl2ptr(count, r11_length); // length | |||
3203 | __masm-> jump(RuntimeAddress(short_copy_entry)); | |||
3204 | ||||
3205 | __masm-> BIND(L_copy_ints)bind(L_copy_ints); masm-> block_comment("L_copy_ints" ":"); | |||
3206 | __masm-> cmpl(rax_elsize, LogBytesPerInt); | |||
3207 | __masm-> jccb(Assembler::notEqual, L_copy_longs)jccb_0(Assembler::notEqual, L_copy_longs, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp" , 3207); | |||
3208 | __masm-> lea(from, Address(src, src_pos, Address::times_4, 0));// src_addr | |||
3209 | __masm-> lea(to, Address(dst, dst_pos, Address::times_4, 0));// dst_addr | |||
3210 | __masm-> movl2ptr(count, r11_length); // length | |||
3211 | __masm-> jump(RuntimeAddress(int_copy_entry)); | |||
3212 | ||||
3213 | __masm-> BIND(L_copy_longs)bind(L_copy_longs); masm-> block_comment("L_copy_longs" ":" ); | |||
3214 | #ifdef ASSERT1 | |||
3215 | { | |||
3216 | BLOCK_COMMENT("assert long copy {")masm-> block_comment("assert long copy {"); | |||
3217 | Label L; | |||
3218 | __masm-> cmpl(rax_elsize, LogBytesPerLong); | |||
3219 | __masm-> jcc(Assembler::equal, L); | |||
3220 | __masm-> stop("must be long copy, but elsize is wrong"); | |||
3221 | __masm-> bind(L); | |||
3222 | BLOCK_COMMENT("} assert long copy done")masm-> block_comment("} assert long copy done"); | |||
3223 | } | |||
3224 | #endif | |||
    __ lea(from, Address(src, src_pos, Address::times_8, 0)); // src_addr
    __ lea(to,   Address(dst, dst_pos, Address::times_8, 0)); // dst_addr
    __ movl2ptr(count, r11_length); // length
    __ jump(RuntimeAddress(long_copy_entry));

    // ObjArrayKlass
  __ BIND(L_objArray);
    // live at this point:  r10_src_klass, r11_length, src[_pos], dst[_pos]

    Label L_plain_copy, L_checkcast_copy;
    // test array classes for subtyping
    __ load_klass(rax, dst, rklass_tmp);
    __ cmpq(r10_src_klass, rax); // usual case is exact equality
    __ jcc(Assembler::notEqual, L_checkcast_copy);

    // Identically typed arrays can be copied without element-wise checks.
    arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
                           r10, L_failed);

    __ lea(from, Address(src, src_pos, TIMES_OOP,
                         arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
    __ lea(to,   Address(dst, dst_pos, TIMES_OOP,
                         arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
    __ movl2ptr(count, r11_length); // length
  __ BIND(L_plain_copy);
#ifdef _WIN64
    __ pop(rklass_tmp); // Restore callee-save rdi
#endif
    __ jump(RuntimeAddress(oop_copy_entry));

  __ BIND(L_checkcast_copy);
    // live at this point:  r10_src_klass, r11_length, rax (dst_klass)
    {
      // Before looking at dst.length, make sure dst is also an objArray.
      __ cmpl(Address(rax, lh_offset), objArray_lh);
      __ jcc(Assembler::notEqual, L_failed);

      // It is safe to examine both src.length and dst.length.
      arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
                             rax, L_failed);

      const Register r11_dst_klass = r11;
      __ load_klass(r11_dst_klass, dst, rklass_tmp); // reload

      // Marshal the base address arguments now, freeing registers.
      __ lea(from, Address(src, src_pos, TIMES_OOP,
                           arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
      __ lea(to,   Address(dst, dst_pos, TIMES_OOP,
                           arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
      __ movl(count, length);           // length (reloaded)
      Register sco_temp = c_rarg3;      // this register is free now
      assert_different_registers(from, to, count, sco_temp,
                                 r11_dst_klass, r10_src_klass);
      assert_clean_int(count, sco_temp);

      // Generate the type check.
      const int sco_offset = in_bytes(Klass::super_check_offset_offset());
      __ movl(sco_temp, Address(r11_dst_klass, sco_offset));
      assert_clean_int(sco_temp, rax);
      generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy);

      // Fetch destination element klass from the ObjArrayKlass header.
      int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
      __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset));
      __ movl(  sco_temp,      Address(r11_dst_klass, sco_offset));
      assert_clean_int(sco_temp, rax);

#ifdef _WIN64
      __ pop(rklass_tmp); // Restore callee-save rdi
#endif

      // the checkcast_copy loop needs two extra arguments:
      assert(c_rarg3 == sco_temp, "#3 already in place");
      // Set up arguments for checkcast_copy_entry.
      setup_arg_regs(4);
      __ movptr(r8, r11_dst_klass); // dst.klass.element_klass, r8 is c_rarg4 on Linux/Solaris
      __ jump(RuntimeAddress(checkcast_copy_entry));
    }

  __ BIND(L_failed);
#ifdef _WIN64
    __ pop(rklass_tmp); // Restore callee-save rdi
#endif
    __ xorptr(rax, rax);
    __ notptr(rax); // return -1
    __ leave();   // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }
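
  // Illustrative sketch, not part of the generated code: the dispatch the
  // object-array path above implements, written out as pseudocode.
  //   if (src_klass == dst_klass)          -> unchecked oop copy (L_plain_copy)
  //   else if (dst is not an objArray)     -> fail, return -1    (L_failed)
  //   else if (src subtype-checks vs dst)  -> unchecked oop copy (L_plain_copy)
  //   else                                 -> element-wise checkcast copy,
  //                                           keyed on dst's element klass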

  address generate_data_cache_writeback() {
    const Register src = c_rarg0;  // source address

    __ align(CodeEntryAlignment);

    StubCodeMark mark(this, "StubRoutines", "_data_cache_writeback");

    address start = __ pc();
    __ enter();
    __ cache_wb(Address(src, 0));
    __ leave();
    __ ret(0);

    return start;
  }
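
  // Note (assumption, based on the x86 MacroAssembler and not verified here):
  // cache_wb() is expected to select the strongest writeback instruction the
  // CPU offers (clwb, clflushopt, or plain clflush), which is why this stub
  // needs only a single emitted instruction between enter() and leave().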

  address generate_data_cache_writeback_sync() {
    const Register is_pre = c_rarg0;  // pre or post sync

    __ align(CodeEntryAlignment);

    StubCodeMark mark(this, "StubRoutines", "_data_cache_writeback_sync");

    // pre wbsync is a no-op
    // post wbsync translates to an sfence

    Label skip;
    address start = __ pc();
    __ enter();
    __ cmpl(is_pre, 0);
    __ jcc(Assembler::notEqual, skip);
    __ cache_wbsync(false);
    __ bind(skip);
    __ leave();
    __ ret(0);

    return start;
  }
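
  // Illustrative sketch, not part of the stub: the behavior generated above,
  // in plain C++. Only the post-writeback sync emits an ordering fence.
  //   void wbsync_model(bool is_pre) {
  //     if (is_pre) return;  // pre wbsync is a no-op
  //     sfence();            // post wbsync orders prior cache-line writebacks
  //   }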

  void generate_arraycopy_stubs() {
    address entry;
    address entry_jbyte_arraycopy;
    address entry_jshort_arraycopy;
    address entry_jint_arraycopy;
    address entry_oop_arraycopy;
    address entry_jlong_arraycopy;
    address entry_checkcast_arraycopy;

    StubRoutines::_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(false, &entry,
                                                                           "jbyte_disjoint_arraycopy");
    StubRoutines::_jbyte_arraycopy           = generate_conjoint_byte_copy(false, entry, &entry_jbyte_arraycopy,
                                                                           "jbyte_arraycopy");

    StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry,
                                                                            "jshort_disjoint_arraycopy");
    StubRoutines::_jshort_arraycopy          = generate_conjoint_short_copy(false, entry, &entry_jshort_arraycopy,
                                                                            "jshort_arraycopy");

    StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_oop_copy(false, false, &entry,
                                                                              "jint_disjoint_arraycopy");
    StubRoutines::_jint_arraycopy            = generate_conjoint_int_oop_copy(false, false, entry,
                                                                              &entry_jint_arraycopy, "jint_arraycopy");

    StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, false, &entry,
                                                                               "jlong_disjoint_arraycopy");
    StubRoutines::_jlong_arraycopy           = generate_conjoint_long_oop_copy(false, false, entry,
                                                                               &entry_jlong_arraycopy, "jlong_arraycopy");

    if (UseCompressedOops) {
      StubRoutines::_oop_disjoint_arraycopy        = generate_disjoint_int_oop_copy(false, true, &entry,
                                                                                    "oop_disjoint_arraycopy");
      StubRoutines::_oop_arraycopy                 = generate_conjoint_int_oop_copy(false, true, entry,
                                                                                    &entry_oop_arraycopy, "oop_arraycopy");
      StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, &entry,
                                                                                    "oop_disjoint_arraycopy_uninit",
                                                                                    /*dest_uninitialized*/true);
      StubRoutines::_oop_arraycopy_uninit          = generate_conjoint_int_oop_copy(false, true, entry,
                                                                                    NULL, "oop_arraycopy_uninit",
                                                                                    /*dest_uninitialized*/true);
    } else {
      StubRoutines::_oop_disjoint_arraycopy        = generate_disjoint_long_oop_copy(false, true, &entry,
                                                                                     "oop_disjoint_arraycopy");
      StubRoutines::_oop_arraycopy                 = generate_conjoint_long_oop_copy(false, true, entry,
                                                                                     &entry_oop_arraycopy, "oop_arraycopy");
      StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, &entry,
                                                                                     "oop_disjoint_arraycopy_uninit",
                                                                                     /*dest_uninitialized*/true);
      StubRoutines::_oop_arraycopy_uninit          = generate_conjoint_long_oop_copy(false, true, entry,
                                                                                     NULL, "oop_arraycopy_uninit",
                                                                                     /*dest_uninitialized*/true);
    }

    StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
    StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
                                                                        /*dest_uninitialized*/true);

    StubRoutines::_unsafe_arraycopy  = generate_unsafe_copy("unsafe_arraycopy",
                                                            entry_jbyte_arraycopy,
                                                            entry_jshort_arraycopy,
                                                            entry_jint_arraycopy,
                                                            entry_jlong_arraycopy);
    StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy",
                                                             entry_jbyte_arraycopy,
                                                             entry_jshort_arraycopy,
                                                             entry_jint_arraycopy,
                                                             entry_oop_arraycopy,
                                                             entry_jlong_arraycopy,
                                                             entry_checkcast_arraycopy);

    StubRoutines::_jbyte_fill          = generate_fill(T_BYTE,  false, "jbyte_fill");
    StubRoutines::_jshort_fill         = generate_fill(T_SHORT, false, "jshort_fill");
    StubRoutines::_jint_fill           = generate_fill(T_INT,   false, "jint_fill");
    StubRoutines::_arrayof_jbyte_fill  = generate_fill(T_BYTE,  true,  "arrayof_jbyte_fill");
    StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true,  "arrayof_jshort_fill");
    StubRoutines::_arrayof_jint_fill   = generate_fill(T_INT,   true,  "arrayof_jint_fill");

    // We don't generate specialized code for HeapWord-aligned source
    // arrays, so just use the code we've already generated
    StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = StubRoutines::_jbyte_disjoint_arraycopy;
    StubRoutines::_arrayof_jbyte_arraycopy           = StubRoutines::_jbyte_arraycopy;

    StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy;
    StubRoutines::_arrayof_jshort_arraycopy          = StubRoutines::_jshort_arraycopy;

    StubRoutines::_arrayof_jint_disjoint_arraycopy   = StubRoutines::_jint_disjoint_arraycopy;
    StubRoutines::_arrayof_jint_arraycopy            = StubRoutines::_jint_arraycopy;

    StubRoutines::_arrayof_jlong_disjoint_arraycopy  = StubRoutines::_jlong_disjoint_arraycopy;
    StubRoutines::_arrayof_jlong_arraycopy           = StubRoutines::_jlong_arraycopy;

    StubRoutines::_arrayof_oop_disjoint_arraycopy    = StubRoutines::_oop_disjoint_arraycopy;
    StubRoutines::_arrayof_oop_arraycopy             = StubRoutines::_oop_arraycopy;

    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit;
    StubRoutines::_arrayof_oop_arraycopy_uninit          = StubRoutines::_oop_arraycopy_uninit;
  }

  // AES intrinsic stubs
  enum {AESBlockSize = 16};

  address generate_key_shuffle_mask() {
    __ align(16);
    StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
    address start = __ pc();
    __ emit_data64( 0x0405060700010203, relocInfo::none );
    __ emit_data64( 0x0c0d0e0f08090a0b, relocInfo::none );
    return start;
  }

  address generate_counter_shuffle_mask() {
    __ align(16);
    StubCodeMark mark(this, "StubRoutines", "counter_shuffle_mask");
    address start = __ pc();
    __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
    __ emit_data64(0x0001020304050607, relocInfo::none);
    return start;
  }

  // Utility routine for loading a 128-bit key word in little endian format.
  // The shuffle mask may optionally be supplied in an XMM register.
  void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
    __ movdqu(xmmdst, Address(key, offset));
    if (xmm_shuf_mask != NULL) {
      __ pshufb(xmmdst, xmm_shuf_mask);
    } else {
      __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    }
  }
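
  // Illustrative sketch, not used by the stubs: the key shuffle mask above is
  // the byte sequence 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12, so pshufb with
  // it reverses the byte order within each 32-bit word of the block. The same
  // effect in plain C++:
  static void shuffle_key_words_model(unsigned char block[16]) {
    for (int w = 0; w < 4; w++) {    // four 32-bit words per 128-bit block
      for (int b = 0; b < 2; b++) {  // swap bytes 0<->3 and 1<->2
        unsigned char t = block[4 * w + b];
        block[4 * w + b] = block[4 * w + 3 - b];
        block[4 * w + 3 - b] = t;
      }
    }
  }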

  // Utility routine for incrementing the 128-bit counter (the IV in CTR mode).
  void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block) {
    __ pextrq(reg, xmmdst, 0x0);
    __ addq(reg, inc_delta);
    __ pinsrq(xmmdst, reg, 0x0);
    __ jcc(Assembler::carryClear, next_block); // jump if no carry
    __ pextrq(reg, xmmdst, 0x01); // carry into the high qword
    __ addq(reg, 0x01);
    __ pinsrq(xmmdst, reg, 0x01);
    __ BIND(next_block); // next instruction
  }
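
  // Illustrative sketch, not used by the stubs: the 128-bit increment above,
  // modeled with two 64-bit halves (ctr[0] = low qword, ctr[1] = high qword).
  static void inc_counter_model(unsigned long long ctr[2], unsigned long long delta) {
    unsigned long long lo = ctr[0];
    ctr[0] = lo + delta;
    if (ctr[0] < lo) { // the addq carried out of the low qword
      ctr[1] += 1;
    }
  }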

  // Arguments:
  //
  // Inputs:
  //   c_rarg0   - source byte array address
  //   c_rarg1   - destination byte array address
  //   c_rarg2   - K (key) in little endian int array
  //
  address generate_aescrypt_encryptBlock() {
    assert(UseAES, "need AES instructions and misaligned SSE support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
    Label L_doLast;
    address start = __ pc();

    const Register from   = c_rarg0;  // source array address
    const Register to     = c_rarg1;  // destination array address
    const Register key    = c_rarg2;  // key array address
    const Register keylen = rax;

    const XMMRegister xmm_result = xmm0;
    const XMMRegister xmm_key_shuf_mask = xmm1;
    // On win64 xmm6-xmm15 must be preserved so don't use them.
    const XMMRegister xmm_temp1 = xmm2;
    const XMMRegister xmm_temp2 = xmm3;
    const XMMRegister xmm_temp3 = xmm4;
    const XMMRegister xmm_temp4 = xmm5;

    __ enter(); // required for proper stackwalking of RuntimeStub frame

    // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input

    // For encryption, the java expanded key ordering is just what we need.
    // We don't know if the key is aligned, hence not using the load-execute form.

    load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
    __ pxor(xmm_result, xmm_temp1);

    load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
    load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
    load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);

    __ aesenc(xmm_result, xmm_temp1);
    __ aesenc(xmm_result, xmm_temp2);
    __ aesenc(xmm_result, xmm_temp3);
    __ aesenc(xmm_result, xmm_temp4);

    load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
    load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
    load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);

    __ aesenc(xmm_result, xmm_temp1);
    __ aesenc(xmm_result, xmm_temp2);
    __ aesenc(xmm_result, xmm_temp3);
    __ aesenc(xmm_result, xmm_temp4);

    load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);

    __ cmpl(keylen, 44);
    __ jccb(Assembler::equal, L_doLast);

    __ aesenc(xmm_result, xmm_temp1);
    __ aesenc(xmm_result, xmm_temp2);

    load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);

    __ cmpl(keylen, 52);
    __ jccb(Assembler::equal, L_doLast);

    __ aesenc(xmm_result, xmm_temp1);
    __ aesenc(xmm_result, xmm_temp2);

    load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);

  __ BIND(L_doLast);
    __ aesenc(xmm_result, xmm_temp1);
    __ aesenclast(xmm_result, xmm_temp2);
    __ movdqu(Address(to, 0), xmm_result); // store the result
    __ xorptr(rax, rax);                   // return 0
    __ leave();                            // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }
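
  // Illustrative sketch, not used by the stubs: the keylen dispatch above in
  // plain C++. The Java expanded key holds (rounds + 1) four-int round keys,
  // so its length in ints identifies the AES variant.
  static int aes_rounds_for_keylen_model(int keylen_in_ints) {
    // 44 -> AES-128 (10 rounds), 52 -> AES-192 (12), 60 -> AES-256 (14)
    return keylen_in_ints / 4 - 1;
  }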


  // Arguments:
  //
  // Inputs:
  //   c_rarg0   - source byte array address
  //   c_rarg1   - destination byte array address
  //   c_rarg2   - K (key) in little endian int array
  //
  address generate_aescrypt_decryptBlock() {
    assert(UseAES, "need AES instructions and misaligned SSE support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
    Label L_doLast;
    address start = __ pc();

    const Register from   = c_rarg0;  // source array address
    const Register to     = c_rarg1;  // destination array address
    const Register key    = c_rarg2;  // key array address
    const Register keylen = rax;

    const XMMRegister xmm_result = xmm0;
    const XMMRegister xmm_key_shuf_mask = xmm1;
    // On win64 xmm6-xmm15 must be preserved so don't use them.
    const XMMRegister xmm_temp1 = xmm2;
    const XMMRegister xmm_temp2 = xmm3;
    const XMMRegister xmm_temp3 = xmm4;
    const XMMRegister xmm_temp4 = xmm5;

    __ enter(); // required for proper stackwalking of RuntimeStub frame

    // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    __ movdqu(xmm_result, Address(from, 0));

    // For decryption, the java expanded key ordering is rotated one position
    // from what we want, so we start from 0x10 here and hit 0x00 last.
    // We don't know if the key is aligned, hence not using the load-execute form.
    load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
    load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
    load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);

    __ pxor  (xmm_result, xmm_temp1);
    __ aesdec(xmm_result, xmm_temp2);
    __ aesdec(xmm_result, xmm_temp3);
    __ aesdec(xmm_result, xmm_temp4);

    load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
    load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
    load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);

    __ aesdec(xmm_result, xmm_temp1);
    __ aesdec(xmm_result, xmm_temp2);
    __ aesdec(xmm_result, xmm_temp3);
    __ aesdec(xmm_result, xmm_temp4);

    load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
    load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);

    __ cmpl(keylen, 44);
    __ jccb(Assembler::equal, L_doLast);

    __ aesdec(xmm_result, xmm_temp1);
    __ aesdec(xmm_result, xmm_temp2);

    load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);

    __ cmpl(keylen, 52);
    __ jccb(Assembler::equal, L_doLast);

    __ aesdec(xmm_result, xmm_temp1);
    __ aesdec(xmm_result, xmm_temp2);

    load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);

  __ BIND(L_doLast);
    __ aesdec(xmm_result, xmm_temp1);
    __ aesdec(xmm_result, xmm_temp2);

    // for decryption the aesdeclast operation is always on key+0x00
    __ aesdeclast(xmm_result, xmm_temp3);
    __ movdqu(Address(to, 0), xmm_result); // store the result
    __ xorptr(rax, rax);                   // return 0
    __ leave();                            // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }
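
  // Illustrative note, not part of the generated code: the round-key order
  // the decrypt block above walks, as byte offsets into the expanded key:
  //   pxor       key[0x10]
  //   aesdec     key[0x20] .. key[0x10 * rounds]
  //   aesdeclast key[0x00]   // always key+0x00 for decryption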


  // Arguments:
  //
  // Inputs:
  //   c_rarg0   - source byte array address
  //   c_rarg1   - destination byte array address
  //   c_rarg2   - K (key) in little endian int array
  //   c_rarg3   - r vector byte array address
  //   c_rarg4   - input length
  //
  // Output:
  //   rax       - input length
  //
  address generate_cipherBlockChaining_encryptAESCrypt() {
    assert(UseAES, "need AES instructions and misaligned SSE support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
    address start = __ pc();

    Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
    const Register from = c_rarg0;  // source array address
    const Register to   = c_rarg1;  // destination array address
    const Register key  = c_rarg2;  // key array address
    const Register rvec = c_rarg3;  // r byte array initialized from initvector array address
                                    // and left with the results of the last encryption block
#ifndef _WIN64
    const Register len_reg = c_rarg4;  // src len (must be multiple of blocksize 16)
#else
    const Address len_mem(rbp, 6 * wordSize);  // length is on stack on Win64
    const Register len_reg = r11;              // pick the volatile windows register
#endif
    const Register pos = rax;

    // xmm register assignments for the loops below
    const XMMRegister xmm_result = xmm0;
    const XMMRegister xmm_temp   = xmm1;
    // keys 0-10 preloaded into xmm2-xmm12
    const int XMM_REG_NUM_KEY_FIRST = 2;
    const int XMM_REG_NUM_KEY_LAST  = 15;
    const XMMRegister xmm_key0  = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
    const XMMRegister xmm_key10 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+10);
    const XMMRegister xmm_key11 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+11);
    const XMMRegister xmm_key12 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+12);
    const XMMRegister xmm_key13 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+13);

    __ enter(); // required for proper stackwalking of RuntimeStub frame

#ifdef _WIN64
    // on win64, fill len_reg from stack position
    __ movl(len_reg, len_mem);
#else
    __ push(len_reg); // Save
#endif

    const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    // load up xmm regs xmm2 thru xmm12 with key 0x00 - 0xa0
    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_FIRST+10; rnum++) {
      load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
      offset += 0x10;
    }
    __ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec

    // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
    __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
    __ cmpl(rax, 44);
    __ jcc(Assembler::notEqual, L_key_192_256);

    // 128 bit code follows here
    __ movptr(pos, 0);
    __ align(OptoLoopAlignment);

  __ BIND(L_loopTop_128);
    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
    __ pxor  (xmm_result, xmm_temp);  // xor with the current r vector
    __ pxor  (xmm_result, xmm_key0);  // do the aes rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 9; rnum++) {
      __ aesenc(xmm_result, as_XMMRegister(rnum));
    }
    __ aesenclast(xmm_result, xmm_key10);
    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    __ jcc(Assembler::notEqual, L_loopTop_128);

  __ BIND(L_exit);
    __ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object

#ifdef _WIN64
    __ movl(rax, len_mem);
#else
    __ pop(rax); // return length
#endif
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

  __ BIND(L_key_192_256);
    // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
    load_key(xmm_key11, key, 0xb0, xmm_key_shuf_mask);
    load_key(xmm_key12, key, 0xc0, xmm_key_shuf_mask);
    __ cmpl(rax, 52);
    __ jcc(Assembler::notEqual, L_key_256);

    // 192-bit code follows here (could be changed to use more xmm registers)
    __ movptr(pos, 0);
    __ align(OptoLoopAlignment);

  __ BIND(L_loopTop_192);
    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
    __ pxor  (xmm_result, xmm_temp);  // xor with the current r vector
    __ pxor  (xmm_result, xmm_key0);  // do the aes rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 11; rnum++) {
      __ aesenc(xmm_result, as_XMMRegister(rnum));
    }
    __ aesenclast(xmm_result, xmm_key12);
    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    __ jcc(Assembler::notEqual, L_loopTop_192);
    __ jmp(L_exit);

  __ BIND(L_key_256);
    // 256-bit code follows here (could be changed to use more xmm registers)
    load_key(xmm_key13, key, 0xd0, xmm_key_shuf_mask);
    __ movptr(pos, 0);
    __ align(OptoLoopAlignment);

  __ BIND(L_loopTop_256);
    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
    __ pxor  (xmm_result, xmm_temp);  // xor with the current r vector
    __ pxor  (xmm_result, xmm_key0);  // do the aes rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 13; rnum++) {
      __ aesenc(xmm_result, as_XMMRegister(rnum));
    }
    load_key(xmm_temp, key, 0xe0);
    __ aesenclast(xmm_result, xmm_temp);
    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    __ jcc(Assembler::notEqual, L_loopTop_256);
    __ jmp(L_exit);

    return start;
  }
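
  // Illustrative note, not part of the generated code: the loops above
  // implement the standard CBC recurrence, one block per iteration:
  //   C[0] = E_K(P[0] ^ IV),   C[i] = E_K(P[i] ^ C[i-1])
  // Each block's input depends on the previous ciphertext, so encryption is
  // inherently serial; only decryption gets a parallelized variant below.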

  // Safefetch stubs.
  void generate_safefetch(const char* name, int size, address* entry,
                          address* fault_pc, address* continuation_pc) {
    // safefetch signatures:
    //   int      SafeFetch32(int*      adr, int      errValue);
    //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
    //
    // arguments:
    //   c_rarg0 = adr
    //   c_rarg1 = errValue
    //
    // result:
    //   rax     = *adr or errValue

    StubCodeMark mark(this, "StubRoutines", name);

    // Entry point, pc or function descriptor.
    *entry = __ pc();

    // Load *adr into c_rarg1, may fault.
    *fault_pc = __ pc();
    switch (size) {
      case 4:
        // int32_t
        __ movl(c_rarg1, Address(c_rarg0, 0));
        break;
      case 8:
        // int64_t
        __ movq(c_rarg1, Address(c_rarg0, 0));
        break;
      default:
        ShouldNotReachHere();
    }

    // return errValue or *adr
    *continuation_pc = __ pc();
    __ movq(rax, c_rarg1);
    __ ret(0);
  }
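
  // Illustrative note, not part of the generated code: callers use these
  // stubs to probe possibly-invalid memory. A fault at *adr is intercepted
  // by the VM's signal handler, which resumes at continuation_pc with the
  // error value still in c_rarg1, e.g.
  //   int v = SafeFetch32(addr, -1);  // yields -1 instead of crashing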

  // This is a version of CBC/AES Decrypt which does 4 blocks in a loop at a time
  // to hide instruction latency.
  //
  // Arguments:
  //
  // Inputs:
  //   c_rarg0   - source byte array address
  //   c_rarg1   - destination byte array address
  //   c_rarg2   - K (key) in little endian int array
  //   c_rarg3   - r vector byte array address
  //   c_rarg4   - input length
  //
  // Output:
  //   rax       - input length
  //
  address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
    assert(UseAES, "need AES instructions and misaligned SSE support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
    address start = __ pc();

    const Register from = c_rarg0;  // source array address
    const Register to   = c_rarg1;  // destination array address
    const Register key  = c_rarg2;  // key array address
    const Register rvec = c_rarg3;  // r byte array initialized from initvector array address
                                    // and left with the results of the last encryption block
#ifndef _WIN64
    const Register len_reg = c_rarg4;  // src len (must be multiple of blocksize 16)
#else
    const Address len_mem(rbp, 6 * wordSize);  // length is on stack on Win64
    const Register len_reg = r11;              // pick the volatile windows register
#endif
    const Register pos = rax;

    const int PARALLEL_FACTOR = 4;
    const int ROUNDS[3] = { 10, 12, 14 }; // aes rounds for key128, key192, key256

    Label L_exit;
    Label L_singleBlock_loopTopHead[3];  // 128, 192, 256
    Label L_singleBlock_loopTopHead2[3]; // 128, 192, 256
    Label L_singleBlock_loopTop[3];      // 128, 192, 256
    Label L_multiBlock_loopTopHead[3];   // 128, 192, 256
    Label L_multiBlock_loopTop[3];       // 128, 192, 256

    // keys 0-10 preloaded into xmm5-xmm15
    const int XMM_REG_NUM_KEY_FIRST = 5;
    const int XMM_REG_NUM_KEY_LAST  = 15;
    const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
    const XMMRegister xmm_key_last  = as_XMMRegister(XMM_REG_NUM_KEY_LAST);

    __ enter(); // required for proper stackwalking of RuntimeStub frame

#ifdef _WIN64
    // on win64, fill len_reg from stack position
    __ movl(len_reg, len_mem);
#else
    __ push(len_reg); // Save
#endif
    __ push(rbx);
    // the java expanded key ordering is rotated one position from what we want,
    // so we start from 0x10 here and hit 0x00 last
    const XMMRegister xmm_key_shuf_mask = xmm1;  // used temporarily to swap key bytes up front
    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    // load up xmm regs 5 thru 15 with key 0x10 - 0xa0 - 0x00
    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum < XMM_REG_NUM_KEY_LAST; rnum++) {
      load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
      offset += 0x10;
    }
    load_key(xmm_key_last, key, 0x00, xmm_key_shuf_mask);

    const XMMRegister xmm_prev_block_cipher = xmm1; // holds cipher of previous block

    // registers holding the four results in the parallelized loop
    const XMMRegister xmm_result0 = xmm0;
    const XMMRegister xmm_result1 = xmm2;
    const XMMRegister xmm_result2 = xmm3;
    const XMMRegister xmm_result3 = xmm4;

    __ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // initialize with initial rvec

    __ xorptr(pos, pos);

    // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
    __ movl(rbx, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
    __ cmpl(rbx, 52);
    __ jcc(Assembler::equal, L_multiBlock_loopTopHead[1]);
    __ cmpl(rbx, 60);
    __ jcc(Assembler::equal, L_multiBlock_loopTopHead[2]);

#define DoFour(opc, src_reg)          \
    __ opc(xmm_result0, src_reg);     \
    __ opc(xmm_result1, src_reg);     \
    __ opc(xmm_result2, src_reg);     \
    __ opc(xmm_result3, src_reg);

    for (int k = 0; k < 3; ++k) {
      __ BIND(L_multiBlock_loopTopHead[k]);
      if (k != 0) {
        __ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least 4 blocks left
        __ jcc(Assembler::less, L_singleBlock_loopTopHead2[k]);
      }
      if (k == 1) {
        __ subptr(rsp, 6 * wordSize);
        __ movdqu(Address(rsp, 0), xmm15); // save last_key from xmm15
        load_key(xmm15, key, 0xb0); // 0xb0; 192-bit key goes up to 0xc0
        __ movdqu(Address(rsp, 2 * wordSize), xmm15);
        load_key(xmm1, key, 0xc0);  // 0xc0;
        __ movdqu(Address(rsp, 4 * wordSize), xmm1);
      } else if (k == 2) {
        __ subptr(rsp, 10 * wordSize);
        __ movdqu(Address(rsp, 0), xmm15); // save last_key from xmm15
        load_key(xmm15, key, 0xd0); // 0xd0; 256-bit key goes up to 0xe0
        __ movdqu(Address(rsp, 6 * wordSize), xmm15);
        load_key(xmm1, key, 0xe0);  // 0xe0;
        __ movdqu(Address(rsp, 8 * wordSize), xmm1);
        load_key(xmm15, key, 0xb0); // 0xb0;
        __ movdqu(Address(rsp, 2 * wordSize), xmm15);
        load_key(xmm1, key, 0xc0);  // 0xc0;
        __ movdqu(Address(rsp, 4 * wordSize), xmm1);
      }
      __ align(OptoLoopAlignment);
      __ BIND(L_multiBlock_loopTop[k]);
      __ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least 4 blocks left
      __ jcc(Assembler::less, L_singleBlock_loopTopHead[k]);

      if (k != 0) {
        __ movdqu(xmm15, Address(rsp, 2 * wordSize));
        __ movdqu(xmm1, Address(rsp, 4 * wordSize));
      }

      __ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); // get next 4 blocks into xmmresult registers
      __ movdqu(xmm_result1, Address(from, pos, Address::times_1, 1 * AESBlockSize));
      __ movdqu(xmm_result2, Address(from, pos, Address::times_1, 2 * AESBlockSize));
      __ movdqu(xmm_result3, Address(from, pos, Address::times_1, 3 * AESBlockSize));

      DoFour(pxor, xmm_key_first);
      if (k == 0) {
        for (int rnum = 1; rnum < ROUNDS[k]; rnum++) {
          DoFour(aesdec, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST));
        }
        DoFour(aesdeclast, xmm_key_last);
      } else if (k == 1) {
        for (int rnum = 1; rnum <= ROUNDS[k]-2; rnum++) {
          DoFour(aesdec, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST));
        }
        __ movdqu(xmm_key_last, Address(rsp, 0)); // xmm15 needs to be loaded again.
        DoFour(aesdec, xmm1);  // key : 0xc0
        __ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // xmm1 needs to be loaded again
        DoFour(aesdeclast, xmm_key_last);
      } else if (k == 2) {
        for (int rnum = 1; rnum <= ROUNDS[k] - 4; rnum++) {
          DoFour(aesdec, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST));
        }
        DoFour(aesdec, xmm1);  // key : 0xc0
        __ movdqu(xmm15, Address(rsp, 6 * wordSize));
        __ movdqu(xmm1, Address(rsp, 8 * wordSize));
        DoFour(aesdec, xmm15); // key : 0xd0
        __ movdqu(xmm_key_last, Address(rsp, 0)); // xmm15 needs to be loaded again.
        DoFour(aesdec, xmm1);  // key : 0xe0
        __ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // xmm1 needs to be loaded again
        DoFour(aesdeclast, xmm_key_last);
      }

      // for each result, xor with the r vector of previous cipher block
      __ pxor(xmm_result0, xmm_prev_block_cipher);
      __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 0 * AESBlockSize));
      __ pxor(xmm_result1, xmm_prev_block_cipher);
      __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 1 * AESBlockSize));
      __ pxor(xmm_result2, xmm_prev_block_cipher);
      __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 2 * AESBlockSize));
      __ pxor(xmm_result3, xmm_prev_block_cipher);
      __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 3 * AESBlockSize)); // this will carry over to next set of blocks
      if (k != 0) {
        __ movdqu(Address(rvec, 0x00), xmm_prev_block_cipher);
      }

      __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0); // store 4 results into the next 64 bytes of output
      __ movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1);
      __ movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2);
      __ movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3);

      __ addptr(pos, PARALLEL_FACTOR * AESBlockSize);
      __ subptr(len_reg, PARALLEL_FACTOR * AESBlockSize);
      __ jmp(L_multiBlock_loopTop[k]);

      // registers used in the non-parallelized loops
      // xmm register assignments for the loops below
      const XMMRegister xmm_result = xmm0;
      const XMMRegister xmm_prev_block_cipher_save = xmm2;
      const XMMRegister xmm_key11 = xmm3;
      const XMMRegister xmm_key12 = xmm4;
      const XMMRegister key_tmp   = xmm4;

      __ BIND(L_singleBlock_loopTopHead[k]);
      if (k == 1) {
        __ addptr(rsp, 6 * wordSize);
      } else if (k == 2) {
        __ addptr(rsp, 10 * wordSize);
      }
      __ cmpptr(len_reg, 0); // any blocks left?
      __ jcc(Assembler::equal, L_exit);
      __ BIND(L_singleBlock_loopTopHead2[k]);
      if (k == 1) {
        load_key(xmm_key11, key, 0xb0); // 0xb0; 192-bit key goes up to 0xc0
        load_key(xmm_key12, key, 0xc0); // 0xc0; 192-bit key goes up to 0xc0
      }
      if (k == 2) {
        load_key(xmm_key11, key, 0xb0); // 0xb0; 256-bit key goes up to 0xe0
      }
      __ align(OptoLoopAlignment);
      __ BIND(L_singleBlock_loopTop[k]);
      __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
      __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector
      __ pxor(xmm_result, xmm_key_first); // do the aes dec rounds
      for (int rnum = 1; rnum <= 9 ; rnum++) {
        __ aesdec(xmm_result, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST));
      }
      if (k == 1) {
        __ aesdec(xmm_result, xmm_key11);
        __ aesdec(xmm_result, xmm_key12);
      }
      if (k == 2) {
        __ aesdec(xmm_result, xmm_key11);
        load_key(key_tmp, key, 0xc0);
        __ aesdec(xmm_result, key_tmp);
        load_key(key_tmp, key, 0xd0);
        __ aesdec(xmm_result, key_tmp);
        load_key(key_tmp, key, 0xe0);
        __ aesdec(xmm_result, key_tmp);
      }

      __ aesdeclast(xmm_result, xmm_key_last);    // xmm15 always came from key+0
      __ pxor(xmm_result, xmm_prev_block_cipher); // xor with the current r vector
      __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
      // no need to store r to memory until we exit
      __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block
      __ addptr(pos, AESBlockSize);
      __ subptr(len_reg, AESBlockSize);
      __ jcc(Assembler::notEqual, L_singleBlock_loopTop[k]);
      if (k != 2) {
        __ jmp(L_exit);
      }
    } // for 128/192/256

  __ BIND(L_exit);
    __ movdqu(Address(rvec, 0), xmm_prev_block_cipher); // final value of r stored in rvec of CipherBlockChaining object
    __ pop(rbx);
#ifdef _WIN64
    __ movl(rax, len_mem);
#else
    __ pop(rax); // return length
#endif
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);
    return start;
  }
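
  // Illustrative note, not part of the generated code: CBC decryption is
  // data-parallel because every ciphertext block is already available:
  //   P[i] = D_K(C[i]) ^ C[i-1]   (with C[-1] = IV)
  // which is what lets the loop above keep four aesdec pipelines in flight.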

  address generate_electronicCodeBook_encryptAESCrypt() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "electronicCodeBook_encryptAESCrypt");
    address start = __ pc();
    const Register from = c_rarg0;  // source array address
    const Register to   = c_rarg1;  // destination array address
    const Register key  = c_rarg2;  // key array address
    const Register len  = c_rarg3;  // src len (must be multiple of blocksize 16)
    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ aesecb_encrypt(from, to, key, len);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);
    return start;
  }

  address generate_electronicCodeBook_decryptAESCrypt() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "electronicCodeBook_decryptAESCrypt");
    address start = __ pc();
    const Register from = c_rarg0;  // source array address
    const Register to   = c_rarg1;  // destination array address
    const Register key  = c_rarg2;  // key array address
    const Register len  = c_rarg3;  // src len (must be multiple of blocksize 16)
    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ aesecb_decrypt(from, to, key, len);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);
    return start;
  }
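
  // Illustrative note, not part of the generated code: ECB has no chaining
  // state, E_K/D_K is applied to each 16-byte block independently, so both
  // stubs above can delegate the entire bulk transform to the macro
  // assembler's aesecb_encrypt/aesecb_decrypt without any per-block glue.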

  // ofs and limit are used for multi-block byte arrays.
  // int com.sun.security.provider.MD5.implCompress(byte[] b, int ofs)
  address generate_md5_implCompress(bool multi_block, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    const Register buf_param = r15;
    const Address state_param(rsp, 0 * wordSize);
    const Address ofs_param  (rsp, 1 * wordSize    );
    const Address limit_param(rsp, 1 * wordSize + 4);

    __ enter();
    __ push(rbx);
    __ push(rdi);
    __ push(rsi);
    __ push(r15);
    __ subptr(rsp, 2 * wordSize);

    __ movptr(buf_param, c_rarg0);
    __ movptr(state_param, c_rarg1);
    if (multi_block) {
      __ movl(ofs_param, c_rarg2);
      __ movl(limit_param, c_rarg3);
    }
    __ fast_md5(buf_param, state_param, ofs_param, limit_param, multi_block);

    __ addptr(rsp, 2 * wordSize);
    __ pop(r15);
    __ pop(rsi);
    __ pop(rdi);
    __ pop(rbx);
    __ leave();
    __ ret(0);
    return start;
  }
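
  // Note on the calling convention above: fast_md5() consumes stack-based
  // Address operands for state/ofs/limit, so the stub spills c_rarg1..c_rarg3
  // into the two reserved stack words rather than keeping them in registers.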

  address generate_upper_word_mask() {
    __ align64();
    StubCodeMark mark(this, "StubRoutines", "upper_word_mask");
    address start = __ pc();
    __ emit_data64(0x0000000000000000, relocInfo::none);
    __ emit_data64(0xFFFFFFFF00000000, relocInfo::none);
    return start;
  }

  address generate_shuffle_byte_flip_mask() {
    __ align64();
    StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask");
    address start = __ pc();
    __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
    __ emit_data64(0x0001020304050607, relocInfo::none);
    return start;
  }

  // ofs and limit are used for multi-block byte arrays.
  // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
  address generate_sha1_implCompress(bool multi_block, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Register buf   = c_rarg0;
    Register state = c_rarg1;
    Register ofs   = c_rarg2;
    Register limit = c_rarg3;

    const XMMRegister abcd = xmm0;
    const XMMRegister e0   = xmm1;
    const XMMRegister e1   = xmm2;
    const XMMRegister msg0 = xmm3;

    const XMMRegister msg1 = xmm4;
    const XMMRegister msg2 = xmm5;
    const XMMRegister msg3 = xmm6;
    const XMMRegister shuf_mask = xmm7;

    __ enter();

    __ subptr(rsp, 4 * wordSize);

    __ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask,
                 buf, state, ofs, limit, rsp, multi_block);

    __ addptr(rsp, 4 * wordSize);

    __ leave();
    __ ret(0);
    return start;
  }

  address generate_pshuffle_byte_flip_mask() {
    __ align64();
    StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask");
    address start = __ pc();
    __ emit_data64(0x0405060700010203, relocInfo::none);
    __ emit_data64(0x0c0d0e0f08090a0b, relocInfo::none);

    if (VM_Version::supports_avx2()) {
      __ emit_data64(0x0405060700010203, relocInfo::none); // second copy
      __ emit_data64(0x0c0d0e0f08090a0b, relocInfo::none);
      // _SHUF_00BA
      __ emit_data64(0x0b0a090803020100, relocInfo::none);
      __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
      __ emit_data64(0x0b0a090803020100, relocInfo::none);
      __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
      // _SHUF_DC00
      __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
      __ emit_data64(0x0b0a090803020100, relocInfo::none);
      __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
      __ emit_data64(0x0b0a090803020100, relocInfo::none);
    }

    return start;
  }

  // Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
  address generate_pshuffle_byte_flip_mask_sha512() {
    __ align32();
    StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask_sha512");
    address start = __ pc();
    if (VM_Version::supports_avx2()) {
      __ emit_data64(0x0001020304050607, relocInfo::none); // PSHUFFLE_BYTE_FLIP_MASK
      __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
      __ emit_data64(0x1011121314151617, relocInfo::none);
      __ emit_data64(0x18191a1b1c1d1e1f, relocInfo::none);
      __ emit_data64(0x0000000000000000, relocInfo::none); // MASK_YMM_LO
      __ emit_data64(0x0000000000000000, relocInfo::none);
      __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
      __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
    }

    return start;
  }
4295 | ||||
4296 | // ofs and limit are use for multi-block byte array. | |||
4297 | // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) | |||
4298 | address generate_sha256_implCompress(bool multi_block, const char *name) { | |||
4299 | assert(VM_Version::supports_sha() || VM_Version::supports_avx2(), "")do { if (!(VM_Version::supports_sha() || VM_Version::supports_avx2 ())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp" , 4299, "assert(" "VM_Version::supports_sha() || VM_Version::supports_avx2()" ") failed", ""); ::breakpoint(); } } while (0); | |||
4300 | __masm-> align(CodeEntryAlignment); | |||
4301 | StubCodeMark mark(this, "StubRoutines", name); | |||
4302 | address start = __masm-> pc(); | |||
4303 | ||||
4304 | Register buf = c_rarg0; | |||
4305 | Register state = c_rarg1; | |||
4306 | Register ofs = c_rarg2; | |||
4307 | Register limit = c_rarg3; | |||
4308 | ||||
4309 | const XMMRegister msg = xmm0; | |||
4310 | const XMMRegister state0 = xmm1; | |||
4311 | const XMMRegister state1 = xmm2; | |||
4312 | const XMMRegister msgtmp0 = xmm3; | |||
4313 | ||||
4314 | const XMMRegister msgtmp1 = xmm4; | |||
4315 | const XMMRegister msgtmp2 = xmm5; | |||
4316 | const XMMRegister msgtmp3 = xmm6; | |||
4317 | const XMMRegister msgtmp4 = xmm7; | |||
4318 | ||||
4319 | const XMMRegister shuf_mask = xmm8; | |||
4320 | ||||
4321 | __masm-> enter(); | |||
4322 | ||||
4323 | __masm-> subptr(rsp, 4 * wordSize); | |||
4324 | ||||
4325 | if (VM_Version::supports_sha()) { | |||
4326 | __masm-> fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4, | |||
4327 | buf, state, ofs, limit, rsp, multi_block, shuf_mask); | |||
4328 | } else if (VM_Version::supports_avx2()) { | |||
4329 | __masm-> sha256_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4, | |||
4330 | buf, state, ofs, limit, rsp, multi_block, shuf_mask); | |||
4331 | } | |||
4332 | __masm-> addptr(rsp, 4 * wordSize); | |||
4333 | __masm-> vzeroupper(); | |||
4334 | __masm-> leave(); | |||
4335 | __masm-> ret(0); | |||
4336 | return start; | |||
4337 | } | |||
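// A minimal sketch of the multi-block contract implemented by the SHA
// stubs above (illustrative C++; compress_block is a hypothetical stand-in
// for the hardware-accelerated rounds, 64 is the SHA-1/SHA-256 block size):
//
//   int implCompressMultiBlock(const uint8_t* buf, uint32_t* state,
//                              int ofs, int limit) {
//     do {
//       compress_block(state, buf + ofs);   // one block of the message
//       ofs += 64;
//     } while (ofs <= limit);
//     return ofs;                           // new offset, returned in rax
//   }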
4338 | ||||
4339 | address generate_sha512_implCompress(bool multi_block, const char *name) { | |||
4340 | assert(VM_Version::supports_avx2(), ""); | |||
4341 | assert(VM_Version::supports_bmi2(), ""); | |||
4342 | __masm-> align(CodeEntryAlignment); | |||
4343 | StubCodeMark mark(this, "StubRoutines", name); | |||
4344 | address start = __masm-> pc(); | |||
4345 | ||||
4346 | Register buf = c_rarg0; | |||
4347 | Register state = c_rarg1; | |||
4348 | Register ofs = c_rarg2; | |||
4349 | Register limit = c_rarg3; | |||
4350 | ||||
4351 | const XMMRegister msg = xmm0; | |||
4352 | const XMMRegister state0 = xmm1; | |||
4353 | const XMMRegister state1 = xmm2; | |||
4354 | const XMMRegister msgtmp0 = xmm3; | |||
4355 | const XMMRegister msgtmp1 = xmm4; | |||
4356 | const XMMRegister msgtmp2 = xmm5; | |||
4357 | const XMMRegister msgtmp3 = xmm6; | |||
4358 | const XMMRegister msgtmp4 = xmm7; | |||
4359 | ||||
4360 | const XMMRegister shuf_mask = xmm8; | |||
4361 | ||||
4362 | __masm-> enter(); | |||
4363 | ||||
4364 | __masm-> sha512_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4, | |||
4365 | buf, state, ofs, limit, rsp, multi_block, shuf_mask); | |||
4366 | ||||
4367 | __masm-> vzeroupper(); | |||
4368 | __masm-> leave(); | |||
4369 | __masm-> ret(0); | |||
4370 | return start; | |||
4371 | } | |||
4372 | ||||
4373 | address ghash_polynomial512_addr() { | |||
4374 | __masm-> align(CodeEntryAlignment); | |||
4375 | StubCodeMark mark(this, "StubRoutines", "_ghash_poly512_addr"); | |||
4376 | address start = __masm-> pc(); | |||
4377 | __masm-> emit_data64(0x00000001C2000000, relocInfo::none); // POLY for reduction | |||
4378 | __masm-> emit_data64(0xC200000000000000, relocInfo::none); | |||
4379 | __masm-> emit_data64(0x00000001C2000000, relocInfo::none); | |||
4380 | __masm-> emit_data64(0xC200000000000000, relocInfo::none); | |||
4381 | __masm-> emit_data64(0x00000001C2000000, relocInfo::none); | |||
4382 | __masm-> emit_data64(0xC200000000000000, relocInfo::none); | |||
4383 | __masm-> emit_data64(0x00000001C2000000, relocInfo::none); | |||
4384 | __masm-> emit_data64(0xC200000000000000, relocInfo::none); | |||
4385 | __masm-> emit_data64(0x0000000000000001, relocInfo::none); // POLY | |||
4386 | __masm-> emit_data64(0xC200000000000000, relocInfo::none); | |||
4387 | __masm-> emit_data64(0x0000000000000001, relocInfo::none); // TWOONE | |||
4388 | __masm-> emit_data64(0x0000000100000000, relocInfo::none); | |||
4389 | return start; | |||
4390 | } | |||
4391 | ||||
4392 | // Vector AES Galois Counter Mode implementation. Parameters: | |||
4393 | // Windows regs | Linux regs | |||
4394 | // in = c_rarg0 (rcx) | c_rarg0 (rdi) | |||
4395 | // len = c_rarg1 (rdx) | c_rarg1 (rsi) | |||
4396 | // ct = c_rarg2 (r8) | c_rarg2 (rdx) | |||
4397 | // out = c_rarg3 (r9) | c_rarg3 (rcx) | |||
4398 | // key = r10 | c_rarg4 (r8) | |||
4399 | // state = r13 | c_rarg5 (r9) | |||
4400 | // subkeyHtbl = r14 | r11 | |||
4401 | // counter = rsi | r12 | |||
4402 | // return - number of processed bytes | |||
4403 | address generate_galoisCounterMode_AESCrypt() { | |||
4404 | __masm-> align(CodeEntryAlignment); | |||
4405 | StubCodeMark mark(this, "StubRoutines", "galoisCounterMode_AESCrypt"); | |||
4406 | address start = __masm-> pc(); | |||
4407 | const Register in = c_rarg0; | |||
4408 | const Register len = c_rarg1; | |||
4409 | const Register ct = c_rarg2; | |||
4410 | const Register out = c_rarg3; | |||
4411 | // counter (see below) is updated with the incremented counter value at the end | |||
4412 | #ifndef _WIN64 | |||
4413 | const Register key = c_rarg4; | |||
4414 | const Register state = c_rarg5; | |||
4415 | const Address subkeyH_mem(rbp, 2 * wordSize); | |||
4416 | const Register subkeyHtbl = r11; | |||
4417 | const Address avx512_subkeyH_mem(rbp, 3 * wordSize); | |||
4418 | const Register avx512_subkeyHtbl = r13; | |||
4419 | const Address counter_mem(rbp, 4 * wordSize); | |||
4420 | const Register counter = r12; | |||
4421 | #else | |||
4422 | const Address key_mem(rbp, 6 * wordSize); | |||
4423 | const Register key = r10; | |||
4424 | const Address state_mem(rbp, 7 * wordSize); | |||
4425 | const Register state = r13; | |||
4426 | const Address subkeyH_mem(rbp, 8 * wordSize); | |||
4427 | const Register subkeyHtbl = r14; | |||
4428 | const Address avx512_subkeyH_mem(rbp, 9 * wordSize); | |||
4429 | const Register avx512_subkeyHtbl = r12; | |||
4430 | const Address counter_mem(rbp, 10 * wordSize); | |||
4431 | const Register counter = rsi; | |||
4432 | #endif | |||
4433 | __masm-> enter(); | |||
4434 | // Save state before entering routine | |||
4435 | __masm-> push(r12); | |||
4436 | __masm-> push(r13); | |||
4437 | __masm-> push(r14); | |||
4438 | __masm-> push(r15); | |||
4439 | __masm-> push(rbx); | |||
4440 | #ifdef _WIN64 | |||
4441 | // on win64, fill key and state from stack positions | |||
4442 | __masm-> push(rsi); | |||
4443 | __masm-> movptr(key, key_mem); | |||
4444 | __masm-> movptr(state, state_mem); | |||
4445 | #endif | |||
4446 | __masm-> movptr(subkeyHtbl, subkeyH_mem); | |||
4447 | __masm-> movptr(avx512_subkeyHtbl, avx512_subkeyH_mem); | |||
4448 | __masm-> movptr(counter, counter_mem); | |||
4449 | ||||
4450 | __masm-> aesgcm_encrypt(in, len, ct, out, key, state, subkeyHtbl, avx512_subkeyHtbl, counter); | |||
4451 | ||||
4452 | // Restore state before leaving routine | |||
4453 | #ifdef _WIN64 | |||
4454 | __masm-> pop(rsi); | |||
4455 | #endif | |||
4456 | __masm-> pop(rbx); | |||
4457 | __masm-> pop(r15); | |||
4458 | __masm-> pop(r14); | |||
4459 | __masm-> pop(r13); | |||
4460 | __masm-> pop(r12); | |||
4461 | ||||
4462 | __masm-> leave(); // required for proper stackwalking of RuntimeStub frame | |||
4463 | __masm-> ret(0); | |||
4464 | return start; | |||
4465 | } | |||
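// Why the stack offsets above: after enter() (push rbp; mov rbp, rsp) the
// return address sits at rbp + wordSize, so on Linux the first
// stack-passed argument (the 7th) lands at rbp + 2 * wordSize; Win64
// additionally reserves four 8-byte home slots for the register
// arguments, pushing the 5th argument out to rbp + 6 * wordSize. A sketch
// of the frame as this stub sees it:
//
//   [rbp + 0*wordSize]  saved rbp
//   [rbp + 1*wordSize]  return address
//   [rbp + 2*wordSize]  Linux: subkeyHtbl (first stack argument)
//   [rbp + 6*wordSize]  Win64: key (5th argument, past the home slots)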
4466 | ||||
4467 | // This table is used for incrementing the counter values (linc0, linc4, etc.); its layout is summarized after the data | |||
4468 | address counter_mask_addr() { | |||
4469 | __masm-> align64(); | |||
4470 | StubCodeMark mark(this, "StubRoutines", "counter_mask_addr"); | |||
4471 | address start = __masm-> pc(); | |||
4472 | __masm-> emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);//lbswapmask | |||
4473 | __masm-> emit_data64(0x0001020304050607, relocInfo::none); | |||
4474 | __masm-> emit_data64(0x08090a0b0c0d0e0f, relocInfo::none); | |||
4475 | __masm-> emit_data64(0x0001020304050607, relocInfo::none); | |||
4476 | __masm-> emit_data64(0x08090a0b0c0d0e0f, relocInfo::none); | |||
4477 | __masm-> emit_data64(0x0001020304050607, relocInfo::none); | |||
4478 | __masm-> emit_data64(0x08090a0b0c0d0e0f, relocInfo::none); | |||
4479 | __masm-> emit_data64(0x0001020304050607, relocInfo::none); | |||
4480 | __masm-> emit_data64(0x0000000000000000, relocInfo::none);//linc0 = counter_mask_addr+64 | |||
4481 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
4482 | __masm-> emit_data64(0x0000000000000001, relocInfo::none);//counter_mask_addr() + 80 | |||
4483 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
4484 | __masm-> emit_data64(0x0000000000000002, relocInfo::none); | |||
4485 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
4486 | __masm-> emit_data64(0x0000000000000003, relocInfo::none); | |||
4487 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
4488 | __masm-> emit_data64(0x0000000000000004, relocInfo::none);//linc4 = counter_mask_addr() + 128 | |||
4489 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
4490 | __masm-> emit_data64(0x0000000000000004, relocInfo::none); | |||
4491 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
4492 | __masm-> emit_data64(0x0000000000000004, relocInfo::none); | |||
4493 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
4494 | __masm-> emit_data64(0x0000000000000004, relocInfo::none); | |||
4495 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
4496 | __masm-> emit_data64(0x0000000000000008, relocInfo::none);//linc8 = counter_mask_addr() + 192 | |||
4497 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
4498 | __masm-> emit_data64(0x0000000000000008, relocInfo::none); | |||
4499 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
4500 | __masm-> emit_data64(0x0000000000000008, relocInfo::none); | |||
4501 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
4502 | __masm-> emit_data64(0x0000000000000008, relocInfo::none); | |||
4503 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
4504 | __masm-> emit_data64(0x0000000000000020, relocInfo::none);//linc32 = counter_mask_addr() + 256 | |||
4505 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
4506 | __masm-> emit_data64(0x0000000000000020, relocInfo::none); | |||
4507 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
4508 | __masm-> emit_data64(0x0000000000000020, relocInfo::none); | |||
4509 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
4510 | __masm-> emit_data64(0x0000000000000020, relocInfo::none); | |||
4511 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
4512 | __masm-> emit_data64(0x0000000000000010, relocInfo::none);//linc16 = counter_mask_addr() + 320 | |||
4513 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
4514 | __masm-> emit_data64(0x0000000000000010, relocInfo::none); | |||
4515 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
4516 | __masm-> emit_data64(0x0000000000000010, relocInfo::none); | |||
4517 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
4518 | __masm-> emit_data64(0x0000000000000010, relocInfo::none); | |||
4519 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
4520 | return start; | |||
4521 | } | |||
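// Layout of the table above as the CTR code consumes it (offsets from
// counter_mask_addr(), one 64-byte ZMM row per entry; a summary of the
// annotations in the data):
//
//   +0    lbswapmask : byte-swap shuffle mask for four 128-bit lanes
//   +64   linc0      : per-lane increments {0, 1, 2, 3}
//   +128  linc4      : adds 4 to every 128-bit lane
//   +192  linc8      : adds 8 to every lane
//   +256  linc32     : adds 32 to every lane
//   +320  linc16     : adds 16 to every lane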
4522 | ||||
4523 | // Vector AES Counter implementation | |||
4524 | address generate_counterMode_VectorAESCrypt() { | |||
4525 | __masm-> align(CodeEntryAlignment); | |||
4526 | StubCodeMark mark(this, "StubRoutines", "counterMode_AESCrypt"); | |||
4527 | address start = __masm-> pc(); | |||
4528 | const Register from = c_rarg0; // source array address | |||
4529 | const Register to = c_rarg1; // destination array address | |||
4530 | const Register key = c_rarg2; // key array address r8 | |||
4531 | const Register counter = c_rarg3; // counter byte array initialized from counter array address | |||
4532 | // and updated with the incremented counter in the end | |||
4533 | #ifndef _WIN64 | |||
4534 | const Register len_reg = c_rarg4; | |||
4535 | const Register saved_encCounter_start = c_rarg5; | |||
4536 | const Register used_addr = r10; | |||
4537 | const Address used_mem(rbp, 2 * wordSize); | |||
4538 | const Register used = r11; | |||
4539 | #else | |||
4540 | const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64 | |||
4541 | const Address saved_encCounter_mem(rbp, 7 * wordSize); // saved encrypted counter is on stack on Win64 | |||
4542 | const Address used_mem(rbp, 8 * wordSize); // used length is on stack on Win64 | |||
4543 | const Register len_reg = r10; // pick the first volatile windows register | |||
4544 | const Register saved_encCounter_start = r11; | |||
4545 | const Register used_addr = r13; | |||
4546 | const Register used = r14; | |||
4547 | #endif | |||
4548 | __masm-> enter(); | |||
4549 | // Save state before entering routine | |||
4550 | __masm-> push(r12); | |||
4551 | __masm-> push(r13); | |||
4552 | __masm-> push(r14); | |||
4553 | __masm-> push(r15); | |||
4554 | #ifdef _WIN64 | |||
4555 | // on win64, fill len_reg from stack position | |||
4556 | __masm-> movl(len_reg, len_mem); | |||
4557 | __masm-> movptr(saved_encCounter_start, saved_encCounter_mem); | |||
4558 | __masm-> movptr(used_addr, used_mem); | |||
4559 | __masm-> movl(used, Address(used_addr, 0)); | |||
4560 | #else | |||
4561 | __masm-> push(len_reg); // Save | |||
4562 | __masm-> movptr(used_addr, used_mem); | |||
4563 | __masm-> movl(used, Address(used_addr, 0)); | |||
4564 | #endif | |||
4565 | __masm-> push(rbx); | |||
4566 | __masm-> aesctr_encrypt(from, to, key, counter, len_reg, used, used_addr, saved_encCounter_start); | |||
4567 | // Restore state before leaving routine | |||
4568 | __masm-> pop(rbx); | |||
4569 | #ifdef _WIN64 | |||
4570 | __masm-> movl(rax, len_mem); // return length | |||
4571 | #else | |||
4572 | __masm-> pop(rax); // return length | |||
4573 | #endif | |||
4574 | __masm-> pop(r15); | |||
4575 | __masm-> pop(r14); | |||
4576 | __masm-> pop(r13); | |||
4577 | __masm-> pop(r12); | |||
4578 | ||||
4579 | __masm-> leave(); // required for proper stackwalking of RuntimeStub frame | |||
4580 | __masm-> ret(0); | |||
4581 | return start; | |||
4582 | } | |||
4583 | ||||
4584 | // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time | |||
4585 | // to hide instruction latency | |||
4586 | // | |||
4587 | // Arguments: | |||
4588 | // | |||
4589 | // Inputs: | |||
4590 | // c_rarg0 - source byte array address | |||
4591 | // c_rarg1 - destination byte array address | |||
4592 | // c_rarg2 - K (key) in little endian int array | |||
4593 | // c_rarg3 - counter vector byte array address | |||
4594 | // Linux | |||
4595 | // c_rarg4 - input length | |||
4596 | // c_rarg5 - saved encryptedCounter start | |||
4597 | // rbp + 2 * wordSize - saved used length | |||
4598 | // Windows | |||
4599 | // rbp + 6 * wordSize - input length | |||
4600 | // rbp + 7 * wordSize - saved encryptedCounter start | |||
4601 | // rbp + 8 * wordSize - saved used length | |||
4602 | // | |||
4603 | // Output: | |||
4604 | // rax - input length | |||
4605 | // | |||
4606 | address generate_counterMode_AESCrypt_Parallel() { | |||
4607 | assert(UseAES, "need AES instructions and misaligned SSE support"); | |||
4608 | __masm-> align(CodeEntryAlignment); | |||
4609 | StubCodeMark mark(this, "StubRoutines", "counterMode_AESCrypt"); | |||
4610 | address start = __masm-> pc(); | |||
4611 | const Register from = c_rarg0; // source array address | |||
4612 | const Register to = c_rarg1; // destination array address | |||
4613 | const Register key = c_rarg2; // key array address | |||
4614 | const Register counter = c_rarg3; // counter byte array initialized from counter array address | |||
4615 | // and updated with the incremented counter in the end | |||
4616 | #ifndef _WIN64 | |||
4617 | const Register len_reg = c_rarg4; | |||
4618 | const Register saved_encCounter_start = c_rarg5; | |||
4619 | const Register used_addr = r10; | |||
4620 | const Address used_mem(rbp, 2 * wordSize); | |||
4621 | const Register used = r11; | |||
4622 | #else | |||
4623 | const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64 | |||
4624 | const Address saved_encCounter_mem(rbp, 7 * wordSize); // saved encrypted counter is on stack on Win64 | |||
4625 | const Address used_mem(rbp, 8 * wordSize); // used length is on stack on Win64 | |||
4626 | const Register len_reg = r10; // pick the first volatile windows register | |||
4627 | const Register saved_encCounter_start = r11; | |||
4628 | const Register used_addr = r13; | |||
4629 | const Register used = r14; | |||
4630 | #endif | |||
4631 | const Register pos = rax; | |||
4632 | ||||
4633 | const int PARALLEL_FACTOR = 6; | |||
4634 | const XMMRegister xmm_counter_shuf_mask = xmm0; | |||
4635 | const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front | |||
4636 | const XMMRegister xmm_curr_counter = xmm2; | |||
4637 | ||||
4638 | const XMMRegister xmm_key_tmp0 = xmm3; | |||
4639 | const XMMRegister xmm_key_tmp1 = xmm4; | |||
4640 | ||||
4641 | // registers holding the six results in the parallelized loop | |||
4642 | const XMMRegister xmm_result0 = xmm5; | |||
4643 | const XMMRegister xmm_result1 = xmm6; | |||
4644 | const XMMRegister xmm_result2 = xmm7; | |||
4645 | const XMMRegister xmm_result3 = xmm8; | |||
4646 | const XMMRegister xmm_result4 = xmm9; | |||
4647 | const XMMRegister xmm_result5 = xmm10; | |||
4648 | ||||
4649 | const XMMRegister xmm_from0 = xmm11; | |||
4650 | const XMMRegister xmm_from1 = xmm12; | |||
4651 | const XMMRegister xmm_from2 = xmm13; | |||
4652 | const XMMRegister xmm_from3 = xmm14; // the last one is xmm14; we have to preserve it on WIN64. | |||
4653 | const XMMRegister xmm_from4 = xmm3; // reuse xmm3/xmm4, since xmm_key_tmp0/1 are no longer needed once the input text is loaded | |||
4654 | const XMMRegister xmm_from5 = xmm4; | |||
4655 | ||||
4656 | //for key_128, key_192, key_256 | |||
4657 | const int rounds[3] = {10, 12, 14}; | |||
4658 | Label L_exit_preLoop, L_preLoop_start; | |||
4659 | Label L_multiBlock_loopTop[3]; | |||
4660 | Label L_singleBlockLoopTop[3]; | |||
4661 | Label L__incCounter[3][6]; //for 6 blocks | |||
4662 | Label L__incCounter_single[3]; //for single block, key128, key192, key256 | |||
4663 | Label L_processTail_insr[3], L_processTail_4_insr[3], L_processTail_2_insr[3], L_processTail_1_insr[3], L_processTail_exit_insr[3]; | |||
4664 | Label L_processTail_4_extr[3], L_processTail_2_extr[3], L_processTail_1_extr[3], L_processTail_exit_extr[3]; | |||
4665 | ||||
4666 | Label L_exit; | |||
4667 | ||||
4668 | __masm-> enter(); // required for proper stackwalking of RuntimeStub frame | |||
4669 | ||||
4670 | #ifdef _WIN64 | |||
4671 | // allocate spill slots for r13, r14 | |||
4672 | enum { | |||
4673 | saved_r13_offset, | |||
4674 | saved_r14_offset | |||
4675 | }; | |||
4676 | __masm-> subptr(rsp, 2 * wordSize); | |||
4677 | __masm-> movptr(Address(rsp, saved_r13_offset * wordSize), r13); | |||
4678 | __masm-> movptr(Address(rsp, saved_r14_offset * wordSize), r14); | |||
4679 | ||||
4680 | // on win64, fill len_reg from stack position | |||
4681 | __masm-> movl(len_reg, len_mem); | |||
4682 | __masm-> movptr(saved_encCounter_start, saved_encCounter_mem); | |||
4683 | __masm-> movptr(used_addr, used_mem); | |||
4684 | __masm-> movl(used, Address(used_addr, 0)); | |||
4685 | #else | |||
4686 | __masm-> push(len_reg); // Save | |||
4687 | __masm-> movptr(used_addr, used_mem); | |||
4688 | __masm-> movl(used, Address(used_addr, 0)); | |||
4689 | #endif | |||
4690 | ||||
4691 | __masm-> push(rbx); // Save RBX | |||
4692 | __masm-> movdqu(xmm_curr_counter, Address(counter, 0x00)); // initialize counter with initial counter | |||
4693 | __masm-> movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()), pos); // pos as scratch | |||
4694 | __masm-> pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled | |||
4695 | __masm-> movptr(pos, 0); | |||
4696 | ||||
4697 | // Use the partially used encrypted counter from the last invocation (a scalar sketch follows the loop) | |||
4698 | __masm-> BIND(L_preLoop_start); | |||
4699 | __masm-> cmpptr(used, 16); | |||
4700 | __masm-> jcc(Assembler::aboveEqual, L_exit_preLoop); | |||
4701 | __masm-> cmpptr(len_reg, 0); | |||
4702 | __masm-> jcc(Assembler::lessEqual, L_exit_preLoop); | |||
4703 | __masm-> movb(rbx, Address(saved_encCounter_start, used)); | |||
4704 | __masm-> xorb(rbx, Address(from, pos)); | |||
4705 | __masm-> movb(Address(to, pos), rbx); | |||
4706 | __masm-> addptr(pos, 1); | |||
4707 | __masm-> addptr(used, 1); | |||
4708 | __masm-> subptr(len_reg, 1); | |||
4709 | ||||
4710 | __masm-> jmp(L_preLoop_start); | |||
4711 | ||||
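// A scalar sketch of the pre-loop above (hypothetical C names for the
// register values; it drains leftover keystream bytes from the previous
// call one at a time):
//
//   while (used < 16 && len > 0) {
//     to[pos] = saved_encCounter[used] ^ from[pos];  // XOR keystream byte
//     pos++; used++; len--;
//   }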
4712 | __masm-> BIND(L_exit_preLoop); | |||
4713 | __masm-> movl(Address(used_addr, 0), used); | |||
4714 | ||||
4715 | // key length could be only {11, 13, 15} * 4 = {44, 52, 60} | |||
4716 | __masm-> movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()), rbx); // rbx as scratch | |||
4717 | __masm-> movl(rbx, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); | |||
4718 | __masm-> cmpl(rbx, 52); | |||
4719 | __masm-> jcc(Assembler::equal, L_multiBlock_loopTop[1]); | |||
4720 | __masm-> cmpl(rbx, 60); | |||
4721 | __masm-> jcc(Assembler::equal, L_multiBlock_loopTop[2]); | |||
4722 | ||||
4723 | #define CTR_DoSix(opc, src_reg) \ | |||
4724 | __masm-> opc(xmm_result0, src_reg); \ | |||
4725 | __masm-> opc(xmm_result1, src_reg); \ | |||
4726 | __masm-> opc(xmm_result2, src_reg); \ | |||
4727 | __masm-> opc(xmm_result3, src_reg); \ | |||
4728 | __masm-> opc(xmm_result4, src_reg); \ | |||
4729 | __masm-> opc(xmm_result5, src_reg); | |||
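// For example, CTR_DoSix(aesenc, xmm_key_tmp1) applies one AES round to
// all six in-flight counter blocks:
//
//   __masm-> aesenc(xmm_result0, xmm_key_tmp1);
//   ...
//   __masm-> aesenc(xmm_result5, xmm_key_tmp1);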
4730 | ||||
4731 | // k == 0 : generate code for key_128 | |||
4732 | // k == 1 : generate code for key_192 | |||
4733 | // k == 2 : generate code for key_256 | |||
4734 | for (int k = 0; k < 3; ++k) { | |||
4735 | // multi-block loop starts here | |||
4736 | __masm-> align(OptoLoopAlignment); | |||
4737 | __masm-> BIND(L_multiBlock_loopTop[k]); | |||
4738 | __masm-> cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least PARALLEL_FACTOR blocks left | |||
4739 | __masm-> jcc(Assembler::less, L_singleBlockLoopTop[k]); | |||
4740 | load_key(xmm_key_tmp0, key, 0x00, xmm_key_shuf_mask); | |||
4741 | ||||
4742 | //load, then increase counters | |||
4743 | CTR_DoSix(movdqa, xmm_curr_counter); | |||
4744 | inc_counter(rbx, xmm_result1, 0x01, L__incCounter[k][0]); | |||
4745 | inc_counter(rbx, xmm_result2, 0x02, L__incCounter[k][1]); | |||
4746 | inc_counter(rbx, xmm_result3, 0x03, L__incCounter[k][2]); | |||
4747 | inc_counter(rbx, xmm_result4, 0x04, L__incCounter[k][3]); | |||
4748 | inc_counter(rbx, xmm_result5, 0x05, L__incCounter[k][4]); | |||
4749 | inc_counter(rbx, xmm_curr_counter, 0x06, L__incCounter[k][5]); | |||
4750 | CTR_DoSix(pshufb, xmm_counter_shuf_mask); // after incrementing, shuffle the counters back for PXOR | |||
4751 | CTR_DoSix(pxor, xmm_key_tmp0); // PXOR with Round 0 key | |||
4752 | ||||
4753 | //load two ROUND_KEYs at a time | |||
4754 | for (int i = 1; i < rounds[k]; ) { | |||
4755 | load_key(xmm_key_tmp1, key, (0x10 * i), xmm_key_shuf_mask); | |||
4756 | load_key(xmm_key_tmp0, key, (0x10 * (i+1)), xmm_key_shuf_mask); | |||
4757 | CTR_DoSix(aesenc, xmm_key_tmp1); | |||
4758 | i++; | |||
4759 | if (i != rounds[k]) { | |||
4760 | CTR_DoSix(aesenc, xmm_key_tmp0); | |||
4761 | } else { | |||
4762 | CTR_DoSix(aesenclast, xmm_key_tmp0); | |||
4763 | } | |||
4764 | i++; | |||
4765 | } | |||
4766 | ||||
4767 | // get next PARALLEL_FACTOR blocks into xmm_result registers | |||
4768 | __masm-> movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); | |||
4769 | __masm-> movdqu(xmm_from1, Address(from, pos, Address::times_1, 1 * AESBlockSize)); | |||
4770 | __masm-> movdqu(xmm_from2, Address(from, pos, Address::times_1, 2 * AESBlockSize)); | |||
4771 | __masm-> movdqu(xmm_from3, Address(from, pos, Address::times_1, 3 * AESBlockSize)); | |||
4772 | __masm-> movdqu(xmm_from4, Address(from, pos, Address::times_1, 4 * AESBlockSize)); | |||
4773 | __masm-> movdqu(xmm_from5, Address(from, pos, Address::times_1, 5 * AESBlockSize)); | |||
4774 | ||||
4775 | __masm-> pxor(xmm_result0, xmm_from0); | |||
4776 | __masm-> pxor(xmm_result1, xmm_from1); | |||
4777 | __masm-> pxor(xmm_result2, xmm_from2); | |||
4778 | __masm-> pxor(xmm_result3, xmm_from3); | |||
4779 | __masm-> pxor(xmm_result4, xmm_from4); | |||
4780 | __masm-> pxor(xmm_result5, xmm_from5); | |||
4781 | ||||
4782 | // store 6 results into the next 64 bytes of output | |||
4783 | __masm-> movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0); | |||
4784 | __masm-> movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1); | |||
4785 | __masm-> movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2); | |||
4786 | __masm-> movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3); | |||
4787 | __masm-> movdqu(Address(to, pos, Address::times_1, 4 * AESBlockSize), xmm_result4); | |||
4788 | __masm-> movdqu(Address(to, pos, Address::times_1, 5 * AESBlockSize), xmm_result5); | |||
4789 | ||||
4790 | __masm-> addptr(pos, PARALLEL_FACTOR * AESBlockSize); // advance the position in the crypt text | |||
4791 | __masm-> subptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // decrease the remaining length | |||
4792 | __masm-> jmp(L_multiBlock_loopTop[k]); | |||
4793 | ||||
4794 | // singleBlock starts here | |||
4795 | __masm-> align(OptoLoopAlignment); | |||
4796 | __masm-> BIND(L_singleBlockLoopTop[k]); | |||
4797 | __masm-> cmpptr(len_reg, 0); | |||
4798 | __masm-> jcc(Assembler::lessEqual, L_exit); | |||
4799 | load_key(xmm_key_tmp0, key, 0x00, xmm_key_shuf_mask); | |||
4800 | __masm-> movdqa(xmm_result0, xmm_curr_counter); | |||
4801 | inc_counter(rbx, xmm_curr_counter, 0x01, L__incCounter_single[k]); | |||
4802 | __masm-> pshufb(xmm_result0, xmm_counter_shuf_mask); | |||
4803 | __masm-> pxor(xmm_result0, xmm_key_tmp0); | |||
4804 | for (int i = 1; i < rounds[k]; i++) { | |||
4805 | load_key(xmm_key_tmp0, key, (0x10 * i), xmm_key_shuf_mask); | |||
4806 | __masm-> aesenc(xmm_result0, xmm_key_tmp0); | |||
4807 | } | |||
4808 | load_key(xmm_key_tmp0, key, (rounds[k] * 0x10), xmm_key_shuf_mask); | |||
4809 | __masm-> aesenclast(xmm_result0, xmm_key_tmp0); | |||
4810 | __masm-> cmpptr(len_reg, AESBlockSize); | |||
4811 | __masm-> jcc(Assembler::less, L_processTail_insr[k]); | |||
4812 | __masm-> movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); | |||
4813 | __masm-> pxor(xmm_result0, xmm_from0); | |||
4814 | __masm-> movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0); | |||
4815 | __masm-> addptr(pos, AESBlockSize); | |||
4816 | __masm-> subptr(len_reg, AESBlockSize); | |||
4817 | __masm-> jmp(L_singleBlockLoopTop[k]); | |||
4818 | __masm-> BIND(L_processTail_insr[k]); // Process the tail part of the input array | |||
4819 | __masm-> addptr(pos, len_reg); // 1. Insert bytes from src array into xmm_from0 register | |||
4820 | __masm-> testptr(len_reg, 8); | |||
4821 | __masm-> jcc(Assembler::zero, L_processTail_4_insr[k]); | |||
4822 | __masm-> subptr(pos,8); | |||
4823 | __masm-> pinsrq(xmm_from0, Address(from, pos), 0); | |||
4824 | __masm-> BIND(L_processTail_4_insr[k]); | |||
4825 | __masm-> testptr(len_reg, 4); | |||
4826 | __masm-> jcc(Assembler::zero, L_processTail_2_insr[k]); | |||
4827 | __masm-> subptr(pos,4); | |||
4828 | __masm-> pslldq(xmm_from0, 4); | |||
4829 | __masm-> pinsrd(xmm_from0, Address(from, pos), 0); | |||
4830 | __masm-> BIND(L_processTail_2_insr[k]); | |||
4831 | __masm-> testptr(len_reg, 2); | |||
4832 | __masm-> jcc(Assembler::zero, L_processTail_1_insr[k]); | |||
4833 | __masm-> subptr(pos, 2); | |||
4834 | __masm-> pslldq(xmm_from0, 2); | |||
4835 | __masm-> pinsrw(xmm_from0, Address(from, pos), 0); | |||
4836 | __masm-> BIND(L_processTail_1_insr[k]); | |||
4837 | __masm-> testptr(len_reg, 1); | |||
4838 | __masm-> jcc(Assembler::zero, L_processTail_exit_insr[k]); | |||
4839 | __masm-> subptr(pos, 1); | |||
4840 | __masm-> pslldq(xmm_from0, 1); | |||
4841 | __masm-> pinsrb(xmm_from0, Address(from, pos), 0); | |||
4842 | __masm-> BIND(L_processTail_exit_insr[k]); | |||
4843 | ||||
4844 | __masm-> movdqu(Address(saved_encCounter_start, 0), xmm_result0); // 2. Perform pxor of the encrypted counter and plaintext Bytes. | |||
4845 | __masm-> pxor(xmm_result0, xmm_from0); // Also the encrypted counter is saved for next invocation. | |||
4846 | ||||
4847 | __masm-> testptr(len_reg, 8); | |||
4848 | __masm-> jcc(Assembler::zero, L_processTail_4_extr[k]); // 3. Extract bytes from xmm_result0 into the dest. array | |||
4849 | __masm-> pextrq(Address(to, pos), xmm_result0, 0); | |||
4850 | __masm-> psrldq(xmm_result0, 8); | |||
4851 | __masm-> addptr(pos, 8); | |||
4852 | __masm-> BIND(L_processTail_4_extr[k]); | |||
4853 | __masm-> testptr(len_reg, 4); | |||
4854 | __masm-> jcc(Assembler::zero, L_processTail_2_extr[k]); | |||
4855 | __masm-> pextrd(Address(to, pos), xmm_result0, 0); | |||
4856 | __masm-> psrldq(xmm_result0, 4); | |||
4857 | __masm-> addptr(pos, 4); | |||
4858 | __masm-> BIND(L_processTail_2_extr[k]); | |||
4859 | __masm-> testptr(len_reg, 2); | |||
4860 | __masm-> jcc(Assembler::zero, L_processTail_1_extr[k]); | |||
4861 | __masm-> pextrw(Address(to, pos), xmm_result0, 0); | |||
4862 | __masm-> psrldq(xmm_result0, 2); | |||
4863 | __masm-> addptr(pos, 2); | |||
4864 | __masm-> BIND(L_processTail_1_extr[k]); | |||
4865 | __masm-> testptr(len_reg, 1); | |||
4866 | __masm-> jcc(Assembler::zero, L_processTail_exit_extr[k]); | |||
4867 | __masm-> pextrb(Address(to, pos), xmm_result0, 0); | |||
4868 | ||||
4869 | __masm-> BIND(L_processTail_exit_extr[k]); | |||
4870 | __masm-> movl(Address(used_addr, 0), len_reg); | |||
4871 | __masm-> jmp(L_exit); | |||
4872 | ||||
4873 | } | |||
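// Net effect of the tail handling above, as a scalar sketch (the stub
// keeps the partial block in XMM registers rather than memory; enc_ctr is
// the encrypted counter block, len < 16):
//
//   memcpy(saved_encCounter, enc_ctr, 16);        // keystream for next call
//   for (int i = 0; i < len; i++)
//     to[pos + i] = from[pos + i] ^ enc_ctr[i];   // XOR the remaining bytes
//   *used_addr = len;                             // partial block now in use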
4874 | ||||
4875 | __masm-> BIND(L_exit); | |||
4876 | __masm-> pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled back. | |||
4877 | __masm-> movdqu(Address(counter, 0), xmm_curr_counter); //save counter back | |||
4878 | __masm-> pop(rbx); // pop the saved RBX. | |||
4879 | #ifdef _WIN64 | |||
4880 | __masm-> movl(rax, len_mem); | |||
4881 | __masm-> movptr(r13, Address(rsp, saved_r13_offset * wordSize)); | |||
4882 | __masm-> movptr(r14, Address(rsp, saved_r14_offset * wordSize)); | |||
4883 | __masm-> addptr(rsp, 2 * wordSize); | |||
4884 | #else | |||
4885 | __masm-> pop(rax); // return 'len' | |||
4886 | #endif | |||
4887 | __masm-> leave(); // required for proper stackwalking of RuntimeStub frame | |||
4888 | __masm-> ret(0); | |||
4889 | return start; | |||
4890 | } | |||
4891 | ||||
4892 | void roundDec(XMMRegister xmm_reg) { | |||
4893 | __masm-> vaesdec(xmm1, xmm1, xmm_reg, Assembler::AVX_512bit); | |||
4894 | __masm-> vaesdec(xmm2, xmm2, xmm_reg, Assembler::AVX_512bit); | |||
4895 | __masm-> vaesdec(xmm3, xmm3, xmm_reg, Assembler::AVX_512bit); | |||
4896 | __masm-> vaesdec(xmm4, xmm4, xmm_reg, Assembler::AVX_512bit); | |||
4897 | __masm-> vaesdec(xmm5, xmm5, xmm_reg, Assembler::AVX_512bit); | |||
4898 | __masm-> vaesdec(xmm6, xmm6, xmm_reg, Assembler::AVX_512bit); | |||
4899 | __masm-> vaesdec(xmm7, xmm7, xmm_reg, Assembler::AVX_512bit); | |||
4900 | __masm-> vaesdec(xmm8, xmm8, xmm_reg, Assembler::AVX_512bit); | |||
4901 | } | |||
4902 | ||||
4903 | void roundDeclast(XMMRegister xmm_reg) { | |||
4904 | __masm-> vaesdeclast(xmm1, xmm1, xmm_reg, Assembler::AVX_512bit); | |||
4905 | __masm-> vaesdeclast(xmm2, xmm2, xmm_reg, Assembler::AVX_512bit); | |||
4906 | __masm-> vaesdeclast(xmm3, xmm3, xmm_reg, Assembler::AVX_512bit); | |||
4907 | __masm-> vaesdeclast(xmm4, xmm4, xmm_reg, Assembler::AVX_512bit); | |||
4908 | __masm-> vaesdeclast(xmm5, xmm5, xmm_reg, Assembler::AVX_512bit); | |||
4909 | __masm-> vaesdeclast(xmm6, xmm6, xmm_reg, Assembler::AVX_512bit); | |||
4910 | __masm-> vaesdeclast(xmm7, xmm7, xmm_reg, Assembler::AVX_512bit); | |||
4911 | __masm-> vaesdeclast(xmm8, xmm8, xmm_reg, Assembler::AVX_512bit); | |||
4912 | } | |||
4913 | ||||
4914 | void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask = NULL) { | |||
4915 | __masm-> movdqu(xmmdst, Address(key, offset)); | |||
4916 | if (xmm_shuf_mask != NULL) { | |||
4917 | __masm-> pshufb(xmmdst, xmm_shuf_mask); | |||
4918 | } else { | |||
4919 | __masm-> pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); | |||
4920 | } | |||
4921 | __masm-> evshufi64x2(xmmdst, xmmdst, xmmdst, 0x0, Assembler::AVX_512bit); | |||
4922 | ||||
4923 | } | |||
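// ev_load_key thus leaves four identical copies of one 128-bit round key
// in the destination: evshufi64x2 with imm8 == 0 selects lane 0 for all
// four 128-bit lanes, so one key can be XORed/AES-rounded against four
// blocks at once. Logical effect, per lane (a sketch):
//
//   for (int lane = 0; lane < 4; lane++)
//     dst.lane128[lane] = shuffled_key128;   // broadcast 128 -> 512 bits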
4924 | ||||
4925 | address generate_cipherBlockChaining_decryptVectorAESCrypt() { | |||
4926 | assert(VM_Version::supports_avx512_vaes(), "need AES instructions and misaligned SSE support"); | |||
4927 | __masm-> align(CodeEntryAlignment); | |||
4928 | StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); | |||
4929 | address start = __masm-> pc(); | |||
4930 | ||||
4931 | const Register from = c_rarg0; // source array address | |||
4932 | const Register to = c_rarg1; // destination array address | |||
4933 | const Register key = c_rarg2; // key array address | |||
4934 | const Register rvec = c_rarg3; // r byte array initialized from initvector array address | |||
4935 | // and left with the results of the last encryption block | |||
4936 | #ifndef _WIN64 | |||
4937 | const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) | |||
4938 | #else | |||
4939 | const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64 | |||
4940 | const Register len_reg = r11; // pick the volatile windows register | |||
4941 | #endif | |||
4942 | ||||
4943 | Label Loop, Loop1, L_128, L_256, L_192, KEY_192, KEY_256, Loop2, Lcbc_dec_rem_loop, | |||
4944 | Lcbc_dec_rem_last, Lcbc_dec_ret, Lcbc_dec_rem, Lcbc_exit; | |||
4945 | ||||
4946 | __masm-> enter(); | |||
4947 | ||||
4948 | #ifdef _WIN64 | |||
4949 | // on win64, fill len_reg from stack position | |||
4950 | __masm-> movl(len_reg, len_mem); | |||
4951 | #else | |||
4952 | __masm-> push(len_reg); // Save | |||
4953 | #endif | |||
4954 | __masm-> push(rbx); | |||
4955 | __masm-> vzeroupper(); | |||
4956 | ||||
4957 | // Temporary variable declaration for swapping key bytes | |||
4958 | const XMMRegister xmm_key_shuf_mask = xmm1; | |||
4959 | __masm-> movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); | |||
4960 | ||||
4961 | // Calculate number of rounds from key size: 44 for 10-rounds, 52 for 12-rounds, 60 for 14-rounds | |||
4962 | const Register rounds = rbx; | |||
4963 | __masm-> movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); | |||
4964 | ||||
4965 | const XMMRegister IV = xmm0; | |||
4966 | // Load IV and broadcast value to 512-bits | |||
4967 | __masm-> evbroadcasti64x2(IV, Address(rvec, 0), Assembler::AVX_512bit); | |||
4968 | ||||
4969 | // Temporary variables for storing round keys | |||
4970 | const XMMRegister RK0 = xmm30; | |||
4971 | const XMMRegister RK1 = xmm9; | |||
4972 | const XMMRegister RK2 = xmm18; | |||
4973 | const XMMRegister RK3 = xmm19; | |||
4974 | const XMMRegister RK4 = xmm20; | |||
4975 | const XMMRegister RK5 = xmm21; | |||
4976 | const XMMRegister RK6 = xmm22; | |||
4977 | const XMMRegister RK7 = xmm23; | |||
4978 | const XMMRegister RK8 = xmm24; | |||
4979 | const XMMRegister RK9 = xmm25; | |||
4980 | const XMMRegister RK10 = xmm26; | |||
4981 | ||||
4982 | // Load and shuffle key | |||
4983 | // the java expanded key ordering is rotated one position from what we want | |||
4984 | // so we start from 1*16 here and hit 0*16 last | |||
4985 | ev_load_key(RK1, key, 1 * 16, xmm_key_shuf_mask); | |||
4986 | ev_load_key(RK2, key, 2 * 16, xmm_key_shuf_mask); | |||
4987 | ev_load_key(RK3, key, 3 * 16, xmm_key_shuf_mask); | |||
4988 | ev_load_key(RK4, key, 4 * 16, xmm_key_shuf_mask); | |||
4989 | ev_load_key(RK5, key, 5 * 16, xmm_key_shuf_mask); | |||
4990 | ev_load_key(RK6, key, 6 * 16, xmm_key_shuf_mask); | |||
4991 | ev_load_key(RK7, key, 7 * 16, xmm_key_shuf_mask); | |||
4992 | ev_load_key(RK8, key, 8 * 16, xmm_key_shuf_mask); | |||
4993 | ev_load_key(RK9, key, 9 * 16, xmm_key_shuf_mask); | |||
4994 | ev_load_key(RK10, key, 10 * 16, xmm_key_shuf_mask); | |||
4995 | ev_load_key(RK0, key, 0*16, xmm_key_shuf_mask); | |||
4996 | ||||
4997 | // Variables for storing source cipher text | |||
4998 | const XMMRegister S0 = xmm10; | |||
4999 | const XMMRegister S1 = xmm11; | |||
5000 | const XMMRegister S2 = xmm12; | |||
5001 | const XMMRegister S3 = xmm13; | |||
5002 | const XMMRegister S4 = xmm14; | |||
5003 | const XMMRegister S5 = xmm15; | |||
5004 | const XMMRegister S6 = xmm16; | |||
5005 | const XMMRegister S7 = xmm17; | |||
5006 | ||||
5007 | // Variables for storing decrypted text | |||
5008 | const XMMRegister B0 = xmm1; | |||
5009 | const XMMRegister B1 = xmm2; | |||
5010 | const XMMRegister B2 = xmm3; | |||
5011 | const XMMRegister B3 = xmm4; | |||
5012 | const XMMRegister B4 = xmm5; | |||
5013 | const XMMRegister B5 = xmm6; | |||
5014 | const XMMRegister B6 = xmm7; | |||
5015 | const XMMRegister B7 = xmm8; | |||
5016 | ||||
5017 | __masm-> cmpl(rounds, 44); | |||
5018 | __masm-> jcc(Assembler::greater, KEY_192); | |||
5019 | __masm-> jmp(Loop); | |||
5020 | ||||
5021 | __masm-> BIND(KEY_192); | |||
5022 | const XMMRegister RK11 = xmm27; | |||
5023 | const XMMRegister RK12 = xmm28; | |||
5024 | ev_load_key(RK11, key, 11*16, xmm_key_shuf_mask); | |||
5025 | ev_load_key(RK12, key, 12*16, xmm_key_shuf_mask); | |||
5026 | ||||
5027 | __masm-> cmpl(rounds, 52); | |||
5028 | __masm-> jcc(Assembler::greater, KEY_256); | |||
5029 | __masm-> jmp(Loop); | |||
5030 | ||||
5031 | __masm-> BIND(KEY_256); | |||
5032 | const XMMRegister RK13 = xmm29; | |||
5033 | const XMMRegister RK14 = xmm31; | |||
5034 | ev_load_key(RK13, key, 13*16, xmm_key_shuf_mask); | |||
5035 | ev_load_key(RK14, key, 14*16, xmm_key_shuf_mask); | |||
5036 | ||||
5037 | __masm-> BIND(Loop); | |||
5038 | __masm-> cmpl(len_reg, 512); | |||
5039 | __masm-> jcc(Assembler::below, Lcbc_dec_rem); | |||
5040 | __masm-> BIND(Loop1); | |||
5041 | __masm-> subl(len_reg, 512); | |||
5042 | __masm-> evmovdquq(S0, Address(from, 0 * 64), Assembler::AVX_512bit); | |||
5043 | __masm-> evmovdquq(S1, Address(from, 1 * 64), Assembler::AVX_512bit); | |||
5044 | __masm-> evmovdquq(S2, Address(from, 2 * 64), Assembler::AVX_512bit); | |||
5045 | __masm-> evmovdquq(S3, Address(from, 3 * 64), Assembler::AVX_512bit); | |||
5046 | __masm-> evmovdquq(S4, Address(from, 4 * 64), Assembler::AVX_512bit); | |||
5047 | __masm-> evmovdquq(S5, Address(from, 5 * 64), Assembler::AVX_512bit); | |||
5048 | __masm-> evmovdquq(S6, Address(from, 6 * 64), Assembler::AVX_512bit); | |||
5049 | __masm-> evmovdquq(S7, Address(from, 7 * 64), Assembler::AVX_512bit); | |||
5050 | __masm-> leaq(from, Address(from, 8 * 64)); | |||
5051 | ||||
5052 | __masm-> evpxorq(B0, S0, RK1, Assembler::AVX_512bit); | |||
5053 | __masm-> evpxorq(B1, S1, RK1, Assembler::AVX_512bit); | |||
5054 | __masm-> evpxorq(B2, S2, RK1, Assembler::AVX_512bit); | |||
5055 | __masm-> evpxorq(B3, S3, RK1, Assembler::AVX_512bit); | |||
5056 | __masm-> evpxorq(B4, S4, RK1, Assembler::AVX_512bit); | |||
5057 | __masm-> evpxorq(B5, S5, RK1, Assembler::AVX_512bit); | |||
5058 | __masm-> evpxorq(B6, S6, RK1, Assembler::AVX_512bit); | |||
5059 | __masm-> evpxorq(B7, S7, RK1, Assembler::AVX_512bit); | |||
5060 | ||||
5061 | __masm-> evalignq(IV, S0, IV, 0x06); | |||
5062 | __masm-> evalignq(S0, S1, S0, 0x06); | |||
5063 | __masm-> evalignq(S1, S2, S1, 0x06); | |||
5064 | __masm-> evalignq(S2, S3, S2, 0x06); | |||
5065 | __masm-> evalignq(S3, S4, S3, 0x06); | |||
5066 | __masm-> evalignq(S4, S5, S4, 0x06); | |||
5067 | __masm-> evalignq(S5, S6, S5, 0x06); | |||
5068 | __masm-> evalignq(S6, S7, S6, 0x06); | |||
5069 | ||||
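// CBC decryption computes P[i] = AES_dec(C[i]) ^ C[i-1]. The evalignq
// chain above builds, for each 512-bit row of four ciphertext blocks, the
// row of *preceding* blocks: each lane receives the block one position
// earlier, with the first lane pulled from the previous row (or the IV).
// Per row, roughly:
//
//   prev[j] = { last_block(row[j-1]),
//               row[j].block0, row[j].block1, row[j].block2 };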
5070 | roundDec(RK2); | |||
5071 | roundDec(RK3); | |||
5072 | roundDec(RK4); | |||
5073 | roundDec(RK5); | |||
5074 | roundDec(RK6); | |||
5075 | roundDec(RK7); | |||
5076 | roundDec(RK8); | |||
5077 | roundDec(RK9); | |||
5078 | roundDec(RK10); | |||
5079 | ||||
5080 | __masm-> cmpl(rounds, 44); | |||
5081 | __masm-> jcc(Assembler::belowEqual, L_128); | |||
5082 | roundDec(RK11); | |||
5083 | roundDec(RK12); | |||
5084 | ||||
5085 | __masm-> cmpl(rounds, 52); | |||
5086 | __masm-> jcc(Assembler::belowEqual, L_192); | |||
5087 | roundDec(RK13); | |||
5088 | roundDec(RK14); | |||
5089 | ||||
5090 | __masm-> BIND(L_256); | |||
5091 | roundDeclast(RK0); | |||
5092 | __masm-> jmp(Loop2); | |||
5093 | ||||
5094 | __masm-> BIND(L_128); | |||
5095 | roundDeclast(RK0); | |||
5096 | __masm-> jmp(Loop2); | |||
5097 | ||||
5098 | __masm-> BIND(L_192); | |||
5099 | roundDeclast(RK0); | |||
5100 | ||||
5101 | __masm-> BIND(Loop2); | |||
5102 | __masm-> evpxorq(B0, B0, IV, Assembler::AVX_512bit); | |||
5103 | __masm-> evpxorq(B1, B1, S0, Assembler::AVX_512bit); | |||
5104 | __masm-> evpxorq(B2, B2, S1, Assembler::AVX_512bit); | |||
5105 | __masm-> evpxorq(B3, B3, S2, Assembler::AVX_512bit); | |||
5106 | __masm-> evpxorq(B4, B4, S3, Assembler::AVX_512bit); | |||
5107 | __masm-> evpxorq(B5, B5, S4, Assembler::AVX_512bit); | |||
5108 | __masm-> evpxorq(B6, B6, S5, Assembler::AVX_512bit); | |||
5109 | __masm-> evpxorq(B7, B7, S6, Assembler::AVX_512bit); | |||
5110 | __masm-> evmovdquq(IV, S7, Assembler::AVX_512bit); | |||
5111 | ||||
5112 | __masm-> evmovdquq(Address(to, 0 * 64), B0, Assembler::AVX_512bit); | |||
5113 | __masm-> evmovdquq(Address(to, 1 * 64), B1, Assembler::AVX_512bit); | |||
5114 | __masm-> evmovdquq(Address(to, 2 * 64), B2, Assembler::AVX_512bit); | |||
5115 | __masm-> evmovdquq(Address(to, 3 * 64), B3, Assembler::AVX_512bit); | |||
5116 | __masm-> evmovdquq(Address(to, 4 * 64), B4, Assembler::AVX_512bit); | |||
5117 | __masm-> evmovdquq(Address(to, 5 * 64), B5, Assembler::AVX_512bit); | |||
5118 | __masm-> evmovdquq(Address(to, 6 * 64), B6, Assembler::AVX_512bit); | |||
5119 | __masm-> evmovdquq(Address(to, 7 * 64), B7, Assembler::AVX_512bit); | |||
5120 | __masm-> leaq(to, Address(to, 8 * 64)); | |||
5121 | __masm-> jmp(Loop); | |||
5122 | ||||
5123 | __masm-> BIND(Lcbc_dec_rem); | |||
5124 | __masm-> evshufi64x2(IV, IV, IV, 0x03, Assembler::AVX_512bit); | |||
5125 | ||||
5126 | __masm-> BIND(Lcbc_dec_rem_loop); | |||
5127 | __masm-> subl(len_reg, 16); | |||
5128 | __masm-> jcc(Assembler::carrySet, Lcbc_dec_ret); | |||
5129 | ||||
5130 | __masm-> movdqu(S0, Address(from, 0)); | |||
5131 | __masm-> evpxorq(B0, S0, RK1, Assembler::AVX_512bit); | |||
5132 | __masm-> vaesdec(B0, B0, RK2, Assembler::AVX_512bit); | |||
5133 | __masm-> vaesdec(B0, B0, RK3, Assembler::AVX_512bit); | |||
5134 | __masm-> vaesdec(B0, B0, RK4, Assembler::AVX_512bit); | |||
5135 | __masm-> vaesdec(B0, B0, RK5, Assembler::AVX_512bit); | |||
5136 | __masm-> vaesdec(B0, B0, RK6, Assembler::AVX_512bit); | |||
5137 | __masm-> vaesdec(B0, B0, RK7, Assembler::AVX_512bit); | |||
5138 | __masm-> vaesdec(B0, B0, RK8, Assembler::AVX_512bit); | |||
5139 | __masm-> vaesdec(B0, B0, RK9, Assembler::AVX_512bit); | |||
5140 | __masm-> vaesdec(B0, B0, RK10, Assembler::AVX_512bit); | |||
5141 | __masm-> cmpl(rounds, 44); | |||
5142 | __masm-> jcc(Assembler::belowEqual, Lcbc_dec_rem_last); | |||
5143 | ||||
5144 | __masm-> vaesdec(B0, B0, RK11, Assembler::AVX_512bit); | |||
5145 | __masm-> vaesdec(B0, B0, RK12, Assembler::AVX_512bit); | |||
5146 | __masm-> cmpl(rounds, 52); | |||
5147 | __masm-> jcc(Assembler::belowEqual, Lcbc_dec_rem_last); | |||
5148 | ||||
5149 | __masm-> vaesdec(B0, B0, RK13, Assembler::AVX_512bit); | |||
5150 | __masm-> vaesdec(B0, B0, RK14, Assembler::AVX_512bit); | |||
5151 | ||||
5152 | __masm-> BIND(Lcbc_dec_rem_last); | |||
5153 | __masm-> vaesdeclast(B0, B0, RK0, Assembler::AVX_512bit); | |||
5154 | ||||
5155 | __masm-> evpxorq(B0, B0, IV, Assembler::AVX_512bit); | |||
5156 | __masm-> evmovdquq(IV, S0, Assembler::AVX_512bit); | |||
5157 | __masm-> movdqu(Address(to, 0), B0); | |||
5158 | __masm-> leaq(from, Address(from, 16)); | |||
5159 | __masm-> leaq(to, Address(to, 16)); | |||
5160 | __masm-> jmp(Lcbc_dec_rem_loop); | |||
5161 | ||||
5162 | __masm-> BIND(Lcbc_dec_ret); | |||
5163 | __masm-> movdqu(Address(rvec, 0), IV); | |||
5164 | ||||
5165 | // Zero out the round keys | |||
5166 | __masm-> evpxorq(RK0, RK0, RK0, Assembler::AVX_512bit); | |||
5167 | __masm-> evpxorq(RK1, RK1, RK1, Assembler::AVX_512bit); | |||
5168 | __masm-> evpxorq(RK2, RK2, RK2, Assembler::AVX_512bit); | |||
5169 | __masm-> evpxorq(RK3, RK3, RK3, Assembler::AVX_512bit); | |||
5170 | __masm-> evpxorq(RK4, RK4, RK4, Assembler::AVX_512bit); | |||
5171 | __masm-> evpxorq(RK5, RK5, RK5, Assembler::AVX_512bit); | |||
5172 | __masm-> evpxorq(RK6, RK6, RK6, Assembler::AVX_512bit); | |||
5173 | __masm-> evpxorq(RK7, RK7, RK7, Assembler::AVX_512bit); | |||
5174 | __masm-> evpxorq(RK8, RK8, RK8, Assembler::AVX_512bit); | |||
5175 | __masm-> evpxorq(RK9, RK9, RK9, Assembler::AVX_512bit); | |||
5176 | __masm-> evpxorq(RK10, RK10, RK10, Assembler::AVX_512bit); | |||
5177 | __masm-> cmpl(rounds, 44); | |||
5178 | __masm-> jcc(Assembler::belowEqual, Lcbc_exit); | |||
5179 | __masm-> evpxorq(RK11, RK11, RK11, Assembler::AVX_512bit); | |||
5180 | __masm-> evpxorq(RK12, RK12, RK12, Assembler::AVX_512bit); | |||
5181 | __masm-> cmpl(rounds, 52); | |||
5182 | __masm-> jcc(Assembler::belowEqual, Lcbc_exit); | |||
5183 | __masm-> evpxorq(RK13, RK13, RK13, Assembler::AVX_512bit); | |||
5184 | __masm-> evpxorq(RK14, RK14, RK14, Assembler::AVX_512bit); | |||
5185 | ||||
5186 | __masm-> BIND(Lcbc_exit); | |||
5187 | __masm-> pop(rbx); | |||
5188 | #ifdef _WIN64 | |||
5189 | __masm-> movl(rax, len_mem); | |||
5190 | #else | |||
5191 | __masm-> pop(rax); // return length | |||
5192 | #endif | |||
5193 | __masm-> leave(); // required for proper stackwalking of RuntimeStub frame | |||
5194 | __masm-> ret(0); | |||
5195 | return start; | |||
5196 | } | |||
5197 | ||||
5198 | // Polynomial x^128+x^127+x^126+x^121+1 | |||
5199 | address ghash_polynomial_addr() { | |||
5200 | __masm-> align(CodeEntryAlignment); | |||
5201 | StubCodeMark mark(this, "StubRoutines", "_ghash_poly_addr"); | |||
5202 | address start = __masm-> pc(); | |||
5203 | __masm-> emit_data64(0x0000000000000001, relocInfo::none); | |||
5204 | __masm-> emit_data64(0xc200000000000000, relocInfo::none); | |||
5205 | return start; | |||
5206 | } | |||
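// The two qwords above form a 128-bit constant with bits 0, 121, 126 and
// 127 set, i.e. the polynomial in the comment with the implicit x^128
// term dropped. It is the bit-reflected image of the GHASH field
// polynomial x^128 + x^7 + x^2 + x + 1: reversing the coefficient vector
// of a degree-128 polynomial maps x^k to x^(128-k), so
//
//   x^128 + x^7 + x^2 + x + 1  ->  1 + x^121 + x^126 + x^127 + x^128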
5207 | ||||
5208 | address ghash_shufflemask_addr() { | |||
5209 | __masm-> align(CodeEntryAlignment); | |||
5210 | StubCodeMark mark(this, "StubRoutines", "_ghash_shuffmask_addr"); | |||
5211 | address start = __masm-> pc(); | |||
5212 | __masm-> emit_data64(0x0f0f0f0f0f0f0f0f, relocInfo::none); | |||
5213 | __masm-> emit_data64(0x0f0f0f0f0f0f0f0f, relocInfo::none); | |||
5214 | return start; | |||
5215 | } | |||
5216 | ||||
5217 | // Ghash single and multi block operations using AVX instructions | |||
5218 | address generate_avx_ghash_processBlocks() { | |||
5219 | __masm-> align(CodeEntryAlignment); | |||
5220 | ||||
5221 | StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks"); | |||
5222 | address start = __masm-> pc(); | |||
5223 | ||||
5224 | // arguments | |||
5225 | const Register state = c_rarg0; | |||
5226 | const Register htbl = c_rarg1; | |||
5227 | const Register data = c_rarg2; | |||
5228 | const Register blocks = c_rarg3; | |||
5229 | __masm-> enter(); | |||
5230 | // avx_ghash performs the entire single/multi-block computation | |||
5231 | __masm-> avx_ghash(state, htbl, data, blocks); | |||
5232 | __masm-> leave(); // required for proper stackwalking of RuntimeStub frame | |||
5233 | __masm-> ret(0); | |||
5234 | return start; | |||
5235 | } | |||
5236 | ||||
5237 | // byte swap x86 long | |||
5238 | address generate_ghash_long_swap_mask() { | |||
5239 | __masm-> align(CodeEntryAlignment); | |||
5240 | StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask"); | |||
5241 | address start = __masm-> pc(); | |||
5242 | __masm-> emit_data64(0x0f0e0d0c0b0a0908, relocInfo::none ); | |||
5243 | __masm-> emit_data64(0x0706050403020100, relocInfo::none ); | |||
5244 | return start; | |||
5245 | } | |||
5246 | ||||
5247 | // byte swap x86 byte array | |||
5248 | address generate_ghash_byte_swap_mask() { | |||
5249 | __masm-> align(CodeEntryAlignment); | |||
5250 | StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask"); | |||
5251 | address start = __masm-> pc(); | |||
5252 | __masm-> emit_data64(0x08090a0b0c0d0e0f, relocInfo::none ); | |||
5253 | __masm-> emit_data64(0x0001020304050607, relocInfo::none ); | |||
5254 | return start; | |||
5255 | } | |||
5256 | ||||
5257 | /* Single and multi-block ghash operations */ | |||
5258 | address generate_ghash_processBlocks() { | |||
5259 | __masm-> align(CodeEntryAlignment); | |||
5260 | Label L_ghash_loop, L_exit; | |||
5261 | StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks"); | |||
5262 | address start = __masm-> pc(); | |||
5263 | ||||
5264 | const Register state = c_rarg0; | |||
5265 | const Register subkeyH = c_rarg1; | |||
5266 | const Register data = c_rarg2; | |||
5267 | const Register blocks = c_rarg3; | |||
5268 | ||||
5269 | const XMMRegister xmm_temp0 = xmm0; | |||
5270 | const XMMRegister xmm_temp1 = xmm1; | |||
5271 | const XMMRegister xmm_temp2 = xmm2; | |||
5272 | const XMMRegister xmm_temp3 = xmm3; | |||
5273 | const XMMRegister xmm_temp4 = xmm4; | |||
5274 | const XMMRegister xmm_temp5 = xmm5; | |||
5275 | const XMMRegister xmm_temp6 = xmm6; | |||
5276 | const XMMRegister xmm_temp7 = xmm7; | |||
5277 | const XMMRegister xmm_temp8 = xmm8; | |||
5278 | const XMMRegister xmm_temp9 = xmm9; | |||
5279 | const XMMRegister xmm_temp10 = xmm10; | |||
5280 | ||||
5281 | __masm-> enter(); | |||
5282 | ||||
5283 | __masm-> movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); | |||
5284 | ||||
5285 | __masm-> movdqu(xmm_temp0, Address(state, 0)); | |||
5286 | __masm-> pshufb(xmm_temp0, xmm_temp10); | |||
5287 | ||||
5288 | ||||
5289 | __masm-> BIND(L_ghash_loop); | |||
5290 | __masm-> movdqu(xmm_temp2, Address(data, 0)); | |||
5291 | __masm-> pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr())); | |||
5292 | ||||
5293 | __masm-> movdqu(xmm_temp1, Address(subkeyH, 0)); | |||
5294 | __masm-> pshufb(xmm_temp1, xmm_temp10); | |||
5295 | ||||
5296 | __masm-> pxor(xmm_temp0, xmm_temp2); | |||
5297 | ||||
5298 | // | |||
5299 | // Multiply with the hash key | |||
5300 | // | |||
5301 | __masm-> movdqu(xmm_temp3, xmm_temp0); | |||
5302 | __masm-> pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0 | |||
5303 | __masm-> movdqu(xmm_temp4, xmm_temp0); | |||
5304 | __masm-> pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1 | |||
5305 | ||||
5306 | __masm-> movdqu(xmm_temp5, xmm_temp0); | |||
5307 | __masm-> pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0 | |||
5308 | __masm-> movdqu(xmm_temp6, xmm_temp0); | |||
5309 | __masm-> pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1 | |||
5310 | ||||
5311 | __masm-> pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0 | |||
5312 | ||||
5313 | __masm-> movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5 | |||
5314 | __masm-> psrldq(xmm_temp4, 8); // shift xmm4 64 bits to the right | |||
5315 | __masm-> pslldq(xmm_temp5, 8); // shift xmm5 64 bits to the left | |||
5316 | __masm-> pxor(xmm_temp3, xmm_temp5); | |||
5317 | __masm-> pxor(xmm_temp6, xmm_temp4); // Register pair <xmm6:xmm3> holds the result | |||
5318 | // of the carry-less multiplication of | |||
5319 | // xmm0 by xmm1. | |||
5320 | ||||
5321 | // We shift the result of the multiplication by one bit position | |||
5322 | // to the left to compensate for the fact that the bits are reversed. | |||
5323 | __masm-> movdqu(xmm_temp7, xmm_temp3); | |||
5324 | __masm-> movdqu(xmm_temp8, xmm_temp6); | |||
5325 | __masm-> pslld(xmm_temp3, 1); | |||
5326 | __masm-> pslld(xmm_temp6, 1); | |||
5327 | __masm-> psrld(xmm_temp7, 31); | |||
5328 | __masm-> psrld(xmm_temp8, 31); | |||
5329 | __masm-> movdqu(xmm_temp9, xmm_temp7); | |||
5330 | __masm-> pslldq(xmm_temp8, 4); | |||
5331 | __masm-> pslldq(xmm_temp7, 4); | |||
5332 | __masm-> psrldq(xmm_temp9, 12); | |||
5333 | __masm-> por(xmm_temp3, xmm_temp7); | |||
5334 | __masm-> por(xmm_temp6, xmm_temp8); | |||
5335 | __masm-> por(xmm_temp6, xmm_temp9); | |||
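// There is no single instruction that shifts the 256-bit value left
// by one bit, so the shift is assembled per 32-bit lane: pslld
// supplies (lane << 1), psrld/pslldq recover each lane's carried-out
// bit and move it into the next lane, and xmm9 carries the one bit
// that crosses from the low half (xmm3) into the high half (xmm6).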
5336 | ||||
5337 | // | |||
5338 | // First phase of the reduction | |||
5339 | // | |||
5340 | // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts | |||
5341 | // independently. | |||
5342 | __masm-> movdqu(xmm_temp7, xmm_temp3); | |||
5343 | __masm-> movdqu(xmm_temp8, xmm_temp3); | |||
5344 | __masm-> movdqu(xmm_temp9, xmm_temp3); | |||
__masm-> pslld(xmm_temp7, 31);              // packed left shift, << 31
__masm-> pslld(xmm_temp8, 30);              // packed left shift, << 30
__masm-> pslld(xmm_temp9, 25);              // packed left shift, << 25
5348 | __masm-> pxor(xmm_temp7, xmm_temp8); // xor the shifted versions | |||
5349 | __masm-> pxor(xmm_temp7, xmm_temp9); | |||
5350 | __masm-> movdqu(xmm_temp8, xmm_temp7); | |||
5351 | __masm-> pslldq(xmm_temp7, 12); | |||
5352 | __masm-> psrldq(xmm_temp8, 4); | |||
5353 | __masm-> pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete | |||
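// The fold is modulo the GHASH polynomial
//   g(x) = x^128 + x^7 + x^2 + x + 1;
// in this bit-reflected representation the lane shifts by 31, 30
// and 25 correspond to the x^1, x^2 and x^7 terms of g(x)
// (31 = 32 - 1, 30 = 32 - 2, 25 = 32 - 7).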
5354 | ||||
5355 | // | |||
5356 | // Second phase of the reduction | |||
5357 | // | |||
5358 | // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these | |||
5359 | // shift operations. | |||
5360 | __masm-> movdqu(xmm_temp2, xmm_temp3); | |||
5361 | __masm-> movdqu(xmm_temp4, xmm_temp3); | |||
5362 | __masm-> movdqu(xmm_temp5, xmm_temp3); | |||
__masm-> psrld(xmm_temp2, 1);               // packed right shift, >> 1
__masm-> psrld(xmm_temp4, 2);               // packed right shift, >> 2
__masm-> psrld(xmm_temp5, 7);               // packed right shift, >> 7
5366 | __masm-> pxor(xmm_temp2, xmm_temp4); // xor the shifted versions | |||
5367 | __masm-> pxor(xmm_temp2, xmm_temp5); | |||
5368 | __masm-> pxor(xmm_temp2, xmm_temp8); | |||
5369 | __masm-> pxor(xmm_temp3, xmm_temp2); | |||
5370 | __masm-> pxor(xmm_temp6, xmm_temp3); // the result is in xmm6 | |||
5371 | ||||
5372 | __masm-> decrement(blocks); | |||
5373 | __masm-> jcc(Assembler::zero, L_exit); | |||
5374 | __masm-> movdqu(xmm_temp0, xmm_temp6); | |||
5375 | __masm-> addptr(data, 16); | |||
5376 | __masm-> jmp(L_ghash_loop); | |||
5377 | ||||
__masm-> BIND(L_exit);
5379 | __masm-> pshufb(xmm_temp6, xmm_temp10); // Byte swap 16-byte result | |||
5380 | __masm-> movdqu(Address(state, 0), xmm_temp6); // store the result | |||
5381 | __masm-> leave(); | |||
5382 | __masm-> ret(0); | |||
5383 | return start; | |||
5384 | } | |||
5385 | ||||
5386 | address base64_shuffle_addr() | |||
5387 | { | |||
5388 | __masm-> align64(); | |||
5389 | StubCodeMark mark(this, "StubRoutines", "shuffle_base64"); | |||
5390 | address start = __masm-> pc(); | |||
assert(((unsigned long long)start & 0x3f) == 0,
       "Alignment problem (0x%08llx)", (unsigned long long)start);
5393 | __masm-> emit_data64(0x0405030401020001, relocInfo::none); | |||
5394 | __masm-> emit_data64(0x0a0b090a07080607, relocInfo::none); | |||
5395 | __masm-> emit_data64(0x10110f100d0e0c0d, relocInfo::none); | |||
5396 | __masm-> emit_data64(0x1617151613141213, relocInfo::none); | |||
5397 | __masm-> emit_data64(0x1c1d1b1c191a1819, relocInfo::none); | |||
5398 | __masm-> emit_data64(0x222321221f201e1f, relocInfo::none); | |||
5399 | __masm-> emit_data64(0x2829272825262425, relocInfo::none); | |||
5400 | __masm-> emit_data64(0x2e2f2d2e2b2c2a2b, relocInfo::none); | |||
5401 | return start; | |||
5402 | } | |||
5403 | ||||
5404 | address base64_avx2_shuffle_addr() | |||
5405 | { | |||
5406 | __masm-> align32(); | |||
5407 | StubCodeMark mark(this, "StubRoutines", "avx2_shuffle_base64"); | |||
5408 | address start = __masm-> pc(); | |||
5409 | __masm-> emit_data64(0x0809070805060405, relocInfo::none); | |||
5410 | __masm-> emit_data64(0x0e0f0d0e0b0c0a0b, relocInfo::none); | |||
5411 | __masm-> emit_data64(0x0405030401020001, relocInfo::none); | |||
5412 | __masm-> emit_data64(0x0a0b090a07080607, relocInfo::none); | |||
5413 | return start; | |||
5414 | } | |||
5415 | ||||
5416 | address base64_avx2_input_mask_addr() | |||
5417 | { | |||
5418 | __masm-> align32(); | |||
5419 | StubCodeMark mark(this, "StubRoutines", "avx2_input_mask_base64"); | |||
5420 | address start = __masm-> pc(); | |||
5421 | __masm-> emit_data64(0x8000000000000000, relocInfo::none); | |||
5422 | __masm-> emit_data64(0x8000000080000000, relocInfo::none); | |||
5423 | __masm-> emit_data64(0x8000000080000000, relocInfo::none); | |||
5424 | __masm-> emit_data64(0x8000000080000000, relocInfo::none); | |||
5425 | return start; | |||
5426 | } | |||
5427 | ||||
5428 | address base64_avx2_lut_addr() | |||
5429 | { | |||
5430 | __masm-> align32(); | |||
5431 | StubCodeMark mark(this, "StubRoutines", "avx2_lut_base64"); | |||
5432 | address start = __masm-> pc(); | |||
5433 | __masm-> emit_data64(0xfcfcfcfcfcfc4741, relocInfo::none); | |||
5434 | __masm-> emit_data64(0x0000f0edfcfcfcfc, relocInfo::none); | |||
5435 | __masm-> emit_data64(0xfcfcfcfcfcfc4741, relocInfo::none); | |||
5436 | __masm-> emit_data64(0x0000f0edfcfcfcfc, relocInfo::none); | |||
5437 | ||||
5438 | // URL LUT | |||
5439 | __masm-> emit_data64(0xfcfcfcfcfcfc4741, relocInfo::none); | |||
5440 | __masm-> emit_data64(0x000020effcfcfcfc, relocInfo::none); | |||
5441 | __masm-> emit_data64(0xfcfcfcfcfcfc4741, relocInfo::none); | |||
5442 | __masm-> emit_data64(0x000020effcfcfcfc, relocInfo::none); | |||
5443 | return start; | |||
5444 | } | |||
5445 | ||||
5446 | address base64_encoding_table_addr() | |||
5447 | { | |||
5448 | __masm-> align64(); | |||
5449 | StubCodeMark mark(this, "StubRoutines", "encoding_table_base64"); | |||
5450 | address start = __masm-> pc(); | |||
assert(((unsigned long long)start & 0x3f) == 0, "Alignment problem (0x%08llx)", (unsigned long long)start);
5452 | __masm-> emit_data64(0x4847464544434241, relocInfo::none); | |||
5453 | __masm-> emit_data64(0x504f4e4d4c4b4a49, relocInfo::none); | |||
5454 | __masm-> emit_data64(0x5857565554535251, relocInfo::none); | |||
5455 | __masm-> emit_data64(0x6665646362615a59, relocInfo::none); | |||
5456 | __masm-> emit_data64(0x6e6d6c6b6a696867, relocInfo::none); | |||
5457 | __masm-> emit_data64(0x767574737271706f, relocInfo::none); | |||
5458 | __masm-> emit_data64(0x333231307a797877, relocInfo::none); | |||
5459 | __masm-> emit_data64(0x2f2b393837363534, relocInfo::none); | |||
5460 | ||||
5461 | // URL table | |||
5462 | __masm-> emit_data64(0x4847464544434241, relocInfo::none); | |||
5463 | __masm-> emit_data64(0x504f4e4d4c4b4a49, relocInfo::none); | |||
5464 | __masm-> emit_data64(0x5857565554535251, relocInfo::none); | |||
5465 | __masm-> emit_data64(0x6665646362615a59, relocInfo::none); | |||
5466 | __masm-> emit_data64(0x6e6d6c6b6a696867, relocInfo::none); | |||
5467 | __masm-> emit_data64(0x767574737271706f, relocInfo::none); | |||
5468 | __masm-> emit_data64(0x333231307a797877, relocInfo::none); | |||
5469 | __masm-> emit_data64(0x5f2d393837363534, relocInfo::none); | |||
5470 | return start; | |||
5471 | } | |||
5472 | ||||
5473 | // Code for generating Base64 encoding. | |||
5474 | // Intrinsic function prototype in Base64.java: | |||
5475 | // private void encodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp, | |||
5476 | // boolean isURL) { | |||
5477 | address generate_base64_encodeBlock() | |||
5478 | { | |||
5479 | __masm-> align(CodeEntryAlignment); | |||
5480 | StubCodeMark mark(this, "StubRoutines", "implEncode"); | |||
5481 | address start = __masm-> pc(); | |||
5482 | __masm-> enter(); | |||
5483 | ||||
5484 | // Save callee-saved registers before using them | |||
5485 | __masm-> push(r12); | |||
5486 | __masm-> push(r13); | |||
5487 | __masm-> push(r14); | |||
5488 | __masm-> push(r15); | |||
5489 | ||||
5490 | // arguments | |||
5491 | const Register source = c_rarg0; // Source Array | |||
5492 | const Register start_offset = c_rarg1; // start offset | |||
5493 | const Register end_offset = c_rarg2; // end offset | |||
5494 | const Register dest = c_rarg3; // destination array | |||
5495 | ||||
5496 | #ifndef _WIN64 | |||
5497 | const Register dp = c_rarg4; // Position for writing to dest array | |||
5498 | const Register isURL = c_rarg5; // Base64 or URL character set | |||
5499 | #else | |||
const Address dp_mem(rbp, 6 * wordSize);  // dp is passed on the stack on Win64
5501 | const Address isURL_mem(rbp, 7 * wordSize); | |||
5502 | const Register isURL = r10; // pick the volatile windows register | |||
5503 | const Register dp = r12; | |||
5504 | __masm-> movl(dp, dp_mem); | |||
5505 | __masm-> movl(isURL, isURL_mem); | |||
5506 | #endif | |||
5507 | ||||
5508 | const Register length = r14; | |||
5509 | const Register encode_table = r13; | |||
5510 | Label L_process3, L_exit, L_processdata, L_vbmiLoop, L_not512, L_32byteLoop; | |||
5511 | ||||
5512 | // calculate length from offsets | |||
5513 | __masm-> movl(length, end_offset); | |||
5514 | __masm-> subl(length, start_offset); | |||
5515 | __masm-> cmpl(length, 0); | |||
5516 | __masm-> jcc(Assembler::lessEqual, L_exit); | |||
5517 | ||||
5518 | // Code for 512-bit VBMI encoding. Encodes 48 input bytes into 64 | |||
5519 | // output bytes. We read 64 input bytes and ignore the last 16, so be | |||
5520 | // sure not to read past the end of the input buffer. | |||
5521 | if (VM_Version::supports_avx512_vbmi()) { | |||
5522 | __masm-> cmpl(length, 64); // Do not overrun input buffer. | |||
5523 | __masm-> jcc(Assembler::below, L_not512); | |||
5524 | ||||
__masm-> shll(isURL, 6); // index into encode table based on isURL
5526 | __masm-> lea(encode_table, ExternalAddress(StubRoutines::x86::base64_encoding_table_addr())); | |||
5527 | __masm-> addptr(encode_table, isURL); | |||
5528 | __masm-> shrl(isURL, 6); // restore isURL | |||
5529 | ||||
5530 | __masm-> mov64(rax, 0x3036242a1016040aull); // Shifts | |||
5531 | __masm-> evmovdquq(xmm3, ExternalAddress(StubRoutines::x86::base64_shuffle_addr()), Assembler::AVX_512bit, r15); | |||
5532 | __masm-> evmovdquq(xmm2, Address(encode_table, 0), Assembler::AVX_512bit); | |||
5533 | __masm-> evpbroadcastq(xmm1, rax, Assembler::AVX_512bit); | |||
5534 | ||||
5535 | __masm-> align32(); | |||
__masm-> BIND(L_vbmiLoop);
5537 | ||||
5538 | __masm-> vpermb(xmm0, xmm3, Address(source, start_offset), Assembler::AVX_512bit); | |||
5539 | __masm-> subl(length, 48); | |||
5540 | ||||
5541 | // Put the input bytes into the proper lanes for writing, then | |||
5542 | // encode them. | |||
5543 | __masm-> evpmultishiftqb(xmm0, xmm1, xmm0, Assembler::AVX_512bit); | |||
5544 | __masm-> vpermb(xmm0, xmm0, xmm2, Assembler::AVX_512bit); | |||
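// A rough scalar model of the multishift + permute pair above
// (sketch only; shifts taken from the 0x3036242a1016040a constant,
// read as bytes 0x0a, 0x04, 0x16, 0x10, 0x2a, 0x24, 0x36, 0x30):
//   for (int shift : {10, 4, 22, 16, 42, 36, 54, 48})
//     *out++ = encode_table[(qword >> shift) & 0x3f];
// evpmultishiftqb gathers eight unaligned 8-bit fields per qword,
// and vpermb then indexes the 64-entry table with the low 6 bits
// of each gathered byte.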
5545 | ||||
5546 | // Write to destination | |||
5547 | __masm-> evmovdquq(Address(dest, dp), xmm0, Assembler::AVX_512bit); | |||
5548 | ||||
5549 | __masm-> addptr(dest, 64); | |||
5550 | __masm-> addptr(source, 48); | |||
5551 | __masm-> cmpl(length, 64); | |||
5552 | __masm-> jcc(Assembler::aboveEqual, L_vbmiLoop); | |||
5553 | ||||
5554 | __masm-> vzeroupper(); | |||
5555 | } | |||
5556 | ||||
__masm-> BIND(L_not512);
5558 | if (VM_Version::supports_avx2() | |||
5559 | && VM_Version::supports_avx512vlbw()) { | |||
5560 | /* | |||
** This AVX2 encoder is based on the paper at:
5562 | ** https://dl.acm.org/doi/10.1145/3132709 | |||
5563 | ** | |||
5564 | ** We use AVX2 SIMD instructions to encode 24 bytes into 32 | |||
5565 | ** output bytes. | |||
5566 | ** | |||
5567 | */ | |||
5568 | // Lengths under 32 bytes are done with scalar routine | |||
5569 | __masm-> cmpl(length, 31); | |||
5570 | __masm-> jcc(Assembler::belowEqual, L_process3); | |||
5571 | ||||
5572 | // Set up supporting constant table data | |||
5573 | __masm-> vmovdqu(xmm9, ExternalAddress(StubRoutines::x86::base64_avx2_shuffle_addr()), rax); | |||
5574 | // 6-bit mask for 2nd and 4th (and multiples) 6-bit values | |||
5575 | __masm-> movl(rax, 0x0fc0fc00); | |||
5576 | __masm-> vmovdqu(xmm1, ExternalAddress(StubRoutines::x86::base64_avx2_input_mask_addr()), rax); | |||
5577 | __masm-> evpbroadcastd(xmm8, rax, Assembler::AVX_256bit); | |||
5578 | ||||
5579 | // Multiplication constant for "shifting" right by 6 and 10 | |||
5580 | // bits | |||
5581 | __masm-> movl(rax, 0x04000040); | |||
5582 | ||||
5583 | __masm-> subl(length, 24); | |||
5584 | __masm-> evpbroadcastd(xmm7, rax, Assembler::AVX_256bit); | |||
5585 | ||||
5586 | // For the first load, we mask off reading of the first 4 | |||
5587 | // bytes into the register. This is so we can get 4 3-byte | |||
5588 | // chunks into each lane of the register, avoiding having to | |||
5589 | // handle end conditions. We then shuffle these bytes into a | |||
5590 | // specific order so that manipulation is easier. | |||
5591 | // | |||
5592 | // The initial read loads the XMM register like this: | |||
5593 | // | |||
5594 | // Lower 128-bit lane: | |||
// +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
// | XX | XX | XX | XX | A0 | A1 | A2 | B0 | B1 | B2 | C0 | C1 | C2 | D0 | D1 | D2 |
// +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
//
// Upper 128-bit lane:
// +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
// | E0 | E1 | E2 | F0 | F1 | F2 | G0 | G1 | G2 | H0 | H1 | H2 | XX | XX | XX | XX |
// +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
5605 | // | |||
5606 | // Where A0 is the first input byte, B0 is the fourth, etc. | |||
5607 | // The alphabetical significance denotes the 3 bytes to be | |||
5608 | // consumed and encoded into 4 bytes. | |||
5609 | // | |||
5610 | // We then shuffle the register so each 32-bit word contains | |||
5611 | // the sequence: | |||
// A1 A0 A2 A1, B1 B0 B2 B1, etc.
5613 | // Each of these byte sequences are then manipulated into 4 | |||
5614 | // 6-bit values ready for encoding. | |||
5615 | // | |||
// If we focus on one set of 3-byte chunks, changing the
// nomenclature such that A0 => a, A1 => b, and A2 => c, we
// shuffle such that each 24-bit chunk contains:
//
// b7 b6 b5 b4 b3 b2 b1 b0 | a7 a6 a5 a4 a3 a2 a1 a0 | c7 c6 c5 c4 c3 c2 c1 c0 | b7 b6 b5 b4 b3 b2 b1 b0
//
// Relabeling the four 6-bit values to be encoded as a..d (a the
// most significant), the same 32-bit word, most significant byte
// first, reads:
// b3 b2 b1 b0 c5 c4 c3 c2 | c1 c0 d5 d4 d3 d2 d1 d0 | a5 a4 a3 a2 a1 a0 b5 b4 | b3 b2 b1 b0 c5 c4 c3 c2
5625 | // | |||
// We first AND off all but bits 10-15 and 22-27 (a5..a0 and
// c5..c0) and shift them using a vector multiplication
// operation (vpmulhuw) which effectively shifts a right by 10
// bits and c right by 6 bits. We similarly mask bits 4-9
// (b5..b0) and 16-21 (d5..d0) and shift them left by 4 and 8
// bits respectively. This is done using vpmullw. We end up
// with 4 6-bit values, thus splitting the 3 input bytes,
// ready for encoding:
5634 | // 0 0 d5..d0 0 0 c5..c0 0 0 b5..b0 0 0 a5..a0 | |||
5635 | // | |||
5636 | // For translation, we recognize that there are 5 distinct | |||
5637 | // ranges of legal Base64 characters as below: | |||
5638 | // | |||
5639 | // +-------------+-------------+------------+ | |||
5640 | // | 6-bit value | ASCII range | offset | | |||
5641 | // +-------------+-------------+------------+ | |||
5642 | // | 0..25 | A..Z | 65 | | |||
5643 | // | 26..51 | a..z | 71 | | |||
5644 | // | 52..61 | 0..9 | -4 | | |||
5645 | // | 62 | + or - | -19 or -17 | | |||
5646 | // | 63 | / or _ | -16 or 32 | | |||
5647 | // +-------------+-------------+------------+ | |||
5648 | // | |||
// We note that vpshufb does a parallel lookup in a
// destination register using the lower 4 bits of bytes from a
// source register. If we use a saturated subtraction and
// subtract 51 from each 6-bit value, bytes from [0,51]
// saturate to 0, and [52,63] map to a range of [1,12]. We
// then subtract the (value > 25) comparison mask, which adds 1
// for every value above 25, distinguishing the [0,25] range
// (index 0) from the [26,51] range (index 1). We end up
// with:
//
// +-------------+-------------+------------+
// | 6-bit value | Reduced     | offset     |
// +-------------+-------------+------------+
// | 0..25       | 0           | 65         |
// | 26..51      | 1           | 71         |
// | 52..61      | 2..11       | -4         |
// | 62          | 12          | -19 or -17 |
// | 63          | 13          | -16 or 32  |
// +-------------+-------------+------------+
5667 | // | |||
5668 | // We then use a final vpshufb to add the appropriate offset, | |||
5669 | // translating the bytes. | |||
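// A scalar sketch of the translation (offset_lut stands for the
// 16-byte table loaded below):
//   int reduced = (v > 25 ? 1 : 0) + (v <= 51 ? 0 : v - 51);  // vpcmpgtb + vpsubusb + vpsubb
//   out = v + offset_lut[reduced];                            // vpshufb + vpaddb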
5670 | // | |||
5671 | // Load input bytes - only 28 bytes. Mask the first load to | |||
5672 | // not load into the full register. | |||
5673 | __masm-> vpmaskmovd(xmm1, xmm1, Address(source, start_offset, Address::times_1, -4), Assembler::AVX_256bit); | |||
5674 | ||||
5675 | // Move 3-byte chunks of input (12 bytes) into 16 bytes, | |||
5676 | // ordering by: | |||
5677 | // 1, 0, 2, 1; 4, 3, 5, 4; etc. This groups 6-bit chunks | |||
5678 | // for easy masking | |||
5679 | __masm-> vpshufb(xmm1, xmm1, xmm9, Assembler::AVX_256bit); | |||
5680 | ||||
5681 | __masm-> addl(start_offset, 24); | |||
5682 | ||||
5683 | // Load masking register for first and third (and multiples) | |||
5684 | // 6-bit values. | |||
5685 | __masm-> movl(rax, 0x003f03f0); | |||
5686 | __masm-> evpbroadcastd(xmm6, rax, Assembler::AVX_256bit); | |||
5687 | // Multiplication constant for "shifting" left by 4 and 8 bits | |||
5688 | __masm-> movl(rax, 0x01000010); | |||
5689 | __masm-> evpbroadcastd(xmm5, rax, Assembler::AVX_256bit); | |||
5690 | ||||
5691 | // Isolate 6-bit chunks of interest | |||
5692 | __masm-> vpand(xmm0, xmm8, xmm1, Assembler::AVX_256bit); | |||
5693 | ||||
5694 | // Load constants for encoding | |||
5695 | __masm-> movl(rax, 0x19191919); | |||
5696 | __masm-> evpbroadcastd(xmm3, rax, Assembler::AVX_256bit); | |||
5697 | __masm-> movl(rax, 0x33333333); | |||
5698 | __masm-> evpbroadcastd(xmm4, rax, Assembler::AVX_256bit); | |||
5699 | ||||
5700 | // Shift output bytes 0 and 2 into proper lanes | |||
5701 | __masm-> vpmulhuw(xmm2, xmm0, xmm7, Assembler::AVX_256bit); | |||
5702 | ||||
5703 | // Mask and shift output bytes 1 and 3 into proper lanes and | |||
5704 | // combine | |||
5705 | __masm-> vpand(xmm0, xmm6, xmm1, Assembler::AVX_256bit); | |||
5706 | __masm-> vpmullw(xmm0, xmm5, xmm0, Assembler::AVX_256bit); | |||
5707 | __masm-> vpor(xmm0, xmm0, xmm2, Assembler::AVX_256bit); | |||
5708 | ||||
5709 | // Find out which are 0..25. This indicates which input | |||
5710 | // values fall in the range of 'A'-'Z', which require an | |||
5711 | // additional offset (see comments above) | |||
5712 | __masm-> vpcmpgtb(xmm2, xmm0, xmm3, Assembler::AVX_256bit); | |||
5713 | __masm-> vpsubusb(xmm1, xmm0, xmm4, Assembler::AVX_256bit); | |||
5714 | __masm-> vpsubb(xmm1, xmm1, xmm2, Assembler::AVX_256bit); | |||
5715 | ||||
5716 | // Load the proper lookup table | |||
5717 | __masm-> lea(r11, ExternalAddress(StubRoutines::x86::base64_avx2_lut_addr())); | |||
5718 | __masm-> movl(r15, isURL); | |||
5719 | __masm-> shll(r15, 5); | |||
5720 | __masm-> vmovdqu(xmm2, Address(r11, r15)); | |||
5721 | ||||
5722 | // Shuffle the offsets based on the range calculation done | |||
5723 | // above. This allows us to add the correct offset to the | |||
5724 | // 6-bit value corresponding to the range documented above. | |||
5725 | __masm-> vpshufb(xmm1, xmm2, xmm1, Assembler::AVX_256bit); | |||
5726 | __masm-> vpaddb(xmm0, xmm1, xmm0, Assembler::AVX_256bit); | |||
5727 | ||||
5728 | // Store the encoded bytes | |||
5729 | __masm-> vmovdqu(Address(dest, dp), xmm0); | |||
5730 | __masm-> addl(dp, 32); | |||
5731 | ||||
5732 | __masm-> cmpl(length, 31); | |||
5733 | __masm-> jcc(Assembler::belowEqual, L_process3); | |||
5734 | ||||
5735 | __masm-> align32(); | |||
__masm-> BIND(L_32byteLoop);
5737 | ||||
5738 | // Get next 32 bytes | |||
5739 | __masm-> vmovdqu(xmm1, Address(source, start_offset, Address::times_1, -4)); | |||
5740 | ||||
5741 | __masm-> subl(length, 24); | |||
5742 | __masm-> addl(start_offset, 24); | |||
5743 | ||||
5744 | // This logic is identical to the above, with only constant | |||
5745 | // register loads removed. Shuffle the input, mask off 6-bit | |||
5746 | // chunks, shift them into place, then add the offset to | |||
5747 | // encode. | |||
5748 | __masm-> vpshufb(xmm1, xmm1, xmm9, Assembler::AVX_256bit); | |||
5749 | ||||
5750 | __masm-> vpand(xmm0, xmm8, xmm1, Assembler::AVX_256bit); | |||
5751 | __masm-> vpmulhuw(xmm10, xmm0, xmm7, Assembler::AVX_256bit); | |||
5752 | __masm-> vpand(xmm0, xmm6, xmm1, Assembler::AVX_256bit); | |||
5753 | __masm-> vpmullw(xmm0, xmm5, xmm0, Assembler::AVX_256bit); | |||
5754 | __masm-> vpor(xmm0, xmm0, xmm10, Assembler::AVX_256bit); | |||
5755 | __masm-> vpcmpgtb(xmm10, xmm0, xmm3, Assembler::AVX_256bit); | |||
5756 | __masm-> vpsubusb(xmm1, xmm0, xmm4, Assembler::AVX_256bit); | |||
5757 | __masm-> vpsubb(xmm1, xmm1, xmm10, Assembler::AVX_256bit); | |||
5758 | __masm-> vpshufb(xmm1, xmm2, xmm1, Assembler::AVX_256bit); | |||
5759 | __masm-> vpaddb(xmm0, xmm1, xmm0, Assembler::AVX_256bit); | |||
5760 | ||||
5761 | // Store the encoded bytes | |||
5762 | __masm-> vmovdqu(Address(dest, dp), xmm0); | |||
5763 | __masm-> addl(dp, 32); | |||
5764 | ||||
5765 | __masm-> cmpl(length, 31); | |||
5766 | __masm-> jcc(Assembler::above, L_32byteLoop); | |||
5767 | ||||
__masm-> BIND(L_process3);
5769 | __masm-> vzeroupper(); | |||
5770 | } else { | |||
__masm-> BIND(L_process3);
5772 | } | |||
5773 | ||||
5774 | __masm-> cmpl(length, 3); | |||
5775 | __masm-> jcc(Assembler::below, L_exit); | |||
5776 | ||||
5777 | // Load the encoding table based on isURL | |||
5778 | __masm-> lea(r11, ExternalAddress(StubRoutines::x86::base64_encoding_table_addr())); | |||
5779 | __masm-> movl(r15, isURL); | |||
5780 | __masm-> shll(r15, 6); | |||
5781 | __masm-> addptr(r11, r15); | |||
5782 | ||||
__masm-> BIND(L_processdata);
5784 | ||||
5785 | // Load 3 bytes | |||
5786 | __masm-> load_unsigned_byte(r15, Address(source, start_offset)); | |||
5787 | __masm-> load_unsigned_byte(r10, Address(source, start_offset, Address::times_1, 1)); | |||
5788 | __masm-> load_unsigned_byte(r13, Address(source, start_offset, Address::times_1, 2)); | |||
5789 | ||||
5790 | // Build a 32-bit word with bytes 1, 2, 0, 1 | |||
5791 | __masm-> movl(rax, r10); | |||
5792 | __masm-> shll(r10, 24); | |||
5793 | __masm-> orl(rax, r10); | |||
5794 | ||||
5795 | __masm-> subl(length, 3); | |||
5796 | ||||
5797 | __masm-> shll(r15, 8); | |||
5798 | __masm-> shll(r13, 16); | |||
5799 | __masm-> orl(rax, r15); | |||
5800 | ||||
5801 | __masm-> addl(start_offset, 3); | |||
5802 | ||||
5803 | __masm-> orl(rax, r13); | |||
5804 | // At this point, rax contains | byte1 | byte2 | byte0 | byte1 | |||
5805 | // r13 has byte2 << 16 - need low-order 6 bits to translate. | |||
5806 | // This translated byte is the fourth output byte. | |||
5807 | __masm-> shrl(r13, 16); | |||
5808 | __masm-> andl(r13, 0x3f); | |||
5809 | ||||
// The high-order 6 bits of r15 (byte0) are translated.
5811 | // The translated byte is the first output byte. | |||
5812 | __masm-> shrl(r15, 10); | |||
5813 | ||||
5814 | __masm-> load_unsigned_byte(r13, Address(r11, r13)); | |||
5815 | __masm-> load_unsigned_byte(r15, Address(r11, r15)); | |||
5816 | ||||
5817 | __masm-> movb(Address(dest, dp, Address::times_1, 3), r13); | |||
5818 | ||||
5819 | // Extract high-order 4 bits of byte1 and low-order 2 bits of byte0. | |||
5820 | // This translated byte is the second output byte. | |||
5821 | __masm-> shrl(rax, 4); | |||
5822 | __masm-> movl(r10, rax); | |||
5823 | __masm-> andl(rax, 0x3f); | |||
5824 | ||||
5825 | __masm-> movb(Address(dest, dp, Address::times_1, 0), r15); | |||
5826 | ||||
5827 | __masm-> load_unsigned_byte(rax, Address(r11, rax)); | |||
5828 | ||||
// Extract low-order 4 bits of byte1 and high-order 2 bits of byte2.
5830 | // This translated byte is the third output byte. | |||
5831 | __masm-> shrl(r10, 18); | |||
5832 | __masm-> andl(r10, 0x3f); | |||
5833 | ||||
5834 | __masm-> load_unsigned_byte(r10, Address(r11, r10)); | |||
5835 | ||||
5836 | __masm-> movb(Address(dest, dp, Address::times_1, 1), rax); | |||
5837 | __masm-> movb(Address(dest, dp, Address::times_1, 2), r10); | |||
5838 | ||||
5839 | __masm-> addl(dp, 4); | |||
5840 | __masm-> cmpl(length, 3); | |||
5841 | __masm-> jcc(Assembler::aboveEqual, L_processdata); | |||
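// What the scalar loop computes per 3 input bytes b0, b1, b2
// (reference sketch; r11 points at the selected encoding table):
//   out[0] = table[b0 >> 2];
//   out[1] = table[((b0 & 0x03) << 4) | (b1 >> 4)];
//   out[2] = table[((b1 & 0x0f) << 2) | (b2 >> 6)];
//   out[3] = table[b2 & 0x3f];
// Packing rax as | b1 | b2 | b0 | b1 | lets out[1] and out[2] be
// isolated with one shift each (>> 4, then a further >> 18).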
5842 | ||||
__masm-> BIND(L_exit);
5844 | __masm-> pop(r15); | |||
5845 | __masm-> pop(r14); | |||
5846 | __masm-> pop(r13); | |||
5847 | __masm-> pop(r12); | |||
5848 | __masm-> leave(); | |||
5849 | __masm-> ret(0); | |||
5850 | return start; | |||
5851 | } | |||
5852 | ||||
5853 | // base64 AVX512vbmi tables | |||
5854 | address base64_vbmi_lookup_lo_addr() { | |||
5855 | __masm-> align64(); | |||
5856 | StubCodeMark mark(this, "StubRoutines", "lookup_lo_base64"); | |||
5857 | address start = __masm-> pc(); | |||
assert(((unsigned long long)start & 0x3f) == 0,
       "Alignment problem (0x%08llx)", (unsigned long long)start);
5860 | __masm-> emit_data64(0x8080808080808080, relocInfo::none); | |||
5861 | __masm-> emit_data64(0x8080808080808080, relocInfo::none); | |||
5862 | __masm-> emit_data64(0x8080808080808080, relocInfo::none); | |||
5863 | __masm-> emit_data64(0x8080808080808080, relocInfo::none); | |||
5864 | __masm-> emit_data64(0x8080808080808080, relocInfo::none); | |||
5865 | __masm-> emit_data64(0x3f8080803e808080, relocInfo::none); | |||
5866 | __masm-> emit_data64(0x3b3a393837363534, relocInfo::none); | |||
5867 | __masm-> emit_data64(0x8080808080803d3c, relocInfo::none); | |||
5868 | return start; | |||
5869 | } | |||
5870 | ||||
5871 | address base64_vbmi_lookup_hi_addr() { | |||
5872 | __masm-> align64(); | |||
5873 | StubCodeMark mark(this, "StubRoutines", "lookup_hi_base64"); | |||
5874 | address start = __masm-> pc(); | |||
assert(((unsigned long long)start & 0x3f) == 0,
       "Alignment problem (0x%08llx)", (unsigned long long)start);
5877 | __masm-> emit_data64(0x0605040302010080, relocInfo::none); | |||
5878 | __masm-> emit_data64(0x0e0d0c0b0a090807, relocInfo::none); | |||
5879 | __masm-> emit_data64(0x161514131211100f, relocInfo::none); | |||
5880 | __masm-> emit_data64(0x8080808080191817, relocInfo::none); | |||
5881 | __masm-> emit_data64(0x201f1e1d1c1b1a80, relocInfo::none); | |||
5882 | __masm-> emit_data64(0x2827262524232221, relocInfo::none); | |||
5883 | __masm-> emit_data64(0x302f2e2d2c2b2a29, relocInfo::none); | |||
5884 | __masm-> emit_data64(0x8080808080333231, relocInfo::none); | |||
5885 | return start; | |||
5886 | } | |||
5887 | address base64_vbmi_lookup_lo_url_addr() { | |||
5888 | __masm-> align64(); | |||
5889 | StubCodeMark mark(this, "StubRoutines", "lookup_lo_base64url"); | |||
5890 | address start = __masm-> pc(); | |||
assert(((unsigned long long)start & 0x3f) == 0,
       "Alignment problem (0x%08llx)", (unsigned long long)start);
5893 | __masm-> emit_data64(0x8080808080808080, relocInfo::none); | |||
5894 | __masm-> emit_data64(0x8080808080808080, relocInfo::none); | |||
5895 | __masm-> emit_data64(0x8080808080808080, relocInfo::none); | |||
5896 | __masm-> emit_data64(0x8080808080808080, relocInfo::none); | |||
5897 | __masm-> emit_data64(0x8080808080808080, relocInfo::none); | |||
5898 | __masm-> emit_data64(0x80803e8080808080, relocInfo::none); | |||
5899 | __masm-> emit_data64(0x3b3a393837363534, relocInfo::none); | |||
5900 | __masm-> emit_data64(0x8080808080803d3c, relocInfo::none); | |||
5901 | return start; | |||
5902 | } | |||
5903 | ||||
5904 | address base64_vbmi_lookup_hi_url_addr() { | |||
5905 | __masm-> align64(); | |||
5906 | StubCodeMark mark(this, "StubRoutines", "lookup_hi_base64url"); | |||
5907 | address start = __masm-> pc(); | |||
assert(((unsigned long long)start & 0x3f) == 0,
       "Alignment problem (0x%08llx)", (unsigned long long)start);
5910 | __masm-> emit_data64(0x0605040302010080, relocInfo::none); | |||
5911 | __masm-> emit_data64(0x0e0d0c0b0a090807, relocInfo::none); | |||
5912 | __masm-> emit_data64(0x161514131211100f, relocInfo::none); | |||
5913 | __masm-> emit_data64(0x3f80808080191817, relocInfo::none); | |||
5914 | __masm-> emit_data64(0x201f1e1d1c1b1a80, relocInfo::none); | |||
5915 | __masm-> emit_data64(0x2827262524232221, relocInfo::none); | |||
5916 | __masm-> emit_data64(0x302f2e2d2c2b2a29, relocInfo::none); | |||
5917 | __masm-> emit_data64(0x8080808080333231, relocInfo::none); | |||
5918 | return start; | |||
5919 | } | |||
5920 | ||||
5921 | address base64_vbmi_pack_vec_addr() { | |||
5922 | __masm-> align64(); | |||
5923 | StubCodeMark mark(this, "StubRoutines", "pack_vec_base64"); | |||
5924 | address start = __masm-> pc(); | |||
assert(((unsigned long long)start & 0x3f) == 0,
       "Alignment problem (0x%08llx)", (unsigned long long)start);
5927 | __masm-> emit_data64(0x090a040506000102, relocInfo::none); | |||
5928 | __masm-> emit_data64(0x161011120c0d0e08, relocInfo::none); | |||
5929 | __masm-> emit_data64(0x1c1d1e18191a1415, relocInfo::none); | |||
5930 | __masm-> emit_data64(0x292a242526202122, relocInfo::none); | |||
5931 | __masm-> emit_data64(0x363031322c2d2e28, relocInfo::none); | |||
5932 | __masm-> emit_data64(0x3c3d3e38393a3435, relocInfo::none); | |||
5933 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
5934 | __masm-> emit_data64(0x0000000000000000, relocInfo::none); | |||
5935 | return start; | |||
5936 | } | |||
5937 | ||||
5938 | address base64_vbmi_join_0_1_addr() { | |||
5939 | __masm-> align64(); | |||
5940 | StubCodeMark mark(this, "StubRoutines", "join_0_1_base64"); | |||
5941 | address start = __masm-> pc(); | |||
assert(((unsigned long long)start & 0x3f) == 0,
       "Alignment problem (0x%08llx)", (unsigned long long)start);
5944 | __masm-> emit_data64(0x090a040506000102, relocInfo::none); | |||
5945 | __masm-> emit_data64(0x161011120c0d0e08, relocInfo::none); | |||
5946 | __masm-> emit_data64(0x1c1d1e18191a1415, relocInfo::none); | |||
5947 | __masm-> emit_data64(0x292a242526202122, relocInfo::none); | |||
5948 | __masm-> emit_data64(0x363031322c2d2e28, relocInfo::none); | |||
5949 | __masm-> emit_data64(0x3c3d3e38393a3435, relocInfo::none); | |||
5950 | __masm-> emit_data64(0x494a444546404142, relocInfo::none); | |||
5951 | __masm-> emit_data64(0x565051524c4d4e48, relocInfo::none); | |||
5952 | return start; | |||
5953 | } | |||
5954 | ||||
5955 | address base64_vbmi_join_1_2_addr() { | |||
5956 | __masm-> align64(); | |||
5957 | StubCodeMark mark(this, "StubRoutines", "join_1_2_base64"); | |||
5958 | address start = __masm-> pc(); | |||
assert(((unsigned long long)start & 0x3f) == 0,
       "Alignment problem (0x%08llx)", (unsigned long long)start);
5961 | __masm-> emit_data64(0x1c1d1e18191a1415, relocInfo::none); | |||
5962 | __masm-> emit_data64(0x292a242526202122, relocInfo::none); | |||
5963 | __masm-> emit_data64(0x363031322c2d2e28, relocInfo::none); | |||
5964 | __masm-> emit_data64(0x3c3d3e38393a3435, relocInfo::none); | |||
5965 | __masm-> emit_data64(0x494a444546404142, relocInfo::none); | |||
5966 | __masm-> emit_data64(0x565051524c4d4e48, relocInfo::none); | |||
5967 | __masm-> emit_data64(0x5c5d5e58595a5455, relocInfo::none); | |||
5968 | __masm-> emit_data64(0x696a646566606162, relocInfo::none); | |||
5969 | return start; | |||
5970 | } | |||
5971 | ||||
5972 | address base64_vbmi_join_2_3_addr() { | |||
5973 | __masm-> align64(); | |||
5974 | StubCodeMark mark(this, "StubRoutines", "join_2_3_base64"); | |||
5975 | address start = __masm-> pc(); | |||
assert(((unsigned long long)start & 0x3f) == 0,
       "Alignment problem (0x%08llx)", (unsigned long long)start);
5978 | __masm-> emit_data64(0x363031322c2d2e28, relocInfo::none); | |||
5979 | __masm-> emit_data64(0x3c3d3e38393a3435, relocInfo::none); | |||
5980 | __masm-> emit_data64(0x494a444546404142, relocInfo::none); | |||
5981 | __masm-> emit_data64(0x565051524c4d4e48, relocInfo::none); | |||
5982 | __masm-> emit_data64(0x5c5d5e58595a5455, relocInfo::none); | |||
5983 | __masm-> emit_data64(0x696a646566606162, relocInfo::none); | |||
5984 | __masm-> emit_data64(0x767071726c6d6e68, relocInfo::none); | |||
5985 | __masm-> emit_data64(0x7c7d7e78797a7475, relocInfo::none); | |||
5986 | return start; | |||
5987 | } | |||
5988 | ||||
5989 | address base64_decoding_table_addr() { | |||
5990 | StubCodeMark mark(this, "StubRoutines", "decoding_table_base64"); | |||
5991 | address start = __masm-> pc(); | |||
5992 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
5993 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
5994 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
5995 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
5996 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
5997 | __masm-> emit_data64(0x3fffffff3effffff, relocInfo::none); | |||
5998 | __masm-> emit_data64(0x3b3a393837363534, relocInfo::none); | |||
5999 | __masm-> emit_data64(0xffffffffffff3d3c, relocInfo::none); | |||
6000 | __masm-> emit_data64(0x06050403020100ff, relocInfo::none); | |||
6001 | __masm-> emit_data64(0x0e0d0c0b0a090807, relocInfo::none); | |||
6002 | __masm-> emit_data64(0x161514131211100f, relocInfo::none); | |||
6003 | __masm-> emit_data64(0xffffffffff191817, relocInfo::none); | |||
6004 | __masm-> emit_data64(0x201f1e1d1c1b1aff, relocInfo::none); | |||
6005 | __masm-> emit_data64(0x2827262524232221, relocInfo::none); | |||
6006 | __masm-> emit_data64(0x302f2e2d2c2b2a29, relocInfo::none); | |||
6007 | __masm-> emit_data64(0xffffffffff333231, relocInfo::none); | |||
6008 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6009 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6010 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6011 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6012 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6013 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6014 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6015 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6016 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6017 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6018 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6019 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6020 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6021 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6022 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6023 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6024 | ||||
6025 | // URL table | |||
6026 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6027 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6028 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6029 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6030 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6031 | __masm-> emit_data64(0xffff3effffffffff, relocInfo::none); | |||
6032 | __masm-> emit_data64(0x3b3a393837363534, relocInfo::none); | |||
6033 | __masm-> emit_data64(0xffffffffffff3d3c, relocInfo::none); | |||
6034 | __masm-> emit_data64(0x06050403020100ff, relocInfo::none); | |||
6035 | __masm-> emit_data64(0x0e0d0c0b0a090807, relocInfo::none); | |||
6036 | __masm-> emit_data64(0x161514131211100f, relocInfo::none); | |||
6037 | __masm-> emit_data64(0x3fffffffff191817, relocInfo::none); | |||
6038 | __masm-> emit_data64(0x201f1e1d1c1b1aff, relocInfo::none); | |||
6039 | __masm-> emit_data64(0x2827262524232221, relocInfo::none); | |||
6040 | __masm-> emit_data64(0x302f2e2d2c2b2a29, relocInfo::none); | |||
6041 | __masm-> emit_data64(0xffffffffff333231, relocInfo::none); | |||
6042 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6043 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6044 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6045 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6046 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6047 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6048 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6049 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6050 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6051 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6052 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6053 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6054 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6055 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6056 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6057 | __masm-> emit_data64(0xffffffffffffffff, relocInfo::none); | |||
6058 | return start; | |||
6059 | } | |||
6060 | ||||
6061 | ||||
6062 | // Code for generating Base64 decoding. | |||
6063 | // | |||
6064 | // Based on the article (and associated code) from https://arxiv.org/abs/1910.05109. | |||
6065 | // | |||
6066 | // Intrinsic function prototype in Base64.java: | |||
// private void decodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL, boolean isMIME) {
6068 | address generate_base64_decodeBlock() { | |||
6069 | __masm-> align(CodeEntryAlignment); | |||
6070 | StubCodeMark mark(this, "StubRoutines", "implDecode"); | |||
6071 | address start = __masm-> pc(); | |||
6072 | __masm-> enter(); | |||
6073 | ||||
6074 | // Save callee-saved registers before using them | |||
6075 | __masm-> push(r12); | |||
6076 | __masm-> push(r13); | |||
6077 | __masm-> push(r14); | |||
6078 | __masm-> push(r15); | |||
6079 | __masm-> push(rbx); | |||
6080 | ||||
6081 | // arguments | |||
6082 | const Register source = c_rarg0; // Source Array | |||
6083 | const Register start_offset = c_rarg1; // start offset | |||
6084 | const Register end_offset = c_rarg2; // end offset | |||
6085 | const Register dest = c_rarg3; // destination array | |||
6086 | const Register isMIME = rbx; | |||
6087 | ||||
6088 | #ifndef _WIN64 | |||
6089 | const Register dp = c_rarg4; // Position for writing to dest array | |||
const Register isURL = c_rarg5;  // Base64 or URL character set
6091 | __masm-> movl(isMIME, Address(rbp, 2 * wordSize)); | |||
6092 | #else | |||
const Address dp_mem(rbp, 6 * wordSize);  // dp is passed on the stack on Win64
6094 | const Address isURL_mem(rbp, 7 * wordSize); | |||
6095 | const Register isURL = r10; // pick the volatile windows register | |||
6096 | const Register dp = r12; | |||
6097 | __masm-> movl(dp, dp_mem); | |||
6098 | __masm-> movl(isURL, isURL_mem); | |||
6099 | __masm-> movl(isMIME, Address(rbp, 8 * wordSize)); | |||
6100 | #endif | |||
6101 | ||||
6102 | const XMMRegister lookup_lo = xmm5; | |||
6103 | const XMMRegister lookup_hi = xmm6; | |||
6104 | const XMMRegister errorvec = xmm7; | |||
6105 | const XMMRegister pack16_op = xmm9; | |||
6106 | const XMMRegister pack32_op = xmm8; | |||
6107 | const XMMRegister input0 = xmm3; | |||
6108 | const XMMRegister input1 = xmm20; | |||
6109 | const XMMRegister input2 = xmm21; | |||
6110 | const XMMRegister input3 = xmm19; | |||
6111 | const XMMRegister join01 = xmm12; | |||
6112 | const XMMRegister join12 = xmm11; | |||
6113 | const XMMRegister join23 = xmm10; | |||
6114 | const XMMRegister translated0 = xmm2; | |||
6115 | const XMMRegister translated1 = xmm1; | |||
6116 | const XMMRegister translated2 = xmm0; | |||
6117 | const XMMRegister translated3 = xmm4; | |||
6118 | ||||
6119 | const XMMRegister merged0 = xmm2; | |||
6120 | const XMMRegister merged1 = xmm1; | |||
6121 | const XMMRegister merged2 = xmm0; | |||
6122 | const XMMRegister merged3 = xmm4; | |||
6123 | const XMMRegister merge_ab_bc0 = xmm2; | |||
6124 | const XMMRegister merge_ab_bc1 = xmm1; | |||
6125 | const XMMRegister merge_ab_bc2 = xmm0; | |||
6126 | const XMMRegister merge_ab_bc3 = xmm4; | |||
6127 | ||||
6128 | const XMMRegister pack24bits = xmm4; | |||
6129 | ||||
6130 | const Register length = r14; | |||
6131 | const Register output_size = r13; | |||
6132 | const Register output_mask = r15; | |||
6133 | const KRegister input_mask = k1; | |||
6134 | ||||
6135 | const XMMRegister input_initial_valid_b64 = xmm0; | |||
6136 | const XMMRegister tmp = xmm10; | |||
6137 | const XMMRegister mask = xmm0; | |||
6138 | const XMMRegister invalid_b64 = xmm1; | |||
6139 | ||||
6140 | Label L_process256, L_process64, L_process64Loop, L_exit, L_processdata, L_loadURL; | |||
6141 | Label L_continue, L_finalBit, L_padding, L_donePadding, L_bruteForce; | |||
6142 | Label L_forceLoop, L_bottomLoop, L_checkMIME, L_exit_no_vzero; | |||
6143 | ||||
6144 | // calculate length from offsets | |||
6145 | __masm-> movl(length, end_offset); | |||
6146 | __masm-> subl(length, start_offset); | |||
6147 | __masm-> push(dest); // Save for return value calc | |||
6148 | ||||
6149 | // If AVX512 VBMI not supported, just compile non-AVX code | |||
if (VM_Version::supports_avx512_vbmi() &&
6151 | VM_Version::supports_avx512bw()) { | |||
6152 | __masm-> cmpl(length, 128); // 128-bytes is break-even for AVX-512 | |||
6153 | __masm-> jcc(Assembler::lessEqual, L_bruteForce); | |||
6154 | ||||
6155 | __masm-> cmpl(isMIME, 0); | |||
6156 | __masm-> jcc(Assembler::notEqual, L_bruteForce); | |||
6157 | ||||
6158 | // Load lookup tables based on isURL | |||
6159 | __masm-> cmpl(isURL, 0); | |||
6160 | __masm-> jcc(Assembler::notZero, L_loadURL); | |||
6161 | ||||
6162 | __masm-> evmovdquq(lookup_lo, ExternalAddress(StubRoutines::x86::base64_vbmi_lookup_lo_addr()), Assembler::AVX_512bit, r13); | |||
6163 | __masm-> evmovdquq(lookup_hi, ExternalAddress(StubRoutines::x86::base64_vbmi_lookup_hi_addr()), Assembler::AVX_512bit, r13); | |||
6164 | ||||
__masm-> BIND(L_continue);
6166 | ||||
6167 | __masm-> movl(r15, 0x01400140); | |||
6168 | __masm-> evpbroadcastd(pack16_op, r15, Assembler::AVX_512bit); | |||
6169 | ||||
6170 | __masm-> movl(r15, 0x00011000); | |||
6171 | __masm-> evpbroadcastd(pack32_op, r15, Assembler::AVX_512bit); | |||
6172 | ||||
6173 | __masm-> cmpl(length, 0xff); | |||
6174 | __masm-> jcc(Assembler::lessEqual, L_process64); | |||
6175 | ||||
6176 | // load masks required for decoding data | |||
__masm-> BIND(L_processdata);
__masm-> evmovdquq(join01, ExternalAddress(StubRoutines::x86::base64_vbmi_join_0_1_addr()), Assembler::AVX_512bit, r13);
6179 | __masm-> evmovdquq(join12, ExternalAddress(StubRoutines::x86::base64_vbmi_join_1_2_addr()), Assembler::AVX_512bit, r13); | |||
6180 | __masm-> evmovdquq(join23, ExternalAddress(StubRoutines::x86::base64_vbmi_join_2_3_addr()), Assembler::AVX_512bit, r13); | |||
6181 | ||||
6182 | __masm-> align32(); | |||
__masm-> BIND(L_process256);
6184 | // Grab input data | |||
6185 | __masm-> evmovdquq(input0, Address(source, start_offset, Address::times_1, 0x00), Assembler::AVX_512bit); | |||
6186 | __masm-> evmovdquq(input1, Address(source, start_offset, Address::times_1, 0x40), Assembler::AVX_512bit); | |||
6187 | __masm-> evmovdquq(input2, Address(source, start_offset, Address::times_1, 0x80), Assembler::AVX_512bit); | |||
6188 | __masm-> evmovdquq(input3, Address(source, start_offset, Address::times_1, 0xc0), Assembler::AVX_512bit); | |||
6189 | ||||
6190 | // Copy the low part of the lookup table into the destination of the permutation | |||
6191 | __masm-> evmovdquq(translated0, lookup_lo, Assembler::AVX_512bit); | |||
6192 | __masm-> evmovdquq(translated1, lookup_lo, Assembler::AVX_512bit); | |||
6193 | __masm-> evmovdquq(translated2, lookup_lo, Assembler::AVX_512bit); | |||
6194 | __masm-> evmovdquq(translated3, lookup_lo, Assembler::AVX_512bit); | |||
6195 | ||||
6196 | // Translate the base64 input into "decoded" bytes | |||
6197 | __masm-> evpermt2b(translated0, input0, lookup_hi, Assembler::AVX_512bit); | |||
6198 | __masm-> evpermt2b(translated1, input1, lookup_hi, Assembler::AVX_512bit); | |||
6199 | __masm-> evpermt2b(translated2, input2, lookup_hi, Assembler::AVX_512bit); | |||
6200 | __masm-> evpermt2b(translated3, input3, lookup_hi, Assembler::AVX_512bit); | |||
6201 | ||||
6202 | // OR all of the translations together to check for errors (high-order bit of byte set) | |||
6203 | __masm-> vpternlogd(input0, 0xfe, input1, input2, Assembler::AVX_512bit); | |||
6204 | ||||
6205 | __masm-> vpternlogd(input3, 0xfe, translated0, translated1, Assembler::AVX_512bit); | |||
6206 | __masm-> vpternlogd(input0, 0xfe, translated2, translated3, Assembler::AVX_512bit); | |||
6207 | __masm-> vpor(errorvec, input3, input0, Assembler::AVX_512bit); | |||
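// Both lookup tables mark every byte that is not a valid base64
// character with 0x80. For valid input, neither the raw input
// bytes nor the translated bytes have bit 7 set, so OR-ing all
// eight vectors leaves bit 7 set somewhere iff the block contains
// an invalid character. vpternlogd with immediate 0xfe computes
// a three-way OR (A | B | C), folding the vectors in fewer ops.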
6208 | ||||
6209 | // Check if there was an error - if so, try 64-byte chunks | |||
6210 | __masm-> evpmovb2m(k3, errorvec, Assembler::AVX_512bit); | |||
6211 | __masm-> kortestql(k3, k3); | |||
6212 | __masm-> jcc(Assembler::notZero, L_process64); | |||
6213 | ||||
6214 | // The merging and shuffling happens here | |||
6215 | // We multiply each byte pair [00dddddd | 00cccccc | 00bbbbbb | 00aaaaaa] | |||
6216 | // Multiply [00cccccc] by 2^6 added to [00dddddd] to get [0000cccc | ccdddddd] | |||
6217 | // The pack16_op is a vector of 0x01400140, so multiply D by 1 and C by 0x40 | |||
6218 | __masm-> vpmaddubsw(merge_ab_bc0, translated0, pack16_op, Assembler::AVX_512bit); | |||
6219 | __masm-> vpmaddubsw(merge_ab_bc1, translated1, pack16_op, Assembler::AVX_512bit); | |||
6220 | __masm-> vpmaddubsw(merge_ab_bc2, translated2, pack16_op, Assembler::AVX_512bit); | |||
6221 | __masm-> vpmaddubsw(merge_ab_bc3, translated3, pack16_op, Assembler::AVX_512bit); | |||
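// vpmaddubsw arithmetic per byte pair (lo, hi), lo at the lower
// address: lo * 0x40 + hi * 0x01, e.g.
//   00aaaaaa, 00bbbbbb  ->  0000aaaa aabbbbbb  (a*64 + b)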
6222 | ||||
6223 | // Now do the same with packed 16-bit values. | |||
6224 | // We start with [0000cccc | ccdddddd | 0000aaaa | aabbbbbb] | |||
6225 | // pack32_op is 0x00011000 (2^12, 1), so this multiplies [0000aaaa | aabbbbbb] by 2^12 | |||
6226 | // and adds [0000cccc | ccdddddd] to yield [00000000 | aaaaaabb | bbbbcccc | ccdddddd] | |||
6227 | __masm-> vpmaddwd(merged0, merge_ab_bc0, pack32_op, Assembler::AVX_512bit); | |||
6228 | __masm-> vpmaddwd(merged1, merge_ab_bc1, pack32_op, Assembler::AVX_512bit); | |||
6229 | __masm-> vpmaddwd(merged2, merge_ab_bc2, pack32_op, Assembler::AVX_512bit); | |||
6230 | __masm-> vpmaddwd(merged3, merge_ab_bc3, pack32_op, Assembler::AVX_512bit); | |||
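// vpmaddwd arithmetic per 16-bit pair: the lower word
// (0000aaaa aabbbbbb) is scaled by 0x1000 and the upper word
// (0000cccc ccdddddd) added, i.e. ab * 4096 + cd =
//   00000000 aaaaaabb bbbbcccc ccdddddd  (24 payload bits per dword)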
6231 | ||||
6232 | // The join vectors specify which byte from which vector goes into the outputs | |||
6233 | // One of every 4 bytes in the extended vector is zero, so we pack them into their | |||
6234 | // final positions in the register for storing (256 bytes in, 192 bytes out) | |||
6235 | __masm-> evpermt2b(merged0, join01, merged1, Assembler::AVX_512bit); | |||
6236 | __masm-> evpermt2b(merged1, join12, merged2, Assembler::AVX_512bit); | |||
6237 | __masm-> evpermt2b(merged2, join23, merged3, Assembler::AVX_512bit); | |||
6238 | ||||
6239 | // Store result | |||
6240 | __masm-> evmovdquq(Address(dest, dp, Address::times_1, 0x00), merged0, Assembler::AVX_512bit); | |||
6241 | __masm-> evmovdquq(Address(dest, dp, Address::times_1, 0x40), merged1, Assembler::AVX_512bit); | |||
6242 | __masm-> evmovdquq(Address(dest, dp, Address::times_1, 0x80), merged2, Assembler::AVX_512bit); | |||
6243 | ||||
6244 | __masm-> addptr(source, 0x100); | |||
6245 | __masm-> addptr(dest, 0xc0); | |||
6246 | __masm-> subl(length, 0x100); | |||
6247 | __masm-> cmpl(length, 64 * 4); | |||
6248 | __masm-> jcc(Assembler::greaterEqual, L_process256); | |||
6249 | ||||
6250 | // At this point, we've decoded 64 * 4 * n bytes. | |||
6251 | // The remaining length will be <= 64 * 4 - 1. | |||
6252 | // UNLESS there was an error decoding the first 256-byte chunk. In this | |||
6253 | // case, the length will be arbitrarily long. | |||
6254 | // | |||
6255 | // Note that this will be the path for MIME-encoded strings. | |||
6256 | ||||
__masm-> BIND(L_process64);
6258 | ||||
6259 | __masm-> evmovdquq(pack24bits, ExternalAddress(StubRoutines::x86::base64_vbmi_pack_vec_addr()), Assembler::AVX_512bit, r13); | |||
6260 | ||||
6261 | __masm-> cmpl(length, 63); | |||
6262 | __masm-> jcc(Assembler::lessEqual, L_finalBit); | |||
6263 | ||||
6264 | __masm-> mov64(rax, 0x0000ffffffffffff); | |||
6265 | __masm-> kmovql(k2, rax); | |||
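// k2 masks the 64-byte store below down to its low 48 bytes:
// every 64 base64 characters decode to exactly 48 output bytes.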
6266 | ||||
6267 | __masm-> align32(); | |||
__masm-> BIND(L_process64Loop);
6269 | ||||
6270 | // Handle first 64-byte block | |||
6271 | ||||
6272 | __masm-> evmovdquq(input0, Address(source, start_offset), Assembler::AVX_512bit); | |||
6273 | __masm-> evmovdquq(translated0, lookup_lo, Assembler::AVX_512bit); | |||
6274 | __masm-> evpermt2b(translated0, input0, lookup_hi, Assembler::AVX_512bit); | |||
6275 | ||||
6276 | __masm-> vpor(errorvec, translated0, input0, Assembler::AVX_512bit); | |||
6277 | ||||
6278 | // Check for error and bomb out before updating dest | |||
6279 | __masm-> evpmovb2m(k3, errorvec, Assembler::AVX_512bit); | |||
6280 | __masm-> kortestql(k3, k3); | |||
6281 | __masm-> jcc(Assembler::notZero, L_exit); | |||
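// Editor's note (inferred from the vpor/evpmovb2m pair above; not stated in the
// source): the lookup tables appear to translate characters outside the base64
// alphabet to values with the sign bit set, so the OR of input and translation
// has a non-zero sign-bit mask exactly when some input byte was invalid.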
6282 | ||||
6283 | // Pack output register, selecting correct byte ordering | |||
6284 | __masm-> vpmaddubsw(merge_ab_bc0, translated0, pack16_op, Assembler::AVX_512bit); | |||
6285 | __masm-> vpmaddwd(merged0, merge_ab_bc0, pack32_op, Assembler::AVX_512bit); | |||
6286 | __masm-> vpermb(merged0, pack24bits, merged0, Assembler::AVX_512bit); | |||
6287 | ||||
6288 | __masm-> evmovdqub(Address(dest, dp), k2, merged0, true, Assembler::AVX_512bit); | |||
6289 | ||||
6290 | __masm-> subl(length, 64); | |||
6291 | __masm-> addptr(source, 64); | |||
6292 | __masm-> addptr(dest, 48); | |||
6293 | ||||
6294 | __masm-> cmpl(length, 64); | |||
6295 | __masm-> jcc(Assembler::greaterEqual, L_process64Loop); | |||
6296 | ||||
6297 | __masm-> cmpl(length, 0); | |||
6298 | __masm-> jcc(Assembler::lessEqual, L_exit); | |||
6299 | ||||
6300 | __masm-> BIND(L_finalBit); | |||
6301 | // Now have 1 to 63 bytes left to decode | |||
6302 | ||||
6303 | // Rather than deferring the final fragment to Java -- which would call back | |||
6304 | // into this routine for every remaining 4 bytes of input -- handle the rest | |||
6305 | // here. | |||
6306 | __masm-> movq(rax, -1); | |||
6307 | __masm-> bzhiq(rax, rax, length); // Input mask in rax | |||
6308 | ||||
6309 | __masm-> movl(output_size, length); | |||
6310 | __masm-> shrl(output_size, 2); // Find (len / 4) * 3 (output length) | |||
6311 | __masm-> lea(output_size, Address(output_size, output_size, Address::times_2, 0)); | |||
6312 | // output_size in r13 | |||
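// Editor's sketch (illustrative names, not the stub's code): the mask and size
// computations above in scalar form, for a remaining length of 1..63 bytes.
#include <cstdint>
static inline void final_masks(uint32_t len, uint64_t* input_mask, uint32_t* out_len) {
  *input_mask = (~0ULL) >> (64 - len);   // bzhiq: low `len` bits set
  *out_len    = (len / 4) * 3;           // shrl by 2, then lea to multiply by 3
}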
6313 | ||||
6314 | // Strip pad characters, if any, and adjust length and mask | |||
6315 | __masm-> cmpb(Address(source, length, Address::times_1, -1), '='); | |||
6316 | __masm-> jcc(Assembler::equal, L_padding); | |||
6317 | ||||
6318 | __masm-> BIND(L_donePadding); | |||
6319 | ||||
6320 | // rax holds the input mask; the output mask is the low output_size bits, i.e. (all 1s >> (64 - output_size)). | |||
6321 | __masm-> kmovql(input_mask, rax); | |||
6322 | __masm-> movq(output_mask, -1); | |||
6323 | __masm-> bzhiq(output_mask, output_mask, output_size); | |||
6324 | ||||
6325 | // Load initial input with all valid base64 characters. Will be used | |||
6326 | // in merging source bytes to avoid masking when determining if an error occurred. | |||
6327 | __masm-> movl(rax, 0x61616161); | |||
6328 | __masm-> evpbroadcastd(input_initial_valid_b64, rax, Assembler::AVX_512bit); | |||
6329 | ||||
6330 | // A register containing all invalid base64 decoded values | |||
6331 | __masm-> movl(rax, 0x80808080); | |||
6332 | __masm-> evpbroadcastd(invalid_b64, rax, Assembler::AVX_512bit); | |||
6333 | ||||
6334 | // input_mask is in k1 | |||
6335 | // output_size is in r13 | |||
6336 | // output_mask is in r15 | |||
6337 | // zmm0 - free | |||
6338 | // zmm1 - 0x00011000 | |||
6339 | // zmm2 - 0x01400140 | |||
6340 | // zmm3 - errorvec | |||
6341 | // zmm4 - pack vector | |||
6342 | // zmm5 - lookup_lo | |||
6343 | // zmm6 - lookup_hi | |||
6344 | // zmm7 - errorvec | |||
6345 | // zmm8 - 0x61616161 | |||
6346 | // zmm9 - 0x80808080 | |||
6347 | ||||
6348 | // Load only the bytes from source, merging into our "fully-valid" register | |||
6349 | __masm-> evmovdqub(input_initial_valid_b64, input_mask, Address(source, start_offset, Address::times_1, 0x0), true, Assembler::AVX_512bit); | |||
6350 | ||||
6351 | // Decode all bytes within our merged input | |||
6352 | __masm-> evmovdquq(tmp, lookup_lo, Assembler::AVX_512bit); | |||
6353 | __masm-> evpermt2b(tmp, input_initial_valid_b64, lookup_hi, Assembler::AVX_512bit); | |||
6354 | __masm-> vporq(mask, tmp, input_initial_valid_b64, Assembler::AVX_512bit); | |||
6355 | ||||
6356 | // Check for error. Compare (decoded | initial) to all invalid. | |||
6357 | // If any bytes have their high-order bit set, then we have an error. | |||
6358 | __masm-> evptestmb(k2, mask, invalid_b64, Assembler::AVX_512bit); | |||
6359 | __masm-> kortestql(k2, k2); | |||
6360 | ||||
6361 | // If we have an error, use the brute force loop to decode what we can (4-byte chunks). | |||
6362 | __masm-> jcc(Assembler::notZero, L_bruteForce); | |||
6363 | ||||
6364 | // Shuffle output bytes | |||
6365 | __masm-> vpmaddubsw(tmp, tmp, pack16_op, Assembler::AVX_512bit); | |||
6366 | __masm-> vpmaddwd(tmp, tmp, pack32_op, Assembler::AVX_512bit); | |||
6367 | ||||
6368 | __masm-> vpermb(tmp, pack24bits, tmp, Assembler::AVX_512bit); | |||
6369 | __masm-> kmovql(k1, output_mask); | |||
6370 | __masm-> evmovdqub(Address(dest, dp), k1, tmp, true, Assembler::AVX_512bit); | |||
6371 | ||||
6372 | __masm-> addptr(dest, output_size); | |||
6373 | ||||
6374 | __masm-> BIND(L_exit); | |||
6375 | __masm-> vzeroupper(); | |||
6376 | __masm-> pop(rax); // Get original dest value | |||
6377 | __masm-> subptr(dest, rax); // Number of bytes converted | |||
6378 | __masm-> movptr(rax, dest); | |||
6379 | __masm-> pop(rbx); | |||
6380 | __masm-> pop(r15); | |||
6381 | __masm-> pop(r14); | |||
6382 | __masm-> pop(r13); | |||
6383 | __masm-> pop(r12); | |||
6384 | __masm-> leave(); | |||
6385 | __masm-> ret(0); | |||
6386 | ||||
6387 | __masm-> BIND(L_loadURL); | |||
6388 | __masm-> evmovdquq(lookup_lo, ExternalAddress(StubRoutines::x86::base64_vbmi_lookup_lo_url_addr()), Assembler::AVX_512bit, r13); | |||
6389 | __masm-> evmovdquq(lookup_hi, ExternalAddress(StubRoutines::x86::base64_vbmi_lookup_hi_url_addr()), Assembler::AVX_512bit, r13); | |||
6390 | __masm-> jmp(L_continue); | |||
6391 | ||||
6392 | __masm-> BIND(L_padding); | |||
6393 | __masm-> decrementq(output_size, 1); | |||
6394 | __masm-> shrq(rax, 1); | |||
6395 | ||||
6396 | __masm-> cmpb(Address(source, length, Address::times_1, -2), '='); | |||
6397 | __masm-> jcc(Assembler::notEqual, L_donePadding); | |||
6398 | ||||
6399 | __masm-> decrementq(output_size, 1); | |||
6400 | __masm-> shrq(rax, 1); | |||
6401 | __masm-> jmp(L_donePadding); | |||
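// Editor's note: each trailing '=' removes one byte from the expected output
// (decrementq on output_size) and one character from the input mask in rax
// (shrq by 1); base64 allows at most two pad characters, hence the single
// extra cmpb above.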
6402 | ||||
6403 | __masm-> align32(); | |||
6404 | __masm-> BIND(L_bruteForce); | |||
6405 | } // End of if(avx512_vbmi) | |||
6406 | ||||
6407 | // Use non-AVX code to decode 4-byte chunks into 3 bytes of output | |||
6408 | ||||
6409 | // Register state (Linux): | |||
6410 | // r12-15 - saved on stack | |||
6411 | // rdi - src | |||
6412 | // rsi - sp | |||
6413 | // rdx - sl | |||
6414 | // rcx - dst | |||
6415 | // r8 - dp | |||
6416 | // r9 - isURL | |||
6417 | ||||
6418 | // Register state (Windows): | |||
6419 | // r12-15 - saved on stack | |||
6420 | // rcx - src | |||
6421 | // rdx - sp | |||
6422 | // r8 - sl | |||
6423 | // r9 - dst | |||
6424 | // r12 - dp | |||
6425 | // r10 - isURL | |||
6426 | ||||
6427 | // Registers (common): | |||
6428 | // length (r14) - bytes in src | |||
6429 | ||||
6430 | const Register decode_table = r11; | |||
6431 | const Register out_byte_count = rbx; | |||
6432 | const Register byte1 = r13; | |||
6433 | const Register byte2 = r15; | |||
6434 | const Register byte3 = WINDOWS_ONLY(r8) NOT_WINDOWS(rdx); | |||
6435 | const Register byte4 = WINDOWS_ONLY(r10) NOT_WINDOWS(r9); | |||
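// Editor's sketch (hypothetical helper mirroring L_bottomLoop/L_forceLoop below;
// simplified to return false on the first invalid character instead of reporting
// bytes converted): decode_table maps a base64 character to its sextet, or to a
// byte with the sign bit set for invalid characters, which is what the
// jcc(positive, ...) test keys on.
#include <cstdint>
static bool decode_chunks(const int8_t* decode_table, const uint8_t* src,
                          uint8_t* dst, int nchunks) {
  for (int i = 0; i < nchunks; i++, src += 4, dst += 3) {
    int b1 = decode_table[src[0]], b2 = decode_table[src[1]];
    int b3 = decode_table[src[2]], b4 = decode_table[src[3]];
    if ((b1 | b2 | b3 | b4) < 0) return false;        // any sign bit set -> error
    uint32_t v = (b1 << 18) | (b2 << 12) | (b3 << 6) | b4;
    dst[2] = (uint8_t)v;                              // big-endian byte extraction,
    dst[1] = (uint8_t)(v >> 8);                       // matching the movb/shrl
    dst[0] = (uint8_t)(v >> 16);                      // sequence in L_forceLoop
  }
  return true;
}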
6436 | ||||
6437 | __masm-> shrl(length, 2); // Multiple of 4 bytes only - length is # 4-byte chunks | |||
6438 | __masm-> cmpl(length, 0); | |||
6439 | __masm-> jcc(Assembler::lessEqual, L_exit_no_vzero); | |||
6440 | ||||
6441 | __masm-> shll(isURL, 8); // index into decode table based on isURL | |||
6442 | __masm-> lea(decode_table, ExternalAddress(StubRoutines::x86::base64_decoding_table_addr())); | |||
6443 | __masm-> addptr(decode_table, isURL); | |||
6444 | ||||
6445 | __masm-> jmp(L_bottomLoop); | |||
6446 | ||||
6447 | __masm-> align32(); | |||
6448 | __masm-> BIND(L_forceLoop); | |||
6449 | __masm-> shll(byte1, 18); | |||
6450 | __masm-> shll(byte2, 12); | |||
6451 | __masm-> shll(byte3, 6); | |||
6452 | __masm-> orl(byte1, byte2); | |||
6453 | __masm-> orl(byte1, byte3); | |||
6454 | __masm-> orl(byte1, byte4); | |||
6455 | ||||
6456 | __masm-> addptr(source, 4); | |||
6457 | ||||
6458 | __masm-> movb(Address(dest, dp, Address::times_1, 2), byte1); | |||
6459 | __masm-> shrl(byte1, 8); | |||
6460 | __masm-> movb(Address(dest, dp, Address::times_1, 1), byte1); | |||
6461 | __masm-> shrl(byte1, 8); | |||
6462 | __masm-> movb(Address(dest, dp, Address::times_1, 0), byte1); | |||
6463 | ||||
6464 | __masm-> addptr(dest, 3); | |||
6465 | __masm-> decrementl(length, 1); | |||
6466 | __masm-> jcc(Assembler::zero, L_exit_no_vzero); | |||
6467 | ||||
6468 | __masm-> BIND(L_bottomLoop); | |||
6469 | __masm-> load_unsigned_byte(byte1, Address(source, start_offset, Address::times_1, 0x00)); | |||
6470 | __masm-> load_unsigned_byte(byte2, Address(source, start_offset, Address::times_1, 0x01)); | |||
6471 | __masm-> load_signed_byte(byte1, Address(decode_table, byte1)); | |||
6472 | __masm-> load_signed_byte(byte2, Address(decode_table, byte2)); | |||
6473 | __masm-> load_unsigned_byte(byte3, Address(source, start_offset, Address::times_1, 0x02)); | |||
6474 | __masm-> load_unsigned_byte(byte4, Address(source, start_offset, Address::times_1, 0x03)); | |||
6475 | __masm-> load_signed_byte(byte3, Address(decode_table, byte3)); | |||
6476 | __masm-> load_signed_byte(byte4, Address(decode_table, byte4)); | |||
6477 | ||||
6478 | __masm-> mov(rax, byte1); | |||
6479 | __masm-> orl(rax, byte2); | |||
6480 | __masm-> orl(rax, byte3); | |||
6481 | __masm-> orl(rax, byte4); | |||
6482 | __masm-> jcc(Assembler::positive, L_forceLoop); | |||
6483 | ||||
6484 | __masm-> BIND(L_exit_no_vzero); | |||
6485 | __masm-> pop(rax); // Get original dest value | |||
6486 | __masm-> subptr(dest, rax); // Number of bytes converted | |||
6487 | __masm-> movptr(rax, dest); | |||
6488 | __masm-> pop(rbx); | |||
6489 | __masm-> pop(r15); | |||
6490 | __masm-> pop(r14); | |||
6491 | __masm-> pop(r13); | |||
6492 | __masm-> pop(r12); | |||
6493 | __masm-> leave(); | |||
6494 | __masm-> ret(0); | |||
6495 | ||||
6496 | return start; | |||
6497 | } | |||
6498 | ||||
6499 | ||||
6500 | /** | |||
6501 | * Arguments: | |||
6502 | * | |||
6503 | * Inputs: | |||
6504 | * c_rarg0 - int crc | |||
6505 | * c_rarg1 - byte* buf | |||
6506 | * c_rarg2 - int length | |||
6507 | * | |||
6508 | * Output: | |||
6509 | * rax - int crc result | |||
6510 | */ | |||
6511 | address generate_updateBytesCRC32() { | |||
6512 | assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions"); | |||
6513 | ||||
6514 | __masm-> align(CodeEntryAlignment); | |||
6515 | StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); | |||
6516 | ||||
6517 | address start = __masm-> pc(); | |||
6518 | // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) | |||
6519 | // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) | |||
6520 | // rscratch1: r10 | |||
6521 | const Register crc = c_rarg0; // crc | |||
6522 | const Register buf = c_rarg1; // source java byte array address | |||
6523 | const Register len = c_rarg2; // length | |||
6524 | const Register table = c_rarg3; // crc_table address (reuse register) | |||
6525 | const Register tmp1 = r11; | |||
6526 | const Register tmp2 = r10; | |||
6527 | assert_different_registers(crc, buf, len, table, tmp1, tmp2, rax); | |||
6528 | ||||
6529 | BLOCK_COMMENT("Entry:"); | |||
6530 | __masm-> enter(); // required for proper stackwalking of RuntimeStub frame | |||
6531 | ||||
6532 | if (VM_Version::supports_sse4_1() && VM_Version::supports_avx512_vpclmulqdq() && | |||
6533 | VM_Version::supports_avx512bw() && | |||
6534 | VM_Version::supports_avx512vl()) { | |||
6535 | // The constants used in the CRC32 algorithm require the one's complement of the initial crc value. | |||
6536 | // However, the constant table for CRC32-C assumes the original crc value. Account for this | |||
6537 | // difference before calling and after returning. | |||
6538 | __masm-> lea(table, ExternalAddress(StubRoutines::x86::crc_table_avx512_addr())); | |||
6539 | __masm-> notl(crc); | |||
6540 | __masm-> kernel_crc32_avx512(crc, buf, len, table, tmp1, tmp2); | |||
6541 | __masm-> notl(crc); | |||
6542 | } else { | |||
6543 | __masm-> kernel_crc32(crc, buf, len, table, tmp1); | |||
6544 | } | |||
6545 | ||||
6546 | __masm-> movl(rax, crc); | |||
6547 | __masm-> vzeroupper(); | |||
6548 | __masm-> leave(); // required for proper stackwalking of RuntimeStub frame | |||
6549 | __masm-> ret(0); | |||
6550 | ||||
6551 | return start; | |||
6552 | } | |||
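// Editor's sketch (not the stub's kernel): a bitwise CRC32 over the reflected
// polynomial 0xEDB88320, showing the standard pre/post one's-complement
// convention that the notl(crc) pair above compensates for around
// kernel_crc32_avx512.
#include <cstdint>
#include <cstddef>
static uint32_t crc32_scalar(uint32_t crc, const uint8_t* buf, size_t len) {
  crc = ~crc;                                       // complement in
  for (size_t i = 0; i < len; i++) {
    crc ^= buf[i];
    for (int b = 0; b < 8; b++)
      crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1u)));
  }
  return ~crc;                                      // complement out
}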
6553 | ||||
6554 | /** | |||
6555 | * Arguments: | |||
6556 | * | |||
6557 | * Inputs: | |||
6558 | * c_rarg0 - int crc | |||
6559 | * c_rarg1 - byte* buf | |||
6560 | * c_rarg2 - long length | |||
6561 | * c_rarg3 - table_start - optional (present only when doing a library_call, | |||
6562 | * not used by x86 algorithm) | |||
6563 | * | |||
6564 | * Output: | |||
6565 | * rax - int crc result | |||
6566 | */ | |||
6567 | address generate_updateBytesCRC32C(bool is_pclmulqdq_supported) { | |||
6568 | assert(UseCRC32CIntrinsics, "need SSE4_2"); | |||
6569 | __masm-> align(CodeEntryAlignment); | |||
6570 | StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C"); | |||
6571 | address start = __masm-> pc(); | |||
6572 | //reg.arg int#0 int#1 int#2 int#3 int#4 int#5 float regs | |||
6573 | //Windows RCX RDX R8 R9 none none XMM0..XMM3 | |||
6574 | //Lin / Sol RDI RSI RDX RCX R8 R9 XMM0..XMM7 | |||
6575 | const Register crc = c_rarg0; // crc | |||
6576 | const Register buf = c_rarg1; // source java byte array address | |||
6577 | const Register len = c_rarg2; // length | |||
6578 | const Register a = rax; | |||
6579 | const Register j = r9; | |||
6580 | const Register k = r10; | |||
6581 | const Register l = r11; | |||
6582 | #ifdef _WIN64 | |||
6583 | const Register y = rdi; | |||
6584 | const Register z = rsi; | |||
6585 | #else | |||
6586 | const Register y = rcx; | |||
6587 | const Register z = r8; | |||
6588 | #endif | |||
6589 | assert_different_registers(crc, buf, len, a, j, k, l, y, z); | |||
6590 | ||||
6591 | BLOCK_COMMENT("Entry:"); | |||
6592 | __masm-> enter(); // required for proper stackwalking of RuntimeStub frame | |||
6593 | if (VM_Version::supports_sse4_1() && VM_Version::supports_avx512_vpclmulqdq() && | |||
6594 | VM_Version::supports_avx512bw() && | |||
6595 | VM_Version::supports_avx512vl()) { | |||
6596 | __masm-> lea(j, ExternalAddress(StubRoutines::x86::crc32c_table_avx512_addr())); | |||
6597 | __masm-> kernel_crc32_avx512(crc, buf, len, j, l, k); | |||
6598 | } else { | |||
6599 | #ifdef _WIN64 | |||
6600 | __masm-> push(y); | |||
6601 | __masm-> push(z); | |||
6602 | #endif | |||
6603 | __masm-> crc32c_ipl_alg2_alt2(crc, buf, len, | |||
6604 | a, j, k, | |||
6605 | l, y, z, | |||
6606 | c_farg0, c_farg1, c_farg2, | |||
6607 | is_pclmulqdq_supported); | |||
6608 | #ifdef _WIN64 | |||
6609 | __masm-> pop(z); | |||
6610 | __masm-> pop(y); | |||
6611 | #endif | |||
6612 | } | |||
6613 | __masm-> movl(rax, crc); | |||
6614 | __masm-> vzeroupper(); | |||
6615 | __masm-> leave(); // required for proper stackwalking of RuntimeStub frame | |||
6616 | __masm-> ret(0); | |||
6617 | ||||
6618 | return start; | |||
6619 | } | |||
6620 | ||||
6621 | ||||
6622 | /** | |||
6623 | * Arguments: | |||
6624 | * | |||
6625 | * Inputs: | |||
6626 | * c_rarg0 - int adler | |||
6627 | * c_rarg1 - byte* buff | |||
6628 | * c_rarg2 - int len | |||
6629 | * | |||
6630 | * Output: | |||
6631 | * rax - int adler result | |||
6632 | */ | |||
6633 | ||||
6634 | address generate_updateBytesAdler32() { | |||
6635 | assert(UseAdler32Intrinsics, "need AVX2"); | |||
6636 | ||||
6637 | __masm-> align(CodeEntryAlignment); | |||
6638 | StubCodeMark mark(this, "StubRoutines", "updateBytesAdler32"); | |||
6639 | ||||
6640 | address start = __masm-> pc(); | |||
6641 | ||||
6642 | const Register data = r9; | |||
6643 | const Register size = r10; | |||
6644 | ||||
6645 | const XMMRegister yshuf0 = xmm6; | |||
6646 | const XMMRegister yshuf1 = xmm7; | |||
6647 | assert_different_registers(c_rarg0, c_rarg1, c_rarg2, data, size); | |||
6648 | ||||
6649 | BLOCK_COMMENT("Entry:"); | |||
6650 | __masm-> enter(); // required for proper stackwalking of RuntimeStub frame | |||
6651 | ||||
6652 | __masm-> vmovdqu(yshuf0, ExternalAddress((address) StubRoutines::x86::_adler32_shuf0_table), r9); | |||
6653 | __masm-> vmovdqu(yshuf1, ExternalAddress((address) StubRoutines::x86::_adler32_shuf1_table), r9); | |||
6654 | __masm-> movptr(data, c_rarg1); //data | |||
6655 | __masm-> movl(size, c_rarg2); //length | |||
6656 | __masm-> updateBytesAdler32(c_rarg0, data, size, yshuf0, yshuf1, ExternalAddress((address) StubRoutines::x86::_adler32_ascale_table)); | |||
6657 | __masm-> leave(); | |||
6658 | __masm-> ret(0); | |||
6659 | return start; | |||
6660 | } | |||
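// Editor's sketch (not the stub's kernel): the scalar Adler-32 recurrence that
// updateBytesAdler32 vectorizes with the shuffle/scale tables loaded above.
#include <cstdint>
#include <cstddef>
static uint32_t adler32_scalar(uint32_t adler, const uint8_t* buf, size_t len) {
  const uint32_t MOD = 65521;                       // largest prime below 2^16
  uint32_t a = adler & 0xffff, b = adler >> 16;
  for (size_t i = 0; i < len; i++) {
    a = (a + buf[i]) % MOD;                         // running byte sum
    b = (b + a) % MOD;                              // running sum of the sums
  }
  return (b << 16) | a;
}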
6661 | ||||
6662 | /** | |||
6663 | * Arguments: | |||
6664 | * | |||
6665 | * Input: | |||
6666 | * c_rarg0 - x address | |||
6667 | * c_rarg1 - x length | |||
6668 | * c_rarg2 - y address | |||
6669 | * c_rarg3 - y length | |||
6670 | * not Win64 | |||
6671 | * c_rarg4 - z address | |||
6672 | * c_rarg5 - z length | |||
6673 | * Win64 | |||
6674 | * rsp+40 - z address | |||
6675 | * rsp+48 - z length | |||
6676 | */ | |||
6677 | address generate_multiplyToLen() { | |||
6678 | __masm-> align(CodeEntryAlignment); | |||
6679 | StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); | |||
6680 | ||||
6681 | address start = __masm-> pc(); | |||
6682 | // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) | |||
6683 | // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) | |||
6684 | const Register x = rdi; | |||
6685 | const Register xlen = rax; | |||
6686 | const Register y = rsi; | |||
6687 | const Register ylen = rcx; | |||
6688 | const Register z = r8; | |||
6689 | const Register zlen = r11; | |||
6690 | ||||
6691 | // Next registers will be saved on stack in multiply_to_len(). | |||
6692 | const Register tmp1 = r12; | |||
6693 | const Register tmp2 = r13; | |||
6694 | const Register tmp3 = r14; | |||
6695 | const Register tmp4 = r15; | |||
6696 | const Register tmp5 = rbx; | |||
6697 | ||||
6698 | BLOCK_COMMENT("Entry:"); | |||
6699 | __masm-> enter(); // required for proper stackwalking of RuntimeStub frame | |||
6700 | ||||
6701 | #ifndef _WIN64 | |||
6702 | __masm-> movptr(zlen, r9); // Save r9 in r11 - zlen | |||
6703 | #endif | |||
6704 | setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx | |||
6705 | // ylen => rcx, z => r8, zlen => r11 | |||
6706 | // r9 and r10 may be used to save non-volatile registers | |||
6707 | #ifdef _WIN64 | |||
6708 | // last 2 arguments (#4, #5) are on stack on Win64 | |||
6709 | __masm-> movptr(z, Address(rsp, 6 * wordSize)); | |||
6710 | __masm-> movptr(zlen, Address(rsp, 7 * wordSize)); | |||
6711 | #endif | |||
6712 | ||||
6713 | __masm-> movptr(xlen, rsi); | |||
6714 | __masm-> movptr(y, rdx); | |||
6715 | __masm-> multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5); | |||
6716 | ||||
6717 | restore_arg_regs(); | |||
6718 | ||||
6719 | __masm-> leave(); // required for proper stackwalking of RuntimeStub frame | |||
6720 | __masm-> ret(0); | |||
6721 | ||||
6722 | return start; | |||
6723 | } | |||
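// Editor's sketch (assumed contract, big-endian int limbs as in BigInteger;
// illustrative only): the schoolbook multiplication that multiply_to_len
// implements with 64-bit multiplies and carry chains.
#include <cstdint>
static void multiply_to_len_ref(const uint32_t* x, int xlen,
                                const uint32_t* y, int ylen, uint32_t* z) {
  for (int i = 0; i < xlen + ylen; i++) z[i] = 0;
  for (int i = xlen - 1; i >= 0; i--) {
    uint64_t carry = 0;
    for (int j = ylen - 1; j >= 0; j--) {
      int k = i + j + 1;                            // most-significant limb first
      uint64_t p = (uint64_t)x[i] * y[j] + z[k] + carry;
      z[k] = (uint32_t)p;
      carry = p >> 32;
    }
    z[i] = (uint32_t)carry;                         // carry out of this row
  }
}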
6724 | ||||
6725 | /** | |||
6726 | * Arguments: | |||
6727 | * | |||
6728 | * Input: | |||
6729 | * c_rarg0 - obja address | |||
6730 | * c_rarg1 - objb address | |||
6731 | * c_rarg2 - length element count | |||
6732 | * c_rarg3 - scale log2_array_indexscale | |||
6733 | * | |||
6734 | * Output: | |||
6735 | * rax - int; >= 0: index of first mismatch, < 0: bitwise complement of the unexamined tail length | |||
6736 | */ | |||
6737 | address generate_vectorizedMismatch() { | |||
6738 | __masm-> align(CodeEntryAlignment); | |||
6739 | StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch"); | |||
6740 | address start = __masm-> pc(); | |||
6741 | ||||
6742 | BLOCK_COMMENT("Entry:"); | |||
6743 | __masm-> enter(); | |||
6744 | ||||
6745 | #ifdef _WIN64 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) | |||
6746 | const Register scale = c_rarg0; //rcx, will exchange with r9 | |||
6747 | const Register objb = c_rarg1; //rdx | |||
6748 | const Register length = c_rarg2; //r8 | |||
6749 | const Register obja = c_rarg3; //r9 | |||
6750 | __masm-> xchgq(obja, scale); //now obja and scale contain the correct contents | |||
6751 | ||||
6752 | const Register tmp1 = r10; | |||
6753 | const Register tmp2 = r11; | |||
6754 | #endif | |||
6755 | #ifndef _WIN64 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) | |||
6756 | const Register obja = c_rarg0; //U:rdi | |||
6757 | const Register objb = c_rarg1; //U:rsi | |||
6758 | const Register length = c_rarg2; //U:rdx | |||
6759 | const Register scale = c_rarg3; //U:rcx | |||
6760 | const Register tmp1 = r8; | |||
6761 | const Register tmp2 = r9; | |||
6762 | #endif | |||
6763 | const Register result = rax; //return value | |||
6764 | const XMMRegister vec0 = xmm0; | |||
6765 | const XMMRegister vec1 = xmm1; | |||
6766 | const XMMRegister vec2 = xmm2; | |||
6767 | ||||
6768 | __masm-> vectorized_mismatch(obja, objb, length, scale, result, tmp1, tmp2, vec0, vec1, vec2); | |||
6769 | ||||
6770 | __masm-> vzeroupper(); | |||
6771 | __masm-> leave(); | |||
6772 | __masm-> ret(0); | |||
6773 | ||||
6774 | return start; | |||
6775 | } | |||
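// Editor's sketch (assumed semantics, simplified): a fully scalar stand-in for
// vectorized_mismatch. The real intrinsic may stop short of the tail and return
// the bitwise complement of the number of elements it did not examine; this
// all-scalar version examines everything, so "equal" comes back as ~0 (-1).
#include <cstdint>
#include <cstddef>
static int vectorized_mismatch_ref(const uint8_t* obja, const uint8_t* objb,
                                   int length, int log2_scale) {
  size_t nbytes = (size_t)length << log2_scale;     // length is in elements
  for (size_t i = 0; i < nbytes; i++)
    if (obja[i] != objb[i]) return (int)(i >> log2_scale);  // first mismatching element
  return ~0;                                        // no mismatch found
}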
6776 | ||||
6777 | /** | |||
6778 | * Arguments: | |||
6779 | * | |||
6780 | * Input: | |||
6781 | * c_rarg0 - x address | |||
6782 | * c_rarg1 - x length | |||
6783 | * c_rarg2 - z address | |||
6784 | * c_rarg3 - z length | |||
6785 | * | |||
6786 | */ | |||
6787 | address generate_squareToLen() { | |||
6788 | ||||
6789 | __masm-> align(CodeEntryAlignment); | |||
6790 | StubCodeMark mark(this, "StubRoutines", "squareToLen"); | |||
6791 | ||||
6792 | address start = __masm-> pc(); | |||
6793 | // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) | |||
6794 | // Unix: rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...) | |||
6795 | const Register x = rdi; | |||
6796 | const Register len = rsi; | |||
6797 | const Register z = r8; | |||
6798 | const Register zlen = rcx; | |||
6799 | ||||
6800 | const Register tmp1 = r12; | |||
6801 | const Register tmp2 = r13; | |||
6802 | const Register tmp3 = r14; | |||
6803 | const Register tmp4 = r15; | |||
6804 | const Register tmp5 = rbx; | |||
6805 | ||||
6806 | BLOCK_COMMENT("Entry:"); | |||
6807 | __masm-> enter(); // required for proper stackwalking of RuntimeStub frame | |||
6808 | ||||
6809 | setup_arg_regs(4); // x => rdi, len => rsi, z => rdx | |||
6810 | // zlen => rcx | |||
6811 | // r9 and r10 may be used to save non-volatile registers | |||
6812 | __masm-> movptr(r8, rdx); | |||
6813 | __masm-> square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax); | |||
6814 | ||||
6815 | restore_arg_regs(); | |||
6816 | ||||
6817 | __masm-> leave(); // required for proper stackwalking of RuntimeStub frame | |||
6818 | __masm-> ret(0); | |||
6819 | ||||
6820 | return start; | |||
6821 | } | |||
6822 | ||||
6823 | address generate_method_entry_barrier() { | |||
6824 | __masm-> align(CodeEntryAlignment); | |||
6825 | StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier"); | |||
6826 | ||||
6827 | Label deoptimize_label; | |||
6828 | ||||
6829 | address start = __masm-> pc(); | |||
6830 | ||||
6831 | __masm-> push(-1); // cookie, this is used for writing the new rsp when deoptimizing | |||
6832 | ||||
6833 | BLOCK_COMMENT("Entry:"); | |||
6834 | __masm-> enter(); // save rbp | |||
6835 | ||||
6836 | // save c_rarg0, because we want to use that value. | |||
6837 | // We could do without it but then we depend on the number of slots used by pusha | |||
6838 | __masm-> push(c_rarg0); | |||
6839 | ||||
6840 | __masm-> lea(c_rarg0, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for c_rarg0 - this should be the return address | |||
6841 | ||||
6842 | __masm-> pusha(); | |||
6843 | ||||
6844 | // The method may have floats as arguments, and we must spill them before calling | |||
6845 | // the VM runtime. | |||
6846 | assert(Argument::n_float_register_parameters_j == 8, "Assumption"); | |||
6847 | const int xmm_size = wordSize * 2; | |||
6848 | const int xmm_spill_size = xmm_size * Argument::n_float_register_parameters_j; | |||
6849 | __masm-> subptr(rsp, xmm_spill_size); | |||
6850 | __masm-> movdqu(Address(rsp, xmm_size * 7), xmm7); | |||
6851 | __masm-> movdqu(Address(rsp, xmm_size * 6), xmm6); | |||
6852 | __masm-> movdqu(Address(rsp, xmm_size * 5), xmm5); | |||
6853 | __masm-> movdqu(Address(rsp, xmm_size * 4), xmm4); | |||
6854 | __masm-> movdqu(Address(rsp, xmm_size * 3), xmm3); | |||
6855 | __masm-> movdqu(Address(rsp, xmm_size * 2), xmm2); | |||
6856 | __masm-> movdqu(Address(rsp, xmm_size * 1), xmm1); | |||
6857 | __masm-> movdqu(Address(rsp, xmm_size * 0), xmm0); | |||
6858 | ||||
6859 | __masm-> call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast<int (*)(address*)>(BarrierSetNMethod::nmethod_stub_entry_barrier)), 1); | |||
6860 | ||||
6861 | __masm-> movdqu(xmm0, Address(rsp, xmm_size * 0)); | |||
6862 | __masm-> movdqu(xmm1, Address(rsp, xmm_size * 1)); | |||
6863 | __masm-> movdqu(xmm2, Address(rsp, xmm_size * 2)); | |||
6864 | __masm-> movdqu(xmm3, Address(rsp, xmm_size * 3)); | |||
6865 | __masm-> movdqu(xmm4, Address(rsp, xmm_size * 4)); | |||
6866 | __masm-> movdqu(xmm5, Address(rsp, xmm_size * 5)); | |||
6867 | __masm-> movdqu(xmm6, Address(rsp, xmm_size * 6)); | |||
6868 | __masm-> movdqu(xmm7, Address(rsp, xmm_size * 7)); | |||
6869 | __masm-> addptr(rsp, xmm_spill_size); | |||
6870 | ||||
6871 | __masm-> cmpl(rax, 1); // 1 means deoptimize | |||
6872 | __masm-> jcc(Assembler::equal, deoptimize_label); | |||
6873 | ||||
6874 | __masm-> popa(); | |||
6875 | __masm-> pop(c_rarg0); | |||
6876 | ||||
6877 | __masm-> leave(); | |||
6878 | ||||
6879 | __masm-> addptr(rsp, 1 * wordSize); // cookie | |||
6880 | __masm-> ret(0); | |||
6881 | ||||
6882 | ||||
6883 | __masm-> BIND(deoptimize_label); | |||
6884 | ||||
6885 | __masm-> popa(); | |||
6886 | __masm-> pop(c_rarg0); | |||
6887 | ||||
6888 | __masm-> leave(); | |||
6889 | ||||
6890 | // This can be taken out, but is good for verification purposes: getting a SIGSEGV | |||
6891 | // here while still having a correct stack is valuable. | |||
6892 | __masm-> testptr(rsp, Address(rsp, 0)); | |||
6893 | ||||
6894 | __masm-> movptr(rsp, Address(rsp, 0)); // new rsp was written in the barrier | |||
6895 | __masm-> jmp(Address(rsp, -1 * wordSize)); // jmp target should be caller's verified_entry_point | |||
6896 | ||||
6897 | return start; | |||
6898 | } | |||
6899 | ||||
6900 | /** | |||
6901 | * Arguments: | |||
6902 | * | |||
6903 | * Input: | |||
6904 | * c_rarg0 - out address | |||
6905 | * c_rarg1 - in address | |||
6906 | * c_rarg2 - offset | |||
6907 | * c_rarg3 - len | |||
6908 | * not Win64 | |||
6909 | * c_rarg4 - k | |||
6910 | * Win64 | |||
6911 | * rsp+40 - k | |||
6912 | */ | |||
6913 | address generate_mulAdd() { | |||
6914 | __masm-> align(CodeEntryAlignment); | |||
6915 | StubCodeMark mark(this, "StubRoutines", "mulAdd"); | |||
6916 | ||||
6917 | address start = __masm-> pc(); | |||
6918 | // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) | |||
6919 | // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) | |||
6920 | const Register out = rdi; | |||
6921 | const Register in = rsi; | |||
6922 | const Register offset = r11; | |||
6923 | const Register len = rcx; | |||
6924 | const Register k = r8; | |||
6925 | ||||
6926 | // Next registers will be saved on stack in mul_add(). | |||
6927 | const Register tmp1 = r12; | |||
6928 | const Register tmp2 = r13; | |||
6929 | const Register tmp3 = r14; | |||
6930 | const Register tmp4 = r15; | |||
6931 | const Register tmp5 = rbx; | |||
6932 | ||||
6933 | BLOCK_COMMENT("Entry:"); | |||
6934 | __masm-> enter(); // required for proper stackwalking of RuntimeStub frame | |||
6935 | ||||
6936 | setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx | |||
6937 | // len => rcx, k => r8 | |||
6938 | // r9 and r10 may be used to save non-volatile registers | |||
6939 | #ifdef _WIN64 | |||
6940 | // last argument is on stack on Win64 | |||
6941 | __masm-> movl(k, Address(rsp, 6 * wordSize)); | |||
6942 | #endif | |||
6943 | __masm-> movptr(r11, rdx); // move offset in rdx to offset(r11) | |||
6944 | __masm-> mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax); | |||
6945 | ||||
6946 | restore_arg_regs(); | |||
6947 | ||||
6948 | __masm-> leave(); // required for proper stackwalking of RuntimeStub frame | |||
6949 | __masm-> ret(0); | |||
6950 | ||||
6951 | return start; | |||
6952 | } | |||
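// Editor's sketch (assumed contract, big-endian int limbs; out_idx stands in for
// the index the stub derives from `offset`, and all names are illustrative):
// multiply in[0..len) by k, accumulate into out[] ending at out_idx, and return
// the final carry.
#include <cstdint>
static uint32_t mul_add_ref(uint32_t* out, const uint32_t* in,
                            int out_idx, int len, uint32_t k) {
  uint64_t carry = 0;
  for (int j = len - 1; j >= 0; j--) {              // least-significant limb first
    uint64_t p = (uint64_t)in[j] * k + out[out_idx] + carry;
    out[out_idx--] = (uint32_t)p;
    carry = p >> 32;
  }
  return (uint32_t)carry;
}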
6953 | ||||
6954 | address generate_bigIntegerRightShift() { | |||
6955 | __masm-> align(CodeEntryAlignment); | |||
6956 | StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker"); | |||
6957 | ||||
6958 | address start = __masm-> pc(); | |||
6959 | Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit; | |||
6960 | // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8. | |||
6961 | const Register newArr = rdi; | |||
6962 | const Register oldArr = rsi; | |||
6963 | const Register newIdx = rdx; | |||
6964 | const Register shiftCount = rcx; // It was intentional to have shiftCount in rcx since it is used implicitly for shift. | |||
6965 | const Register totalNumIter = r8; | |||
6966 | ||||
6967 | // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps. | |||
6968 | // For everything else, we prefer using r9 and r10 since we do not have to save them before use. | |||
6969 | const Register tmp1 = r11; // Caller save. | |||
6970 | const Register tmp2 = rax; // Caller save. | |||
6971 | const Register tmp3 = WINDOWS_ONLY(r12) NOT_WINDOWS(r9); // Windows: Callee save. Linux: Caller save. | |||
6972 | const Register tmp4 = WINDOWS_ONLY(r13) NOT_WINDOWS(r10); // Windows: Callee save. Linux: Caller save. | |||
6973 | const Register tmp5 = r14; // Callee save. | |||
6974 | const Register tmp6 = r15; | |||
6975 | ||||
6976 | const XMMRegister x0 = xmm0; | |||
6977 | const XMMRegister x1 = xmm1; | |||
6978 | const XMMRegister x2 = xmm2; | |||
6979 | ||||
6980 | BLOCK_COMMENT("Entry:"); | |||
6981 | __masm-> enter(); // required for proper stackwalking of RuntimeStub frame | |||
6982 | ||||
6983 | #ifdef _WINDOWS | |||
6984 | setup_arg_regs(4); | |||
6985 | // For windows, since last argument is on stack, we need to move it to the appropriate register. | |||
6986 | __masm-> movl(totalNumIter, Address(rsp, 6 * wordSize)); | |||
6987 | // Save callee save registers. | |||
6988 | __masm-> push(tmp3); | |||
6989 | __masm-> push(tmp4); | |||
6990 | #endif | |||
6991 | __masm-> push(tmp5); | |||
6992 | ||||
6993 | // Rename temps used throughout the code. | |||
6994 | const Register idx = tmp1; | |||
6995 | const Register nIdx = tmp2; | |||
6996 | ||||
6997 | __masm-> xorl(idx, idx); | |||
6998 | ||||
6999 | // Start right shift from end of the array. | |||
7000 | // For example, if #iteration = 4 and newIdx = 1 | |||
7001 | // then dest[4] = src[4] >> shiftCount | src[3] << (32 - shiftCount) | |||
7002 | // if #iteration = 4 and newIdx = 0 | |||
7003 | // then dest[3] = src[4] >> shiftCount | src[3] << (32 - shiftCount) | |||
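// Editor's sketch (comment form, illustrative): in scalar terms the loops below
// compute, for i = totalNumIter-1 down to 0,
//   newArr[newIdx + i] = (oldArr[i + 1] >> shiftCount) | (oldArr[i] << (32 - shiftCount));
// which is the per-limb operation shrdl (and vpshrdvd in the vector path) performs.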
7004 | __masm-> movl(idx, totalNumIter); | |||
7005 | __masm-> movl(nIdx, idx); | |||
7006 | __masm-> addl(nIdx, newIdx); | |||
7007 | ||||
7008 | // If vectorization is enabled, check if the number of iterations is at least 64. | |||
7009 | // If not, branch to ShiftTwo, which processes 2 iterations at a time. | |||
7010 | if (VM_Version::supports_avx512_vbmi2()) { | |||
7011 | __masm-> cmpptr(totalNumIter, (AVX3Threshold/64)); | |||
7012 | __masm-> jcc(Assembler::less, ShiftTwo); | |||
7013 | ||||
7014 | if (AVX3Threshold < 16 * 64) { | |||
7015 | __masm-> cmpl(totalNumIter, 16); | |||
7016 | __masm-> jcc(Assembler::less, ShiftTwo); | |||
7017 | } | |||
7018 | __masm-> evpbroadcastd(x0, shiftCount, Assembler::AVX_512bit); | |||
7019 | __masm-> subl(idx, 16); | |||
7020 | __masm-> subl(nIdx, 16); | |||
7021 | __masm-> BIND(Shift512Loop); | |||
7022 | __masm-> evmovdqul(x2, Address(oldArr, idx, Address::times_4, 4), Assembler::AVX_512bit); | |||
7023 | __masm-> evmovdqul(x1, Address(oldArr, idx, Address::times_4), Assembler::AVX_512bit); | |||
7024 | __masm-> vpshrdvd(x2, x1, x0, Assembler::AVX_512bit); | |||
7025 | __masm-> evmovdqul(Address(newArr, nIdx, Address::times_4), x2, Assembler::AVX_512bit); | |||
7026 | __masm-> subl(nIdx, 16); | |||
7027 | __masm-> subl(idx, 16); | |||
7028 | __masm-> jcc(Assembler::greaterEqual, Shift512Loop); | |||
7029 | __masm-> addl(idx, 16); | |||
7030 | __masm-> addl(nIdx, 16); | |||
7031 | } | |||
7032 | __masm-> BIND(ShiftTwo); | |||
7033 | __masm-> cmpl(idx, 2); | |||
7034 | __masm-> jcc(Assembler::less, ShiftOne); | |||
7035 | __masm-> subl(idx, 2); | |||
7036 | __masm-> subl(nIdx, 2); | |||
7037 | __masm-> BIND(ShiftTwoLoop); | |||
7038 | __masm-> movl(tmp5, Address(oldArr, idx, Address::times_4, 8)); | |||
7039 | __masm-> movl(tmp4, Address(oldArr, idx, Address::times_4, 4)); | |||
7040 | __masm-> movl(tmp3, Address(oldArr, idx, Address::times_4)); | |||
7041 | __masm-> shrdl(tmp5, tmp4); | |||
7042 | __masm-> shrdl(tmp4, tmp3); | |||
7043 | __masm-> movl(Address(newArr, nIdx, Address::times_4, 4), tmp5); | |||
7044 | __masm-> movl(Address(newArr, nIdx, Address::times_4), tmp4); | |||
7045 | __masm-> subl(nIdx, 2); | |||
7046 | __masm-> subl(idx, 2); | |||
7047 | __masm-> jcc(Assembler::greaterEqual, ShiftTwoLoop); | |||
7048 | __masm-> addl(idx, 2); | |||
7049 | __masm-> addl(nIdx, 2); | |||
7050 | ||||
7051 | // Do the last iteration | |||
7052 | __masm-> BIND(ShiftOne); | |||
7053 | __masm-> cmpl(idx, 1); | |||
7054 | __masm-> jcc(Assembler::less, Exit); | |||
7055 | __masm-> subl(idx, 1); | |||
7056 | __masm-> subl(nIdx, 1); | |||
7057 | __masm-> movl(tmp4, Address(oldArr, idx, Address::times_4, 4)); | |||
7058 | __masm-> movl(tmp3, Address(oldArr, idx, Address::times_4)); | |||
7059 | __masm-> shrdl(tmp4, tmp3); | |||
7060 | __masm-> movl(Address(newArr, nIdx, Address::times_4), tmp4); | |||
7061 | __masm-> BIND(Exit); | |||
7062 | // Restore callee save registers. | |||
7063 | __masm-> pop(tmp5); | |||
7064 | #ifdef _WINDOWS | |||
7065 | __masm-> pop(tmp4); | |||
7066 | __masm-> pop(tmp3); | |||
7067 | restore_arg_regs(); | |||
7068 | #endif | |||
7069 | __masm-> leave(); // required for proper stackwalking of RuntimeStub frame | |||
7070 | __masm-> ret(0); | |||
7071 | return start; | |||
7072 | } | |||
7073 | ||||
7074 | /** | |||
7075 | * Arguments: | |||
7076 | * | |||
7077 | * Input: | |||
7078 | * c_rarg0 - newArr address | |||
7079 | * c_rarg1 - oldArr address | |||
7080 | * c_rarg2 - newIdx | |||
7081 | * c_rarg3 - shiftCount | |||
7082 | * not Win64 | |||
7083 | * c_rarg4 - numIter | |||
7084 | * Win64 | |||
7085 | * rsp+40 - numIter | |||
7086 | */ | |||
7087 | address generate_bigIntegerLeftShift() { | |||
7088 | __masm-> align(CodeEntryAlignment); | |||
7089 | StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker"); | |||
7090 | address start = __masm-> pc(); | |||
7091 | Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit; | |||
7092 | // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8. | |||
7093 | const Register newArr = rdi; | |||
7094 | const Register oldArr = rsi; | |||
7095 | const Register newIdx = rdx; | |||
7096 | const Register shiftCount = rcx; // It was intentional to have shiftCount in rcx since it is used implicitly for shift. | |||
7097 | const Register totalNumIter = r8; | |||
7098 | // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps. | |||
7099 | // For everything else, we prefer using r9 and r10 since we do not have to save them before use. | |||
7100 | const Register tmp1 = r11; // Caller save. | |||
7101 | const Register tmp2 = rax; // Caller save. | |||
7102 | const Register tmp3 = WINDOWS_ONLY(r12) NOT_WINDOWS(r9); // Windows: Callee save. Linux: Caller save. | |||
7103 | const Register tmp4 = WINDOWS_ONLY(r13) NOT_WINDOWS(r10); // Windows: Callee save. Linux: Caller save. | |||
7104 | const Register tmp5 = r14; // Callee save. | |||
7105 | ||||
7106 | const XMMRegister x0 = xmm0; | |||
7107 | const XMMRegister x1 = xmm1; | |||
7108 | const XMMRegister x2 = xmm2; | |||
7109 | BLOCK_COMMENT("Entry:"); | |||
7110 | __masm-> enter(); // required for proper stackwalking of RuntimeStub frame | |||
7111 | ||||
7112 | #ifdef _WINDOWS | |||
7113 | setup_arg_regs(4); | |||
7114 | // For windows, since last argument is on stack, we need to move it to the appropriate register. | |||
7115 | __masm-> movl(totalNumIter, Address(rsp, 6 * wordSize)); | |||
7116 | // Save callee save registers. | |||
7117 | __masm-> push(tmp3); | |||
7118 | __masm-> push(tmp4); | |||
7119 | #endif | |||
7120 | __masm-> push(tmp5); | |||
7121 | ||||
7122 | // Rename temps used throughout the code | |||
7123 | const Register idx = tmp1; | |||
7124 | const Register numIterTmp = tmp2; | |||
7125 | ||||
7126 | // Start idx from zero. | |||
7127 | __masm-> xorl(idx, idx); | |||
7128 | // Compute interior pointer for new array. We do this so that we can use the same index for both old and new arrays. | |||
7129 | __masm-> lea(newArr, Address(newArr, newIdx, Address::times_4)); | |||
7130 | __masm-> movl(numIterTmp, totalNumIter); | |||
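// Editor's sketch (comment form, illustrative): with newArr already advanced by
// newIdx, the loops below compute, for i = 0 .. totalNumIter-1,
//   newArr[i] = (oldArr[i] << shiftCount) | (oldArr[i + 1] >> (32 - shiftCount));
// which is the per-limb operation shldl (and vpshldvd in the vector path) performs.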
7131 | ||||
7132 | // If vectorization is enabled, check if the number of iterations is at least 64 | |||
7133 | // If not, then go to ShiftTwo shifting two numbers at a time | |||
7134 | if (VM_Version::supports_avx512_vbmi2()) { | |||
7135 | __masm-> cmpl(totalNumIter, (AVX3Threshold/64)); | |||
7136 | __masm-> jcc(Assembler::less, ShiftTwo); | |||
7137 | ||||
7138 | if (AVX3Threshold < 16 * 64) { | |||
7139 | __masm-> cmpl(totalNumIter, 16); | |||
7140 | __masm-> jcc(Assembler::less, ShiftTwo); | |||
7141 | } | |||
7142 | __masm-> evpbroadcastd(x0, shiftCount, Assembler::AVX_512bit); | |||
7143 | __masm-> subl(numIterTmp, 16); | |||
7144 | __masm-> BIND(Shift512Loop); | |||
7145 | __masm-> evmovdqul(x1, Address(oldArr, idx, Address::times_4), Assembler::AVX_512bit); | |||
7146 | __masm-> evmovdqul(x2, Address(oldArr, idx, Address::times_4, 0x4), Assembler::AVX_512bit); | |||
7147 | __masm-> vpshldvd(x1, x2, x0, Assembler::AVX_512bit); | |||
7148 | __masm-> evmovdqul(Address(newArr, idx, Address::times_4), x1, Assembler::AVX_512bit); | |||
7149 | __masm-> addl(idx, 16); | |||
7150 | __masm-> subl(numIterTmp, 16); | |||
7151 | __masm-> jcc(Assembler::greaterEqual, Shift512Loop); | |||
7152 | __masm-> addl(numIterTmp, 16); | |||
7153 | } | |||
7154 | __masm-> BIND(ShiftTwo); | |||
7155 | __masm-> cmpl(totalNumIter, 1); | |||
7156 | __masm-> jcc(Assembler::less, Exit); | |||
7157 | __masm-> movl(tmp3, Address(oldArr, idx, Address::times_4)); | |||
7158 | __masm-> subl(numIterTmp, 2); | |||
7159 | __masm-> jcc(Assembler::less, ShiftOne); | |||
7160 | ||||
7161 | __masm-> BIND(ShiftTwoLoop); | |||
7162 | __masm-> movl(tmp4, Address(oldArr, idx, Address::times_4, 0x4)); | |||
7163 | __masm-> movl(tmp5, Address(oldArr, idx, Address::times_4, 0x8)); | |||
7164 | __masm-> shldl(tmp3, tmp4); | |||
7165 | __masm-> shldl(tmp4, tmp5); | |||
7166 | __masm-> movl(Address(newArr, idx, Address::times_4), tmp3); | |||
7167 | __masm-> movl(Address(newArr, idx, Address::times_4, 0x4), tmp4); | |||
7168 | __masm-> movl(tmp3, tmp5); | |||
7169 | __masm-> addl(idx, 2); | |||
7170 | __masm-> subl(numIterTmp, 2); | |||
7171 | __masm-> jcc(Assembler::greaterEqual, ShiftTwoLoop); | |||
7172 | ||||
7173 | // Do the last iteration | |||
7174 | __masm-> BIND(ShiftOne); | |||
7175 | __masm-> addl(numIterTmp, 2); | |||
7176 | __masm-> cmpl(numIterTmp, 1); | |||
7177 | __masm-> jcc(Assembler::less, Exit); | |||
7178 | __masm-> movl(tmp4, Address(oldArr, idx, Address::times_4, 0x4)); | |||
7179 | __masm-> shldl(tmp3, tmp4); | |||
7180 | __masm-> movl(Address(newArr, idx, Address::times_4), tmp3); | |||
7181 | ||||
7182 | __masm-> BIND(Exit); | |||
7183 | // Restore callee save registers. | |||
7184 | __masm-> pop(tmp5); | |||
7185 | #ifdef _WINDOWS | |||
7186 | __masm-> pop(tmp4); | |||
7187 | __masm-> pop(tmp3); | |||
7188 | restore_arg_regs(); | |||
7189 | #endif | |||
7190 | __masm-> leave(); // required for proper stackwalking of RuntimeStub frame | |||
7191 | __masm-> ret(0); | |||
7192 | return start; | |||
7193 | } | |||
7194 | ||||
7195 | address generate_libmExp() { | |||
7196 | StubCodeMark mark(this, "StubRoutines", "libmExp"); | |||
7197 | ||||
7198 | address start = __masm-> pc(); | |||
7199 | ||||
7200 | const XMMRegister x0 = xmm0; | |||
7201 | const XMMRegister x1 = xmm1; | |||
7202 | const XMMRegister x2 = xmm2; | |||
7203 | const XMMRegister x3 = xmm3; | |||
7204 | ||||
7205 | const XMMRegister x4 = xmm4; | |||
7206 | const XMMRegister x5 = xmm5; | |||
7207 | const XMMRegister x6 = xmm6; | |||
7208 | const XMMRegister x7 = xmm7; | |||
7209 | ||||
7210 | const Register tmp = r11; | |||
7211 | ||||
7212 | BLOCK_COMMENT("Entry:"); | |||
7213 | __masm-> enter(); // required for proper stackwalking of RuntimeStub frame | |||
7214 | ||||
7215 | __masm-> fast_exp(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp); | |||
7216 | ||||
7217 | __masm-> leave(); // required for proper stackwalking of RuntimeStub frame | |||
7218 | __masm-> ret(0); | |||
7219 | ||||
7220 | return start; | |||
7221 | ||||
7222 | } | |||
7223 | ||||
7224 | address generate_libmLog() { | |||
7225 | StubCodeMark mark(this, "StubRoutines", "libmLog"); | |||
7226 | ||||
7227 | address start = __masm-> pc(); | |||
7228 | ||||
7229 | const XMMRegister x0 = xmm0; | |||
7230 | const XMMRegister x1 = xmm1; | |||
7231 | const XMMRegister x2 = xmm2; | |||
7232 | const XMMRegister x3 = xmm3; | |||
7233 | ||||
7234 | const XMMRegister x4 = xmm4; | |||
7235 | const XMMRegister x5 = xmm5; | |||
7236 | const XMMRegister x6 = xmm6; | |||
7237 | const XMMRegister x7 = xmm7; | |||
7238 | ||||
7239 | const Register tmp1 = r11; | |||
7240 | const Register tmp2 = r8; | |||
7241 | ||||
7242 | BLOCK_COMMENT("Entry:"); | |||
7243 | __masm-> enter(); // required for proper stackwalking of RuntimeStub frame | |||
7244 | ||||
7245 | __masm-> fast_log(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2); | |||
7246 | ||||
7247 | __masm-> leave(); // required for proper stackwalking of RuntimeStub frame | |||
7248 | __masm-> ret(0); | |||
7249 | ||||
7250 | return start; | |||
7251 | ||||
7252 | } | |||
7253 | ||||
7254 | address generate_libmLog10() { | |||
7255 | StubCodeMark mark(this, "StubRoutines", "libmLog10"); | |||
7256 | ||||
7257 | address start = __masm-> pc(); | |||
7258 | ||||
7259 | const XMMRegister x0 = xmm0; | |||
7260 | const XMMRegister x1 = xmm1; | |||
7261 | const XMMRegister x2 = xmm2; | |||
7262 | const XMMRegister x3 = xmm3; | |||
7263 | ||||
7264 | const XMMRegister x4 = xmm4; | |||
7265 | const XMMRegister x5 = xmm5; | |||
7266 | const XMMRegister x6 = xmm6; | |||
7267 | const XMMRegister x7 = xmm7; | |||
7268 | ||||
7269 | const Register tmp = r11; | |||
7270 | ||||
7271 | BLOCK_COMMENT("Entry:"); | |||
7272 | __masm-> enter(); // required for proper stackwalking of RuntimeStub frame | |||
7273 | ||||
7274 | __masm-> fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp); | |||
7275 | ||||
7276 | __masm-> leave(); // required for proper stackwalking of RuntimeStub frame | |||
7277 | __masm-> ret(0); | |||
7278 | ||||
7279 | return start; | |||
7280 | ||||
7281 | } | |||
7282 | ||||
7283 | address generate_libmPow() { | |||
7284 | StubCodeMark mark(this, "StubRoutines", "libmPow"); | |||
7285 | ||||
7286 | address start = __masm-> pc(); | |||
7287 | ||||
7288 | const XMMRegister x0 = xmm0; | |||
7289 | const XMMRegister x1 = xmm1; | |||
7290 | const XMMRegister x2 = xmm2; | |||
7291 | const XMMRegister x3 = xmm3; | |||
7292 | ||||
7293 | const XMMRegister x4 = xmm4; | |||
7294 | const XMMRegister x5 = xmm5; | |||
7295 | const XMMRegister x6 = xmm6; | |||
7296 | const XMMRegister x7 = xmm7; | |||
7297 | ||||
7298 | const Register tmp1 = r8; | |||
7299 | const Register tmp2 = r9; | |||
7300 | const Register tmp3 = r10; | |||
7301 | const Register tmp4 = r11; | |||
7302 | ||||
7303 | BLOCK_COMMENT("Entry:"); | |||
7304 | __masm-> enter(); // required for proper stackwalking of RuntimeStub frame | |||
7305 | ||||
7306 | __masm-> fast_pow(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4); | |||
7307 | ||||
7308 | __masm-> leave(); // required for proper stackwalking of RuntimeStub frame | |||
7309 | __masm-> ret(0); | |||
7310 | ||||
7311 | return start; | |||
7312 | ||||
7313 | } | |||
7314 | ||||
7315 | address generate_libmSin() { | |||
7316 | StubCodeMark mark(this, "StubRoutines", "libmSin"); | |||
7317 | ||||
7318 | address start = __masm-> pc(); | |||
7319 | ||||
7320 | const XMMRegister x0 = xmm0; | |||
7321 | const XMMRegister x1 = xmm1; | |||
7322 | const XMMRegister x2 = xmm2; | |||
7323 | const XMMRegister x3 = xmm3; | |||
7324 | ||||
7325 | const XMMRegister x4 = xmm4; | |||
7326 | const XMMRegister x5 = xmm5; | |||
7327 | const XMMRegister x6 = xmm6; | |||
7328 | const XMMRegister x7 = xmm7; | |||
7329 | ||||
7330 | const Register tmp1 = r8; | |||
7331 | const Register tmp2 = r9; | |||
7332 | const Register tmp3 = r10; | |||
7333 | const Register tmp4 = r11; | |||
7334 | ||||
7335 | BLOCK_COMMENT("Entry:"); | |||
7336 | __masm-> enter(); // required for proper stackwalking of RuntimeStub frame | |||
7337 | ||||
7338 | #ifdef _WIN64 | |||
7339 | __masm-> push(rsi); | |||
7340 | __masm-> push(rdi); | |||
7341 | #endif | |||
7342 | __masm-> fast_sin(x0, x1, x2, x3, x4, x5, x6, x7, rax, rbx, rcx, rdx, tmp1, tmp2, tmp3, tmp4); | |||
7343 | ||||
7344 | #ifdef _WIN64 | |||
7345 | __masm-> pop(rdi); | |||
7346 | __masm-> pop(rsi); | |||
7347 | #endif | |||
7348 | ||||
7349 | __masm-> leave(); // required for proper stackwalking of RuntimeStub frame | |||
7350 | __masm-> ret(0); | |||
7351 | ||||
7352 | return start; | |||
7353 | ||||
7354 | } | |||
7355 | ||||
7356 | address generate_libmCos() { | |||
7357 | StubCodeMark mark(this, "StubRoutines", "libmCos"); | |||
7358 | ||||
7359 | address start = __masm-> pc(); | |||
7360 | ||||
7361 | const XMMRegister x0 = xmm0; | |||
7362 | const XMMRegister x1 = xmm1; | |||
7363 | const XMMRegister x2 = xmm2; | |||
7364 | const XMMRegister x3 = xmm3; | |||
7365 | ||||
7366 | const XMMRegister x4 = xmm4; | |||
7367 | const XMMRegister x5 = xmm5; | |||
7368 | const XMMRegister x6 = xmm6; | |||
7369 | const XMMRegister x7 = xmm7; | |||
7370 | ||||
7371 | const Register tmp1 = r8; | |||
7372 | const Register tmp2 = r9; | |||
7373 | const Register tmp3 = r10; | |||
7374 | const Register tmp4 = r11; | |||
7375 | ||||
7376 | BLOCK_COMMENT("Entry:"); | |||
7377 | __masm-> enter(); // required for proper stackwalking of RuntimeStub frame | |||
7378 | ||||
7379 | #ifdef _WIN64 | |||
7380 | __masm-> push(rsi); | |||
7381 | __masm-> push(rdi); | |||
7382 | #endif | |||
7383 | __masm-> fast_cos(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4); | |||
7384 | ||||
7385 | #ifdef _WIN64 | |||
7386 | __masm-> pop(rdi); | |||
7387 | __masm-> pop(rsi); | |||
7388 | #endif | |||
7389 | ||||
7390 | __masm-> leave(); // required for proper stackwalking of RuntimeStub frame | |||
7391 | __masm-> ret(0); | |||
7392 | ||||
7393 | return start; | |||
7394 | ||||
7395 | } | |||
7396 | ||||
7397 | address generate_libmTan() { | |||
7398 | StubCodeMark mark(this, "StubRoutines", "libmTan"); | |||
7399 | ||||
7400 | address start = __masm-> pc(); | |||
7401 | ||||
7402 | const XMMRegister x0 = xmm0; | |||
7403 | const XMMRegister x1 = xmm1; | |||
7404 | const XMMRegister x2 = xmm2; | |||
7405 | const XMMRegister x3 = xmm3; | |||
7406 | ||||
7407 | const XMMRegister x4 = xmm4; | |||
7408 | const XMMRegister x5 = xmm5; | |||
7409 | const XMMRegister x6 = xmm6; | |||
7410 | const XMMRegister x7 = xmm7; | |||
7411 | ||||
7412 | const Register tmp1 = r8; | |||
7413 | const Register tmp2 = r9; | |||
7414 | const Register tmp3 = r10; | |||
7415 | const Register tmp4 = r11; | |||
7416 | ||||
7417 | BLOCK_COMMENT("Entry:"); | |||
7418 | __masm-> enter(); // required for proper stackwalking of RuntimeStub frame | |||
7419 | ||||
7420 | #ifdef _WIN64 | |||
7421 | __masm-> push(rsi); | |||
7422 | __masm-> push(rdi); | |||
7423 | #endif | |||
7424 | __masm-> fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4); | |||
7425 | ||||
7426 | #ifdef _WIN64 | |||
7427 | __masm-> pop(rdi); | |||
7428 | __masm-> pop(rsi); | |||
7429 | #endif | |||
7430 | ||||
7431 | __masm-> leave(); // required for proper stackwalking of RuntimeStub frame | |||
7432 | __masm-> ret(0); | |||
7433 | ||||
7434 | return start; | |||
7435 | ||||
7436 | } | |||
7437 | ||||
7438 | #undef __masm-> | |||
7439 | #define __masm-> masm-> | |||
7440 | ||||
7441 | // Continuation point for throwing of implicit exceptions that are | |||
7442 | // not handled in the current activation. Fabricates an exception | |||
7443 | // oop and initiates normal exception dispatching in this | |||
7444 | // frame. Since we need to preserve callee-saved values (currently | |||
7445 | // only for C2, but done for C1 as well) we need a callee-saved oop | |||
7446 | // map and therefore have to make these stubs into RuntimeStubs | |||
7447 | // rather than BufferBlobs. If the compiler needs all registers to | |||
7448 | // be preserved between the fault point and the exception handler | |||
7449 | // then it must assume responsibility for that in | |||
7450 | // AbstractCompiler::continuation_for_implicit_null_exception or | |||
7451 | // continuation_for_implicit_division_by_zero_exception. All other | |||
7452 | // implicit exceptions (e.g., NullPointerException or | |||
7453 | // AbstractMethodError on entry) are either at call sites or | |||
7454 | // otherwise assume that stack unwinding will be initiated, so | |||
7455 | // caller saved registers were assumed volatile in the compiler. | |||
7456 | address generate_throw_exception(const char* name, | |||
7457 | address runtime_entry, | |||
7458 | Register arg1 = noreg, | |||
7459 | Register arg2 = noreg) { | |||
7460 | // Information about frame layout at time of blocking runtime call. | |||
7461 | // Note that we only have to preserve callee-saved registers since | |||
7462 | // the compilers are responsible for supplying a continuation point | |||
7463 | // if they expect all registers to be preserved. | |||
7464 | enum layout { | |||
7465 | rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt, | |||
7466 | rbp_off2, | |||
7467 | return_off, | |||
7468 | return_off2, | |||
7469 | framesize // inclusive of return address | |||
7470 | }; | |||
7471 | ||||
7472 | int insts_size = 512; | |||
7473 | int locs_size = 64; | |||
7474 | ||||
7475 | CodeBuffer code(name, insts_size, locs_size); | |||
7476 | OopMapSet* oop_maps = new OopMapSet(); | |||
7477 | MacroAssembler* masm = new MacroAssembler(&code); | |||
7478 | ||||
7479 | address start = __masm-> pc(); | |||
7480 | ||||
7481 | // This is an inlined and slightly modified version of call_VM | |||
7482 | // which has the ability to fetch the return PC out of | |||
7483 | // thread-local storage and also sets up last_Java_sp slightly | |||
7484 | // differently than the real call_VM | |||
7485 | ||||
7486 | __masm-> enter(); // required for proper stackwalking of RuntimeStub frame | |||
7487 | ||||
7488 | assert(is_even(framesize/2), "sp not 16-byte aligned"); | |||
7489 | ||||
7490 | // return address and rbp are already in place | |||
7491 | __masm-> subptr(rsp, (framesize-4) << LogBytesPerInt); // prolog | |||
7492 | ||||
7493 | int frame_complete = __masm-> pc() - start; | |||
7494 | ||||
7495 | // Set up last_Java_sp and last_Java_fp | |||
7496 | address the_pc = __masm-> pc(); | |||
7497 | __masm-> set_last_Java_frame(rsp, rbp, the_pc); | |||
7498 | __masm-> andptr(rsp, -(StackAlignmentInBytes)); // Align stack | |||
7499 | ||||
7500 | // Call runtime | |||
7501 | if (arg1 != noreg) { | |||
7502 | assert(arg2 != c_rarg1, "clobbered"); | |||
7503 | __masm-> movptr(c_rarg1, arg1); | |||
7504 | } | |||
7505 | if (arg2 != noreg) { | |||
7506 | __masm-> movptr(c_rarg2, arg2); | |||
7507 | } | |||
7508 | __masm-> movptr(c_rarg0, r15_thread); | |||
7509 | BLOCK_COMMENT("call runtime_entry"); | |||
7510 | __masm-> call(RuntimeAddress(runtime_entry)); | |||
7511 | ||||
7512 | // Generate oop map | |||
7513 | OopMap* map = new OopMap(framesize, 0); | |||
7514 | ||||
7515 | oop_maps->add_gc_map(the_pc - start, map); | |||
7516 | ||||
7517 | __masm-> reset_last_Java_frame(true); | |||
7518 | ||||
7519 | __masm-> leave(); // required for proper stackwalking of RuntimeStub frame | |||
7520 | ||||
7521 | // check for pending exceptions | |||
7522 | #ifdef ASSERT | |||
7523 | Label L; | |||
7524 | __masm-> cmpptr(Address(r15_thread, Thread::pending_exception_offset()), | |||
7525 | (int32_t) NULL_WORD0L); | |||
7526 | __masm-> jcc(Assembler::notEqual, L); | |||
7527 | __masm-> should_not_reach_here(); | |||
7528 | __masm-> bind(L); | |||
7529 | #endif // ASSERT | |||
7530 | __masm-> jump(RuntimeAddress(StubRoutines::forward_exception_entry())); | |||
7531 | ||||
7532 | ||||
7533 | // codeBlob framesize is in words (not VMRegImpl::slot_size) | |||
7534 | RuntimeStub* stub = | |||
7535 | RuntimeStub::new_runtime_stub(name, | |||
7536 | &code, | |||
7537 | frame_complete, | |||
7538 | (framesize >> (LogBytesPerWord - LogBytesPerInt)), | |||
7539 | oop_maps, false); | |||
7540 | return stub->entry_point(); | |||
7541 | } | |||
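  // Added usage note (illustrative): each generate_throw_exception() call
  // below wraps one SharedRuntime::throw_* entry point in a small
  // RuntimeStub and returns its entry address, which is then cached in a
  // StubRoutines field for use by compiled code and the interpreter.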

  void create_control_words() {
    // Round to nearest, 64-bit mode, exceptions masked
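    // Added decode (illustrative): 0x1F80 sets MXCSR bits 7..12, the six
    // exception mask bits (invalid, denormal, divide-by-zero, overflow,
    // underflow, precision), leaves RC (bits 13..14) at 00 = round to
    // nearest, and keeps FZ/DAZ clear.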
    StubRoutines::x86::_mxcsr_std = 0x1F80;
  }

  // Initialization
  void generate_initial() {
    // Generates all stubs and initializes the entry points

    // These platform-specific settings are needed by generate_call_stub()
    create_control_words();

    // entry points that exist in all platforms. Note: This is code
    // that could be shared among different platforms - however the
    // benefit seems to be smaller than the disadvantage of having a
    // much more complicated generator structure. See also comment in
    // stubRoutines.hpp.

    StubRoutines::_forward_exception_entry = generate_forward_exception();

    StubRoutines::_call_stub_entry =
      generate_call_stub(StubRoutines::_call_stub_return_address);

    // is referenced by megamorphic call
    StubRoutines::_catch_exception_entry = generate_catch_exception();

    // atomic calls
    StubRoutines::_fence_entry = generate_orderaccess_fence();

    // platform dependent
    StubRoutines::x86::_get_previous_sp_entry = generate_get_previous_sp();

    StubRoutines::x86::_verify_mxcsr_entry = generate_verify_mxcsr();

    StubRoutines::x86::_f2i_fixup = generate_f2i_fixup();
    StubRoutines::x86::_f2l_fixup = generate_f2l_fixup();
    StubRoutines::x86::_d2i_fixup = generate_d2i_fixup();
    StubRoutines::x86::_d2l_fixup = generate_d2l_fixup();

    StubRoutines::x86::_float_sign_mask  = generate_fp_mask("float_sign_mask",  0x7FFFFFFF7FFFFFFF);
    StubRoutines::x86::_float_sign_flip  = generate_fp_mask("float_sign_flip",  0x8000000080000000);
    StubRoutines::x86::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF);
    StubRoutines::x86::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000);

    // Build this early so it's available for the interpreter.
    StubRoutines::_throw_StackOverflowError_entry =
      generate_throw_exception("StackOverflowError throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_StackOverflowError));
    StubRoutines::_throw_delayed_StackOverflowError_entry =
      generate_throw_exception("delayed StackOverflowError throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_delayed_StackOverflowError));
    if (UseCRC32Intrinsics) {
      // set table address before stub generation which uses it
      StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table;
      StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
    }

    if (UseCRC32CIntrinsics) {
      bool supports_clmul = VM_Version::supports_clmul();
      StubRoutines::x86::generate_CRC32C_table(supports_clmul);
      StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
      StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
    }

    if (UseAdler32Intrinsics) {
      StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
    }

    if (UseLibmIntrinsic && InlineIntrinsics) {
      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) ||
          vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos) ||
          vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) {
        StubRoutines::x86::_ONEHALF_adr = (address)StubRoutines::x86::_ONEHALF;
        StubRoutines::x86::_P_2_adr = (address)StubRoutines::x86::_P_2;
        StubRoutines::x86::_SC_4_adr = (address)StubRoutines::x86::_SC_4;
        StubRoutines::x86::_Ctable_adr = (address)StubRoutines::x86::_Ctable;
        StubRoutines::x86::_SC_2_adr = (address)StubRoutines::x86::_SC_2;
        StubRoutines::x86::_SC_3_adr = (address)StubRoutines::x86::_SC_3;
        StubRoutines::x86::_SC_1_adr = (address)StubRoutines::x86::_SC_1;
        StubRoutines::x86::_PI_INV_TABLE_adr = (address)StubRoutines::x86::_PI_INV_TABLE;
        StubRoutines::x86::_PI_4_adr = (address)StubRoutines::x86::_PI_4;
        StubRoutines::x86::_PI32INV_adr = (address)StubRoutines::x86::_PI32INV;
        StubRoutines::x86::_SIGN_MASK_adr = (address)StubRoutines::x86::_SIGN_MASK;
        StubRoutines::x86::_P_1_adr = (address)StubRoutines::x86::_P_1;
        StubRoutines::x86::_P_3_adr = (address)StubRoutines::x86::_P_3;
        StubRoutines::x86::_NEG_ZERO_adr = (address)StubRoutines::x86::_NEG_ZERO;
      }
      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dexp)) {
        StubRoutines::_dexp = generate_libmExp();
      }
      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog)) {
        StubRoutines::_dlog = generate_libmLog();
      }
      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog10)) {
        StubRoutines::_dlog10 = generate_libmLog10();
      }
      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dpow)) {
        StubRoutines::_dpow = generate_libmPow();
      }
      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) {
        StubRoutines::_dsin = generate_libmSin();
      }
      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) {
        StubRoutines::_dcos = generate_libmCos();
      }
      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) {
        StubRoutines::_dtan = generate_libmTan();
      }
    }

    // Safefetch stubs.
    generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
                       &StubRoutines::_safefetch32_fault_pc,
                       &StubRoutines::_safefetch32_continuation_pc);
    generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
                       &StubRoutines::_safefetchN_fault_pc,
                       &StubRoutines::_safefetchN_continuation_pc);
  }
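  // Added note (assumption): generate_initial() runs in the first stub
  // generation phase, early enough that entries such as
  // _throw_StackOverflowError_entry exist when the interpreter is built;
  // the remaining stubs are produced later by generate_all().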

  void generate_all() {
    // Generates all stubs and initializes the entry points

    // These entry points require SharedInfo::stack0 to be set up in
    // non-core builds and need to be relocatable, so they each
    // fabricate a RuntimeStub internally.
    StubRoutines::_throw_AbstractMethodError_entry =
      generate_throw_exception("AbstractMethodError throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_AbstractMethodError));

    StubRoutines::_throw_IncompatibleClassChangeError_entry =
      generate_throw_exception("IncompatibleClassChangeError throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_IncompatibleClassChangeError));

    StubRoutines::_throw_NullPointerException_at_call_entry =
      generate_throw_exception("NullPointerException at call throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_NullPointerException_at_call));

    // entry points that are platform specific
    StubRoutines::x86::_vector_float_sign_mask = generate_vector_mask("vector_float_sign_mask", 0x7FFFFFFF7FFFFFFF);
    StubRoutines::x86::_vector_float_sign_flip = generate_vector_mask("vector_float_sign_flip", 0x8000000080000000);
    StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask("vector_double_sign_mask", 0x7FFFFFFFFFFFFFFF);
    StubRoutines::x86::_vector_double_sign_flip = generate_vector_mask("vector_double_sign_flip", 0x8000000000000000);
    StubRoutines::x86::_vector_all_bits_set = generate_vector_mask("vector_all_bits_set", 0xFFFFFFFFFFFFFFFF);
    StubRoutines::x86::_vector_int_mask_cmp_bits = generate_vector_mask("vector_int_mask_cmp_bits", 0x0000000100000001);
    StubRoutines::x86::_vector_short_to_byte_mask = generate_vector_mask("vector_short_to_byte_mask", 0x00ff00ff00ff00ff);
    StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask("vector_byte_perm_mask");
    StubRoutines::x86::_vector_int_to_byte_mask = generate_vector_mask("vector_int_to_byte_mask", 0x000000ff000000ff);
    StubRoutines::x86::_vector_int_to_short_mask = generate_vector_mask("vector_int_to_short_mask", 0x0000ffff0000ffff);
    StubRoutines::x86::_vector_32_bit_mask = generate_vector_custom_i32("vector_32_bit_mask", Assembler::AVX_512bit,
                                                                        0xFFFFFFFF, 0, 0, 0);
    StubRoutines::x86::_vector_64_bit_mask = generate_vector_custom_i32("vector_64_bit_mask", Assembler::AVX_512bit,
                                                                        0xFFFFFFFF, 0xFFFFFFFF, 0, 0);
    StubRoutines::x86::_vector_int_shuffle_mask = generate_vector_mask("vector_int_shuffle_mask", 0x0302010003020100);
    StubRoutines::x86::_vector_byte_shuffle_mask = generate_vector_byte_shuffle_mask("vector_byte_shuffle_mask");
    StubRoutines::x86::_vector_short_shuffle_mask = generate_vector_mask("vector_short_shuffle_mask", 0x0100010001000100);
    StubRoutines::x86::_vector_long_shuffle_mask = generate_vector_mask("vector_long_shuffle_mask", 0x0000000100000000);
    StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask("vector_long_sign_mask", 0x8000000000000000);
    StubRoutines::x86::_vector_iota_indices = generate_iota_indices("iota_indices");

    // support for verify_oop (must happen after universe_init)
    if (VerifyOops) {
      StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
    }

    // data cache line writeback
    StubRoutines::_data_cache_writeback = generate_data_cache_writeback();
    StubRoutines::_data_cache_writeback_sync = generate_data_cache_writeback_sync();

    // arraycopy stubs used by compilers
    generate_arraycopy_stubs();

    // don't bother generating these AES intrinsic stubs unless global flag is set
    if (UseAESIntrinsics) {
      StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask();  // needed by the others
      StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
      if (VM_Version::supports_avx512_vaes() && VM_Version::supports_avx512vl() && VM_Version::supports_avx512dq()) {
        StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptVectorAESCrypt();
        StubRoutines::_electronicCodeBook_encryptAESCrypt = generate_electronicCodeBook_encryptAESCrypt();
        StubRoutines::_electronicCodeBook_decryptAESCrypt = generate_electronicCodeBook_decryptAESCrypt();
        StubRoutines::x86::_counter_mask_addr = counter_mask_addr();
        StubRoutines::x86::_ghash_poly512_addr = ghash_polynomial512_addr();
        StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
        StubRoutines::_galoisCounterMode_AESCrypt = generate_galoisCounterMode_AESCrypt();
      } else {
        StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
      }
    }

    if (UseAESCTRIntrinsics) {
      if (VM_Version::supports_avx512_vaes() && VM_Version::supports_avx512bw() && VM_Version::supports_avx512vl()) {
        if (StubRoutines::x86::_counter_mask_addr == NULL) {
          StubRoutines::x86::_counter_mask_addr = counter_mask_addr();
        }
        StubRoutines::_counterMode_AESCrypt = generate_counterMode_VectorAESCrypt();
      } else {
        StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
        StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
      }
    }

    if (UseMD5Intrinsics) {
      StubRoutines::_md5_implCompress = generate_md5_implCompress(false, "md5_implCompress");
      StubRoutines::_md5_implCompressMB = generate_md5_implCompress(true, "md5_implCompressMB");
    }
    if (UseSHA1Intrinsics) {
      StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask();
      StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask();
      StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
      StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");
    }
    if (UseSHA256Intrinsics) {
      StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256;
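      // Added note (illustrative): the loop below widens the 16 x 16-byte
      // groups of SHA-256 round constants in _k256 into 32-byte entries in
      // _k256_W, duplicating each group into both 128-bit lanes so the AVX2
      // code can read a constant broadcast across a full ymm register.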
      char* dst = (char*)StubRoutines::x86::_k256_W;
      char* src = (char*)StubRoutines::x86::_k256;
      for (int ii = 0; ii < 16; ++ii) {
        memcpy(dst + 32 * ii,      src + 16 * ii, 16);
        memcpy(dst + 32 * ii + 16, src + 16 * ii, 16);
      }
      StubRoutines::x86::_k256_W_adr = (address)StubRoutines::x86::_k256_W;
      StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
      StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
      StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
    }
    if (UseSHA512Intrinsics) {
      StubRoutines::x86::_k512_W_addr = (address)StubRoutines::x86::_k512_W;
      StubRoutines::x86::_pshuffle_byte_flip_mask_addr_sha512 = generate_pshuffle_byte_flip_mask_sha512();
      StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress");
      StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
    }

    // Generate GHASH intrinsics code
    if (UseGHASHIntrinsics) {
      if (StubRoutines::x86::_ghash_long_swap_mask_addr == NULL) {
        StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
      }
      StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
      if (VM_Version::supports_avx()) {
        StubRoutines::x86::_ghash_shuffmask_addr = ghash_shufflemask_addr();
        StubRoutines::x86::_ghash_poly_addr = ghash_polynomial_addr();
        StubRoutines::_ghash_processBlocks = generate_avx_ghash_processBlocks();
      } else {
        StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
      }
    }


    if (UseBASE64Intrinsics) {
      if (VM_Version::supports_avx2() &&
          VM_Version::supports_avx512bw() &&
          VM_Version::supports_avx512vl()) {
        StubRoutines::x86::_avx2_shuffle_base64 = base64_avx2_shuffle_addr();
        StubRoutines::x86::_avx2_input_mask_base64 = base64_avx2_input_mask_addr();
        StubRoutines::x86::_avx2_lut_base64 = base64_avx2_lut_addr();
      }
      StubRoutines::x86::_encoding_table_base64 = base64_encoding_table_addr();
      if (VM_Version::supports_avx512_vbmi()) {
        StubRoutines::x86::_shuffle_base64 = base64_shuffle_addr();
        StubRoutines::x86::_lookup_lo_base64 = base64_vbmi_lookup_lo_addr();
        StubRoutines::x86::_lookup_hi_base64 = base64_vbmi_lookup_hi_addr();
        StubRoutines::x86::_lookup_lo_base64url = base64_vbmi_lookup_lo_url_addr();
        StubRoutines::x86::_lookup_hi_base64url = base64_vbmi_lookup_hi_url_addr();
        StubRoutines::x86::_pack_vec_base64 = base64_vbmi_pack_vec_addr();
        StubRoutines::x86::_join_0_1_base64 = base64_vbmi_join_0_1_addr();
        StubRoutines::x86::_join_1_2_base64 = base64_vbmi_join_1_2_addr();
        StubRoutines::x86::_join_2_3_base64 = base64_vbmi_join_2_3_addr();
      }
      StubRoutines::x86::_decoding_table_base64 = base64_decoding_table_addr();
      StubRoutines::_base64_encodeBlock = generate_base64_encodeBlock();
      StubRoutines::_base64_decodeBlock = generate_base64_decodeBlock();
    }

    BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
    if (bs_nm != NULL) {
      StubRoutines::x86::_method_entry_barrier = generate_method_entry_barrier();
    }
#ifdef COMPILER2
    if (UseMultiplyToLenIntrinsic) {
      StubRoutines::_multiplyToLen = generate_multiplyToLen();
    }
    if (UseSquareToLenIntrinsic) {
      StubRoutines::_squareToLen = generate_squareToLen();
    }
    if (UseMulAddIntrinsic) {
      StubRoutines::_mulAdd = generate_mulAdd();
    }
    if (VM_Version::supports_avx512_vbmi2()) {
      StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift();
      StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift();
    }
    if (UseMontgomeryMultiplyIntrinsic) {
      StubRoutines::_montgomeryMultiply
        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
    }
    if (UseMontgomerySquareIntrinsic) {
      StubRoutines::_montgomerySquare
        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
    }

    // Get svml stub routine addresses
    void *libjsvml = NULL;
    char ebuf[1024];
    char dll_name[JVM_MAXPATHLEN + 1];
    if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "jsvml")) {
      libjsvml = os::dll_load(dll_name, ebuf, sizeof ebuf);
    }
    if (libjsvml != NULL) {
      // SVML method naming convention
      //   All the methods are named as __jsvml_op<T><N>_ha_<VV>
      //   Where:
      //      ha stands for high accuracy
      //      <T> is optional to indicate float/double
      //              Set to f for vector float operation
      //              Omitted for vector double operation
      //      <N> is the number of elements in the vector
      //              1, 2, 4, 8, 16
      //              e.g. 128 bit float vector has 4 float elements
      //      <VV> indicates the avx/sse level:
      //              z0 is AVX512, l9 is AVX2, e9 is AVX1 and ex is for SSE2
      //      e.g. __jsvml_expf16_ha_z0 is the method for computing 16 element vector float exp using AVX 512 insns
      //           __jsvml_exp8_ha_z0 is the method for computing 8 element vector double exp using AVX 512 insns

      log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "jsvml" JNI_LIB_SUFFIX, p2i(libjsvml));
      if (UseAVX > 2) {
        for (int op = 0; op < VectorSupport::NUM_SVML_OP; op++) {
          int vop = VectorSupport::VECTOR_OP_SVML_START + op;
          if ((!VM_Version::supports_avx512dq()) &&
              (vop == VectorSupport::VECTOR_OP_LOG || vop == VectorSupport::VECTOR_OP_LOG10 || vop == VectorSupport::VECTOR_OP_POW)) {
            continue;
          }
          snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf16_ha_z0", VectorSupport::svmlname[op]);
          StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf);

          snprintf(ebuf, sizeof(ebuf), "__jsvml_%s8_ha_z0", VectorSupport::svmlname[op]);
          StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf);
        }
      }
      const char* avx_sse_str = (UseAVX >= 2) ? "l9" : ((UseAVX == 1) ? "e9" : "ex");
      for (int op = 0; op < VectorSupport::NUM_SVML_OP; op++) {
        int vop = VectorSupport::VECTOR_OP_SVML_START + op;
        if (vop == VectorSupport::VECTOR_OP_POW) {
          continue;
        }
        snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
        StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf);

        snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
        StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf);

        snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf8_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
        StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf);

        snprintf(ebuf, sizeof(ebuf), "__jsvml_%s1_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
        StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf);

        snprintf(ebuf, sizeof(ebuf), "__jsvml_%s2_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
        StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf);

        snprintf(ebuf, sizeof(ebuf), "__jsvml_%s4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
        StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf);
      }
    }
#endif // COMPILER2

    if (UseVectorizedMismatchIntrinsic) {
      StubRoutines::_vectorizedMismatch = generate_vectorizedMismatch();
    }
  }

 public:
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    if (all) {
      generate_all();
    } else {
      generate_initial();
    }
  }
}; // end class declaration

#define UCM_TABLE_MAX_ENTRIES 16
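// Added note (assumption): 'all' selects between the two stub-generation
// phases sketched above; the VM is assumed to call this once early with
// all == false (generate_initial) and once later with all == true
// (generate_all), matching the two init phases in stubRoutines.cpp.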
void StubGenerator_generate(CodeBuffer* code, bool all) {
  if (UnsafeCopyMemory::_table == NULL) {
    UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
  }
  StubGenerator g(code, all);
}
/*
 * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_X86_VM_VERSION_X86_HPP
#define CPU_X86_VM_VERSION_X86_HPP

#include "runtime/abstract_vm_version.hpp"
#include "utilities/macros.hpp"
#include "utilities/sizes.hpp"

class VM_Version : public Abstract_VM_Version {
  friend class VMStructs;
  friend class JVMCIVMStructs;

 public:
  // cpuid result register layouts. These are all unions of a uint32_t
  // (in case anyone wants access to the register as a whole) and a bitfield.

  union StdCpuid1Eax {
    uint32_t value;
    struct {
      uint32_t stepping   : 4,
               model      : 4,
               family     : 4,
               proc_type  : 2,
                          : 2,
               ext_model  : 4,
               ext_family : 8,
                          : 4;
    } bits;
  };

  union StdCpuid1Ebx { // example, unused
    uint32_t value;
    struct {
      uint32_t brand_id        : 8,
               clflush_size    : 8,
               threads_per_cpu : 8,
               apic_id         : 8;
    } bits;
  };

  union StdCpuid1Ecx {
    uint32_t value;
    struct {
      uint32_t sse3      : 1,
               clmul     : 1,
                         : 1,
               monitor   : 1,
                         : 1,
               vmx       : 1,
                         : 1,
               est       : 1,
                         : 1,
               ssse3     : 1,
               cid       : 1,
                         : 1,
               fma       : 1,
               cmpxchg16 : 1,
                         : 4,
               dca       : 1,
               sse4_1    : 1,
               sse4_2    : 1,
                         : 2,
               popcnt    : 1,
                         : 1,
               aes       : 1,
                         : 1,
               osxsave   : 1,
               avx       : 1,
                         : 2,
               hv        : 1;
    } bits;
  };

  union StdCpuid1Edx {
    uint32_t value;
    struct {
      uint32_t          : 4,
               tsc      : 1,
                        : 3,
               cmpxchg8 : 1,
                        : 6,
               cmov     : 1,
                        : 3,
               clflush  : 1,
                        : 3,
               mmx      : 1,
               fxsr     : 1,
               sse      : 1,
               sse2     : 1,
                        : 1,
               ht       : 1,
                        : 3;
    } bits;
  };

  union DcpCpuid4Eax {
    uint32_t value;
    struct {
      uint32_t cache_type    : 5,
                             : 21,
               cores_per_cpu : 6;
    } bits;
  };

  union DcpCpuid4Ebx {
    uint32_t value;
    struct {
      uint32_t L1_line_size  : 12,
               partitions    : 10,
               associativity : 10;
    } bits;
  };

  union TplCpuidBEbx {
    uint32_t value;
    struct {
      uint32_t logical_cpus : 16,
                            : 16;
    } bits;
  };

  union ExtCpuid1Ecx {
    uint32_t value;
    struct {
      uint32_t LahfSahf    : 1,
               CmpLegacy   : 1,
                           : 3,
               lzcnt_intel : 1,
               lzcnt       : 1,
               sse4a       : 1,
               misalignsse : 1,
               prefetchw   : 1,
                           : 22;
    } bits;
  };

  union ExtCpuid1Edx {
    uint32_t value;
    struct {
      uint32_t           : 22,
               mmx_amd   : 1,
               mmx       : 1,
               fxsr      : 1,
                         : 4,
               long_mode : 1,
               tdnow2    : 1,
               tdnow     : 1;
    } bits;
  };

  union ExtCpuid5Ex {
    uint32_t value;
    struct {
      uint32_t L1_line_size : 8,
               L1_tag_lines : 8,
               L1_assoc     : 8,
               L1_size      : 8;
    } bits;
  };

  union ExtCpuid7Edx {
    uint32_t value;
    struct {
      uint32_t                : 8,
               tsc_invariance : 1,
                              : 23;
    } bits;
  };

  union ExtCpuid8Ecx {
    uint32_t value;
    struct {
      uint32_t cores_per_cpu : 8,
                             : 24;
    } bits;
  };

  union SefCpuid7Eax {
    uint32_t value;
  };

  union SefCpuid7Ebx {
    uint32_t value;
    struct {
      uint32_t fsgsbase   : 1,
                          : 2,
               bmi1       : 1,
                          : 1,
               avx2       : 1,
                          : 2,
               bmi2       : 1,
               erms       : 1,
                          : 1,
               rtm        : 1,
                          : 4,
               avx512f    : 1,
               avx512dq   : 1,
                          : 1,
               adx        : 1,
                          : 3,
               clflushopt : 1,
               clwb       : 1,
                          : 1,
               avx512pf   : 1,
               avx512er   : 1,
               avx512cd   : 1,
               sha        : 1,
               avx512bw   : 1,
               avx512vl   : 1;
    } bits;
  };

  union SefCpuid7Ecx {
    uint32_t value;
    struct {
      uint32_t prefetchwt1       : 1,
               avx512_vbmi       : 1,
               umip              : 1,
               pku               : 1,
               ospke             : 1,
                                 : 1,
               avx512_vbmi2      : 1,
                                 : 1,
               gfni              : 1,
               vaes              : 1,
               avx512_vpclmulqdq : 1,
               avx512_vnni       : 1,
               avx512_bitalg     : 1,
                                 : 1,
               avx512_vpopcntdq  : 1,
                                 : 17;
    } bits;
  };

  union SefCpuid7Edx {
    uint32_t value;
    struct {
      uint32_t               : 2,
               avx512_4vnniw : 1,
               avx512_4fmaps : 1,
                             : 10,
               serialize     : 1,
                             : 17;
    } bits;
  };

  union ExtCpuid1EEbx {
    uint32_t value;
    struct {
      uint32_t                  : 8,
               threads_per_core : 8,
                                : 16;
    } bits;
  };

  union XemXcr0Eax {
    uint32_t value;
    struct {
      uint32_t x87     : 1,
               sse     : 1,
               ymm     : 1,
               bndregs : 1,
               bndcsr  : 1,
               opmask  : 1,
               zmm512  : 1,
               zmm32   : 1,
                       : 24;
    } bits;
  };
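  // Added note (assumption from the XSAVE architecture, not in the original
  // source): the XCR0 bits sampled here report which register state
  // components the OS has enabled; sse/ymm gate AVX use, and
  // opmask/zmm512/zmm32 (the opmask, ZMM_Hi256 and Hi16_ZMM components)
  // gate AVX-512 use in feature_flags() below.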

 protected:
  static int _cpu;
  static int _model;
  static int _stepping;

  static bool _has_intel_jcc_erratum;

  static address _cpuinfo_segv_addr; // address of instruction which causes SEGV
  static address _cpuinfo_cont_addr; // address of instruction after the one which causes SEGV

  enum Feature_Flag : uint64_t {
#define CPU_FEATURE_FLAGS(decl) \
    decl(CX8,               "cx8",               0)  /*  next bits are from cpuid 1 (EDX) */ \
    decl(CMOV,              "cmov",              1)  \
    decl(FXSR,              "fxsr",              2)  \
    decl(HT,                "ht",                3)  \
                                                     \
    decl(MMX,               "mmx",               4)  \
    decl(3DNOW_PREFETCH,    "3dnowpref",         5)  /* Processor supports 3dnow prefetch and prefetchw instructions */ \
                                                     /* may not necessarily support other 3dnow instructions */ \
    decl(SSE,               "sse",               6)  \
    decl(SSE2,              "sse2",              7)  \
                                                     \
    decl(SSE3,              "sse3",              8 ) /* SSE3 comes from cpuid 1 (ECX) */ \
    decl(SSSE3,             "ssse3",             9 ) \
    decl(SSE4A,             "sse4a",             10) \
    decl(SSE4_1,            "sse4.1",            11) \
                                                     \
    decl(SSE4_2,            "sse4.2",            12) \
    decl(POPCNT,            "popcnt",            13) \
    decl(LZCNT,             "lzcnt",             14) \
    decl(TSC,               "tsc",               15) \
                                                     \
    decl(TSCINV_BIT,        "tscinvbit",         16) \
    decl(TSCINV,            "tscinv",            17) \
    decl(AVX,               "avx",               18) \
    decl(AVX2,              "avx2",              19) \
                                                     \
    decl(AES,               "aes",               20) \
    decl(ERMS,              "erms",              21) /* enhanced 'rep movsb/stosb' instructions */ \
    decl(CLMUL,             "clmul",             22) /* carryless multiply for CRC */ \
    decl(BMI1,              "bmi1",              23) \
                                                     \
    decl(BMI2,              "bmi2",              24) \
    decl(RTM,               "rtm",               25) /* Restricted Transactional Memory instructions */ \
    decl(ADX,               "adx",               26) \
    decl(AVX512F,           "avx512f",           27) /* AVX 512bit foundation instructions */ \
                                                     \
    decl(AVX512DQ,          "avx512dq",          28) \
    decl(AVX512PF,          "avx512pf",          29) \
    decl(AVX512ER,          "avx512er",          30) \
    decl(AVX512CD,          "avx512cd",          31) \
                                                     \
    decl(AVX512BW,          "avx512bw",          32) /* Byte and word vector instructions */ \
    decl(AVX512VL,          "avx512vl",          33) /* EVEX instructions with smaller vector length */ \
    decl(SHA,               "sha",               34) /* SHA instructions */ \
    decl(FMA,               "fma",               35) /* FMA instructions */ \
                                                     \
    decl(VZEROUPPER,        "vzeroupper",        36) /* Vzeroupper instruction */ \
    decl(AVX512_VPOPCNTDQ,  "avx512_vpopcntdq",  37) /* Vector popcount */ \
    decl(AVX512_VPCLMULQDQ, "avx512_vpclmulqdq", 38) /* Vector carryless multiplication */ \
    decl(AVX512_VAES,       "avx512_vaes",       39) /* Vector AES instruction */ \
                                                     \
    decl(AVX512_VNNI,       "avx512_vnni",       40) /* Vector Neural Network Instructions */ \
    decl(FLUSH,             "clflush",           41) /* flush instruction */ \
    decl(FLUSHOPT,          "clflushopt",        42) /* clflushopt instruction */ \
    decl(CLWB,              "clwb",              43) /* clwb instruction */ \
                                                     \
    decl(AVX512_VBMI2,      "avx512_vbmi2",      44) /* VBMI2 shift left double instructions */ \
    decl(AVX512_VBMI,       "avx512_vbmi",       45) /* Vector BMI instructions */ \
    decl(HV,                "hv",                46) /* Hypervisor instructions */ \
    decl(SERIALIZE,         "serialize",         47) /* CPU SERIALIZE */

#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1ULL << bit),
    CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
#undef DECLARE_CPU_FEATURE_FLAG
  };
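  // Added note (illustrative): expanding the table with
  // DECLARE_CPU_FEATURE_FLAG yields enumerators such as
  //   CPU_CX8       = (1ULL << 0),
  //   CPU_CMOV      = (1ULL << 1),
  //   ...
  //   CPU_SERIALIZE = (1ULL << 47),
  // so each feature occupies one bit of the 64-bit _features word.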

  static const char* _features_names[];

  enum Extended_Family {
    // AMD
    CPU_FAMILY_AMD_11H = 0x11,
    // ZX
    CPU_FAMILY_ZX_CORE_F6 = 6,
    CPU_FAMILY_ZX_CORE_F7 = 7,
    // Intel
    CPU_FAMILY_INTEL_CORE = 6,
    CPU_MODEL_NEHALEM        = 0x1e,
    CPU_MODEL_NEHALEM_EP     = 0x1a,
    CPU_MODEL_NEHALEM_EX     = 0x2e,
    CPU_MODEL_WESTMERE       = 0x25,
    CPU_MODEL_WESTMERE_EP    = 0x2c,
    CPU_MODEL_WESTMERE_EX    = 0x2f,
    CPU_MODEL_SANDYBRIDGE    = 0x2a,
    CPU_MODEL_SANDYBRIDGE_EP = 0x2d,
    CPU_MODEL_IVYBRIDGE_EP   = 0x3a,
    CPU_MODEL_HASWELL_E3     = 0x3c,
    CPU_MODEL_HASWELL_E7     = 0x3f,
    CPU_MODEL_BROADWELL      = 0x3d,
    CPU_MODEL_SKYLAKE        = 0x55
  };

  // cpuid information block. All info derived from executing cpuid with
  // various function numbers is stored here. Intel and AMD info is
  // merged in this block: accessor methods disentangle it.
  //
  // The info block is laid out in subblocks of 4 dwords corresponding to
  // eax, ebx, ecx and edx, whether or not they contain anything useful.
  struct CpuidInfo {
    // cpuid function 0
    uint32_t std_max_function;
    uint32_t std_vendor_name_0;
    uint32_t std_vendor_name_1;
    uint32_t std_vendor_name_2;

    // cpuid function 1
    StdCpuid1Eax std_cpuid1_eax;
    StdCpuid1Ebx std_cpuid1_ebx;
    StdCpuid1Ecx std_cpuid1_ecx;
    StdCpuid1Edx std_cpuid1_edx;

    // cpuid function 4 (deterministic cache parameters)
    DcpCpuid4Eax dcp_cpuid4_eax;
    DcpCpuid4Ebx dcp_cpuid4_ebx;
    uint32_t     dcp_cpuid4_ecx; // unused currently
    uint32_t     dcp_cpuid4_edx; // unused currently

    // cpuid function 7 (structured extended features)
    SefCpuid7Eax sef_cpuid7_eax;
    SefCpuid7Ebx sef_cpuid7_ebx;
    SefCpuid7Ecx sef_cpuid7_ecx;
    SefCpuid7Edx sef_cpuid7_edx;

    // cpuid function 0xB (processor topology)
    // ecx = 0
    uint32_t     tpl_cpuidB0_eax;
    TplCpuidBEbx tpl_cpuidB0_ebx;
    uint32_t     tpl_cpuidB0_ecx; // unused currently
    uint32_t     tpl_cpuidB0_edx; // unused currently

    // ecx = 1
    uint32_t     tpl_cpuidB1_eax;
    TplCpuidBEbx tpl_cpuidB1_ebx;
    uint32_t     tpl_cpuidB1_ecx; // unused currently
    uint32_t     tpl_cpuidB1_edx; // unused currently

    // ecx = 2
    uint32_t     tpl_cpuidB2_eax;
    TplCpuidBEbx tpl_cpuidB2_ebx;
    uint32_t     tpl_cpuidB2_ecx; // unused currently
    uint32_t     tpl_cpuidB2_edx; // unused currently

    // cpuid function 0x80000000 // example, unused
    uint32_t ext_max_function;
    uint32_t ext_vendor_name_0;
    uint32_t ext_vendor_name_1;
    uint32_t ext_vendor_name_2;

    // cpuid function 0x80000001
    uint32_t     ext_cpuid1_eax; // reserved
    uint32_t     ext_cpuid1_ebx; // reserved
    ExtCpuid1Ecx ext_cpuid1_ecx;
    ExtCpuid1Edx ext_cpuid1_edx;

    // cpuid functions 0x80000002 thru 0x80000004: example, unused
    uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3;
    uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7;
    uint32_t proc_name_8, proc_name_9, proc_name_10, proc_name_11;

    // cpuid function 0x80000005 // AMD L1, Intel reserved
    uint32_t    ext_cpuid5_eax; // unused currently
    uint32_t    ext_cpuid5_ebx; // reserved
    ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD)
    ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD)

    // cpuid function 0x80000007
    uint32_t     ext_cpuid7_eax; // reserved
    uint32_t     ext_cpuid7_ebx; // reserved
    uint32_t     ext_cpuid7_ecx; // reserved
    ExtCpuid7Edx ext_cpuid7_edx; // tscinv

    // cpuid function 0x80000008
    uint32_t     ext_cpuid8_eax; // unused currently
    uint32_t     ext_cpuid8_ebx; // reserved
    ExtCpuid8Ecx ext_cpuid8_ecx;
    uint32_t     ext_cpuid8_edx; // reserved

    // cpuid function 0x8000001E // AMD 17h
    uint32_t      ext_cpuid1E_eax;
    ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
    uint32_t      ext_cpuid1E_ecx;
    uint32_t      ext_cpuid1E_edx; // unused currently

    // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
    XemXcr0Eax xem_xcr0_eax;
    uint32_t   xem_xcr0_edx; // reserved

    // Space to save ymm registers after signal handling
    int ymm_save[8*4]; // Save ymm0, ymm7, ymm8, ymm15

    // Space to save zmm registers after signal handling
    int zmm_save[16*4]; // Save zmm0, zmm7, zmm8, zmm31
  };

  // The actual cpuid info block
  static CpuidInfo _cpuid_info;

  // Extractors and predicates
  static uint32_t extended_cpu_family() {
    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family;
    result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
    return result;
  }
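  // Added note (assumption from the cpuid spec, not in the original source):
  // ext_family is documented to read as zero unless the base family field is
  // 0xF, so the unconditional addition above only changes the result on
  // extended-family processors.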

  static uint32_t extended_cpu_model() {
    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model;
    result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
    return result;
  }
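  // Added example (illustrative): a Skylake client part reports family 0x6,
  // model 0xE, ext_model 0x4, so extended_cpu_model() returns
  // (0x4 << 4) | 0xE == 0x4E.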

  static uint32_t cpu_stepping() {
    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping;
    return result;
  }

  static uint logical_processor_count() {
    uint result = threads_per_core();
    return result;
  }

  static bool compute_has_intel_jcc_erratum();

  static uint64_t feature_flags() {
    uint64_t result = 0;
    if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
      result |= CPU_CX8;
    if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
      result |= CPU_CMOV;
    if (_cpuid_info.std_cpuid1_edx.bits.clflush != 0)
      result |= CPU_FLUSH;
#ifdef _LP64
    // clflush should always be available on x86_64
    // if not we are in real trouble because we rely on it
    // to flush the code cache.
    assert ((result & CPU_FLUSH) != 0, "clflush should be available");
#endif
    if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
        _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0))
      result |= CPU_FXSR;
    // HT flag is set for multi-core processors also.
    if (threads_per_core() > 1)
      result |= CPU_HT;
    if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
        _cpuid_info.ext_cpuid1_edx.bits.mmx != 0))
      result |= CPU_MMX;
    if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
      result |= CPU_SSE;
    if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
      result |= CPU_SSE2;
    if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
      result |= CPU_SSE3;
    if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
      result |= CPU_SSSE3;
    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
      result |= CPU_SSE4_1;
    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
      result |= CPU_SSE4_2;
    if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
      result |= CPU_POPCNT;
    if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
        _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
        _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
        _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
      result |= CPU_AVX;
      result |= CPU_VZEROUPPER;
      if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
        result |= CPU_AVX2;
      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 &&
          _cpuid_info.xem_xcr0_eax.bits.opmask != 0 &&
          _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 &&
          _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
        result |= CPU_AVX512F;
        if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
          result |= CPU_AVX512CD;
        if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
          result |= CPU_AVX512DQ;
        if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
          result |= CPU_AVX512PF;
        if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
          result |= CPU_AVX512ER;
        if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
          result |= CPU_AVX512BW;
        if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
          result |= CPU_AVX512VL;
        if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
          result |= CPU_AVX512_VPOPCNTDQ;
        if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
          result |= CPU_AVX512_VPCLMULQDQ;
        if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
          result |= CPU_AVX512_VAES;
        if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
          result |= CPU_AVX512_VNNI;
        if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0)
          result |= CPU_AVX512_VBMI;
        if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
          result |= CPU_AVX512_VBMI2;
      }
    }
    if (_cpuid_info.std_cpuid1_ecx.bits.hv != 0)
      result |= CPU_HV;
    if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
      result |= CPU_BMI1;
    if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
      result |= CPU_TSC;
    if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
      result |= CPU_TSCINV_BIT;
    if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
      result |= CPU_AES;
    if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
      result |= CPU_ERMS;
    if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
      result |= CPU_CLMUL;
    if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
      result |= CPU_RTM;
    if (_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
      result |= CPU_ADX;
    if (_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
      result |= CPU_BMI2;
    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
      result |= CPU_SHA;
    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
      result |= CPU_FMA;
    if (_cpuid_info.sef_cpuid7_ebx.bits.clflushopt != 0)
      result |= CPU_FLUSHOPT;

    // AMD|Hygon features.
    if (is_amd_family()) {
      if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) ||
          (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0))
        result |= CPU_3DNOW_PREFETCH;
      if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
        result |= CPU_LZCNT;
      if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
        result |= CPU_SSE4A;
    }

    // Intel features.
    if (is_intel()) {
      if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
        result |= CPU_LZCNT;
      // for Intel, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
      if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
        result |= CPU_3DNOW_PREFETCH;
      }
      if (_cpuid_info.sef_cpuid7_ebx.bits.clwb != 0) {
        result |= CPU_CLWB;
      }
      if (_cpuid_info.sef_cpuid7_edx.bits.serialize != 0)
        result |= CPU_SERIALIZE;
    }

    // ZX features.
    if (is_zx()) {
      if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
        result |= CPU_LZCNT;
      // for ZX, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
      if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
        result |= CPU_3DNOW_PREFETCH;
      }
    }

    // Composite features.
    if (supports_tscinv_bit() &&
        ((is_amd_family() && !is_amd_Barcelona()) ||
         is_intel_tsc_synched_at_init())) {
      result |= CPU_TSCINV;
    }

    return result;
  }

  static bool os_supports_avx_vectors() {
    bool retVal = false;
    int nreg = 2 LP64_ONLY(+2);
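    // Added note (illustrative): nreg is 4 on 64-bit, where the cpuid stub
    // samples ymm/zmm 0, 7, 8 and 15 (per the ymm_save/zmm_save comments),
    // and 2 on 32-bit, where only the first two sampled registers exist.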
    if (supports_evex()) {
      // Verify that the OS saves/restores all bits of EVEX registers
      // during signal processing.
      retVal = true;
      for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
        if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
          retVal = false;
          break;
        }
      }
    } else if (supports_avx()) {
      // Verify that the OS saves/restores all bits of AVX registers
      // during signal processing.
      retVal = true;
      for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
        if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
          retVal = false;
          break;
        }
      }
      // zmm_save will be set on an EVEX enabled machine even if we choose AVX code gen
      if (retVal == false) {
        // Verify that the OS saves/restores all bits of EVEX registers
        // during signal processing.
        retVal = true;
        for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
          if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
            retVal = false;
            break;
          }
        }
      }
    }
    return retVal;
  }

  static void get_processor_features();

 public:
  // Offsets for cpuid asm stub
  static ByteSize std_cpuid0_offset()  { return byte_offset_of(CpuidInfo, std_max_function); }
  static ByteSize std_cpuid1_offset()  { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
  static ByteSize dcp_cpuid4_offset()  { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
  static ByteSize sef_cpuid7_offset()  { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); }
  static ByteSize ext_cpuid1_offset()  { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
  static ByteSize ext_cpuid5_offset()  { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
  static ByteSize ext_cpuid7_offset()  { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
  static ByteSize ext_cpuid8_offset()  { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
  static ByteSize ext_cpuid1E_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
  static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
  static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
  static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
  static ByteSize xem_xcr0_offset()    { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
  static ByteSize ymm_save_offset()    { return byte_offset_of(CpuidInfo, ymm_save); }
  static ByteSize zmm_save_offset()    { return byte_offset_of(CpuidInfo, zmm_save); }

  // The value used to check ymm registers after signal handling
  static int ymm_test_value() { return 0xCAFEBABE; }

  static void get_cpu_info_wrapper();
  static void set_cpuinfo_segv_addr(address pc) { _cpuinfo_segv_addr = pc; }
  static bool is_cpuinfo_segv_addr(address pc)  { return _cpuinfo_segv_addr == pc; }
  static void set_cpuinfo_cont_addr(address pc) { _cpuinfo_cont_addr = pc; }
  static address cpuinfo_cont_addr()            { return _cpuinfo_cont_addr; }

  static void clean_cpuFeatures()   { _features = 0; }
  static void set_avx_cpuFeatures()  { _features = (CPU_SSE | CPU_SSE2 | CPU_AVX | CPU_VZEROUPPER); }
  static void set_evex_cpuFeatures() { _features = (CPU_AVX512F | CPU_SSE | CPU_SSE2 | CPU_VZEROUPPER); }


  // Initialization
  static void initialize();

  // Override Abstract_VM_Version implementation
  static void print_platform_virtualization_info(outputStream*);

  // Asserts
  static void assert_is_initialized() {
    assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized");
  }
760 | // |
761 | // Processor family: |
762 | // 3 - 386 |
763 | // 4 - 486 |
764 | // 5 - Pentium |
765 | // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon, |
766 | // Pentium M, Core Solo, Core Duo, Core2 Duo |
767 | // family 6 model: 9, 13, 14, 15 |
768 | // 0x0f - Pentium 4, Opteron |
769 | // |
770 | // Note: The cpu family should be used to select between |
771 | // instruction sequences which are valid on all Intel |
772 | // processors. Use the feature test functions below to |
773 | // determine whether a particular instruction is supported. |
774 | // |
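// Editor's illustrative sketch (not part of the original header): gate
// instruction selection on a feature bit, never on the family id alone.
//
//   if (VM_Version::supports_popcnt()) {
//     __ popcntl(dst, src);     // single-instruction population count
//   } else {
//     // emit a bit-twiddling fallback that is valid on every family
//   }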
775 | static int cpu_family() { return _cpu;} |
776 | static bool is_P6() { return cpu_family() >= 6; } |
777 | static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' |
778 | static bool is_hygon() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x6F677948; } // 'ogyH' |
779 | static bool is_amd_family() { return is_amd() || is_hygon(); } |
780 | static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' |
781 | static bool is_zx() { assert_is_initialized(); return (_cpuid_info.std_vendor_name_0 == 0x746e6543) || (_cpuid_info.std_vendor_name_0 == 0x68532020); } // 'tneC'||'hS ' |
782 | static bool is_atom_family() { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x36) || (extended_cpu_model() == 0x37) || (extended_cpu_model() == 0x4D))); } // Silvermont and Centerton |
783 | static bool is_knights_family() { return UseKNLSetting || ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x57) || (extended_cpu_model() == 0x85))); } // Xeon Phi 3200/5200/7200 and Future Xeon Phi |
784 | |
785 | static bool supports_processor_topology() { |
786 | return (_cpuid_info.std_max_function >= 0xB) && |
787 | // eax[4:0] | ebx[0:15] == 0 indicates invalid topology level. |
788 | // Some cpus have max cpuid >= 0xB but do not support processor topology. |
789 | (((_cpuid_info.tpl_cpuidB0_eax & 0x1f) | _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus) != 0); |
790 | } |
791 | |
792 | static uint cores_per_cpu() { |
793 | uint result = 1; |
794 | if (is_intel()) { |
795 | bool supports_topology = supports_processor_topology(); |
796 | if (supports_topology) { |
797 | result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / |
798 | _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; |
799 | } |
800 | if (!supports_topology || result == 0) { |
801 | result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); |
802 | } |
803 | } else if (is_amd_family()) { |
804 | result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); |
805 | } else if (is_zx()) { |
806 | bool supports_topology = supports_processor_topology(); |
807 | if (supports_topology) { |
808 | result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / |
809 | _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; |
810 | } |
811 | if (!supports_topology || result == 0) { |
812 | result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); |
813 | } |
814 | } |
815 | return result; |
816 | } |
817 | |
818 | static uint threads_per_core() { |
819 | uint result = 1; |
820 | if (is_intel() && supports_processor_topology()) { |
821 | result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; |
822 | } else if (is_zx() && supports_processor_topology()) { |
823 | result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; |
824 | } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { |
825 | if (cpu_family() >= 0x17) { |
826 | result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1; |
827 | } else { |
828 | result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / |
829 | cores_per_cpu(); |
830 | } |
831 | } |
832 | return (result == 0 ? 1 : result); |
833 | } |
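// Worked example (editor addition): on a hypothetical 8-core/16-thread
// Intel part, CPUID leaf 0xB reports logical_cpus = 2 at level 0 (SMT)
// and logical_cpus = 16 at level 1 (core), so the accessors above yield
//   cores_per_cpu()    = 16 / 2 = 8
//   threads_per_core() = 2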
834 | |
835 | static intx L1_line_size() { |
836 | intx result = 0; |
837 | if (is_intel()) { |
838 | result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); |
839 | } else if (is_amd_family()) { |
840 | result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; |
841 | } else if (is_zx()) { |
842 | result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); |
843 | } |
844 | if (result < 32) // not defined ? |
845 | result = 32; // 32 bytes by default on x86 and other x64 |
846 | return result; |
847 | } |
848 | |
849 | static intx prefetch_data_size() { |
850 | return L1_line_size(); |
851 | } |
852 | |
853 | // |
854 | // Feature identification |
855 | // |
856 | static bool supports_cpuid() { return _features != 0; } |
857 | static bool supports_cmpxchg8() { return (_features & CPU_CX8) != 0; } |
858 | static bool supports_cmov() { return (_features & CPU_CMOV) != 0; } |
859 | static bool supports_fxsr() { return (_features & CPU_FXSR) != 0; } |
860 | static bool supports_ht() { return (_features & CPU_HT) != 0; } |
861 | static bool supports_mmx() { return (_features & CPU_MMX) != 0; } |
862 | static bool supports_sse() { return (_features & CPU_SSE) != 0; } |
863 | static bool supports_sse2() { return (_features & CPU_SSE2) != 0; } |
864 | static bool supports_sse3() { return (_features & CPU_SSE3) != 0; } |
865 | static bool supports_ssse3() { return (_features & CPU_SSSE3)!= 0; } |
866 | static bool supports_sse4_1() { return (_features & CPU_SSE4_1) != 0; } |
867 | static bool supports_sse4_2() { return (_features & CPU_SSE4_2) != 0; } |
868 | static bool supports_popcnt() { return (_features & CPU_POPCNT) != 0; } |
869 | static bool supports_avx() { return (_features & CPU_AVX) != 0; } |
870 | static bool supports_avx2() { return (_features & CPU_AVX2) != 0; } |
871 | static bool supports_tsc() { return (_features & CPU_TSC) != 0; } |
872 | static bool supports_aes() { return (_features & CPU_AES) != 0; } |
873 | static bool supports_erms() { return (_features & CPU_ERMS) != 0; } |
874 | static bool supports_clmul() { return (_features & CPU_CLMUL) != 0; } |
875 | static bool supports_rtm() { return (_features & CPU_RTM) != 0; } |
876 | static bool supports_bmi1() { return (_features & CPU_BMI1) != 0; } |
877 | static bool supports_bmi2() { return (_features & CPU_BMI2) != 0; } |
878 | static bool supports_adx() { return (_features & CPU_ADX) != 0; } |
879 | static bool supports_evex() { return (_features & CPU_AVX512F) != 0; } |
880 | static bool supports_avx512dq() { return (_features & CPU_AVX512DQ) != 0; } |
881 | static bool supports_avx512pf() { return (_features & CPU_AVX512PF) != 0; } |
882 | static bool supports_avx512er() { return (_features & CPU_AVX512ER) != 0; } |
883 | static bool supports_avx512cd() { return (_features & CPU_AVX512CD) != 0; } |
884 | static bool supports_avx512bw() { return (_features & CPU_AVX512BW) != 0; } |
885 | static bool supports_avx512vl() { return (_features & CPU_AVX512VL) != 0; } |
886 | static bool supports_avx512vlbw() { return (supports_evex() && supports_avx512bw() && supports_avx512vl()); } |
887 | static bool supports_avx512bwdq() { return (supports_evex() && supports_avx512bw() && supports_avx512dq()); } |
888 | static bool supports_avx512vldq() { return (supports_evex() && supports_avx512dq() && supports_avx512vl()); } |
889 | static bool supports_avx512vlbwdq() { return (supports_evex() && supports_avx512vl() && |
890 | supports_avx512bw() && supports_avx512dq()); } |
891 | static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); } |
892 | static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); } |
893 | static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); } |
894 | static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); } |
895 | static bool supports_sha() { return (_features & CPU_SHA) != 0; } |
896 | static bool supports_fma() { return (_features & CPU_FMA) != 0 && supports_avx(); } |
897 | static bool supports_vzeroupper() { return (_features & CPU_VZEROUPPER) != 0; } |
898 | static bool supports_avx512_vpopcntdq() { return (_features & CPU_AVX512_VPOPCNTDQ) != 0; } |
899 | static bool supports_avx512_vpclmulqdq() { return (_features & CPU_AVX512_VPCLMULQDQ) != 0; } |
900 | static bool supports_avx512_vaes() { return (_features & CPU_AVX512_VAES) != 0; } |
901 | static bool supports_avx512_vnni() { return (_features & CPU_AVX512_VNNI) != 0; } |
902 | static bool supports_avx512_vbmi() { return (_features & CPU_AVX512_VBMI) != 0; } |
903 | static bool supports_avx512_vbmi2() { return (_features & CPU_AVX512_VBMI2) != 0; } |
904 | static bool supports_hv() { return (_features & CPU_HV) != 0; } |
905 | static bool supports_serialize() { return (_features & CPU_SERIALIZE) != 0; } |
906 | |
907 | // Intel features |
908 | static bool is_intel_family_core() { return is_intel() && |
909 | extended_cpu_family() == CPU_FAMILY_INTEL_CORE; } |
910 | |
911 | static bool is_intel_skylake() { return is_intel_family_core() && |
912 | extended_cpu_model() == CPU_MODEL_SKYLAKE; } |
913 | |
914 | static int avx3_threshold(); |
915 | |
916 | static bool is_intel_tsc_synched_at_init() { |
917 | if (is_intel_family_core()) { |
918 | uint32_t ext_model = extended_cpu_model(); |
919 | if (ext_model == CPU_MODEL_NEHALEM_EP || |
920 | ext_model == CPU_MODEL_WESTMERE_EP || |
921 | ext_model == CPU_MODEL_SANDYBRIDGE_EP || |
922 | ext_model == CPU_MODEL_IVYBRIDGE_EP) { |
923 | // <= 2-socket invariant tsc support. EX versions are usually used |
924 | // in > 2-socket systems and likely don't synchronize tscs at |
925 | // initialization. |
926 | // Code that uses tsc values must be prepared for them to arbitrarily |
927 | // jump forward or backward. |
928 | return true; |
929 | } |
930 | } |
931 | return false; |
932 | } |
933 | |
934 | // This checks if the JVM is potentially affected by an erratum on Intel CPUs (SKX102) |
935 | // that causes unpredictable behaviour when jcc crosses 64 byte boundaries. Its microcode |
936 | // mitigation causes regressions when jumps or fused conditional branches cross or end at |
937 | // 32 byte boundaries. |
938 | static bool has_intel_jcc_erratum() { return _has_intel_jcc_erratum; } |
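// Editor's illustrative sketch (hypothetical emitter code, not the actual
// mitigation in this repository): code generators consult this flag and
// pad so a fused cmp+jcc pair cannot cross a 32-byte boundary.
//
//   if (VM_Version::has_intel_jcc_erratum()) {
//     // insert nops so the next two instructions share one 32-byte chunk
//   }
//   __ cmpl(rax, rbx);
//   __ jcc(Assembler::notEqual, L_retry);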
939 | |
940 | // AMD features |
941 | static bool supports_3dnow_prefetch() { return (_features & CPU_3DNOW_PREFETCH) != 0; } |
942 | static bool supports_lzcnt() { return (_features & CPU_LZCNT) != 0; } |
943 | static bool supports_sse4a() { return (_features & CPU_SSE4A) != 0; } |
944 | |
945 | static bool is_amd_Barcelona() { return is_amd() && |
946 | extended_cpu_family() == CPU_FAMILY_AMD_11H; } |
947 | |
948 | // Intel and AMD newer cores support fast timestamps well |
949 | static bool supports_tscinv_bit() { |
950 | return (_features & CPU_TSCINV_BIT) != 0; |
951 | } |
952 | static bool supports_tscinv() { |
953 | return (_features & CPU_TSCINV) != 0; |
954 | } |
955 | |
956 | // Intel Core and newer cpus have fast IDIV instruction (excluding Atom). |
957 | static bool has_fast_idiv() { return is_intel() && cpu_family() == 6 && |
958 | supports_sse3() && _model != 0x1C; } |
959 | |
960 | static bool supports_compare_and_exchange() { return true; } |
961 | |
962 | static intx allocate_prefetch_distance(bool use_watermark_prefetch) { |
963 | // Hardware prefetching (distance/size in bytes): |
964 | // Pentium 3 - 64 / 32 |
965 | // Pentium 4 - 256 / 128 |
966 | // Athlon - 64 / 32 ???? |
967 | // Opteron - 128 / 64 only when 2 sequential cache lines accessed |
968 | // Core - 128 / 64 |
969 | // |
970 | // Software prefetching (distance in bytes / instruction with best score): |
971 | // Pentium 3 - 128 / prefetchnta |
972 | // Pentium 4 - 512 / prefetchnta |
973 | // Athlon - 128 / prefetchnta |
974 | // Opteron - 256 / prefetchnta |
975 | // Core - 256 / prefetchnta |
976 | // It will be used only when AllocatePrefetchStyle > 0 |
977 | |
978 | if (is_amd_family()) { // AMD | Hygon |
979 | if (supports_sse2()) { |
980 | return 256; // Opteron |
981 | } else { |
982 | return 128; // Athlon |
983 | } |
984 | } else { // Intel |
985 | if (supports_sse3() && cpu_family() == 6) { |
986 | if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus |
987 | return 192; |
988 | } else if (use_watermark_prefetch) { // watermark prefetching on Core |
989 | #ifdef _LP64 |
990 | return 384; |
991 | #else |
992 | return 320; |
993 | #endif |
994 | } |
995 | } |
996 | if (supports_sse2()) { |
997 | if (cpu_family() == 6) { |
998 | return 256; // Pentium M, Core, Core2 |
999 | } else { |
1000 | return 512; // Pentium 4 |
1001 | } |
1002 | } else { |
1003 | return 128; // Pentium 3 (and all other old CPUs) |
1004 | } |
1005 | } |
1006 | } |
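// Usage note (editor addition): this routine only supplies the default.
// The product flags -XX:AllocatePrefetchDistance=<bytes> and
// -XX:AllocatePrefetchStyle=<0..3> override it, e.g.
//
//   java -XX:AllocatePrefetchStyle=1 -XX:AllocatePrefetchDistance=256 ...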
1007 | |
1008 | // SSE2 and later processors implement a 'pause' instruction |
1009 | // that can be used for efficient implementation of |
1010 | // the intrinsic for java.lang.Thread.onSpinWait() |
1011 | static bool supports_on_spin_wait() { return supports_sse2(); } |
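// Editor's illustrative sketch: the onSpinWait() intrinsic can then be
// lowered to the 'pause' hint, which cheapens spin loops by reducing
// speculative memory-order violations and power draw.
//
//   if (VM_Version::supports_on_spin_wait()) {
//     __ pause();   // encoded as F3 90
//   }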
1012 | |
1013 | // x86_64 supports fast class initialization checks for static methods. |
1014 | static bool supports_fast_class_init_checks() { |
1015 | return LP64_ONLY(true) NOT_LP64(false); // not implemented on x86_32 |
1016 | } |
1017 | |
1018 | constexpr static bool supports_stack_watermark_barrier() { |
1019 | return true; |
1020 | } |
1021 | |
1022 | // there are several insns to force cache line sync to memory which |
1023 | // we can use to ensure mapped non-volatile memory is up to date with |
1024 | // pending in-cache changes. |
1025 | // |
1026 | // 64 bit cpus always support clflush which writes back and evicts |
1027 | // on 32 bit cpus support is recorded via a feature flag |
1028 | // |
1029 | // clflushopt is optional and acts like clflush except it does |
1030 | // not synchronize with other memory ops. it needs a preceding |
1031 | // and trailing StoreStore fence |
1032 | // |
1033 | // clwb is an optional intel-specific instruction which |
1034 | // writes back without evicting the line. it also does not |
1035 | // synchronize with other memory ops. so, it needs preceding |
1036 | // and trailing StoreStore fences. |
1037 | |
1038 | #ifdef _LP64 |
1039 | |
1040 | static bool supports_clflush(); // Can't inline due to header file conflict |
1041 | #else |
1042 | static bool supports_clflush() { return ((_features & CPU_FLUSH) != 0); } |
1043 | #endif // _LP64 |
1044 | // Note: CPU_FLUSHOPT and CPU_CLWB bits should always be zero for 32-bit |
1045 | static bool supports_clflushopt() { return ((_features & CPU_FLUSHOPT) != 0); } |
1046 | static bool supports_clwb() { return ((_features & CPU_CLWB) != 0); } |
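// Editor's illustrative sketch of the selection the comments above imply
// (a minimal sketch, not the actual writeback helper):
//
//   if (VM_Version::supports_clwb()) {
//     __ membar(Assembler::StoreStore);
//     __ clwb(line);         // write back, keep the line cached
//     __ membar(Assembler::StoreStore);
//   } else if (VM_Version::supports_clflushopt()) {
//     __ membar(Assembler::StoreStore);
//     __ clflushopt(line);   // write back and evict, unordered
//     __ membar(Assembler::StoreStore);
//   } else {
//     __ clflush(line);      // fully ordered, no extra fences needed
//   }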
1047 | |
1048 | #ifdef __APPLE__ |
1049 | // Is the CPU running emulated (for example macOS Rosetta running x86_64 code on M1 ARM (aarch64))? |
1050 | static bool is_cpu_emulated(); |
1051 | #endif |
1052 | |
1053 | // support functions for virtualization detection |
1054 | private: |
1055 | static void check_virtualizations(); |
1056 | |
1057 | static const char* cpu_family_description(void); |
1058 | static const char* cpu_model_description(void); |
1059 | static const char* cpu_brand(void); |
1060 | static const char* cpu_brand_string(void); |
1061 | |
1062 | static int cpu_type_description(char* const buf, size_t buf_len); |
1063 | static int cpu_detailed_description(char* const buf, size_t buf_len); |
1064 | static int cpu_extended_brand_string(char* const buf, size_t buf_len); |
1065 | |
1066 | static bool cpu_is_em64t(void); |
1067 | static bool is_netburst(void); |
1068 | |
1069 | // Returns bytes written, excluding the terminating null byte. |
1070 | static size_t cpu_write_support_string(char* const buf, size_t buf_len); |
1071 | static void resolve_cpu_information_details(void); |
1072 | static int64_t max_qualified_cpu_freq_from_brand_string(void); |
1073 | |
1074 | public: |
1075 | // Offsets for cpuid asm stub brand string |
1076 | static ByteSize proc_name_0_offset() { return byte_offset_of(CpuidInfo, proc_name_0); } |
1077 | static ByteSize proc_name_1_offset() { return byte_offset_of(CpuidInfo, proc_name_1); } |
1078 | static ByteSize proc_name_2_offset() { return byte_offset_of(CpuidInfo, proc_name_2); } |
1079 | static ByteSize proc_name_3_offset() { return byte_offset_of(CpuidInfo, proc_name_3); } |
1080 | static ByteSize proc_name_4_offset() { return byte_offset_of(CpuidInfo, proc_name_4); } |
1081 | static ByteSize proc_name_5_offset() { return byte_offset_of(CpuidInfo, proc_name_5); } |
1082 | static ByteSize proc_name_6_offset() { return byte_offset_of(CpuidInfo, proc_name_6); } |
1083 | static ByteSize proc_name_7_offset() { return byte_offset_of(CpuidInfo, proc_name_7); } |
1084 | static ByteSize proc_name_8_offset() { return byte_offset_of(CpuidInfo, proc_name_8); } |
1085 | static ByteSize proc_name_9_offset() { return byte_offset_of(CpuidInfo, proc_name_9); } |
1086 | static ByteSize proc_name_10_offset() { return byte_offset_of(CpuidInfo, proc_name_10); } |
1087 | static ByteSize proc_name_11_offset() { return byte_offset_of(CpuidInfo, proc_name_11); } |
1088 | |
1089 | static int64_t maximum_qualified_cpu_frequency(void); |
1090 | |
1091 | static bool supports_tscinv_ext(void); |
1092 | |
1093 | static void initialize_tsc(); |
1094 | static void initialize_cpu_information(void); |
1095 | }; |
1096 | |
1097 | #endif // CPU_X86_VM_VERSION_X86_HPP |
1 | /* | |||
2 | * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. | |||
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |||
4 | * | |||
5 | * This code is free software; you can redistribute it and/or modify it | |||
6 | * under the terms of the GNU General Public License version 2 only, as | |||
7 | * published by the Free Software Foundation. | |||
8 | * | |||
9 | * This code is distributed in the hope that it will be useful, but WITHOUT | |||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |||
12 | * version 2 for more details (a copy is included in the LICENSE file that | |||
13 | * accompanied this code). | |||
14 | * | |||
15 | * You should have received a copy of the GNU General Public License version | |||
16 | * 2 along with this work; if not, write to the Free Software Foundation, | |||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |||
18 | * | |||
19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | |||
20 | * or visit www.oracle.com if you need additional information or have any | |||
21 | * questions. | |||
22 | * | |||
23 | */ | |||
24 | ||||
25 | #ifndef CPU_X86_MACROASSEMBLER_X86_HPP | |||
26 | #define CPU_X86_MACROASSEMBLER_X86_HPP | |||
27 | ||||
28 | #include "asm/assembler.hpp" | |||
29 | #include "code/vmreg.inline.hpp" | |||
30 | #include "compiler/oopMap.hpp" | |||
31 | #include "utilities/macros.hpp" | |||
32 | #include "runtime/rtmLocking.hpp" | |||
33 | #include "runtime/vm_version.hpp" | |||
34 | ||||
35 | // MacroAssembler extends Assembler by frequently used macros. | |||
36 | // | |||
37 | // Instructions for which a 'better' code sequence exists depending | |||
38 | // on arguments should also go in here. | |||
39 | ||||
40 | class MacroAssembler: public Assembler { | |||
41 | friend class LIR_Assembler; | |||
42 | friend class Runtime1; // as_Address() | |||
43 | ||||
44 | public: | |||
45 | // Support for VM calls | |||
46 | // | |||
47 | // This is the base routine called by the different versions of call_VM_leaf. The interpreter | |||
48 | // may customize this version by overriding it for its purposes (e.g., to save/restore | |||
49 | // additional registers when doing a VM call). | |||
50 | ||||
51 | virtual void call_VM_leaf_base( | |||
52 | address entry_point, // the entry point | |||
53 | int number_of_arguments // the number of arguments to pop after the call | |||
54 | ); | |||
55 | ||||
56 | protected: | |||
57 | // This is the base routine called by the different versions of call_VM. The interpreter | |||
58 | // may customize this version by overriding it for its purposes (e.g., to save/restore | |||
59 | // additional registers when doing a VM call). | |||
60 | // | |||
61 | // If no java_thread register is specified (noreg) then rdi will be used instead. call_VM_base | |||
62 | // returns the register which contains the thread upon return. If a thread register has been | |||
63 | // specified, the return value will correspond to that register. If no last_java_sp is specified | |||
64 | // (noreg) then rsp will be used instead. | |||
65 | virtual void call_VM_base( // returns the register containing the thread upon return | |||
66 | Register oop_result, // where an oop-result ends up if any; use noreg otherwise | |||
67 | Register java_thread, // the thread if computed before; use noreg otherwise | |||
68 | Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise | |||
69 | address entry_point, // the entry point | |||
70 | int number_of_arguments, // the number of arguments (w/o thread) to pop after the call | |||
71 | bool check_exceptions // whether to check for pending exceptions after return | |||
72 | ); | |||
73 | ||||
74 | void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); | |||
75 | ||||
76 | // helpers for FPU flag access | |||
77 | // tmp is a temporary register, if none is available use noreg | |||
78 | void save_rax (Register tmp); | |||
79 | void restore_rax(Register tmp); | |||
80 | ||||
81 | public: | |||
82 | MacroAssembler(CodeBuffer* code) : Assembler(code) {} | |||
83 | ||||
84 | // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. | |||
85 | // The implementation is only non-empty for the InterpreterMacroAssembler, | |||
86 | // as only the interpreter handles PopFrame and ForceEarlyReturn requests. | |||
87 | virtual void check_and_handle_popframe(Register java_thread); | |||
88 | virtual void check_and_handle_earlyret(Register java_thread); | |||
89 | ||||
90 | Address as_Address(AddressLiteral adr); | |||
91 | Address as_Address(ArrayAddress adr); | |||
92 | ||||
93 | // Support for NULL-checks | |||
94 | // | |||
95 | // Generates code that causes a NULL OS exception if the content of reg is NULL. | |||
96 | // If the accessed location is M[reg + offset] and the offset is known, provide the | |||
97 | // offset. No explicit code generation is needed if the offset is within a certain | |||
98 | // range (0 <= offset <= page_size). | |||
99 | ||||
100 | void null_check(Register reg, int offset = -1); | |||
101 | static bool needs_explicit_null_check(intptr_t offset); | |||
102 | static bool uses_implicit_null_check(void* address); | |||
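// Editor's illustrative sketch: a field access at a small, statically
// known offset needs no explicit test, because a NULL base faults in the
// protected first page.
//
//   __ null_check(rax, field_offset);          // emits nothing while
//                                              // 0 <= field_offset <= page_size
//   __ movl(rbx, Address(rax, field_offset));  // the load is the implicit check
//
// (field_offset is a stand-in name for illustration.)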
103 | ||||
104 | // Required platform-specific helpers for Label::patch_instructions. | |||
105 | // They _shadow_ the declarations in AbstractAssembler, which are undefined. | |||
106 | void pd_patch_instruction(address branch, address target, const char* file, int line) { | |||
107 | unsigned char op = branch[0]; | |||
108 | assert(op == 0xE8 /* call */ || | |||
109 | op == 0xE9 /* jmp */ || | |||
110 | op == 0xEB /* short jmp */ || | |||
111 | (op & 0xF0) == 0x70 /* short jcc */ || | |||
112 | op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */ || | |||
113 | op == 0xC7 && branch[1] == 0xF8 /* xbegin */, | |||
114 | "Invalid opcode at patch point"); | |||
115 | ||||
116 | if (op == 0xEB || (op & 0xF0) == 0x70) { | |||
117 | // short offset operators (jmp and jcc) | |||
118 | char* disp = (char*) &branch[1]; | |||
119 | int imm8 = target - (address) &disp[1]; | |||
120 | guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset at %s:%d", | |||
121 | file == NULL ? "<NULL>" : file, line); | |||
122 | *disp = imm8; | |||
123 | } else { | |||
124 | int* disp = (int*) &branch[(op == 0x0F || op == 0xC7)? 2: 1]; | |||
125 | int imm32 = target - (address) &disp[1]; | |||
126 | *disp = imm32; | |||
127 | } | |||
128 | } | |||
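// Worked example (editor addition): patching a short jcc located at
// address 0x1000 to target 0x1010. The displacement byte lives at
// branch[1] and is relative to the end of the 2-byte instruction, so
//   imm8 = 0x1010 - 0x1002 = 0x0E
// A target more than +127/-128 bytes away trips the guarantee above.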
129 | ||||
130 | // The following 4 methods return the offset of the appropriate move instruction | |||
131 | ||||
132 | // Support for fast byte/short loading with zero extension (depending on particular CPU) | |||
133 | int load_unsigned_byte(Register dst, Address src); | |||
134 | int load_unsigned_short(Register dst, Address src); | |||
135 | ||||
136 | // Support for fast byte/short loading with sign extension (depending on particular CPU) | |||
137 | int load_signed_byte(Register dst, Address src); | |||
138 | int load_signed_short(Register dst, Address src); | |||
139 | ||||
140 | // Support for sign-extension (hi:lo = extend_sign(lo)) | |||
141 | void extend_sign(Register hi, Register lo); | |||
142 | ||||
143 | // Load and store values by size and signed-ness | |||
144 | void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); | |||
145 | void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); | |||
146 | ||||
147 | // Support for inc/dec with optimal instruction selection depending on value | |||
148 | ||||
149 | void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; } | |||
150 | void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; } | |||
151 | ||||
152 | void decrementl(Address dst, int value = 1); | |||
153 | void decrementl(Register reg, int value = 1); | |||
154 | ||||
155 | void decrementq(Register reg, int value = 1); | |||
156 | void decrementq(Address dst, int value = 1); | |||
157 | ||||
158 | void incrementl(Address dst, int value = 1); | |||
159 | void incrementl(Register reg, int value = 1); | |||
160 | ||||
161 | void incrementq(Register reg, int value = 1); | |||
162 | void incrementq(Address dst, int value = 1); | |||
163 | ||||
164 | // Support optimal SSE move instructions. | |||
165 | void movflt(XMMRegister dst, XMMRegister src) { | |||
166 | if (dst->encoding() == src->encoding()) return; | |||
167 | if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; } | |||
168 | else { movss (dst, src); return; } | |||
169 | } | |||
170 | void movflt(XMMRegister dst, Address src) { movss(dst, src); } | |||
171 | void movflt(XMMRegister dst, AddressLiteral src); | |||
172 | void movflt(Address dst, XMMRegister src) { movss(dst, src); } | |||
173 | ||||
174 | // Move with zero extension | |||
175 | void movfltz(XMMRegister dst, XMMRegister src) { movss(dst, src); } | |||
176 | ||||
177 | void movdbl(XMMRegister dst, XMMRegister src) { | |||
178 | if (dst->encoding() == src->encoding()) return; | |||
179 | if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; } | |||
180 | else { movsd (dst, src); return; } | |||
181 | } | |||
182 | ||||
183 | void movdbl(XMMRegister dst, AddressLiteral src); | |||
184 | ||||
185 | void movdbl(XMMRegister dst, Address src) { | |||
186 | if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; } | |||
187 | else { movlpd(dst, src); return; } | |||
188 | } | |||
189 | void movdbl(Address dst, XMMRegister src) { movsd(dst, src); } | |||
190 | ||||
191 | void incrementl(AddressLiteral dst); | |||
192 | void incrementl(ArrayAddress dst); | |||
193 | ||||
194 | void incrementq(AddressLiteral dst); | |||
195 | ||||
196 | // Alignment | |||
197 | void align32(); | |||
198 | void align64(); | |||
199 | void align(int modulus); | |||
200 | void align(int modulus, int target); | |||
201 | ||||
202 | // A 5 byte nop that is safe for patching (see patch_verified_entry) | |||
203 | void fat_nop(); | |||
204 | ||||
205 | // Stack frame creation/removal | |||
206 | void enter(); | |||
207 | void leave(); | |||
208 | ||||
209 | // Support for getting the JavaThread pointer (i.e., a reference to thread-local information) | |||
210 | // The pointer will be loaded into the thread register. | |||
211 | void get_thread(Register thread); | |||
212 | ||||
213 | #ifdef _LP64 | |||
214 | // Support for argument shuffling | |||
215 | ||||
216 | void move32_64(VMRegPair src, VMRegPair dst); | |||
217 | void long_move(VMRegPair src, VMRegPair dst); | |||
218 | void float_move(VMRegPair src, VMRegPair dst); | |||
219 | void double_move(VMRegPair src, VMRegPair dst); | |||
220 | void move_ptr(VMRegPair src, VMRegPair dst); | |||
221 | void object_move(OopMap* map, | |||
222 | int oop_handle_offset, | |||
223 | int framesize_in_slots, | |||
224 | VMRegPair src, | |||
225 | VMRegPair dst, | |||
226 | bool is_receiver, | |||
227 | int* receiver_offset); | |||
228 | #endif // _LP64 | |||
229 | ||||
230 | // Support for VM calls | |||
231 | // | |||
232 | // It is imperative that all calls into the VM are handled via the call_VM macros. | |||
233 | // They make sure that the stack linkage is set up correctly. call_VM's correspond | |||
234 | // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. | |||
235 | ||||
236 | ||||
237 | void call_VM(Register oop_result, | |||
238 | address entry_point, | |||
239 | bool check_exceptions = true); | |||
240 | void call_VM(Register oop_result, | |||
241 | address entry_point, | |||
242 | Register arg_1, | |||
243 | bool check_exceptions = true); | |||
244 | void call_VM(Register oop_result, | |||
245 | address entry_point, | |||
246 | Register arg_1, Register arg_2, | |||
247 | bool check_exceptions = true); | |||
248 | void call_VM(Register oop_result, | |||
249 | address entry_point, | |||
250 | Register arg_1, Register arg_2, Register arg_3, | |||
251 | bool check_exceptions = true); | |||
252 | ||||
253 | // Overloadings with last_Java_sp | |||
254 | void call_VM(Register oop_result, | |||
255 | Register last_java_sp, | |||
256 | address entry_point, | |||
257 | int number_of_arguments = 0, | |||
258 | bool check_exceptions = true); | |||
259 | void call_VM(Register oop_result, | |||
260 | Register last_java_sp, | |||
261 | address entry_point, | |||
262 | Register arg_1, bool | |||
263 | check_exceptions = true); | |||
264 | void call_VM(Register oop_result, | |||
265 | Register last_java_sp, | |||
266 | address entry_point, | |||
267 | Register arg_1, Register arg_2, | |||
268 | bool check_exceptions = true); | |||
269 | void call_VM(Register oop_result, | |||
270 | Register last_java_sp, | |||
271 | address entry_point, | |||
272 | Register arg_1, Register arg_2, Register arg_3, | |||
273 | bool check_exceptions = true); | |||
274 | ||||
275 | void get_vm_result (Register oop_result, Register thread); | |||
276 | void get_vm_result_2(Register metadata_result, Register thread); | |||
277 | ||||
278 | // These always tightly bind to MacroAssembler::call_VM_base | |||
279 | // bypassing the virtual implementation | |||
280 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true); | |||
281 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true); | |||
282 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); | |||
283 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); | |||
284 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true); | |||
285 | ||||
286 | void call_VM_leaf0(address entry_point); | |||
287 | void call_VM_leaf(address entry_point, | |||
288 | int number_of_arguments = 0); | |||
289 | void call_VM_leaf(address entry_point, | |||
290 | Register arg_1); | |||
291 | void call_VM_leaf(address entry_point, | |||
292 | Register arg_1, Register arg_2); | |||
293 | void call_VM_leaf(address entry_point, | |||
294 | Register arg_1, Register arg_2, Register arg_3); | |||
295 | ||||
296 | // These always tightly bind to MacroAssembler::call_VM_leaf_base | |||
297 | // bypassing the virtual implementation | |||
298 | void super_call_VM_leaf(address entry_point); | |||
299 | void super_call_VM_leaf(address entry_point, Register arg_1); | |||
300 | void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); | |||
301 | void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); | |||
302 | void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4); | |||
303 | ||||
304 | // last Java Frame (fills frame anchor) | |||
305 | void set_last_Java_frame(Register thread, | |||
306 | Register last_java_sp, | |||
307 | Register last_java_fp, | |||
308 | address last_java_pc); | |||
309 | ||||
310 | // thread in the default location (r15_thread on 64bit) | |||
311 | void set_last_Java_frame(Register last_java_sp, | |||
312 | Register last_java_fp, | |||
313 | address last_java_pc); | |||
314 | ||||
315 | void reset_last_Java_frame(Register thread, bool clear_fp); | |||
316 | ||||
317 | // thread in the default location (r15_thread on 64bit) | |||
318 | void reset_last_Java_frame(bool clear_fp); | |||
319 | ||||
320 | // jobjects | |||
321 | void clear_jweak_tag(Register possibly_jweak); | |||
322 | void resolve_jobject(Register value, Register thread, Register tmp); | |||
323 | ||||
324 | // C 'boolean' to Java boolean: x == 0 ? 0 : 1 | |||
325 | void c2bool(Register x); | |||
326 | ||||
327 | // C++ bool manipulation | |||
328 | ||||
329 | void movbool(Register dst, Address src); | |||
330 | void movbool(Address dst, bool boolconst); | |||
331 | void movbool(Address dst, Register src); | |||
332 | void testbool(Register dst); | |||
333 | ||||
334 | void resolve_oop_handle(Register result, Register tmp = rscratch2); | |||
335 | void resolve_weak_handle(Register result, Register tmp); | |||
336 | void load_mirror(Register mirror, Register method, Register tmp = rscratch2); | |||
337 | void load_method_holder_cld(Register rresult, Register rmethod); | |||
338 | ||||
339 | void load_method_holder(Register holder, Register method); | |||
340 | ||||
341 | // oop manipulations | |||
342 | void load_klass(Register dst, Register src, Register tmp); | |||
343 | void store_klass(Register dst, Register src, Register tmp); | |||
344 | ||||
345 | void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, | |||
346 | Register tmp1, Register thread_tmp); | |||
347 | void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, | |||
348 | Register tmp1, Register tmp2); | |||
349 | ||||
350 | void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, | |||
351 | Register thread_tmp = noreg, DecoratorSet decorators = 0); | |||
352 | void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, | |||
353 | Register thread_tmp = noreg, DecoratorSet decorators = 0); | |||
354 | void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, | |||
355 | Register tmp2 = noreg, DecoratorSet decorators = 0); | |||
356 | ||||
357 | // Used for storing NULL. All other oop constants should be | |||
358 | // stored using routines that take a jobject. | |||
359 | void store_heap_oop_null(Address dst); | |||
360 | ||||
361 | #ifdef _LP64 | |||
362 | void store_klass_gap(Register dst, Register src); | |||
363 | ||||
364 | // This dummy is to prevent a call to store_heap_oop from | |||
365 | // converting a zero (like NULL) into a Register by giving | |||
366 | // the compiler two choices it can't resolve | |||
367 | ||||
368 | void store_heap_oop(Address dst, void* dummy); | |||
369 | ||||
370 | void encode_heap_oop(Register r); | |||
371 | void decode_heap_oop(Register r); | |||
372 | void encode_heap_oop_not_null(Register r); | |||
373 | void decode_heap_oop_not_null(Register r); | |||
374 | void encode_heap_oop_not_null(Register dst, Register src); | |||
375 | void decode_heap_oop_not_null(Register dst, Register src); | |||
376 | ||||
377 | void set_narrow_oop(Register dst, jobject obj); | |||
378 | void set_narrow_oop(Address dst, jobject obj); | |||
379 | void cmp_narrow_oop(Register dst, jobject obj); | |||
380 | void cmp_narrow_oop(Address dst, jobject obj); | |||
381 | ||||
382 | void encode_klass_not_null(Register r, Register tmp); | |||
383 | void decode_klass_not_null(Register r, Register tmp); | |||
384 | void encode_and_move_klass_not_null(Register dst, Register src); | |||
385 | void decode_and_move_klass_not_null(Register dst, Register src); | |||
386 | void set_narrow_klass(Register dst, Klass* k); | |||
387 | void set_narrow_klass(Address dst, Klass* k); | |||
388 | void cmp_narrow_klass(Register dst, Klass* k); | |||
389 | void cmp_narrow_klass(Address dst, Klass* k); | |||
390 | ||||
391 | // if heap base register is used - reinit it with the correct value | |||
392 | void reinit_heapbase(); | |||
393 | ||||
394 | DEBUG_ONLY(void verify_heapbase(const char* msg);) | |||
395 | ||||
396 | #endif // _LP64 | |||
397 | ||||
398 | // Int division/remainder for Java | |||
399 | // (as idivl, but checks for special case as described in JVM spec.) | |||
400 | // returns idivl instruction offset for implicit exception handling | |||
401 | int corrected_idivl(Register reg); | |||
402 | ||||
403 | // Long division/remainder for Java | |||
404 | // (as idivq, but checks for special case as described in JVM spec.) | |||
405 | // returns idivq instruction offset for implicit exception handling | |||
406 | int corrected_idivq(Register reg); | |||
407 | ||||
408 | void int3(); | |||
409 | ||||
410 | // Long operation macros for a 32bit cpu | |||
411 | // Long negation for Java | |||
412 | void lneg(Register hi, Register lo); | |||
413 | ||||
414 | // Long multiplication for Java | |||
415 | // (destroys contents of eax, ebx, ecx and edx) | |||
416 | void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y | |||
417 | ||||
418 | // Long shifts for Java | |||
419 | // (semantics as described in JVM spec.) | |||
420 | void lshl(Register hi, Register lo); // hi:lo << (rcx & 0x3f) | |||
421 | void lshr(Register hi, Register lo, bool sign_extension = false); // hi:lo >> (rcx & 0x3f) | |||
422 | ||||
423 | // Long compare for Java | |||
424 | // (semantics as described in JVM spec.) | |||
425 | void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y) | |||
426 | ||||
427 | ||||
428 | // misc | |||
429 | ||||
430 | // Sign extension | |||
431 | void sign_extend_short(Register reg); | |||
432 | void sign_extend_byte(Register reg); | |||
433 | ||||
434 | // Division by power of 2, rounding towards 0 | |||
435 | void division_with_shift(Register reg, int shift_value); | |||
436 | ||||
437 | #ifndef _LP64 | |||
438 | // Compares the top-most stack entries on the FPU stack and sets the eflags as follows: | |||
439 | // | |||
440 | // CF (corresponds to C0) if x < y | |||
441 | // PF (corresponds to C2) if unordered | |||
442 | // ZF (corresponds to C3) if x = y | |||
443 | // | |||
444 | // The arguments are in reversed order on the stack (i.e., top of stack is first argument). | |||
445 | // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code) | |||
446 | void fcmp(Register tmp); | |||
447 | // Variant of the above which allows y to be further down the stack | |||
448 | // and which only pops x and y if specified. If pop_right is | |||
449 | // specified then pop_left must also be specified. | |||
450 | void fcmp(Register tmp, int index, bool pop_left, bool pop_right); | |||
451 | ||||
452 | // Floating-point comparison for Java | |||
453 | // Compares the top-most stack entries on the FPU stack and stores the result in dst. | |||
454 | // The arguments are in reversed order on the stack (i.e., top of stack is first argument). | |||
455 | // (semantics as described in JVM spec.) | |||
456 | void fcmp2int(Register dst, bool unordered_is_less); | |||
457 | // Variant of the above which allows y to be further down the stack | |||
458 | // and which only pops x and y if specified. If pop_right is | |||
459 | // specified then pop_left must also be specified. | |||
460 | void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right); | |||
461 | ||||
462 | // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards) | |||
463 | // tmp is a temporary register, if none is available use noreg | |||
464 | void fremr(Register tmp); | |||
465 | ||||
466 | // only if +VerifyFPU | |||
467 | void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); | |||
468 | #endif // !LP64 | |||
469 | ||||
470 | // dst = c = a * b + c | |||
471 | void fmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c); | |||
472 | void fmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c); | |||
473 | ||||
474 | void vfmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len); | |||
475 | void vfmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len); | |||
476 | void vfmad(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len); | |||
477 | void vfmaf(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len); | |||
478 | ||||
479 | ||||
480 | // same as fcmp2int, but using SSE2 | |||
481 | void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); | |||
482 | void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); | |||
483 | ||||
484 | // branch to L if FPU flag C2 is set/not set | |||
485 | // tmp is a temporary register, if none is available use noreg | |||
486 | void jC2 (Register tmp, Label& L); | |||
487 | void jnC2(Register tmp, Label& L); | |||
488 | ||||
489 | // Load float value from 'address'. If UseSSE >= 1, the value is loaded into | |||
490 | // register xmm0. Otherwise, the value is loaded onto the FPU stack. | |||
491 | void load_float(Address src); | |||
492 | ||||
493 | // Store float value to 'address'. If UseSSE >= 1, the value is stored | |||
494 | // from register xmm0. Otherwise, the value is stored from the FPU stack. | |||
495 | void store_float(Address dst); | |||
496 | ||||
497 | // Load double value from 'address'. If UseSSE >= 2, the value is loaded into | |||
498 | // register xmm0. Otherwise, the value is loaded onto the FPU stack. | |||
499 | void load_double(Address src); | |||
500 | ||||
501 | // Store double value to 'address'. If UseSSE >= 2, the value is stored | |||
502 | // from register xmm0. Otherwise, the value is stored from the FPU stack. | |||
503 | void store_double(Address dst); | |||
504 | ||||
505 | #ifndef _LP64 | |||
506 | // Pop ST (ffree & fincstp combined) | |||
507 | void fpop(); | |||
508 | ||||
509 | void empty_FPU_stack(); | |||
510 | #endif // !_LP64 | |||
511 | ||||
512 | void push_IU_state(); | |||
513 | void pop_IU_state(); | |||
514 | ||||
515 | void push_FPU_state(); | |||
516 | void pop_FPU_state(); | |||
517 | ||||
518 | void push_CPU_state(); | |||
519 | void pop_CPU_state(); | |||
520 | ||||
521 | // Round up to a power of two | |||
522 | void round_to(Register reg, int modulus); | |||
523 | ||||
524 | // Callee saved registers handling | |||
525 | void push_callee_saved_registers(); | |||
526 | void pop_callee_saved_registers(); | |||
527 | ||||
528 | // allocation | |||
529 | void eden_allocate( | |||
530 | Register thread, // Current thread | |||
531 | Register obj, // result: pointer to object after successful allocation | |||
532 | Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise | |||
533 | int con_size_in_bytes, // object size in bytes if known at compile time | |||
534 | Register t1, // temp register | |||
535 | Label& slow_case // continuation point if fast allocation fails | |||
536 | ); | |||
537 | void tlab_allocate( | |||
538 | Register thread, // Current thread | |||
539 | Register obj, // result: pointer to object after successful allocation | |||
540 | Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise | |||
541 | int con_size_in_bytes, // object size in bytes if known at compile time | |||
542 | Register t1, // temp register | |||
543 | Register t2, // temp register | |||
544 | Label& slow_case // continuation point if fast allocation fails | |||
545 | ); | |||
546 | void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp); | |||
547 | ||||
548 | // interface method calling | |||
549 | void lookup_interface_method(Register recv_klass, | |||
550 | Register intf_klass, | |||
551 | RegisterOrConstant itable_index, | |||
552 | Register method_result, | |||
553 | Register scan_temp, | |||
554 | Label& no_such_interface, | |||
555 | bool return_method = true); | |||
556 | ||||
557 | // virtual method calling | |||
558 | void lookup_virtual_method(Register recv_klass, | |||
559 | RegisterOrConstant vtable_index, | |||
560 | Register method_result); | |||
561 | ||||
562 | // Test sub_klass against super_klass, with fast and slow paths. | |||
563 | ||||
564 | // The fast path produces a tri-state answer: yes / no / maybe-slow. | |||
565 | // One of the three labels can be NULL, meaning take the fall-through. | |||
566 | // If super_check_offset is -1, the value is loaded up from super_klass. | |||
567 | // No registers are killed, except temp_reg. | |||
568 | void check_klass_subtype_fast_path(Register sub_klass, | |||
569 | Register super_klass, | |||
570 | Register temp_reg, | |||
571 | Label* L_success, | |||
572 | Label* L_failure, | |||
573 | Label* L_slow_path, | |||
574 | RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); | |||
575 | ||||
576 | // The rest of the type check; must be wired to a corresponding fast path. | |||
577 | // It does not repeat the fast path logic, so don't use it standalone. | |||
578 | // The temp_reg and temp2_reg can be noreg, if no temps are available. | |||
579 | // Updates the sub's secondary super cache as necessary. | |||
580 | // If set_cond_codes, condition codes will be Z on success, NZ on failure. | |||
581 | void check_klass_subtype_slow_path(Register sub_klass, | |||
582 | Register super_klass, | |||
583 | Register temp_reg, | |||
584 | Register temp2_reg, | |||
585 | Label* L_success, | |||
586 | Label* L_failure, | |||
587 | bool set_cond_codes = false); | |||
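// Editor's note: the intended pairing of the two halves looks like the
// combined helper below; falling through the fast path means "maybe",
// which the slow path then resolves.
//
//   Label L_failure;
//   __ check_klass_subtype_fast_path(sub, super, tmp,        &L_success, &L_failure, NULL);
//   __ check_klass_subtype_slow_path(sub, super, tmp, noreg, &L_success, NULL);
//   __ bind(L_failure);   // both "no" answers merge here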
588 | ||||
589 | // Simplified, combined version, good for typical uses. | |||
590 | // Falls through on failure. | |||
591 | void check_klass_subtype(Register sub_klass, | |||
592 | Register super_klass, | |||
593 | Register temp_reg, | |||
594 | Label& L_success); | |||
595 | ||||
596 | void clinit_barrier(Register klass, | |||
597 | Register thread, | |||
598 | Label* L_fast_path = NULL, | |||
599 | Label* L_slow_path = NULL); | |||
600 | ||||
601 | // method handles (JSR 292) | |||
602 | Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); | |||
603 | ||||
604 | // Debugging | |||
605 | ||||
606 | // only if +VerifyOops | |||
607 | void _verify_oop(Register reg, const char* s, const char* file, int line); | |||
608 | void _verify_oop_addr(Address addr, const char* s, const char* file, int line); | |||
609 | ||||
610 | void _verify_oop_checked(Register reg, const char* s, const char* file, int line) { | |||
611 | if (VerifyOops) { | |||
612 | _verify_oop(reg, s, file, line); | |||
613 | } | |||
614 | } | |||
615 | void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) { | |||
616 | if (VerifyOops) { | |||
617 | _verify_oop_addr(reg, s, file, line); | |||
618 | } | |||
619 | } | |||
620 | ||||
621 | // TODO: verify method and klass metadata (compare against vptr?) | |||
622 | void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} | |||
623 | void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line) {} | |||
624 | ||||
625 | #define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__) | |||
626 | #define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__) | |||
627 | #define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__) | |||
628 | #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) | |||
629 | #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) | |||
630 | ||||
631 | // Verify or restore cpu control state after JNI call | |||
632 | void restore_cpu_control_state_after_jni(); | |||
633 | ||||
634 | // prints msg, dumps registers and stops execution | |||
635 | void stop(const char* msg); | |||
636 | ||||
637 | // prints msg and continues | |||
638 | void warn(const char* msg); | |||
639 | ||||
640 | // dumps registers and other state | |||
641 | void print_state(); | |||
642 | ||||
643 | static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg); | |||
644 | static void debug64(char* msg, int64_t pc, int64_t regs[]); | |||
645 | static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip); | |||
646 | static void print_state64(int64_t pc, int64_t regs[]); | |||
647 | ||||
648 | void os_breakpoint(); | |||
649 | ||||
650 | void untested() { stop("untested"); } | |||
651 | ||||
652 | void unimplemented(const char* what = ""); | |||
653 | ||||
654 | void should_not_reach_here() { stop("should not reach here"); } | |||
655 | ||||
656 | void print_CPU_state(); | |||
657 | ||||
658 | // Stack overflow checking | |||
659 | void bang_stack_with_offset(int offset) { | |||
660 | // stack grows down, caller passes positive offset | |||
661 | assert(offset > 0, "must bang with negative offset"); | |||
662 | movl(Address(rsp, (-offset)), rax); | |||
663 | } | |||
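// Worked example (editor addition): bang_stack_with_offset(os::vm_page_size())
// stores rax at [rsp - 4096] on a 4K-page system. If that page lies in the
// guard zone, the store faults here, at a well-defined point, rather than
// somewhere inside the not-yet-built frame.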
664 | ||||
665 | // Writes to stack successive pages until offset reached to check for | |||
666 | // stack overflow + shadow pages. Also, clobbers tmp | |||
667 | void bang_stack_size(Register size, Register tmp); | |||
668 | 
669 |   // Check for reserved stack access in the method being exited (for JIT)
670 |   void reserved_stack_check();
671 | 
672 |   void safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool in_nmethod);
673 | 
674 |   void verify_tlab();
675 | 
676 |   Condition negate_condition(Condition cond);
677 | 
678 |   // Instructions that use AddressLiteral operands. These instructions can handle 32bit/64bit
679 |   // operands. In general the names are modified to avoid hiding the instruction in Assembler,
680 |   // so that we don't need to implement all the varieties in the Assembler with trivial wrappers
681 |   // here in MacroAssembler. The major exception to this rule is call.
682 | 
683 |   // Arithmetics
684 | 
685 | 
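Most of the ptr-suffixed helpers below select the 64-bit (...q) or 32-bit (...l) instruction form at compile time through HotSpot's LP64_ONLY/NOT_LP64 selectors; exactly one side expands to code, the other to nothing. The shape of those definitions (they live in HotSpot's shared utility macros, not in this file):

    #ifdef _LP64
    #define LP64_ONLY(code) code
    #define NOT_LP64(code)
    #else
    #define LP64_ONLY(code)
    #define NOT_LP64(code) code
    #endif

    // So on a 64-bit build,
    //   void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); }
    // compiles to just { addq(dst, src); }.
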
686 |   void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); }
687 |   void addptr(Address dst, Register src);
688 | 
689 |   void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); }
690 |   void addptr(Register dst, int32_t src);
691 |   void addptr(Register dst, Register src);
692 |   void addptr(Register dst, RegisterOrConstant src) {
693 |     if (src.is_constant()) addptr(dst, (int) src.as_constant());
694 |     else                   addptr(dst, src.as_register());
695 |   }
696 | 
697 |   void andptr(Register dst, int32_t src);
698 |   void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2)) NOT_LP64(andl(src1, src2)); }
699 | 
700 |   void cmp8(AddressLiteral src1, int imm);
701 | 
702 |   // renamed to drag out the casting of address to int32_t/intptr_t
703 |   void cmp32(Register src1, int32_t imm);
704 | 
705 |   void cmp32(AddressLiteral src1, int32_t imm);
706 |   // compare reg - mem, or reg - &mem
707 |   void cmp32(Register src1, AddressLiteral src2);
708 | 
709 |   void cmp32(Register src1, Address src2);
710 | 
711 | #ifndef _LP64
712 |   void cmpklass(Address dst, Metadata* obj);
713 |   void cmpklass(Register dst, Metadata* obj);
714 |   void cmpoop(Address dst, jobject obj);
715 | #endif // _LP64
716 | 
717 |   void cmpoop(Register src1, Register src2);
718 |   void cmpoop(Register src1, Address src2);
719 |   void cmpoop(Register dst, jobject obj);
720 | 
721 |   // NOTE: src2 must be the lvalue. This is NOT a mem-mem compare.
722 |   void cmpptr(Address src1, AddressLiteral src2);
723 | 
724 |   void cmpptr(Register src1, AddressLiteral src2);
725 | 
726 |   void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)); }
727 |   void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)); }
728 |   // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)); }
729 | 
730 |   void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)); }
731 |   void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)); }
732 | 
733 |   // cmp64 to avoid hiding cmpq
734 |   void cmp64(Register src1, AddressLiteral src);
735 | 
736 |   void cmpxchgptr(Register reg, Address adr);
737 | 
738 |   void locked_cmpxchgptr(Register reg, AddressLiteral adr);
739 | 
740 | 
741 |   void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); }
742 |   void imulptr(Register dst, Register src, int imm32) { LP64_ONLY(imulq(dst, src, imm32)) NOT_LP64(imull(dst, src, imm32)); }
743 | 
744 | 
745 |   void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); }
746 | 
747 |   void notptr(Register dst) { LP64_ONLY(notq(dst)) NOT_LP64(notl(dst)); }
748 | 
749 |   void shlptr(Register dst, int32_t shift);
750 |   void shlptr(Register dst) { LP64_ONLY(shlq(dst)) NOT_LP64(shll(dst)); }
751 | 
752 |   void shrptr(Register dst, int32_t shift);
753 |   void shrptr(Register dst) { LP64_ONLY(shrq(dst)) NOT_LP64(shrl(dst)); }
754 | 
755 |   void sarptr(Register dst) { LP64_ONLY(sarq(dst)) NOT_LP64(sarl(dst)); }
756 |   void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src)) NOT_LP64(sarl(dst, src)); }
757 | 
758 |   void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
759 | 
760 |   void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
761 |   void subptr(Register dst, int32_t src);
762 |   // Force generation of a 4-byte immediate value even if it fits into 8 bits
763 |   void subptr_imm32(Register dst, int32_t src);
764 |   void subptr(Register dst, Register src);
765 |   void subptr(Register dst, RegisterOrConstant src) {
766 |     if (src.is_constant()) subptr(dst, (int) src.as_constant());
767 |     else                   subptr(dst, src.as_register());
768 |   }
769 | 
770 |   void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
771 |   void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
772 | 
773 |   void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)); }
774 |   void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)); }
775 | 
776 |   void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2)) NOT_LP64(xaddl(src1, src2)); }
777 | 
778 | 
779 | 
780 |   // Helper functions for statistics gathering.
781 |   // Conditionally (atomically, on MPs) increments the counter at the passed address, preserving condition codes.
782 |   void cond_inc32(Condition cond, AddressLiteral counter_addr);
783 |   // Unconditional atomic increment.
784 |   void atomic_incl(Address counter_addr);
785 |   void atomic_incl(AddressLiteral counter_addr, Register scr = rscratch1);
786 | #ifdef _LP64
787 |   void atomic_incq(Address counter_addr);
788 |   void atomic_incq(AddressLiteral counter_addr, Register scr = rscratch1);
789 | #endif
790 |   void atomic_incptr(AddressLiteral counter_addr, Register scr = rscratch1) { LP64_ONLY(atomic_incq(counter_addr, scr)) NOT_LP64(atomic_incl(counter_addr, scr)); }
791 |   void atomic_incptr(Address counter_addr) { LP64_ONLY(atomic_incq(counter_addr)) NOT_LP64(atomic_incl(counter_addr)); }
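
A typical use of these counter helpers is bumping a VM-global statistics cell through an AddressLiteral; a hedged sketch (the counter and the emitter function are hypothetical; ExternalAddress is the usual wrapper for a global's address):

    // Hypothetical statistics cell bumped from generated code.
    static int _slow_path_hits = 0;

    void emit_counter_bump(MacroAssembler* masm) {
      // atomic_incl takes the AddressLiteral form declared above; rscratch1
      // is the default scratch register on 64-bit.
      masm->atomic_incl(ExternalAddress((address)&_slow_path_hits));
    }
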
792 | 
793 |   void lea(Register dst, AddressLiteral adr);
794 |   void lea(Address dst, AddressLiteral adr);
795 |   void lea(Register dst, Address adr) { Assembler::lea(dst, adr); }
796 | 
797 |   void leal32(Register dst, Address src) { leal(dst, src); }
798 | 
799 |   // Import other testl() methods from the parent class or else
800 |   // they will be hidden by the following overriding declaration.
801 |   using Assembler::testl;
802 |   void testl(Register dst, AddressLiteral src);
803 | 
804 |   void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
805 |   void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
806 |   void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
807 |   void orptr(Address dst, int32_t imm32) { LP64_ONLY(orq(dst, imm32)) NOT_LP64(orl(dst, imm32)); }
808 | 
809 |   void testptr(Register src, int32_t imm32) { LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); }
810 |   void testptr(Register src1, Address src2) { LP64_ONLY(testq(src1, src2)) NOT_LP64(testl(src1, src2)); }
811 |   void testptr(Register src1, Register src2);
812 | 
813 |   void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
814 |   void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
815 | 
816 |   // Calls
817 | 
818 |   void call(Label& L, relocInfo::relocType rtype);
819 |   void call(Register entry);
820 |   void call(Address addr) { Assembler::call(addr); }
821 | 
822 |   // NOTE: this call transfers to the effective address of entry, NOT to
823 |   // the address contained by entry, because that is the more natural
824 |   // behavior for jumps/calls.
825 |   void call(AddressLiteral entry);
826 | 
827 |   // Emit the CompiledIC call idiom
828 |   void ic_call(address entry, jint method_index = 0);
829 | 
830 |   // Jumps
831 | 
832 |   // NOTE: these jumps transfer to the effective address of dst, NOT to
833 |   // the address contained by dst, because that is the more natural
834 |   // behavior for jumps/calls.
835 |   void jump(AddressLiteral dst);
836 |   void jump_cc(Condition cc, AddressLiteral dst);
837 | 
838 |   // 32bit can do a case table jump in one instruction, but we no longer allow the base
839 |   // to be installed in the Address class. This jump transfers to the address
840 |   // contained in the location described by entry (not the address of entry).
841 |   void jump(ArrayAddress entry);
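
The distinction matters at call sites: jump(AddressLiteral) branches to the literal's target itself, while jump(ArrayAddress) loads a code address out of the described slot and branches to that. A usage sketch with the file's __ shorthand (the stub entry is a real routine; the idiom, not the surrounding context, is the point):

    // Branches to the entry point itself -- the effective address of the
    // literal, exactly as the NOTE above describes.
    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));

    // jump(ArrayAddress), by contrast, dereferences: it transfers to the
    // code address stored in the table slot that the ArrayAddress names.
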
842 | 
843 |   // Floating
844 | 
845 |   void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
846 |   void andpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
847 |   void andpd(XMMRegister dst, XMMRegister src) { Assembler::andpd(dst, src); }
848 | 
849 |   void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
850 |   void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
851 |   void andps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
852 | 
853 |   void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); }
854 |   void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
855 |   void comiss(XMMRegister dst, AddressLiteral src);
856 | 
857 |   void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); }
858 |   void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
859 |   void comisd(XMMRegister dst, AddressLiteral src);
860 | 
861 | #ifndef _LP64
862 |   void fadd_s(Address src) { Assembler::fadd_s(src); }
863 |   void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); }
864 | 
865 |   void fldcw(Address src) { Assembler::fldcw(src); }
866 |   void fldcw(AddressLiteral src);
867 | 
868 |   void fld_s(int index) { Assembler::fld_s(index); }
869 |   void fld_s(Address src) { Assembler::fld_s(src); }
870 |   void fld_s(AddressLiteral src);
871 | 
872 |   void fld_d(Address src) { Assembler::fld_d(src); }
873 |   void fld_d(AddressLiteral src);
874 | 
875 |   void fmul_s(Address src) { Assembler::fmul_s(src); }
876 |   void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); }
877 | #endif // _LP64
878 | 
879 |   void fld_x(Address src) { Assembler::fld_x(src); }
880 |   void fld_x(AddressLiteral src);
881 | 
882 |   void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
883 |   void ldmxcsr(AddressLiteral src);
884 | 
885 | #ifdef _LP64
886 |  private:
887 |   void sha256_AVX2_one_round_compute(
888 |     Register reg_old_h,
889 |     Register reg_a,
890 |     Register reg_b,
891 |     Register reg_c,
892 |     Register reg_d,
893 |     Register reg_e,
894 |     Register reg_f,
895 |     Register reg_g,
896 |     Register reg_h,
897 |     int iter);
898 |   void sha256_AVX2_four_rounds_compute_first(int start);
899 |   void sha256_AVX2_four_rounds_compute_last(int start);
900 |   void sha256_AVX2_one_round_and_sched(
901 |     XMMRegister xmm_0,  /* == ymm4 on iterations 0-3, then rotate 4 registers left on iterations 4, 8, 12 */
902 |     XMMRegister xmm_1,  /* ymm5 */  /* full cycle is 16 iterations */
903 |     XMMRegister xmm_2,  /* ymm6 */
904 |     XMMRegister xmm_3,  /* ymm7 */
905 |     Register reg_a,     /* == eax on iteration 0, then rotate 8 registers right on each subsequent iteration */
906 |     Register reg_b,     /* ebx */  /* full cycle is 8 iterations */
907 |     Register reg_c,     /* edi */
908 |     Register reg_d,     /* esi */
909 |     Register reg_e,     /* r8d */
910 |     Register reg_f,     /* r9d */
911 |     Register reg_g,     /* r10d */
912 |     Register reg_h,     /* r11d */
913 |     int iter);
914 | 
915 |   void addm(int disp, Register r1, Register r2);
916 |   void gfmul(XMMRegister tmp0, XMMRegister t);
917 |   void schoolbookAAD(int i, Register subkeyH, XMMRegister data, XMMRegister tmp0,
918 |                      XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3);
919 |   void generateHtbl_one_block(Register htbl);
920 |   void generateHtbl_eight_blocks(Register htbl);
921 |  public:
922 |   void sha256_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
923 |                    XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
924 |                    Register buf, Register state, Register ofs, Register limit, Register rsp,
925 |                    bool multi_block, XMMRegister shuf_mask);
926 |   void avx_ghash(Register state, Register htbl, Register data, Register blocks);
927 | #endif
928 | 
929 | #ifdef _LP64
930 |  private:
931 |   void sha512_AVX2_one_round_compute(Register old_h, Register a, Register b, Register c, Register d,
932 |                                      Register e, Register f, Register g, Register h, int iteration);
933 | 
934 |   void sha512_AVX2_one_round_and_schedule(XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
935 |                                           Register a, Register b, Register c, Register d, Register e, Register f,
936 |                                           Register g, Register h, int iteration);
937 | 
938 |   void addmq(int disp, Register r1, Register r2);
939 |  public:
940 |   void sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
941 |                    XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
942 |                    Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block,
943 |                    XMMRegister shuf_mask);
944 |  private:
945 |   void roundEnc(XMMRegister key, int rnum);
946 |   void lastroundEnc(XMMRegister key, int rnum);
947 |   void roundDec(XMMRegister key, int rnum);
948 |   void lastroundDec(XMMRegister key, int rnum);
949 |   void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
950 |   void gfmul_avx512(XMMRegister ghash, XMMRegister hkey);
951 |   void generateHtbl_48_block_zmm(Register htbl, Register avx512_subkeyHtbl);
952 |   void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx,
953 |     XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool reduction,
954 |     XMMRegister addmask, bool no_ghash_input, Register rounds, Register ghash_pos,
955 |     bool final_reduction, int index, XMMRegister counter_inc_mask);
956 |  public:
957 |   void aesecb_encrypt(Register source_addr, Register dest_addr, Register key, Register len);
958 |   void aesecb_decrypt(Register source_addr, Register dest_addr, Register key, Register len);
959 |   void aesctr_encrypt(Register src_addr, Register dest_addr, Register key, Register counter,
960 |                       Register len_reg, Register used, Register used_addr, Register saved_encCounter_start);
961 |   void aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key,
962 |                       Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter);
963 | 
964 | #endif
965 | 
966 |   void fast_md5(Register buf, Address state, Address ofs, Address limit,
967 |                 bool multi_block);
968 | 
969 |   void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
970 |                  XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask,
971 |                  Register buf, Register state, Register ofs, Register limit, Register rsp,
972 |                  bool multi_block);
973 | 
974 | #ifdef _LP64
975 |   void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
976 |                    XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
977 |                    Register buf, Register state, Register ofs, Register limit, Register rsp,
978 |                    bool multi_block, XMMRegister shuf_mask);
979 | #else
980 |   void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
981 |                    XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
982 |                    Register buf, Register state, Register ofs, Register limit, Register rsp,
983 |                    bool multi_block);
984 | #endif
985 | 
986 |   void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
987 |                 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
988 |                 Register rax, Register rcx, Register rdx, Register tmp);
989 | 
990 | #ifdef _LP64
991 |   void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
992 |                 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
993 |                 Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2);
994 | 
995 |   void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
996 |                   XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
997 |                   Register rax, Register rcx, Register rdx, Register r11);
998 | 
999 |   void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
1000 |                 XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
1001 |                 Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4);
1002 | 
1003 |   void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1004 |                 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1005 |                 Register rax, Register rbx, Register rcx, Register rdx, Register tmp1, Register tmp2,
1006 |                 Register tmp3, Register tmp4);
1007 | 
1008 |   void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1009 |                 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1010 |                 Register rax, Register rcx, Register rdx, Register tmp1,
1011 |                 Register tmp2, Register tmp3, Register tmp4);
1012 |   void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1013 |                 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1014 |                 Register rax, Register rcx, Register rdx, Register tmp1,
1015 |                 Register tmp2, Register tmp3, Register tmp4);
1016 | #else
1017 |   void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1018 |                 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1019 |                 Register rax, Register rcx, Register rdx, Register tmp1);
1020 | 
1021 |   void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1022 |                   XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1023 |                   Register rax, Register rcx, Register rdx, Register tmp);
1024 | 
1025 |   void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
1026 |                 XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
1027 |                 Register rdx, Register tmp);
1028 | 
1029 |   void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1030 |                 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1031 |                 Register rax, Register rbx, Register rdx);
1032 | 
1033 |   void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1034 |                 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1035 |                 Register rax, Register rcx, Register rdx, Register tmp);
1036 | 
1037 |   void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
1038 |                         Register edx, Register ebx, Register esi, Register edi,
1039 |                         Register ebp, Register esp);
1040 | 
1041 |   void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx,
1042 |                          Register esi, Register edi, Register ebp, Register esp);
1043 | 
1044 |   void libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
1045 |                         Register edx, Register ebx, Register esi, Register edi,
1046 |                         Register ebp, Register esp);
1047 | 
1048 |   void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1049 |                 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1050 |                 Register rax, Register rcx, Register rdx, Register tmp);
1051 | #endif
1052 | 
1053 |  private:
1054 | 
1055 |   // these are private because users should be doing movflt/movdbl
1056 | 
1057 |   void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); }
1058 |   void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); }
1059 |   void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); }
1060 |   void movss(XMMRegister dst, AddressLiteral src);
1061 | 
1062 |   void movlpd(XMMRegister dst, Address src) { Assembler::movlpd(dst, src); }
1063 |   void movlpd(XMMRegister dst, AddressLiteral src);
1064 | 
1065 |  public:
1066 | 
1067 |   void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); }
1068 |   void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); }
1069 |   void addsd(XMMRegister dst, AddressLiteral src);
1070 | 
1071 |   void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); }
1072 |   void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); }
1073 |   void addss(XMMRegister dst, AddressLiteral src);
1074 | 
1075 |   void addpd(XMMRegister dst, XMMRegister src) { Assembler::addpd(dst, src); }
1076 |   void addpd(XMMRegister dst, Address src) { Assembler::addpd(dst, src); }
1077 |   void addpd(XMMRegister dst, AddressLiteral src);
1078 | 
1079 |   void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); }
1080 |   void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); }
1081 |   void divsd(XMMRegister dst, AddressLiteral src);
1082 | 
1083 |   void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); }
1084 |   void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); }
1085 |   void divss(XMMRegister dst, AddressLiteral src);
1086 | 
1087 |   // Move Unaligned Double Quadword
1088 |   void movdqu(Address dst, XMMRegister src);
1089 |   void movdqu(XMMRegister dst, Address src);
1090 |   void movdqu(XMMRegister dst, XMMRegister src);
1091 |   void movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg = rscratch1);
1092 | 
1093 |   void kmovwl(KRegister dst, Register src) { Assembler::kmovwl(dst, src); }
1094 |   void kmovwl(Register dst, KRegister src) { Assembler::kmovwl(dst, src); }
1095 |   void kmovwl(KRegister dst, Address src) { Assembler::kmovwl(dst, src); }
1096 |   void kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
1097 |   void kmovwl(Address dst, KRegister src) { Assembler::kmovwl(dst, src); }
1098 |   void kmovwl(KRegister dst, KRegister src) { Assembler::kmovwl(dst, src); }
1099 | 
1100 |   void kmovql(KRegister dst, KRegister src) { Assembler::kmovql(dst, src); }
1101 |   void kmovql(KRegister dst, Register src) { Assembler::kmovql(dst, src); }
1102 |   void kmovql(Register dst, KRegister src) { Assembler::kmovql(dst, src); }
1103 |   void kmovql(KRegister dst, Address src) { Assembler::kmovql(dst, src); }
1104 |   void kmovql(Address dst, KRegister src) { Assembler::kmovql(dst, src); }
1105 |   void kmovql(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
1106 | 
1107 |   // Safe move operation: lowers to 16-bit moves for targets supporting the
1108 |   // AVX512F feature and to 64-bit moves for targets supporting the AVX512BW feature.
1109 |   void kmov(Address dst, KRegister src);
1110 |   void kmov(KRegister dst, Address src);
1111 |   void kmov(KRegister dst, KRegister src);
1112 |   void kmov(Register dst, KRegister src);
1113 |   void kmov(KRegister dst, Register src);
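
A sketch of the dispatch the comment above describes, shape only (the real bodies live in macroAssembler_x86.cpp):

    // Prefer the 64-bit opmask move where AVX512BW guarantees it; fall
    // back to the 16-bit AVX512F form otherwise.
    void MacroAssembler::kmov(KRegister dst, KRegister src) {
      if (VM_Version::supports_avx512bw()) {
        kmovql(dst, src);  // 64-bit mask move (AVX512BW)
      } else {
        kmovwl(dst, src);  // 16-bit mask move (baseline AVX512F)
      }
    }
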
1114 | 
1115 |   // AVX Unaligned forms
1116 |   void vmovdqu(Address dst, XMMRegister src);
1117 |   void vmovdqu(XMMRegister dst, Address src);
1118 |   void vmovdqu(XMMRegister dst, XMMRegister src);
1119 |   void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
1120 |   void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg, int vector_len);
1121 | 
1122 | 
1123 |   // AVX512 Unaligned
1124 |   void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len);
1125 |   void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len);
1126 | 
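The kmask/merge parameters on the masked forms below follow AVX-512 masking semantics: selected lanes copy the source, suppressed lanes either keep the old destination (merge-masking) or are zeroed (zero-masking). A scalar model of one masked move:

    #include <cstdint>

    // Per-lane model of a masked vector move.
    template <typename T>
    void masked_move(T* dst, const T* src, uint64_t mask, int lanes, bool merge) {
      for (int i = 0; i < lanes; i++) {
        if ((mask >> i) & 1) dst[i] = src[i];  // lane selected by the kmask
        else if (!merge)     dst[i] = T(0);    // zero-masking clears it
        // merge-masking leaves dst[i] untouched
      }
    }
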
1127 |   void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
1128 |   void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
1129 |   void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
1130 |   void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
1131 |   void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
1132 |   void evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1133 | 
1134 |   void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); }
1135 |   void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
1136 |   void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); }
1137 |   void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
1138 |   void evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1139 | 
1140 |   void evmovdqul(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
1141 |   void evmovdqul(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
1142 |   void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
1143 |     if (dst->encoding() == src->encoding()) return;
1144 |     Assembler::evmovdqul(dst, src, vector_len);
1145 |   }
1146 |   void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
1147 |   void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
1148 |   void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
1149 |     if (dst->encoding() == src->encoding() && mask == k0) return;
1150 |     Assembler::evmovdqul(dst, mask, src, merge, vector_len);
1151 |   }
1152 |   void evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1153 | 
1154 |   void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
1155 |   void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
1156 |   void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch);
1157 |   void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
1158 |     if (dst->encoding() == src->encoding()) return;
1159 |     Assembler::evmovdquq(dst, src, vector_len);
1160 |   }
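
XMMRegister is a pointer type, so the self-move elision above dereferences both operands in the encoding comparison before anything validates them. A defensive variant, purely illustrative (the guard and the function name are assumptions, not the upstream code):

    // Illustrative guard: fail fast on invalid operands instead of
    // dereferencing them in the encoding comparison.
    void evmovdquq_checked(XMMRegister dst, XMMRegister src, int vector_len) {
      assert(dst != NULL && dst->is_valid(), "dst must be a valid XMMRegister");
      assert(src != NULL && src->is_valid(), "src must be a valid XMMRegister");
      if (dst->encoding() == src->encoding()) return;  // elide the self-move
      Assembler::evmovdquq(dst, src, vector_len);
    }
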
1161 |   void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
1162 |   void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
1163 |   void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
1164 |     if (dst->encoding() == src->encoding() && mask == k0) return;
1165 |     Assembler::evmovdquq(dst, mask, src, merge, vector_len);
1166 |   }
1167 |   void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1168 | 
1169 |   // Move Aligned Double Quadword
1170 |   void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); }
1171 |   void movdqa(XMMRegister dst, XMMRegister src) { Assembler::movdqa(dst, src); }
1172 |   void movdqa(XMMRegister dst, AddressLiteral src);
1173 | 
1174 |   void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
1175 |   void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); }
1176 |   void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
1177 |   void movsd(XMMRegister dst, AddressLiteral src);
1178 | 
1179 |   void mulpd(XMMRegister dst, XMMRegister src) { Assembler::mulpd(dst, src); }
1180 |   void mulpd(XMMRegister dst, Address src) { Assembler::mulpd(dst, src); }
1181 |   void mulpd(XMMRegister dst, AddressLiteral src);
1182 | 
1183 |   void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); }
1184 |   void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); }
1185 |   void mulsd(XMMRegister dst, AddressLiteral src);
1186 | 
1187 |   void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); }
1188 |   void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); }
1189 |   void mulss(XMMRegister dst, AddressLiteral src);
1190 | 
1191 |   // Carry-Less Multiplication Quadword
1192 |   void pclmulldq(XMMRegister dst, XMMRegister src) {
1193 |     // 0x00 - multiply lower 64 bits [0:63]
1194 |     Assembler::pclmulqdq(dst, src, 0x00);
1195 |   }
1196 |   void pclmulhdq(XMMRegister dst, XMMRegister src) {
1197 |     // 0x11 - multiply upper 64 bits [64:127]
1198 |     Assembler::pclmulqdq(dst, src, 0x11);
1199 |   }
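
In pclmulqdq's imm8 selector, bit 0 picks which quadword of the first operand is used and bit 4 picks the quadword of the second, so 0x00 multiplies the two low halves and 0x11 the two high halves. The operation itself is a carry-less multiply: partial products are combined with XOR rather than addition. A scalar reference model:

    #include <cstdint>

    // 64x64 -> 128-bit carry-less multiply: the computation pclmulqdq
    // performs on the quadwords selected by imm8.
    static void clmul64(uint64_t a, uint64_t b, uint64_t& lo, uint64_t& hi) {
      lo = hi = 0;
      for (int i = 0; i < 64; i++) {
        if ((b >> i) & 1) {                // for each set bit of b ...
          lo ^= a << i;                    // ... XOR in a shifted copy of a
          if (i > 0) hi ^= a >> (64 - i);  // bits that spill into the high half
        }
      }
    }
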
1200 | 
1201 |   void pcmpeqb(XMMRegister dst, XMMRegister src);
1202 |   void pcmpeqw(XMMRegister dst, XMMRegister src);
1203 | 
1204 |   void pcmpestri(XMMRegister dst, Address src, int imm8);
1205 |   void pcmpestri(XMMRegister dst, XMMRegister src, int imm8);
1206 | 
1207 |   void pmovzxbw(XMMRegister dst, XMMRegister src);
1208 |   void pmovzxbw(XMMRegister dst, Address src);
1209 | 
1210 |   void pmovmskb(Register dst, XMMRegister src);
1211 | 
1212 |   void ptest(XMMRegister dst, XMMRegister src);
1213 | 
1214 |   void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); }
1215 |   void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); }
1216 |   void sqrtsd(XMMRegister dst, AddressLiteral src);
1217 | 
1218 |   void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); }
1219 |   void roundsd(XMMRegister dst, Address src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); }
1220 |   void roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register scratch_reg);
1221 | 
1222 |   void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); }
1223 |   void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); }
1224 |   void sqrtss(XMMRegister dst, AddressLiteral src);
1225 | 
1226 |   void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); }
1227 |   void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); }
1228 |   void subsd(XMMRegister dst, AddressLiteral src);
1229 | 
1230 |   void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); }
1231 |   void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); }
1232 |   void subss(XMMRegister dst, AddressLiteral src);
1233 | 
1234 |   void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
1235 |   void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
1236 |   void ucomiss(XMMRegister dst, AddressLiteral src);
1237 | 
1238 |   void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }
1239 |   void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
1240 |   void ucomisd(XMMRegister dst, AddressLiteral src);
1241 | 
1242 |   // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
1243 |   void xorpd(XMMRegister dst, XMMRegister src);
1244 |   void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); }
1245 |   void xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
1246 | 
1247 |   // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
1248 |   void xorps(XMMRegister dst, XMMRegister src);
1249 |   void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); }
1250 |   void xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
1251 | 
1252 |   // Shuffle Bytes
1253 |   void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); }
1254 |   void pshufb(XMMRegister dst, Address src) { Assembler::pshufb(dst, src); }
1255 |   void pshufb(XMMRegister dst, AddressLiteral src);
1256 |   // AVX 3-operand instructions
1257 | 
1258 |   void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
1259 |   void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); }
1260 |   void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1261 | 
1262 |   void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); }
1263 |   void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); }
1264 |   void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1265 | 
1266 |   void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
1267 |   void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
1268 | 
1269 |   void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1270 |   void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1271 |   void vpaddb(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch);
1272 | 
1273 |   void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1274 |   void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1275 | 
1276 |   void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); }
1277 |   void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); }
1278 |   void vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch);
1279 | 
1280 |   void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
1281 |   void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
1282 |   void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1283 | 
1284 |   void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
1285 |   void vpbroadcastw(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastw(dst, src, vector_len); }
1286 | 
1287 |   void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1288 | 
1289 |   void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1290 |   void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
1291 | 
1292 |   // Vector compares
1293 |   void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
1294 |                int comparison, bool is_signed, int vector_len) { Assembler::evpcmpd(kdst, mask, nds, src, comparison, is_signed, vector_len); }
1295 |   void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
1296 |                int comparison, bool is_signed, int vector_len, Register scratch_reg);
1297 |   void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
1298 |                int comparison, bool is_signed, int vector_len) { Assembler::evpcmpq(kdst, mask, nds, src, comparison, is_signed, vector_len); }
1299 |   void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
1300 |                int comparison, bool is_signed, int vector_len, Register scratch_reg);
1301 |   void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
1302 |                int comparison, bool is_signed, int vector_len) { Assembler::evpcmpb(kdst, mask, nds, src, comparison, is_signed, vector_len); }
1303 |   void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
1304 |                int comparison, bool is_signed, int vector_len, Register scratch_reg);
1305 |   void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
1306 |                int comparison, bool is_signed, int vector_len) { Assembler::evpcmpw(kdst, mask, nds, src, comparison, is_signed, vector_len); }
1307 |   void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
1308 |                int comparison, bool is_signed, int vector_len, Register scratch_reg);
1309 | 
1310 |   void evpbroadcast(BasicType type, XMMRegister dst, Register src, int vector_len);
1311 | 
1312 |   // Emit comparison instruction for the specified comparison predicate.
1313 |   void vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg);
1314 |   void vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len);
1315 | 
1316 |   void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
1317 |   void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpmovzxbw(dst, src, vector_len); }
1318 | 
1319 |   void vpmovmskb(Register dst, XMMRegister src, int vector_len = Assembler::AVX_256bit);
1320 | 
1321 |   void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1322 |   void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1323 |   void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
1324 |     Assembler::vpmulld(dst, nds, src, vector_len);
1325 |   }
1326 |   void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1327 |     Assembler::vpmulld(dst, nds, src, vector_len);
1328 |   }
1329 |   void vpmulld(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
1330 | 
1331 |   void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1332 |   void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1333 | 
1334 |   void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1335 |   void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1336 | 
1337 |   void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1338 |   void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
1339 | 
1340 |   void evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1341 |   void evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
1342 | 
1343 |   void evpsllw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1344 |     if (!is_varshift) {
1345 |       Assembler::evpsllw(dst, mask, nds, src, merge, vector_len);
1346 |     } else {
1347 |       Assembler::evpsllvw(dst, mask, nds, src, merge, vector_len);
1348 |     }
1349 |   }
1350 |   void evpslld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1351 |     if (!is_varshift) {
1352 |       Assembler::evpslld(dst, mask, nds, src, merge, vector_len);
1353 |     } else {
1354 |       Assembler::evpsllvd(dst, mask, nds, src, merge, vector_len);
1355 |     }
1356 |   }
1357 |   void evpsllq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1358 |     if (!is_varshift) {
1359 |       Assembler::evpsllq(dst, mask, nds, src, merge, vector_len);
1360 |     } else {
1361 |       Assembler::evpsllvq(dst, mask, nds, src, merge, vector_len);
1362 |     }
1363 |   }
1364 |   void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1365 |     if (!is_varshift) {
1366 |       Assembler::evpsrlw(dst, mask, nds, src, merge, vector_len);
1367 |     } else {
1368 |       Assembler::evpsrlvw(dst, mask, nds, src, merge, vector_len);
1369 |     }
1370 |   }
1371 |   void evpsrld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1372 |     if (!is_varshift) {
1373 |       Assembler::evpsrld(dst, mask, nds, src, merge, vector_len);
1374 |     } else {
1375 |       Assembler::evpsrlvd(dst, mask, nds, src, merge, vector_len);
1376 |     }
1377 |   }
1378 |   void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1379 |     if (!is_varshift) {
1380 |       Assembler::evpsrlq(dst, mask, nds, src, merge, vector_len);
1381 |     } else {
1382 |       Assembler::evpsrlvq(dst, mask, nds, src, merge, vector_len);
1383 |     }
1384 |   }
1385 |   void evpsraw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1386 |     if (!is_varshift) {
1387 |       Assembler::evpsraw(dst, mask, nds, src, merge, vector_len);
1388 |     } else {
1389 |       Assembler::evpsravw(dst, mask, nds, src, merge, vector_len);
1390 |     }
1391 |   }
1392 |   void evpsrad(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1393 |     if (!is_varshift) {
1394 |       Assembler::evpsrad(dst, mask, nds, src, merge, vector_len);
1395 |     } else {
1396 |       Assembler::evpsravd(dst, mask, nds, src, merge, vector_len);
1397 |     }
1398 |   }
1399 |   void evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1400 |     if (!is_varshift) {
1401 |       Assembler::evpsraq(dst, mask, nds, src, merge, vector_len);
1402 |     } else {
1403 |       Assembler::evpsravq(dst, mask, nds, src, merge, vector_len);
1404 |     }
1405 |   }
1406 | 
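The is_varshift flag in the family above chooses between the uniform-count shifts (one count applied to every lane) and the variable forms (evpsllvw and friends), which read an independent count per lane. A scalar model of the difference:

    #include <cstdint>

    // Uniform vs. per-lane variable shift; the srl/sra variants are analogous.
    void shift_left_lanes(uint32_t* dst, const uint32_t* src, int lanes,
                          bool is_varshift, int count, const int* counts) {
      for (int i = 0; i < lanes; i++) {
        int n = is_varshift ? counts[i] : count;  // v-form: counts[i] per lane
        dst[i] = src[i] << n;
      }
    }
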
1407 |   void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1408 |   void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1409 |   void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1410 |   void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1411 | 
1412 |   void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1413 |   void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
1414 | 
1415 |   void vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1416 |   void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
1417 | 
1418 |   void vptest(XMMRegister dst, XMMRegister src);
1419 |   void vptest(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vptest(dst, src, vector_len); }
1420 | 
1421 |   void punpcklbw(XMMRegister dst, XMMRegister src);
1422 |   void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); }
1423 | 
1424 |   void pshufd(XMMRegister dst, Address src, int mode);
1425 |   void pshufd(XMMRegister dst, XMMRegister src, int mode) { Assembler::pshufd(dst, src, mode); }
1426 | 
1427 |   void pshuflw(XMMRegister dst, XMMRegister src, int mode);
1428 |   void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); }
1429 | 
1430 |   void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
1431 |   void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
1432 |   void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1433 | 
1434 |   void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
1435 |   void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
1436 |   void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1437 | 
1438 |   void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1439 | 
1440 |   void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
1441 |   void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); }
1442 |   void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1443 | 
1444 |   void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); }
1445 |   void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); }
1446 |   void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1447 | 
1448 |   void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); }
1449 |   void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); }
1450 |   void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1451 | 
1452 |   void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); }
1453 |   void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); }
1454 |   void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1455 | 
1456 |   void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); }
1457 |   void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); }
1458 |   void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1459 | 
1460 |   void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); }
1461 |   void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); }
1462 |   void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1463 | 
1464 |   void vnegatess(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1465 |   void vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1466 | 
1467 |   // AVX Vector instructions
1468 | 
1469 |   void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }
1470 |   void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }
1471 |   void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1472 | 
1473 |   void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); }
1474 |   void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); }
1475 |   void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1476 | 
1477 |   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1478 |     if (UseAVX > 1 || (vector_len < 1)) // vpxor 256-bit is available only in AVX2
1479 |       Assembler::vpxor(dst, nds, src, vector_len);
1480 |     else
1481 |       Assembler::vxorpd(dst, nds, src, vector_len);
1482 |   }
1483 |   void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
1484 |     if (UseAVX > 1 || (vector_len < 1)) // vpxor 256-bit is available only in AVX2
1485 |       Assembler::vpxor(dst, nds, src, vector_len);
1486 |     else
1487 |       Assembler::vxorpd(dst, nds, src, vector_len);
1488 |   }
1489 |   void vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1490 | 
1491 |   // Simple version for AVX2 256-bit vectors
1492 |   void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); }
1493 | void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); } | |||
1494 | ||||
1495 | void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpermd(dst, nds, src, vector_len); } | |||
1496 | void vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg); | |||
1497 | ||||
1498 | void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) { | |||
1499 | if (UseAVX > 2 && VM_Version::supports_avx512novl()) { | |||
1500 | Assembler::vinserti32x4(dst, nds, src, imm8); | |||
1501 | } else if (UseAVX > 1) { | |||
1502 | // vinserti128 is available only in AVX2 | |||
1503 | Assembler::vinserti128(dst, nds, src, imm8); | |||
1504 | } else { | |||
1505 | Assembler::vinsertf128(dst, nds, src, imm8); | |||
1506 | } | |||
1507 | } | |||
1508 | ||||
1509 | void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) { | |||
1510 | if (UseAVX > 2 && VM_Version::supports_avx512novl()) { | |||
1511 | Assembler::vinserti32x4(dst, nds, src, imm8); | |||
1512 | } else if (UseAVX > 1) { | |||
1513 | // vinserti128 is available only in AVX2 | |||
1514 | Assembler::vinserti128(dst, nds, src, imm8); | |||
1515 | } else { | |||
1516 | Assembler::vinsertf128(dst, nds, src, imm8); | |||
1517 | } | |||
1518 | } | |||
1519 | ||||
1520 | void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) { | |||
1521 | if (UseAVX > 2 && VM_Version::supports_avx512novl()) { | |||
1522 | Assembler::vextracti32x4(dst, src, imm8); | |||
1523 | } else if (UseAVX > 1) { | |||
1524 | // vextracti128 is available only in AVX2 | |||
1525 | Assembler::vextracti128(dst, src, imm8); | |||
1526 | } else { | |||
1527 | Assembler::vextractf128(dst, src, imm8); | |||
1528 | } | |||
1529 | } | |||
1530 | ||||
1531 | void vextracti128(Address dst, XMMRegister src, uint8_t imm8) { | |||
1532 | if (UseAVX > 2 && VM_Version::supports_avx512novl()) { | |||
1533 | Assembler::vextracti32x4(dst, src, imm8); | |||
1534 | } else if (UseAVX > 1) { | |||
1535 | // vextracti128 is available only in AVX2 | |||
1536 | Assembler::vextracti128(dst, src, imm8); | |||
1537 | } else { | |||
1538 | Assembler::vextractf128(dst, src, imm8); | |||
1539 | } | |||
1540 | } | |||
1541 | ||||
1542 | // 128bit copy to/from high 128 bits of 256bit (YMM) vector registers | |||
1543 | void vinserti128_high(XMMRegister dst, XMMRegister src) { | |||
1544 | vinserti128(dst, dst, src, 1); | |||
1545 | } | |||
1546 | void vinserti128_high(XMMRegister dst, Address src) { | |||
1547 | vinserti128(dst, dst, src, 1); | |||
1548 | } | |||
1549 | void vextracti128_high(XMMRegister dst, XMMRegister src) { | |||
1550 | vextracti128(dst, src, 1); | |||
1551 | } | |||
1552 | void vextracti128_high(Address dst, XMMRegister src) { | |||
1553 | vextracti128(dst, src, 1); | |||
1554 | } | |||

  void vinsertf128_high(XMMRegister dst, XMMRegister src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vinsertf32x4(dst, dst, src, 1);
    } else {
      Assembler::vinsertf128(dst, dst, src, 1);
    }
  }

  void vinsertf128_high(XMMRegister dst, Address src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vinsertf32x4(dst, dst, src, 1);
    } else {
      Assembler::vinsertf128(dst, dst, src, 1);
    }
  }

  void vextractf128_high(XMMRegister dst, XMMRegister src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vextractf32x4(dst, src, 1);
    } else {
      Assembler::vextractf128(dst, src, 1);
    }
  }

  void vextractf128_high(Address dst, XMMRegister src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vextractf32x4(dst, src, 1);
    } else {
      Assembler::vextractf128(dst, src, 1);
    }
  }

  // 256bit copy to/from high 256 bits of 512bit (ZMM) vector registers
  void vinserti64x4_high(XMMRegister dst, XMMRegister src) {
    Assembler::vinserti64x4(dst, dst, src, 1);
  }
  void vinsertf64x4_high(XMMRegister dst, XMMRegister src) {
    Assembler::vinsertf64x4(dst, dst, src, 1);
  }
  void vextracti64x4_high(XMMRegister dst, XMMRegister src) {
    Assembler::vextracti64x4(dst, src, 1);
  }
  void vextractf64x4_high(XMMRegister dst, XMMRegister src) {
    Assembler::vextractf64x4(dst, src, 1);
  }
  void vextractf64x4_high(Address dst, XMMRegister src) {
    Assembler::vextractf64x4(dst, src, 1);
  }
  void vinsertf64x4_high(XMMRegister dst, Address src) {
    Assembler::vinsertf64x4(dst, dst, src, 1);
  }

  // 128bit copy to/from low 128 bits of 256bit (YMM) vector registers
  void vinserti128_low(XMMRegister dst, XMMRegister src) {
    vinserti128(dst, dst, src, 0);
  }
  void vinserti128_low(XMMRegister dst, Address src) {
    vinserti128(dst, dst, src, 0);
  }
  void vextracti128_low(XMMRegister dst, XMMRegister src) {
    vextracti128(dst, src, 0);
  }
  void vextracti128_low(Address dst, XMMRegister src) {
    vextracti128(dst, src, 0);
  }

  void vinsertf128_low(XMMRegister dst, XMMRegister src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vinsertf32x4(dst, dst, src, 0);
    } else {
      Assembler::vinsertf128(dst, dst, src, 0);
    }
  }

  void vinsertf128_low(XMMRegister dst, Address src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vinsertf32x4(dst, dst, src, 0);
    } else {
      Assembler::vinsertf128(dst, dst, src, 0);
    }
  }

  void vextractf128_low(XMMRegister dst, XMMRegister src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vextractf32x4(dst, src, 0);
    } else {
      Assembler::vextractf128(dst, src, 0);
    }
  }

  void vextractf128_low(Address dst, XMMRegister src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vextractf32x4(dst, src, 0);
    } else {
      Assembler::vextractf128(dst, src, 0);
    }
  }

  // 256bit copy to/from low 256 bits of 512bit (ZMM) vector registers
  void vinserti64x4_low(XMMRegister dst, XMMRegister src) {
    Assembler::vinserti64x4(dst, dst, src, 0);
  }
  void vinsertf64x4_low(XMMRegister dst, XMMRegister src) {
    Assembler::vinsertf64x4(dst, dst, src, 0);
  }
  void vextracti64x4_low(XMMRegister dst, XMMRegister src) {
    Assembler::vextracti64x4(dst, src, 0);
  }
  void vextractf64x4_low(XMMRegister dst, XMMRegister src) {
    Assembler::vextractf64x4(dst, src, 0);
  }
  void vextractf64x4_low(Address dst, XMMRegister src) {
    Assembler::vextractf64x4(dst, src, 0);
  }
  void vinsertf64x4_low(XMMRegister dst, Address src) {
    Assembler::vinsertf64x4(dst, dst, src, 0);
  }

  // Carry-Less Multiplication Quadword
  void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    // 0x00 - multiply lower 64 bits [0:63]
    Assembler::vpclmulqdq(dst, nds, src, 0x00);
  }
  void vpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    // 0x11 - multiply upper 64 bits [64:127]
    Assembler::vpclmulqdq(dst, nds, src, 0x11);
  }
  void vpclmullqhqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    // 0x10 - multiply nds[0:63] and src[64:127]
    Assembler::vpclmulqdq(dst, nds, src, 0x10);
  }
  void vpclmulhqlqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    // 0x01 - multiply nds[64:127] and src[0:63]
    Assembler::vpclmulqdq(dst, nds, src, 0x01);
  }

  void evpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
    // 0x00 - multiply lower 64 bits [0:63]
    Assembler::evpclmulqdq(dst, nds, src, 0x00, vector_len);
  }
  void evpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
    // 0x11 - multiply upper 64 bits [64:127]
    Assembler::evpclmulqdq(dst, nds, src, 0x11, vector_len);
  }
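
  // Illustrative sketch (not part of the original interface): the four imm8
  // selectors above are exactly the pieces of a full 128 x 128 -> 256 bit
  // carry-less product, the building block of CRC folding and GHASH-style
  // code. Register assignments are hypothetical; a/b are the 128-bit inputs,
  // (lo, hi) receive the 256-bit product, t0/t1 are scratch.
  void example_clmul_128x128(XMMRegister lo, XMMRegister hi, XMMRegister t0,
                             XMMRegister t1, XMMRegister a, XMMRegister b) {
    vpclmulldq(lo, a, b);     // a[0:63]   * b[0:63]   -> product bits [0:127]
    vpclmulhdq(hi, a, b);     // a[64:127] * b[64:127] -> product bits [128:255]
    vpclmullqhqdq(t0, a, b);  // a[0:63]   * b[64:127] -> middle term
    vpclmulhqlqdq(t1, a, b);  // a[64:127] * b[0:63]   -> middle term
    pxor(t0, t1);             // middle terms combine by XOR (GF(2) addition)
    movdqu(t1, t0);
    pslldq(t0, 8);            // middle << 64 overlaps lo's upper qword
    psrldq(t1, 8);            // middle >> 64 overlaps hi's lower qword
    pxor(lo, t0);
    pxor(hi, t1);             // (lo, hi) now hold the 256-bit product
  }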

  // AVX-512 mask operations.
  void kand(BasicType etype, KRegister dst, KRegister src1, KRegister src2);
  void kor(BasicType type, KRegister dst, KRegister src1, KRegister src2);
  void knot(uint masklen, KRegister dst, KRegister src, KRegister ktmp = knoreg, Register rtmp = noreg);
  void kxor(BasicType type, KRegister dst, KRegister src1, KRegister src2);
  void kortest(uint masklen, KRegister src1, KRegister src2);
  void ktest(uint masklen, KRegister src1, KRegister src2);

  void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc);
  void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc);
  void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc);
  void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc);

  void alltrue(Register dst, uint masklen, KRegister src1, KRegister src2, KRegister kscratch);
  void anytrue(Register dst, uint masklen, KRegister src, KRegister kscratch);

  void cmov32(Condition cc, Register dst, Address src);
  void cmov32(Condition cc, Register dst, Register src);

  void cmov(Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); }

  void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
  void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
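
  // Minimal sketch (hypothetical helper, not in the original interface): the
  // cmov/cmovptr wrappers give branch-free selects that are pointer-width
  // correct on both 32- and 64-bit, e.g. clamping dst to an upper bound.
  // Assumes MacroAssembler::cmpptr, declared elsewhere in this class.
  void example_clamp(Register dst, Register bound) {
    cmpptr(dst, bound);
    cmov(Assembler::above, dst, bound);  // dst = (dst > bound) ? bound : dst
  }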

  void movoop(Register dst, jobject obj);
  void movoop(Address dst, jobject obj);

  void mov_metadata(Register dst, Metadata* obj);
  void mov_metadata(Address dst, Metadata* obj);

  void movptr(ArrayAddress dst, Register src);
  // can this do an lea?
  void movptr(Register dst, ArrayAddress src);

  void movptr(Register dst, Address src);

#ifdef _LP64
  void movptr(Register dst, AddressLiteral src, Register scratch = rscratch1);
#else
  void movptr(Register dst, AddressLiteral src, Register scratch = noreg); // Scratch reg is ignored in 32-bit
#endif

  void movptr(Register dst, intptr_t src);
  void movptr(Register dst, Register src);
  void movptr(Address dst, intptr_t src);

  void movptr(Address dst, Register src);

  void movptr(Register dst, RegisterOrConstant src) {
    if (src.is_constant()) movptr(dst, src.as_constant());
    else                   movptr(dst, src.as_register());
  }

#ifdef _LP64
  // Generally the next two are only used for moving NULL,
  // although there are situations in initializing the mark word
  // where they could be used. They are dangerous.

  // They only exist on LP64 so that int32_t and intptr_t are not the same
  // and we would otherwise have ambiguous declarations.

  void movptr(Address dst, int32_t imm32);
  void movptr(Register dst, int32_t imm32);
#endif // _LP64

  // to avoid hiding movl
  void mov32(AddressLiteral dst, Register src);
  void mov32(Register dst, AddressLiteral src);

  // to avoid hiding movb
  void movbyte(ArrayAddress dst, int src);

  // Import other mov() methods from the parent class or else
  // they will be hidden by the following overriding declaration.
  using Assembler::movdl;
  using Assembler::movq;
  void movdl(XMMRegister dst, AddressLiteral src);
  void movq(XMMRegister dst, AddressLiteral src);

  // Can push value or effective address
  void pushptr(AddressLiteral src);

  void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); }
  void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); }

  void pushoop(jobject obj);
  void pushklass(Metadata* obj);

  // sign-extend a 32-bit value (l) to a pointer-sized (ptr) element as needed
  void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
  void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }


 public:
  // C2 compiled method's prolog code.
  void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub);

  // clear memory of size 'cnt' qwords, starting at 'base';
  // if 'is_large' is set, do not try to produce a short loop
  void clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, bool is_large, KRegister mask = knoreg);

  // clear memory initialization sequence for constant size
  void clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask = knoreg);

  // clear memory of size 'cnt' qwords, starting at 'base', using XMM/YMM registers
  void xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask = knoreg);
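
  // Usage sketch (illustrative; registers hypothetical): zeroing `cnt` 8-byte
  // words starting at `base`, as an allocation path might. For sizes known at
  // compile time, the constant-size overload above is the better fit.
  void example_zero_words(Register base, Register cnt, Register rtmp, XMMRegister xtmp) {
    clear_mem(base, cnt, rtmp, xtmp, /*is_large=*/ false);
  }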

  // Fill primitive arrays
  void generate_fill(BasicType t, bool aligned,
                     Register to, Register value, Register count,
                     Register rtmp, XMMRegister xtmp);

  void encode_iso_array(Register src, Register dst, Register len,
                        XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
                        XMMRegister tmp4, Register tmp5, Register result, bool ascii);

#ifdef _LP64
  void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2);
  void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_add_128_x_128(Register x_xstart, Register y, Register z,
                              Register yz_idx, Register idx,
                              Register carry, Register product, int offset);
  void multiply_128_x_128_bmi2_loop(Register y, Register z,
                                    Register carry, Register carry2,
                                    Register idx, Register jdx,
                                    Register yz_idx1, Register yz_idx2,
                                    Register tmp, Register tmp3, Register tmp4);
  void multiply_128_x_128_loop(Register x_xstart, Register y, Register z,
                               Register yz_idx, Register idx, Register jdx,
                               Register carry, Register product,
                               Register carry2);
  void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen,
                       Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
  void square_rshift(Register x, Register len, Register z, Register tmp1, Register tmp3,
                     Register tmp4, Register tmp5, Register rdxReg, Register raxReg);
  void multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry,
                            Register tmp2);
  void multiply_add_64(Register sum, Register op1, Register op2, Register carry,
                       Register rdxReg, Register raxReg);
  void add_one_64(Register z, Register zlen, Register carry, Register tmp1);
  void lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2,
                   Register tmp3, Register tmp4);
  void square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2,
                     Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg);

  void mul_add_128_x_32_loop(Register out, Register in, Register offset, Register len, Register tmp1,
                             Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg,
                             Register raxReg);
  void mul_add(Register out, Register in, Register offset, Register len, Register k, Register tmp1,
               Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg,
               Register raxReg);
  void vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale,
                           Register result, Register tmp1, Register tmp2,
                           XMMRegister vec1, XMMRegister vec2, XMMRegister vec3);
#endif

  // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
  void update_byte_crc32(Register crc, Register val, Register table);
  void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp);
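
  // Reference note (illustrative; assumes the standard table-driven CRC32
  // step, and is not part of the original interface): update_byte_crc32
  // emits the scalar recurrence below, folding one byte into a
  // bit-reflected CRC.
  static uint32_t example_crc32_step(uint32_t crc, uint8_t b, const uint32_t* table) {
    return table[(crc ^ b) & 0xFF] ^ (crc >> 8);  // one byte of CRC32
  }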


#ifdef _LP64
  void kernel_crc32_avx512(Register crc, Register buf, Register len, Register table, Register tmp1, Register tmp2);
  void kernel_crc32_avx512_256B(Register crc, Register buf, Register len, Register key, Register pos,
                                Register tmp1, Register tmp2, Label& L_barrett, Label& L_16B_reduction_loop,
                                Label& L_get_last_two_xmms, Label& L_128_done, Label& L_cleanup);
  void updateBytesAdler32(Register adler32, Register buf, Register length, XMMRegister shuf0, XMMRegister shuf1, ExternalAddress scale);
#endif // _LP64

  // CRC32C code for java.util.zip.CRC32C::updateBytes() intrinsic
  // Note on a naming convention:
  // Prefix w = register only used on a Westmere+ architecture
  // Prefix n = register only used on a Nehalem architecture
#ifdef _LP64
  void crc32c_ipl_alg4(Register in_out, uint32_t n,
                       Register tmp1, Register tmp2, Register tmp3);
#else
  void crc32c_ipl_alg4(Register in_out, uint32_t n,
                       Register tmp1, Register tmp2, Register tmp3,
                       XMMRegister xtmp1, XMMRegister xtmp2);
#endif
  void crc32c_pclmulqdq(XMMRegister w_xtmp1,
                        Register in_out,
                        uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
                        XMMRegister w_xtmp2,
                        Register tmp1,
                        Register n_tmp2, Register n_tmp3);
  void crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
                       XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
                       Register tmp1, Register tmp2,
                       Register n_tmp3);
  void crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
                         Register in_out1, Register in_out2, Register in_out3,
                         Register tmp1, Register tmp2, Register tmp3,
                         XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
                         Register tmp4, Register tmp5,
                         Register n_tmp6);
  void crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
                            Register tmp1, Register tmp2, Register tmp3,
                            Register tmp4, Register tmp5, Register tmp6,
                            XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
                            bool is_pclmulqdq_supported);
  // Fold 128-bit data chunk
  void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset);
  void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf);
#ifdef _LP64
  // Fold 512-bit data chunk
  void fold512bit_crc32_avx512(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, Register pos, int offset);
#endif // _LP64
  // Fold 8-bit data
  void fold_8bit_crc32(Register crc, Register table, Register tmp);
  void fold_8bit_crc32(XMMRegister crc, Register table, XMMRegister xtmp, Register tmp);

  // Compress char[] array to byte[].
  void char_array_compress(Register src, Register dst, Register len,
                           XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
                           XMMRegister tmp4, Register tmp5, Register result,
                           KRegister mask1 = knoreg, KRegister mask2 = knoreg);

  // Inflate byte[] array to char[].
  void byte_array_inflate(Register src, Register dst, Register len,
                          XMMRegister tmp1, Register tmp2, KRegister mask = knoreg);
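
  // Scalar reference (illustrative; not the emitted code, and the intrinsic's
  // exact result convention differs): char_array_compress narrows UTF-16
  // chars to Latin-1 bytes, failing if any char needs more than 8 bits;
  // byte_array_inflate is the inverse zero-extension.
  static bool example_compress_ref(const jchar* src, jbyte* dst, int len) {
    for (int i = 0; i < len; i++) {
      jchar c = src[i];
      if (c > 0xFF) return false;  // not representable in Latin-1
      dst[i] = (jbyte)c;
    }
    return true;
  }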

  void fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRegister mask,
                   Register length, Register temp, int vec_enc);

  void fill64_masked(uint shift, Register dst, int disp,
                     XMMRegister xmm, KRegister mask, Register length,
                     Register temp, bool use64byteVector = false);

  void fill32_masked(uint shift, Register dst, int disp,
                     XMMRegister xmm, KRegister mask, Register length,
                     Register temp);

  void fill32(Register dst, int disp, XMMRegister xmm);

  void fill64(Register dst, int disp, XMMRegister xmm, bool use64byteVector = false);
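
  // Illustrative note (hypothetical standalone helper): the *_masked fills
  // handle the tail of a region with an AVX-512 opmask instead of a scalar
  // loop; the mask for an n-element tail is simply the low n bits set.
  static uint64_t example_tail_mask(uint n) {
    return (n >= 64) ? ~(uint64_t)0 : (((uint64_t)1 << n) - 1);
  }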

#ifdef _LP64
  void convert_f2i(Register dst, XMMRegister src);
  void convert_d2i(Register dst, XMMRegister src);
  void convert_f2l(Register dst, XMMRegister src);
  void convert_d2l(Register dst, XMMRegister src);
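
  // Reference semantics (illustrative sketch, not the emitted code): Java's
  // f2i saturates out-of-range inputs and maps NaN to zero, unlike a bare
  // cvttss2si, which yields 0x80000000 for every unrepresentable input.
  static jint example_java_f2i(float f) {
    const float two_pow_31 = 2147483648.0f;
    if (f != f)           return 0;         // NaN -> 0
    if (f >= two_pow_31)  return max_jint;  // saturate high
    if (f <= -two_pow_31) return min_jint;  // saturate low (-2^31 is exact)
    return (jint)f;                         // in range: plain truncation
  }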

  void cache_wb(Address line);
  void cache_wbsync(bool is_pre);

#if COMPILER2_OR_JVMCI
  void arraycopy_avx3_special_cases(XMMRegister xmm, KRegister mask, Register from,
                                    Register to, Register count, int shift,
                                    Register index, Register temp,
                                    bool use64byteVector, Label& L_entry, Label& L_exit);

  void arraycopy_avx3_special_cases_conjoint(XMMRegister xmm, KRegister mask, Register from,
                                             Register to, Register start_index, Register end_index,
                                             Register count, int shift, Register temp,
                                             bool use64byteVector, Label& L_entry, Label& L_exit);

  void copy64_masked_avx(Register dst, Register src, XMMRegister xmm,
                         KRegister mask, Register length, Register index,
                         Register temp, int shift = Address::times_1, int offset = 0,
                         bool use64byteVector = false);

  void copy32_masked_avx(Register dst, Register src, XMMRegister xmm,
                         KRegister mask, Register length, Register index,
                         Register temp, int shift = Address::times_1, int offset = 0);

  void copy32_avx(Register dst, Register src, Register index, XMMRegister xmm,
                  int shift = Address::times_1, int offset = 0);

  void copy64_avx(Register dst, Register src, Register index, XMMRegister xmm,
                  bool conjoint, int shift = Address::times_1, int offset = 0,
                  bool use64byteVector = false);

  void generate_fill_avx3(BasicType type, Register to, Register value,
                          Register count, Register rtmp, XMMRegister xtmp);

#endif // COMPILER2_OR_JVMCI

#endif // _LP64

  void vallones(XMMRegister dst, int vector_len);
};

/**
 * class SkipIfEqual:
 *
 * Instantiating this class emits assembly code that, depending on the
 * run-time value of the flag passed to the constructor, jumps around any
 * code emitted between the creation of the instance and its automatic
 * destruction at the end of the enclosing scope.
 */
class SkipIfEqual {
 private:
  MacroAssembler* _masm;
  Label _label;

 public:
  SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
  ~SkipIfEqual();
};
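
// Usage sketch (hypothetical helper, illustrative only): the guarded
// instructions are emitted unconditionally, but at run time they are jumped
// over whenever *flag_addr equals the `value` passed to the constructor.
inline void example_emit_guarded(MacroAssembler* masm, const bool* flag_addr) {
  SkipIfEqual skip(masm, flag_addr, false);
  // ... instructions emitted here execute only when *flag_addr is true ...
  masm->nop();  // stand-in for the guarded code
}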

#endif // CPU_X86_MACROASSEMBLER_X86_HPP