File: | jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp |
Warning: | line 1143, column 29 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* | ||||
2 | * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. | ||||
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | ||||
4 | * | ||||
5 | * This code is free software; you can redistribute it and/or modify it | ||||
6 | * under the terms of the GNU General Public License version 2 only, as | ||||
7 | * published by the Free Software Foundation. | ||||
8 | * | ||||
9 | * This code is distributed in the hope that it will be useful, but WITHOUT | ||||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | ||||
12 | * version 2 for more details (a copy is included in the LICENSE file that | ||||
13 | * accompanied this code). | ||||
14 | * | ||||
15 | * You should have received a copy of the GNU General Public License version | ||||
16 | * 2 along with this work; if not, write to the Free Software Foundation, | ||||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
18 | * | ||||
19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | ||||
20 | * or visit www.oracle.com if you need additional information or have any | ||||
21 | * questions. | ||||
22 | * | ||||
23 | */ | ||||
24 | |||||
#include "precompiled.hpp"
#include "jvm.h"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/codeBlob.hpp"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "runtime/os.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/virtualizationSupport.hpp"

#include OS_HEADER_INLINE(os)
43 | |||||
44 | int VM_Version::_cpu; | ||||
45 | int VM_Version::_model; | ||||
46 | int VM_Version::_stepping; | ||||
47 | bool VM_Version::_has_intel_jcc_erratum; | ||||
48 | VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; | ||||
49 | |||||
50 | #define DECLARE_CPU_FEATURE_NAME(id, name, bit)name, name, | ||||
51 | const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)"cx8", "cmov", "fxsr", "ht", "mmx", "3dnowpref", "sse", "sse2" , "sse3", "ssse3", "sse4a", "sse4.1", "sse4.2", "popcnt", "lzcnt" , "tsc", "tscinvbit", "tscinv", "avx", "avx2", "aes", "erms", "clmul", "bmi1", "bmi2", "rtm", "adx", "avx512f", "avx512dq" , "avx512pf", "avx512er", "avx512cd", "avx512bw", "avx512vl", "sha", "fma", "vzeroupper", "avx512_vpopcntdq", "avx512_vpclmulqdq" , "avx512_vaes", "avx512_vnni", "clflush", "clflushopt", "clwb" , "avx512_vbmi2", "avx512_vbmi", "hv", "serialize",}; | ||||
52 | #undef DECLARE_CPU_FEATURE_FLAG | ||||
53 | |||||
54 | // Address of instruction which causes SEGV | ||||
55 | address VM_Version::_cpuinfo_segv_addr = 0; | ||||
56 | // Address of instruction after the one which causes SEGV | ||||
57 | address VM_Version::_cpuinfo_cont_addr = 0; | ||||
58 | |||||
59 | static BufferBlob* stub_blob; | ||||
60 | static const int stub_size = 2000; | ||||
61 | |||||
62 | extern "C" { | ||||
63 | typedef void (*get_cpu_info_stub_t)(void*); | ||||
64 | typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*); | ||||
65 | } | ||||
66 | static get_cpu_info_stub_t get_cpu_info_stub = NULL__null; | ||||
67 | static detect_virt_stub_t detect_virt_stub = NULL__null; | ||||
68 | |||||
#ifdef _LP64

// Returns whether the CLFLUSH instruction may be used. On x86_64 this is
// unconditionally true; the assert documents the expectation that the
// CPU_FLUSH feature bit is set once feature detection has completed.
bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part
  // of generation of the code cache flush routine. This happens
  // under Universe::init before the processor features are set
  // up. Assembler::flush calls this routine to check that clflush
  // is allowed. So, we give the caller a free pass if Universe init
  // is still in progress.
  assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  return true;
}
#endif
85 | |||||
// CPUID leaf numbers used by the brand-string stub below.
// Standard leaves (EAX input < 0x80000000):
#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

// Extended leaves (EAX input >= 0x80000000); FN_2..FN_4 return the
// processor brand string, FN_7/FN_8 report power-management / address sizes.
#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
98 | |||||
99 | class VM_Version_StubGenerator: public StubCodeGenerator { | ||||
100 | public: | ||||
101 | |||||
  // Emits all CPU-detection stubs into the supplied CodeBuffer.
  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
103 | |||||
  // Generates the get_cpu_info stub:
  //   void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
  // The stub probes for CPUID support (386/486 detection via EFLAGS.AC/ID),
  // then fills the CpuidInfo struct with the standard, topology, cache,
  // structured-extended-feature and extended CPUID leaves. If OSXSAVE+AVX
  // are present it additionally runs a deliberate SEGV round-trip to verify
  // the OS restores the upper YMM/ZMM register halves across signals.
  // Returns the entry address of the generated code.
  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);

    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through NULL)
    // and check upper YMM/ZMM bits after it.
    //
    intx saved_useavx = UseAVX;
    intx saved_usesse = UseSSE;

    // If UseAVX is unitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm
      __ cmpl(rax, 0x10000);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _LP64
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
#endif
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
#ifdef _LP64
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _LP64
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
#endif
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // If UseAVX is unitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ cmpl(rax, 0x10000);
      __ jcc(Assembler::notEqual, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
#endif

#ifdef _WINDOWS
#ifdef _LP64
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _LP64
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
#ifdef _LP64
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);
#endif

#ifdef _WINDOWS
#ifdef _LP64
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _LP64
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS
    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
  // Emits a guarded vzeroupper before returning from the SIMD check above.
  // Skips the instruction entirely on non-Intel CPUs ("Genu" vendor check)
  // and on Xeon Phi models, jumping straight to L_wrapup in those cases.
  // Expects rbp to still hold the CpuidInfo address; clobbers rsi and rcx.
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }
  // Generates the detect_virt stub:
  //   void detect_virt(uint32_t leaf, uint32_t regs[4]);
  // Executes CPUID with the given leaf in EAX and stores the raw
  // EAX/EBX/ECX/EDX results into the caller-supplied 4-element array.
  // Used to query hypervisor CPUID leaves for virtualization detection.
  // Returns the entry address of the generated code.
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

#ifdef _LP64
    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
#else
    __ movptr(rax, Address(rsp, 16)); // CPUID leaf
    __ movptr(rsi, Address(rsp, 20)); // register array address
#endif

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
646 | |||||
647 | |||||
648 | address generate_getCPUIDBrandString(void) { | ||||
649 | // Flags to test CPU type. | ||||
650 | const uint32_t HS_EFL_AC = 0x40000; | ||||
651 | const uint32_t HS_EFL_ID = 0x200000; | ||||
652 | // Values for when we don't have a CPUID instruction. | ||||
653 | const int CPU_FAMILY_SHIFT = 8; | ||||
654 | const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); | ||||
655 | const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); | ||||
656 | |||||
657 | Label detect_486, cpu486, detect_586, done, ext_cpuid; | ||||
658 | |||||
659 | StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub"); | ||||
660 | # define __ _masm-> | ||||
661 | |||||
662 | address start = __ pc(); | ||||
663 | |||||
664 | // | ||||
665 | // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info); | ||||
666 | // | ||||
667 | // LP64: rcx and rdx are first and second argument registers on windows | ||||
668 | |||||
669 | __ push(rbp); | ||||
670 | #ifdef _LP641 | ||||
671 | __ mov(rbp, c_rarg0); // cpuid_info address | ||||
672 | #else | ||||
673 | __ movptr(rbp, Address(rsp, 8)); // cpuid_info address | ||||
674 | #endif | ||||
675 | __ push(rbx); | ||||
676 | __ push(rsi); | ||||
677 | __ pushf(); // preserve rbx, and flags | ||||
678 | __ pop(rax); | ||||
679 | __ push(rax); | ||||
680 | __ mov(rcx, rax); | ||||
681 | // | ||||
682 | // if we are unable to change the AC flag, we have a 386 | ||||
683 | // | ||||
684 | __ xorl(rax, HS_EFL_AC); | ||||
685 | __ push(rax); | ||||
686 | __ popf(); | ||||
687 | __ pushf(); | ||||
688 | __ pop(rax); | ||||
689 | __ cmpptr(rax, rcx); | ||||
690 | __ jccb(Assembler::notEqual, detect_486)jccb_0(Assembler::notEqual, detect_486, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 690); | ||||
691 | |||||
692 | __ movl(rax, CPU_FAMILY_386); | ||||
693 | __ jmp(done); | ||||
694 | |||||
695 | // | ||||
696 | // If we are unable to change the ID flag, we have a 486 which does | ||||
697 | // not support the "cpuid" instruction. | ||||
698 | // | ||||
699 | __ bind(detect_486); | ||||
700 | __ mov(rax, rcx); | ||||
701 | __ xorl(rax, HS_EFL_ID); | ||||
702 | __ push(rax); | ||||
703 | __ popf(); | ||||
704 | __ pushf(); | ||||
705 | __ pop(rax); | ||||
706 | __ cmpptr(rcx, rax); | ||||
707 | __ jccb(Assembler::notEqual, detect_586)jccb_0(Assembler::notEqual, detect_586, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 707); | ||||
708 | |||||
709 | __ bind(cpu486); | ||||
710 | __ movl(rax, CPU_FAMILY_486); | ||||
711 | __ jmp(done); | ||||
712 | |||||
713 | // | ||||
714 | // At this point, we have a chip which supports the "cpuid" instruction | ||||
715 | // | ||||
716 | __ bind(detect_586); | ||||
717 | __ xorl(rax, rax); | ||||
718 | __ cpuid(); | ||||
719 | __ orl(rax, rax); | ||||
720 | __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input | ||||
721 | // value of at least 1, we give up and | ||||
722 | // assume a 486 | ||||
723 | |||||
724 | // | ||||
725 | // Extended cpuid(0x80000000) for processor brand string detection | ||||
726 | // | ||||
727 | __ bind(ext_cpuid); | ||||
728 | __ movl(rax, CPUID_EXTENDED_FN0x80000000); | ||||
729 | __ cpuid(); | ||||
730 | __ cmpl(rax, CPUID_EXTENDED_FN_40x80000004); | ||||
731 | __ jcc(Assembler::below, done); | ||||
732 | |||||
733 | // | ||||
734 | // Extended cpuid(0x80000002) // first 16 bytes in brand string | ||||
735 | // | ||||
736 | __ movl(rax, CPUID_EXTENDED_FN_20x80000002); | ||||
737 | __ cpuid(); | ||||
738 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset()))); | ||||
739 | __ movl(Address(rsi, 0), rax); | ||||
740 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset()))); | ||||
741 | __ movl(Address(rsi, 0), rbx); | ||||
742 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset()))); | ||||
743 | __ movl(Address(rsi, 0), rcx); | ||||
744 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset()))); | ||||
745 | __ movl(Address(rsi,0), rdx); | ||||
746 | |||||
747 | // | ||||
748 | // Extended cpuid(0x80000003) // next 16 bytes in brand string | ||||
749 | // | ||||
750 | __ movl(rax, CPUID_EXTENDED_FN_30x80000003); | ||||
751 | __ cpuid(); | ||||
752 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset()))); | ||||
753 | __ movl(Address(rsi, 0), rax); | ||||
754 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset()))); | ||||
755 | __ movl(Address(rsi, 0), rbx); | ||||
756 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset()))); | ||||
757 | __ movl(Address(rsi, 0), rcx); | ||||
758 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset()))); | ||||
759 | __ movl(Address(rsi,0), rdx); | ||||
760 | |||||
761 | // | ||||
762 | // Extended cpuid(0x80000004) // last 16 bytes in brand string | ||||
763 | // | ||||
764 | __ movl(rax, CPUID_EXTENDED_FN_40x80000004); | ||||
765 | __ cpuid(); | ||||
766 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset()))); | ||||
767 | __ movl(Address(rsi, 0), rax); | ||||
768 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset()))); | ||||
769 | __ movl(Address(rsi, 0), rbx); | ||||
770 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset()))); | ||||
771 | __ movl(Address(rsi, 0), rcx); | ||||
772 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset()))); | ||||
773 | __ movl(Address(rsi,0), rdx); | ||||
774 | |||||
775 | // | ||||
776 | // return | ||||
777 | // | ||||
778 | __ bind(done); | ||||
779 | __ popf(); | ||||
780 | __ pop(rsi); | ||||
781 | __ pop(rbx); | ||||
782 | __ pop(rbp); | ||||
783 | __ ret(0); | ||||
784 | |||||
785 | # undef __ | ||||
786 | |||||
787 | return start; | ||||
788 | }; | ||||
789 | }; | ||||
790 | |||||
791 | void VM_Version::get_processor_features() { | ||||
792 | |||||
793 | _cpu = 4; // 486 by default | ||||
794 | _model = 0; | ||||
795 | _stepping = 0; | ||||
796 | _features = 0; | ||||
797 | _logical_processors_per_package = 1; | ||||
798 | // i486 internal cache is both I&D and has a 16-byte line size | ||||
799 | _L1_data_cache_line_size = 16; | ||||
800 | |||||
801 | // Get raw processor info | ||||
802 | |||||
803 | get_cpu_info_stub(&_cpuid_info); | ||||
804 | |||||
805 | assert_is_initialized(); | ||||
806 | _cpu = extended_cpu_family(); | ||||
807 | _model = extended_cpu_model(); | ||||
808 | _stepping = cpu_stepping(); | ||||
809 | |||||
810 | if (cpu_family() > 4) { // it supports CPUID | ||||
811 | _features = feature_flags(); | ||||
812 | // Logical processors are only available on P4s and above, | ||||
813 | // and only if hyperthreading is available. | ||||
814 | _logical_processors_per_package = logical_processor_count(); | ||||
815 | _L1_data_cache_line_size = L1_line_size(); | ||||
816 | } | ||||
817 | |||||
818 | _supports_cx8 = supports_cmpxchg8(); | ||||
819 | // xchg and xadd instructions | ||||
820 | _supports_atomic_getset4 = true; | ||||
821 | _supports_atomic_getadd4 = true; | ||||
822 | LP64_ONLY(_supports_atomic_getset8 = true)_supports_atomic_getset8 = true; | ||||
823 | LP64_ONLY(_supports_atomic_getadd8 = true)_supports_atomic_getadd8 = true; | ||||
824 | |||||
825 | #ifdef _LP641 | ||||
826 | // OS should support SSE for x64 and hardware should support at least SSE2. | ||||
827 | if (!VM_Version::supports_sse2()) { | ||||
828 | vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); | ||||
829 | } | ||||
830 | // in 64 bit the use of SSE2 is the minimum | ||||
831 | if (UseSSE < 2) UseSSE = 2; | ||||
832 | #endif | ||||
833 | |||||
834 | #ifdef AMD641 | ||||
835 | // flush_icache_stub have to be generated first. | ||||
836 | // That is why Icache line size is hard coded in ICache class, | ||||
837 | // see icache_x86.hpp. It is also the reason why we can't use | ||||
838 | // clflush instruction in 32-bit VM since it could be running | ||||
839 | // on CPU which does not support it. | ||||
840 | // | ||||
841 | // The only thing we can do is to verify that flushed | ||||
842 | // ICache::line_size has correct value. | ||||
843 | guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported")do { if (!(_cpuid_info.std_cpuid1_edx.bits.clflush != 0)) { ( *g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 843, "guarantee(" "_cpuid_info.std_cpuid1_edx.bits.clflush != 0" ") failed", "clflush is not supported"); ::breakpoint(); } } while (0); | ||||
844 | // clflush_size is size in quadwords (8 bytes). | ||||
845 | guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported")do { if (!(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8) ) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 845, "guarantee(" "_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8" ") failed", "such clflush size is not supported"); ::breakpoint (); } } while (0); | ||||
846 | #endif | ||||
847 | |||||
848 | #ifdef _LP641 | ||||
849 | // assigning this field effectively enables Unsafe.writebackMemory() | ||||
850 | // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero | ||||
851 | // that is only implemented on x86_64 and only if the OS plays ball | ||||
852 | if (os::supports_map_sync()) { | ||||
853 | // publish data cache line flush size to generic field, otherwise | ||||
854 | // let if default to zero thereby disabling writeback | ||||
855 | _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8; | ||||
856 | } | ||||
857 | #endif | ||||
858 | // If the OS doesn't support SSE, we can't use this feature even if the HW does | ||||
859 | if (!os::supports_sse()) | ||||
860 | _features &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2); | ||||
861 | |||||
862 | if (UseSSE < 4) { | ||||
863 | _features &= ~CPU_SSE4_1; | ||||
864 | _features &= ~CPU_SSE4_2; | ||||
865 | } | ||||
866 | |||||
867 | if (UseSSE < 3) { | ||||
868 | _features &= ~CPU_SSE3; | ||||
869 | _features &= ~CPU_SSSE3; | ||||
870 | _features &= ~CPU_SSE4A; | ||||
871 | } | ||||
872 | |||||
873 | if (UseSSE < 2) | ||||
874 | _features &= ~CPU_SSE2; | ||||
875 | |||||
876 | if (UseSSE < 1) | ||||
877 | _features &= ~CPU_SSE; | ||||
878 | |||||
879 | //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0. | ||||
880 | if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) { | ||||
881 | UseAVX = 0; | ||||
882 | } | ||||
883 | |||||
884 | // first try initial setting and detect what we can support | ||||
885 | int use_avx_limit = 0; | ||||
886 | if (UseAVX > 0) { | ||||
887 | if (UseAVX > 2 && supports_evex()) { | ||||
888 | use_avx_limit = 3; | ||||
889 | } else if (UseAVX > 1 && supports_avx2()) { | ||||
890 | use_avx_limit = 2; | ||||
891 | } else if (UseAVX > 0 && supports_avx()) { | ||||
892 | use_avx_limit = 1; | ||||
893 | } else { | ||||
894 | use_avx_limit = 0; | ||||
895 | } | ||||
896 | } | ||||
897 | if (FLAG_IS_DEFAULT(UseAVX)(JVMFlag::is_default(Flag_UseAVX_enum))) { | ||||
898 | // Don't use AVX-512 on older Skylakes unless explicitly requested. | ||||
899 | if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) { | ||||
900 | FLAG_SET_DEFAULT(UseAVX, 2)((UseAVX) = (2)); | ||||
901 | } else { | ||||
902 | FLAG_SET_DEFAULT(UseAVX, use_avx_limit)((UseAVX) = (use_avx_limit)); | ||||
903 | } | ||||
904 | } | ||||
905 | if (UseAVX > use_avx_limit) { | ||||
906 | warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", (int) UseAVX, use_avx_limit); | ||||
907 | FLAG_SET_DEFAULT(UseAVX, use_avx_limit)((UseAVX) = (use_avx_limit)); | ||||
908 | } else if (UseAVX < 0) { | ||||
909 | warning("UseAVX=%d is not valid, setting it to UseAVX=0", (int) UseAVX); | ||||
910 | FLAG_SET_DEFAULT(UseAVX, 0)((UseAVX) = (0)); | ||||
911 | } | ||||
912 | |||||
913 | if (UseAVX < 3) { | ||||
914 | _features &= ~CPU_AVX512F; | ||||
915 | _features &= ~CPU_AVX512DQ; | ||||
916 | _features &= ~CPU_AVX512CD; | ||||
917 | _features &= ~CPU_AVX512BW; | ||||
918 | _features &= ~CPU_AVX512VL; | ||||
919 | _features &= ~CPU_AVX512_VPOPCNTDQ; | ||||
920 | _features &= ~CPU_AVX512_VPCLMULQDQ; | ||||
921 | _features &= ~CPU_AVX512_VAES; | ||||
922 | _features &= ~CPU_AVX512_VNNI; | ||||
923 | _features &= ~CPU_AVX512_VBMI; | ||||
924 | _features &= ~CPU_AVX512_VBMI2; | ||||
925 | } | ||||
926 | |||||
927 | if (UseAVX < 2) | ||||
928 | _features &= ~CPU_AVX2; | ||||
929 | |||||
930 | if (UseAVX < 1) { | ||||
931 | _features &= ~CPU_AVX; | ||||
932 | _features &= ~CPU_VZEROUPPER; | ||||
933 | } | ||||
934 | |||||
935 | if (logical_processors_per_package() == 1) { | ||||
936 | // HT processor could be installed on a system which doesn't support HT. | ||||
937 | _features &= ~CPU_HT; | ||||
938 | } | ||||
939 | |||||
940 | if (is_intel()) { // Intel cpus specific settings | ||||
941 | if (is_knights_family()) { | ||||
942 | _features &= ~CPU_VZEROUPPER; | ||||
943 | _features &= ~CPU_AVX512BW; | ||||
944 | _features &= ~CPU_AVX512VL; | ||||
945 | _features &= ~CPU_AVX512DQ; | ||||
946 | _features &= ~CPU_AVX512_VNNI; | ||||
947 | _features &= ~CPU_AVX512_VAES; | ||||
948 | _features &= ~CPU_AVX512_VPOPCNTDQ; | ||||
949 | _features &= ~CPU_AVX512_VPCLMULQDQ; | ||||
950 | _features &= ~CPU_AVX512_VBMI; | ||||
951 | _features &= ~CPU_AVX512_VBMI2; | ||||
952 | _features &= ~CPU_CLWB; | ||||
953 | _features &= ~CPU_FLUSHOPT; | ||||
954 | } | ||||
955 | } | ||||
956 | |||||
957 | if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)(JVMFlag::is_default(Flag_IntelJccErratumMitigation_enum))) { | ||||
958 | _has_intel_jcc_erratum = compute_has_intel_jcc_erratum(); | ||||
959 | } else { | ||||
960 | _has_intel_jcc_erratum = IntelJccErratumMitigation; | ||||
961 | } | ||||
962 | |||||
963 | char buf[512]; | ||||
964 | int res = jio_snprintf( | ||||
965 | buf, sizeof(buf), | ||||
966 | "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x", | ||||
967 | cores_per_cpu(), threads_per_core(), | ||||
968 | cpu_family(), _model, _stepping, os::cpu_microcode_revision()); | ||||
969 | assert(res > 0, "not enough temporary space allocated")do { if (!(res > 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 969, "assert(" "res > 0" ") failed", "not enough temporary space allocated" ); ::breakpoint(); } } while (0); | ||||
970 | insert_features_names(buf + res, sizeof(buf) - res, _features_names); | ||||
971 | |||||
972 | _features_string = os::strdup(buf); | ||||
973 | |||||
974 | // UseSSE is set to the smaller of what hardware supports and what | ||||
975 | // the command line requires. I.e., you cannot set UseSSE to 2 on | ||||
976 | // older Pentiums which do not support it. | ||||
977 | int use_sse_limit = 0; | ||||
978 | if (UseSSE > 0) { | ||||
979 | if (UseSSE > 3 && supports_sse4_1()) { | ||||
980 | use_sse_limit = 4; | ||||
981 | } else if (UseSSE > 2 && supports_sse3()) { | ||||
982 | use_sse_limit = 3; | ||||
983 | } else if (UseSSE > 1 && supports_sse2()) { | ||||
984 | use_sse_limit = 2; | ||||
985 | } else if (UseSSE > 0 && supports_sse()) { | ||||
986 | use_sse_limit = 1; | ||||
987 | } else { | ||||
988 | use_sse_limit = 0; | ||||
989 | } | ||||
990 | } | ||||
991 | if (FLAG_IS_DEFAULT(UseSSE)(JVMFlag::is_default(Flag_UseSSE_enum))) { | ||||
992 | FLAG_SET_DEFAULT(UseSSE, use_sse_limit)((UseSSE) = (use_sse_limit)); | ||||
993 | } else if (UseSSE > use_sse_limit) { | ||||
994 | warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", (int) UseSSE, use_sse_limit); | ||||
995 | FLAG_SET_DEFAULT(UseSSE, use_sse_limit)((UseSSE) = (use_sse_limit)); | ||||
996 | } else if (UseSSE < 0) { | ||||
997 | warning("UseSSE=%d is not valid, setting it to UseSSE=0", (int) UseSSE); | ||||
998 | FLAG_SET_DEFAULT(UseSSE, 0)((UseSSE) = (0)); | ||||
999 | } | ||||
1000 | |||||
1001 | // Use AES instructions if available. | ||||
1002 | if (supports_aes()) { | ||||
1003 | if (FLAG_IS_DEFAULT(UseAES)(JVMFlag::is_default(Flag_UseAES_enum))) { | ||||
1004 | FLAG_SET_DEFAULT(UseAES, true)((UseAES) = (true)); | ||||
1005 | } | ||||
1006 | if (!UseAES) { | ||||
1007 | if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)(JVMFlag::is_default(Flag_UseAESIntrinsics_enum))) { | ||||
1008 | warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled."); | ||||
1009 | } | ||||
1010 | FLAG_SET_DEFAULT(UseAESIntrinsics, false)((UseAESIntrinsics) = (false)); | ||||
1011 | } else { | ||||
1012 | if (UseSSE > 2) { | ||||
1013 | if (FLAG_IS_DEFAULT(UseAESIntrinsics)(JVMFlag::is_default(Flag_UseAESIntrinsics_enum))) { | ||||
1014 | FLAG_SET_DEFAULT(UseAESIntrinsics, true)((UseAESIntrinsics) = (true)); | ||||
1015 | } | ||||
1016 | } else { | ||||
1017 | // The AES intrinsic stubs require AES instruction support (of course) | ||||
1018 | // but also require sse3 mode or higher for instructions it use. | ||||
1019 | if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)(JVMFlag::is_default(Flag_UseAESIntrinsics_enum))) { | ||||
1020 | warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled."); | ||||
1021 | } | ||||
1022 | FLAG_SET_DEFAULT(UseAESIntrinsics, false)((UseAESIntrinsics) = (false)); | ||||
1023 | } | ||||
1024 | |||||
1025 | // --AES-CTR begins-- | ||||
1026 | if (!UseAESIntrinsics) { | ||||
1027 | if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)(JVMFlag::is_default(Flag_UseAESCTRIntrinsics_enum))) { | ||||
1028 | warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled."); | ||||
1029 | FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false)((UseAESCTRIntrinsics) = (false)); | ||||
1030 | } | ||||
1031 | } else { | ||||
1032 | if (supports_sse4_1()) { | ||||
1033 | if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)(JVMFlag::is_default(Flag_UseAESCTRIntrinsics_enum))) { | ||||
1034 | FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true)((UseAESCTRIntrinsics) = (true)); | ||||
1035 | } | ||||
1036 | } else { | ||||
1037 | // The AES-CTR intrinsic stubs require AES instruction support (of course) | ||||
1038 | // but also require sse4.1 mode or higher for instructions it use. | ||||
1039 | if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)(JVMFlag::is_default(Flag_UseAESCTRIntrinsics_enum))) { | ||||
1040 | warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled."); | ||||
1041 | } | ||||
1042 | FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false)((UseAESCTRIntrinsics) = (false)); | ||||
1043 | } | ||||
1044 | } | ||||
1045 | // --AES-CTR ends-- | ||||
1046 | } | ||||
1047 | } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) { | ||||
1048 | if (UseAES && !FLAG_IS_DEFAULT(UseAES)(JVMFlag::is_default(Flag_UseAES_enum))) { | ||||
1049 | warning("AES instructions are not available on this CPU"); | ||||
1050 | FLAG_SET_DEFAULT(UseAES, false)((UseAES) = (false)); | ||||
1051 | } | ||||
1052 | if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)(JVMFlag::is_default(Flag_UseAESIntrinsics_enum))) { | ||||
1053 | warning("AES intrinsics are not available on this CPU"); | ||||
1054 | FLAG_SET_DEFAULT(UseAESIntrinsics, false)((UseAESIntrinsics) = (false)); | ||||
1055 | } | ||||
1056 | if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)(JVMFlag::is_default(Flag_UseAESCTRIntrinsics_enum))) { | ||||
1057 | warning("AES-CTR intrinsics are not available on this CPU"); | ||||
1058 | FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false)((UseAESCTRIntrinsics) = (false)); | ||||
1059 | } | ||||
1060 | } | ||||
1061 | |||||
1062 | // Use CLMUL instructions if available. | ||||
1063 | if (supports_clmul()) { | ||||
1064 | if (FLAG_IS_DEFAULT(UseCLMUL)(JVMFlag::is_default(Flag_UseCLMUL_enum))) { | ||||
1065 | UseCLMUL = true; | ||||
1066 | } | ||||
1067 | } else if (UseCLMUL) { | ||||
1068 | if (!FLAG_IS_DEFAULT(UseCLMUL)(JVMFlag::is_default(Flag_UseCLMUL_enum))) | ||||
1069 | warning("CLMUL instructions not available on this CPU (AVX may also be required)"); | ||||
1070 | FLAG_SET_DEFAULT(UseCLMUL, false)((UseCLMUL) = (false)); | ||||
1071 | } | ||||
1072 | |||||
1073 | if (UseCLMUL && (UseSSE > 2)) { | ||||
1074 | if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)(JVMFlag::is_default(Flag_UseCRC32Intrinsics_enum))) { | ||||
1075 | UseCRC32Intrinsics = true; | ||||
1076 | } | ||||
1077 | } else if (UseCRC32Intrinsics) { | ||||
1078 | if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)(JVMFlag::is_default(Flag_UseCRC32Intrinsics_enum))) | ||||
1079 | warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)"); | ||||
1080 | FLAG_SET_DEFAULT(UseCRC32Intrinsics, false)((UseCRC32Intrinsics) = (false)); | ||||
1081 | } | ||||
1082 | |||||
1083 | #ifdef _LP641 | ||||
1084 | if (supports_avx2()) { | ||||
1085 | if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)(JVMFlag::is_default(Flag_UseAdler32Intrinsics_enum))) { | ||||
1086 | UseAdler32Intrinsics = true; | ||||
1087 | } | ||||
1088 | } else if (UseAdler32Intrinsics) { | ||||
1089 | if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)(JVMFlag::is_default(Flag_UseAdler32Intrinsics_enum))) { | ||||
1090 | warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)"); | ||||
1091 | } | ||||
1092 | FLAG_SET_DEFAULT(UseAdler32Intrinsics, false)((UseAdler32Intrinsics) = (false)); | ||||
1093 | } | ||||
1094 | #else | ||||
1095 | if (UseAdler32Intrinsics) { | ||||
1096 | warning("Adler32Intrinsics not available on this CPU."); | ||||
1097 | FLAG_SET_DEFAULT(UseAdler32Intrinsics, false)((UseAdler32Intrinsics) = (false)); | ||||
1098 | } | ||||
1099 | #endif | ||||
1100 | |||||
1101 | if (supports_sse4_2() && supports_clmul()) { | ||||
1102 | if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)(JVMFlag::is_default(Flag_UseCRC32CIntrinsics_enum))) { | ||||
1103 | UseCRC32CIntrinsics = true; | ||||
1104 | } | ||||
1105 | } else if (UseCRC32CIntrinsics) { | ||||
1106 | if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)(JVMFlag::is_default(Flag_UseCRC32CIntrinsics_enum))) { | ||||
1107 | warning("CRC32C intrinsics are not available on this CPU"); | ||||
1108 | } | ||||
1109 | FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false)((UseCRC32CIntrinsics) = (false)); | ||||
1110 | } | ||||
1111 | |||||
1112 | // GHASH/GCM intrinsics | ||||
1113 | if (UseCLMUL && (UseSSE > 2)) { | ||||
1114 | if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)(JVMFlag::is_default(Flag_UseGHASHIntrinsics_enum))) { | ||||
1115 | UseGHASHIntrinsics = true; | ||||
1116 | } | ||||
1117 | } else if (UseGHASHIntrinsics) { | ||||
1118 | if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)(JVMFlag::is_default(Flag_UseGHASHIntrinsics_enum))) | ||||
1119 | warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU"); | ||||
1120 | FLAG_SET_DEFAULT(UseGHASHIntrinsics, false)((UseGHASHIntrinsics) = (false)); | ||||
1121 | } | ||||
1122 | |||||
1123 | // Base64 Intrinsics (Check the condition for which the intrinsic will be active) | ||||
1124 | if ((UseAVX > 2) && supports_avx512vl() && supports_avx512bw()) { | ||||
1125 | if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)(JVMFlag::is_default(Flag_UseBASE64Intrinsics_enum))) { | ||||
1126 | UseBASE64Intrinsics = true; | ||||
1127 | } | ||||
1128 | } else if (UseBASE64Intrinsics) { | ||||
1129 | if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics)(JVMFlag::is_default(Flag_UseBASE64Intrinsics_enum))) | ||||
1130 | warning("Base64 intrinsic requires EVEX instructions on this CPU"); | ||||
1131 | FLAG_SET_DEFAULT(UseBASE64Intrinsics, false)((UseBASE64Intrinsics) = (false)); | ||||
1132 | } | ||||
1133 | |||||
1134 | if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions | ||||
1135 | if (FLAG_IS_DEFAULT(UseFMA)(JVMFlag::is_default(Flag_UseFMA_enum))) { | ||||
1136 | UseFMA = true; | ||||
1137 | } | ||||
1138 | } else if (UseFMA) { | ||||
1139 | warning("FMA instructions are not available on this CPU"); | ||||
1140 | FLAG_SET_DEFAULT(UseFMA, false)((UseFMA) = (false)); | ||||
1141 | } | ||||
1142 | |||||
1143 | if (FLAG_IS_DEFAULT(UseMD5Intrinsics)(JVMFlag::is_default(Flag_UseMD5Intrinsics_enum))) { | ||||
1144 | UseMD5Intrinsics = true; | ||||
1145 | } | ||||
1146 | |||||
1147 | if (supports_sha() LP64_ONLY(|| supports_avx2() && supports_bmi2())|| supports_avx2() && supports_bmi2()) { | ||||
1148 | if (FLAG_IS_DEFAULT(UseSHA)(JVMFlag::is_default(Flag_UseSHA_enum))) { | ||||
1149 | UseSHA = true; | ||||
1150 | } | ||||
1151 | } else if (UseSHA) { | ||||
1152 | warning("SHA instructions are not available on this CPU"); | ||||
1153 | FLAG_SET_DEFAULT(UseSHA, false)((UseSHA) = (false)); | ||||
1154 | } | ||||
1155 | |||||
1156 | if (supports_sha() && supports_sse4_1() && UseSHA) { | ||||
1157 | if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)(JVMFlag::is_default(Flag_UseSHA1Intrinsics_enum))) { | ||||
1158 | FLAG_SET_DEFAULT(UseSHA1Intrinsics, true)((UseSHA1Intrinsics) = (true)); | ||||
1159 | } | ||||
1160 | } else if (UseSHA1Intrinsics) { | ||||
1161 | warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); | ||||
1162 | FLAG_SET_DEFAULT(UseSHA1Intrinsics, false)((UseSHA1Intrinsics) = (false)); | ||||
1163 | } | ||||
1164 | |||||
1165 | if (supports_sse4_1() && UseSHA) { | ||||
1166 | if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)(JVMFlag::is_default(Flag_UseSHA256Intrinsics_enum))) { | ||||
1167 | FLAG_SET_DEFAULT(UseSHA256Intrinsics, true)((UseSHA256Intrinsics) = (true)); | ||||
1168 | } | ||||
1169 | } else if (UseSHA256Intrinsics) { | ||||
1170 | warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); | ||||
1171 | FLAG_SET_DEFAULT(UseSHA256Intrinsics, false)((UseSHA256Intrinsics) = (false)); | ||||
1172 | } | ||||
1173 | |||||
1174 | #ifdef _LP641 | ||||
1175 | // These are only supported on 64-bit | ||||
1176 | if (UseSHA && supports_avx2() && supports_bmi2()) { | ||||
1177 | if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)(JVMFlag::is_default(Flag_UseSHA512Intrinsics_enum))) { | ||||
1178 | FLAG_SET_DEFAULT(UseSHA512Intrinsics, true)((UseSHA512Intrinsics) = (true)); | ||||
1179 | } | ||||
1180 | } else | ||||
1181 | #endif | ||||
1182 | if (UseSHA512Intrinsics) { | ||||
1183 | warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); | ||||
1184 | FLAG_SET_DEFAULT(UseSHA512Intrinsics, false)((UseSHA512Intrinsics) = (false)); | ||||
1185 | } | ||||
1186 | |||||
1187 | if (UseSHA3Intrinsics) { | ||||
1188 | warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); | ||||
1189 | FLAG_SET_DEFAULT(UseSHA3Intrinsics, false)((UseSHA3Intrinsics) = (false)); | ||||
1190 | } | ||||
1191 | |||||
1192 | if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { | ||||
1193 | FLAG_SET_DEFAULT(UseSHA, false)((UseSHA) = (false)); | ||||
1194 | } | ||||
1195 | |||||
1196 | if (!supports_rtm() && UseRTMLocking) { | ||||
1197 | vm_exit_during_initialization("RTM instructions are not available on this CPU"); | ||||
1198 | } | ||||
1199 | |||||
1200 | #if INCLUDE_RTM_OPT1 | ||||
1201 | if (UseRTMLocking) { | ||||
1202 | if (!CompilerConfig::is_c2_enabled()) { | ||||
1203 | // Only C2 does RTM locking optimization. | ||||
1204 | vm_exit_during_initialization("RTM locking optimization is not supported in this VM"); | ||||
1205 | } | ||||
1206 | if (is_intel_family_core()) { | ||||
1207 | if ((_model == CPU_MODEL_HASWELL_E3) || | ||||
1208 | (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) || | ||||
1209 | (_model == CPU_MODEL_BROADWELL && _stepping < 4)) { | ||||
1210 | // currently a collision between SKL and HSW_E3 | ||||
1211 | if (!UnlockExperimentalVMOptions && UseAVX < 3) { | ||||
1212 | vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this " | ||||
1213 | "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag."); | ||||
1214 | } else { | ||||
1215 | warning("UseRTMLocking is only available as experimental option on this platform."); | ||||
1216 | } | ||||
1217 | } | ||||
1218 | } | ||||
1219 | if (!FLAG_IS_CMDLINE(UseRTMLocking)(JVMFlag::is_cmdline(Flag_UseRTMLocking_enum))) { | ||||
1220 | // RTM locking should be used only for applications with | ||||
1221 | // high lock contention. For now we do not use it by default. | ||||
1222 | vm_exit_during_initialization("UseRTMLocking flag should be only set on command line"); | ||||
1223 | } | ||||
1224 | } else { // !UseRTMLocking | ||||
1225 | if (UseRTMForStackLocks) { | ||||
1226 | if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)(JVMFlag::is_default(Flag_UseRTMForStackLocks_enum))) { | ||||
1227 | warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off"); | ||||
1228 | } | ||||
1229 | FLAG_SET_DEFAULT(UseRTMForStackLocks, false)((UseRTMForStackLocks) = (false)); | ||||
1230 | } | ||||
1231 | if (UseRTMDeopt) { | ||||
1232 | FLAG_SET_DEFAULT(UseRTMDeopt, false)((UseRTMDeopt) = (false)); | ||||
1233 | } | ||||
1234 | if (PrintPreciseRTMLockingStatistics) { | ||||
1235 | FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false)((PrintPreciseRTMLockingStatistics) = (false)); | ||||
1236 | } | ||||
1237 | } | ||||
1238 | #else | ||||
1239 | if (UseRTMLocking) { | ||||
1240 | // Only C2 does RTM locking optimization. | ||||
1241 | vm_exit_during_initialization("RTM locking optimization is not supported in this VM"); | ||||
1242 | } | ||||
1243 | #endif | ||||
1244 | |||||
1245 | #ifdef COMPILER21 | ||||
1246 | if (UseFPUForSpilling) { | ||||
1247 | if (UseSSE < 2) { | ||||
1248 | // Only supported with SSE2+ | ||||
1249 | FLAG_SET_DEFAULT(UseFPUForSpilling, false)((UseFPUForSpilling) = (false)); | ||||
1250 | } | ||||
1251 | } | ||||
1252 | #endif | ||||
1253 | |||||
1254 | #if COMPILER2_OR_JVMCI1 | ||||
1255 | int max_vector_size = 0; | ||||
1256 | if (UseSSE < 2) { | ||||
1257 | // Vectors (in XMM) are only supported with SSE2+ | ||||
1258 | // SSE is always 2 on x64. | ||||
1259 | max_vector_size = 0; | ||||
1260 | } else if (UseAVX == 0 || !os_supports_avx_vectors()) { | ||||
1261 | // 16 byte vectors (in XMM) are supported with SSE2+ | ||||
1262 | max_vector_size = 16; | ||||
1263 | } else if (UseAVX == 1 || UseAVX == 2) { | ||||
1264 | // 32 bytes vectors (in YMM) are only supported with AVX+ | ||||
1265 | max_vector_size = 32; | ||||
1266 | } else if (UseAVX > 2) { | ||||
1267 | // 64 bytes vectors (in ZMM) are only supported with AVX 3 | ||||
1268 | max_vector_size = 64; | ||||
1269 | } | ||||
1270 | |||||
1271 | #ifdef _LP641 | ||||
1272 | int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit | ||||
1273 | #else | ||||
1274 | int min_vector_size = 0; | ||||
1275 | #endif | ||||
1276 | |||||
1277 | if (!FLAG_IS_DEFAULT(MaxVectorSize)(JVMFlag::is_default(Flag_MaxVectorSize_enum))) { | ||||
1278 | if (MaxVectorSize < min_vector_size) { | ||||
1279 | warning("MaxVectorSize must be at least %i on this platform", min_vector_size); | ||||
1280 | FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size)((MaxVectorSize) = (min_vector_size)); | ||||
1281 | } | ||||
1282 | if (MaxVectorSize > max_vector_size) { | ||||
1283 | warning("MaxVectorSize must be at most %i on this platform", max_vector_size); | ||||
1284 | FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size)((MaxVectorSize) = (max_vector_size)); | ||||
1285 | } | ||||
1286 | if (!is_power_of_2(MaxVectorSize)) { | ||||
1287 | warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size); | ||||
1288 | FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size)((MaxVectorSize) = (max_vector_size)); | ||||
1289 | } | ||||
1290 | } else { | ||||
1291 | // If default, use highest supported configuration | ||||
1292 | FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size)((MaxVectorSize) = (max_vector_size)); | ||||
1293 | } | ||||
1294 | |||||
1295 | #if defined(COMPILER21) && defined(ASSERT1) | ||||
1296 | if (MaxVectorSize > 0) { | ||||
1297 | if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) { | ||||
1298 | tty->print_cr("State of YMM registers after signal handle:"); | ||||
1299 | int nreg = 2 LP64_ONLY(+2)+2; | ||||
1300 | const char* ymm_name[4] = {"0", "7", "8", "15"}; | ||||
1301 | for (int i = 0; i < nreg; i++) { | ||||
1302 | tty->print("YMM%s:", ymm_name[i]); | ||||
1303 | for (int j = 7; j >=0; j--) { | ||||
1304 | tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]); | ||||
1305 | } | ||||
1306 | tty->cr(); | ||||
1307 | } | ||||
1308 | } | ||||
1309 | } | ||||
1310 | #endif // COMPILER2 && ASSERT | ||||
1311 | |||||
1312 | #ifdef _LP641 | ||||
1313 | if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)(JVMFlag::is_default(Flag_UseMultiplyToLenIntrinsic_enum))) { | ||||
1314 | UseMultiplyToLenIntrinsic = true; | ||||
1315 | } | ||||
1316 | if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)(JVMFlag::is_default(Flag_UseSquareToLenIntrinsic_enum))) { | ||||
1317 | UseSquareToLenIntrinsic = true; | ||||
1318 | } | ||||
1319 | if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)(JVMFlag::is_default(Flag_UseMulAddIntrinsic_enum))) { | ||||
1320 | UseMulAddIntrinsic = true; | ||||
1321 | } | ||||
1322 | if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)(JVMFlag::is_default(Flag_UseMontgomeryMultiplyIntrinsic_enum ))) { | ||||
1323 | UseMontgomeryMultiplyIntrinsic = true; | ||||
1324 | } | ||||
1325 | if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)(JVMFlag::is_default(Flag_UseMontgomerySquareIntrinsic_enum))) { | ||||
1326 | UseMontgomerySquareIntrinsic = true; | ||||
1327 | } | ||||
1328 | #else | ||||
1329 | if (UseMultiplyToLenIntrinsic) { | ||||
1330 | if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)(JVMFlag::is_default(Flag_UseMultiplyToLenIntrinsic_enum))) { | ||||
1331 | warning("multiplyToLen intrinsic is not available in 32-bit VM"); | ||||
1332 | } | ||||
1333 | FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false)((UseMultiplyToLenIntrinsic) = (false)); | ||||
1334 | } | ||||
1335 | if (UseMontgomeryMultiplyIntrinsic) { | ||||
1336 | if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)(JVMFlag::is_default(Flag_UseMontgomeryMultiplyIntrinsic_enum ))) { | ||||
1337 | warning("montgomeryMultiply intrinsic is not available in 32-bit VM"); | ||||
1338 | } | ||||
1339 | FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false)((UseMontgomeryMultiplyIntrinsic) = (false)); | ||||
1340 | } | ||||
1341 | if (UseMontgomerySquareIntrinsic) { | ||||
1342 | if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)(JVMFlag::is_default(Flag_UseMontgomerySquareIntrinsic_enum))) { | ||||
1343 | warning("montgomerySquare intrinsic is not available in 32-bit VM"); | ||||
1344 | } | ||||
1345 | FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false)((UseMontgomerySquareIntrinsic) = (false)); | ||||
1346 | } | ||||
1347 | if (UseSquareToLenIntrinsic) { | ||||
1348 | if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)(JVMFlag::is_default(Flag_UseSquareToLenIntrinsic_enum))) { | ||||
1349 | warning("squareToLen intrinsic is not available in 32-bit VM"); | ||||
1350 | } | ||||
1351 | FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false)((UseSquareToLenIntrinsic) = (false)); | ||||
1352 | } | ||||
1353 | if (UseMulAddIntrinsic) { | ||||
1354 | if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)(JVMFlag::is_default(Flag_UseMulAddIntrinsic_enum))) { | ||||
1355 | warning("mulAdd intrinsic is not available in 32-bit VM"); | ||||
1356 | } | ||||
1357 | FLAG_SET_DEFAULT(UseMulAddIntrinsic, false)((UseMulAddIntrinsic) = (false)); | ||||
1358 | } | ||||
1359 | #endif // _LP64 | ||||
1360 | #endif // COMPILER2_OR_JVMCI | ||||
1361 | |||||
1362 | // On new cpus instructions which update whole XMM register should be used | ||||
1363 | // to prevent partial register stall due to dependencies on high half. | ||||
1364 | // | ||||
1365 | // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) | ||||
1366 | // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) | ||||
1367 | // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). | ||||
1368 | // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). | ||||
1369 | |||||
1370 | |||||
1371 | if (is_zx()) { // ZX cpus specific settings | ||||
1372 | if (FLAG_IS_DEFAULT(UseStoreImmI16)(JVMFlag::is_default(Flag_UseStoreImmI16_enum))) { | ||||
1373 | UseStoreImmI16 = false; // don't use it on ZX cpus | ||||
1374 | } | ||||
1375 | if ((cpu_family() == 6) || (cpu_family() == 7)) { | ||||
1376 | if (FLAG_IS_DEFAULT(UseAddressNop)(JVMFlag::is_default(Flag_UseAddressNop_enum))) { | ||||
1377 | // Use it on all ZX cpus | ||||
1378 | UseAddressNop = true; | ||||
1379 | } | ||||
1380 | } | ||||
1381 | if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)(JVMFlag::is_default(Flag_UseXmmLoadAndClearUpper_enum))) { | ||||
1382 | UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus | ||||
1383 | } | ||||
1384 | if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)(JVMFlag::is_default(Flag_UseXmmRegToRegMoveAll_enum))) { | ||||
1385 | if (supports_sse3()) { | ||||
1386 | UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus | ||||
1387 | } else { | ||||
1388 | UseXmmRegToRegMoveAll = false; | ||||
1389 | } | ||||
1390 | } | ||||
1391 | if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus | ||||
1392 | #ifdef COMPILER21 | ||||
1393 | if (FLAG_IS_DEFAULT(MaxLoopPad)(JVMFlag::is_default(Flag_MaxLoopPad_enum))) { | ||||
1394 | // For new ZX cpus do the next optimization: | ||||
1395 | // don't align the beginning of a loop if there are enough instructions | ||||
1396 | // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) | ||||
1397 | // in current fetch line (OptoLoopAlignment) or the padding | ||||
1398 | // is big (> MaxLoopPad). | ||||
1399 | // Set MaxLoopPad to 11 for new ZX cpus to reduce number of | ||||
1400 | // generated NOP instructions. 11 is the largest size of one | ||||
1401 | // address NOP instruction '0F 1F' (see Assembler::nop(i)). | ||||
1402 | MaxLoopPad = 11; | ||||
1403 | } | ||||
1404 | #endif // COMPILER2 | ||||
1405 | if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)(JVMFlag::is_default(Flag_UseXMMForArrayCopy_enum))) { | ||||
1406 | UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus | ||||
1407 | } | ||||
1408 | if (supports_sse4_2()) { // new ZX cpus | ||||
1409 | if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)(JVMFlag::is_default(Flag_UseUnalignedLoadStores_enum))) { | ||||
1410 | UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus | ||||
1411 | } | ||||
1412 | } | ||||
1413 | if (supports_sse4_2()) { | ||||
1414 | if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)(JVMFlag::is_default(Flag_UseSSE42Intrinsics_enum))) { | ||||
1415 | FLAG_SET_DEFAULT(UseSSE42Intrinsics, true)((UseSSE42Intrinsics) = (true)); | ||||
1416 | } | ||||
1417 | } else { | ||||
1418 | if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)(JVMFlag::is_default(Flag_UseAESIntrinsics_enum))) { | ||||
1419 | warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); | ||||
1420 | } | ||||
1421 | FLAG_SET_DEFAULT(UseSSE42Intrinsics, false)((UseSSE42Intrinsics) = (false)); | ||||
1422 | } | ||||
1423 | } | ||||
1424 | |||||
1425 | if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)(JVMFlag::is_default(Flag_AllocatePrefetchInstr_enum)) && supports_3dnow_prefetch()) { | ||||
1426 | FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3)((AllocatePrefetchInstr) = (3)); | ||||
1427 | } | ||||
1428 | } | ||||
1429 | |||||
1430 | if (is_amd_family()) { // AMD cpus specific settings | ||||
1431 | if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)(JVMFlag::is_default(Flag_UseAddressNop_enum))) { | ||||
1432 | // Use it on new AMD cpus starting from Opteron. | ||||
1433 | UseAddressNop = true; | ||||
1434 | } | ||||
1435 | if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)(JVMFlag::is_default(Flag_UseNewLongLShift_enum))) { | ||||
1436 | // Use it on new AMD cpus starting from Opteron. | ||||
1437 | UseNewLongLShift = true; | ||||
1438 | } | ||||
1439 | if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)(JVMFlag::is_default(Flag_UseXmmLoadAndClearUpper_enum))) { | ||||
1440 | if (supports_sse4a()) { | ||||
1441 | UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron | ||||
1442 | } else { | ||||
1443 | UseXmmLoadAndClearUpper = false; | ||||
1444 | } | ||||
1445 | } | ||||
1446 | if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)(JVMFlag::is_default(Flag_UseXmmRegToRegMoveAll_enum))) { | ||||
1447 | if (supports_sse4a()) { | ||||
1448 | UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' | ||||
1449 | } else { | ||||
1450 | UseXmmRegToRegMoveAll = false; | ||||
1451 | } | ||||
1452 | } | ||||
1453 | if (FLAG_IS_DEFAULT(UseXmmI2F)(JVMFlag::is_default(Flag_UseXmmI2F_enum))) { | ||||
1454 | if (supports_sse4a()) { | ||||
1455 | UseXmmI2F = true; | ||||
1456 | } else { | ||||
1457 | UseXmmI2F = false; | ||||
1458 | } | ||||
1459 | } | ||||
1460 | if (FLAG_IS_DEFAULT(UseXmmI2D)(JVMFlag::is_default(Flag_UseXmmI2D_enum))) { | ||||
1461 | if (supports_sse4a()) { | ||||
1462 | UseXmmI2D = true; | ||||
1463 | } else { | ||||
1464 | UseXmmI2D = false; | ||||
1465 | } | ||||
1466 | } | ||||
1467 | if (supports_sse4_2()) { | ||||
1468 | if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)(JVMFlag::is_default(Flag_UseSSE42Intrinsics_enum))) { | ||||
1469 | FLAG_SET_DEFAULT(UseSSE42Intrinsics, true)((UseSSE42Intrinsics) = (true)); | ||||
1470 | } | ||||
1471 | } else { | ||||
1472 | if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)(JVMFlag::is_default(Flag_UseAESIntrinsics_enum))) { | ||||
1473 | warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); | ||||
1474 | } | ||||
1475 | FLAG_SET_DEFAULT(UseSSE42Intrinsics, false)((UseSSE42Intrinsics) = (false)); | ||||
1476 | } | ||||
1477 | |||||
1478 | // some defaults for AMD family 15h | ||||
1479 | if (cpu_family() == 0x15) { | ||||
1480 | // On family 15h processors default is no sw prefetch | ||||
1481 | if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)(JVMFlag::is_default(Flag_AllocatePrefetchStyle_enum))) { | ||||
1482 | FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0)((AllocatePrefetchStyle) = (0)); | ||||
1483 | } | ||||
1484 | // Also, if some other prefetch style is specified, default instruction type is PREFETCHW | ||||
1485 | if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)(JVMFlag::is_default(Flag_AllocatePrefetchInstr_enum))) { | ||||
1486 | FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3)((AllocatePrefetchInstr) = (3)); | ||||
1487 | } | ||||
1488 | // On family 15h processors use XMM and UnalignedLoadStores for Array Copy | ||||
1489 | if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)(JVMFlag::is_default(Flag_UseXMMForArrayCopy_enum))) { | ||||
1490 | FLAG_SET_DEFAULT(UseXMMForArrayCopy, true)((UseXMMForArrayCopy) = (true)); | ||||
1491 | } | ||||
1492 | if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)(JVMFlag::is_default(Flag_UseUnalignedLoadStores_enum))) { | ||||
1493 | FLAG_SET_DEFAULT(UseUnalignedLoadStores, true)((UseUnalignedLoadStores) = (true)); | ||||
1494 | } | ||||
1495 | } | ||||
1496 | |||||
1497 | #ifdef COMPILER21 | ||||
1498 | if (cpu_family() < 0x17 && MaxVectorSize > 16) { | ||||
1499 | // Limit vectors size to 16 bytes on AMD cpus < 17h. | ||||
1500 | FLAG_SET_DEFAULT(MaxVectorSize, 16)((MaxVectorSize) = (16)); | ||||
1501 | } | ||||
1502 | #endif // COMPILER2 | ||||
1503 | |||||
1504 | // Some defaults for AMD family >= 17h && Hygon family 18h | ||||
1505 | if (cpu_family() >= 0x17) { | ||||
1506 | // On family >=17h processors use XMM and UnalignedLoadStores | ||||
1507 | // for Array Copy | ||||
1508 | if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)(JVMFlag::is_default(Flag_UseXMMForArrayCopy_enum))) { | ||||
1509 | FLAG_SET_DEFAULT(UseXMMForArrayCopy, true)((UseXMMForArrayCopy) = (true)); | ||||
1510 | } | ||||
1511 | if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)(JVMFlag::is_default(Flag_UseUnalignedLoadStores_enum))) { | ||||
1512 | FLAG_SET_DEFAULT(UseUnalignedLoadStores, true)((UseUnalignedLoadStores) = (true)); | ||||
1513 | } | ||||
1514 | #ifdef COMPILER21 | ||||
1515 | if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)(JVMFlag::is_default(Flag_UseFPUForSpilling_enum))) { | ||||
1516 | FLAG_SET_DEFAULT(UseFPUForSpilling, true)((UseFPUForSpilling) = (true)); | ||||
1517 | } | ||||
1518 | #endif | ||||
1519 | } | ||||
1520 | } | ||||
1521 | |||||
1522 | if (is_intel()) { // Intel cpus specific settings | ||||
1523 | if (FLAG_IS_DEFAULT(UseStoreImmI16)(JVMFlag::is_default(Flag_UseStoreImmI16_enum))) { | ||||
1524 | UseStoreImmI16 = false; // don't use it on Intel cpus | ||||
1525 | } | ||||
1526 | if (cpu_family() == 6 || cpu_family() == 15) { | ||||
1527 | if (FLAG_IS_DEFAULT(UseAddressNop)(JVMFlag::is_default(Flag_UseAddressNop_enum))) { | ||||
1528 | // Use it on all Intel cpus starting from PentiumPro | ||||
1529 | UseAddressNop = true; | ||||
1530 | } | ||||
1531 | } | ||||
1532 | if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)(JVMFlag::is_default(Flag_UseXmmLoadAndClearUpper_enum))) { | ||||
1533 | UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus | ||||
1534 | } | ||||
1535 | if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)(JVMFlag::is_default(Flag_UseXmmRegToRegMoveAll_enum))) { | ||||
1536 | if (supports_sse3()) { | ||||
1537 | UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus | ||||
1538 | } else { | ||||
1539 | UseXmmRegToRegMoveAll = false; | ||||
1540 | } | ||||
1541 | } | ||||
1542 | if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus | ||||
1543 | #ifdef COMPILER21 | ||||
1544 | if (FLAG_IS_DEFAULT(MaxLoopPad)(JVMFlag::is_default(Flag_MaxLoopPad_enum))) { | ||||
1545 | // For new Intel cpus do the next optimization: | ||||
1546 | // don't align the beginning of a loop if there are enough instructions | ||||
1547 | // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) | ||||
1548 | // in current fetch line (OptoLoopAlignment) or the padding | ||||
1549 | // is big (> MaxLoopPad). | ||||
1550 | // Set MaxLoopPad to 11 for new Intel cpus to reduce number of | ||||
1551 | // generated NOP instructions. 11 is the largest size of one | ||||
1552 | // address NOP instruction '0F 1F' (see Assembler::nop(i)). | ||||
1553 | MaxLoopPad = 11; | ||||
1554 | } | ||||
1555 | #endif // COMPILER2 | ||||
1556 | |||||
1557 | if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)(JVMFlag::is_default(Flag_UseXMMForArrayCopy_enum))) { | ||||
1558 | UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus | ||||
1559 | } | ||||
1560 | if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus | ||||
1561 | if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)(JVMFlag::is_default(Flag_UseUnalignedLoadStores_enum))) { | ||||
1562 | UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus | ||||
1563 | } | ||||
1564 | } | ||||
1565 | if (supports_sse4_2()) { | ||||
1566 | if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)(JVMFlag::is_default(Flag_UseSSE42Intrinsics_enum))) { | ||||
1567 | FLAG_SET_DEFAULT(UseSSE42Intrinsics, true)((UseSSE42Intrinsics) = (true)); | ||||
1568 | } | ||||
1569 | } else { | ||||
1570 | if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)(JVMFlag::is_default(Flag_UseAESIntrinsics_enum))) { | ||||
1571 | warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); | ||||
1572 | } | ||||
1573 | FLAG_SET_DEFAULT(UseSSE42Intrinsics, false)((UseSSE42Intrinsics) = (false)); | ||||
1574 | } | ||||
1575 | } | ||||
1576 | if (is_atom_family() || is_knights_family()) { | ||||
1577 | #ifdef COMPILER21 | ||||
1578 | if (FLAG_IS_DEFAULT(OptoScheduling)(JVMFlag::is_default(Flag_OptoScheduling_enum))) { | ||||
1579 | OptoScheduling = true; | ||||
1580 | } | ||||
1581 | #endif | ||||
1582 | if (supports_sse4_2()) { // Silvermont | ||||
1583 | if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)(JVMFlag::is_default(Flag_UseUnalignedLoadStores_enum))) { | ||||
1584 | UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus | ||||
1585 | } | ||||
1586 | } | ||||
1587 | if (FLAG_IS_DEFAULT(UseIncDec)(JVMFlag::is_default(Flag_UseIncDec_enum))) { | ||||
1588 | FLAG_SET_DEFAULT(UseIncDec, false)((UseIncDec) = (false)); | ||||
1589 | } | ||||
1590 | } | ||||
1591 | if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)(JVMFlag::is_default(Flag_AllocatePrefetchInstr_enum)) && supports_3dnow_prefetch()) { | ||||
1592 | FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3)((AllocatePrefetchInstr) = (3)); | ||||
1593 | } | ||||
1594 | #ifdef COMPILER21 | ||||
1595 | if (UseAVX > 2) { | ||||
1596 | if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)(JVMFlag::is_default(Flag_ArrayOperationPartialInlineSize_enum )) || | ||||
1597 | (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)(JVMFlag::is_default(Flag_ArrayOperationPartialInlineSize_enum )) && | ||||
1598 | ArrayOperationPartialInlineSize != 0 && | ||||
1599 | ArrayOperationPartialInlineSize != 16 && | ||||
1600 | ArrayOperationPartialInlineSize != 32 && | ||||
1601 | ArrayOperationPartialInlineSize != 64)) { | ||||
1602 | int inline_size = 0; | ||||
1603 | if (MaxVectorSize >= 64 && AVX3Threshold == 0) { | ||||
1604 | inline_size = 64; | ||||
1605 | } else if (MaxVectorSize >= 32) { | ||||
1606 | inline_size = 32; | ||||
1607 | } else if (MaxVectorSize >= 16) { | ||||
1608 | inline_size = 16; | ||||
1609 | } | ||||
1610 | if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)(JVMFlag::is_default(Flag_ArrayOperationPartialInlineSize_enum ))) { | ||||
1611 | warning("Setting ArrayOperationPartialInlineSize as %d", inline_size); | ||||
1612 | } | ||||
1613 | ArrayOperationPartialInlineSize = inline_size; | ||||
1614 | } | ||||
1615 | |||||
1616 | if (ArrayOperationPartialInlineSize > MaxVectorSize) { | ||||
1617 | ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0; | ||||
1618 | if (ArrayOperationPartialInlineSize) { | ||||
1619 | warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize" INTX_FORMAT"%" "l" "d" ")", MaxVectorSize); | ||||
1620 | } else { | ||||
1621 | warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT"%" "l" "d", ArrayOperationPartialInlineSize); | ||||
1622 | } | ||||
1623 | } | ||||
1624 | } | ||||
1625 | #endif | ||||
1626 | } | ||||
1627 | |||||
1628 | #ifdef COMPILER21 | ||||
1629 | if (FLAG_IS_DEFAULT(OptimizeFill)(JVMFlag::is_default(Flag_OptimizeFill_enum))) { | ||||
1630 | if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) { | ||||
1631 | OptimizeFill = false; | ||||
1632 | } | ||||
1633 | } | ||||
1634 | #endif | ||||
1635 | |||||
1636 | #ifdef _LP641 | ||||
1637 | if (UseSSE42Intrinsics) { | ||||
1638 | if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)(JVMFlag::is_default(Flag_UseVectorizedMismatchIntrinsic_enum ))) { | ||||
1639 | UseVectorizedMismatchIntrinsic = true; | ||||
1640 | } | ||||
1641 | } else if (UseVectorizedMismatchIntrinsic) { | ||||
1642 | if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)(JVMFlag::is_default(Flag_UseVectorizedMismatchIntrinsic_enum ))) | ||||
1643 | warning("vectorizedMismatch intrinsics are not available on this CPU"); | ||||
1644 | FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false)((UseVectorizedMismatchIntrinsic) = (false)); | ||||
1645 | } | ||||
1646 | #else | ||||
1647 | if (UseVectorizedMismatchIntrinsic) { | ||||
1648 | if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)(JVMFlag::is_default(Flag_UseVectorizedMismatchIntrinsic_enum ))) { | ||||
1649 | warning("vectorizedMismatch intrinsic is not available in 32-bit VM"); | ||||
1650 | } | ||||
1651 | FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false)((UseVectorizedMismatchIntrinsic) = (false)); | ||||
1652 | } | ||||
1653 | #endif // _LP64 | ||||
1654 | |||||
1655 | // Use count leading zeros count instruction if available. | ||||
1656 | if (supports_lzcnt()) { | ||||
1657 | if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)(JVMFlag::is_default(Flag_UseCountLeadingZerosInstruction_enum ))) { | ||||
1658 | UseCountLeadingZerosInstruction = true; | ||||
1659 | } | ||||
1660 | } else if (UseCountLeadingZerosInstruction) { | ||||
1661 | warning("lzcnt instruction is not available on this CPU"); | ||||
1662 | FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false)((UseCountLeadingZerosInstruction) = (false)); | ||||
1663 | } | ||||
1664 | |||||
1665 | // Use count trailing zeros instruction if available | ||||
1666 | if (supports_bmi1()) { | ||||
1667 | // tzcnt does not require VEX prefix | ||||
1668 | if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)(JVMFlag::is_default(Flag_UseCountTrailingZerosInstruction_enum ))) { | ||||
1669 | if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)(JVMFlag::is_default(Flag_UseBMI1Instructions_enum))) { | ||||
1670 | // Don't use tzcnt if BMI1 is switched off on command line. | ||||
1671 | UseCountTrailingZerosInstruction = false; | ||||
1672 | } else { | ||||
1673 | UseCountTrailingZerosInstruction = true; | ||||
1674 | } | ||||
1675 | } | ||||
1676 | } else if (UseCountTrailingZerosInstruction) { | ||||
1677 | warning("tzcnt instruction is not available on this CPU"); | ||||
1678 | FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false)((UseCountTrailingZerosInstruction) = (false)); | ||||
1679 | } | ||||
1680 | |||||
1681 | // BMI instructions (except tzcnt) use an encoding with VEX prefix. | ||||
1682 | // VEX prefix is generated only when AVX > 0. | ||||
1683 | if (supports_bmi1() && supports_avx()) { | ||||
1684 | if (FLAG_IS_DEFAULT(UseBMI1Instructions)(JVMFlag::is_default(Flag_UseBMI1Instructions_enum))) { | ||||
1685 | UseBMI1Instructions = true; | ||||
1686 | } | ||||
1687 | } else if (UseBMI1Instructions) { | ||||
1688 | warning("BMI1 instructions are not available on this CPU (AVX is also required)"); | ||||
1689 | FLAG_SET_DEFAULT(UseBMI1Instructions, false)((UseBMI1Instructions) = (false)); | ||||
1690 | } | ||||
1691 | |||||
1692 | if (supports_bmi2() && supports_avx()) { | ||||
1693 | if (FLAG_IS_DEFAULT(UseBMI2Instructions)(JVMFlag::is_default(Flag_UseBMI2Instructions_enum))) { | ||||
1694 | UseBMI2Instructions = true; | ||||
1695 | } | ||||
1696 | } else if (UseBMI2Instructions) { | ||||
1697 | warning("BMI2 instructions are not available on this CPU (AVX is also required)"); | ||||
1698 | FLAG_SET_DEFAULT(UseBMI2Instructions, false)((UseBMI2Instructions) = (false)); | ||||
1699 | } | ||||
1700 | |||||
1701 | // Use population count instruction if available. | ||||
1702 | if (supports_popcnt()) { | ||||
1703 | if (FLAG_IS_DEFAULT(UsePopCountInstruction)(JVMFlag::is_default(Flag_UsePopCountInstruction_enum))) { | ||||
1704 | UsePopCountInstruction = true; | ||||
1705 | } | ||||
1706 | } else if (UsePopCountInstruction) { | ||||
1707 | warning("POPCNT instruction is not available on this CPU"); | ||||
1708 | FLAG_SET_DEFAULT(UsePopCountInstruction, false)((UsePopCountInstruction) = (false)); | ||||
1709 | } | ||||
1710 | |||||
1711 | // Use fast-string operations if available. | ||||
1712 | if (supports_erms()) { | ||||
1713 | if (FLAG_IS_DEFAULT(UseFastStosb)(JVMFlag::is_default(Flag_UseFastStosb_enum))) { | ||||
1714 | UseFastStosb = true; | ||||
1715 | } | ||||
1716 | } else if (UseFastStosb) { | ||||
1717 | warning("fast-string operations are not available on this CPU"); | ||||
1718 | FLAG_SET_DEFAULT(UseFastStosb, false)((UseFastStosb) = (false)); | ||||
1719 | } | ||||
1720 | |||||
1721 | // For AMD Processors use XMM/YMM MOVDQU instructions | ||||
1722 | // for Object Initialization as default | ||||
1723 | if (is_amd() && cpu_family() >= 0x19) { | ||||
1724 | if (FLAG_IS_DEFAULT(UseFastStosb)(JVMFlag::is_default(Flag_UseFastStosb_enum))) { | ||||
1725 | UseFastStosb = false; | ||||
1726 | } | ||||
1727 | } | ||||
1728 | |||||
1729 | #ifdef COMPILER21 | ||||
1730 | if (is_intel() && MaxVectorSize > 16) { | ||||
1731 | if (FLAG_IS_DEFAULT(UseFastStosb)(JVMFlag::is_default(Flag_UseFastStosb_enum))) { | ||||
1732 | UseFastStosb = false; | ||||
1733 | } | ||||
1734 | } | ||||
1735 | #endif | ||||
1736 | |||||
1737 | // Use XMM/YMM MOVDQU instruction for Object Initialization | ||||
1738 | if (!UseFastStosb && UseSSE >= 2 && UseUnalignedLoadStores) { | ||||
1739 | if (FLAG_IS_DEFAULT(UseXMMForObjInit)(JVMFlag::is_default(Flag_UseXMMForObjInit_enum))) { | ||||
1740 | UseXMMForObjInit = true; | ||||
1741 | } | ||||
1742 | } else if (UseXMMForObjInit) { | ||||
1743 | warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off."); | ||||
1744 | FLAG_SET_DEFAULT(UseXMMForObjInit, false)((UseXMMForObjInit) = (false)); | ||||
1745 | } | ||||
1746 | |||||
1747 | #ifdef COMPILER21 | ||||
1748 | if (FLAG_IS_DEFAULT(AlignVector)(JVMFlag::is_default(Flag_AlignVector_enum))) { | ||||
1749 | // Modern processors allow misaligned memory operations for vectors. | ||||
1750 | AlignVector = !UseUnalignedLoadStores; | ||||
1751 | } | ||||
1752 | #endif // COMPILER2 | ||||
1753 | |||||
1754 | if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)(JVMFlag::is_default(Flag_AllocatePrefetchInstr_enum))) { | ||||
1755 | if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) { | ||||
1756 | FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0)((AllocatePrefetchInstr) = (0)); | ||||
1757 | } else if (!supports_sse() && supports_3dnow_prefetch()) { | ||||
1758 | FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3)((AllocatePrefetchInstr) = (3)); | ||||
1759 | } | ||||
1760 | } | ||||
1761 | |||||
1762 | // Allocation prefetch settings | ||||
1763 | intx cache_line_size = prefetch_data_size(); | ||||
1764 | if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)(JVMFlag::is_default(Flag_AllocatePrefetchStepSize_enum)) && | ||||
1765 | (cache_line_size > AllocatePrefetchStepSize)) { | ||||
1766 | FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size)((AllocatePrefetchStepSize) = (cache_line_size)); | ||||
1767 | } | ||||
1768 | |||||
1769 | if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) { | ||||
1770 | assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0")do { if (!(!(JVMFlag::is_default(Flag_AllocatePrefetchDistance_enum )))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 1770, "assert(" "!(JVMFlag::is_default(Flag_AllocatePrefetchDistance_enum))" ") failed", "default value should not be 0"); ::breakpoint() ; } } while (0); | ||||
1771 | if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)(JVMFlag::is_default(Flag_AllocatePrefetchStyle_enum))) { | ||||
1772 | warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag."); | ||||
1773 | } | ||||
1774 | FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0)((AllocatePrefetchStyle) = (0)); | ||||
1775 | } | ||||
1776 | |||||
1777 | if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)(JVMFlag::is_default(Flag_AllocatePrefetchDistance_enum))) { | ||||
1778 | bool use_watermark_prefetch = (AllocatePrefetchStyle == 2); | ||||
1779 | FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch))((AllocatePrefetchDistance) = (allocate_prefetch_distance(use_watermark_prefetch ))); | ||||
1780 | } | ||||
1781 | |||||
1782 | if (is_intel() && cpu_family() == 6 && supports_sse3()) { | ||||
1783 | if (FLAG_IS_DEFAULT(AllocatePrefetchLines)(JVMFlag::is_default(Flag_AllocatePrefetchLines_enum)) && | ||||
1784 | supports_sse4_2() && supports_ht()) { // Nehalem based cpus | ||||
1785 | FLAG_SET_DEFAULT(AllocatePrefetchLines, 4)((AllocatePrefetchLines) = (4)); | ||||
1786 | } | ||||
1787 | #ifdef COMPILER21 | ||||
1788 | if (FLAG_IS_DEFAULT(UseFPUForSpilling)(JVMFlag::is_default(Flag_UseFPUForSpilling_enum)) && supports_sse4_2()) { | ||||
1789 | FLAG_SET_DEFAULT(UseFPUForSpilling, true)((UseFPUForSpilling) = (true)); | ||||
1790 | } | ||||
1791 | #endif | ||||
1792 | } | ||||
1793 | |||||
1794 | if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) { | ||||
1795 | #ifdef COMPILER21 | ||||
1796 | if (FLAG_IS_DEFAULT(UseFPUForSpilling)(JVMFlag::is_default(Flag_UseFPUForSpilling_enum))) { | ||||
1797 | FLAG_SET_DEFAULT(UseFPUForSpilling, true)((UseFPUForSpilling) = (true)); | ||||
1798 | } | ||||
1799 | #endif | ||||
1800 | } | ||||
1801 | |||||
1802 | #ifdef _LP641 | ||||
1803 | // Prefetch settings | ||||
1804 | |||||
1805 | // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from | ||||
1806 | // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. | ||||
1807 | // Tested intervals from 128 to 2048 in increments of 64 == one cache line. | ||||
1808 | // 256 bytes (4 dcache lines) was the nearest runner-up to 576. | ||||
1809 | |||||
1810 | // gc copy/scan is disabled if prefetchw isn't supported, because | ||||
1811 | // Prefetch::write emits an inlined prefetchw on Linux. | ||||
1812 | // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. | ||||
1813 | // The used prefetcht0 instruction works for both amd64 and em64t. | ||||
1814 | |||||
1815 | if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)(JVMFlag::is_default(Flag_PrefetchCopyIntervalInBytes_enum))) { | ||||
1816 | FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576)((PrefetchCopyIntervalInBytes) = (576)); | ||||
1817 | } | ||||
1818 | if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)(JVMFlag::is_default(Flag_PrefetchScanIntervalInBytes_enum))) { | ||||
1819 | FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576)((PrefetchScanIntervalInBytes) = (576)); | ||||
1820 | } | ||||
1821 | if (FLAG_IS_DEFAULT(PrefetchFieldsAhead)(JVMFlag::is_default(Flag_PrefetchFieldsAhead_enum))) { | ||||
1822 | FLAG_SET_DEFAULT(PrefetchFieldsAhead, 1)((PrefetchFieldsAhead) = (1)); | ||||
1823 | } | ||||
1824 | #endif | ||||
1825 | |||||
1826 | if (FLAG_IS_DEFAULT(ContendedPaddingWidth)(JVMFlag::is_default(Flag_ContendedPaddingWidth_enum)) && | ||||
1827 | (cache_line_size > ContendedPaddingWidth)) | ||||
1828 | ContendedPaddingWidth = cache_line_size; | ||||
1829 | |||||
1830 | // This machine allows unaligned memory accesses | ||||
1831 | if (FLAG_IS_DEFAULT(UseUnalignedAccesses)(JVMFlag::is_default(Flag_UseUnalignedAccesses_enum))) { | ||||
1832 | FLAG_SET_DEFAULT(UseUnalignedAccesses, true)((UseUnalignedAccesses) = (true)); | ||||
1833 | } | ||||
1834 | |||||
1835 | #ifndef PRODUCT | ||||
1836 | if (log_is_enabled(Info, os, cpu)(LogImpl<(LogTag::_os), (LogTag::_cpu), (LogTag::__NO_TAG) , (LogTag::__NO_TAG), (LogTag::__NO_TAG), (LogTag::__NO_TAG)> ::is_level(LogLevel::Info))) { | ||||
1837 | LogStream ls(Log(os, cpu)LogImpl<(LogTag::_os), (LogTag::_cpu), (LogTag::__NO_TAG), (LogTag::__NO_TAG), (LogTag::__NO_TAG), (LogTag::__NO_TAG)>::info()); | ||||
1838 | outputStream* log = &ls; | ||||
1839 | log->print_cr("Logical CPUs per core: %u", | ||||
1840 | logical_processors_per_package()); | ||||
1841 | log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); | ||||
1842 | log->print("UseSSE=%d", (int) UseSSE); | ||||
1843 | if (UseAVX > 0) { | ||||
1844 | log->print(" UseAVX=%d", (int) UseAVX); | ||||
1845 | } | ||||
1846 | if (UseAES) { | ||||
1847 | log->print(" UseAES=1"); | ||||
1848 | } | ||||
1849 | #ifdef COMPILER21 | ||||
1850 | if (MaxVectorSize > 0) { | ||||
1851 | log->print(" MaxVectorSize=%d", (int) MaxVectorSize); | ||||
1852 | } | ||||
1853 | #endif | ||||
1854 | log->cr(); | ||||
1855 | log->print("Allocation"); | ||||
1856 | if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) { | ||||
1857 | log->print_cr(": no prefetching"); | ||||
1858 | } else { | ||||
1859 | log->print(" prefetching: "); | ||||
1860 | if (UseSSE == 0 && supports_3dnow_prefetch()) { | ||||
1861 | log->print("PREFETCHW"); | ||||
1862 | } else if (UseSSE >= 1) { | ||||
1863 | if (AllocatePrefetchInstr == 0) { | ||||
1864 | log->print("PREFETCHNTA"); | ||||
1865 | } else if (AllocatePrefetchInstr == 1) { | ||||
1866 | log->print("PREFETCHT0"); | ||||
1867 | } else if (AllocatePrefetchInstr == 2) { | ||||
1868 | log->print("PREFETCHT2"); | ||||
1869 | } else if (AllocatePrefetchInstr == 3) { | ||||
1870 | log->print("PREFETCHW"); | ||||
1871 | } | ||||
1872 | } | ||||
1873 | if (AllocatePrefetchLines > 1) { | ||||
1874 | log->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize); | ||||
1875 | } else { | ||||
1876 | log->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize); | ||||
1877 | } | ||||
1878 | } | ||||
1879 | |||||
1880 | if (PrefetchCopyIntervalInBytes > 0) { | ||||
1881 | log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes); | ||||
1882 | } | ||||
1883 | if (PrefetchScanIntervalInBytes > 0) { | ||||
1884 | log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes); | ||||
1885 | } | ||||
1886 | if (PrefetchFieldsAhead > 0) { | ||||
1887 | log->print_cr("PrefetchFieldsAhead %d", (int) PrefetchFieldsAhead); | ||||
1888 | } | ||||
1889 | if (ContendedPaddingWidth > 0) { | ||||
1890 | log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth); | ||||
1891 | } | ||||
1892 | } | ||||
1893 | #endif // !PRODUCT | ||||
1894 | if (FLAG_IS_DEFAULT(UseSignumIntrinsic)(JVMFlag::is_default(Flag_UseSignumIntrinsic_enum))) { | ||||
1895 | FLAG_SET_DEFAULT(UseSignumIntrinsic, true)((UseSignumIntrinsic) = (true)); | ||||
1896 | } | ||||
1897 | if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)(JVMFlag::is_default(Flag_UseCopySignIntrinsic_enum))) { | ||||
1898 | FLAG_SET_DEFAULT(UseCopySignIntrinsic, true)((UseCopySignIntrinsic) = (true)); | ||||
1899 | } | ||||
1900 | } | ||||
1901 | |||||
1902 | void VM_Version::print_platform_virtualization_info(outputStream* st) { | ||||
1903 | VirtualizationType vrt = VM_Version::get_detected_virtualization(); | ||||
1904 | if (vrt == XenHVM) { | ||||
1905 | st->print_cr("Xen hardware-assisted virtualization detected"); | ||||
1906 | } else if (vrt == KVM) { | ||||
1907 | st->print_cr("KVM virtualization detected"); | ||||
1908 | } else if (vrt == VMWare) { | ||||
1909 | st->print_cr("VMWare virtualization detected"); | ||||
1910 | VirtualizationSupport::print_virtualization_info(st); | ||||
1911 | } else if (vrt == HyperV) { | ||||
1912 | st->print_cr("Hyper-V virtualization detected"); | ||||
1913 | } else if (vrt == HyperVRole) { | ||||
1914 | st->print_cr("Hyper-V role detected"); | ||||
1915 | } | ||||
1916 | } | ||||
1917 | |||||
// Determine from the CPUID model/stepping whether this processor appears in
// Intel's published list of parts affected by the Jump Conditional Code (JCC)
// erratum (see the mitigation document linked below). Returns false for
// non-Intel-Core CPUs and for Intel models not in the table.
bool VM_Version::compute_has_intel_jcc_erratum() {
  if (!is_intel_family_core()) {
    // Only Intel CPUs are affected.
    return false;
  }
  // The following table of affected CPUs is based on the following document released by Intel:
  // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
  switch (_model) {
  case 0x8E:
    // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
    // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
    // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
  case 0x4E:
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
    return _stepping == 0x3;
  case 0x55:
    // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
    // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
    // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
    // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
    // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
    // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
    return _stepping == 0x4 || _stepping == 0x7;
  case 0x5E:
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
    return _stepping == 0x3;
  case 0x9E:
    // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
    // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
    // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
  case 0xA5:
    // Not in Intel documentation.
    // 06_A5H |   | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
    return true;
  case 0xA6:
    // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
    return _stepping == 0x0;
  case 0xAE:
    // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
    return _stepping == 0xA;
  default:
    // If we are running on another intel machine not recognized in the table, we are okay.
    return false;
  }
}
1985 | |||||
1986 | // On Xen, the cpuid instruction returns | ||||
1987 | // eax / registers[0]: Version of Xen | ||||
1988 | // ebx / registers[1]: chars 'XenV' | ||||
1989 | // ecx / registers[2]: chars 'MMXe' | ||||
1990 | // edx / registers[3]: chars 'nVMM' | ||||
1991 | // | ||||
1992 | // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns | ||||
1993 | // ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr' | ||||
1994 | // ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof' | ||||
1995 | // edx / registers[3]: chars 'M' / 'ware' / 't Hv' | ||||
1996 | // | ||||
// more information:
1998 | // https://kb.vmware.com/s/article/1009458 | ||||
1999 | // | ||||
2000 | void VM_Version::check_virtualizations() { | ||||
2001 | uint32_t registers[4] = {0}; | ||||
2002 | char signature[13] = {0}; | ||||
2003 | |||||
2004 | // Xen cpuid leaves can be found 0x100 aligned boundary starting | ||||
2005 | // from 0x40000000 until 0x40010000. | ||||
2006 | // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html | ||||
2007 | for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) { | ||||
2008 | detect_virt_stub(leaf, registers); | ||||
2009 | memcpy(signature, ®isters[1], 12); | ||||
2010 | |||||
2011 | if (strncmp("VMwareVMware", signature, 12) == 0) { | ||||
2012 | Abstract_VM_Version::_detected_virtualization = VMWare; | ||||
2013 | // check for extended metrics from guestlib | ||||
2014 | VirtualizationSupport::initialize(); | ||||
2015 | } else if (strncmp("Microsoft Hv", signature, 12) == 0) { | ||||
2016 | Abstract_VM_Version::_detected_virtualization = HyperV; | ||||
2017 | #ifdef _WINDOWS | ||||
2018 | // CPUID leaf 0x40000007 is available to the root partition only. | ||||
2019 | // See Hypervisor Top Level Functional Specification section 2.4.8 for more details. | ||||
2020 | // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf | ||||
2021 | detect_virt_stub(0x40000007, registers); | ||||
2022 | if ((registers[0] != 0x0) || | ||||
2023 | (registers[1] != 0x0) || | ||||
2024 | (registers[2] != 0x0) || | ||||
2025 | (registers[3] != 0x0)) { | ||||
2026 | Abstract_VM_Version::_detected_virtualization = HyperVRole; | ||||
2027 | } | ||||
2028 | #endif | ||||
2029 | } else if (strncmp("KVMKVMKVM", signature, 9) == 0) { | ||||
2030 | Abstract_VM_Version::_detected_virtualization = KVM; | ||||
2031 | } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) { | ||||
2032 | Abstract_VM_Version::_detected_virtualization = XenHVM; | ||||
2033 | } | ||||
2034 | } | ||||
2035 | } | ||||
2036 | |||||
2037 | // avx3_threshold() sets the threshold at which 64-byte instructions are used | ||||
2038 | // for implementing the array copy and clear operations. | ||||
2039 | // The Intel platforms that supports the serialize instruction | ||||
2040 | // has improved implementation of 64-byte load/stores and so the default | ||||
2041 | // threshold is set to 0 for these platforms. | ||||
2042 | int VM_Version::avx3_threshold() { | ||||
2043 | return (is_intel_family_core() && | ||||
2044 | supports_serialize() && | ||||
2045 | FLAG_IS_DEFAULT(AVX3Threshold)(JVMFlag::is_default(Flag_AVX3Threshold_enum))) ? 0 : AVX3Threshold; | ||||
2046 | } | ||||
2047 | |||||
2048 | static bool _vm_version_initialized = false; | ||||
2049 | |||||
2050 | void VM_Version::initialize() { | ||||
2051 | ResourceMark rm; | ||||
2052 | // Making this stub must be FIRST use of assembler | ||||
2053 | stub_blob = BufferBlob::create("VM_Version stub", stub_size); | ||||
2054 | if (stub_blob == NULL__null) { | ||||
| |||||
2055 | vm_exit_during_initialization("Unable to allocate stub for VM_Version"); | ||||
2056 | } | ||||
2057 | CodeBuffer c(stub_blob); | ||||
2058 | VM_Version_StubGenerator g(&c); | ||||
2059 | |||||
2060 | get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,(reinterpret_cast<get_cpu_info_stub_t>(g.generate_get_cpu_info ())) | ||||
2061 | g.generate_get_cpu_info())(reinterpret_cast<get_cpu_info_stub_t>(g.generate_get_cpu_info ())); | ||||
2062 | detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,(reinterpret_cast<detect_virt_stub_t>(g.generate_detect_virt ())) | ||||
2063 | g.generate_detect_virt())(reinterpret_cast<detect_virt_stub_t>(g.generate_detect_virt ())); | ||||
2064 | |||||
2065 | get_processor_features(); | ||||
2066 | |||||
2067 | LP64_ONLY(Assembler::precompute_instructions();)Assembler::precompute_instructions(); | ||||
2068 | |||||
2069 | if (VM_Version::supports_hv()) { // Supports hypervisor | ||||
2070 | check_virtualizations(); | ||||
2071 | } | ||||
2072 | _vm_version_initialized = true; | ||||
2073 | } | ||||
2074 | |||||
// x86 CPU family ids as returned by extended_cpu_family()
// (used by cpu_family_description() and is_netburst()).
typedef enum {
  CPU_FAMILY_8086_8088 = 0,
  CPU_FAMILY_INTEL_286 = 2,
  CPU_FAMILY_INTEL_386 = 3,
  CPU_FAMILY_INTEL_486 = 4,
  CPU_FAMILY_PENTIUM = 5,
  CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
  CPU_FAMILY_PENTIUM_4 = 0xF
} FamilyFlag;
2084 | |||||
// Bit masks tested against _cpuid_info.ext_cpuid1_edx (see cpu_is_em64t()).
typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;
2089 | |||||
// Standard CPUID feature bit masks (edx). Bit positions correspond by index
// to the names in _feature_edx_id below.
typedef enum {
  FPU_FLAG     = 0x00000001,
  VME_FLAG     = 0x00000002,
  DE_FLAG      = 0x00000004,
  PSE_FLAG     = 0x00000008,
  TSC_FLAG     = 0x00000010,
  MSR_FLAG     = 0x00000020,
  PAE_FLAG     = 0x00000040,
  MCE_FLAG     = 0x00000080,
  CX8_FLAG     = 0x00000100,
  APIC_FLAG    = 0x00000200,
  SEP_FLAG     = 0x00000800,
  MTRR_FLAG    = 0x00001000,
  PGE_FLAG     = 0x00002000,
  MCA_FLAG     = 0x00004000,
  CMOV_FLAG    = 0x00008000,
  PAT_FLAG     = 0x00010000,
  PSE36_FLAG   = 0x00020000,
  PSNUM_FLAG   = 0x00040000,
  CLFLUSH_FLAG = 0x00080000,
  DTS_FLAG     = 0x00200000,
  ACPI_FLAG    = 0x00400000,
  MMX_FLAG     = 0x00800000,
  FXSR_FLAG    = 0x01000000,
  SSE_FLAG     = 0x02000000,
  SSE2_FLAG    = 0x04000000,
  SS_FLAG      = 0x08000000,
  HTT_FLAG     = 0x10000000,
  TM_FLAG      = 0x20000000
} FeatureEdxFlag;
2120 | |||||
2121 | static BufferBlob* cpuid_brand_string_stub_blob; | ||||
2122 | static const int cpuid_brand_string_stub_size = 550; | ||||
2123 | |||||
2124 | extern "C" { | ||||
2125 | typedef void (*getCPUIDBrandString_stub_t)(void*); | ||||
2126 | } | ||||
2127 | |||||
2128 | static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = NULL__null; | ||||
2129 | |||||
2130 | // VM_Version statics | ||||
2131 | enum { | ||||
2132 | ExtendedFamilyIdLength_INTEL = 16, | ||||
2133 | ExtendedFamilyIdLength_AMD = 24 | ||||
2134 | }; | ||||
2135 | |||||
2136 | const size_t VENDOR_LENGTH = 13; | ||||
2137 | const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1); | ||||
2138 | static char* _cpu_brand_string = NULL__null; | ||||
2139 | static int64_t _max_qualified_cpu_frequency = 0; | ||||
2140 | |||||
2141 | static int _no_of_threads = 0; | ||||
2142 | static int _no_of_cores = 0; | ||||
2143 | |||||
// Intel family names, indexed by extended family id
// (see cpu_family_description()).
const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",
  "",
  "286",
  "386",
  "486",
  "Pentium",
  "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"
};
2162 | |||||
// AMD family names, indexed by extended family id
// (see cpu_family_description()).
const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",
  "K5/K6",
  "Athlon/AthlonXP",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",
  "Opteron QC/Phenom",  // Barcelona et.al.
  "",
  "",
  "",
  "",
  "",
  "",
  "Zen"
};
2189 | // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual, | ||||
2190 | // September 2013, Vol 3C Table 35-1 | ||||
2191 | const char* const _model_id_pentium_pro[] = { | ||||
2192 | "", | ||||
2193 | "Pentium Pro", | ||||
2194 | "", | ||||
2195 | "Pentium II model 3", | ||||
2196 | "", | ||||
2197 | "Pentium II model 5/Xeon/Celeron", | ||||
2198 | "Celeron", | ||||
2199 | "Pentium III/Pentium III Xeon", | ||||
2200 | "Pentium III/Pentium III Xeon", | ||||
2201 | "Pentium M model 9", // Yonah | ||||
2202 | "Pentium III, model A", | ||||
2203 | "Pentium III, model B", | ||||
2204 | "", | ||||
2205 | "Pentium M model D", // Dothan | ||||
2206 | "", | ||||
2207 | "Core 2", // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown | ||||
2208 | "", | ||||
2209 | "", | ||||
2210 | "", | ||||
2211 | "", | ||||
2212 | "", | ||||
2213 | "", | ||||
2214 | "Celeron", // 0x16 Celeron 65nm | ||||
2215 | "Core 2", // 0x17 Penryn / Harpertown | ||||
2216 | "", | ||||
2217 | "", | ||||
2218 | "Core i7", // 0x1A CPU_MODEL_NEHALEM_EP | ||||
2219 | "Atom", // 0x1B Z5xx series Silverthorn | ||||
2220 | "", | ||||
2221 | "Core 2", // 0x1D Dunnington (6-core) | ||||
2222 | "Nehalem", // 0x1E CPU_MODEL_NEHALEM | ||||
2223 | "", | ||||
2224 | "", | ||||
2225 | "", | ||||
2226 | "", | ||||
2227 | "", | ||||
2228 | "", | ||||
2229 | "Westmere", // 0x25 CPU_MODEL_WESTMERE | ||||
2230 | "", | ||||
2231 | "", | ||||
2232 | "", // 0x28 | ||||
2233 | "", | ||||
2234 | "Sandy Bridge", // 0x2a "2nd Generation Intel Core i7, i5, i3" | ||||
2235 | "", | ||||
2236 | "Westmere-EP", // 0x2c CPU_MODEL_WESTMERE_EP | ||||
2237 | "Sandy Bridge-EP", // 0x2d CPU_MODEL_SANDYBRIDGE_EP | ||||
2238 | "Nehalem-EX", // 0x2e CPU_MODEL_NEHALEM_EX | ||||
2239 | "Westmere-EX", // 0x2f CPU_MODEL_WESTMERE_EX | ||||
2240 | "", | ||||
2241 | "", | ||||
2242 | "", | ||||
2243 | "", | ||||
2244 | "", | ||||
2245 | "", | ||||
2246 | "", | ||||
2247 | "", | ||||
2248 | "", | ||||
2249 | "", | ||||
2250 | "Ivy Bridge", // 0x3a | ||||
2251 | "", | ||||
2252 | "Haswell", // 0x3c "4th Generation Intel Core Processor" | ||||
2253 | "", // 0x3d "Next Generation Intel Core Processor" | ||||
2254 | "Ivy Bridge-EP", // 0x3e "Next Generation Intel Xeon Processor E7 Family" | ||||
2255 | "", // 0x3f "Future Generation Intel Xeon Processor" | ||||
2256 | "", | ||||
2257 | "", | ||||
2258 | "", | ||||
2259 | "", | ||||
2260 | "", | ||||
2261 | "Haswell", // 0x45 "4th Generation Intel Core Processor" | ||||
2262 | "Haswell", // 0x46 "4th Generation Intel Core Processor" | ||||
2263 | NULL__null | ||||
2264 | }; | ||||
2265 | |||||
2266 | /* Brand ID is for back compability | ||||
2267 | * Newer CPUs uses the extended brand string */ | ||||
2268 | const char* const _brand_id[] = { | ||||
2269 | "", | ||||
2270 | "Celeron processor", | ||||
2271 | "Pentium III processor", | ||||
2272 | "Intel Pentium III Xeon processor", | ||||
2273 | "", | ||||
2274 | "", | ||||
2275 | "", | ||||
2276 | "", | ||||
2277 | "Intel Pentium 4 processor", | ||||
2278 | NULL__null | ||||
2279 | }; | ||||
2280 | |||||
2281 | |||||
// Names for the standard CPUID feature bits in edx; index == bit position
// (matches the FeatureEdxFlag masks above), "" for reserved bits.
const char* const _feature_edx_id[] = {
  "On-Chip FPU",
  "Virtual Mode Extensions",
  "Debugging Extensions",
  "Page Size Extensions",
  "Time Stamp Counter",
  "Model Specific Registers",
  "Physical Address Extension",
  "Machine Check Exceptions",
  "CMPXCHG8B Instruction",
  "On-Chip APIC",
  "",
  "Fast System Call",
  "Memory Type Range Registers",
  "Page Global Enable",
  "Machine Check Architecture",
  "Conditional Mov Instruction",
  "Page Attribute Table",
  "36-bit Page Size Extension",
  "Processor Serial Number",
  "CLFLUSH Instruction",
  "",
  "Debug Trace Store feature",
  "ACPI registers in MSR space",
  "Intel Architecture MMX Technology",
  "Fast Float Point Save and Restore",
  "Streaming SIMD extensions",
  "Streaming SIMD extensions 2",
  "Self-Snoop",
  "Hyper Threading",
  "Thermal Monitor",
  "",
  "Pending Break Enable"
};
2316 | |||||
// Names for the extended CPUID feature bits in edx; index == bit position
// (e.g. RDTSCP at bit 27, Intel 64 at bit 29 match _featureExtendedEdxFlag).
const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",
  "",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",
  "",
  "Intel 64 Architecture",
  "",
  ""
};
2351 | |||||
// Names for the standard CPUID feature bits in ecx; index == bit position,
// "" for reserved bits.
const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",
  "PCLMULQDQ",
  "64-bit DS Area",
  "MONITOR/MWAIT instructions",
  "CPL Qualified Debug Store",
  "Virtual Machine Extensions",
  "Safer Mode Extensions",
  "Enhanced Intel SpeedStep technology",
  "Thermal Monitor 2",
  "Supplemental Streaming SIMD Extensions 3",
  "L1 Context ID",
  "",
  "Fused Multiply-Add",
  "CMPXCHG16B",
  "xTPR Update Control",
  "Perfmon and Debug Capability",
  "",
  "Process-context identifiers",
  "Direct Cache Access",
  "Streaming SIMD extensions 4.1",
  "Streaming SIMD extensions 4.2",
  "x2APIC",
  "MOVBE",
  "Popcount instruction",
  "TSC-Deadline",
  "AESNI",
  "XSAVE",
  "OSXSAVE",
  "AVX",
  "F16C",
  "RDRAND",
  ""
};
2386 | |||||
// Names for the extended CPUID feature bits in ecx; index == bit position,
// "" for reserved bits.
const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",
  "Core multi-processor legacy mode",
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
  "Misaligned SSE mode",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};
2421 | |||||
2422 | void VM_Version::initialize_tsc(void) { | ||||
2423 | ResourceMark rm; | ||||
2424 | |||||
2425 | cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size); | ||||
2426 | if (cpuid_brand_string_stub_blob == NULL__null) { | ||||
2427 | vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub"); | ||||
2428 | } | ||||
2429 | CodeBuffer c(cpuid_brand_string_stub_blob); | ||||
2430 | VM_Version_StubGenerator g(&c); | ||||
2431 | getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,(reinterpret_cast<getCPUIDBrandString_stub_t>(g.generate_getCPUIDBrandString ())) | ||||
2432 | g.generate_getCPUIDBrandString())(reinterpret_cast<getCPUIDBrandString_stub_t>(g.generate_getCPUIDBrandString ())); | ||||
2433 | } | ||||
2434 | |||||
2435 | const char* VM_Version::cpu_model_description(void) { | ||||
2436 | uint32_t cpu_family = extended_cpu_family(); | ||||
2437 | uint32_t cpu_model = extended_cpu_model(); | ||||
2438 | const char* model = NULL__null; | ||||
2439 | |||||
2440 | if (cpu_family == CPU_FAMILY_PENTIUMPRO) { | ||||
2441 | for (uint32_t i = 0; i <= cpu_model; i++) { | ||||
2442 | model = _model_id_pentium_pro[i]; | ||||
2443 | if (model == NULL__null) { | ||||
2444 | break; | ||||
2445 | } | ||||
2446 | } | ||||
2447 | } | ||||
2448 | return model; | ||||
2449 | } | ||||
2450 | |||||
2451 | const char* VM_Version::cpu_brand_string(void) { | ||||
2452 | if (_cpu_brand_string == NULL__null) { | ||||
2453 | _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal)(char*) (AllocateHeap(((CPU_EBS_MAX_LENGTH)) * sizeof(char), mtInternal , AllocFailStrategy::RETURN_NULL)); | ||||
2454 | if (NULL__null == _cpu_brand_string) { | ||||
2455 | return NULL__null; | ||||
2456 | } | ||||
2457 | int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH); | ||||
2458 | if (ret_val != OS_OK) { | ||||
2459 | FREE_C_HEAP_ARRAY(char, _cpu_brand_string)FreeHeap((char*)(_cpu_brand_string)); | ||||
2460 | _cpu_brand_string = NULL__null; | ||||
2461 | } | ||||
2462 | } | ||||
2463 | return _cpu_brand_string; | ||||
2464 | } | ||||
2465 | |||||
2466 | const char* VM_Version::cpu_brand(void) { | ||||
2467 | const char* brand = NULL__null; | ||||
2468 | |||||
2469 | if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) { | ||||
2470 | int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF; | ||||
2471 | brand = _brand_id[0]; | ||||
2472 | for (int i = 0; brand != NULL__null && i <= brand_num; i += 1) { | ||||
2473 | brand = _brand_id[i]; | ||||
2474 | } | ||||
2475 | } | ||||
2476 | return brand; | ||||
2477 | } | ||||
2478 | |||||
2479 | bool VM_Version::cpu_is_em64t(void) { | ||||
2480 | return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG); | ||||
2481 | } | ||||
2482 | |||||
2483 | bool VM_Version::is_netburst(void) { | ||||
2484 | return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4)); | ||||
2485 | } | ||||
2486 | |||||
2487 | bool VM_Version::supports_tscinv_ext(void) { | ||||
2488 | if (!supports_tscinv_bit()) { | ||||
2489 | return false; | ||||
2490 | } | ||||
2491 | |||||
2492 | if (is_intel()) { | ||||
2493 | return true; | ||||
2494 | } | ||||
2495 | |||||
2496 | if (is_amd()) { | ||||
2497 | return !is_amd_Barcelona(); | ||||
2498 | } | ||||
2499 | |||||
2500 | if (is_hygon()) { | ||||
2501 | return true; | ||||
2502 | } | ||||
2503 | |||||
2504 | return false; | ||||
2505 | } | ||||
2506 | |||||
2507 | void VM_Version::resolve_cpu_information_details(void) { | ||||
2508 | |||||
2509 | // in future we want to base this information on proper cpu | ||||
2510 | // and cache topology enumeration such as: | ||||
2511 | // Intel 64 Architecture Processor Topology Enumeration | ||||
2512 | // which supports system cpu and cache topology enumeration | ||||
2513 | // either using 2xAPICIDs or initial APICIDs | ||||
2514 | |||||
2515 | // currently only rough cpu information estimates | ||||
2516 | // which will not necessarily reflect the exact configuration of the system | ||||
2517 | |||||
2518 | // this is the number of logical hardware threads | ||||
2519 | // visible to the operating system | ||||
2520 | _no_of_threads = os::processor_count(); | ||||
2521 | |||||
2522 | // find out number of threads per cpu package | ||||
2523 | int threads_per_package = threads_per_core() * cores_per_cpu(); | ||||
2524 | |||||
2525 | // use amount of threads visible to the process in order to guess number of sockets | ||||
2526 | _no_of_sockets = _no_of_threads / threads_per_package; | ||||
2527 | |||||
2528 | // process might only see a subset of the total number of threads | ||||
2529 | // from a single processor package. Virtualization/resource management for example. | ||||
2530 | // If so then just write a hard 1 as num of pkgs. | ||||
2531 | if (0 == _no_of_sockets) { | ||||
2532 | _no_of_sockets = 1; | ||||
2533 | } | ||||
2534 | |||||
2535 | // estimate the number of cores | ||||
2536 | _no_of_cores = cores_per_cpu() * _no_of_sockets; | ||||
2537 | } | ||||
2538 | |||||
2539 | |||||
2540 | const char* VM_Version::cpu_family_description(void) { | ||||
2541 | int cpu_family_id = extended_cpu_family(); | ||||
2542 | if (is_amd()) { | ||||
2543 | if (cpu_family_id < ExtendedFamilyIdLength_AMD) { | ||||
2544 | return _family_id_amd[cpu_family_id]; | ||||
2545 | } | ||||
2546 | } | ||||
2547 | if (is_intel()) { | ||||
2548 | if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) { | ||||
2549 | return cpu_model_description(); | ||||
2550 | } | ||||
2551 | if (cpu_family_id < ExtendedFamilyIdLength_INTEL) { | ||||
2552 | return _family_id_intel[cpu_family_id]; | ||||
2553 | } | ||||
2554 | } | ||||
2555 | if (is_hygon()) { | ||||
2556 | return "Dhyana"; | ||||
2557 | } | ||||
2558 | return "Unknown x86"; | ||||
2559 | } | ||||
2560 | |||||
2561 | int VM_Version::cpu_type_description(char* const buf, size_t buf_len) { | ||||
2562 | assert(buf != NULL, "buffer is NULL!")do { if (!(buf != __null)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 2562, "assert(" "buf != __null" ") failed", "buffer is NULL!" ); ::breakpoint(); } } while (0); | ||||
2563 | assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!")do { if (!(buf_len >= CPU_TYPE_DESC_BUF_SIZE)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 2563, "assert(" "buf_len >= CPU_TYPE_DESC_BUF_SIZE" ") failed" , "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!") ; ::breakpoint(); } } while (0); | ||||
2564 | |||||
2565 | const char* cpu_type = NULL__null; | ||||
2566 | const char* x64 = NULL__null; | ||||
2567 | |||||
2568 | if (is_intel()) { | ||||
2569 | cpu_type = "Intel"; | ||||
2570 | x64 = cpu_is_em64t() ? " Intel64" : ""; | ||||
2571 | } else if (is_amd()) { | ||||
2572 | cpu_type = "AMD"; | ||||
2573 | x64 = cpu_is_em64t() ? " AMD64" : ""; | ||||
2574 | } else if (is_hygon()) { | ||||
2575 | cpu_type = "Hygon"; | ||||
2576 | x64 = cpu_is_em64t() ? " AMD64" : ""; | ||||
2577 | } else { | ||||
2578 | cpu_type = "Unknown x86"; | ||||
2579 | x64 = cpu_is_em64t() ? " x86_64" : ""; | ||||
2580 | } | ||||
2581 | |||||
2582 | jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s", | ||||
2583 | cpu_type, | ||||
2584 | cpu_family_description(), | ||||
2585 | supports_ht() ? " (HT)" : "", | ||||
2586 | supports_sse3() ? " SSE3" : "", | ||||
2587 | supports_ssse3() ? " SSSE3" : "", | ||||
2588 | supports_sse4_1() ? " SSE4.1" : "", | ||||
2589 | supports_sse4_2() ? " SSE4.2" : "", | ||||
2590 | supports_sse4a() ? " SSE4A" : "", | ||||
2591 | is_netburst() ? " Netburst" : "", | ||||
2592 | is_intel_family_core() ? " Core" : "", | ||||
2593 | x64); | ||||
2594 | |||||
2595 | return OS_OK; | ||||
2596 | } | ||||
2597 | |||||
2598 | int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) { | ||||
2599 | assert(buf != NULL, "buffer is NULL!")do { if (!(buf != __null)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 2599, "assert(" "buf != __null" ") failed", "buffer is NULL!" ); ::breakpoint(); } } while (0); | ||||
2600 | assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!")do { if (!(buf_len >= CPU_EBS_MAX_LENGTH)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 2600, "assert(" "buf_len >= CPU_EBS_MAX_LENGTH" ") failed" , "buffer len should at least be == CPU_EBS_MAX_LENGTH!"); :: breakpoint(); } } while (0); | ||||
2601 | assert(getCPUIDBrandString_stub != NULL, "not initialized")do { if (!(getCPUIDBrandString_stub != __null)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 2601, "assert(" "getCPUIDBrandString_stub != __null" ") failed" , "not initialized"); ::breakpoint(); } } while (0); | ||||
2602 | |||||
2603 | // invoke newly generated asm code to fetch CPU Brand String | ||||
2604 | getCPUIDBrandString_stub(&_cpuid_info); | ||||
2605 | |||||
2606 | // fetch results into buffer | ||||
2607 | *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0; | ||||
2608 | *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1; | ||||
2609 | *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2; | ||||
2610 | *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3; | ||||
2611 | *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4; | ||||
2612 | *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5; | ||||
2613 | *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6; | ||||
2614 | *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7; | ||||
2615 | *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8; | ||||
2616 | *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9; | ||||
2617 | *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10; | ||||
2618 | *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11; | ||||
2619 | |||||
2620 | return OS_OK; | ||||
2621 | } | ||||
2622 | |||||
// Append a comma-separated list of supported CPU feature names to buf.
// Returns the number of characters written (not counting the terminating
// NUL); on a formatting error it returns buf_len - 1, i.e. "buffer full".
size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
  guarantee(buf != NULL, "buffer is NULL!");
  guarantee(buf_len > 0, "buffer len not enough!");

  unsigned int flag = 0;
  unsigned int fi = 0;
  size_t written = 0;
  // First feature is printed bare; every later one is prefixed with ", ".
  const char* prefix = "";

// Append one feature name. Bails out with a "buffer exhausted" result on a
// jio_snprintf error, and switches the separator on after the first write.
#define WRITE_TO_BUF(string) \
  { \
    int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
    if (res < 0) { \
      return buf_len - 1; \
    } \
    written += res; \
    if (prefix[0] == '\0') { \
      prefix = ", "; \
    } \
  }

  // Walk bits 0..29 of std CPUID leaf 1 EDX; _feature_edx_id[fi] names bit fi.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
      continue; /* no hyperthreading */
    } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
      continue; /* no fast system call */
    }
    if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_edx_id[fi]);
    }
  }

  // Bits of std CPUID leaf 1 ECX.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_ecx_id[fi]);
    }
  }

  // Bits of extended CPUID leaf 0x80000001 ECX.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
    }
  }

  // Bits of extended CPUID leaf 0x80000001 EDX.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_edx_id[fi]);
    }
  }

  if (supports_tscinv_bit()) {
    WRITE_TO_BUF("Invariant TSC");
  }

  return written;
}
2679 | |||||
2680 | /** | ||||
2681 | * Write a detailed description of the cpu to a given buffer, including | ||||
2682 | * feature set. | ||||
2683 | */ | ||||
int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
  assert(buf != NULL, "buffer is NULL!");
  assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");

  static const char* unknown = "<unknown>";
  char vendor_id[VENDOR_LENGTH];
  const char* family = NULL;
  const char* model = NULL;
  const char* brand = NULL;
  int outputLen = 0;

  // Fall back to "<unknown>" for any description we cannot resolve.
  family = cpu_family_description();
  if (family == NULL) {
    family = unknown;
  }

  model = cpu_model_description();
  if (model == NULL) {
    model = unknown;
  }

  brand = cpu_brand_string();

  // Prefer the full brand string; fall back to the short brand, then "<unknown>".
  if (brand == NULL) {
    brand = cpu_brand();
    if (brand == NULL) {
      brand = unknown;
    }
  }

  // Assemble the 12-char vendor id. Note the 0, 2, 1 field order — presumably
  // matching the ebx/edx/ecx register order CPUID leaf 0 returns the vendor
  // string in (e.g. "GenuineIntel"); confirm against cpuid stub layout.
  *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
  *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
  *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
  vendor_id[VENDOR_LENGTH-1] = '\0';

  outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
    "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
    "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
    "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
    "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
    "Supports: ",
    brand,
    vendor_id,
    family,
    extended_cpu_family(),
    model,
    extended_cpu_model(),
    cpu_stepping(),
    _cpuid_info.std_cpuid1_eax.bits.ext_family,
    _cpuid_info.std_cpuid1_eax.bits.ext_model,
    _cpuid_info.std_cpuid1_eax.bits.proc_type,
    _cpuid_info.std_cpuid1_eax.value,
    _cpuid_info.std_cpuid1_ebx.value,
    _cpuid_info.std_cpuid1_ecx.value,
    _cpuid_info.std_cpuid1_edx.value,
    _cpuid_info.ext_cpuid1_eax,
    _cpuid_info.ext_cpuid1_ebx,
    _cpuid_info.ext_cpuid1_ecx,
    _cpuid_info.ext_cpuid1_edx);

  // A negative result is a formatting error; >= buf_len - 1 means the fixed
  // part already filled the buffer and there is no room for the feature list.
  if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
    if (buf_len > 0) { buf[buf_len-1] = '\0'; }
    return OS_ERR;
  }

  // Append the "Supports:" feature list after the fixed-format header.
  cpu_write_support_string(&buf[outputLen], buf_len - outputLen);

  return OS_OK;
}
2753 | |||||
2754 | |||||
2755 | // Fill in Abstract_VM_Version statics | ||||
// Fill in Abstract_VM_Version statics
// One-time population of _cpu_name/_cpu_desc; must run after VM_Version
// feature detection and before any consumer reads the cached strings.
void VM_Version::initialize_cpu_information() {
  assert(_vm_version_initialized, "should have initialized VM_Version long ago");
  assert(!_initialized, "shouldn't be initialized yet");
  resolve_cpu_information_details();

  // initialize cpu_name and cpu_desc
  cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
  cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
  _initialized = true;
}
2766 | |||||
2767 | /** | ||||
2768 | * For information about extracting the frequency from the cpu brand string, please see: | ||||
2769 | * | ||||
2770 | * Intel Processor Identification and the CPUID Instruction | ||||
2771 | * Application Note 485 | ||||
2772 | * May 2012 | ||||
2773 | * | ||||
2774 | * The return value is the frequency in Hz. | ||||
2775 | */ | ||||
2776 | int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) { | ||||
2777 | const char* const brand_string = cpu_brand_string(); | ||||
2778 | if (brand_string == NULL__null) { | ||||
2779 | return 0; | ||||
2780 | } | ||||
2781 | const int64_t MEGA = 1000000; | ||||
2782 | int64_t multiplier = 0; | ||||
2783 | int64_t frequency = 0; | ||||
2784 | uint8_t idx = 0; | ||||
2785 | // The brand string buffer is at most 48 bytes. | ||||
2786 | // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y. | ||||
2787 | for (; idx < 48-2; ++idx) { | ||||
2788 | // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits. | ||||
2789 | // Search brand string for "yHz" where y is M, G, or T. | ||||
2790 | if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') { | ||||
2791 | if (brand_string[idx] == 'M') { | ||||
2792 | multiplier = MEGA; | ||||
2793 | } else if (brand_string[idx] == 'G') { | ||||
2794 | multiplier = MEGA * 1000; | ||||
2795 | } else if (brand_string[idx] == 'T') { | ||||
2796 | multiplier = MEGA * MEGA; | ||||
2797 | } | ||||
2798 | break; | ||||
2799 | } | ||||
2800 | } | ||||
2801 | if (multiplier > 0) { | ||||
2802 | // Compute freqency (in Hz) from brand string. | ||||
2803 | if (brand_string[idx-3] == '.') { // if format is "x.xx" | ||||
2804 | frequency = (brand_string[idx-4] - '0') * multiplier; | ||||
2805 | frequency += (brand_string[idx-2] - '0') * multiplier / 10; | ||||
2806 | frequency += (brand_string[idx-1] - '0') * multiplier / 100; | ||||
2807 | } else { // format is "xxxx" | ||||
2808 | frequency = (brand_string[idx-4] - '0') * 1000; | ||||
2809 | frequency += (brand_string[idx-3] - '0') * 100; | ||||
2810 | frequency += (brand_string[idx-2] - '0') * 10; | ||||
2811 | frequency += (brand_string[idx-1] - '0'); | ||||
2812 | frequency *= multiplier; | ||||
2813 | } | ||||
2814 | } | ||||
2815 | return frequency; | ||||
2816 | } | ||||
2817 | |||||
2818 | |||||
2819 | int64_t VM_Version::maximum_qualified_cpu_frequency(void) { | ||||
2820 | if (_max_qualified_cpu_frequency == 0) { | ||||
2821 | _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string(); | ||||
2822 | } | ||||
2823 | return _max_qualified_cpu_frequency; | ||||
2824 | } | ||||
2825 |
1 | /* | |||
2 | * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. | |||
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |||
4 | * | |||
5 | * This code is free software; you can redistribute it and/or modify it | |||
6 | * under the terms of the GNU General Public License version 2 only, as | |||
7 | * published by the Free Software Foundation. | |||
8 | * | |||
9 | * This code is distributed in the hope that it will be useful, but WITHOUT | |||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |||
12 | * version 2 for more details (a copy is included in the LICENSE file that | |||
13 | * accompanied this code). | |||
14 | * | |||
15 | * You should have received a copy of the GNU General Public License version | |||
16 | * 2 along with this work; if not, write to the Free Software Foundation, | |||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |||
18 | * | |||
19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | |||
20 | * or visit www.oracle.com if you need additional information or have any | |||
21 | * questions. | |||
22 | * | |||
23 | */ | |||
24 | ||||
25 | #ifndef CPU_X86_MACROASSEMBLER_X86_HPP | |||
26 | #define CPU_X86_MACROASSEMBLER_X86_HPP | |||
27 | ||||
28 | #include "asm/assembler.hpp" | |||
29 | #include "code/vmreg.inline.hpp" | |||
30 | #include "compiler/oopMap.hpp" | |||
31 | #include "utilities/macros.hpp" | |||
32 | #include "runtime/rtmLocking.hpp" | |||
33 | #include "runtime/vm_version.hpp" | |||
34 | ||||
35 | // MacroAssembler extends Assembler by frequently used macros. | |||
36 | // | |||
37 | // Instructions for which a 'better' code sequence exists depending | |||
38 | // on arguments should also go in here. | |||
39 | ||||
40 | class MacroAssembler: public Assembler { | |||
41 | friend class LIR_Assembler; | |||
42 | friend class Runtime1; // as_Address() | |||
43 | ||||
44 | public: | |||
45 | // Support for VM calls | |||
46 | // | |||
47 | // This is the base routine called by the different versions of call_VM_leaf. The interpreter | |||
48 | // may customize this version by overriding it for its purposes (e.g., to save/restore | |||
49 | // additional registers when doing a VM call). | |||
50 | ||||
51 | virtual void call_VM_leaf_base( | |||
52 | address entry_point, // the entry point | |||
53 | int number_of_arguments // the number of arguments to pop after the call | |||
54 | ); | |||
55 | ||||
56 | protected: | |||
57 | // This is the base routine called by the different versions of call_VM. The interpreter | |||
58 | // may customize this version by overriding it for its purposes (e.g., to save/restore | |||
59 | // additional registers when doing a VM call). | |||
60 | // | |||
  // If no java_thread register is specified (noreg) then rdi will be used instead. call_VM_base
  // returns the register which contains the thread upon return. If a thread register has been
  // specified, the return value will correspond to that register. If no last_java_sp is specified
  // (noreg) then rsp will be used instead.
65 | virtual void call_VM_base( // returns the register containing the thread upon return | |||
66 | Register oop_result, // where an oop-result ends up if any; use noreg otherwise | |||
67 | Register java_thread, // the thread if computed before ; use noreg otherwise | |||
68 | Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise | |||
69 | address entry_point, // the entry point | |||
70 | int number_of_arguments, // the number of arguments (w/o thread) to pop after the call | |||
71 | bool check_exceptions // whether to check for pending exceptions after return | |||
72 | ); | |||
73 | ||||
74 | void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); | |||
75 | ||||
76 | // helpers for FPU flag access | |||
77 | // tmp is a temporary register, if none is available use noreg | |||
78 | void save_rax (Register tmp); | |||
79 | void restore_rax(Register tmp); | |||
80 | ||||
81 | public: | |||
82 | MacroAssembler(CodeBuffer* code) : Assembler(code) {} | |||
83 | ||||
84 | // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. | |||
85 | // The implementation is only non-empty for the InterpreterMacroAssembler, | |||
86 | // as only the interpreter handles PopFrame and ForceEarlyReturn requests. | |||
87 | virtual void check_and_handle_popframe(Register java_thread); | |||
88 | virtual void check_and_handle_earlyret(Register java_thread); | |||
89 | ||||
90 | Address as_Address(AddressLiteral adr); | |||
91 | Address as_Address(ArrayAddress adr); | |||
92 | ||||
93 | // Support for NULL-checks | |||
94 | // | |||
95 | // Generates code that causes a NULL OS exception if the content of reg is NULL. | |||
96 | // If the accessed location is M[reg + offset] and the offset is known, provide the | |||
97 | // offset. No explicit code generation is needed if the offset is within a certain | |||
98 | // range (0 <= offset <= page_size). | |||
99 | ||||
100 | void null_check(Register reg, int offset = -1); | |||
101 | static bool needs_explicit_null_check(intptr_t offset); | |||
102 | static bool uses_implicit_null_check(void* address); | |||
103 | ||||
104 | // Required platform-specific helpers for Label::patch_instructions. | |||
105 | // They _shadow_ the declarations in AbstractAssembler, which are undefined. | |||
  // Patch the displacement of the branch instruction at 'branch' so it
  // transfers control to 'target'. Only instructions Label::patch_instructions
  // can encounter are accepted (call, jmp, short jmp, jcc in both forms, and
  // xbegin); anything else trips the assert. 'file'/'line' identify the
  // patch site for diagnostics.
  void pd_patch_instruction(address branch, address target, const char* file, int line) {
    unsigned char op = branch[0];
    assert(op == 0xE8 /* call */ ||
        op == 0xE9 /* jmp */ ||
        op == 0xEB /* short jmp */ ||
        (op & 0xF0) == 0x70 /* short jcc */ ||
        op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */ ||
        op == 0xC7 && branch[1] == 0xF8 /* xbegin */,
        "Invalid opcode at patch point");

    if (op == 0xEB || (op & 0xF0) == 0x70) {
      // short offset operators (jmp and jcc)
      // Displacement is a single byte, relative to the end of the
      // displacement field (&disp[1]); it must fit in 8 bits.
      char* disp = (char*) &branch[1];
      int imm8 = target - (address) &disp[1];
      guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset at %s:%d",
                file == NULL ? "<NULL>" : file, line);
      *disp = imm8;
    } else {
      // 32-bit displacement forms: two-byte opcodes (0x0F jcc, 0xC7 xbegin)
      // place the displacement at offset 2, single-byte opcodes at offset 1.
      int* disp = (int*) &branch[(op == 0x0F || op == 0xC7)? 2: 1];
      int imm32 = target - (address) &disp[1];
      *disp = imm32;
    }
  }
129 | ||||
130 | // The following 4 methods return the offset of the appropriate move instruction | |||
131 | ||||
132 | // Support for fast byte/short loading with zero extension (depending on particular CPU) | |||
133 | int load_unsigned_byte(Register dst, Address src); | |||
134 | int load_unsigned_short(Register dst, Address src); | |||
135 | ||||
136 | // Support for fast byte/short loading with sign extension (depending on particular CPU) | |||
137 | int load_signed_byte(Register dst, Address src); | |||
138 | int load_signed_short(Register dst, Address src); | |||
139 | ||||
140 | // Support for sign-extension (hi:lo = extend_sign(lo)) | |||
141 | void extend_sign(Register hi, Register lo); | |||
142 | ||||
143 | // Load and store values by size and signed-ness | |||
144 | void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); | |||
145 | void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); | |||
146 | ||||
147 | // Support for inc/dec with optimal instruction selection depending on value | |||
148 | ||||
  // Pointer-width increment/decrement: emits the 64-bit form on LP64 builds
  // and the 32-bit form otherwise.
  void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; }
  void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; }
151 | ||||
152 | void decrementl(Address dst, int value = 1); | |||
153 | void decrementl(Register reg, int value = 1); | |||
154 | ||||
155 | void decrementq(Register reg, int value = 1); | |||
156 | void decrementq(Address dst, int value = 1); | |||
157 | ||||
158 | void incrementl(Address dst, int value = 1); | |||
159 | void incrementl(Register reg, int value = 1); | |||
160 | ||||
161 | void incrementq(Register reg, int value = 1); | |||
162 | void incrementq(Address dst, int value = 1); | |||
163 | ||||
164 | // Support optimal SSE move instructions. | |||
165 | void movflt(XMMRegister dst, XMMRegister src) { | |||
166 | if (dst-> encoding() == src->encoding()) return; | |||
167 | if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; } | |||
168 | else { movss (dst, src); return; } | |||
169 | } | |||
170 | void movflt(XMMRegister dst, Address src) { movss(dst, src); } | |||
171 | void movflt(XMMRegister dst, AddressLiteral src); | |||
172 | void movflt(Address dst, XMMRegister src) { movss(dst, src); } | |||
173 | ||||
174 | // Move with zero extension | |||
175 | void movfltz(XMMRegister dst, XMMRegister src) { movss(dst, src); } | |||
176 | ||||
177 | void movdbl(XMMRegister dst, XMMRegister src) { | |||
178 | if (dst-> encoding() == src->encoding()) return; | |||
179 | if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; } | |||
180 | else { movsd (dst, src); return; } | |||
181 | } | |||
182 | ||||
183 | void movdbl(XMMRegister dst, AddressLiteral src); | |||
184 | ||||
185 | void movdbl(XMMRegister dst, Address src) { | |||
186 | if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; } | |||
187 | else { movlpd(dst, src); return; } | |||
188 | } | |||
189 | void movdbl(Address dst, XMMRegister src) { movsd(dst, src); } | |||
190 | ||||
191 | void incrementl(AddressLiteral dst); | |||
192 | void incrementl(ArrayAddress dst); | |||
193 | ||||
194 | void incrementq(AddressLiteral dst); | |||
195 | ||||
196 | // Alignment | |||
197 | void align32(); | |||
198 | void align64(); | |||
199 | void align(int modulus); | |||
200 | void align(int modulus, int target); | |||
201 | ||||
202 | // A 5 byte nop that is safe for patching (see patch_verified_entry) | |||
203 | void fat_nop(); | |||
204 | ||||
205 | // Stack frame creation/removal | |||
206 | void enter(); | |||
207 | void leave(); | |||
208 | ||||
209 | // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) | |||
210 | // The pointer will be loaded into the thread register. | |||
211 | void get_thread(Register thread); | |||
212 | ||||
213 | #ifdef _LP641 | |||
214 | // Support for argument shuffling | |||
215 | ||||
216 | void move32_64(VMRegPair src, VMRegPair dst); | |||
217 | void long_move(VMRegPair src, VMRegPair dst); | |||
218 | void float_move(VMRegPair src, VMRegPair dst); | |||
219 | void double_move(VMRegPair src, VMRegPair dst); | |||
220 | void move_ptr(VMRegPair src, VMRegPair dst); | |||
221 | void object_move(OopMap* map, | |||
222 | int oop_handle_offset, | |||
223 | int framesize_in_slots, | |||
224 | VMRegPair src, | |||
225 | VMRegPair dst, | |||
226 | bool is_receiver, | |||
227 | int* receiver_offset); | |||
228 | #endif // _LP64 | |||
229 | ||||
230 | // Support for VM calls | |||
231 | // | |||
232 | // It is imperative that all calls into the VM are handled via the call_VM macros. | |||
233 | // They make sure that the stack linkage is setup correctly. call_VM's correspond | |||
234 | // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. | |||
235 | ||||
236 | ||||
237 | void call_VM(Register oop_result, | |||
238 | address entry_point, | |||
239 | bool check_exceptions = true); | |||
240 | void call_VM(Register oop_result, | |||
241 | address entry_point, | |||
242 | Register arg_1, | |||
243 | bool check_exceptions = true); | |||
244 | void call_VM(Register oop_result, | |||
245 | address entry_point, | |||
246 | Register arg_1, Register arg_2, | |||
247 | bool check_exceptions = true); | |||
248 | void call_VM(Register oop_result, | |||
249 | address entry_point, | |||
250 | Register arg_1, Register arg_2, Register arg_3, | |||
251 | bool check_exceptions = true); | |||
252 | ||||
253 | // Overloadings with last_Java_sp | |||
254 | void call_VM(Register oop_result, | |||
255 | Register last_java_sp, | |||
256 | address entry_point, | |||
257 | int number_of_arguments = 0, | |||
258 | bool check_exceptions = true); | |||
259 | void call_VM(Register oop_result, | |||
260 | Register last_java_sp, | |||
261 | address entry_point, | |||
262 | Register arg_1, bool | |||
263 | check_exceptions = true); | |||
264 | void call_VM(Register oop_result, | |||
265 | Register last_java_sp, | |||
266 | address entry_point, | |||
267 | Register arg_1, Register arg_2, | |||
268 | bool check_exceptions = true); | |||
269 | void call_VM(Register oop_result, | |||
270 | Register last_java_sp, | |||
271 | address entry_point, | |||
272 | Register arg_1, Register arg_2, Register arg_3, | |||
273 | bool check_exceptions = true); | |||
274 | ||||
275 | void get_vm_result (Register oop_result, Register thread); | |||
276 | void get_vm_result_2(Register metadata_result, Register thread); | |||
277 | ||||
278 | // These always tightly bind to MacroAssembler::call_VM_base | |||
279 | // bypassing the virtual implementation | |||
280 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true); | |||
281 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true); | |||
282 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); | |||
283 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); | |||
284 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true); | |||
285 | ||||
286 | void call_VM_leaf0(address entry_point); | |||
287 | void call_VM_leaf(address entry_point, | |||
288 | int number_of_arguments = 0); | |||
289 | void call_VM_leaf(address entry_point, | |||
290 | Register arg_1); | |||
291 | void call_VM_leaf(address entry_point, | |||
292 | Register arg_1, Register arg_2); | |||
293 | void call_VM_leaf(address entry_point, | |||
294 | Register arg_1, Register arg_2, Register arg_3); | |||
295 | ||||
296 | // These always tightly bind to MacroAssembler::call_VM_leaf_base | |||
297 | // bypassing the virtual implementation | |||
298 | void super_call_VM_leaf(address entry_point); | |||
299 | void super_call_VM_leaf(address entry_point, Register arg_1); | |||
300 | void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); | |||
301 | void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); | |||
302 | void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4); | |||
303 | ||||
304 | // last Java Frame (fills frame anchor) | |||
305 | void set_last_Java_frame(Register thread, | |||
306 | Register last_java_sp, | |||
307 | Register last_java_fp, | |||
308 | address last_java_pc); | |||
309 | ||||
310 | // thread in the default location (r15_thread on 64bit) | |||
311 | void set_last_Java_frame(Register last_java_sp, | |||
312 | Register last_java_fp, | |||
313 | address last_java_pc); | |||
314 | ||||
315 | void reset_last_Java_frame(Register thread, bool clear_fp); | |||
316 | ||||
317 | // thread in the default location (r15_thread on 64bit) | |||
318 | void reset_last_Java_frame(bool clear_fp); | |||
319 | ||||
320 | // jobjects | |||
321 | void clear_jweak_tag(Register possibly_jweak); | |||
322 | void resolve_jobject(Register value, Register thread, Register tmp); | |||
323 | ||||
324 | // C 'boolean' to Java boolean: x == 0 ? 0 : 1 | |||
325 | void c2bool(Register x); | |||
326 | ||||
327 | // C++ bool manipulation | |||
328 | ||||
329 | void movbool(Register dst, Address src); | |||
330 | void movbool(Address dst, bool boolconst); | |||
331 | void movbool(Address dst, Register src); | |||
332 | void testbool(Register dst); | |||
333 | ||||
334 | void resolve_oop_handle(Register result, Register tmp = rscratch2); | |||
335 | void resolve_weak_handle(Register result, Register tmp); | |||
336 | void load_mirror(Register mirror, Register method, Register tmp = rscratch2); | |||
337 | void load_method_holder_cld(Register rresult, Register rmethod); | |||
338 | ||||
339 | void load_method_holder(Register holder, Register method); | |||
340 | ||||
341 | // oop manipulations | |||
342 | void load_klass(Register dst, Register src, Register tmp); | |||
343 | void store_klass(Register dst, Register src, Register tmp); | |||
344 | ||||
345 | void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, | |||
346 | Register tmp1, Register thread_tmp); | |||
347 | void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, | |||
348 | Register tmp1, Register tmp2); | |||
349 | ||||
350 | void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, | |||
351 | Register thread_tmp = noreg, DecoratorSet decorators = 0); | |||
352 | void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, | |||
353 | Register thread_tmp = noreg, DecoratorSet decorators = 0); | |||
354 | void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, | |||
355 | Register tmp2 = noreg, DecoratorSet decorators = 0); | |||
356 | ||||
357 | // Used for storing NULL. All other oop constants should be | |||
358 | // stored using routines that take a jobject. | |||
359 | void store_heap_oop_null(Address dst); | |||
360 | ||||
361 | #ifdef _LP641 | |||
362 | void store_klass_gap(Register dst, Register src); | |||
363 | ||||
364 | // This dummy is to prevent a call to store_heap_oop from | |||
365 | // converting a zero (like NULL) into a Register by giving | |||
366 | // the compiler two choices it can't resolve | |||
367 | ||||
368 | void store_heap_oop(Address dst, void* dummy); | |||
369 | ||||
370 | void encode_heap_oop(Register r); | |||
371 | void decode_heap_oop(Register r); | |||
372 | void encode_heap_oop_not_null(Register r); | |||
373 | void decode_heap_oop_not_null(Register r); | |||
374 | void encode_heap_oop_not_null(Register dst, Register src); | |||
375 | void decode_heap_oop_not_null(Register dst, Register src); | |||
376 | ||||
377 | void set_narrow_oop(Register dst, jobject obj); | |||
378 | void set_narrow_oop(Address dst, jobject obj); | |||
379 | void cmp_narrow_oop(Register dst, jobject obj); | |||
380 | void cmp_narrow_oop(Address dst, jobject obj); | |||
381 | ||||
382 | void encode_klass_not_null(Register r, Register tmp); | |||
383 | void decode_klass_not_null(Register r, Register tmp); | |||
384 | void encode_and_move_klass_not_null(Register dst, Register src); | |||
385 | void decode_and_move_klass_not_null(Register dst, Register src); | |||
386 | void set_narrow_klass(Register dst, Klass* k); | |||
387 | void set_narrow_klass(Address dst, Klass* k); | |||
388 | void cmp_narrow_klass(Register dst, Klass* k); | |||
389 | void cmp_narrow_klass(Address dst, Klass* k); | |||
390 | ||||
391 | // if heap base register is used - reinit it with the correct value | |||
392 | void reinit_heapbase(); | |||
393 | ||||
394 | DEBUG_ONLY(void verify_heapbase(const char* msg);)void verify_heapbase(const char* msg); | |||
395 | ||||
396 | #endif // _LP64 | |||
397 | ||||
398 | // Int division/remainder for Java | |||
399 | // (as idivl, but checks for special case as described in JVM spec.) | |||
400 | // returns idivl instruction offset for implicit exception handling | |||
401 | int corrected_idivl(Register reg); | |||
402 | ||||
403 | // Long division/remainder for Java | |||
404 | // (as idivq, but checks for special case as described in JVM spec.) | |||
405 | // returns idivq instruction offset for implicit exception handling | |||
406 | int corrected_idivq(Register reg); | |||
407 | ||||
408 | void int3(); | |||
409 | ||||
410 | // Long operation macros for a 32bit cpu | |||
411 | // Long negation for Java | |||
412 | void lneg(Register hi, Register lo); | |||
413 | ||||
414 | // Long multiplication for Java | |||
415 | // (destroys contents of eax, ebx, ecx and edx) | |||
416 | void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y | |||
417 | ||||
418 | // Long shifts for Java | |||
419 | // (semantics as described in JVM spec.) | |||
420 | void lshl(Register hi, Register lo); // hi:lo << (rcx & 0x3f) | |||
421 | void lshr(Register hi, Register lo, bool sign_extension = false); // hi:lo >> (rcx & 0x3f) | |||
422 | ||||
423 | // Long compare for Java | |||
424 | // (semantics as described in JVM spec.) | |||
425 | void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y) | |||
426 | ||||
427 | ||||
428 | // misc | |||
429 | ||||
430 | // Sign extension | |||
431 | void sign_extend_short(Register reg); | |||
432 | void sign_extend_byte(Register reg); | |||
433 | ||||
434 | // Division by power of 2, rounding towards 0 | |||
435 | void division_with_shift(Register reg, int shift_value); | |||
436 | ||||
#ifndef _LP64
  // Compares the top-most stack entries on the FPU stack and sets the eflags as follows:
  //
  // CF (corresponds to C0) if x < y
  // PF (corresponds to C2) if unordered
  // ZF (corresponds to C3) if x = y
  //
  // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
  // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code)
  void fcmp(Register tmp);
  // Variant of the above which allows y to be further down the stack
  // and which only pops x and y if specified. If pop_right is
  // specified then pop_left must also be specified.
  void fcmp(Register tmp, int index, bool pop_left, bool pop_right);

  // Floating-point comparison for Java
  // Compares the top-most stack entries on the FPU stack and stores the result in dst.
  // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
  // (semantics as described in JVM spec.)
  void fcmp2int(Register dst, bool unordered_is_less);
  // Variant of the above which allows y to be further down the stack
  // and which only pops x and y if specified. If pop_right is
  // specified then pop_left must also be specified.
  void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right);

  // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards)
  // tmp is a temporary register, if none is available use noreg
  void fremr(Register tmp);

  // only if +VerifyFPU
  void verify_FPU(int stack_depth, const char* s = "illegal FPU state");
#endif // !LP64
469 | ||||
470 | // dst = c = a * b + c | |||
471 | void fmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c); | |||
472 | void fmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c); | |||
473 | ||||
474 | void vfmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len); | |||
475 | void vfmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len); | |||
476 | void vfmad(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len); | |||
477 | void vfmaf(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len); | |||
478 | ||||
479 | ||||
480 | // same as fcmp2int, but using SSE2 | |||
481 | void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); | |||
482 | void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); | |||
483 | ||||
484 | // branch to L if FPU flag C2 is set/not set | |||
485 | // tmp is a temporary register, if none is available use noreg | |||
486 | void jC2 (Register tmp, Label& L); | |||
487 | void jnC2(Register tmp, Label& L); | |||
488 | ||||
489 | // Load float value from 'address'. If UseSSE >= 1, the value is loaded into | |||
490 | // register xmm0. Otherwise, the value is loaded onto the FPU stack. | |||
491 | void load_float(Address src); | |||
492 | ||||
493 | // Store float value to 'address'. If UseSSE >= 1, the value is stored | |||
494 | // from register xmm0. Otherwise, the value is stored from the FPU stack. | |||
495 | void store_float(Address dst); | |||
496 | ||||
497 | // Load double value from 'address'. If UseSSE >= 2, the value is loaded into | |||
498 | // register xmm0. Otherwise, the value is loaded onto the FPU stack. | |||
499 | void load_double(Address src); | |||
500 | ||||
501 | // Store double value to 'address'. If UseSSE >= 2, the value is stored | |||
502 | // from register xmm0. Otherwise, the value is stored from the FPU stack. | |||
503 | void store_double(Address dst); | |||
504 | ||||
#ifndef _LP64
  // Pop ST (ffree & fincstp combined)
  void fpop();

  void empty_FPU_stack();
#endif // !_LP64
511 | ||||
512 | void push_IU_state(); | |||
513 | void pop_IU_state(); | |||
514 | ||||
515 | void push_FPU_state(); | |||
516 | void pop_FPU_state(); | |||
517 | ||||
518 | void push_CPU_state(); | |||
519 | void pop_CPU_state(); | |||
520 | ||||
521 | // Round up to a power of two | |||
522 | void round_to(Register reg, int modulus); | |||
523 | ||||
524 | // Callee saved registers handling | |||
525 | void push_callee_saved_registers(); | |||
526 | void pop_callee_saved_registers(); | |||
527 | ||||
528 | // allocation | |||
529 | void eden_allocate( | |||
530 | Register thread, // Current thread | |||
531 | Register obj, // result: pointer to object after successful allocation | |||
532 | Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise | |||
533 | int con_size_in_bytes, // object size in bytes if known at compile time | |||
534 | Register t1, // temp register | |||
535 | Label& slow_case // continuation point if fast allocation fails | |||
536 | ); | |||
537 | void tlab_allocate( | |||
538 | Register thread, // Current thread | |||
539 | Register obj, // result: pointer to object after successful allocation | |||
540 | Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise | |||
541 | int con_size_in_bytes, // object size in bytes if known at compile time | |||
542 | Register t1, // temp register | |||
543 | Register t2, // temp register | |||
544 | Label& slow_case // continuation point if fast allocation fails | |||
545 | ); | |||
546 | void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp); | |||
547 | ||||
548 | // interface method calling | |||
549 | void lookup_interface_method(Register recv_klass, | |||
550 | Register intf_klass, | |||
551 | RegisterOrConstant itable_index, | |||
552 | Register method_result, | |||
553 | Register scan_temp, | |||
554 | Label& no_such_interface, | |||
555 | bool return_method = true); | |||
556 | ||||
557 | // virtual method calling | |||
558 | void lookup_virtual_method(Register recv_klass, | |||
559 | RegisterOrConstant vtable_index, | |||
560 | Register method_result); | |||
561 | ||||
562 | // Test sub_klass against super_klass, with fast and slow paths. | |||
563 | ||||
564 | // The fast path produces a tri-state answer: yes / no / maybe-slow. | |||
565 | // One of the three labels can be NULL, meaning take the fall-through. | |||
566 | // If super_check_offset is -1, the value is loaded up from super_klass. | |||
567 | // No registers are killed, except temp_reg. | |||
568 | void check_klass_subtype_fast_path(Register sub_klass, | |||
569 | Register super_klass, | |||
570 | Register temp_reg, | |||
571 | Label* L_success, | |||
572 | Label* L_failure, | |||
573 | Label* L_slow_path, | |||
574 | RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); | |||
575 | ||||
576 | // The rest of the type check; must be wired to a corresponding fast path. | |||
577 | // It does not repeat the fast path logic, so don't use it standalone. | |||
578 | // The temp_reg and temp2_reg can be noreg, if no temps are available. | |||
579 | // Updates the sub's secondary super cache as necessary. | |||
580 | // If set_cond_codes, condition codes will be Z on success, NZ on failure. | |||
581 | void check_klass_subtype_slow_path(Register sub_klass, | |||
582 | Register super_klass, | |||
583 | Register temp_reg, | |||
584 | Register temp2_reg, | |||
585 | Label* L_success, | |||
586 | Label* L_failure, | |||
587 | bool set_cond_codes = false); | |||
588 | ||||
589 | // Simplified, combined version, good for typical uses. | |||
590 | // Falls through on failure. | |||
591 | void check_klass_subtype(Register sub_klass, | |||
592 | Register super_klass, | |||
593 | Register temp_reg, | |||
594 | Label& L_success); | |||
595 | ||||
596 | void clinit_barrier(Register klass, | |||
597 | Register thread, | |||
598 | Label* L_fast_path = NULL__null, | |||
599 | Label* L_slow_path = NULL__null); | |||
600 | ||||
601 | // method handles (JSR 292) | |||
602 | Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); | |||
603 | ||||
604 | // Debugging | |||
605 | ||||
606 | // only if +VerifyOops | |||
607 | void _verify_oop(Register reg, const char* s, const char* file, int line); | |||
608 | void _verify_oop_addr(Address addr, const char* s, const char* file, int line); | |||
609 | ||||
610 | void _verify_oop_checked(Register reg, const char* s, const char* file, int line) { | |||
611 | if (VerifyOops) { | |||
612 | _verify_oop(reg, s, file, line); | |||
613 | } | |||
614 | } | |||
615 | void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) { | |||
616 | if (VerifyOops) { | |||
617 | _verify_oop_addr(reg, s, file, line); | |||
618 | } | |||
619 | } | |||
620 | ||||
  // TODO: verify method and klass metadata (compare against vptr?)
  // Intentionally empty placeholders: they exist so the verify_method_ptr()
  // and verify_klass_ptr() macros below compile; no metadata verification
  // is implemented yet.
  void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
  void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){}
624 | ||||
// Convenience macros that stamp the register name, source file and line
// into the message handed to the checked verify helpers above.
#define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
#define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
#define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
630 | ||||
631 | // Verify or restore cpu control state after JNI call | |||
632 | void restore_cpu_control_state_after_jni(); | |||
633 | ||||
634 | // prints msg, dumps registers and stops execution | |||
635 | void stop(const char* msg); | |||
636 | ||||
637 | // prints msg and continues | |||
638 | void warn(const char* msg); | |||
639 | ||||
640 | // dumps registers and other state | |||
641 | void print_state(); | |||
642 | ||||
643 | static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg); | |||
644 | static void debug64(char* msg, int64_t pc, int64_t regs[]); | |||
645 | static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip); | |||
646 | static void print_state64(int64_t pc, int64_t regs[]); | |||
647 | ||||
  void os_breakpoint();

  // Marker for code paths that have been written but never exercised;
  // stop() prints the message, dumps registers and halts execution.
  void untested() { stop("untested"); }

  void unimplemented(const char* what = "");

  // Guard for states that must be unreachable at runtime.
  void should_not_reach_here() { stop("should not reach here"); }

  void print_CPU_state();
657 | ||||
658 | // Stack overflow checking | |||
659 | void bang_stack_with_offset(int offset) { | |||
660 | // stack grows down, caller passes positive offset | |||
661 | assert(offset > 0, "must bang with negative offset")do { if (!(offset > 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp" , 661, "assert(" "offset > 0" ") failed", "must bang with negative offset" ); ::breakpoint(); } } while (0); | |||
662 | movl(Address(rsp, (-offset)), rax); | |||
663 | } | |||
664 | ||||
665 | // Writes to stack successive pages until offset reached to check for | |||
666 | // stack overflow + shadow pages. Also, clobbers tmp | |||
667 | void bang_stack_size(Register size, Register tmp); | |||
668 | ||||
669 | // Check for reserved stack access in method being exited (for JIT) | |||
670 | void reserved_stack_check(); | |||
671 | ||||
672 | void safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool in_nmethod); | |||
673 | ||||
674 | void verify_tlab(); | |||
675 | ||||
676 | Condition negate_condition(Condition cond); | |||
677 | ||||
678 | // Instructions that use AddressLiteral operands. These instruction can handle 32bit/64bit | |||
679 | // operands. In general the names are modified to avoid hiding the instruction in Assembler | |||
680 | // so that we don't need to implement all the varieties in the Assembler with trivial wrappers | |||
681 | // here in MacroAssembler. The major exception to this rule is call | |||
682 | ||||
683 | // Arithmetics | |||
684 | ||||
685 | ||||
686 | void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src))addq(dst, src) NOT_LP64(addl(dst, src)) ; } | |||
687 | void addptr(Address dst, Register src); | |||
688 | ||||
689 | void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src))addq(dst, src) NOT_LP64(addl(dst, src)); } | |||
690 | void addptr(Register dst, int32_t src); | |||
691 | void addptr(Register dst, Register src); | |||
692 | void addptr(Register dst, RegisterOrConstant src) { | |||
693 | if (src.is_constant()) addptr(dst, (int) src.as_constant()); | |||
694 | else addptr(dst, src.as_register()); | |||
695 | } | |||
696 | ||||
697 | void andptr(Register dst, int32_t src); | |||
698 | void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2))andq(src1, src2) NOT_LP64(andl(src1, src2)) ; } | |||
699 | ||||
700 | void cmp8(AddressLiteral src1, int imm); | |||
701 | ||||
702 | // renamed to drag out the casting of address to int32_t/intptr_t | |||
703 | void cmp32(Register src1, int32_t imm); | |||
704 | ||||
705 | void cmp32(AddressLiteral src1, int32_t imm); | |||
706 | // compare reg - mem, or reg - &mem | |||
707 | void cmp32(Register src1, AddressLiteral src2); | |||
708 | ||||
709 | void cmp32(Register src1, Address src2); | |||
710 | ||||
711 | #ifndef _LP641 | |||
712 | void cmpklass(Address dst, Metadata* obj); | |||
713 | void cmpklass(Register dst, Metadata* obj); | |||
714 | void cmpoop(Address dst, jobject obj); | |||
715 | #endif // _LP64 | |||
716 | ||||
717 | void cmpoop(Register src1, Register src2); | |||
718 | void cmpoop(Register src1, Address src2); | |||
719 | void cmpoop(Register dst, jobject obj); | |||
720 | ||||
721 | // NOTE src2 must be the lval. This is NOT an mem-mem compare | |||
722 | void cmpptr(Address src1, AddressLiteral src2); | |||
723 | ||||
724 | void cmpptr(Register src1, AddressLiteral src2); | |||
725 | ||||
726 | void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2))cmpq(src1, src2) NOT_LP64(cmpl(src1, src2)) ; } | |||
727 | void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2))cmpq(src1, src2) NOT_LP64(cmpl(src1, src2)) ; } | |||
728 | // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } | |||
729 | ||||
730 | void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2))cmpq(src1, src2) NOT_LP64(cmpl(src1, src2)) ; } | |||
731 | void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2))cmpq(src1, src2) NOT_LP64(cmpl(src1, src2)) ; } | |||
732 | ||||
733 | // cmp64 to avoild hiding cmpq | |||
734 | void cmp64(Register src1, AddressLiteral src); | |||
735 | ||||
736 | void cmpxchgptr(Register reg, Address adr); | |||
737 | ||||
738 | void locked_cmpxchgptr(Register reg, AddressLiteral adr); | |||
739 | ||||
740 | ||||
741 | void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src))imulq(dst, src) NOT_LP64(imull(dst, src)); } | |||
742 | void imulptr(Register dst, Register src, int imm32) { LP64_ONLY(imulq(dst, src, imm32))imulq(dst, src, imm32) NOT_LP64(imull(dst, src, imm32)); } | |||
743 | ||||
744 | ||||
745 | void negptr(Register dst) { LP64_ONLY(negq(dst))negq(dst) NOT_LP64(negl(dst)); } | |||
746 | ||||
747 | void notptr(Register dst) { LP64_ONLY(notq(dst))notq(dst) NOT_LP64(notl(dst)); } | |||
748 | ||||
749 | void shlptr(Register dst, int32_t shift); | |||
750 | void shlptr(Register dst) { LP64_ONLY(shlq(dst))shlq(dst) NOT_LP64(shll(dst)); } | |||
751 | ||||
752 | void shrptr(Register dst, int32_t shift); | |||
753 | void shrptr(Register dst) { LP64_ONLY(shrq(dst))shrq(dst) NOT_LP64(shrl(dst)); } | |||
754 | ||||
755 | void sarptr(Register dst) { LP64_ONLY(sarq(dst))sarq(dst) NOT_LP64(sarl(dst)); } | |||
756 | void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src))sarq(dst, src) NOT_LP64(sarl(dst, src)); } | |||
757 | ||||
758 | void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src))subq(dst, src) NOT_LP64(subl(dst, src)); } | |||
759 | ||||
760 | void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src))subq(dst, src) NOT_LP64(subl(dst, src)); } | |||
761 | void subptr(Register dst, int32_t src); | |||
762 | // Force generation of a 4 byte immediate value even if it fits into 8bit | |||
763 | void subptr_imm32(Register dst, int32_t src); | |||
764 | void subptr(Register dst, Register src); | |||
765 | void subptr(Register dst, RegisterOrConstant src) { | |||
766 | if (src.is_constant()) subptr(dst, (int) src.as_constant()); | |||
767 | else subptr(dst, src.as_register()); | |||
768 | } | |||
769 | ||||
770 | void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src))sbbq(dst, src) NOT_LP64(sbbl(dst, src)); } | |||
771 | void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src))sbbq(dst, src) NOT_LP64(sbbl(dst, src)); } | |||
772 | ||||
773 | void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2))xchgq(src1, src2) NOT_LP64(xchgl(src1, src2)) ; } | |||
774 | void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2))xchgq(src1, src2) NOT_LP64(xchgl(src1, src2)) ; } | |||
775 | ||||
776 | void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2))xaddq(src1, src2) NOT_LP64(xaddl(src1, src2)) ; } | |||
777 | ||||
778 | ||||
779 | ||||
780 | // Helper functions for statistics gathering. | |||
781 | // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes. | |||
782 | void cond_inc32(Condition cond, AddressLiteral counter_addr); | |||
783 | // Unconditional atomic increment. | |||
784 | void atomic_incl(Address counter_addr); | |||
785 | void atomic_incl(AddressLiteral counter_addr, Register scr = rscratch1); | |||
786 | #ifdef _LP641 | |||
787 | void atomic_incq(Address counter_addr); | |||
788 | void atomic_incq(AddressLiteral counter_addr, Register scr = rscratch1); | |||
789 | #endif | |||
790 | void atomic_incptr(AddressLiteral counter_addr, Register scr = rscratch1) { LP64_ONLY(atomic_incq(counter_addr, scr))atomic_incq(counter_addr, scr) NOT_LP64(atomic_incl(counter_addr, scr)) ; } | |||
791 | void atomic_incptr(Address counter_addr) { LP64_ONLY(atomic_incq(counter_addr))atomic_incq(counter_addr) NOT_LP64(atomic_incl(counter_addr)) ; } | |||
792 | ||||
793 | void lea(Register dst, AddressLiteral adr); | |||
794 | void lea(Address dst, AddressLiteral adr); | |||
795 | void lea(Register dst, Address adr) { Assembler::lea(dst, adr); } | |||
796 | ||||
797 | void leal32(Register dst, Address src) { leal(dst, src); } | |||
798 | ||||
799 | // Import other testl() methods from the parent class or else | |||
800 | // they will be hidden by the following overriding declaration. | |||
801 | using Assembler::testl; | |||
802 | void testl(Register dst, AddressLiteral src); | |||
803 | ||||
804 | void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src))orq(dst, src) NOT_LP64(orl(dst, src)); } | |||
805 | void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src))orq(dst, src) NOT_LP64(orl(dst, src)); } | |||
806 | void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src))orq(dst, src) NOT_LP64(orl(dst, src)); } | |||
807 | void orptr(Address dst, int32_t imm32) { LP64_ONLY(orq(dst, imm32))orq(dst, imm32) NOT_LP64(orl(dst, imm32)); } | |||
808 | ||||
809 | void testptr(Register src, int32_t imm32) { LP64_ONLY(testq(src, imm32))testq(src, imm32) NOT_LP64(testl(src, imm32)); } | |||
810 | void testptr(Register src1, Address src2) { LP64_ONLY(testq(src1, src2))testq(src1, src2) NOT_LP64(testl(src1, src2)); } | |||
811 | void testptr(Register src1, Register src2); | |||
812 | ||||
813 | void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src))xorq(dst, src) NOT_LP64(xorl(dst, src)); } | |||
814 | void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src))xorq(dst, src) NOT_LP64(xorl(dst, src)); } | |||
815 | ||||
  // Calls

  void call(Label& L, relocInfo::relocType rtype);
  void call(Register entry);
  // Memory-operand call; forwards directly to the Assembler.
  void call(Address addr) { Assembler::call(addr); }

  // NOTE: this call transfers to the effective address of entry NOT
  // the address contained by entry. This is because this is more natural
  // for jumps/calls.
  void call(AddressLiteral entry);
826 | ||||
827 | // Emit the CompiledIC call idiom | |||
828 | void ic_call(address entry, jint method_index = 0); | |||
829 | ||||
830 | // Jumps | |||
831 | ||||
832 | // NOTE: these jumps tranfer to the effective address of dst NOT | |||
833 | // the address contained by dst. This is because this is more natural | |||
834 | // for jumps/calls. | |||
835 | void jump(AddressLiteral dst); | |||
836 | void jump_cc(Condition cc, AddressLiteral dst); | |||
837 | ||||
838 | // 32bit can do a case table jump in one instruction but we no longer allow the base | |||
839 | // to be installed in the Address class. This jump will tranfers to the address | |||
840 | // contained in the location described by entry (not the address of entry) | |||
841 | void jump(ArrayAddress entry); | |||
842 | ||||
  // Floating

  // SSE scalar helpers: the XMMRegister/Address forms forward directly to the
  // Assembler; the AddressLiteral overloads (defined elsewhere) take a scratch
  // register where one may be needed to materialize a far address.
  void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
  void andpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
  void andpd(XMMRegister dst, XMMRegister src) { Assembler::andpd(dst, src); }

  void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
  void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
  void andps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);

  void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); }
  void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
  void comiss(XMMRegister dst, AddressLiteral src);

  void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); }
  void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
  void comisd(XMMRegister dst, AddressLiteral src);
860 | ||||
#ifndef _LP64
  // 32-bit only x87 helpers; Address forms forward to the Assembler,
  // AddressLiteral forms resolve the literal first.
  void fadd_s(Address src)        { Assembler::fadd_s(src); }
  void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); }

  void fldcw(Address src) { Assembler::fldcw(src); }
  void fldcw(AddressLiteral src);

  void fld_s(int index)   { Assembler::fld_s(index); }
  void fld_s(Address src) { Assembler::fld_s(src); }
  void fld_s(AddressLiteral src);

  void fld_d(Address src) { Assembler::fld_d(src); }
  void fld_d(AddressLiteral src);

  void fmul_s(Address src)        { Assembler::fmul_s(src); }
  void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); }
#endif // _LP64
878 | ||||
  // x87 extended-precision load and MXCSR load: Address forms forward to the
  // Assembler; AddressLiteral overloads are defined elsewhere.
  void fld_x(Address src) { Assembler::fld_x(src); }
  void fld_x(AddressLiteral src);

  void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
  void ldmxcsr(AddressLiteral src);
884 | ||||
885 | #ifdef _LP641 | |||
886 | private: | |||
887 | void sha256_AVX2_one_round_compute( | |||
888 | Register reg_old_h, | |||
889 | Register reg_a, | |||
890 | Register reg_b, | |||
891 | Register reg_c, | |||
892 | Register reg_d, | |||
893 | Register reg_e, | |||
894 | Register reg_f, | |||
895 | Register reg_g, | |||
896 | Register reg_h, | |||
897 | int iter); | |||
898 | void sha256_AVX2_four_rounds_compute_first(int start); | |||
899 | void sha256_AVX2_four_rounds_compute_last(int start); | |||
900 | void sha256_AVX2_one_round_and_sched( | |||
901 | XMMRegister xmm_0, /* == ymm4 on 0, 1, 2, 3 iterations, then rotate 4 registers left on 4, 8, 12 iterations */ | |||
902 | XMMRegister xmm_1, /* ymm5 */ /* full cycle is 16 iterations */ | |||
903 | XMMRegister xmm_2, /* ymm6 */ | |||
904 | XMMRegister xmm_3, /* ymm7 */ | |||
905 | Register reg_a, /* == eax on 0 iteration, then rotate 8 register right on each next iteration */ | |||
906 | Register reg_b, /* ebx */ /* full cycle is 8 iterations */ | |||
907 | Register reg_c, /* edi */ | |||
908 | Register reg_d, /* esi */ | |||
909 | Register reg_e, /* r8d */ | |||
910 | Register reg_f, /* r9d */ | |||
911 | Register reg_g, /* r10d */ | |||
912 | Register reg_h, /* r11d */ | |||
913 | int iter); | |||
914 | ||||
915 | void addm(int disp, Register r1, Register r2); | |||
916 | void gfmul(XMMRegister tmp0, XMMRegister t); | |||
917 | void schoolbookAAD(int i, Register subkeyH, XMMRegister data, XMMRegister tmp0, | |||
918 | XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3); | |||
919 | void generateHtbl_one_block(Register htbl); | |||
920 | void generateHtbl_eight_blocks(Register htbl); | |||
921 | public: | |||
922 | void sha256_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, | |||
923 | XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, | |||
924 | Register buf, Register state, Register ofs, Register limit, Register rsp, | |||
925 | bool multi_block, XMMRegister shuf_mask); | |||
926 | void avx_ghash(Register state, Register htbl, Register data, Register blocks); | |||
927 | #endif | |||
928 | ||||
929 | #ifdef _LP641 | |||
930 | private: | |||
931 | void sha512_AVX2_one_round_compute(Register old_h, Register a, Register b, Register c, Register d, | |||
932 | Register e, Register f, Register g, Register h, int iteration); | |||
933 | ||||
934 | void sha512_AVX2_one_round_and_schedule(XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
935 | Register a, Register b, Register c, Register d, Register e, Register f, | |||
936 | Register g, Register h, int iteration); | |||
937 | ||||
938 | void addmq(int disp, Register r1, Register r2); | |||
939 | public: | |||
940 | void sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, | |||
941 | XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, | |||
942 | Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block, | |||
943 | XMMRegister shuf_mask); | |||
944 | private: | |||
945 | void roundEnc(XMMRegister key, int rnum); | |||
946 | void lastroundEnc(XMMRegister key, int rnum); | |||
947 | void roundDec(XMMRegister key, int rnum); | |||
948 | void lastroundDec(XMMRegister key, int rnum); | |||
949 | void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask); | |||
950 | void gfmul_avx512(XMMRegister ghash, XMMRegister hkey); | |||
951 | void generateHtbl_48_block_zmm(Register htbl, Register avx512_subkeyHtbl); | |||
952 | void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx, | |||
953 | XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool reduction, | |||
954 | XMMRegister addmask, bool no_ghash_input, Register rounds, Register ghash_pos, | |||
955 | bool final_reduction, int index, XMMRegister counter_inc_mask); | |||
956 | public: | |||
957 | void aesecb_encrypt(Register source_addr, Register dest_addr, Register key, Register len); | |||
958 | void aesecb_decrypt(Register source_addr, Register dest_addr, Register key, Register len); | |||
959 | void aesctr_encrypt(Register src_addr, Register dest_addr, Register key, Register counter, | |||
960 | Register len_reg, Register used, Register used_addr, Register saved_encCounter_start); | |||
961 | void aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key, | |||
962 | Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter); | |||
963 | ||||
964 | #endif | |||
965 | ||||
  // MD5 intrinsic: compress buf into state; ofs/limit drive the loop when
  // multi_block is true.
  void fast_md5(Register buf, Address state, Address ofs, Address limit,
                bool multi_block);

  // SHA-1 intrinsic; register roles are fixed by the stub that calls this.
  void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
                 XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask,
                 Register buf, Register state, Register ofs, Register limit, Register rsp,
                 bool multi_block);
973 | ||||
974 | #ifdef _LP641 | |||
975 | void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, | |||
976 | XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, | |||
977 | Register buf, Register state, Register ofs, Register limit, Register rsp, | |||
978 | bool multi_block, XMMRegister shuf_mask); | |||
979 | #else | |||
980 | void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, | |||
981 | XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, | |||
982 | Register buf, Register state, Register ofs, Register limit, Register rsp, | |||
983 | bool multi_block); | |||
984 | #endif | |||
985 | ||||
  // exp() intrinsic; clobbers the listed fixed registers.
  void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
                XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
                Register rax, Register rcx, Register rdx, Register tmp);
989 | ||||
990 | #ifdef _LP641 | |||
991 | void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
992 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
993 | Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2); | |||
994 | ||||
995 | void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
996 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
997 | Register rax, Register rcx, Register rdx, Register r11); | |||
998 | ||||
999 | void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, | |||
1000 | XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, | |||
1001 | Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4); | |||
1002 | ||||
1003 | void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
1004 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
1005 | Register rax, Register rbx, Register rcx, Register rdx, Register tmp1, Register tmp2, | |||
1006 | Register tmp3, Register tmp4); | |||
1007 | ||||
1008 | void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
1009 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
1010 | Register rax, Register rcx, Register rdx, Register tmp1, | |||
1011 | Register tmp2, Register tmp3, Register tmp4); | |||
1012 | void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
1013 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
1014 | Register rax, Register rcx, Register rdx, Register tmp1, | |||
1015 | Register tmp2, Register tmp3, Register tmp4); | |||
1016 | #else | |||
1017 | void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
1018 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
1019 | Register rax, Register rcx, Register rdx, Register tmp1); | |||
1020 | ||||
1021 | void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
1022 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
1023 | Register rax, Register rcx, Register rdx, Register tmp); | |||
1024 | ||||
1025 | void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, | |||
1026 | XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, | |||
1027 | Register rdx, Register tmp); | |||
1028 | ||||
1029 | void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
1030 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
1031 | Register rax, Register rbx, Register rdx); | |||
1032 | ||||
1033 | void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
1034 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
1035 | Register rax, Register rcx, Register rdx, Register tmp); | |||
1036 | ||||
1037 | void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, | |||
1038 | Register edx, Register ebx, Register esi, Register edi, | |||
1039 | Register ebp, Register esp); | |||
1040 | ||||
1041 | void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx, | |||
1042 | Register esi, Register edi, Register ebp, Register esp); | |||
1043 | ||||
1044 | void libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, | |||
1045 | Register edx, Register ebx, Register esi, Register edi, | |||
1046 | Register ebp, Register esp); | |||
1047 | ||||
1048 | void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
1049 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
1050 | Register rax, Register rcx, Register rdx, Register tmp); | |||
1051 | #endif | |||
1052 | ||||
 private:

  // these are private because users should be doing movflt/movdbl

  void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); }
  void movss(Address dst, XMMRegister src)     { Assembler::movss(dst, src); }
  void movss(XMMRegister dst, Address src)     { Assembler::movss(dst, src); }
  void movss(XMMRegister dst, AddressLiteral src);

  void movlpd(XMMRegister dst, Address src)    { Assembler::movlpd(dst, src); }
  void movlpd(XMMRegister dst, AddressLiteral src);
1064 | ||||
 public:

  // Scalar/packed FP arithmetic. The AddressLiteral overloads are defined
  // out of line (no inline body here).
  void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); }
  void addsd(XMMRegister dst, Address src)     { Assembler::addsd(dst, src); }
  void addsd(XMMRegister dst, AddressLiteral src);

  void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); }
  void addss(XMMRegister dst, Address src)     { Assembler::addss(dst, src); }
  void addss(XMMRegister dst, AddressLiteral src);

  void addpd(XMMRegister dst, XMMRegister src) { Assembler::addpd(dst, src); }
  void addpd(XMMRegister dst, Address src)     { Assembler::addpd(dst, src); }
  void addpd(XMMRegister dst, AddressLiteral src);

  void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); }
  void divsd(XMMRegister dst, Address src)     { Assembler::divsd(dst, src); }
  void divsd(XMMRegister dst, AddressLiteral src);

  void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); }
  void divss(XMMRegister dst, Address src)     { Assembler::divss(dst, src); }
  void divss(XMMRegister dst, AddressLiteral src);

  // Move Unaligned Double Quadword
  void movdqu(Address dst, XMMRegister src);
  void movdqu(XMMRegister dst, Address src);
  void movdqu(XMMRegister dst, XMMRegister src);
  // The literal form takes a scratch register to materialize the address.
  void movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg = rscratch1);

  // Opmask (k-register) moves, word width.
  void kmovwl(KRegister dst, Register src) { Assembler::kmovwl(dst, src); }
  void kmovwl(Register dst, KRegister src) { Assembler::kmovwl(dst, src); }
  void kmovwl(KRegister dst, Address src)  { Assembler::kmovwl(dst, src); }
  void kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
  void kmovwl(Address dst, KRegister src)  { Assembler::kmovwl(dst, src); }
  void kmovwl(KRegister dst, KRegister src) { Assembler::kmovwl(dst, src); }

  // Opmask moves, quadword width.
  void kmovql(KRegister dst, KRegister src) { Assembler::kmovql(dst, src); }
  void kmovql(KRegister dst, Register src)  { Assembler::kmovql(dst, src); }
  void kmovql(Register dst, KRegister src)  { Assembler::kmovql(dst, src); }
  void kmovql(KRegister dst, Address src)   { Assembler::kmovql(dst, src); }
  void kmovql(Address dst, KRegister src)   { Assembler::kmovql(dst, src); }
  void kmovql(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);

  // Safe move operation, lowers down to 16bit moves for targets supporting
  // AVX512F feature and 64bit moves for targets supporting AVX512BW feature.
  void kmov(Address dst, KRegister src);
  void kmov(KRegister dst, Address src);
  void kmov(KRegister dst, KRegister src);
  void kmov(Register dst, KRegister src);
  void kmov(KRegister dst, Register src);

  // AVX Unaligned forms
  void vmovdqu(Address dst, XMMRegister src);
  void vmovdqu(XMMRegister dst, Address src);
  void vmovdqu(XMMRegister dst, XMMRegister src);
  void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
  void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg, int vector_len);


  // AVX512 Unaligned
  void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len);
  void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len);

  // Byte-granularity EVEX moves; merge selects merge- vs zero-masking.
  void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
  void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
  void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
  void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
  void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
  void evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);

  // Word-granularity EVEX moves.
  void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); }
  void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
  void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); }
  void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
  void evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1139 | ||||
  // Dword-granularity EVEX moves. The register-to-register overloads elide
  // the move when src == dst (and, for the masked forms, when mask is k0).
  void evmovdqul(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
  void evmovdqul(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
  void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
    // NOTE(review): static analysis reports "Called C++ object pointer is
    // null" for this dereference; presumably all callers pass valid
    // (non-null) XMMRegisters -- confirm at the call sites flagged by the
    // analyzer rather than guarding here.
    if (dst->encoding() == src->encoding()) return;
    Assembler::evmovdqul(dst, src, vector_len);
  }
  void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
  void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
  void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
    // With mask == k0 a same-register move has no visible effect.
    if (dst->encoding() == src->encoding() && mask == k0) return;
    Assembler::evmovdqul(dst, mask, src, merge, vector_len);
  }
  void evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);

  // Qword-granularity EVEX moves, same elision rules as above.
  void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
  void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
  void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch);
  void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
    if (dst->encoding() == src->encoding()) return;
    Assembler::evmovdquq(dst, src, vector_len);
  }
  void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
  void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
  void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
    if (dst->encoding() == src->encoding() && mask == k0) return;
    Assembler::evmovdquq(dst, mask, src, merge, vector_len);
  }
  void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1168 | ||||
  // Move Aligned Double Quadword
  void movdqa(XMMRegister dst, Address src)     { Assembler::movdqa(dst, src); }
  void movdqa(XMMRegister dst, XMMRegister src) { Assembler::movdqa(dst, src); }
  void movdqa(XMMRegister dst, AddressLiteral src);

  void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
  void movsd(Address dst, XMMRegister src)     { Assembler::movsd(dst, src); }
  void movsd(XMMRegister dst, Address src)     { Assembler::movsd(dst, src); }
  void movsd(XMMRegister dst, AddressLiteral src);

  void mulpd(XMMRegister dst, XMMRegister src) { Assembler::mulpd(dst, src); }
  void mulpd(XMMRegister dst, Address src)     { Assembler::mulpd(dst, src); }
  void mulpd(XMMRegister dst, AddressLiteral src);

  void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); }
  void mulsd(XMMRegister dst, Address src)     { Assembler::mulsd(dst, src); }
  void mulsd(XMMRegister dst, AddressLiteral src);

  void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); }
  void mulss(XMMRegister dst, Address src)     { Assembler::mulss(dst, src); }
  void mulss(XMMRegister dst, AddressLiteral src);

  // Carry-Less Multiplication Quadword
  void pclmulldq(XMMRegister dst, XMMRegister src) {
    // 0x00 - multiply lower 64 bits [0:63]
    Assembler::pclmulqdq(dst, src, 0x00);
  }
  void pclmulhdq(XMMRegister dst, XMMRegister src) {
    // 0x11 - multiply upper 64 bits [64:127]
    Assembler::pclmulqdq(dst, src, 0x11);
  }
1200 | ||||
  // Packed compares / string compares (defined out of line).
  void pcmpeqb(XMMRegister dst, XMMRegister src);
  void pcmpeqw(XMMRegister dst, XMMRegister src);

  void pcmpestri(XMMRegister dst, Address src, int imm8);
  void pcmpestri(XMMRegister dst, XMMRegister src, int imm8);

  // Zero-extend packed bytes to words.
  void pmovzxbw(XMMRegister dst, XMMRegister src);
  void pmovzxbw(XMMRegister dst, Address src);

  void pmovmskb(Register dst, XMMRegister src);

  void ptest(XMMRegister dst, XMMRegister src);

  void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); }
  void sqrtsd(XMMRegister dst, Address src)     { Assembler::sqrtsd(dst, src); }
  void sqrtsd(XMMRegister dst, AddressLiteral src);

  // rmode selects the rounding mode immediate.
  void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); }
  void roundsd(XMMRegister dst, Address src, int32_t rmode)     { Assembler::roundsd(dst, src, rmode); }
  void roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register scratch_reg);

  void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); }
  void sqrtss(XMMRegister dst, Address src)     { Assembler::sqrtss(dst, src); }
  void sqrtss(XMMRegister dst, AddressLiteral src);

  void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); }
  void subsd(XMMRegister dst, Address src)     { Assembler::subsd(dst, src); }
  void subsd(XMMRegister dst, AddressLiteral src);

  void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); }
  void subss(XMMRegister dst, Address src)     { Assembler::subss(dst, src); }
  void subss(XMMRegister dst, AddressLiteral src);

  // Unordered compares (set EFLAGS, quiet on QNaN).
  void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
  void ucomiss(XMMRegister dst, Address src)     { Assembler::ucomiss(dst, src); }
  void ucomiss(XMMRegister dst, AddressLiteral src);

  void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }
  void ucomisd(XMMRegister dst, Address src)     { Assembler::ucomisd(dst, src); }
  void ucomisd(XMMRegister dst, AddressLiteral src);

  // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
  void xorpd(XMMRegister dst, XMMRegister src);
  void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); }
  void xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);

  // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
  void xorps(XMMRegister dst, XMMRegister src);
  void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); }
  void xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);

  // Shuffle Bytes
  void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); }
  void pshufb(XMMRegister dst, Address src)     { Assembler::pshufb(dst, src); }
  void pshufb(XMMRegister dst, AddressLiteral src);
  // AVX 3-operands instructions

  void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
  void vaddsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vaddsd(dst, nds, src); }
  void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); }
  void vaddss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vaddss(dst, nds, src); }
  void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  // abs() via AND with a sign-bit mask held at negate_field.
  void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
  void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);

  void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpaddb(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch);

  void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); }
  void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len)     { Assembler::vpaddd(dst, nds, src, vector_len); }
  void vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch);

  void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
  void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len)     { Assembler::vpand(dst, nds, src, vector_len); }
  void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);

  void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
  void vpbroadcastw(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastw(dst, src, vector_len); }

  void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);

  // Vector compares: comparison is the predicate immediate, is_signed selects
  // signed vs unsigned forms.
  void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
               int comparison, bool is_signed, int vector_len) { Assembler::evpcmpd(kdst, mask, nds, src, comparison, is_signed, vector_len); }
  void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
               int comparison, bool is_signed, int vector_len, Register scratch_reg);
  void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
               int comparison, bool is_signed, int vector_len) { Assembler::evpcmpq(kdst, mask, nds, src, comparison, is_signed, vector_len); }
  void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
               int comparison, bool is_signed, int vector_len, Register scratch_reg);
  void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
               int comparison, bool is_signed, int vector_len) { Assembler::evpcmpb(kdst, mask, nds, src, comparison, is_signed, vector_len); }
  void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
               int comparison, bool is_signed, int vector_len, Register scratch_reg);
  void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
               int comparison, bool is_signed, int vector_len) { Assembler::evpcmpw(kdst, mask, nds, src, comparison, is_signed, vector_len); }
  void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
               int comparison, bool is_signed, int vector_len, Register scratch_reg);

  void evpbroadcast(BasicType type, XMMRegister dst, Register src, int vector_len);

  // Emit comparison instruction for the specified comparison predicate.
  void vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg);
  void vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len);

  void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
  void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpmovzxbw(dst, src, vector_len); }

  void vpmovmskb(Register dst, XMMRegister src, int vector_len = Assembler::AVX_256bit);

  void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
    Assembler::vpmulld(dst, nds, src, vector_len);
  };
  void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
    Assembler::vpmulld(dst, nds, src, vector_len);
  }
  void vpmulld(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);

  void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
  void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);

  void evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
  void evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
1343 | void evpsllw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { | |||
1344 | if (!is_varshift) { | |||
1345 | Assembler::evpsllw(dst, mask, nds, src, merge, vector_len); | |||
1346 | } else { | |||
1347 | Assembler::evpsllvw(dst, mask, nds, src, merge, vector_len); | |||
1348 | } | |||
1349 | } | |||
1350 | void evpslld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { | |||
1351 | if (!is_varshift) { | |||
1352 | Assembler::evpslld(dst, mask, nds, src, merge, vector_len); | |||
1353 | } else { | |||
1354 | Assembler::evpsllvd(dst, mask, nds, src, merge, vector_len); | |||
1355 | } | |||
1356 | } | |||
1357 | void evpsllq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { | |||
1358 | if (!is_varshift) { | |||
1359 | Assembler::evpsllq(dst, mask, nds, src, merge, vector_len); | |||
1360 | } else { | |||
1361 | Assembler::evpsllvq(dst, mask, nds, src, merge, vector_len); | |||
1362 | } | |||
1363 | } | |||
1364 | void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { | |||
1365 | if (!is_varshift) { | |||
1366 | Assembler::evpsrlw(dst, mask, nds, src, merge, vector_len); | |||
1367 | } else { | |||
1368 | Assembler::evpsrlvw(dst, mask, nds, src, merge, vector_len); | |||
1369 | } | |||
1370 | } | |||
1371 | void evpsrld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { | |||
1372 | if (!is_varshift) { | |||
1373 | Assembler::evpsrld(dst, mask, nds, src, merge, vector_len); | |||
1374 | } else { | |||
1375 | Assembler::evpsrlvd(dst, mask, nds, src, merge, vector_len); | |||
1376 | } | |||
1377 | } | |||
1378 | void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { | |||
1379 | if (!is_varshift) { | |||
1380 | Assembler::evpsrlq(dst, mask, nds, src, merge, vector_len); | |||
1381 | } else { | |||
1382 | Assembler::evpsrlvq(dst, mask, nds, src, merge, vector_len); | |||
1383 | } | |||
1384 | } | |||
1385 | void evpsraw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { | |||
1386 | if (!is_varshift) { | |||
1387 | Assembler::evpsraw(dst, mask, nds, src, merge, vector_len); | |||
1388 | } else { | |||
1389 | Assembler::evpsravw(dst, mask, nds, src, merge, vector_len); | |||
1390 | } | |||
1391 | } | |||
1392 | void evpsrad(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { | |||
1393 | if (!is_varshift) { | |||
1394 | Assembler::evpsrad(dst, mask, nds, src, merge, vector_len); | |||
1395 | } else { | |||
1396 | Assembler::evpsravd(dst, mask, nds, src, merge, vector_len); | |||
1397 | } | |||
1398 | } | |||
1399 | void evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { | |||
1400 | if (!is_varshift) { | |||
1401 | Assembler::evpsraq(dst, mask, nds, src, merge, vector_len); | |||
1402 | } else { | |||
1403 | Assembler::evpsravq(dst, mask, nds, src, merge, vector_len); | |||
1404 | } | |||
1405 | } | |||
1406 | ||||
  // Masked packed signed min/max, dispatched on element BasicType.
  void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
  void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  // Packed word shifts; the 'int shift' overloads take an immediate count.
  void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
  void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);

  void vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
  void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);

  void vptest(XMMRegister dst, XMMRegister src);
  void vptest(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vptest(dst, src, vector_len); }

  void punpcklbw(XMMRegister dst, XMMRegister src);
  void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); }

  void pshufd(XMMRegister dst, Address src, int mode);
  void pshufd(XMMRegister dst, XMMRegister src, int mode) { Assembler::pshufd(dst, src, mode); }

  void pshuflw(XMMRegister dst, XMMRegister src, int mode);
  void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); }

  // Bitwise AND of packed FP values. The AddressLiteral overloads may need a
  // scratch register to materialize the address (see definitions in the .cpp).
  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
  void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
  void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);

  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
  void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
  void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);

  void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);

  // Scalar FP arithmetic wrappers; AddressLiteral overloads defined in the .cpp.
  void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
  void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); }
  void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); }
  void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); }
  void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); }
  void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); }
  void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); }
  void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); }
  void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); }
  void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); }
  void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); }
  void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); }
  void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  // Scalar negate; presumably 'src' supplies a sign-bit mask — confirm in the .cpp.
  void vnegatess(XMMRegister dst, XMMRegister nds, AddressLiteral src);
  void vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  // AVX Vector instructions

  void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }
  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }
  void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);

  void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); }
  void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); }
  void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1476 | ||||
1477 | void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { | |||
1478 | if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2 | |||
1479 | Assembler::vpxor(dst, nds, src, vector_len); | |||
1480 | else | |||
1481 | Assembler::vxorpd(dst, nds, src, vector_len); | |||
1482 | } | |||
1483 | void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { | |||
1484 | if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2 | |||
1485 | Assembler::vpxor(dst, nds, src, vector_len); | |||
1486 | else | |||
1487 | Assembler::vxorpd(dst, nds, src, vector_len); | |||
1488 | } | |||
  void vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);

  // Simple version for AVX2 256bit vectors
  // (note: the 'true' argument is the vector_len parameter, promoting to 1 == 256-bit)
  void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); }
  void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); }

  void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpermd(dst, nds, src, vector_len); }
  void vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
1497 | ||||
1498 | void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) { | |||
1499 | if (UseAVX > 2 && VM_Version::supports_avx512novl()) { | |||
1500 | Assembler::vinserti32x4(dst, nds, src, imm8); | |||
1501 | } else if (UseAVX > 1) { | |||
1502 | // vinserti128 is available only in AVX2 | |||
1503 | Assembler::vinserti128(dst, nds, src, imm8); | |||
1504 | } else { | |||
1505 | Assembler::vinsertf128(dst, nds, src, imm8); | |||
1506 | } | |||
1507 | } | |||
1508 | ||||
1509 | void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) { | |||
1510 | if (UseAVX > 2 && VM_Version::supports_avx512novl()) { | |||
1511 | Assembler::vinserti32x4(dst, nds, src, imm8); | |||
1512 | } else if (UseAVX > 1) { | |||
1513 | // vinserti128 is available only in AVX2 | |||
1514 | Assembler::vinserti128(dst, nds, src, imm8); | |||
1515 | } else { | |||
1516 | Assembler::vinsertf128(dst, nds, src, imm8); | |||
1517 | } | |||
1518 | } | |||
1519 | ||||
1520 | void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) { | |||
1521 | if (UseAVX > 2 && VM_Version::supports_avx512novl()) { | |||
1522 | Assembler::vextracti32x4(dst, src, imm8); | |||
1523 | } else if (UseAVX > 1) { | |||
1524 | // vextracti128 is available only in AVX2 | |||
1525 | Assembler::vextracti128(dst, src, imm8); | |||
1526 | } else { | |||
1527 | Assembler::vextractf128(dst, src, imm8); | |||
1528 | } | |||
1529 | } | |||
1530 | ||||
1531 | void vextracti128(Address dst, XMMRegister src, uint8_t imm8) { | |||
1532 | if (UseAVX > 2 && VM_Version::supports_avx512novl()) { | |||
1533 | Assembler::vextracti32x4(dst, src, imm8); | |||
1534 | } else if (UseAVX > 1) { | |||
1535 | // vextracti128 is available only in AVX2 | |||
1536 | Assembler::vextracti128(dst, src, imm8); | |||
1537 | } else { | |||
1538 | Assembler::vextractf128(dst, src, imm8); | |||
1539 | } | |||
1540 | } | |||
1541 | ||||
  // 128bit copy to/from high 128 bits of 256bit (YMM) vector registers
  // (imm8 == 1 selects the upper 128-bit lane)
  void vinserti128_high(XMMRegister dst, XMMRegister src) {
    vinserti128(dst, dst, src, 1);
  }
  void vinserti128_high(XMMRegister dst, Address src) {
    vinserti128(dst, dst, src, 1);
  }
  void vextracti128_high(XMMRegister dst, XMMRegister src) {
    vextracti128(dst, src, 1);
  }
  void vextracti128_high(Address dst, XMMRegister src) {
    vextracti128(dst, src, 1);
  }
1555 | ||||
1556 | void vinsertf128_high(XMMRegister dst, XMMRegister src) { | |||
1557 | if (UseAVX > 2 && VM_Version::supports_avx512novl()) { | |||
1558 | Assembler::vinsertf32x4(dst, dst, src, 1); | |||
1559 | } else { | |||
1560 | Assembler::vinsertf128(dst, dst, src, 1); | |||
1561 | } | |||
1562 | } | |||
1563 | ||||
1564 | void vinsertf128_high(XMMRegister dst, Address src) { | |||
1565 | if (UseAVX > 2 && VM_Version::supports_avx512novl()) { | |||
1566 | Assembler::vinsertf32x4(dst, dst, src, 1); | |||
1567 | } else { | |||
1568 | Assembler::vinsertf128(dst, dst, src, 1); | |||
1569 | } | |||
1570 | } | |||
1571 | ||||
1572 | void vextractf128_high(XMMRegister dst, XMMRegister src) { | |||
1573 | if (UseAVX > 2 && VM_Version::supports_avx512novl()) { | |||
1574 | Assembler::vextractf32x4(dst, src, 1); | |||
1575 | } else { | |||
1576 | Assembler::vextractf128(dst, src, 1); | |||
1577 | } | |||
1578 | } | |||
1579 | ||||
1580 | void vextractf128_high(Address dst, XMMRegister src) { | |||
1581 | if (UseAVX > 2 && VM_Version::supports_avx512novl()) { | |||
1582 | Assembler::vextractf32x4(dst, src, 1); | |||
1583 | } else { | |||
1584 | Assembler::vextractf128(dst, src, 1); | |||
1585 | } | |||
1586 | } | |||
1587 | ||||
  // 256bit copy to/from high 256 bits of 512bit (ZMM) vector registers
  // (imm8 == 1 selects the upper 256-bit lane)
  void vinserti64x4_high(XMMRegister dst, XMMRegister src) {
    Assembler::vinserti64x4(dst, dst, src, 1);
  }
  void vinsertf64x4_high(XMMRegister dst, XMMRegister src) {
    Assembler::vinsertf64x4(dst, dst, src, 1);
  }
  void vextracti64x4_high(XMMRegister dst, XMMRegister src) {
    Assembler::vextracti64x4(dst, src, 1);
  }
  void vextractf64x4_high(XMMRegister dst, XMMRegister src) {
    Assembler::vextractf64x4(dst, src, 1);
  }
  void vextractf64x4_high(Address dst, XMMRegister src) {
    Assembler::vextractf64x4(dst, src, 1);
  }
  void vinsertf64x4_high(XMMRegister dst, Address src) {
    Assembler::vinsertf64x4(dst, dst, src, 1);
  }
1607 | ||||
  // 128bit copy to/from low 128 bits of 256bit (YMM) vector registers
  // (imm8 == 0 selects the lower 128-bit lane)
  void vinserti128_low(XMMRegister dst, XMMRegister src) {
    vinserti128(dst, dst, src, 0);
  }
  void vinserti128_low(XMMRegister dst, Address src) {
    vinserti128(dst, dst, src, 0);
  }
  void vextracti128_low(XMMRegister dst, XMMRegister src) {
    vextracti128(dst, src, 0);
  }
  void vextracti128_low(Address dst, XMMRegister src) {
    vextracti128(dst, src, 0);
  }
1621 | ||||
1622 | void vinsertf128_low(XMMRegister dst, XMMRegister src) { | |||
1623 | if (UseAVX > 2 && VM_Version::supports_avx512novl()) { | |||
1624 | Assembler::vinsertf32x4(dst, dst, src, 0); | |||
1625 | } else { | |||
1626 | Assembler::vinsertf128(dst, dst, src, 0); | |||
1627 | } | |||
1628 | } | |||
1629 | ||||
1630 | void vinsertf128_low(XMMRegister dst, Address src) { | |||
1631 | if (UseAVX > 2 && VM_Version::supports_avx512novl()) { | |||
1632 | Assembler::vinsertf32x4(dst, dst, src, 0); | |||
1633 | } else { | |||
1634 | Assembler::vinsertf128(dst, dst, src, 0); | |||
1635 | } | |||
1636 | } | |||
1637 | ||||
1638 | void vextractf128_low(XMMRegister dst, XMMRegister src) { | |||
1639 | if (UseAVX > 2 && VM_Version::supports_avx512novl()) { | |||
1640 | Assembler::vextractf32x4(dst, src, 0); | |||
1641 | } else { | |||
1642 | Assembler::vextractf128(dst, src, 0); | |||
1643 | } | |||
1644 | } | |||
1645 | ||||
1646 | void vextractf128_low(Address dst, XMMRegister src) { | |||
1647 | if (UseAVX > 2 && VM_Version::supports_avx512novl()) { | |||
1648 | Assembler::vextractf32x4(dst, src, 0); | |||
1649 | } else { | |||
1650 | Assembler::vextractf128(dst, src, 0); | |||
1651 | } | |||
1652 | } | |||
1653 | ||||
  // 256bit copy to/from low 256 bits of 512bit (ZMM) vector registers
  // (imm8 == 0 selects the lower 256-bit lane)
  void vinserti64x4_low(XMMRegister dst, XMMRegister src) {
    Assembler::vinserti64x4(dst, dst, src, 0);
  }
  void vinsertf64x4_low(XMMRegister dst, XMMRegister src) {
    Assembler::vinsertf64x4(dst, dst, src, 0);
  }
  void vextracti64x4_low(XMMRegister dst, XMMRegister src) {
    Assembler::vextracti64x4(dst, src, 0);
  }
  void vextractf64x4_low(XMMRegister dst, XMMRegister src) {
    Assembler::vextractf64x4(dst, src, 0);
  }
  void vextractf64x4_low(Address dst, XMMRegister src) {
    Assembler::vextractf64x4(dst, src, 0);
  }
  void vinsertf64x4_low(XMMRegister dst, Address src) {
    Assembler::vinsertf64x4(dst, dst, src, 0);
  }
1673 | ||||
  // Carry-Less Multiplication Quadword
  // The imm8 selects which 64-bit half of each operand participates:
  // bit 0 picks nds's half, bit 4 picks src's half.
  void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    // 0x00 - multiply lower 64 bits [0:63]
    Assembler::vpclmulqdq(dst, nds, src, 0x00);
  }
  void vpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    // 0x11 - multiply upper 64 bits [64:127]
    Assembler::vpclmulqdq(dst, nds, src, 0x11);
  }
  void vpclmullqhqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    // 0x10 - multiply nds[0:63] and src[64:127]
    Assembler::vpclmulqdq(dst, nds, src, 0x10);
  }
  void vpclmulhqlqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    //0x01 - multiply nds[64:127] and src[0:63]
    Assembler::vpclmulqdq(dst, nds, src, 0x01);
  }

  // EVEX-encoded variants with explicit vector length.
  void evpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
    // 0x00 - multiply lower 64 bits [0:63]
    Assembler::evpclmulqdq(dst, nds, src, 0x00, vector_len);
  }
  void evpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
    // 0x11 - multiply upper 64 bits [64:127]
    Assembler::evpclmulqdq(dst, nds, src, 0x11, vector_len);
  }
1700 | ||||
  // AVX-512 mask operations.
  // Definitions live in macroAssembler_x86.cpp; BasicType/masklen select the
  // width-specific k-register instruction variant.
  void kand(BasicType etype, KRegister dst, KRegister src1, KRegister src2);
  void kor(BasicType type, KRegister dst, KRegister src1, KRegister src2);
  void knot(uint masklen, KRegister dst, KRegister src, KRegister ktmp = knoreg, Register rtmp = noreg);
  void kxor(BasicType type, KRegister dst, KRegister src1, KRegister src2);
  void kortest(uint masklen, KRegister src1, KRegister src2);
  void ktest(uint masklen, KRegister src1, KRegister src2);

  // Masked permute / bitwise ops with register or memory second source.
  void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  // Masked rotates: immediate-count and register-count forms.
  void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc);
  void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc);
  void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc);
  void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc);

  void alltrue(Register dst, uint masklen, KRegister src1, KRegister src2, KRegister kscratch);
  void anytrue(Register dst, uint masklen, KRegister src, KRegister kscratch);
1728 | ||||
1729 | void cmov32( Condition cc, Register dst, Address src); | |||
1730 | void cmov32( Condition cc, Register dst, Register src); | |||
1731 | ||||
1732 | void cmov( Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); } | |||
1733 | ||||
1734 | void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src))cmovq(cc, dst, src) NOT_LP64(cmov32(cc, dst, src)); } | |||
1735 | void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src))cmovq(cc, dst, src) NOT_LP64(cmov32(cc, dst, src)); } | |||
1736 | ||||
1737 | void movoop(Register dst, jobject obj); | |||
1738 | void movoop(Address dst, jobject obj); | |||
1739 | ||||
1740 | void mov_metadata(Register dst, Metadata* obj); | |||
1741 | void mov_metadata(Address dst, Metadata* obj); | |||
1742 | ||||
1743 | void movptr(ArrayAddress dst, Register src); | |||
1744 | // can this do an lea? | |||
1745 | void movptr(Register dst, ArrayAddress src); | |||
1746 | ||||
1747 | void movptr(Register dst, Address src); | |||
1748 | ||||
1749 | #ifdef _LP641 | |||
1750 | void movptr(Register dst, AddressLiteral src, Register scratch=rscratch1); | |||
1751 | #else | |||
1752 | void movptr(Register dst, AddressLiteral src, Register scratch=noreg); // Scratch reg is ignored in 32-bit | |||
1753 | #endif | |||
1754 | ||||
1755 | void movptr(Register dst, intptr_t src); | |||
1756 | void movptr(Register dst, Register src); | |||
1757 | void movptr(Address dst, intptr_t src); | |||
1758 | ||||
1759 | void movptr(Address dst, Register src); | |||
1760 | ||||
1761 | void movptr(Register dst, RegisterOrConstant src) { | |||
1762 | if (src.is_constant()) movptr(dst, src.as_constant()); | |||
1763 | else movptr(dst, src.as_register()); | |||
1764 | } | |||
1765 | ||||
1766 | #ifdef _LP641 | |||
1767 | // Generally the next two are only used for moving NULL | |||
1768 | // Although there are situations in initializing the mark word where | |||
1769 | // they could be used. They are dangerous. | |||
1770 | ||||
1771 | // They only exist on LP64 so that int32_t and intptr_t are not the same | |||
1772 | // and we have ambiguous declarations. | |||
1773 | ||||
1774 | void movptr(Address dst, int32_t imm32); | |||
1775 | void movptr(Register dst, int32_t imm32); | |||
1776 | #endif // _LP64 | |||
1777 | ||||
1778 | // to avoid hiding movl | |||
1779 | void mov32(AddressLiteral dst, Register src); | |||
1780 | void mov32(Register dst, AddressLiteral src); | |||
1781 | ||||
1782 | // to avoid hiding movb | |||
1783 | void movbyte(ArrayAddress dst, int src); | |||
1784 | ||||
1785 | // Import other mov() methods from the parent class or else | |||
1786 | // they will be hidden by the following overriding declaration. | |||
1787 | using Assembler::movdl; | |||
1788 | using Assembler::movq; | |||
1789 | void movdl(XMMRegister dst, AddressLiteral src); | |||
1790 | void movq(XMMRegister dst, AddressLiteral src); | |||
1791 | ||||
1792 | // Can push value or effective address | |||
1793 | void pushptr(AddressLiteral src); | |||
1794 | ||||
1795 | void pushptr(Address src) { LP64_ONLY(pushq(src))pushq(src) NOT_LP64(pushl(src)); } | |||
1796 | void popptr(Address src) { LP64_ONLY(popq(src))popq(src) NOT_LP64(popl(src)); } | |||
1797 | ||||
1798 | void pushoop(jobject obj); | |||
1799 | void pushklass(Metadata* obj); | |||
1800 | ||||
1801 | // sign extend as need a l to ptr sized element | |||
1802 | void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src))movslq(dst, src) NOT_LP64(movl(dst, src)); } | |||
1803 | void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src))movslq(dst, src) NOT_LP64(if (dst != src) movl(dst, src)); } | |||
1804 | ||||
1805 | ||||
1806 | public: | |||
  // C2 compiled method's prolog code.
  void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub);

  // clear memory of size 'cnt' qwords, starting at 'base';
  // if 'is_large' is set, do not try to produce short loop
  void clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, bool is_large, KRegister mask=knoreg);

  // clear memory initialization sequence for constant size;
  void clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg);

  // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers
  void xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg);

  // Fill primitive arrays
  void generate_fill(BasicType t, bool aligned,
                     Register to, Register value, Register count,
                     Register rtmp, XMMRegister xtmp);

  // Encode a char[]/byte[] range into ISO-8859-1 (or ASCII when 'ascii' is set);
  // see definition in the .cpp for the exact contract of 'result'.
  void encode_iso_array(Register src, Register dst, Register len,
                        XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
                        XMMRegister tmp4, Register tmp5, Register result, bool ascii);
1828 | ||||
1829 | #ifdef _LP641 | |||
1830 | void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2); | |||
1831 | void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, | |||
1832 | Register y, Register y_idx, Register z, | |||
1833 | Register carry, Register product, | |||
1834 | Register idx, Register kdx); | |||
1835 | void multiply_add_128_x_128(Register x_xstart, Register y, Register z, | |||
1836 | Register yz_idx, Register idx, | |||
1837 | Register carry, Register product, int offset); | |||
1838 | void multiply_128_x_128_bmi2_loop(Register y, Register z, | |||
1839 | Register carry, Register carry2, | |||
1840 | Register idx, Register jdx, | |||
1841 | Register yz_idx1, Register yz_idx2, | |||
1842 | Register tmp, Register tmp3, Register tmp4); | |||
1843 | void multiply_128_x_128_loop(Register x_xstart, Register y, Register z, | |||
1844 | Register yz_idx, Register idx, Register jdx, | |||
1845 | Register carry, Register product, | |||
1846 | Register carry2); | |||
1847 | void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen, | |||
1848 | Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5); | |||
1849 | void square_rshift(Register x, Register len, Register z, Register tmp1, Register tmp3, | |||
1850 | Register tmp4, Register tmp5, Register rdxReg, Register raxReg); | |||
1851 | void multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry, | |||
1852 | Register tmp2); | |||
1853 | void multiply_add_64(Register sum, Register op1, Register op2, Register carry, | |||
1854 | Register rdxReg, Register raxReg); | |||
1855 | void add_one_64(Register z, Register zlen, Register carry, Register tmp1); | |||
1856 | void lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, | |||
1857 | Register tmp3, Register tmp4); | |||
1858 | void square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, | |||
1859 | Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg); | |||
1860 | ||||
1861 | void mul_add_128_x_32_loop(Register out, Register in, Register offset, Register len, Register tmp1, | |||
1862 | Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, | |||
1863 | Register raxReg); | |||
1864 | void mul_add(Register out, Register in, Register offset, Register len, Register k, Register tmp1, | |||
1865 | Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, | |||
1866 | Register raxReg); | |||
1867 | void vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale, | |||
1868 | Register result, Register tmp1, Register tmp2, | |||
1869 | XMMRegister vec1, XMMRegister vec2, XMMRegister vec3); | |||
1870 | #endif | |||
1871 | ||||
1872 | // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic. | |||
1873 | void update_byte_crc32(Register crc, Register val, Register table); | |||
1874 | void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp); | |||
1875 | ||||
1876 | ||||
1877 | #ifdef _LP641 | |||
1878 | void kernel_crc32_avx512(Register crc, Register buf, Register len, Register table, Register tmp1, Register tmp2); | |||
1879 | void kernel_crc32_avx512_256B(Register crc, Register buf, Register len, Register key, Register pos, | |||
1880 | Register tmp1, Register tmp2, Label& L_barrett, Label& L_16B_reduction_loop, | |||
1881 | Label& L_get_last_two_xmms, Label& L_128_done, Label& L_cleanup); | |||
1882 | void updateBytesAdler32(Register adler32, Register buf, Register length, XMMRegister shuf0, XMMRegister shuf1, ExternalAddress scale); | |||
1883 | #endif // _LP64 | |||
1884 | ||||
1885 | // CRC32C code for java.util.zip.CRC32C::updateBytes() intrinsic | |||
1886 | // Note on a naming convention: | |||
1887 | // Prefix w = register only used on a Westmere+ architecture | |||
1888 | // Prefix n = register only used on a Nehalem architecture | |||
1889 | #ifdef _LP641 | |||
1890 | void crc32c_ipl_alg4(Register in_out, uint32_t n, | |||
1891 | Register tmp1, Register tmp2, Register tmp3); | |||
1892 | #else | |||
1893 | void crc32c_ipl_alg4(Register in_out, uint32_t n, | |||
1894 | Register tmp1, Register tmp2, Register tmp3, | |||
1895 | XMMRegister xtmp1, XMMRegister xtmp2); | |||
1896 | #endif | |||
1897 | void crc32c_pclmulqdq(XMMRegister w_xtmp1, | |||
1898 | Register in_out, | |||
1899 | uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported, | |||
1900 | XMMRegister w_xtmp2, | |||
1901 | Register tmp1, | |||
1902 | Register n_tmp2, Register n_tmp3); | |||
1903 | void crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2, | |||
1904 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, | |||
1905 | Register tmp1, Register tmp2, | |||
1906 | Register n_tmp3); | |||
1907 | void crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, | |||
1908 | Register in_out1, Register in_out2, Register in_out3, | |||
1909 | Register tmp1, Register tmp2, Register tmp3, | |||
1910 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, | |||
1911 | Register tmp4, Register tmp5, | |||
1912 | Register n_tmp6); | |||
1913 | void crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2, | |||
1914 | Register tmp1, Register tmp2, Register tmp3, | |||
1915 | Register tmp4, Register tmp5, Register tmp6, | |||
1916 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, | |||
1917 | bool is_pclmulqdq_supported); | |||
1918 | // Fold 128-bit data chunk | |||
1919 | void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset); | |||
1920 | void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf); | |||
1921 | #ifdef _LP641 | |||
1922 | // Fold 512-bit data chunk | |||
1923 | void fold512bit_crc32_avx512(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, Register pos, int offset); | |||
1924 | #endif // _LP64 | |||
1925 | // Fold 8-bit data | |||
1926 | void fold_8bit_crc32(Register crc, Register table, Register tmp); | |||
1927 | void fold_8bit_crc32(XMMRegister crc, Register table, XMMRegister xtmp, Register tmp); | |||
1928 | ||||
  // Compress char[] array to byte[].
  void char_array_compress(Register src, Register dst, Register len,
                           XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
                           XMMRegister tmp4, Register tmp5, Register result,
                           KRegister mask1 = knoreg, KRegister mask2 = knoreg);

  // Inflate byte[] array to char[].
  void byte_array_inflate(Register src, Register dst, Register len,
                          XMMRegister tmp1, Register tmp2, KRegister mask = knoreg);

  // Masked / vectorized fill helpers; definitions in macroAssembler_x86.cpp.
  void fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRegister mask,
                   Register length, Register temp, int vec_enc);

  void fill64_masked(uint shift, Register dst, int disp,
                     XMMRegister xmm, KRegister mask, Register length,
                     Register temp, bool use64byteVector = false);

  void fill32_masked(uint shift, Register dst, int disp,
                     XMMRegister xmm, KRegister mask, Register length,
                     Register temp);

  void fill32(Register dst, int disp, XMMRegister xmm);

  void fill64(Register dst, int dis, XMMRegister xmm, bool use64byteVector = false);
1953 | ||||
1954 | #ifdef _LP641 | |||
1955 | void convert_f2i(Register dst, XMMRegister src); | |||
1956 | void convert_d2i(Register dst, XMMRegister src); | |||
1957 | void convert_f2l(Register dst, XMMRegister src); | |||
1958 | void convert_d2l(Register dst, XMMRegister src); | |||
1959 | ||||
1960 | void cache_wb(Address line); | |||
1961 | void cache_wbsync(bool is_pre); | |||
1962 | ||||
1963 | #if COMPILER2_OR_JVMCI1 | |||
1964 | void arraycopy_avx3_special_cases(XMMRegister xmm, KRegister mask, Register from, | |||
1965 | Register to, Register count, int shift, | |||
1966 | Register index, Register temp, | |||
1967 | bool use64byteVector, Label& L_entry, Label& L_exit); | |||
1968 | ||||
1969 | void arraycopy_avx3_special_cases_conjoint(XMMRegister xmm, KRegister mask, Register from, | |||
1970 | Register to, Register start_index, Register end_index, | |||
1971 | Register count, int shift, Register temp, | |||
1972 | bool use64byteVector, Label& L_entry, Label& L_exit); | |||
1973 | ||||
1974 | void copy64_masked_avx(Register dst, Register src, XMMRegister xmm, | |||
1975 | KRegister mask, Register length, Register index, | |||
1976 | Register temp, int shift = Address::times_1, int offset = 0, | |||
1977 | bool use64byteVector = false); | |||
1978 | ||||
1979 | void copy32_masked_avx(Register dst, Register src, XMMRegister xmm, | |||
1980 | KRegister mask, Register length, Register index, | |||
1981 | Register temp, int shift = Address::times_1, int offset = 0); | |||
1982 | ||||
1983 | void copy32_avx(Register dst, Register src, Register index, XMMRegister xmm, | |||
1984 | int shift = Address::times_1, int offset = 0); | |||
1985 | ||||
1986 | void copy64_avx(Register dst, Register src, Register index, XMMRegister xmm, | |||
1987 | bool conjoint, int shift = Address::times_1, int offset = 0, | |||
1988 | bool use64byteVector = false); | |||
1989 | ||||
1990 | void generate_fill_avx3(BasicType type, Register to, Register value, | |||
1991 | Register count, Register rtmp, XMMRegister xtmp); | |||
1992 | ||||
1993 | #endif // COMPILER2_OR_JVMCI | |||
1994 | ||||
1995 | #endif // _LP64 | |||
1996 | ||||
1997 | void vallones(XMMRegister dst, int vector_len); | |||
1998 | }; | |||
1999 | ||||
/**
 * class SkipIfEqual:
 *
 * Instantiating this class will result in assembly code being output that will
 * jump around any code emitted between the creation of the instance and its
 * automatic destruction at the end of a scope block, depending on the value of
 * the flag passed to the constructor, which will be checked at run-time.
 */
2008 | class SkipIfEqual { | |||
2009 | private: | |||
2010 | MacroAssembler* _masm; | |||
2011 | Label _label; | |||
2012 | ||||
2013 | public: | |||
2014 | SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); | |||
2015 | ~SkipIfEqual(); | |||
2016 | }; | |||
2017 | ||||
2018 | #endif // CPU_X86_MACROASSEMBLER_X86_HPP |