Bug Summary

File: jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp
Warning: line 1143, column 29
Called C++ object pointer is null
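For orientation: HotSpot's XMMRegister is a pointer type, so the diagnostic means the analyzer found a path on which the register operand passed as 'src' is null, and macroAssembler_x86.hpp:1143 then calls a member function through that null object pointer. A minimal sketch of the diagnosed shape (RegImpl and encoding() are hypothetical stand-ins, not the real HotSpot declarations):

  // Hypothetical reduction of the pattern the checker flags.
  struct RegImpl {
    int value;
    int encoding() const { return value; }  // member call through 'this'
  };
  typedef RegImpl* Reg;

  void evmovdqul_sketch(Reg dst, Reg src) {
    // If any feasible path makes src == nullptr, the analyzer reports
    // "Called C++ object pointer is null" on the call below.
    int e = src->encoding();
    (void)dst; (void)e;
  }

Path steps (4) through (9) in the listing below show how the analyzer builds such a path through generate_get_cpu_info().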

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name vm_version_x86.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -mthread-model posix -fno-delete-null-pointer-checks -mframe-pointer=all -relaxed-aliasing -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/libjvm/objs/precompiled -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D _GNU_SOURCE -D _REENTRANT -D LIBC=gnu -D LINUX -D VM_LITTLE_ENDIAN -D _LP64=1 -D ASSERT -D CHECK_UNHANDLED_OOPS -D TARGET_ARCH_x86 -D INCLUDE_SUFFIX_OS=_linux -D INCLUDE_SUFFIX_CPU=_x86 -D INCLUDE_SUFFIX_COMPILER=_gcc -D TARGET_COMPILER_gcc -D AMD64 -D HOTSPOT_LIB_ARCH="amd64" -D COMPILER1 -D COMPILER2 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc/adfiles -I /home/daniel/Projects/java/jdk/src/hotspot/share -I /home/daniel/Projects/java/jdk/src/hotspot/os/linux -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix -I /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86 -I /home/daniel/Projects/java/jdk/src/hotspot/os_cpu/linux_x86 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc -I /home/daniel/Projects/java/jdk/src/hotspot/share/precompiled -I /home/daniel/Projects/java/jdk/src/hotspot/share/include -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix/include -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base/linux -I /home/daniel/Projects/java/jdk/src/java.base/share/native/libjimage -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc/adfiles -I /home/daniel/Projects/java/jdk/src/hotspot/share -I /home/daniel/Projects/java/jdk/src/hotspot/os/linux -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix -I /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86 -I /home/daniel/Projects/java/jdk/src/hotspot/os_cpu/linux_x86 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc -D _FORTIFY_SOURCE=2 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include 
-internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-format-zero-length -Wno-unused-parameter -Wno-unused -Wno-parentheses -Wno-comment -Wno-unknown-pragmas -Wno-address -Wno-delete-non-virtual-dtor -Wno-char-subscripts -Wno-array-bounds -Wno-int-in-bool-context -Wno-ignored-qualifiers -Wno-missing-field-initializers -Wno-implicit-fallthrough -Wno-empty-body -Wno-strict-overflow -Wno-sequence-point -Wno-maybe-uninitialized -Wno-misleading-indentation -Wno-cast-function-type -Wno-shift-negative-value -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /home/daniel/Projects/java/jdk/make/hotspot -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -stack-protector 1 -fno-rtti -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -o /home/daniel/Projects/java/scan/2021-12-21-193737-8510-1 -x c++ /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp

/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp

1/*
2 * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "jvm.h"
27#include "asm/macroAssembler.hpp"
28#include "asm/macroAssembler.inline.hpp"
29#include "code/codeBlob.hpp"
30#include "logging/log.hpp"
31#include "logging/logStream.hpp"
32#include "memory/resourceArea.hpp"
33#include "memory/universe.hpp"
34#include "runtime/globals_extension.hpp"
35#include "runtime/java.hpp"
36#include "runtime/os.hpp"
37#include "runtime/stubCodeGenerator.hpp"
38#include "runtime/vm_version.hpp"
39#include "utilities/powerOfTwo.hpp"
40#include "utilities/virtualizationSupport.hpp"
41
42#include OS_HEADER_INLINE(os)
43
44int VM_Version::_cpu;
45int VM_Version::_model;
46int VM_Version::_stepping;
47bool VM_Version::_has_intel_jcc_erratum;
48VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
49
50#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
51const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
52#undef DECLARE_CPU_FEATURE_FLAG
53
54// Address of instruction which causes SEGV
55address VM_Version::_cpuinfo_segv_addr = 0;
56// Address of instruction after the one which causes SEGV
57address VM_Version::_cpuinfo_cont_addr = 0;
58
59static BufferBlob* stub_blob;
60static const int stub_size = 2000;
61
62extern "C" {
63 typedef void (*get_cpu_info_stub_t)(void*);
64 typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
65}
66static get_cpu_info_stub_t get_cpu_info_stub = NULL;
67static detect_virt_stub_t detect_virt_stub = NULL;
68
69#ifdef _LP64
70
71bool VM_Version::supports_clflush() {
72 // clflush should always be available on x86_64
73 // if not we are in real trouble because we rely on it
74 // to flush the code cache.
75 // Unfortunately, Assembler::clflush is currently called as part
76 // of generation of the code cache flush routine. This happens
77 // under Universe::init before the processor features are set
78 // up. Assembler::flush calls this routine to check that clflush
79 // is allowed. So, we give the caller a free pass if Universe init
80 // is still in progress.
81 assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
82 return true;
83}
84#endif
85
86#define CPUID_STANDARD_FN   0x0
87#define CPUID_STANDARD_FN_1 0x1
88#define CPUID_STANDARD_FN_4 0x4
89#define CPUID_STANDARD_FN_B 0xb
90
91#define CPUID_EXTENDED_FN   0x80000000
92#define CPUID_EXTENDED_FN_1 0x80000001
93#define CPUID_EXTENDED_FN_2 0x80000002
94#define CPUID_EXTENDED_FN_3 0x80000003
95#define CPUID_EXTENDED_FN_4 0x80000004
96#define CPUID_EXTENDED_FN_7 0x80000007
97#define CPUID_EXTENDED_FN_8 0x80000008
98
99class VM_Version_StubGenerator: public StubCodeGenerator {
100 public:
101
102 VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
103
104 address generate_get_cpu_info() {
105 // Flags to test CPU type.
106 const uint32_t HS_EFL_AC = 0x40000;
107 const uint32_t HS_EFL_ID = 0x200000;
108 // Values for when we don't have a CPUID instruction.
109 const int CPU_FAMILY_SHIFT = 8;
110 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
111 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
112 bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
4
Assuming the condition is true
113
114 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
115 Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
116 Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
117
118 StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
119# define __ _masm->
120
121 address start = __ pc();
122
123 //
124 // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
125 //
126 // LP64: rcx and rdx are first and second argument registers on windows
127
128 __ push(rbp);
129#ifdef _LP64
130 __ mov(rbp, c_rarg0); // cpuid_info address
131#else
132 __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
133#endif
134 __ push(rbx);
135 __ push(rsi);
136 __ pushf(); // preserve rbx, and flags
137 __ pop(rax);
138 __ push(rax);
139 __ mov(rcx, rax);
140 //
141 // if we are unable to change the AC flag, we have a 386
142 //
143 __ xorl(rax, HS_EFL_AC);
144 __ push(rax);
145 __ popf();
146 __ pushf();
147 __ pop(rax);
148 __ cmpptr(rax, rcx);
149 __ jccb(Assembler::notEqual, detect_486);
150
151 __ movl(rax, CPU_FAMILY_386);
152 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
153 __ jmp(done);
154
155 //
156 // If we are unable to change the ID flag, we have a 486 which does
157 // not support the "cpuid" instruction.
158 //
159 __ bind(detect_486);
160 __ mov(rax, rcx);
161 __ xorl(rax, HS_EFL_ID);
162 __ push(rax);
163 __ popf();
164 __ pushf();
165 __ pop(rax);
166 __ cmpptr(rcx, rax);
167 __ jccb(Assembler::notEqual, detect_586);
168
169 __ bind(cpu486);
170 __ movl(rax, CPU_FAMILY_486);
171 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
172 __ jmp(done);
173
174 //
175 // At this point, we have a chip which supports the "cpuid" instruction
176 //
177 __ bind(detect_586);
178 __ xorl(rax, rax);
179 __ cpuid();
180 __ orl(rax, rax);
181 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input
182 // value of at least 1, we give up and
183 // assume a 486
184 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
185 __ movl(Address(rsi, 0), rax);
186 __ movl(Address(rsi, 4), rbx);
187 __ movl(Address(rsi, 8), rcx);
188 __ movl(Address(rsi,12), rdx);
189
190 __ cmpl(rax, 0xa); // Is cpuid(0xB) supported?
191 __ jccb(Assembler::belowEqual, std_cpuid4);
192
193 //
194 // cpuid(0xB) Processor Topology
195 //
196 __ movl(rax, 0xb);
197 __ xorl(rcx, rcx); // Threads level
198 __ cpuid();
199
200 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
201 __ movl(Address(rsi, 0), rax);
202 __ movl(Address(rsi, 4), rbx);
203 __ movl(Address(rsi, 8), rcx);
204 __ movl(Address(rsi,12), rdx);
205
206 __ movl(rax, 0xb);
207 __ movl(rcx, 1); // Cores level
208 __ cpuid();
209 __ push(rax);
210 __ andl(rax, 0x1f); // Determine if valid topology level
211 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level
212 __ andl(rax, 0xffff);
213 __ pop(rax);
214 __ jccb(Assembler::equal, std_cpuid4);
215
216 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
217 __ movl(Address(rsi, 0), rax);
218 __ movl(Address(rsi, 4), rbx);
219 __ movl(Address(rsi, 8), rcx);
220 __ movl(Address(rsi,12), rdx);
221
222 __ movl(rax, 0xb);
223 __ movl(rcx, 2); // Packages level
224 __ cpuid();
225 __ push(rax);
226 __ andl(rax, 0x1f); // Determine if valid topology level
227 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level
228 __ andl(rax, 0xffff);
229 __ pop(rax);
230 __ jccb(Assembler::equal, std_cpuid4);
231
232 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
233 __ movl(Address(rsi, 0), rax);
234 __ movl(Address(rsi, 4), rbx);
235 __ movl(Address(rsi, 8), rcx);
236 __ movl(Address(rsi,12), rdx);
237
238 //
239 // cpuid(0x4) Deterministic cache params
240 //
241 __ bind(std_cpuid4);
242 __ movl(rax, 4);
243 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
244 __ jccb(Assembler::greater, std_cpuid1);
245
246 __ xorl(rcx, rcx); // L1 cache
247 __ cpuid();
248 __ push(rax);
249 __ andl(rax, 0x1f); // Determine if valid cache parameters used
250 __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache
251 __ pop(rax);
252 __ jccb(Assembler::equal, std_cpuid1);
253
254 __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
255 __ movl(Address(rsi, 0), rax);
256 __ movl(Address(rsi, 4), rbx);
257 __ movl(Address(rsi, 8), rcx);
258 __ movl(Address(rsi,12), rdx);
259
260 //
261 // Standard cpuid(0x1)
262 //
263 __ bind(std_cpuid1);
264 __ movl(rax, 1);
265 __ cpuid();
266 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
267 __ movl(Address(rsi, 0), rax);
268 __ movl(Address(rsi, 4), rbx);
269 __ movl(Address(rsi, 8), rcx);
270 __ movl(Address(rsi,12), rdx);
271
272 //
273 // Check if OS has enabled XGETBV instruction to access XCR0
274 // (OSXSAVE feature flag) and CPU supports AVX
275 //
276 __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
277 __ cmpl(rcx, 0x18000000);
278 __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
279
280 //
281 // XCR0, XFEATURE_ENABLED_MASK register
282 //
283 __ xorl(rcx, rcx); // zero for XCR0 register
284 __ xgetbv();
285 __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
286 __ movl(Address(rsi, 0), rax);
287 __ movl(Address(rsi, 4), rdx);
288
289 //
290 // cpuid(0x7) Structured Extended Features
291 //
292 __ bind(sef_cpuid);
293 __ movl(rax, 7);
294 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
295 __ jccb(Assembler::greater, ext_cpuid);
296
297 __ xorl(rcx, rcx);
298 __ cpuid();
299 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
300 __ movl(Address(rsi, 0), rax);
301 __ movl(Address(rsi, 4), rbx);
302 __ movl(Address(rsi, 8), rcx);
303 __ movl(Address(rsi, 12), rdx);
304
305 //
306 // Extended cpuid(0x80000000)
307 //
308 __ bind(ext_cpuid);
309 __ movl(rax, 0x80000000);
310 __ cpuid();
311 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
312 __ jcc(Assembler::belowEqual, done);
313 __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
314 __ jcc(Assembler::belowEqual, ext_cpuid1);
315 __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
316 __ jccb(Assembler::belowEqual, ext_cpuid5);
317 __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
318 __ jccb(Assembler::belowEqual, ext_cpuid7);
319 __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported?
320 __ jccb(Assembler::belowEqual, ext_cpuid8);
321 __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
322 __ jccb(Assembler::below, ext_cpuid8);
323 //
324 // Extended cpuid(0x8000001E)
325 //
326 __ movl(rax, 0x8000001E);
327 __ cpuid();
328 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
329 __ movl(Address(rsi, 0), rax);
330 __ movl(Address(rsi, 4), rbx);
331 __ movl(Address(rsi, 8), rcx);
332 __ movl(Address(rsi,12), rdx);
333
334 //
335 // Extended cpuid(0x80000008)
336 //
337 __ bind(ext_cpuid8);
338 __ movl(rax, 0x80000008);
339 __ cpuid();
340 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
341 __ movl(Address(rsi, 0), rax);
342 __ movl(Address(rsi, 4), rbx);
343 __ movl(Address(rsi, 8), rcx);
344 __ movl(Address(rsi,12), rdx);
345
346 //
347 // Extended cpuid(0x80000007)
348 //
349 __ bind(ext_cpuid7);
350 __ movl(rax, 0x80000007);
351 __ cpuid();
352 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
353 __ movl(Address(rsi, 0), rax);
354 __ movl(Address(rsi, 4), rbx);
355 __ movl(Address(rsi, 8), rcx);
356 __ movl(Address(rsi,12), rdx);
357
358 //
359 // Extended cpuid(0x80000005)
360 //
361 __ bind(ext_cpuid5);
362 __ movl(rax, 0x80000005);
363 __ cpuid();
364 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
365 __ movl(Address(rsi, 0), rax);
366 __ movl(Address(rsi, 4), rbx);
367 __ movl(Address(rsi, 8), rcx);
368 __ movl(Address(rsi,12), rdx);
369
370 //
371 // Extended cpuid(0x80000001)
372 //
373 __ bind(ext_cpuid1);
374 __ movl(rax, 0x80000001);
375 __ cpuid();
376 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
377 __ movl(Address(rsi, 0), rax);
378 __ movl(Address(rsi, 4), rbx);
379 __ movl(Address(rsi, 8), rcx);
380 __ movl(Address(rsi,12), rdx);
381
382 //
383 // Check if OS has enabled XGETBV instruction to access XCR0
384 // (OSXSAVE feature flag) and CPU supports AVX
385 //
386 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
387 __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
388 __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
389 __ cmpl(rcx, 0x18000000);
390 __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
391
392 __ movl(rax, 0x6);
393 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
394 __ cmpl(rax, 0x6);
395 __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported
396
397 // we need to bridge farther than imm8, so we use this island as a thunk
398 __ bind(done);
399 __ jmp(wrapup);
400
401 __ bind(start_simd_check);
402 //
403 // Some OSs have a bug when upper 128/256bits of YMM/ZMM
404 // registers are not restored after a signal processing.
405 // Generate SEGV here (reference through NULL)
406 // and check upper YMM/ZMM bits after it.
407 //
408 intx saved_useavx = UseAVX;
409 intx saved_usesse = UseSSE;
410
411 // If UseAVX is uninitialized or is set by the user to include EVEX
412 if (use_evex) {
4.1
'use_evex' is true
5
Taking true branch
413 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
414 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
415 __ movl(rax, 0x10000);
416 __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm
417 __ cmpl(rax, 0x10000);
418 __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
419 // check _cpuid_info.xem_xcr0_eax.bits.opmask
420 // check _cpuid_info.xem_xcr0_eax.bits.zmm512
421 // check _cpuid_info.xem_xcr0_eax.bits.zmm32
422 __ movl(rax, 0xE0);
423 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
424 __ cmpl(rax, 0xE0);
425 __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
426
427 if (FLAG_IS_DEFAULT(UseAVX)) {
6
Assuming the condition is false
7
Taking false branch
428 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
429 __ movl(rax, Address(rsi, 0));
430 __ cmpl(rax, 0x50654); // If it is Skylake
431 __ jcc(Assembler::equal, legacy_setup);
432 }
433 // EVEX setup: run in lowest evex mode
434 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
435 UseAVX = 3;
436 UseSSE = 2;
437#ifdef _WINDOWS
438 // xmm5-xmm15 are not preserved by caller on windows
439 // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
440 __ subptr(rsp, 64);
441 __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
442#ifdef _LP64
443 __ subptr(rsp, 64);
444 __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
445 __ subptr(rsp, 64);
446 __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
447#endif // _LP64
448#endif // _WINDOWS
449
450 // load value into all 64 bytes of zmm7 register
451 __ movl(rcx, VM_Version::ymm_test_value());
452 __ movdl(xmm0, rcx);
453 __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
454 __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
8
Passing null pointer value via 2nd parameter 'src'
9
Calling 'MacroAssembler::evmovdqul'
455#ifdef _LP64
456 __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
457 __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
458#endif
459 VM_Version::clean_cpuFeatures();
460 __ jmp(save_restore_except);
461 }
462
463 __ bind(legacy_setup);
464 // AVX setup
465 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
466 UseAVX = 1;
467 UseSSE = 2;
468#ifdef _WINDOWS
469 __ subptr(rsp, 32);
470 __ vmovdqu(Address(rsp, 0), xmm7);
472#ifdef _LP64
472 __ subptr(rsp, 32);
473 __ vmovdqu(Address(rsp, 0), xmm8);
474 __ subptr(rsp, 32);
475 __ vmovdqu(Address(rsp, 0), xmm15);
476#endif // _LP64
477#endif // _WINDOWS
478
479 // load value into all 32 bytes of ymm7 register
480 __ movl(rcx, VM_Version::ymm_test_value());
481
482 __ movdl(xmm0, rcx);
483 __ pshufd(xmm0, xmm0, 0x00);
484 __ vinsertf128_high(xmm0, xmm0);
485 __ vmovdqu(xmm7, xmm0);
486#ifdef _LP64
487 __ vmovdqu(xmm8, xmm0);
488 __ vmovdqu(xmm15, xmm0);
489#endif
490 VM_Version::clean_cpuFeatures();
491
492 __ bind(save_restore_except);
493 __ xorl(rsi, rsi);
494 VM_Version::set_cpuinfo_segv_addr(__ pc());
495 // Generate SEGV
496 __ movl(rax, Address(rsi, 0));
497
498 VM_Version::set_cpuinfo_cont_addr(__ pc());
499 // Returns here after signal. Save xmm0 to check it later.
500
501 // If UseAVX is uninitialized or is set by the user to include EVEX
502 if (use_evex) {
503 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
504 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
505 __ movl(rax, 0x10000);
506 __ andl(rax, Address(rsi, 4));
507 __ cmpl(rax, 0x10000);
508 __ jcc(Assembler::notEqual, legacy_save_restore);
509 // check _cpuid_info.xem_xcr0_eax.bits.opmask
510 // check _cpuid_info.xem_xcr0_eax.bits.zmm512
511 // check _cpuid_info.xem_xcr0_eax.bits.zmm32
512 __ movl(rax, 0xE0);
513 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
514 __ cmpl(rax, 0xE0);
515 __ jcc(Assembler::notEqual, legacy_save_restore);
516
517 if (FLAG_IS_DEFAULT(UseAVX)) {
518 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
519 __ movl(rax, Address(rsi, 0));
520 __ cmpl(rax, 0x50654); // If it is Skylake
521 __ jcc(Assembler::equal, legacy_save_restore);
522 }
523 // EVEX check: run in lowest evex mode
524 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
525 UseAVX = 3;
526 UseSSE = 2;
527 __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
528 __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
529 __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
530#ifdef _LP64
531 __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
532 __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
533#endif
534
535#ifdef _WINDOWS
536#ifdef _LP64
537 __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
538 __ addptr(rsp, 64);
539 __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
540 __ addptr(rsp, 64);
541#endif // _LP64
542 __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
543 __ addptr(rsp, 64);
544#endif // _WINDOWS
545 generate_vzeroupper(wrapup);
546 VM_Version::clean_cpuFeatures();
547 UseAVX = saved_useavx;
548 UseSSE = saved_usesse;
549 __ jmp(wrapup);
550 }
551
552 __ bind(legacy_save_restore);
553 // AVX check
554 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
555 UseAVX = 1;
556 UseSSE = 2;
557 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
558 __ vmovdqu(Address(rsi, 0), xmm0);
559 __ vmovdqu(Address(rsi, 32), xmm7);
560#ifdef _LP64
561 __ vmovdqu(Address(rsi, 64), xmm8);
562 __ vmovdqu(Address(rsi, 96), xmm15);
563#endif
564
565#ifdef _WINDOWS
566#ifdef _LP64
567 __ vmovdqu(xmm15, Address(rsp, 0));
568 __ addptr(rsp, 32);
569 __ vmovdqu(xmm8, Address(rsp, 0));
570 __ addptr(rsp, 32);
571#endif // _LP64
572 __ vmovdqu(xmm7, Address(rsp, 0));
573 __ addptr(rsp, 32);
574#endif // _WINDOWS
575 generate_vzeroupper(wrapup);
576 VM_Version::clean_cpuFeatures();
577 UseAVX = saved_useavx;
578 UseSSE = saved_usesse;
579
580 __ bind(wrapup);
581 __ popf();
582 __ pop(rsi);
583 __ pop(rbx);
584 __ pop(rbp);
585 __ ret(0);
586
587# undef __
588
589 return start;
590 };
591 void generate_vzeroupper(Label& L_wrapup) {
592# define __ _masm->
593 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
594 __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG'
595 __ jcc(Assembler::notEqual, L_wrapup);
596 __ movl(rcx, 0x0FFF0FF0);
597 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
598 __ andl(rcx, Address(rsi, 0));
599 __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200
600 __ jcc(Assembler::equal, L_wrapup);
601 __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi
602 __ jcc(Assembler::equal, L_wrapup);
603 // vzeroupper() will use a pre-computed instruction sequence that we
604 // can't compute until after we've determined CPU capabilities. Use
605 // uncached variant here directly to be able to bootstrap correctly
606 __ vzeroupper_uncached();
607# undef __
608 }
609 address generate_detect_virt() {
610 StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
611# define __ _masm->
612
613 address start = __ pc();
614
615 // Evacuate callee-saved registers
616 __ push(rbp);
617 __ push(rbx);
618 __ push(rsi); // for Windows
619
620#ifdef _LP64
621 __ mov(rax, c_rarg0); // CPUID leaf
622 __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
623#else
624 __ movptr(rax, Address(rsp, 16)); // CPUID leaf
625 __ movptr(rsi, Address(rsp, 20)); // register array address
626#endif
627
628 __ cpuid();
629
630 // Store result to register array
631 __ movl(Address(rsi, 0), rax);
632 __ movl(Address(rsi, 4), rbx);
633 __ movl(Address(rsi, 8), rcx);
634 __ movl(Address(rsi, 12), rdx);
635
636 // Epilogue
637 __ pop(rsi);
638 __ pop(rbx);
639 __ pop(rbp);
640 __ ret(0);
641
642# undef __
643
644 return start;
645 };
646
647
648 address generate_getCPUIDBrandString(void) {
649 // Flags to test CPU type.
650 const uint32_t HS_EFL_AC = 0x40000;
651 const uint32_t HS_EFL_ID = 0x200000;
652 // Values for when we don't have a CPUID instruction.
653 const int CPU_FAMILY_SHIFT = 8;
654 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
655 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
656
657 Label detect_486, cpu486, detect_586, done, ext_cpuid;
658
659 StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
660# define __ _masm->
661
662 address start = __ pc();
663
664 //
665 // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
666 //
667 // LP64: rcx and rdx are first and second argument registers on windows
668
669 __ push(rbp);
670#ifdef _LP64
671 __ mov(rbp, c_rarg0); // cpuid_info address
672#else
673 __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
674#endif
675 __ push(rbx);
676 __ push(rsi);
677 __ pushf(); // preserve rbx, and flags
678 __ pop(rax);
679 __ push(rax);
680 __ mov(rcx, rax);
681 //
682 // if we are unable to change the AC flag, we have a 386
683 //
684 __ xorl(rax, HS_EFL_AC);
685 __ push(rax);
686 __ popf();
687 __ pushf();
688 __ pop(rax);
689 __ cmpptr(rax, rcx);
690 __ jccb(Assembler::notEqual, detect_486);
691
692 __ movl(rax, CPU_FAMILY_386);
693 __ jmp(done);
694
695 //
696 // If we are unable to change the ID flag, we have a 486 which does
697 // not support the "cpuid" instruction.
698 //
699 __ bind(detect_486);
700 __ mov(rax, rcx);
701 __ xorl(rax, HS_EFL_ID);
702 __ push(rax);
703 __ popf();
704 __ pushf();
705 __ pop(rax);
706 __ cmpptr(rcx, rax);
707 __ jccb(Assembler::notEqual, detect_586);
708
709 __ bind(cpu486);
710 __ movl(rax, CPU_FAMILY_486);
711 __ jmp(done);
712
713 //
714 // At this point, we have a chip which supports the "cpuid" instruction
715 //
716 __ bind(detect_586);
717 __ xorl(rax, rax);
718 __ cpuid();
719 __ orl(rax, rax);
720 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input
721 // value of at least 1, we give up and
722 // assume a 486
723
724 //
725 // Extended cpuid(0x80000000) for processor brand string detection
726 //
727 __ bind(ext_cpuid);
728 __ movl(rax, CPUID_EXTENDED_FN);
729 __ cpuid();
730 __ cmpl(rax, CPUID_EXTENDED_FN_4);
731 __ jcc(Assembler::below, done);
732
733 //
734 // Extended cpuid(0x80000002) // first 16 bytes in brand string
735 //
736 __ movl(rax, CPUID_EXTENDED_FN_2);
737 __ cpuid();
738 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
739 __ movl(Address(rsi, 0), rax);
740 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
741 __ movl(Address(rsi, 0), rbx);
742 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
743 __ movl(Address(rsi, 0), rcx);
744 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
745 __ movl(Address(rsi,0), rdx);
746
747 //
748 // Extended cpuid(0x80000003) // next 16 bytes in brand string
749 //
750 __ movl(rax, CPUID_EXTENDED_FN_3);
751 __ cpuid();
752 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
753 __ movl(Address(rsi, 0), rax);
754 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
755 __ movl(Address(rsi, 0), rbx);
756 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
757 __ movl(Address(rsi, 0), rcx);
758 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
759 __ movl(Address(rsi,0), rdx);
760
761 //
762 // Extended cpuid(0x80000004) // last 16 bytes in brand string
763 //
764 __ movl(rax, CPUID_EXTENDED_FN_4);
765 __ cpuid();
766 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
767 __ movl(Address(rsi, 0), rax);
768 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
769 __ movl(Address(rsi, 0), rbx);
770 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
771 __ movl(Address(rsi, 0), rcx);
772 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
773 __ movl(Address(rsi,0), rdx);
774
775 //
776 // return
777 //
778 __ bind(done);
779 __ popf();
780 __ pop(rsi);
781 __ pop(rbx);
782 __ pop(rbp);
783 __ ret(0);
784
785# undef __
786
787 return start;
788 };
789};
790
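For context: the generator class above is normally driven from VM_Version::initialize(), which is outside the path shown in this report. A sketch of the usual wiring, assuming the stub_blob/stub_size statics and the extern "C" function-pointer typedefs declared near the top of this file:

  // Allocate the blob, run the generator into it, and cast the entry PC
  // to the function-pointer type that get_processor_features() calls below.
  stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size);
  if (stub_blob == NULL) {
    vm_exit_during_initialization("Unable to allocate get_cpu_info_stub");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);
  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, g.generate_get_cpu_info());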
791void VM_Version::get_processor_features() {
792
793 _cpu = 4; // 486 by default
794 _model = 0;
795 _stepping = 0;
796 _features = 0;
797 _logical_processors_per_package = 1;
798 // i486 internal cache is both I&D and has a 16-byte line size
799 _L1_data_cache_line_size = 16;
800
801 // Get raw processor info
802
803 get_cpu_info_stub(&_cpuid_info);
804
805 assert_is_initialized();
806 _cpu = extended_cpu_family();
807 _model = extended_cpu_model();
808 _stepping = cpu_stepping();
809
810 if (cpu_family() > 4) { // it supports CPUID
811 _features = feature_flags();
812 // Logical processors are only available on P4s and above,
813 // and only if hyperthreading is available.
814 _logical_processors_per_package = logical_processor_count();
815 _L1_data_cache_line_size = L1_line_size();
816 }
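As an aside on the decode above: extended_cpu_family() and extended_cpu_model() conventionally combine the base and extended fields of the CPUID leaf 1 EAX value captured by the stub. A self-contained sketch of that standard decoding (layout per the x86 CPUID convention; HotSpot's accessors may differ in detail):

  #include <cstdint>

  struct CpuSignature { uint32_t family, model, stepping; };

  CpuSignature decode_cpuid1_eax(uint32_t eax) {
    CpuSignature s;
    s.stepping = eax & 0xF;                    // bits [3:0]
    s.model    = (eax >> 4) & 0xF;             // bits [7:4]
    s.family   = (eax >> 8) & 0xF;             // bits [11:8]
    uint32_t ext_model  = (eax >> 16) & 0xF;   // bits [19:16]
    uint32_t ext_family = (eax >> 20) & 0xFF;  // bits [27:20]
    if (s.family == 0xF) s.family += ext_family;                         // extended family
    if (s.family == 0x6 || s.family >= 0xF) s.model += (ext_model << 4); // extended model
    return s;
  }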
817
818 _supports_cx8 = supports_cmpxchg8();
819 // xchg and xadd instructions
820 _supports_atomic_getset4 = true;
821 _supports_atomic_getadd4 = true;
822 LP64_ONLY(_supports_atomic_getset8 = true);
823 LP64_ONLY(_supports_atomic_getadd8 = true);
824
825#ifdef _LP64
826 // OS should support SSE for x64 and hardware should support at least SSE2.
827 if (!VM_Version::supports_sse2()) {
828 vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
829 }
830 // in 64 bit the use of SSE2 is the minimum
831 if (UseSSE < 2) UseSSE = 2;
832#endif
833
834#ifdef AMD64
835 // flush_icache_stub have to be generated first.
836 // That is why Icache line size is hard coded in ICache class,
837 // see icache_x86.hpp. It is also the reason why we can't use
838 // clflush instruction in 32-bit VM since it could be running
839 // on CPU which does not support it.
840 //
841 // The only thing we can do is to verify that flushed
842 // ICache::line_size has correct value.
843 guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
844 // clflush_size is size in quadwords (8 bytes).
845 guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
846#endif
847
848#ifdef _LP64
849 // assigning this field effectively enables Unsafe.writebackMemory()
850 // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
851 // that is only implemented on x86_64 and only if the OS plays ball
852 if (os::supports_map_sync()) {
853 // publish data cache line flush size to generic field, otherwise
854 // let if default to zero thereby disabling writeback
855 _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
856 }
857#endif
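(Concretely: with the clflush_size of 8 quadwords guaranteed above, _data_cache_line_flush_size is published as 8 * 8 = 64 bytes, the common x86 data cache line size.)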
858 // If the OS doesn't support SSE, we can't use this feature even if the HW does
859 if (!os::supports_sse())
860 _features &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
861
862 if (UseSSE < 4) {
863 _features &= ~CPU_SSE4_1;
864 _features &= ~CPU_SSE4_2;
865 }
866
867 if (UseSSE < 3) {
868 _features &= ~CPU_SSE3;
869 _features &= ~CPU_SSSE3;
870 _features &= ~CPU_SSE4A;
871 }
872
873 if (UseSSE < 2)
874 _features &= ~CPU_SSE2;
875
876 if (UseSSE < 1)
877 _features &= ~CPU_SSE;
878
879 // Since AVX instructions are slower than SSE on some ZX CPUs, force UseAVX=0.
880 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
881 UseAVX = 0;
882 }
883
884 // first try initial setting and detect what we can support
885 int use_avx_limit = 0;
886 if (UseAVX > 0) {
887 if (UseAVX > 2 && supports_evex()) {
888 use_avx_limit = 3;
889 } else if (UseAVX > 1 && supports_avx2()) {
890 use_avx_limit = 2;
891 } else if (UseAVX > 0 && supports_avx()) {
892 use_avx_limit = 1;
893 } else {
894 use_avx_limit = 0;
895 }
896 }
897 if (FLAG_IS_DEFAULT(UseAVX)) {
898 // Don't use AVX-512 on older Skylakes unless explicitly requested.
899 if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
900 FLAG_SET_DEFAULT(UseAVX, 2);
901 } else {
902 FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
903 }
904 }
905 if (UseAVX > use_avx_limit) {
906 warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", (int) UseAVX, use_avx_limit);
907 FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
908 } else if (UseAVX < 0) {
909 warning("UseAVX=%d is not valid, setting it to UseAVX=0", (int) UseAVX);
910 FLAG_SET_DEFAULT(UseAVX, 0);
911 }
912
913 if (UseAVX < 3) {
914 _features &= ~CPU_AVX512F;
915 _features &= ~CPU_AVX512DQ;
916 _features &= ~CPU_AVX512CD;
917 _features &= ~CPU_AVX512BW;
918 _features &= ~CPU_AVX512VL;
919 _features &= ~CPU_AVX512_VPOPCNTDQ;
920 _features &= ~CPU_AVX512_VPCLMULQDQ;
921 _features &= ~CPU_AVX512_VAES;
922 _features &= ~CPU_AVX512_VNNI;
923 _features &= ~CPU_AVX512_VBMI;
924 _features &= ~CPU_AVX512_VBMI2;
925 }
926
927 if (UseAVX < 2)
928 _features &= ~CPU_AVX2;
929
930 if (UseAVX < 1) {
931 _features &= ~CPU_AVX;
932 _features &= ~CPU_VZEROUPPER;
933 }
934
935 if (logical_processors_per_package() == 1) {
936 // HT processor could be installed on a system which doesn't support HT.
937 _features &= ~CPU_HT;
938 }
939
940 if (is_intel()) { // Intel cpus specific settings
941 if (is_knights_family()) {
942 _features &= ~CPU_VZEROUPPER;
943 _features &= ~CPU_AVX512BW;
944 _features &= ~CPU_AVX512VL;
945 _features &= ~CPU_AVX512DQ;
946 _features &= ~CPU_AVX512_VNNI;
947 _features &= ~CPU_AVX512_VAES;
948 _features &= ~CPU_AVX512_VPOPCNTDQ;
949 _features &= ~CPU_AVX512_VPCLMULQDQ;
950 _features &= ~CPU_AVX512_VBMI;
951 _features &= ~CPU_AVX512_VBMI2;
952 _features &= ~CPU_CLWB;
953 _features &= ~CPU_FLUSHOPT;
954 }
955 }
956
957 if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
958 _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
959 } else {
960 _has_intel_jcc_erratum = IntelJccErratumMitigation;
961 }
962
963 char buf[512];
964 int res = jio_snprintf(
965 buf, sizeof(buf),
966 "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
967 cores_per_cpu(), threads_per_core(),
968 cpu_family(), _model, _stepping, os::cpu_microcode_revision());
969 assert(res > 0, "not enough temporary space allocated");
970 insert_features_names(buf + res, sizeof(buf) - res, _features_names);
971
972 _features_string = os::strdup(buf);
973
974 // UseSSE is set to the smaller of what hardware supports and what
975 // the command line requires. I.e., you cannot set UseSSE to 2 on
976 // older Pentiums which do not support it.
977 int use_sse_limit = 0;
978 if (UseSSE > 0) {
979 if (UseSSE > 3 && supports_sse4_1()) {
980 use_sse_limit = 4;
981 } else if (UseSSE > 2 && supports_sse3()) {
982 use_sse_limit = 3;
983 } else if (UseSSE > 1 && supports_sse2()) {
984 use_sse_limit = 2;
985 } else if (UseSSE > 0 && supports_sse()) {
986 use_sse_limit = 1;
987 } else {
988 use_sse_limit = 0;
989 }
990 }
991 if (FLAG_IS_DEFAULT(UseSSE)) {
992 FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
993 } else if (UseSSE > use_sse_limit) {
994 warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", (int) UseSSE, use_sse_limit);
995 FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
996 } else if (UseSSE < 0) {
997 warning("UseSSE=%d is not valid, setting it to UseSSE=0", (int) UseSSE);
998 FLAG_SET_DEFAULT(UseSSE, 0);
999 }
1000
1001 // Use AES instructions if available.
1002 if (supports_aes()) {
1003 if (FLAG_IS_DEFAULT(UseAES)) {
1004 FLAG_SET_DEFAULT(UseAES, true);
1005 }
1006 if (!UseAES) {
1007 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1008 warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1009 }
1010 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1011 } else {
1012 if (UseSSE > 2) {
1013 if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1014 FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1015 }
1016 } else {
1017 // The AES intrinsic stubs require AES instruction support (of course)
1018 // but also require sse3 mode or higher for the instructions they use.
1019 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1020 warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1021 }
1022 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1023 }
1024
1025 // --AES-CTR begins--
1026 if (!UseAESIntrinsics) {
1027 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1028 warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1029 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1030 }
1031 } else {
1032 if (supports_sse4_1()) {
1033 if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1034 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1035 }
1036 } else {
1037 // The AES-CTR intrinsic stubs require AES instruction support (of course)
1038 // but also require sse4.1 mode or higher for the instructions they use.
1039 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1040 warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1041 }
1042 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1043 }
1044 }
1045 // --AES-CTR ends--
1046 }
1047 } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1048 if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1049 warning("AES instructions are not available on this CPU");
1050 FLAG_SET_DEFAULT(UseAES, false);
1051 }
1052 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1053 warning("AES intrinsics are not available on this CPU");
1054 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1055 }
1056 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1057 warning("AES-CTR intrinsics are not available on this CPU");
1058 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1059 }
1060 }
1061
1062 // Use CLMUL instructions if available.
1063 if (supports_clmul()) {
1064 if (FLAG_IS_DEFAULT(UseCLMUL)) {
1065 UseCLMUL = true;
1066 }
1067 } else if (UseCLMUL) {
1068 if (!FLAG_IS_DEFAULT(UseCLMUL))
1069 warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1070 FLAG_SET_DEFAULT(UseCLMUL, false);
1071 }
1072
1073 if (UseCLMUL && (UseSSE > 2)) {
1074 if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1075 UseCRC32Intrinsics = true;
1076 }
1077 } else if (UseCRC32Intrinsics) {
1078 if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1079 warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1080 FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1081 }
1082
1083#ifdef _LP64
1084 if (supports_avx2()) {
1085 if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1086 UseAdler32Intrinsics = true;
1087 }
1088 } else if (UseAdler32Intrinsics) {
1089 if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1090 warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1091 }
1092 FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1093 }
1094#else
1095 if (UseAdler32Intrinsics) {
1096 warning("Adler32Intrinsics not available on this CPU.");
1097 FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1098 }
1099#endif
1100
1101 if (supports_sse4_2() && supports_clmul()) {
1102 if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1103 UseCRC32CIntrinsics = true;
1104 }
1105 } else if (UseCRC32CIntrinsics) {
1106 if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1107 warning("CRC32C intrinsics are not available on this CPU");
1108 }
1109 FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1110 }
1111
1112 // GHASH/GCM intrinsics
1113 if (UseCLMUL && (UseSSE > 2)) {
1114 if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1115 UseGHASHIntrinsics = true;
1116 }
1117 } else if (UseGHASHIntrinsics) {
1118 if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1119 warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1120 FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1121 }
1122
1123 // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1124 if ((UseAVX > 2) && supports_avx512vl() && supports_avx512bw()) {
1125 if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1126 UseBASE64Intrinsics = true;
1127 }
1128 } else if (UseBASE64Intrinsics) {
1129 if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1130 warning("Base64 intrinsic requires EVEX instructions on this CPU");
1131 FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1132 }
1133
1134 if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
1135 if (FLAG_IS_DEFAULT(UseFMA)) {
1136 UseFMA = true;
1137 }
1138 } else if (UseFMA) {
1139 warning("FMA instructions are not available on this CPU");
1140 FLAG_SET_DEFAULT(UseFMA, false);
1141 }
1142
1143 if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1144 UseMD5Intrinsics = true;
1145 }
1146
1147 if (supports_sha() LP64_ONLY(|| supports_avx2() && supports_bmi2())) {
1148 if (FLAG_IS_DEFAULT(UseSHA)) {
1149 UseSHA = true;
1150 }
1151 } else if (UseSHA) {
1152 warning("SHA instructions are not available on this CPU");
1153 FLAG_SET_DEFAULT(UseSHA, false);
1154 }
1155
1156 if (supports_sha() && supports_sse4_1() && UseSHA) {
1157 if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1158 FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1159 }
1160 } else if (UseSHA1Intrinsics) {
1161 warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1162 FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1163 }
1164
1165 if (supports_sse4_1() && UseSHA) {
1166 if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1167 FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1168 }
1169 } else if (UseSHA256Intrinsics) {
1170 warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1171 FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1172 }
1173
1174#ifdef _LP64
1175 // These are only supported on 64-bit
1176 if (UseSHA && supports_avx2() && supports_bmi2()) {
1177 if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1178 FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1179 }
1180 } else
1181#endif
1182 if (UseSHA512Intrinsics) {
1183 warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1184 FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1185 }
1186
1187 if (UseSHA3Intrinsics) {
1188 warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1189 FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1190 }
1191
1192 if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1193 FLAG_SET_DEFAULT(UseSHA, false);
1194 }
1195
1196 if (!supports_rtm() && UseRTMLocking) {
1197 vm_exit_during_initialization("RTM instructions are not available on this CPU");
1198 }
1199
1200#if INCLUDE_RTM_OPT
1201 if (UseRTMLocking) {
1202 if (!CompilerConfig::is_c2_enabled()) {
1203 // Only C2 does RTM locking optimization.
1204 vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1205 }
1206 if (is_intel_family_core()) {
1207 if ((_model == CPU_MODEL_HASWELL_E3) ||
1208 (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
1209 (_model == CPU_MODEL_BROADWELL && _stepping < 4)) {
1210 // currently a collision between SKL and HSW_E3
1211 if (!UnlockExperimentalVMOptions && UseAVX < 3) {
1212 vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this "
1213 "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
1214 } else {
1215 warning("UseRTMLocking is only available as experimental option on this platform.");
1216 }
1217 }
1218 }
1219 if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
1220 // RTM locking should be used only for applications with
1221 // high lock contention. For now we do not use it by default.
1222 vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
1223 }
1224 } else { // !UseRTMLocking
1225 if (UseRTMForStackLocks) {
1226 if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
1227 warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
1228 }
1229 FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
1230 }
1231 if (UseRTMDeopt) {
1232 FLAG_SET_DEFAULT(UseRTMDeopt, false);
1233 }
1234 if (PrintPreciseRTMLockingStatistics) {
1235 FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
1236 }
1237 }
1238#else
1239 if (UseRTMLocking) {
1240 // Only C2 does RTM locking optimization.
1241 vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1242 }
1243#endif
1244
1245#ifdef COMPILER2
1246 if (UseFPUForSpilling) {
1247 if (UseSSE < 2) {
1248 // Only supported with SSE2+
1249 FLAG_SET_DEFAULT(UseFPUForSpilling, false);
1250 }
1251 }
1252#endif
1253
1254#if COMPILER2_OR_JVMCI
1255 int max_vector_size = 0;
1256 if (UseSSE < 2) {
1257 // Vectors (in XMM) are only supported with SSE2+
1258 // SSE is always 2 on x64.
1259 max_vector_size = 0;
1260 } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
1261 // 16 byte vectors (in XMM) are supported with SSE2+
1262 max_vector_size = 16;
1263 } else if (UseAVX == 1 || UseAVX == 2) {
1264 // 32 bytes vectors (in YMM) are only supported with AVX+
1265 max_vector_size = 32;
1266 } else if (UseAVX > 2) {
1267 // 64 bytes vectors (in ZMM) are only supported with AVX 3
1268 max_vector_size = 64;
1269 }
1270
1271#ifdef _LP64
1272 int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1273#else
1274 int min_vector_size = 0;
1275#endif
1276
1277 if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1278 if (MaxVectorSize < min_vector_size) {
1279 warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1280 FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1281 }
1282 if (MaxVectorSize > max_vector_size) {
1283 warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1284 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1285 }
1286 if (!is_power_of_2(MaxVectorSize)) {
1287 warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1288 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1289 }
1290 } else {
1291 // If default, use highest supported configuration
1292 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1293 }
1294
1295#if defined(COMPILER2) && defined(ASSERT)
1296 if (MaxVectorSize > 0) {
1297 if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1298 tty->print_cr("State of YMM registers after signal handle:");
1299 int nreg = 2 LP64_ONLY(+2);
1300 const char* ymm_name[4] = {"0", "7", "8", "15"};
1301 for (int i = 0; i < nreg; i++) {
1302 tty->print("YMM%s:", ymm_name[i]);
1303 for (int j = 7; j >=0; j--) {
1304 tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1305 }
1306 tty->cr();
1307 }
1308 }
1309 }
1310#endif // COMPILER2 && ASSERT
1311
1312#ifdef _LP64
1313 if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1314 UseMultiplyToLenIntrinsic = true;
1315 }
1316 if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1317 UseSquareToLenIntrinsic = true;
1318 }
1319 if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1320 UseMulAddIntrinsic = true;
1321 }
1322 if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1323 UseMontgomeryMultiplyIntrinsic = true;
1324 }
1325 if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1326 UseMontgomerySquareIntrinsic = true;
1327 }
1328#else
1329 if (UseMultiplyToLenIntrinsic) {
1330 if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1331 warning("multiplyToLen intrinsic is not available in 32-bit VM");
1332 }
1333 FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
1334 }
1335 if (UseMontgomeryMultiplyIntrinsic) {
1336 if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1337 warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
1338 }
1339 FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
1340 }
1341 if (UseMontgomerySquareIntrinsic) {
1342 if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1343 warning("montgomerySquare intrinsic is not available in 32-bit VM");
1344 }
1345 FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
1346 }
1347 if (UseSquareToLenIntrinsic) {
1348 if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1349 warning("squareToLen intrinsic is not available in 32-bit VM");
1350 }
1351 FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
1352 }
1353 if (UseMulAddIntrinsic) {
1354 if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1355 warning("mulAdd intrinsic is not available in 32-bit VM");
1356 }
1357 FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
1358 }
1359#endif // _LP64
1360#endif // COMPILER2_OR_JVMCI
1361
1362 // On new CPUs, instructions which update the whole XMM register should be used
1363 // to prevent a partial register stall due to dependencies on the high half.
1364 //
1365 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem)
1366 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1367 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm).
1368 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm).
1369
1370
1371 if (is_zx()) { // ZX cpus specific settings
1372 if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1373 UseStoreImmI16 = false; // don't use it on ZX cpus
1374 }
1375 if ((cpu_family() == 6) || (cpu_family() == 7)) {
1376 if (FLAG_IS_DEFAULT(UseAddressNop)) {
1377 // Use it on all ZX cpus
1378 UseAddressNop = true;
1379 }
1380 }
1381 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1382 UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1383 }
1384 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1385 if (supports_sse3()) {
1386 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1387 } else {
1388 UseXmmRegToRegMoveAll = false;
1389 }
1390 }
1391 if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1392#ifdef COMPILER2
1393 if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1394 // For new ZX cpus do the following optimization:
1395 // don't align the beginning of a loop if there are enough instructions
1396 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1397 // in current fetch line (OptoLoopAlignment) or the padding
1398 // is big (> MaxLoopPad).
1399 // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1400 // generated NOP instructions. 11 is the largest size of one
1401 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1402 MaxLoopPad = 11;
1403 }
1404#endif // COMPILER2
1405 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1406 UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1407 }
1408 if (supports_sse4_2()) { // new ZX cpus
1409 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1410 UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1411 }
1412 }
1413 if (supports_sse4_2()) {
1414 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1415 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1416 }
1417 } else {
1418 if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1419 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1420 }
1421 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1422 }
1423 }
1424
1425 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1426 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1427 }
1428 }
1429
1430 if (is_amd_family()) { // AMD cpus specific settings
1431 if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1432 // Use it on new AMD cpus starting from Opteron.
1433 UseAddressNop = true;
1434 }
1435 if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1436 // Use it on new AMD cpus starting from Opteron.
1437 UseNewLongLShift = true;
1438 }
1439 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1440 if (supports_sse4a()) {
1441 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1442 } else {
1443 UseXmmLoadAndClearUpper = false;
1444 }
1445 }
1446 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1447 if (supports_sse4a()) {
1448 UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1449 } else {
1450 UseXmmRegToRegMoveAll = false;
1451 }
1452 }
1453 if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1454 if (supports_sse4a()) {
1455 UseXmmI2F = true;
1456 } else {
1457 UseXmmI2F = false;
1458 }
1459 }
1460 if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1461 if (supports_sse4a()) {
1462 UseXmmI2D = true;
1463 } else {
1464 UseXmmI2D = false;
1465 }
1466 }
1467 if (supports_sse4_2()) {
1468 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1469 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1470 }
1471 } else {
1472 if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1473 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1474 }
1475 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1476 }
1477
1478 // some defaults for AMD family 15h
1479 if (cpu_family() == 0x15) {
1480 // On family 15h processors default is no sw prefetch
1481 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1482 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1483 }
1484 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1485 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1486 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1487 }
1488 // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1489 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1490 FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1491 }
1492 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1493 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1494 }
1495 }
1496
1497#ifdef COMPILER2
1498 if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1499 // Limit vectors size to 16 bytes on AMD cpus < 17h.
1500 FLAG_SET_DEFAULT(MaxVectorSize, 16);
1501 }
1502#endif // COMPILER2
1503
1504 // Some defaults for AMD family >= 17h && Hygon family 18h
1505 if (cpu_family() >= 0x17) {
1506 // On family >=17h processors use XMM and UnalignedLoadStores
1507 // for Array Copy
1508 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1509 FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1510 }
1511 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1512 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1513 }
1514#ifdef COMPILER2
1515 if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1516 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1517 }
1518#endif
1519 }
1520 }
1521
1522 if (is_intel()) { // Intel cpus specific settings
1523 if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1524 UseStoreImmI16 = false; // don't use it on Intel cpus
1525 }
1526 if (cpu_family() == 6 || cpu_family() == 15) {
1527 if (FLAG_IS_DEFAULT(UseAddressNop)) {
1528 // Use it on all Intel cpus starting from PentiumPro
1529 UseAddressNop = true;
1530 }
1531 }
1532 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1533 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1534 }
1535 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1536 if (supports_sse3()) {
1537 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1538 } else {
1539 UseXmmRegToRegMoveAll = false;
1540 }
1541 }
1542 if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
1543#ifdef COMPILER2
1544 if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1545 // For new Intel cpus do the following optimization:
1546 // don't align the beginning of a loop if there are enough instructions
1547 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1548 // in current fetch line (OptoLoopAlignment) or the padding
1549 // is big (> MaxLoopPad).
1550 // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1551 // generated NOP instructions. 11 is the largest size of one
1552 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1553 MaxLoopPad = 11;
1554 }
1555#endif // COMPILER2
1556
1557 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1558 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1559 }
1560 if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1561 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1562 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1563 }
1564 }
1565 if (supports_sse4_2()) {
1566 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1567 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1568 }
1569 } else {
1570 if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1571 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1572 }
1573 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1574 }
1575 }
1576 if (is_atom_family() || is_knights_family()) {
1577#ifdef COMPILER2
1578 if (FLAG_IS_DEFAULT(OptoScheduling)) {
1579 OptoScheduling = true;
1580 }
1581#endif
1582 if (supports_sse4_2()) { // Silvermont
1583 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1584 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1585 }
1586 }
1587 if (FLAG_IS_DEFAULT(UseIncDec)) {
1588 FLAG_SET_DEFAULT(UseIncDec, false);
1589 }
1590 }
1591 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1592 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1593 }
1594#ifdef COMPILER2
1595 if (UseAVX > 2) {
1596 if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1597 (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1598 ArrayOperationPartialInlineSize != 0 &&
1599 ArrayOperationPartialInlineSize != 16 &&
1600 ArrayOperationPartialInlineSize != 32 &&
1601 ArrayOperationPartialInlineSize != 64)) {
1602 int inline_size = 0;
1603 if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1604 inline_size = 64;
1605 } else if (MaxVectorSize >= 32) {
1606 inline_size = 32;
1607 } else if (MaxVectorSize >= 16) {
1608 inline_size = 16;
1609 }
1610 if (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1611 warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1612 }
1613 ArrayOperationPartialInlineSize = inline_size;
1614 }
1615
1616 if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1617 ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1618 if (ArrayOperationPartialInlineSize) {
1619 warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize" INTX_FORMAT"%" "l" "d" ")", MaxVectorSize);
1620 } else {
1621 warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT"%" "l" "d", ArrayOperationPartialInlineSize);
1622 }
1623 }
1624 }
1625#endif
1626 }
1627
1628#ifdef COMPILER2
1629 if (FLAG_IS_DEFAULT(OptimizeFill)) {
1630 if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
1631 OptimizeFill = false;
1632 }
1633 }
1634#endif
1635
1636#ifdef _LP64
1637 if (UseSSE42Intrinsics) {
1638 if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1639 UseVectorizedMismatchIntrinsic = true;
1640 }
1641 } else if (UseVectorizedMismatchIntrinsic) {
1642 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1643 warning("vectorizedMismatch intrinsics are not available on this CPU");
1644 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1645 }
1646#else
1647 if (UseVectorizedMismatchIntrinsic) {
1648 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1649 warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1650 }
1651 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1652 }
1653#endif // _LP64
1654
1655 // Use count leading zeros instruction if available.
1656 if (supports_lzcnt()) {
1657 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1658 UseCountLeadingZerosInstruction = true;
1659 }
1660 } else if (UseCountLeadingZerosInstruction) {
1661 warning("lzcnt instruction is not available on this CPU");
1662 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1663 }
1664
1665 // Use count trailing zeros instruction if available
1666 if (supports_bmi1()) {
1667 // tzcnt does not require VEX prefix
1668 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1669 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1670 // Don't use tzcnt if BMI1 is switched off on command line.
1671 UseCountTrailingZerosInstruction = false;
1672 } else {
1673 UseCountTrailingZerosInstruction = true;
1674 }
1675 }
1676 } else if (UseCountTrailingZerosInstruction) {
1677 warning("tzcnt instruction is not available on this CPU");
1678 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1679 }
1680
1681 // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1682 // VEX prefix is generated only when AVX > 0.
1683 if (supports_bmi1() && supports_avx()) {
1684 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1685 UseBMI1Instructions = true;
1686 }
1687 } else if (UseBMI1Instructions) {
1688 warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1689 FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1690 }
1691
1692 if (supports_bmi2() && supports_avx()) {
1693 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1694 UseBMI2Instructions = true;
1695 }
1696 } else if (UseBMI2Instructions) {
1697 warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1698 FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1699 }
1700
1701 // Use population count instruction if available.
1702 if (supports_popcnt()) {
1703 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1704 UsePopCountInstruction = true;
1705 }
1706 } else if (UsePopCountInstruction) {
1707 warning("POPCNT instruction is not available on this CPU");
1708 FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1709 }
1710
1711 // Use fast-string operations if available.
1712 if (supports_erms()) {
1713 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1714 UseFastStosb = true;
1715 }
1716 } else if (UseFastStosb) {
1717 warning("fast-string operations are not available on this CPU");
1718 FLAG_SET_DEFAULT(UseFastStosb, false);
1719 }
1720
1721 // For AMD Processors use XMM/YMM MOVDQU instructions
1722 // for Object Initialization as default
1723 if (is_amd() && cpu_family() >= 0x19) {
1724 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1725 UseFastStosb = false;
1726 }
1727 }
1728
1729#ifdef COMPILER2
1730 if (is_intel() && MaxVectorSize > 16) {
1731 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1732 UseFastStosb = false;
1733 }
1734 }
1735#endif
1736
1737 // Use XMM/YMM MOVDQU instruction for Object Initialization
1738 if (!UseFastStosb && UseSSE >= 2 && UseUnalignedLoadStores) {
1739 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1740 UseXMMForObjInit = true;
1741 }
1742 } else if (UseXMMForObjInit) {
1743 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1744 FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1745 }
1746
1747#ifdef COMPILER2
1748 if (FLAG_IS_DEFAULT(AlignVector)) {
1749 // Modern processors allow misaligned memory operations for vectors.
1750 AlignVector = !UseUnalignedLoadStores;
1751 }
1752#endif // COMPILER2
1753
1754 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1755 if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1756 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1757 } else if (!supports_sse() && supports_3dnow_prefetch()) {
1758 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1759 }
1760 }
1761
1762 // Allocation prefetch settings
1763 intx cache_line_size = prefetch_data_size();
1764 if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1765 (cache_line_size > AllocatePrefetchStepSize)) {
1766 FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1767 }
1768
1769 if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1770 assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1771 if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1772 warning("AllocatePrefetchDistance is set to 0 which disables prefetching. Ignoring AllocatePrefetchStyle flag.");
1773 }
1774 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1775 }
1776
1777 if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1778 bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1779 FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1780 }
1781
1782 if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1783 if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1784 supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1785 FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1786 }
1787#ifdef COMPILER2
1788 if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1789 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1790 }
1791#endif
1792 }
1793
1794 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1795#ifdef COMPILER2
1796 if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1797 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1798 }
1799#endif
1800 }
1801
1802#ifdef _LP64
1803 // Prefetch settings
1804
1805 // Prefetch interval for gc copy/scan == 9 dcache lines (9 * 64 bytes = 576). Derived from
1806 // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1807 // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1808 // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1809
1810 // gc copy/scan is disabled if prefetchw isn't supported, because
1811 // Prefetch::write emits an inlined prefetchw on Linux.
1812 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t.
1813 // The prefetcht0 instruction that is used works on both amd64 and em64t.
1814
1815 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1816 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1817 }
1818 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1819 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1820 }
1821 if (FLAG_IS_DEFAULT(PrefetchFieldsAhead)) {
1822 FLAG_SET_DEFAULT(PrefetchFieldsAhead, 1);
1823 }
1824#endif
1825
1826 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1827 (cache_line_size > ContendedPaddingWidth))
1828 ContendedPaddingWidth = cache_line_size;
1829
1830 // This machine allows unaligned memory accesses
1831 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1832 FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1833 }
1834
1835#ifndef PRODUCT
1836 if (log_is_enabled(Info, os, cpu)) {
1837 LogStream ls(Log(os, cpu)::info());
1838 outputStream* log = &ls;
1839 log->print_cr("Logical CPUs per core: %u",
1840 logical_processors_per_package());
1841 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1842 log->print("UseSSE=%d", (int) UseSSE);
1843 if (UseAVX > 0) {
1844 log->print(" UseAVX=%d", (int) UseAVX);
1845 }
1846 if (UseAES) {
1847 log->print(" UseAES=1");
1848 }
1849#ifdef COMPILER21
1850 if (MaxVectorSize > 0) {
1851 log->print(" MaxVectorSize=%d", (int) MaxVectorSize);
1852 }
1853#endif
1854 log->cr();
1855 log->print("Allocation");
1856 if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
1857 log->print_cr(": no prefetching");
1858 } else {
1859 log->print(" prefetching: ");
1860 if (UseSSE == 0 && supports_3dnow_prefetch()) {
1861 log->print("PREFETCHW");
1862 } else if (UseSSE >= 1) {
1863 if (AllocatePrefetchInstr == 0) {
1864 log->print("PREFETCHNTA");
1865 } else if (AllocatePrefetchInstr == 1) {
1866 log->print("PREFETCHT0");
1867 } else if (AllocatePrefetchInstr == 2) {
1868 log->print("PREFETCHT2");
1869 } else if (AllocatePrefetchInstr == 3) {
1870 log->print("PREFETCHW");
1871 }
1872 }
1873 if (AllocatePrefetchLines > 1) {
1874 log->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize);
1875 } else {
1876 log->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize);
1877 }
1878 }
1879
1880 if (PrefetchCopyIntervalInBytes > 0) {
1881 log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1882 }
1883 if (PrefetchScanIntervalInBytes > 0) {
1884 log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1885 }
1886 if (PrefetchFieldsAhead > 0) {
1887 log->print_cr("PrefetchFieldsAhead %d", (int) PrefetchFieldsAhead);
1888 }
1889 if (ContendedPaddingWidth > 0) {
1890 log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1891 }
1892 }
1893#endif // !PRODUCT
1894 if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1895 FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1896 }
1897 if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1898 FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1899 }
1900}
1901
1902void VM_Version::print_platform_virtualization_info(outputStream* st) {
1903 VirtualizationType vrt = VM_Version::get_detected_virtualization();
1904 if (vrt == XenHVM) {
1905 st->print_cr("Xen hardware-assisted virtualization detected");
1906 } else if (vrt == KVM) {
1907 st->print_cr("KVM virtualization detected");
1908 } else if (vrt == VMWare) {
1909 st->print_cr("VMWare virtualization detected");
1910 VirtualizationSupport::print_virtualization_info(st);
1911 } else if (vrt == HyperV) {
1912 st->print_cr("Hyper-V virtualization detected");
1913 } else if (vrt == HyperVRole) {
1914 st->print_cr("Hyper-V role detected");
1915 }
1916}
1917
1918bool VM_Version::compute_has_intel_jcc_erratum() {
1919 if (!is_intel_family_core()) {
1920 // Only Intel CPUs are affected.
1921 return false;
1922 }
1923 // The following table of affected CPUs is based on the following document released by Intel:
1924 // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1925 switch (_model) {
1926 case 0x8E:
1927 // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1928 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1929 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1930 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1931 // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1932 // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1933 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1934 // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1935 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1936 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1937 case 0x4E:
1938 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1939 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1940 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1941 return _stepping == 0x3;
1942 case 0x55:
1943 // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1944 // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1945 // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
1946 // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
1947 // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
1948 // 06_55 | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
1949 return _stepping == 0x4 || _stepping == 0x7;
1950 case 0x5E:
1951 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
1952 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
1953 return _stepping == 0x3;
1954 case 0x9E:
1955 // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
1956 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
1957 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
1958 // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
1959 // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
1960 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
1961 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
1962 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
1963 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
1964 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
1965 // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
1966 // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
1967 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2)
1968 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
1969 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
1970 case 0xA5:
1971 // Not in Intel documentation.
1972 // 06_A5H | | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
1973 return true;
1974 case 0xA6:
1975 // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
1976 return _stepping == 0x0;
1977 case 0xAE:
1978 // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
1979 return _stepping == 0xA;
1980 default:
1981 // If we are running on another Intel machine not recognized in the table, we are okay.
1982 return false;
1983 }
1984}
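The table above keys on _model and _stepping, which are decoded from CPUID leaf 1 EAX. A standalone sketch of that decoding, following the layout documented in the Intel SDM (the EAX value below is a made-up Whiskey Lake U example, not read from a real CPU):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t eax = 0x000806EC;                 // hypothetical CPUID.1:EAX
  uint32_t stepping  = eax & 0xF;            // -> 0xC
  uint32_t family    = (eax >> 8) & 0xF;     // -> 0x6
  uint32_t ext_model = (eax >> 16) & 0xF;    // -> 0x8
  // For family 0x6/0xF the extended model bits are prepended to the model.
  uint32_t model = (eax >> 4) & 0xF;
  if (family == 0x6 || family == 0xF) {
    model |= (ext_model << 4);               // -> 0x8E
  }
  // model 0x8E with stepping 0xC matches the first case in the table above.
  printf("model=0x%X stepping=0x%X\n", model, stepping);
  return 0;
}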
1985
1986// On Xen, the cpuid instruction returns
1987// eax / registers[0]: Version of Xen
1988// ebx / registers[1]: chars 'XenV'
1989// ecx / registers[2]: chars 'MMXe'
1990// edx / registers[3]: chars 'nVMM'
1991//
1992// On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
1993// ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
1994// ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
1995// edx / registers[3]: chars 'M' / 'ware' / 't Hv'
1996//
1997// more information :
1998// https://kb.vmware.com/s/article/1009458
1999//
2000void VM_Version::check_virtualizations() {
2001 uint32_t registers[4] = {0};
2002 char signature[13] = {0};
2003
2004 // Xen cpuid leaves can be found at 0x100-aligned boundaries starting
2005 // from 0x40000000 up to 0x40010000.
2006 // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2007 for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2008 detect_virt_stub(leaf, registers);
2009 memcpy(signature, &registers[1], 12);
2010
2011 if (strncmp("VMwareVMware", signature, 12) == 0) {
2012 Abstract_VM_Version::_detected_virtualization = VMWare;
2013 // check for extended metrics from guestlib
2014 VirtualizationSupport::initialize();
2015 } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2016 Abstract_VM_Version::_detected_virtualization = HyperV;
2017#ifdef _WINDOWS
2018 // CPUID leaf 0x40000007 is available to the root partition only.
2019 // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2020 // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2021 detect_virt_stub(0x40000007, registers);
2022 if ((registers[0] != 0x0) ||
2023 (registers[1] != 0x0) ||
2024 (registers[2] != 0x0) ||
2025 (registers[3] != 0x0)) {
2026 Abstract_VM_Version::_detected_virtualization = HyperVRole;
2027 }
2028#endif
2029 } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2030 Abstract_VM_Version::_detected_virtualization = KVM;
2031 } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2032 Abstract_VM_Version::_detected_virtualization = XenHVM;
2033 }
2034 }
2035}
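For illustration, a self-contained sketch of how the 12-character signature compared above is laid out: after cpuid, registers[1..3] (ebx, ecx, edx) hold the hypervisor signature bytes in order on a little-endian machine. The register values below are hand-filled from the KVM example in the comment before check_virtualizations(), not produced by a real cpuid:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint32_t registers[4] = { 0,
                            0x4b4d564b,    // ebx: 'KVMK'
                            0x564b4d56,    // ecx: 'VMKV'
                            0x0000004d };  // edx: 'M'
  char signature[13] = {0};
  memcpy(signature, &registers[1], 12);    // same copy as in the code above
  printf("%s\n", signature);               // prints "KVMKVMKVM"
  return strncmp("KVMKVMKVM", signature, 9) == 0 ? 0 : 1;
}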
2036
2037// avx3_threshold() sets the threshold at which 64-byte instructions are used
2038// for implementing the array copy and clear operations.
2039// Intel platforms that support the serialize instruction
2040// have an improved implementation of 64-byte load/stores, so the default
2041// threshold is set to 0 for these platforms.
2042int VM_Version::avx3_threshold() {
2043 return (is_intel_family_core() &&
2044 supports_serialize() &&
2045 FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2046}
2047
2048static bool _vm_version_initialized = false;
2049
2050void VM_Version::initialize() {
2051 ResourceMark rm;
2052 // Making this stub must be FIRST use of assembler
2053 stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2054 if (stub_blob == NULL) {
    1. Assuming 'stub_blob' is not equal to NULL
    2. Taking false branch
2055 vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2056 }
2057 CodeBuffer c(stub_blob);
2058 VM_Version_StubGenerator g(&c);
2059
2060 get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
    3. Calling 'VM_Version_StubGenerator::generate_get_cpu_info'
2061 g.generate_get_cpu_info());
2062 detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2063 g.generate_detect_virt());
2064
2065 get_processor_features();
2066
2067 LP64_ONLY(Assembler::precompute_instructions();)
2068
2069 if (VM_Version::supports_hv()) { // Supports hypervisor
2070 check_virtualizations();
2071 }
2072 _vm_version_initialized = true;
2073}
2074
2075typedef enum {
2076 CPU_FAMILY_8086_8088 = 0,
2077 CPU_FAMILY_INTEL_286 = 2,
2078 CPU_FAMILY_INTEL_386 = 3,
2079 CPU_FAMILY_INTEL_486 = 4,
2080 CPU_FAMILY_PENTIUM = 5,
2081 CPU_FAMILY_PENTIUMPRO = 6, // Same family several models
2082 CPU_FAMILY_PENTIUM_4 = 0xF
2083} FamilyFlag;
2084
2085typedef enum {
2086 RDTSCP_FLAG = 0x08000000, // bit 27
2087 INTEL64_FLAG = 0x20000000 // bit 29
2088} _featureExtendedEdxFlag;
2089
2090typedef enum {
2091 FPU_FLAG = 0x00000001,
2092 VME_FLAG = 0x00000002,
2093 DE_FLAG = 0x00000004,
2094 PSE_FLAG = 0x00000008,
2095 TSC_FLAG = 0x00000010,
2096 MSR_FLAG = 0x00000020,
2097 PAE_FLAG = 0x00000040,
2098 MCE_FLAG = 0x00000080,
2099 CX8_FLAG = 0x00000100,
2100 APIC_FLAG = 0x00000200,
2101 SEP_FLAG = 0x00000800,
2102 MTRR_FLAG = 0x00001000,
2103 PGE_FLAG = 0x00002000,
2104 MCA_FLAG = 0x00004000,
2105 CMOV_FLAG = 0x00008000,
2106 PAT_FLAG = 0x00010000,
2107 PSE36_FLAG = 0x00020000,
2108 PSNUM_FLAG = 0x00040000,
2109 CLFLUSH_FLAG = 0x00080000,
2110 DTS_FLAG = 0x00200000,
2111 ACPI_FLAG = 0x00400000,
2112 MMX_FLAG = 0x00800000,
2113 FXSR_FLAG = 0x01000000,
2114 SSE_FLAG = 0x02000000,
2115 SSE2_FLAG = 0x04000000,
2116 SS_FLAG = 0x08000000,
2117 HTT_FLAG = 0x10000000,
2118 TM_FLAG = 0x20000000
2119} FeatureEdxFlag;
2120
2121static BufferBlob* cpuid_brand_string_stub_blob;
2122static const int cpuid_brand_string_stub_size = 550;
2123
2124extern "C" {
2125 typedef void (*getCPUIDBrandString_stub_t)(void*);
2126}
2127
2128static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = NULL;
2129
2130// VM_Version statics
2131enum {
2132 ExtendedFamilyIdLength_INTEL = 16,
2133 ExtendedFamilyIdLength_AMD = 24
2134};
2135
2136const size_t VENDOR_LENGTH = 13;
2137const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
2138static char* _cpu_brand_string = NULL;
2139static int64_t _max_qualified_cpu_frequency = 0;
2140
2141static int _no_of_threads = 0;
2142static int _no_of_cores = 0;
2143
2144const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2145 "8086/8088",
2146 "",
2147 "286",
2148 "386",
2149 "486",
2150 "Pentium",
2151 "Pentium Pro", //or Pentium-M/Woodcrest depeding on model
2152 "",
2153 "",
2154 "",
2155 "",
2156 "",
2157 "",
2158 "",
2159 "",
2160 "Pentium 4"
2161};
2162
2163const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2164 "",
2165 "",
2166 "",
2167 "",
2168 "5x86",
2169 "K5/K6",
2170 "Athlon/AthlonXP",
2171 "",
2172 "",
2173 "",
2174 "",
2175 "",
2176 "",
2177 "",
2178 "",
2179 "Opteron/Athlon64",
2180 "Opteron QC/Phenom", // Barcelona et.al.
2181 "",
2182 "",
2183 "",
2184 "",
2185 "",
2186 "",
2187 "Zen"
2188};
2189// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2190// September 2013, Vol 3C Table 35-1
2191const char* const _model_id_pentium_pro[] = {
2192 "",
2193 "Pentium Pro",
2194 "",
2195 "Pentium II model 3",
2196 "",
2197 "Pentium II model 5/Xeon/Celeron",
2198 "Celeron",
2199 "Pentium III/Pentium III Xeon",
2200 "Pentium III/Pentium III Xeon",
2201 "Pentium M model 9", // Yonah
2202 "Pentium III, model A",
2203 "Pentium III, model B",
2204 "",
2205 "Pentium M model D", // Dothan
2206 "",
2207 "Core 2", // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
2208 "",
2209 "",
2210 "",
2211 "",
2212 "",
2213 "",
2214 "Celeron", // 0x16 Celeron 65nm
2215 "Core 2", // 0x17 Penryn / Harpertown
2216 "",
2217 "",
2218 "Core i7", // 0x1A CPU_MODEL_NEHALEM_EP
2219 "Atom", // 0x1B Z5xx series Silverthorn
2220 "",
2221 "Core 2", // 0x1D Dunnington (6-core)
2222 "Nehalem", // 0x1E CPU_MODEL_NEHALEM
2223 "",
2224 "",
2225 "",
2226 "",
2227 "",
2228 "",
2229 "Westmere", // 0x25 CPU_MODEL_WESTMERE
2230 "",
2231 "",
2232 "", // 0x28
2233 "",
2234 "Sandy Bridge", // 0x2a "2nd Generation Intel Core i7, i5, i3"
2235 "",
2236 "Westmere-EP", // 0x2c CPU_MODEL_WESTMERE_EP
2237 "Sandy Bridge-EP", // 0x2d CPU_MODEL_SANDYBRIDGE_EP
2238 "Nehalem-EX", // 0x2e CPU_MODEL_NEHALEM_EX
2239 "Westmere-EX", // 0x2f CPU_MODEL_WESTMERE_EX
2240 "",
2241 "",
2242 "",
2243 "",
2244 "",
2245 "",
2246 "",
2247 "",
2248 "",
2249 "",
2250 "Ivy Bridge", // 0x3a
2251 "",
2252 "Haswell", // 0x3c "4th Generation Intel Core Processor"
2253 "", // 0x3d "Next Generation Intel Core Processor"
2254 "Ivy Bridge-EP", // 0x3e "Next Generation Intel Xeon Processor E7 Family"
2255 "", // 0x3f "Future Generation Intel Xeon Processor"
2256 "",
2257 "",
2258 "",
2259 "",
2260 "",
2261 "Haswell", // 0x45 "4th Generation Intel Core Processor"
2262 "Haswell", // 0x46 "4th Generation Intel Core Processor"
2263 NULL
2264};
2265
2266/* Brand ID is for backward compatibility;
2267 * newer CPUs use the extended brand string */
2268const char* const _brand_id[] = {
2269 "",
2270 "Celeron processor",
2271 "Pentium III processor",
2272 "Intel Pentium III Xeon processor",
2273 "",
2274 "",
2275 "",
2276 "",
2277 "Intel Pentium 4 processor",
2278 NULL
2279};
2280
2281
2282const char* const _feature_edx_id[] = {
2283 "On-Chip FPU",
2284 "Virtual Mode Extensions",
2285 "Debugging Extensions",
2286 "Page Size Extensions",
2287 "Time Stamp Counter",
2288 "Model Specific Registers",
2289 "Physical Address Extension",
2290 "Machine Check Exceptions",
2291 "CMPXCHG8B Instruction",
2292 "On-Chip APIC",
2293 "",
2294 "Fast System Call",
2295 "Memory Type Range Registers",
2296 "Page Global Enable",
2297 "Machine Check Architecture",
2298 "Conditional Mov Instruction",
2299 "Page Attribute Table",
2300 "36-bit Page Size Extension",
2301 "Processor Serial Number",
2302 "CLFLUSH Instruction",
2303 "",
2304 "Debug Trace Store feature",
2305 "ACPI registers in MSR space",
2306 "Intel Architecture MMX Technology",
2307 "Fast Float Point Save and Restore",
2308 "Streaming SIMD extensions",
2309 "Streaming SIMD extensions 2",
2310 "Self-Snoop",
2311 "Hyper Threading",
2312 "Thermal Monitor",
2313 "",
2314 "Pending Break Enable"
2315};
2316
2317const char* const _feature_extended_edx_id[] = {
2318 "",
2319 "",
2320 "",
2321 "",
2322 "",
2323 "",
2324 "",
2325 "",
2326 "",
2327 "",
2328 "",
2329 "SYSCALL/SYSRET",
2330 "",
2331 "",
2332 "",
2333 "",
2334 "",
2335 "",
2336 "",
2337 "",
2338 "Execute Disable Bit",
2339 "",
2340 "",
2341 "",
2342 "",
2343 "",
2344 "",
2345 "RDTSCP",
2346 "",
2347 "Intel 64 Architecture",
2348 "",
2349 ""
2350};
2351
2352const char* const _feature_ecx_id[] = {
2353 "Streaming SIMD Extensions 3",
2354 "PCLMULQDQ",
2355 "64-bit DS Area",
2356 "MONITOR/MWAIT instructions",
2357 "CPL Qualified Debug Store",
2358 "Virtual Machine Extensions",
2359 "Safer Mode Extensions",
2360 "Enhanced Intel SpeedStep technology",
2361 "Thermal Monitor 2",
2362 "Supplemental Streaming SIMD Extensions 3",
2363 "L1 Context ID",
2364 "",
2365 "Fused Multiply-Add",
2366 "CMPXCHG16B",
2367 "xTPR Update Control",
2368 "Perfmon and Debug Capability",
2369 "",
2370 "Process-context identifiers",
2371 "Direct Cache Access",
2372 "Streaming SIMD extensions 4.1",
2373 "Streaming SIMD extensions 4.2",
2374 "x2APIC",
2375 "MOVBE",
2376 "Popcount instruction",
2377 "TSC-Deadline",
2378 "AESNI",
2379 "XSAVE",
2380 "OSXSAVE",
2381 "AVX",
2382 "F16C",
2383 "RDRAND",
2384 ""
2385};
2386
2387const char* const _feature_extended_ecx_id[] = {
2388 "LAHF/SAHF instruction support",
2389 "Core multi-processor legacy mode",
2390 "",
2391 "",
2392 "",
2393 "Advanced Bit Manipulations: LZCNT",
2394 "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2395 "Misaligned SSE mode",
2396 "",
2397 "",
2398 "",
2399 "",
2400 "",
2401 "",
2402 "",
2403 "",
2404 "",
2405 "",
2406 "",
2407 "",
2408 "",
2409 "",
2410 "",
2411 "",
2412 "",
2413 "",
2414 "",
2415 "",
2416 "",
2417 "",
2418 "",
2419 ""
2420};
2421
2422void VM_Version::initialize_tsc(void) {
2423 ResourceMark rm;
2424
2425 cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2426 if (cpuid_brand_string_stub_blob == NULL) {
2427 vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2428 }
2429 CodeBuffer c(cpuid_brand_string_stub_blob);
2430 VM_Version_StubGenerator g(&c);
2431 getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2432 g.generate_getCPUIDBrandString());
2433}
2434
2435const char* VM_Version::cpu_model_description(void) {
2436 uint32_t cpu_family = extended_cpu_family();
2437 uint32_t cpu_model = extended_cpu_model();
2438 const char* model = NULL;
2439
2440 if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2441 for (uint32_t i = 0; i <= cpu_model; i++) {
2442 model = _model_id_pentium_pro[i];
2443 if (model == NULL) {
2444 break;
2445 }
2446 }
2447 }
2448 return model;
2449}
2450
2451const char* VM_Version::cpu_brand_string(void) {
2452 if (_cpu_brand_string == NULL) {
2453 _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2454 if (NULL == _cpu_brand_string) {
2455 return NULL;
2456 }
2457 int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2458 if (ret_val != OS_OK) {
2459 FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2460 _cpu_brand_string = NULL;
2461 }
2462 }
2463 return _cpu_brand_string;
2464}
2465
2466const char* VM_Version::cpu_brand(void) {
2467 const char* brand = NULL;
2468
2469 if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2470 int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2471 brand = _brand_id[0];
2472 for (int i = 0; brand != NULL && i <= brand_num; i += 1) {
2473 brand = _brand_id[i];
2474 }
2475 }
2476 return brand;
2477}
2478
2479bool VM_Version::cpu_is_em64t(void) {
2480 return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2481}
2482
2483bool VM_Version::is_netburst(void) {
2484 return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2485}
2486
2487bool VM_Version::supports_tscinv_ext(void) {
2488 if (!supports_tscinv_bit()) {
2489 return false;
2490 }
2491
2492 if (is_intel()) {
2493 return true;
2494 }
2495
2496 if (is_amd()) {
2497 return !is_amd_Barcelona();
2498 }
2499
2500 if (is_hygon()) {
2501 return true;
2502 }
2503
2504 return false;
2505}
2506
2507void VM_Version::resolve_cpu_information_details(void) {
2508
2509 // In the future we want to base this information on proper CPU
2510 // and cache topology enumeration, such as
2511 // Intel 64 Architecture Processor Topology Enumeration,
2512 // which supports system CPU and cache topology enumeration
2513 // using either x2APIC IDs or initial APIC IDs
2514
2515 // currently only rough cpu information estimates
2516 // which will not necessarily reflect the exact configuration of the system
2517
2518 // this is the number of logical hardware threads
2519 // visible to the operating system
2520 _no_of_threads = os::processor_count();
2521
2522 // find out number of threads per cpu package
2523 int threads_per_package = threads_per_core() * cores_per_cpu();
2524
2525 // use amount of threads visible to the process in order to guess number of sockets
2526 _no_of_sockets = _no_of_threads / threads_per_package;
2527
2528 // The process might only see a subset of the total number of threads
2529 // from a single processor package (virtualization/resource management, for example).
2530 // If so, just write a hard 1 as the number of packages.
2531 if (0 == _no_of_sockets) {
2532 _no_of_sockets = 1;
2533 }
2534
2535 // estimate the number of cores
2536 _no_of_cores = cores_per_cpu() * _no_of_sockets;
2537}
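A worked example of the estimate above, with assumed numbers rather than values from a real machine: on a dual-socket box with 8 cores per package and 2-way SMT, a process that sees all 32 hardware threads is attributed 2 sockets and 16 cores:

#include <cstdio>

int main() {
  int threads_per_core = 2;   // SMT width
  int cores_per_cpu    = 8;   // cores per package
  int visible_threads  = 32;  // what os::processor_count() would report

  int threads_per_package = threads_per_core * cores_per_cpu;  // 16
  int sockets = visible_threads / threads_per_package;         // 2
  if (sockets == 0) sockets = 1;  // process may see only a subset
  int cores = cores_per_cpu * sockets;                         // 16

  printf("sockets=%d cores=%d threads=%d\n", sockets, cores, visible_threads);
  return 0;
}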
2538
2539
2540const char* VM_Version::cpu_family_description(void) {
2541 int cpu_family_id = extended_cpu_family();
2542 if (is_amd()) {
2543 if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2544 return _family_id_amd[cpu_family_id];
2545 }
2546 }
2547 if (is_intel()) {
2548 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2549 return cpu_model_description();
2550 }
2551 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2552 return _family_id_intel[cpu_family_id];
2553 }
2554 }
2555 if (is_hygon()) {
2556 return "Dhyana";
2557 }
2558 return "Unknown x86";
2559}
2560
2561int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2562 assert(buf != NULL, "buffer is NULL!");
2563 assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2564
2565 const char* cpu_type = NULL;
2566 const char* x64 = NULL;
2567
2568 if (is_intel()) {
2569 cpu_type = "Intel";
2570 x64 = cpu_is_em64t() ? " Intel64" : "";
2571 } else if (is_amd()) {
2572 cpu_type = "AMD";
2573 x64 = cpu_is_em64t() ? " AMD64" : "";
2574 } else if (is_hygon()) {
2575 cpu_type = "Hygon";
2576 x64 = cpu_is_em64t() ? " AMD64" : "";
2577 } else {
2578 cpu_type = "Unknown x86";
2579 x64 = cpu_is_em64t() ? " x86_64" : "";
2580 }
2581
2582 jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2583 cpu_type,
2584 cpu_family_description(),
2585 supports_ht() ? " (HT)" : "",
2586 supports_sse3() ? " SSE3" : "",
2587 supports_ssse3() ? " SSSE3" : "",
2588 supports_sse4_1() ? " SSE4.1" : "",
2589 supports_sse4_2() ? " SSE4.2" : "",
2590 supports_sse4a() ? " SSE4A" : "",
2591 is_netburst() ? " Netburst" : "",
2592 is_intel_family_core() ? " Core" : "",
2593 x64);
2594
2595 return OS_OK;
2596}
2597
2598int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2599 assert(buf != NULL, "buffer is NULL!");
2600 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2601 assert(getCPUIDBrandString_stub != NULL, "not initialized");
2602
2603 // invoke newly generated asm code to fetch CPU Brand String
2604 getCPUIDBrandString_stub(&_cpuid_info);
2605
2606 // fetch results into buffer
2607 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0;
2608 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1;
2609 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2;
2610 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2611 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2612 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2613 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2614 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2615 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2616 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2617 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2618 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2619
2620 return OS_OK;
2621}
2622
2623size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2624 guarantee(buf != NULL, "buffer is NULL!");
2625 guarantee(buf_len > 0, "buffer len not enough!");
2626
2627 unsigned int flag = 0;
2628 unsigned int fi = 0;
2629 size_t written = 0;
2630 const char* prefix = "";
2631
2632#define WRITE_TO_BUF(string) \
2633 { \
2634 int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2635 if (res < 0) { \
2636 return buf_len - 1; \
2637 } \
2638 written += res; \
2639 if (prefix[0] == '\0') { \
2640 prefix = ", "; \
2641 } \
2642 }
2643
2644 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2645 if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2646 continue; /* no hyperthreading */
2647 } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2648 continue; /* no fast system call */
2649 }
2650 if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2651 WRITE_TO_BUF(_feature_edx_id[fi]);
2652 }
2653 }
2654
2655 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2656 if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2657 WRITE_TO_BUF(_feature_ecx_id[fi]);
2658 }
2659 }
2660
2661 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2662 if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2663 WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2664 }
2665 }
2666
2667 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2668 if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2669 WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2670 }
2671 }
2672
2673 if (supports_tscinv_bit()) {
2674 WRITE_TO_BUF("Invariant TSC"){ int res = jio_snprintf(&buf[written], buf_len - written
, "%s%s", prefix, "Invariant TSC"); if (res < 0) { return buf_len
- 1; } written += res; if (prefix[0] == '\0') { prefix = ", "
; } }
;
2675 }
2676
2677 return written;
2678}
2679
2680/**
2681 * Write a detailed description of the cpu to a given buffer, including
2682 * feature set.
2683 */
2684int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2685 assert(buf != NULL, "buffer is NULL!");
2686 assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2687
2688 static const char* unknown = "<unknown>";
2689 char vendor_id[VENDOR_LENGTH];
2690 const char* family = NULL;
2691 const char* model = NULL;
2692 const char* brand = NULL;
2693 int outputLen = 0;
2694
2695 family = cpu_family_description();
2696 if (family == NULL) {
2697 family = unknown;
2698 }
2699
2700 model = cpu_model_description();
2701 if (model == NULL) {
2702 model = unknown;
2703 }
2704
2705 brand = cpu_brand_string();
2706
2707 if (brand == NULL) {
2708 brand = cpu_brand();
2709 if (brand == NULL) {
2710 brand = unknown;
2711 }
2712 }
2713
2714 *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2715 *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2716 *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2717 vendor_id[VENDOR_LENGTH-1] = '\0';
2718
2719 outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2720 "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2721 "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2722 "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2723 "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2724 "Supports: ",
2725 brand,
2726 vendor_id,
2727 family,
2728 extended_cpu_family(),
2729 model,
2730 extended_cpu_model(),
2731 cpu_stepping(),
2732 _cpuid_info.std_cpuid1_eax.bits.ext_family,
2733 _cpuid_info.std_cpuid1_eax.bits.ext_model,
2734 _cpuid_info.std_cpuid1_eax.bits.proc_type,
2735 _cpuid_info.std_cpuid1_eax.value,
2736 _cpuid_info.std_cpuid1_ebx.value,
2737 _cpuid_info.std_cpuid1_ecx.value,
2738 _cpuid_info.std_cpuid1_edx.value,
2739 _cpuid_info.ext_cpuid1_eax,
2740 _cpuid_info.ext_cpuid1_ebx,
2741 _cpuid_info.ext_cpuid1_ecx,
2742 _cpuid_info.ext_cpuid1_edx);
2743
2744 if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2745 if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2746 return OS_ERR;
2747 }
2748
2749 cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2750
2751 return OS_OK;
2752}
2753
2754
2755// Fill in Abstract_VM_Version statics
2756void VM_Version::initialize_cpu_information() {
2757 assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2758 assert(!_initialized, "shouldn't be initialized yet");
2759 resolve_cpu_information_details();
2760
2761 // initialize cpu_name and cpu_desc
2762 cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2763 cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2764 _initialized = true;
2765}
2766
2767/**
2768 * For information about extracting the frequency from the cpu brand string, please see:
2769 *
2770 * Intel Processor Identification and the CPUID Instruction
2771 * Application Note 485
2772 * May 2012
2773 *
2774 * The return value is the frequency in Hz.
2775 */
2776int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2777 const char* const brand_string = cpu_brand_string();
2778 if (brand_string == NULL) {
2779 return 0;
2780 }
2781 const int64_t MEGA = 1000000;
2782 int64_t multiplier = 0;
2783 int64_t frequency = 0;
2784 uint8_t idx = 0;
2785 // The brand string buffer is at most 48 bytes.
2786 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2787 for (; idx < 48-2; ++idx) {
2788 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2789 // Search brand string for "yHz" where y is M, G, or T.
2790 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2791 if (brand_string[idx] == 'M') {
2792 multiplier = MEGA;
2793 } else if (brand_string[idx] == 'G') {
2794 multiplier = MEGA * 1000;
2795 } else if (brand_string[idx] == 'T') {
2796 multiplier = MEGA * MEGA;
2797 }
2798 break;
2799 }
2800 }
2801 if (multiplier > 0) {
2802 // Compute frequency (in Hz) from brand string.
2803 if (brand_string[idx-3] == '.') { // if format is "x.xx"
2804 frequency = (brand_string[idx-4] - '0') * multiplier;
2805 frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2806 frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2807 } else { // format is "xxxx"
2808 frequency = (brand_string[idx-4] - '0') * 1000;
2809 frequency += (brand_string[idx-3] - '0') * 100;
2810 frequency += (brand_string[idx-2] - '0') * 10;
2811 frequency += (brand_string[idx-1] - '0');
2812 frequency *= multiplier;
2813 }
2814 }
2815 return frequency;
2816}
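
As a worked example of the parse above: for a brand string ending in "@ 3.20GHz" the scan stops with idx at the 'G', the multiplier becomes 10^9, and the "x.xx" branch yields 3*10^9 + 2*10^8 + 0*10^7 = 3,200,000,000 Hz. A self-contained re-implementation as a hedged sketch, under the same assumption the VM code makes (at least four characters precede "yHz", as the 48-byte CPUID brand buffer guarantees):

#include <cstdint>
#include <cstdio>
#include <cstring>

int64_t freq_from_brand(const char* s) {
  const int64_t MEGA = 1000000;
  int64_t multiplier = 0, frequency = 0;
  int idx = 0;
  int n = (int) strlen(s);
  for (; idx < n - 2 && idx < 48 - 2; ++idx) {   // stop 2 short: we read s[idx+2]
    if (s[idx + 1] == 'H' && s[idx + 2] == 'z') {
      if (s[idx] == 'M')      multiplier = MEGA;         // MHz
      else if (s[idx] == 'G') multiplier = MEGA * 1000;  // GHz
      else if (s[idx] == 'T') multiplier = MEGA * MEGA;  // THz
      break;
    }
  }
  if (multiplier > 0) {
    if (s[idx - 3] == '.') {        // "x.xx" form, e.g. "3.20GHz"
      frequency  = (s[idx - 4] - '0') * multiplier;
      frequency += (s[idx - 2] - '0') * multiplier / 10;
      frequency += (s[idx - 1] - '0') * multiplier / 100;
    } else {                        // "xxxx" form, e.g. "2400MHz"
      frequency  = (s[idx - 4] - '0') * 1000;
      frequency += (s[idx - 3] - '0') * 100;
      frequency += (s[idx - 2] - '0') * 10;
      frequency += (s[idx - 1] - '0');
      frequency *= multiplier;
    }
  }
  return frequency;
}

int main() {
  printf("%lld\n", (long long) freq_from_brand("Intel(R) Core(TM) i7 @ 3.20GHz")); // 3200000000
}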
2817
2818
2819int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2820 if (_max_qualified_cpu_frequency == 0) {
2821 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2822 }
2823 return _max_qualified_cpu_frequency;
2824}
2825

/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp

1/*
2 * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#ifndef CPU_X86_MACROASSEMBLER_X86_HPP
26#define CPU_X86_MACROASSEMBLER_X86_HPP
27
28#include "asm/assembler.hpp"
29#include "code/vmreg.inline.hpp"
30#include "compiler/oopMap.hpp"
31#include "utilities/macros.hpp"
32#include "runtime/rtmLocking.hpp"
33#include "runtime/vm_version.hpp"
34
35// MacroAssembler extends Assembler by frequently used macros.
36//
37// Instructions for which a 'better' code sequence exists depending
38// on arguments should also go in here.
39
40class MacroAssembler: public Assembler {
41 friend class LIR_Assembler;
42 friend class Runtime1; // as_Address()
43
44 public:
45 // Support for VM calls
46 //
47 // This is the base routine called by the different versions of call_VM_leaf. The interpreter
48 // may customize this version by overriding it for its purposes (e.g., to save/restore
49 // additional registers when doing a VM call).
50
51 virtual void call_VM_leaf_base(
52 address entry_point, // the entry point
53 int number_of_arguments // the number of arguments to pop after the call
54 );
55
56 protected:
57 // This is the base routine called by the different versions of call_VM. The interpreter
58 // may customize this version by overriding it for its purposes (e.g., to save/restore
59 // additional registers when doing a VM call).
60 //
 61 // If no java_thread register is specified (noreg) then rdi will be used instead. call_VM_base
62 // returns the register which contains the thread upon return. If a thread register has been
63 // specified, the return value will correspond to that register. If no last_java_sp is specified
 64 // (noreg) then rsp will be used instead.
65 virtual void call_VM_base( // returns the register containing the thread upon return
66 Register oop_result, // where an oop-result ends up if any; use noreg otherwise
67 Register java_thread, // the thread if computed before ; use noreg otherwise
68 Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise
69 address entry_point, // the entry point
70 int number_of_arguments, // the number of arguments (w/o thread) to pop after the call
71 bool check_exceptions // whether to check for pending exceptions after return
72 );
73
74 void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);
75
76 // helpers for FPU flag access
77 // tmp is a temporary register, if none is available use noreg
78 void save_rax (Register tmp);
79 void restore_rax(Register tmp);
80
81 public:
82 MacroAssembler(CodeBuffer* code) : Assembler(code) {}
83
84 // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
85 // The implementation is only non-empty for the InterpreterMacroAssembler,
86 // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
87 virtual void check_and_handle_popframe(Register java_thread);
88 virtual void check_and_handle_earlyret(Register java_thread);
89
90 Address as_Address(AddressLiteral adr);
91 Address as_Address(ArrayAddress adr);
92
93 // Support for NULL-checks
94 //
95 // Generates code that causes a NULL OS exception if the content of reg is NULL.
96 // If the accessed location is M[reg + offset] and the offset is known, provide the
97 // offset. No explicit code generation is needed if the offset is within a certain
98 // range (0 <= offset <= page_size).
99
100 void null_check(Register reg, int offset = -1);
101 static bool needs_explicit_null_check(intptr_t offset);
102 static bool uses_implicit_null_check(void* address);
103
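
For context, a hedged sketch of the usual decision rule behind these helpers (illustrative only; page_size is an assumed parameter, not the VM's actual query): an access at M[reg + offset] can rely on the hardware trap only while a NULL reg still faults, i.e. while the effective address stays inside the protected page at address zero.

#include <cstdint>

// Hypothetical illustration, not the HotSpot implementation.
static bool needs_explicit_null_check_sketch(intptr_t offset, intptr_t page_size) {
  // NULL + offset only faults if it still lands in the unmapped page at 0;
  // negative or large offsets could hit mapped memory, so test explicitly.
  return offset < 0 || offset >= page_size;
}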
104 // Required platform-specific helpers for Label::patch_instructions.
105 // They _shadow_ the declarations in AbstractAssembler, which are undefined.
106 void pd_patch_instruction(address branch, address target, const char* file, int line) {
107 unsigned char op = branch[0];
 108 assert(op == 0xE8 /* call */ ||
 109 op == 0xE9 /* jmp */ ||
 110 op == 0xEB /* short jmp */ ||
 111 (op & 0xF0) == 0x70 /* short jcc */ ||
 112 op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */ ||
 113 op == 0xC7 && branch[1] == 0xF8 /* xbegin */,
 114 "Invalid opcode at patch point");
115
116 if (op == 0xEB || (op & 0xF0) == 0x70) {
117 // short offset operators (jmp and jcc)
118 char* disp = (char*) &branch[1];
119 int imm8 = target - (address) &disp[1];
 120 guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset at %s:%d",
 121 file == NULL ? "<NULL>" : file, line);
122 *disp = imm8;
123 } else {
124 int* disp = (int*) &branch[(op == 0x0F || op == 0xC7)? 2: 1];
125 int imm32 = target - (address) &disp[1];
126 *disp = imm32;
127 }
128 }
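
The displacement arithmetic above follows the x86 rule that a relative branch stores target minus the address of the first byte after the displacement field. A standalone sketch for the rel32 jmp case (0xE9), mirroring the &disp[1] trick used above:

#include <cstdint>

// Sketch: patch a 5-byte rel32 jmp (0xE9 + imm32) at 'branch' to reach 'target'.
void patch_jmp_rel32_sketch(uint8_t* branch, uint8_t* target) {
  int32_t* disp = (int32_t*) &branch[1];             // imm32 follows the opcode byte
  *disp = (int32_t) (target - (uint8_t*) &disp[1]);  // &disp[1] is the next instruction
}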
129
130 // The following 4 methods return the offset of the appropriate move instruction
131
132 // Support for fast byte/short loading with zero extension (depending on particular CPU)
133 int load_unsigned_byte(Register dst, Address src);
134 int load_unsigned_short(Register dst, Address src);
135
136 // Support for fast byte/short loading with sign extension (depending on particular CPU)
137 int load_signed_byte(Register dst, Address src);
138 int load_signed_short(Register dst, Address src);
139
140 // Support for sign-extension (hi:lo = extend_sign(lo))
141 void extend_sign(Register hi, Register lo);
142
143 // Load and store values by size and signed-ness
144 void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
145 void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);
146
147 // Support for inc/dec with optimal instruction selection depending on value
148
 149 void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; }
 150 void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; }
151
152 void decrementl(Address dst, int value = 1);
153 void decrementl(Register reg, int value = 1);
154
155 void decrementq(Register reg, int value = 1);
156 void decrementq(Address dst, int value = 1);
157
158 void incrementl(Address dst, int value = 1);
159 void incrementl(Register reg, int value = 1);
160
161 void incrementq(Register reg, int value = 1);
162 void incrementq(Address dst, int value = 1);
163
164 // Support optimal SSE move instructions.
165 void movflt(XMMRegister dst, XMMRegister src) {
166 if (dst-> encoding() == src->encoding()) return;
167 if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
168 else { movss (dst, src); return; }
169 }
170 void movflt(XMMRegister dst, Address src) { movss(dst, src); }
171 void movflt(XMMRegister dst, AddressLiteral src);
172 void movflt(Address dst, XMMRegister src) { movss(dst, src); }
173
174 // Move with zero extension
175 void movfltz(XMMRegister dst, XMMRegister src) { movss(dst, src); }
176
177 void movdbl(XMMRegister dst, XMMRegister src) {
178 if (dst-> encoding() == src->encoding()) return;
179 if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; }
180 else { movsd (dst, src); return; }
181 }
182
183 void movdbl(XMMRegister dst, AddressLiteral src);
184
185 void movdbl(XMMRegister dst, Address src) {
186 if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; }
187 else { movlpd(dst, src); return; }
188 }
189 void movdbl(Address dst, XMMRegister src) { movsd(dst, src); }
190
191 void incrementl(AddressLiteral dst);
192 void incrementl(ArrayAddress dst);
193
194 void incrementq(AddressLiteral dst);
195
196 // Alignment
197 void align32();
198 void align64();
199 void align(int modulus);
200 void align(int modulus, int target);
201
202 // A 5 byte nop that is safe for patching (see patch_verified_entry)
203 void fat_nop();
204
205 // Stack frame creation/removal
206 void enter();
207 void leave();
208
 209 // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
210 // The pointer will be loaded into the thread register.
211 void get_thread(Register thread);
212
 213#ifdef _LP64
214 // Support for argument shuffling
215
216 void move32_64(VMRegPair src, VMRegPair dst);
217 void long_move(VMRegPair src, VMRegPair dst);
218 void float_move(VMRegPair src, VMRegPair dst);
219 void double_move(VMRegPair src, VMRegPair dst);
220 void move_ptr(VMRegPair src, VMRegPair dst);
221 void object_move(OopMap* map,
222 int oop_handle_offset,
223 int framesize_in_slots,
224 VMRegPair src,
225 VMRegPair dst,
226 bool is_receiver,
227 int* receiver_offset);
228#endif // _LP64
229
230 // Support for VM calls
231 //
232 // It is imperative that all calls into the VM are handled via the call_VM macros.
233 // They make sure that the stack linkage is setup correctly. call_VM's correspond
234 // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
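
A hedged usage sketch of the first overload below, assuming the usual '#define __ masm->' shorthand; the entry point is a placeholder, not taken from this file:

// Illustrative only: SomeRuntime::some_entry is a hypothetical VM entry point.
__ call_VM(rax,                                             // oop result arrives in rax
           CAST_FROM_FN_PTR(address, SomeRuntime::some_entry),
           rbx,                                             // single Register argument
           true /* check_exceptions */);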
235
236
237 void call_VM(Register oop_result,
238 address entry_point,
239 bool check_exceptions = true);
240 void call_VM(Register oop_result,
241 address entry_point,
242 Register arg_1,
243 bool check_exceptions = true);
244 void call_VM(Register oop_result,
245 address entry_point,
246 Register arg_1, Register arg_2,
247 bool check_exceptions = true);
248 void call_VM(Register oop_result,
249 address entry_point,
250 Register arg_1, Register arg_2, Register arg_3,
251 bool check_exceptions = true);
252
253 // Overloadings with last_Java_sp
254 void call_VM(Register oop_result,
255 Register last_java_sp,
256 address entry_point,
257 int number_of_arguments = 0,
258 bool check_exceptions = true);
259 void call_VM(Register oop_result,
260 Register last_java_sp,
261 address entry_point,
262 Register arg_1, bool
263 check_exceptions = true);
264 void call_VM(Register oop_result,
265 Register last_java_sp,
266 address entry_point,
267 Register arg_1, Register arg_2,
268 bool check_exceptions = true);
269 void call_VM(Register oop_result,
270 Register last_java_sp,
271 address entry_point,
272 Register arg_1, Register arg_2, Register arg_3,
273 bool check_exceptions = true);
274
275 void get_vm_result (Register oop_result, Register thread);
276 void get_vm_result_2(Register metadata_result, Register thread);
277
278 // These always tightly bind to MacroAssembler::call_VM_base
279 // bypassing the virtual implementation
280 void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
281 void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true);
282 void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
283 void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
284 void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true);
285
286 void call_VM_leaf0(address entry_point);
287 void call_VM_leaf(address entry_point,
288 int number_of_arguments = 0);
289 void call_VM_leaf(address entry_point,
290 Register arg_1);
291 void call_VM_leaf(address entry_point,
292 Register arg_1, Register arg_2);
293 void call_VM_leaf(address entry_point,
294 Register arg_1, Register arg_2, Register arg_3);
295
296 // These always tightly bind to MacroAssembler::call_VM_leaf_base
297 // bypassing the virtual implementation
298 void super_call_VM_leaf(address entry_point);
299 void super_call_VM_leaf(address entry_point, Register arg_1);
300 void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
301 void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
302 void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4);
303
304 // last Java Frame (fills frame anchor)
305 void set_last_Java_frame(Register thread,
306 Register last_java_sp,
307 Register last_java_fp,
308 address last_java_pc);
309
310 // thread in the default location (r15_thread on 64bit)
311 void set_last_Java_frame(Register last_java_sp,
312 Register last_java_fp,
313 address last_java_pc);
314
315 void reset_last_Java_frame(Register thread, bool clear_fp);
316
317 // thread in the default location (r15_thread on 64bit)
318 void reset_last_Java_frame(bool clear_fp);
319
320 // jobjects
321 void clear_jweak_tag(Register possibly_jweak);
322 void resolve_jobject(Register value, Register thread, Register tmp);
323
324 // C 'boolean' to Java boolean: x == 0 ? 0 : 1
325 void c2bool(Register x);
326
327 // C++ bool manipulation
328
329 void movbool(Register dst, Address src);
330 void movbool(Address dst, bool boolconst);
331 void movbool(Address dst, Register src);
332 void testbool(Register dst);
333
334 void resolve_oop_handle(Register result, Register tmp = rscratch2);
335 void resolve_weak_handle(Register result, Register tmp);
336 void load_mirror(Register mirror, Register method, Register tmp = rscratch2);
337 void load_method_holder_cld(Register rresult, Register rmethod);
338
339 void load_method_holder(Register holder, Register method);
340
341 // oop manipulations
342 void load_klass(Register dst, Register src, Register tmp);
343 void store_klass(Register dst, Register src, Register tmp);
344
345 void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
346 Register tmp1, Register thread_tmp);
347 void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src,
348 Register tmp1, Register tmp2);
349
350 void load_heap_oop(Register dst, Address src, Register tmp1 = noreg,
351 Register thread_tmp = noreg, DecoratorSet decorators = 0);
352 void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg,
353 Register thread_tmp = noreg, DecoratorSet decorators = 0);
354 void store_heap_oop(Address dst, Register src, Register tmp1 = noreg,
355 Register tmp2 = noreg, DecoratorSet decorators = 0);
356
357 // Used for storing NULL. All other oop constants should be
358 // stored using routines that take a jobject.
359 void store_heap_oop_null(Address dst);
360
 361#ifdef _LP64
362 void store_klass_gap(Register dst, Register src);
363
364 // This dummy is to prevent a call to store_heap_oop from
365 // converting a zero (like NULL) into a Register by giving
366 // the compiler two choices it can't resolve
367
368 void store_heap_oop(Address dst, void* dummy);
369
370 void encode_heap_oop(Register r);
371 void decode_heap_oop(Register r);
372 void encode_heap_oop_not_null(Register r);
373 void decode_heap_oop_not_null(Register r);
374 void encode_heap_oop_not_null(Register dst, Register src);
375 void decode_heap_oop_not_null(Register dst, Register src);
376
377 void set_narrow_oop(Register dst, jobject obj);
378 void set_narrow_oop(Address dst, jobject obj);
379 void cmp_narrow_oop(Register dst, jobject obj);
380 void cmp_narrow_oop(Address dst, jobject obj);
381
382 void encode_klass_not_null(Register r, Register tmp);
383 void decode_klass_not_null(Register r, Register tmp);
384 void encode_and_move_klass_not_null(Register dst, Register src);
385 void decode_and_move_klass_not_null(Register dst, Register src);
386 void set_narrow_klass(Register dst, Klass* k);
387 void set_narrow_klass(Address dst, Klass* k);
388 void cmp_narrow_klass(Register dst, Klass* k);
389 void cmp_narrow_klass(Address dst, Klass* k);
390
391 // if heap base register is used - reinit it with the correct value
392 void reinit_heapbase();
393
 394 DEBUG_ONLY(void verify_heapbase(const char* msg);)
395
396#endif // _LP64
397
398 // Int division/remainder for Java
399 // (as idivl, but checks for special case as described in JVM spec.)
400 // returns idivl instruction offset for implicit exception handling
401 int corrected_idivl(Register reg);
402
403 // Long division/remainder for Java
404 // (as idivq, but checks for special case as described in JVM spec.)
405 // returns idivq instruction offset for implicit exception handling
406 int corrected_idivq(Register reg);
407
408 void int3();
409
410 // Long operation macros for a 32bit cpu
411 // Long negation for Java
412 void lneg(Register hi, Register lo);
413
414 // Long multiplication for Java
415 // (destroys contents of eax, ebx, ecx and edx)
416 void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y
417
418 // Long shifts for Java
419 // (semantics as described in JVM spec.)
420 void lshl(Register hi, Register lo); // hi:lo << (rcx & 0x3f)
421 void lshr(Register hi, Register lo, bool sign_extension = false); // hi:lo >> (rcx & 0x3f)
422
423 // Long compare for Java
424 // (semantics as described in JVM spec.)
425 void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y)
426
427
428 // misc
429
430 // Sign extension
431 void sign_extend_short(Register reg);
432 void sign_extend_byte(Register reg);
433
434 // Division by power of 2, rounding towards 0
435 void division_with_shift(Register reg, int shift_value);
436
 437#ifndef _LP64
438 // Compares the top-most stack entries on the FPU stack and sets the eflags as follows:
439 //
440 // CF (corresponds to C0) if x < y
441 // PF (corresponds to C2) if unordered
442 // ZF (corresponds to C3) if x = y
443 //
444 // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
445 // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code)
446 void fcmp(Register tmp);
447 // Variant of the above which allows y to be further down the stack
448 // and which only pops x and y if specified. If pop_right is
449 // specified then pop_left must also be specified.
450 void fcmp(Register tmp, int index, bool pop_left, bool pop_right);
451
452 // Floating-point comparison for Java
453 // Compares the top-most stack entries on the FPU stack and stores the result in dst.
454 // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
455 // (semantics as described in JVM spec.)
456 void fcmp2int(Register dst, bool unordered_is_less);
457 // Variant of the above which allows y to be further down the stack
458 // and which only pops x and y if specified. If pop_right is
459 // specified then pop_left must also be specified.
460 void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right);
461
462 // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards)
463 // tmp is a temporary register, if none is available use noreg
464 void fremr(Register tmp);
465
466 // only if +VerifyFPU
467 void verify_FPU(int stack_depth, const char* s = "illegal FPU state");
468#endif // !LP64
469
470 // dst = c = a * b + c
471 void fmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c);
472 void fmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c);
473
474 void vfmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len);
475 void vfmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len);
476 void vfmad(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len);
477 void vfmaf(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len);
478
479
480 // same as fcmp2int, but using SSE2
481 void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
482 void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
483
484 // branch to L if FPU flag C2 is set/not set
485 // tmp is a temporary register, if none is available use noreg
486 void jC2 (Register tmp, Label& L);
487 void jnC2(Register tmp, Label& L);
488
489 // Load float value from 'address'. If UseSSE >= 1, the value is loaded into
490 // register xmm0. Otherwise, the value is loaded onto the FPU stack.
491 void load_float(Address src);
492
493 // Store float value to 'address'. If UseSSE >= 1, the value is stored
494 // from register xmm0. Otherwise, the value is stored from the FPU stack.
495 void store_float(Address dst);
496
497 // Load double value from 'address'. If UseSSE >= 2, the value is loaded into
498 // register xmm0. Otherwise, the value is loaded onto the FPU stack.
499 void load_double(Address src);
500
501 // Store double value to 'address'. If UseSSE >= 2, the value is stored
502 // from register xmm0. Otherwise, the value is stored from the FPU stack.
503 void store_double(Address dst);
504
 505#ifndef _LP64
506 // Pop ST (ffree & fincstp combined)
507 void fpop();
508
509 void empty_FPU_stack();
510#endif // !_LP64
511
512 void push_IU_state();
513 void pop_IU_state();
514
515 void push_FPU_state();
516 void pop_FPU_state();
517
518 void push_CPU_state();
519 void pop_CPU_state();
520
521 // Round up to a power of two
522 void round_to(Register reg, int modulus);
523
524 // Callee saved registers handling
525 void push_callee_saved_registers();
526 void pop_callee_saved_registers();
527
528 // allocation
529 void eden_allocate(
530 Register thread, // Current thread
531 Register obj, // result: pointer to object after successful allocation
532 Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
533 int con_size_in_bytes, // object size in bytes if known at compile time
534 Register t1, // temp register
535 Label& slow_case // continuation point if fast allocation fails
536 );
537 void tlab_allocate(
538 Register thread, // Current thread
539 Register obj, // result: pointer to object after successful allocation
540 Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
541 int con_size_in_bytes, // object size in bytes if known at compile time
542 Register t1, // temp register
543 Register t2, // temp register
544 Label& slow_case // continuation point if fast allocation fails
545 );
546 void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp);
547
548 // interface method calling
549 void lookup_interface_method(Register recv_klass,
550 Register intf_klass,
551 RegisterOrConstant itable_index,
552 Register method_result,
553 Register scan_temp,
554 Label& no_such_interface,
555 bool return_method = true);
556
557 // virtual method calling
558 void lookup_virtual_method(Register recv_klass,
559 RegisterOrConstant vtable_index,
560 Register method_result);
561
562 // Test sub_klass against super_klass, with fast and slow paths.
563
564 // The fast path produces a tri-state answer: yes / no / maybe-slow.
565 // One of the three labels can be NULL, meaning take the fall-through.
566 // If super_check_offset is -1, the value is loaded up from super_klass.
567 // No registers are killed, except temp_reg.
568 void check_klass_subtype_fast_path(Register sub_klass,
569 Register super_klass,
570 Register temp_reg,
571 Label* L_success,
572 Label* L_failure,
573 Label* L_slow_path,
574 RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
575
576 // The rest of the type check; must be wired to a corresponding fast path.
577 // It does not repeat the fast path logic, so don't use it standalone.
578 // The temp_reg and temp2_reg can be noreg, if no temps are available.
579 // Updates the sub's secondary super cache as necessary.
580 // If set_cond_codes, condition codes will be Z on success, NZ on failure.
581 void check_klass_subtype_slow_path(Register sub_klass,
582 Register super_klass,
583 Register temp_reg,
584 Register temp2_reg,
585 Label* L_success,
586 Label* L_failure,
587 bool set_cond_codes = false);
588
589 // Simplified, combined version, good for typical uses.
590 // Falls through on failure.
591 void check_klass_subtype(Register sub_klass,
592 Register super_klass,
593 Register temp_reg,
594 Label& L_success);
595
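
A hedged sketch of the typical call pattern, using the combined version above and assuming the usual '#define __ masm->' shorthand (register names and the failure target are placeholders):

// Illustrative only: rsub/rsuper/rtmp are placeholder Registers.
Label L_ok;
__ check_klass_subtype(rsub, rsuper, rtmp, L_ok);  // jumps to L_ok on success
// falls through here on failure
__ jump(RuntimeAddress(some_failure_stub));        // hypothetical failure target
__ bind(L_ok);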
596 void clinit_barrier(Register klass,
597 Register thread,
 598 Label* L_fast_path = NULL,
 599 Label* L_slow_path = NULL);
600
601 // method handles (JSR 292)
602 Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
603
604 // Debugging
605
606 // only if +VerifyOops
607 void _verify_oop(Register reg, const char* s, const char* file, int line);
608 void _verify_oop_addr(Address addr, const char* s, const char* file, int line);
609
610 void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
611 if (VerifyOops) {
612 _verify_oop(reg, s, file, line);
613 }
614 }
615 void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
616 if (VerifyOops) {
617 _verify_oop_addr(reg, s, file, line);
618 }
619 }
620
621 // TODO: verify method and klass metadata (compare against vptr?)
622 void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
623 void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){}
624
 625#define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
 626#define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
 627#define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
 628#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
 629#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
630
631 // Verify or restore cpu control state after JNI call
632 void restore_cpu_control_state_after_jni();
633
634 // prints msg, dumps registers and stops execution
635 void stop(const char* msg);
636
637 // prints msg and continues
638 void warn(const char* msg);
639
640 // dumps registers and other state
641 void print_state();
642
643 static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg);
644 static void debug64(char* msg, int64_t pc, int64_t regs[]);
645 static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip);
646 static void print_state64(int64_t pc, int64_t regs[]);
647
648 void os_breakpoint();
649
650 void untested() { stop("untested"); }
651
652 void unimplemented(const char* what = "");
653
654 void should_not_reach_here() { stop("should not reach here"); }
655
656 void print_CPU_state();
657
658 // Stack overflow checking
659 void bang_stack_with_offset(int offset) {
660 // stack grows down, caller passes positive offset
 661 assert(offset > 0, "must bang with negative offset");
662 movl(Address(rsp, (-offset)), rax);
663 }
664
665 // Writes to stack successive pages until offset reached to check for
666 // stack overflow + shadow pages. Also, clobbers tmp
667 void bang_stack_size(Register size, Register tmp);
668
669 // Check for reserved stack access in method being exited (for JIT)
670 void reserved_stack_check();
671
672 void safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool in_nmethod);
673
674 void verify_tlab();
675
676 Condition negate_condition(Condition cond);
677
 678 // Instructions that use AddressLiteral operands. These instructions can handle 32bit/64bit
679 // operands. In general the names are modified to avoid hiding the instruction in Assembler
680 // so that we don't need to implement all the varieties in the Assembler with trivial wrappers
681 // here in MacroAssembler. The major exception to this rule is call
682
683 // Arithmetics
684
685
 686 void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)) ; }
687 void addptr(Address dst, Register src);
688
 689 void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); }
690 void addptr(Register dst, int32_t src);
691 void addptr(Register dst, Register src);
692 void addptr(Register dst, RegisterOrConstant src) {
693 if (src.is_constant()) addptr(dst, (int) src.as_constant());
694 else addptr(dst, src.as_register());
695 }
696
697 void andptr(Register dst, int32_t src);
 698 void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2)) NOT_LP64(andl(src1, src2)) ; }
699
700 void cmp8(AddressLiteral src1, int imm);
701
702 // renamed to drag out the casting of address to int32_t/intptr_t
703 void cmp32(Register src1, int32_t imm);
704
705 void cmp32(AddressLiteral src1, int32_t imm);
706 // compare reg - mem, or reg - &mem
707 void cmp32(Register src1, AddressLiteral src2);
708
709 void cmp32(Register src1, Address src2);
710
 711#ifndef _LP64
712 void cmpklass(Address dst, Metadata* obj);
713 void cmpklass(Register dst, Metadata* obj);
714 void cmpoop(Address dst, jobject obj);
715#endif // _LP64
716
717 void cmpoop(Register src1, Register src2);
718 void cmpoop(Register src1, Address src2);
719 void cmpoop(Register dst, jobject obj);
720
 721 // NOTE src2 must be the lval. This is NOT a mem-mem compare
722 void cmpptr(Address src1, AddressLiteral src2);
723
724 void cmpptr(Register src1, AddressLiteral src2);
725
 726 void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
 727 void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
728 // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
729
 730 void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
 731 void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
732
 733 // cmp64 to avoid hiding cmpq
734 void cmp64(Register src1, AddressLiteral src);
735
736 void cmpxchgptr(Register reg, Address adr);
737
738 void locked_cmpxchgptr(Register reg, AddressLiteral adr);
739
740
 741 void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); }
 742 void imulptr(Register dst, Register src, int imm32) { LP64_ONLY(imulq(dst, src, imm32)) NOT_LP64(imull(dst, src, imm32)); }
743
744
 745 void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); }
746
 747 void notptr(Register dst) { LP64_ONLY(notq(dst)) NOT_LP64(notl(dst)); }
748
749 void shlptr(Register dst, int32_t shift);
 750 void shlptr(Register dst) { LP64_ONLY(shlq(dst)) NOT_LP64(shll(dst)); }
751
752 void shrptr(Register dst, int32_t shift);
 753 void shrptr(Register dst) { LP64_ONLY(shrq(dst)) NOT_LP64(shrl(dst)); }
754
 755 void sarptr(Register dst) { LP64_ONLY(sarq(dst)) NOT_LP64(sarl(dst)); }
 756 void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src)) NOT_LP64(sarl(dst, src)); }
757
 758 void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
759
 760 void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
761 void subptr(Register dst, int32_t src);
762 // Force generation of a 4 byte immediate value even if it fits into 8bit
763 void subptr_imm32(Register dst, int32_t src);
764 void subptr(Register dst, Register src);
765 void subptr(Register dst, RegisterOrConstant src) {
766 if (src.is_constant()) subptr(dst, (int) src.as_constant());
767 else subptr(dst, src.as_register());
768 }
769
 770 void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
 771 void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
772
 773 void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
 774 void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
775
 776 void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2)) NOT_LP64(xaddl(src1, src2)) ; }
777
778
779
780 // Helper functions for statistics gathering.
781 // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes.
782 void cond_inc32(Condition cond, AddressLiteral counter_addr);
783 // Unconditional atomic increment.
784 void atomic_incl(Address counter_addr);
785 void atomic_incl(AddressLiteral counter_addr, Register scr = rscratch1);
 786#ifdef _LP64
787 void atomic_incq(Address counter_addr);
788 void atomic_incq(AddressLiteral counter_addr, Register scr = rscratch1);
789#endif
 790 void atomic_incptr(AddressLiteral counter_addr, Register scr = rscratch1) { LP64_ONLY(atomic_incq(counter_addr, scr)) NOT_LP64(atomic_incl(counter_addr, scr)) ; }
 791 void atomic_incptr(Address counter_addr) { LP64_ONLY(atomic_incq(counter_addr)) NOT_LP64(atomic_incl(counter_addr)) ; }
792
793 void lea(Register dst, AddressLiteral adr);
794 void lea(Address dst, AddressLiteral adr);
795 void lea(Register dst, Address adr) { Assembler::lea(dst, adr); }
796
797 void leal32(Register dst, Address src) { leal(dst, src); }
798
799 // Import other testl() methods from the parent class or else
800 // they will be hidden by the following overriding declaration.
801 using Assembler::testl;
802 void testl(Register dst, AddressLiteral src);
803
 804 void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
 805 void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
 806 void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
 807 void orptr(Address dst, int32_t imm32) { LP64_ONLY(orq(dst, imm32)) NOT_LP64(orl(dst, imm32)); }
808
 809 void testptr(Register src, int32_t imm32) { LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); }
 810 void testptr(Register src1, Address src2) { LP64_ONLY(testq(src1, src2)) NOT_LP64(testl(src1, src2)); }
811 void testptr(Register src1, Register src2);
812
 813 void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
 814 void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
815
816 // Calls
817
818 void call(Label& L, relocInfo::relocType rtype);
819 void call(Register entry);
820 void call(Address addr) { Assembler::call(addr); }
821
822 // NOTE: this call transfers to the effective address of entry NOT
823 // the address contained by entry. This is because this is more natural
824 // for jumps/calls.
825 void call(AddressLiteral entry);
826
827 // Emit the CompiledIC call idiom
828 void ic_call(address entry, jint method_index = 0);
829
830 // Jumps
831
 832 // NOTE: these jumps transfer to the effective address of dst NOT
833 // the address contained by dst. This is because this is more natural
834 // for jumps/calls.
835 void jump(AddressLiteral dst);
836 void jump_cc(Condition cc, AddressLiteral dst);
837
838 // 32bit can do a case table jump in one instruction but we no longer allow the base
 839 // to be installed in the Address class. This jump transfers to the address
840 // contained in the location described by entry (not the address of entry)
841 void jump(ArrayAddress entry);
842
843 // Floating
844
845 void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
846 void andpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
847 void andpd(XMMRegister dst, XMMRegister src) { Assembler::andpd(dst, src); }
848
849 void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
850 void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
851 void andps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
852
853 void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); }
854 void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
855 void comiss(XMMRegister dst, AddressLiteral src);
856
857 void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); }
858 void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
859 void comisd(XMMRegister dst, AddressLiteral src);
860
 861#ifndef _LP64
862 void fadd_s(Address src) { Assembler::fadd_s(src); }
863 void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); }
864
865 void fldcw(Address src) { Assembler::fldcw(src); }
866 void fldcw(AddressLiteral src);
867
868 void fld_s(int index) { Assembler::fld_s(index); }
869 void fld_s(Address src) { Assembler::fld_s(src); }
870 void fld_s(AddressLiteral src);
871
872 void fld_d(Address src) { Assembler::fld_d(src); }
873 void fld_d(AddressLiteral src);
874
875 void fmul_s(Address src) { Assembler::fmul_s(src); }
876 void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); }
877#endif // _LP64
878
879 void fld_x(Address src) { Assembler::fld_x(src); }
880 void fld_x(AddressLiteral src);
881
882 void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
883 void ldmxcsr(AddressLiteral src);
884
 885#ifdef _LP64
886 private:
887 void sha256_AVX2_one_round_compute(
888 Register reg_old_h,
889 Register reg_a,
890 Register reg_b,
891 Register reg_c,
892 Register reg_d,
893 Register reg_e,
894 Register reg_f,
895 Register reg_g,
896 Register reg_h,
897 int iter);
898 void sha256_AVX2_four_rounds_compute_first(int start);
899 void sha256_AVX2_four_rounds_compute_last(int start);
900 void sha256_AVX2_one_round_and_sched(
901 XMMRegister xmm_0, /* == ymm4 on 0, 1, 2, 3 iterations, then rotate 4 registers left on 4, 8, 12 iterations */
902 XMMRegister xmm_1, /* ymm5 */ /* full cycle is 16 iterations */
903 XMMRegister xmm_2, /* ymm6 */
904 XMMRegister xmm_3, /* ymm7 */
905 Register reg_a, /* == eax on 0 iteration, then rotate 8 register right on each next iteration */
906 Register reg_b, /* ebx */ /* full cycle is 8 iterations */
907 Register reg_c, /* edi */
908 Register reg_d, /* esi */
909 Register reg_e, /* r8d */
910 Register reg_f, /* r9d */
911 Register reg_g, /* r10d */
912 Register reg_h, /* r11d */
913 int iter);
914
915 void addm(int disp, Register r1, Register r2);
916 void gfmul(XMMRegister tmp0, XMMRegister t);
917 void schoolbookAAD(int i, Register subkeyH, XMMRegister data, XMMRegister tmp0,
918 XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3);
919 void generateHtbl_one_block(Register htbl);
920 void generateHtbl_eight_blocks(Register htbl);
921 public:
922 void sha256_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
923 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
924 Register buf, Register state, Register ofs, Register limit, Register rsp,
925 bool multi_block, XMMRegister shuf_mask);
926 void avx_ghash(Register state, Register htbl, Register data, Register blocks);
927#endif
928
 929#ifdef _LP64
930 private:
931 void sha512_AVX2_one_round_compute(Register old_h, Register a, Register b, Register c, Register d,
932 Register e, Register f, Register g, Register h, int iteration);
933
934 void sha512_AVX2_one_round_and_schedule(XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
935 Register a, Register b, Register c, Register d, Register e, Register f,
936 Register g, Register h, int iteration);
937
938 void addmq(int disp, Register r1, Register r2);
939 public:
940 void sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
941 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
942 Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block,
943 XMMRegister shuf_mask);
944private:
945 void roundEnc(XMMRegister key, int rnum);
946 void lastroundEnc(XMMRegister key, int rnum);
947 void roundDec(XMMRegister key, int rnum);
948 void lastroundDec(XMMRegister key, int rnum);
949 void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
950 void gfmul_avx512(XMMRegister ghash, XMMRegister hkey);
951 void generateHtbl_48_block_zmm(Register htbl, Register avx512_subkeyHtbl);
952 void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx,
953 XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool reduction,
954 XMMRegister addmask, bool no_ghash_input, Register rounds, Register ghash_pos,
955 bool final_reduction, int index, XMMRegister counter_inc_mask);
956public:
957 void aesecb_encrypt(Register source_addr, Register dest_addr, Register key, Register len);
958 void aesecb_decrypt(Register source_addr, Register dest_addr, Register key, Register len);
959 void aesctr_encrypt(Register src_addr, Register dest_addr, Register key, Register counter,
960 Register len_reg, Register used, Register used_addr, Register saved_encCounter_start);
961 void aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key,
962 Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter);
963
964#endif
965
966 void fast_md5(Register buf, Address state, Address ofs, Address limit,
967 bool multi_block);
968
969 void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
970 XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask,
971 Register buf, Register state, Register ofs, Register limit, Register rsp,
972 bool multi_block);
973
 974#ifdef _LP64
975 void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
976 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
977 Register buf, Register state, Register ofs, Register limit, Register rsp,
978 bool multi_block, XMMRegister shuf_mask);
979#else
980 void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
981 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
982 Register buf, Register state, Register ofs, Register limit, Register rsp,
983 bool multi_block);
984#endif
985
986 void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
987 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
988 Register rax, Register rcx, Register rdx, Register tmp);
989
 990#ifdef _LP64
991 void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
992 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
993 Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2);
994
995 void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
996 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
997 Register rax, Register rcx, Register rdx, Register r11);
998
999 void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
1000 XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
1001 Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4);
1002
1003 void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1004 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1005 Register rax, Register rbx, Register rcx, Register rdx, Register tmp1, Register tmp2,
1006 Register tmp3, Register tmp4);
1007
1008 void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1009 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1010 Register rax, Register rcx, Register rdx, Register tmp1,
1011 Register tmp2, Register tmp3, Register tmp4);
1012 void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1013 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1014 Register rax, Register rcx, Register rdx, Register tmp1,
1015 Register tmp2, Register tmp3, Register tmp4);
1016#else
1017 void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1018 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1019 Register rax, Register rcx, Register rdx, Register tmp1);
1020
1021 void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1022 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1023 Register rax, Register rcx, Register rdx, Register tmp);
1024
1025 void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
1026 XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
1027 Register rdx, Register tmp);
1028
1029 void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1030 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1031 Register rax, Register rbx, Register rdx);
1032
1033 void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1034 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1035 Register rax, Register rcx, Register rdx, Register tmp);
1036
1037 void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
1038 Register edx, Register ebx, Register esi, Register edi,
1039 Register ebp, Register esp);
1040
1041 void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx,
1042 Register esi, Register edi, Register ebp, Register esp);
1043
1044 void libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
1045 Register edx, Register ebx, Register esi, Register edi,
1046 Register ebp, Register esp);
1047
1048 void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1049 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1050 Register rax, Register rcx, Register rdx, Register tmp);
1051#endif
1052
1053private:
1054
1055 // these are private because users should be doing movflt/movdbl
1056
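The movflt/movdbl helpers referred to above are declared elsewhere in this class; a hedged usage sketch (register names illustrative):

    // Preferred spellings; the helpers pick the best encoding per operand kind.
    //   masm->movflt(xmm0, xmm1);            // instead of raw movss
    //   masm->movdbl(xmm2, Address(rsp, 8)); // instead of raw movsd/movlpd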
1057 void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); }
1058 void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); }
1059 void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); }
1060 void movss(XMMRegister dst, AddressLiteral src);
1061
1062 void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); }
1063 void movlpd(XMMRegister dst, AddressLiteral src);
1064
1065public:
1066
1067 void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); }
1068 void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); }
1069 void addsd(XMMRegister dst, AddressLiteral src);
1070
1071 void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); }
1072 void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); }
1073 void addss(XMMRegister dst, AddressLiteral src);
1074
1075 void addpd(XMMRegister dst, XMMRegister src) { Assembler::addpd(dst, src); }
1076 void addpd(XMMRegister dst, Address src) { Assembler::addpd(dst, src); }
1077 void addpd(XMMRegister dst, AddressLiteral src);
1078
1079 void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); }
1080 void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); }
1081 void divsd(XMMRegister dst, AddressLiteral src);
1082
1083 void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); }
1084 void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); }
1085 void divss(XMMRegister dst, AddressLiteral src);
1086
1087 // Move Unaligned Double Quadword
1088 void movdqu(Address dst, XMMRegister src);
1089 void movdqu(XMMRegister dst, Address src);
1090 void movdqu(XMMRegister dst, XMMRegister src);
1091 void movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg = rscratch1);
1092
1093 void kmovwl(KRegister dst, Register src) { Assembler::kmovwl(dst, src); }
1094 void kmovwl(Register dst, KRegister src) { Assembler::kmovwl(dst, src); }
1095 void kmovwl(KRegister dst, Address src) { Assembler::kmovwl(dst, src); }
1096 void kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
1097 void kmovwl(Address dst, KRegister src) { Assembler::kmovwl(dst, src); }
1098 void kmovwl(KRegister dst, KRegister src) { Assembler::kmovwl(dst, src); }
1099
1100 void kmovql(KRegister dst, KRegister src) { Assembler::kmovql(dst, src); }
1101 void kmovql(KRegister dst, Register src) { Assembler::kmovql(dst, src); }
1102 void kmovql(Register dst, KRegister src) { Assembler::kmovql(dst, src); }
1103 void kmovql(KRegister dst, Address src) { Assembler::kmovql(dst, src); }
1104 void kmovql(Address dst, KRegister src) { Assembler::kmovql(dst, src); }
1105 void kmovql(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
1106
1107 // Safe move operation: lowers to 16-bit moves on targets supporting only the
1108 // AVX512F feature, and to 64-bit moves on targets supporting the AVX512BW feature.
1109 void kmov(Address dst, KRegister src);
1110 void kmov(KRegister dst, Address src);
1111 void kmov(KRegister dst, KRegister src);
1112 void kmov(Register dst, KRegister src);
1113 void kmov(KRegister dst, Register src);
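For reference, the feature dispatch described by the comment above plausibly takes the following shape (a sketch, not the definitions from macroAssembler_x86.cpp; it assumes VM_Version::supports_avx512bw() and VM_Version::supports_evex() as the gating predicates):

    // Sketch: choose the widest mask-register move the target supports.
    void kmov(KRegister dst, Register src) {
      if (VM_Version::supports_avx512bw()) {
        kmovql(dst, src);                        // AVX512BW: 64-bit mask move
      } else {
        assert(VM_Version::supports_evex(), ""); // AVX512F implies EVEX
        kmovwl(dst, src);                        // AVX512F only: 16-bit mask move
      }
    }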
1114
1115 // AVX Unaligned forms
1116 void vmovdqu(Address dst, XMMRegister src);
1117 void vmovdqu(XMMRegister dst, Address src);
1118 void vmovdqu(XMMRegister dst, XMMRegister src);
1119 void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
1120 void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg, int vector_len);
1121
1122
1123 // AVX512 Unaligned
1124 void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len);
1125 void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len);
1126
1127 void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
1128 void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
1129 void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
1130 void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
1131 void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
1132 void evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1133
1134 void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); }
1135 void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
1136 void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); }
1137 void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
1138 void evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1139
1140 void evmovdqul(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
1141 void evmovdqul(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
1142 void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
1143 if (dst->encoding() == src->encoding()) return;
    (analyzer step 10) Called C++ object pointer is null
1144 Assembler::evmovdqul(dst, src, vector_len);
1145 }
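The diagnostic above fires because dst->encoding() is reached on a path where the analyzer believes dst can be null. A minimal defensive sketch, assuming xnoreg as HotSpot's invalid-XMMRegister sentinel (illustration only, not the upstream fix):

    // Sketch: fail fast in debug builds instead of dereferencing a null
    // XMMRegister on the flagged path.
    void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
      assert(dst != xnoreg && src != xnoreg, "invalid XMMRegister operand");
      if (dst->encoding() == src->encoding()) return; // elide self-move
      Assembler::evmovdqul(dst, src, vector_len);
    }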
1146 void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
1147 void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
1148 void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
1149 if (dst->encoding() == src->encoding() && mask == k0) return;
1150 Assembler::evmovdqul(dst, mask, src, merge, vector_len);
1151 }
1152 void evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1153
1154 void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
1155 void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
1156 void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch);
1157 void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
1158 if (dst->encoding() == src->encoding()) return;
1159 Assembler::evmovdquq(dst, src, vector_len);
1160 }
1161 void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
1162 void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
1163 void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
1164 if (dst->encoding() == src->encoding() && mask == k0) return;
1165 Assembler::evmovdquq(dst, mask, src, merge, vector_len);
1166 }
1167 void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1168
1169 // Move Aligned Double Quadword
1170 void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); }
1171 void movdqa(XMMRegister dst, XMMRegister src) { Assembler::movdqa(dst, src); }
1172 void movdqa(XMMRegister dst, AddressLiteral src);
1173
1174 void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
1175 void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); }
1176 void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
1177 void movsd(XMMRegister dst, AddressLiteral src);
1178
1179 void mulpd(XMMRegister dst, XMMRegister src) { Assembler::mulpd(dst, src); }
1180 void mulpd(XMMRegister dst, Address src) { Assembler::mulpd(dst, src); }
1181 void mulpd(XMMRegister dst, AddressLiteral src);
1182
1183 void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); }
1184 void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); }
1185 void mulsd(XMMRegister dst, AddressLiteral src);
1186
1187 void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); }
1188 void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); }
1189 void mulss(XMMRegister dst, AddressLiteral src);
1190
1191 // Carry-Less Multiplication Quadword
1192 void pclmulldq(XMMRegister dst, XMMRegister src) {
1193 // 0x00 - multiply lower 64 bits [0:63]
1194 Assembler::pclmulqdq(dst, src, 0x00);
1195 }
1196 void pclmulhdq(XMMRegister dst, XMMRegister src) {
1197 // 0x11 - multiply upper 64 bits [64:127]
1198 Assembler::pclmulqdq(dst, src, 0x11);
1199 }
1200
1201 void pcmpeqb(XMMRegister dst, XMMRegister src);
1202 void pcmpeqw(XMMRegister dst, XMMRegister src);
1203
1204 void pcmpestri(XMMRegister dst, Address src, int imm8);
1205 void pcmpestri(XMMRegister dst, XMMRegister src, int imm8);
1206
1207 void pmovzxbw(XMMRegister dst, XMMRegister src);
1208 void pmovzxbw(XMMRegister dst, Address src);
1209
1210 void pmovmskb(Register dst, XMMRegister src);
1211
1212 void ptest(XMMRegister dst, XMMRegister src);
1213
1214 void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); }
1215 void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); }
1216 void sqrtsd(XMMRegister dst, AddressLiteral src);
1217
1218 void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); }
1219 void roundsd(XMMRegister dst, Address src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); }
1220 void roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register scratch_reg);
1221
1222 void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); }
1223 void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); }
1224 void sqrtss(XMMRegister dst, AddressLiteral src);
1225
1226 void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); }
1227 void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); }
1228 void subsd(XMMRegister dst, AddressLiteral src);
1229
1230 void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); }
1231 void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); }
1232 void subss(XMMRegister dst, AddressLiteral src);
1233
1234 void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
1235 void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
1236 void ucomiss(XMMRegister dst, AddressLiteral src);
1237
1238 void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }
1239 void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
1240 void ucomisd(XMMRegister dst, AddressLiteral src);
1241
1242 // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
1243 void xorpd(XMMRegister dst, XMMRegister src);
1244 void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); }
1245 void xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
1246
1247 // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
1248 void xorps(XMMRegister dst, XMMRegister src);
1249 void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); }
1250 void xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
1251
1252 // Shuffle Bytes
1253 void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); }
1254 void pshufb(XMMRegister dst, Address src) { Assembler::pshufb(dst, src); }
1255 void pshufb(XMMRegister dst, AddressLiteral src);
1256 // AVX 3-operand instructions
1257
1258 void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
1259 void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); }
1260 void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1261
1262 void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); }
1263 void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); }
1264 void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1265
1266 void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
1267 void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
1268
1269 void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1270 void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1271 void vpaddb(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch);
1272
1273 void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1274 void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1275
1276 void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); }
1277 void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); }
1278 void vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch);
1279
1280 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
1281 void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
1282 void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1283
1284 void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
1285 void vpbroadcastw(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastw(dst, src, vector_len); }
1286
1287 void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1288
1289 void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1290 void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
1291
1292 // Vector compares
1293 void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
1294 int comparison, bool is_signed, int vector_len) { Assembler::evpcmpd(kdst, mask, nds, src, comparison, is_signed, vector_len); }
1295 void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
1296 int comparison, bool is_signed, int vector_len, Register scratch_reg);
1297 void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
1298 int comparison, bool is_signed, int vector_len) { Assembler::evpcmpq(kdst, mask, nds, src, comparison, is_signed, vector_len); }
1299 void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
1300 int comparison, bool is_signed, int vector_len, Register scratch_reg);
1301 void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
1302 int comparison, bool is_signed, int vector_len) { Assembler::evpcmpb(kdst, mask, nds, src, comparison, is_signed, vector_len); }
1303 void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
1304 int comparison, bool is_signed, int vector_len, Register scratch_reg);
1305 void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
1306 int comparison, bool is_signed, int vector_len) { Assembler::evpcmpw(kdst, mask, nds, src, comparison, is_signed, vector_len); }
1307 void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
1308 int comparison, bool is_signed, int vector_len, Register scratch_reg);
1309
1310 void evpbroadcast(BasicType type, XMMRegister dst, Register src, int vector_len);
1311
1312 // Emit comparison instruction for the specified comparison predicate.
1313 void vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg);
1314 void vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len);
1315
1316 void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
1317 void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpmovzxbw(dst, src, vector_len); }
1318
1319 void vpmovmskb(Register dst, XMMRegister src, int vector_len = Assembler::AVX_256bit);
1320
1321 void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1322 void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1323 void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
1324 Assembler::vpmulld(dst, nds, src, vector_len);
1325 };
1326 void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1327 Assembler::vpmulld(dst, nds, src, vector_len);
1328 }
1329 void vpmulld(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
1330
1331 void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1332 void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1333
1334 void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1335 void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1336
1337 void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1338 void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
1339
1340 void evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1341 void evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
1342
1343 void evpsllw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1344 if (!is_varshift) {
1345 Assembler::evpsllw(dst, mask, nds, src, merge, vector_len);
1346 } else {
1347 Assembler::evpsllvw(dst, mask, nds, src, merge, vector_len);
1348 }
1349 }
1350 void evpslld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1351 if (!is_varshift) {
1352 Assembler::evpslld(dst, mask, nds, src, merge, vector_len);
1353 } else {
1354 Assembler::evpsllvd(dst, mask, nds, src, merge, vector_len);
1355 }
1356 }
1357 void evpsllq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1358 if (!is_varshift) {
1359 Assembler::evpsllq(dst, mask, nds, src, merge, vector_len);
1360 } else {
1361 Assembler::evpsllvq(dst, mask, nds, src, merge, vector_len);
1362 }
1363 }
1364 void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1365 if (!is_varshift) {
1366 Assembler::evpsrlw(dst, mask, nds, src, merge, vector_len);
1367 } else {
1368 Assembler::evpsrlvw(dst, mask, nds, src, merge, vector_len);
1369 }
1370 }
1371 void evpsrld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1372 if (!is_varshift) {
1373 Assembler::evpsrld(dst, mask, nds, src, merge, vector_len);
1374 } else {
1375 Assembler::evpsrlvd(dst, mask, nds, src, merge, vector_len);
1376 }
1377 }
1378 void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1379 if (!is_varshift) {
1380 Assembler::evpsrlq(dst, mask, nds, src, merge, vector_len);
1381 } else {
1382 Assembler::evpsrlvq(dst, mask, nds, src, merge, vector_len);
1383 }
1384 }
1385 void evpsraw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1386 if (!is_varshift) {
1387 Assembler::evpsraw(dst, mask, nds, src, merge, vector_len);
1388 } else {
1389 Assembler::evpsravw(dst, mask, nds, src, merge, vector_len);
1390 }
1391 }
1392 void evpsrad(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1393 if (!is_varshift) {
1394 Assembler::evpsrad(dst, mask, nds, src, merge, vector_len);
1395 } else {
1396 Assembler::evpsravd(dst, mask, nds, src, merge, vector_len);
1397 }
1398 }
1399 void evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1400 if (!is_varshift) {
1401 Assembler::evpsraq(dst, mask, nds, src, merge, vector_len);
1402 } else {
1403 Assembler::evpsravq(dst, mask, nds, src, merge, vector_len);
1404 }
1405 }
1406
1407 void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1408 void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1409 void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1410 void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1411
1412 void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1413 void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
1414
1415 void vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1416 void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
1417
1418 void vptest(XMMRegister dst, XMMRegister src);
1419 void vptest(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vptest(dst, src, vector_len); }
1420
1421 void punpcklbw(XMMRegister dst, XMMRegister src);
1422 void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); }
1423
1424 void pshufd(XMMRegister dst, Address src, int mode);
1425 void pshufd(XMMRegister dst, XMMRegister src, int mode) { Assembler::pshufd(dst, src, mode); }
1426
1427 void pshuflw(XMMRegister dst, XMMRegister src, int mode);
1428 void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); }
1429
1430 void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
1431 void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
1432 void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1433
1434 void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
1435 void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
1436 void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1437
1438 void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1439
1440 void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
1441 void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); }
1442 void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1443
1444 void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); }
1445 void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); }
1446 void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1447
1448 void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); }
1449 void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); }
1450 void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1451
1452 void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); }
1453 void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); }
1454 void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1455
1456 void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); }
1457 void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); }
1458 void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1459
1460 void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); }
1461 void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); }
1462 void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1463
1464 void vnegatess(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1465 void vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1466
1467 // AVX Vector instructions
1468
1469 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }
1470 void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }
1471 void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1472
1473 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); }
1474 void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); }
1475 void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1476
1477 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1478 if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2
1479 Assembler::vpxor(dst, nds, src, vector_len);
1480 else
1481 Assembler::vxorpd(dst, nds, src, vector_len);
1482 }
1483 void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
1484 if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2
1485 Assembler::vpxor(dst, nds, src, vector_len);
1486 else
1487 Assembler::vxorpd(dst, nds, src, vector_len);
1488 }
1489 void vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1490
1491 // Simple version for AVX2 256bit vectors
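    // note: the literal 'true' below converts to int vector_len == 1,
    // i.e. Assembler::AVX_256bit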
1492 void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); }
1493 void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); }
1494
1495 void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpermd(dst, nds, src, vector_len); }
1496 void vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
1497
1498 void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
1499 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1500 Assembler::vinserti32x4(dst, nds, src, imm8);
1501 } else if (UseAVX > 1) {
1502 // vinserti128 is available only in AVX2
1503 Assembler::vinserti128(dst, nds, src, imm8);
1504 } else {
1505 Assembler::vinsertf128(dst, nds, src, imm8);
1506 }
1507 }
1508
1509 void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
1510 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1511 Assembler::vinserti32x4(dst, nds, src, imm8);
1512 } else if (UseAVX > 1) {
1513 // vinserti128 is available only in AVX2
1514 Assembler::vinserti128(dst, nds, src, imm8);
1515 } else {
1516 Assembler::vinsertf128(dst, nds, src, imm8);
1517 }
1518 }
1519
1520 void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
1521 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1522 Assembler::vextracti32x4(dst, src, imm8);
1523 } else if (UseAVX > 1) {
1524 // vextracti128 is available only in AVX2
1525 Assembler::vextracti128(dst, src, imm8);
1526 } else {
1527 Assembler::vextractf128(dst, src, imm8);
1528 }
1529 }
1530
1531 void vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
1532 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1533 Assembler::vextracti32x4(dst, src, imm8);
1534 } else if (UseAVX > 1) {
1535 // vextracti128 is available only in AVX2
1536 Assembler::vextracti128(dst, src, imm8);
1537 } else {
1538 Assembler::vextractf128(dst, src, imm8);
1539 }
1540 }
1541
1542 // 128bit copy to/from high 128 bits of 256bit (YMM) vector registers
1543 void vinserti128_high(XMMRegister dst, XMMRegister src) {
1544 vinserti128(dst, dst, src, 1);
1545 }
1546 void vinserti128_high(XMMRegister dst, Address src) {
1547 vinserti128(dst, dst, src, 1);
1548 }
1549 void vextracti128_high(XMMRegister dst, XMMRegister src) {
1550 vextracti128(dst, src, 1);
1551 }
1552 void vextracti128_high(Address dst, XMMRegister src) {
1553 vextracti128(dst, src, 1);
1554 }
1555
1556 void vinsertf128_high(XMMRegister dst, XMMRegister src) {
1557 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1558 Assembler::vinsertf32x4(dst, dst, src, 1);
1559 } else {
1560 Assembler::vinsertf128(dst, dst, src, 1);
1561 }
1562 }
1563
1564 void vinsertf128_high(XMMRegister dst, Address src) {
1565 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1566 Assembler::vinsertf32x4(dst, dst, src, 1);
1567 } else {
1568 Assembler::vinsertf128(dst, dst, src, 1);
1569 }
1570 }
1571
1572 void vextractf128_high(XMMRegister dst, XMMRegister src) {
1573 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1574 Assembler::vextractf32x4(dst, src, 1);
1575 } else {
1576 Assembler::vextractf128(dst, src, 1);
1577 }
1578 }
1579
1580 void vextractf128_high(Address dst, XMMRegister src) {
1581 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1582 Assembler::vextractf32x4(dst, src, 1);
1583 } else {
1584 Assembler::vextractf128(dst, src, 1);
1585 }
1586 }
1587
1588 // 256bit copy to/from high 256 bits of 512bit (ZMM) vector registers
1589 void vinserti64x4_high(XMMRegister dst, XMMRegister src) {
1590 Assembler::vinserti64x4(dst, dst, src, 1);
1591 }
1592 void vinsertf64x4_high(XMMRegister dst, XMMRegister src) {
1593 Assembler::vinsertf64x4(dst, dst, src, 1);
1594 }
1595 void vextracti64x4_high(XMMRegister dst, XMMRegister src) {
1596 Assembler::vextracti64x4(dst, src, 1);
1597 }
1598 void vextractf64x4_high(XMMRegister dst, XMMRegister src) {
1599 Assembler::vextractf64x4(dst, src, 1);
1600 }
1601 void vextractf64x4_high(Address dst, XMMRegister src) {
1602 Assembler::vextractf64x4(dst, src, 1);
1603 }
1604 void vinsertf64x4_high(XMMRegister dst, Address src) {
1605 Assembler::vinsertf64x4(dst, dst, src, 1);
1606 }
1607
1608 // 128bit copy to/from low 128 bits of 256bit (YMM) vector registers
1609 void vinserti128_low(XMMRegister dst, XMMRegister src) {
1610 vinserti128(dst, dst, src, 0);
1611 }
1612 void vinserti128_low(XMMRegister dst, Address src) {
1613 vinserti128(dst, dst, src, 0);
1614 }
1615 void vextracti128_low(XMMRegister dst, XMMRegister src) {
1616 vextracti128(dst, src, 0);
1617 }
1618 void vextracti128_low(Address dst, XMMRegister src) {
1619 vextracti128(dst, src, 0);
1620 }
1621
1622 void vinsertf128_low(XMMRegister dst, XMMRegister src) {
1623 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1624 Assembler::vinsertf32x4(dst, dst, src, 0);
1625 } else {
1626 Assembler::vinsertf128(dst, dst, src, 0);
1627 }
1628 }
1629
1630 void vinsertf128_low(XMMRegister dst, Address src) {
1631 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1632 Assembler::vinsertf32x4(dst, dst, src, 0);
1633 } else {
1634 Assembler::vinsertf128(dst, dst, src, 0);
1635 }
1636 }
1637
1638 void vextractf128_low(XMMRegister dst, XMMRegister src) {
1639 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1640 Assembler::vextractf32x4(dst, src, 0);
1641 } else {
1642 Assembler::vextractf128(dst, src, 0);
1643 }
1644 }
1645
1646 void vextractf128_low(Address dst, XMMRegister src) {
1647 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1648 Assembler::vextractf32x4(dst, src, 0);
1649 } else {
1650 Assembler::vextractf128(dst, src, 0);
1651 }
1652 }
1653
1654 // 256bit copy to/from low 256 bits of 512bit (ZMM) vector registers
1655 void vinserti64x4_low(XMMRegister dst, XMMRegister src) {
1656 Assembler::vinserti64x4(dst, dst, src, 0);
1657 }
1658 void vinsertf64x4_low(XMMRegister dst, XMMRegister src) {
1659 Assembler::vinsertf64x4(dst, dst, src, 0);
1660 }
1661 void vextracti64x4_low(XMMRegister dst, XMMRegister src) {
1662 Assembler::vextracti64x4(dst, src, 0);
1663 }
1664 void vextractf64x4_low(XMMRegister dst, XMMRegister src) {
1665 Assembler::vextractf64x4(dst, src, 0);
1666 }
1667 void vextractf64x4_low(Address dst, XMMRegister src) {
1668 Assembler::vextractf64x4(dst, src, 0);
1669 }
1670 void vinsertf64x4_low(XMMRegister dst, Address src) {
1671 Assembler::vinsertf64x4(dst, dst, src, 0);
1672 }
1673
1674 // Carry-Less Multiplication Quadword
1675 void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1676 // 0x00 - multiply lower 64 bits [0:63]
1677 Assembler::vpclmulqdq(dst, nds, src, 0x00);
1678 }
1679 void vpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1680 // 0x11 - multiply upper 64 bits [64:127]
1681 Assembler::vpclmulqdq(dst, nds, src, 0x11);
1682 }
1683 void vpclmullqhqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1684 // 0x10 - multiply nds[0:63] and src[64:127]
1685 Assembler::vpclmulqdq(dst, nds, src, 0x10);
1686 }
1687 void vpclmulhqlqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1688 //0x01 - multiply nds[64:127] and src[0:63]
1689 Assembler::vpclmulqdq(dst, nds, src, 0x01);
1690 }
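The four selector variants above are the building blocks of a full 128x128-bit carry-less multiply (the core of GHASH and CRC folding). A schoolbook sketch, with t0/t1 as hypothetical caller-provided scratch registers:

    // Sketch: 256-bit carry-less product of a and b, split across lo/hi.
    void clmul_128x128_sketch(XMMRegister lo, XMMRegister hi,
                              XMMRegister t0, XMMRegister t1,
                              XMMRegister a, XMMRegister b) {
      vpclmulldq(lo, a, b);     // lo = a[0:63]   * b[0:63]
      vpclmulhdq(hi, a, b);     // hi = a[64:127] * b[64:127]
      vpclmullqhqdq(t0, a, b);  // t0 = a[0:63]   * b[64:127]
      vpclmulhqlqdq(t1, a, b);  // t1 = a[64:127] * b[0:63]
      pxor(t0, t1);             // xor-sum the two cross terms
      movdqu(t1, t0);
      pslldq(t1, 8);            // low 64 bits of the cross sum, moved up
      psrldq(t0, 8);            // high 64 bits of the cross sum, moved down
      pxor(lo, t1);             // fold cross terms into the low 128 bits
      pxor(hi, t0);             // fold cross terms into the high 128 bits
    }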
1691
1692 void evpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1693 // 0x00 - multiply lower 64 bits [0:63]
1694 Assembler::evpclmulqdq(dst, nds, src, 0x00, vector_len);
1695 }
1696 void evpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1697 // 0x11 - multiply upper 64 bits [64:127]
1698 Assembler::evpclmulqdq(dst, nds, src, 0x11, vector_len);
1699 }
1700
1701 // AVX-512 mask operations.
1702 void kand(BasicType etype, KRegister dst, KRegister src1, KRegister src2);
1703 void kor(BasicType type, KRegister dst, KRegister src1, KRegister src2);
1704 void knot(uint masklen, KRegister dst, KRegister src, KRegister ktmp = knoreg, Register rtmp = noreg);
1705 void kxor(BasicType type, KRegister dst, KRegister src1, KRegister src2);
1706 void kortest(uint masklen, KRegister src1, KRegister src2);
1707 void ktest(uint masklen, KRegister src1, KRegister src2);
1708
1709 void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1710 void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1711
1712 void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1713 void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1714
1715 void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1716 void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1717
1718 void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1719 void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1720
1721 void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc);
1722 void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc);
1723 void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc);
1724 void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc);
1725
1726 void alltrue(Register dst, uint masklen, KRegister src1, KRegister src2, KRegister kscratch);
1727 void anytrue(Register dst, uint masklen, KRegister src, KRegister kscratch);
1728
1729 void cmov32( Condition cc, Register dst, Address src);
1730 void cmov32( Condition cc, Register dst, Register src);
1731
1732 void cmov( Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); }
1733
1734 void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
1735 void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
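    // After preprocessing (LP64_ONLY(x) expands to x on 64-bit builds and to
    // nothing otherwise; NOT_LP64 is the inverse), the two lines above reduce to:
    //   64-bit build:  { cmovq(cc, dst, src); }
    //   32-bit build:  { cmov32(cc, dst, src); }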
1736
1737 void movoop(Register dst, jobject obj);
1738 void movoop(Address dst, jobject obj);
1739
1740 void mov_metadata(Register dst, Metadata* obj);
1741 void mov_metadata(Address dst, Metadata* obj);
1742
1743 void movptr(ArrayAddress dst, Register src);
1744 // can this do an lea?
1745 void movptr(Register dst, ArrayAddress src);
1746
1747 void movptr(Register dst, Address src);
1748
1749#ifdef _LP64
1750 void movptr(Register dst, AddressLiteral src, Register scratch=rscratch1);
1751#else
1752 void movptr(Register dst, AddressLiteral src, Register scratch=noreg); // Scratch reg is ignored in 32-bit
1753#endif
1754
1755 void movptr(Register dst, intptr_t src);
1756 void movptr(Register dst, Register src);
1757 void movptr(Address dst, intptr_t src);
1758
1759 void movptr(Address dst, Register src);
1760
1761 void movptr(Register dst, RegisterOrConstant src) {
1762 if (src.is_constant()) movptr(dst, src.as_constant());
1763 else movptr(dst, src.as_register());
1764 }
1765
1766#ifdef _LP64
1767 // Generally the next two are only used for moving NULL
1768 // Although there are situations in initializing the mark word where
1769 // they could be used. They are dangerous.
1770
1771 // They only exist on LP64, where int32_t and intptr_t are distinct types;
1772 // on 32-bit they would be the same type and these declarations ambiguous.
1773
1774 void movptr(Address dst, int32_t imm32);
1775 void movptr(Register dst, int32_t imm32);
1776#endif // _LP64
1777
1778 // to avoid hiding movl
1779 void mov32(AddressLiteral dst, Register src);
1780 void mov32(Register dst, AddressLiteral src);
1781
1782 // to avoid hiding movb
1783 void movbyte(ArrayAddress dst, int src);
1784
1785 // Import other mov() methods from the parent class or else
1786 // they will be hidden by the following overriding declaration.
1787 using Assembler::movdl;
1788 using Assembler::movq;
1789 void movdl(XMMRegister dst, AddressLiteral src);
1790 void movq(XMMRegister dst, AddressLiteral src);
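A quick illustration of the C++ name-hiding rule the comment above works around (hypothetical types, sketch only):

    struct Base    { void f(int); };
    struct Derived : Base {
      using Base::f;   // without this, f(double) hides every Base::f overload
      void f(double);
    };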
1791
1792 // Can push value or effective address
1793 void pushptr(AddressLiteral src);
1794
1795 void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); }
1796 void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); }
1797
1798 void pushoop(jobject obj);
1799 void pushklass(Metadata* obj);
1800
1801 // sign extend as needed, from an 'l' (32-bit) value to a ptr-sized element
1802 void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
1803 void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }
1804
1805
1806 public:
1807 // C2 compiled method's prolog code.
1808 void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub);
1809
1810 // clear memory of size 'cnt' qwords, starting at 'base';
1811 // if 'is_large' is set, do not try to produce short loop
1812 void clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, bool is_large, KRegister mask=knoreg);
1813
1814 // clear memory initialization sequence for constant size;
1815 void clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg);
1816
1817 // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers
1818 void xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg);
1819
1820 // Fill primitive arrays
1821 void generate_fill(BasicType t, bool aligned,
1822 Register to, Register value, Register count,
1823 Register rtmp, XMMRegister xtmp);
1824
1825 void encode_iso_array(Register src, Register dst, Register len,
1826 XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
1827 XMMRegister tmp4, Register tmp5, Register result, bool ascii);
1828
1829#ifdef _LP64
1830 void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2);
1831 void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
1832 Register y, Register y_idx, Register z,
1833 Register carry, Register product,
1834 Register idx, Register kdx);
1835 void multiply_add_128_x_128(Register x_xstart, Register y, Register z,
1836 Register yz_idx, Register idx,
1837 Register carry, Register product, int offset);
1838 void multiply_128_x_128_bmi2_loop(Register y, Register z,
1839 Register carry, Register carry2,
1840 Register idx, Register jdx,
1841 Register yz_idx1, Register yz_idx2,
1842 Register tmp, Register tmp3, Register tmp4);
1843 void multiply_128_x_128_loop(Register x_xstart, Register y, Register z,
1844 Register yz_idx, Register idx, Register jdx,
1845 Register carry, Register product,
1846 Register carry2);
1847 void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen,
1848 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
1849 void square_rshift(Register x, Register len, Register z, Register tmp1, Register tmp3,
1850 Register tmp4, Register tmp5, Register rdxReg, Register raxReg);
1851 void multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry,
1852 Register tmp2);
1853 void multiply_add_64(Register sum, Register op1, Register op2, Register carry,
1854 Register rdxReg, Register raxReg);
1855 void add_one_64(Register z, Register zlen, Register carry, Register tmp1);
1856 void lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2,
1857 Register tmp3, Register tmp4);
1858 void square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2,
1859 Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg);
1860
1861 void mul_add_128_x_32_loop(Register out, Register in, Register offset, Register len, Register tmp1,
1862 Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg,
1863 Register raxReg);
1864 void mul_add(Register out, Register in, Register offset, Register len, Register k, Register tmp1,
1865 Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg,
1866 Register raxReg);
1867 void vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale,
1868 Register result, Register tmp1, Register tmp2,
1869 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3);
1870#endif
1871
1872 // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
1873 void update_byte_crc32(Register crc, Register val, Register table);
1874 void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp);
1875
1876
1877#ifdef _LP64
1878 void kernel_crc32_avx512(Register crc, Register buf, Register len, Register table, Register tmp1, Register tmp2);
1879 void kernel_crc32_avx512_256B(Register crc, Register buf, Register len, Register key, Register pos,
1880 Register tmp1, Register tmp2, Label& L_barrett, Label& L_16B_reduction_loop,
1881 Label& L_get_last_two_xmms, Label& L_128_done, Label& L_cleanup);
1882 void updateBytesAdler32(Register adler32, Register buf, Register length, XMMRegister shuf0, XMMRegister shuf1, ExternalAddress scale);
1883#endif // _LP64
1884
1885 // CRC32C code for java.util.zip.CRC32C::updateBytes() intrinsic
1886 // Note on a naming convention:
1887 // Prefix w = register only used on a Westmere+ architecture
1888 // Prefix n = register only used on a Nehalem architecture
1889#ifdef _LP64
1890 void crc32c_ipl_alg4(Register in_out, uint32_t n,
1891 Register tmp1, Register tmp2, Register tmp3);
1892#else
1893 void crc32c_ipl_alg4(Register in_out, uint32_t n,
1894 Register tmp1, Register tmp2, Register tmp3,
1895 XMMRegister xtmp1, XMMRegister xtmp2);
1896#endif
1897 void crc32c_pclmulqdq(XMMRegister w_xtmp1,
1898 Register in_out,
1899 uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
1900 XMMRegister w_xtmp2,
1901 Register tmp1,
1902 Register n_tmp2, Register n_tmp3);
1903 void crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
1904 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
1905 Register tmp1, Register tmp2,
1906 Register n_tmp3);
1907 void crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
1908 Register in_out1, Register in_out2, Register in_out3,
1909 Register tmp1, Register tmp2, Register tmp3,
1910 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
1911 Register tmp4, Register tmp5,
1912 Register n_tmp6);
1913 void crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
1914 Register tmp1, Register tmp2, Register tmp3,
1915 Register tmp4, Register tmp5, Register tmp6,
1916 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
1917 bool is_pclmulqdq_supported);
1918 // Fold 128-bit data chunk
1919 void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset);
1920 void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf);
1921#ifdef _LP64
1922 // Fold 512-bit data chunk
1923 void fold512bit_crc32_avx512(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, Register pos, int offset);
1924#endif // _LP64
1925 // Fold 8-bit data
1926 void fold_8bit_crc32(Register crc, Register table, Register tmp);
1927 void fold_8bit_crc32(XMMRegister crc, Register table, XMMRegister xtmp, Register tmp);
1928
1929 // Compress char[] array to byte[].
1930 void char_array_compress(Register src, Register dst, Register len,
1931 XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
1932 XMMRegister tmp4, Register tmp5, Register result,
1933 KRegister mask1 = knoreg, KRegister mask2 = knoreg);
1934
1935 // Inflate byte[] array to char[].
1936 void byte_array_inflate(Register src, Register dst, Register len,
1937 XMMRegister tmp1, Register tmp2, KRegister mask = knoreg);
1938
1939 void fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRegister mask,
1940 Register length, Register temp, int vec_enc);
1941
1942 void fill64_masked(uint shift, Register dst, int disp,
1943 XMMRegister xmm, KRegister mask, Register length,
1944 Register temp, bool use64byteVector = false);
1945
1946 void fill32_masked(uint shift, Register dst, int disp,
1947 XMMRegister xmm, KRegister mask, Register length,
1948 Register temp);
1949
1950 void fill32(Register dst, int disp, XMMRegister xmm);
1951
1952 void fill64(Register dst, int dis, XMMRegister xmm, bool use64byteVector = false);
1953
1954#ifdef _LP64
1955 void convert_f2i(Register dst, XMMRegister src);
1956 void convert_d2i(Register dst, XMMRegister src);
1957 void convert_f2l(Register dst, XMMRegister src);
1958 void convert_d2l(Register dst, XMMRegister src);
1959
1960 void cache_wb(Address line);
1961 void cache_wbsync(bool is_pre);
1962
1963#if COMPILER2_OR_JVMCI
1964 void arraycopy_avx3_special_cases(XMMRegister xmm, KRegister mask, Register from,
1965 Register to, Register count, int shift,
1966 Register index, Register temp,
1967 bool use64byteVector, Label& L_entry, Label& L_exit);
1968
1969 void arraycopy_avx3_special_cases_conjoint(XMMRegister xmm, KRegister mask, Register from,
1970 Register to, Register start_index, Register end_index,
1971 Register count, int shift, Register temp,
1972 bool use64byteVector, Label& L_entry, Label& L_exit);
1973
1974 void copy64_masked_avx(Register dst, Register src, XMMRegister xmm,
1975 KRegister mask, Register length, Register index,
1976 Register temp, int shift = Address::times_1, int offset = 0,
1977 bool use64byteVector = false);
1978
1979 void copy32_masked_avx(Register dst, Register src, XMMRegister xmm,
1980 KRegister mask, Register length, Register index,
1981 Register temp, int shift = Address::times_1, int offset = 0);
1982
1983 void copy32_avx(Register dst, Register src, Register index, XMMRegister xmm,
1984 int shift = Address::times_1, int offset = 0);
1985
1986 void copy64_avx(Register dst, Register src, Register index, XMMRegister xmm,
1987 bool conjoint, int shift = Address::times_1, int offset = 0,
1988 bool use64byteVector = false);
1989
1990 void generate_fill_avx3(BasicType type, Register to, Register value,
1991 Register count, Register rtmp, XMMRegister xtmp);
1992
1993#endif // COMPILER2_OR_JVMCI
1994
1995#endif // _LP64
1996
1997 void vallones(XMMRegister dst, int vector_len);
1998};
1999
2000/**
2001 * class SkipIfEqual:
2002 *
2003 * Instantiating this class will result in assembly code being output that will
2004 * jump around any code emitted between the creation of the instance and its
2005 * automatic destruction at the end of a scope block, depending on the value of
2006 * the flag passed to the constructor, which will be checked at run-time.
2007 */
2008class SkipIfEqual {
2009 private:
2010 MacroAssembler* _masm;
2011 Label _label;
2012
2013 public:
2014 SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
2015 ~SkipIfEqual();
2016};
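A minimal usage sketch, assuming a MacroAssembler* named masm and a hypothetical bool flag SomeFlag; the guarded instruction is illustrative:

    {
      // Emits a compare of SomeFlag against 'true' and a conditional jump
      // over everything emitted up to the destructor when they are equal.
      SkipIfEqual skip(masm, &SomeFlag, true);
      masm->movptr(rax, Address(rbx, 0)); // skipped at run-time when SomeFlag == true
    } // ~SkipIfEqual binds the label here, ending the skipped region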
2017
2018#endif // CPU_X86_MACROASSEMBLER_X86_HPP