| File: | jdk/src/hotspot/cpu/x86/vm_version_x86.cpp |
| Warning: | line 1143, column 29 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | /* | ||||
| 2 | * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. | ||||
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | ||||
| 4 | * | ||||
| 5 | * This code is free software; you can redistribute it and/or modify it | ||||
| 6 | * under the terms of the GNU General Public License version 2 only, as | ||||
| 7 | * published by the Free Software Foundation. | ||||
| 8 | * | ||||
| 9 | * This code is distributed in the hope that it will be useful, but WITHOUT | ||||
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||||
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | ||||
| 12 | * version 2 for more details (a copy is included in the LICENSE file that | ||||
| 13 | * accompanied this code). | ||||
| 14 | * | ||||
| 15 | * You should have received a copy of the GNU General Public License version | ||||
| 16 | * 2 along with this work; if not, write to the Free Software Foundation, | ||||
| 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
| 18 | * | ||||
| 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | ||||
| 20 | * or visit www.oracle.com if you need additional information or have any | ||||
| 21 | * questions. | ||||
| 22 | * | ||||
| 23 | */ | ||||
| 24 | |||||
| 25 | #include "precompiled.hpp" | ||||
| 26 | #include "jvm.h" | ||||
| 27 | #include "asm/macroAssembler.hpp" | ||||
| 28 | #include "asm/macroAssembler.inline.hpp" | ||||
| 29 | #include "code/codeBlob.hpp" | ||||
| 30 | #include "logging/log.hpp" | ||||
| 31 | #include "logging/logStream.hpp" | ||||
| 32 | #include "memory/resourceArea.hpp" | ||||
| 33 | #include "memory/universe.hpp" | ||||
| 34 | #include "runtime/globals_extension.hpp" | ||||
| 35 | #include "runtime/java.hpp" | ||||
| 36 | #include "runtime/os.hpp" | ||||
| 37 | #include "runtime/stubCodeGenerator.hpp" | ||||
| 38 | #include "runtime/vm_version.hpp" | ||||
| 39 | #include "utilities/powerOfTwo.hpp" | ||||
| 40 | #include "utilities/virtualizationSupport.hpp" | ||||
| 41 | |||||
| 42 | #include OS_HEADER_INLINE(os)"os_linux.inline.hpp" | ||||
| 43 | |||||
| 44 | int VM_Version::_cpu; | ||||
| 45 | int VM_Version::_model; | ||||
| 46 | int VM_Version::_stepping; | ||||
| 47 | bool VM_Version::_has_intel_jcc_erratum; | ||||
| 48 | VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; | ||||
| 49 | |||||
| 50 | #define DECLARE_CPU_FEATURE_NAME(id, name, bit)name, name, | ||||
| 51 | const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)"cx8", "cmov", "fxsr", "ht", "mmx", "3dnowpref", "sse", "sse2" , "sse3", "ssse3", "sse4a", "sse4.1", "sse4.2", "popcnt", "lzcnt" , "tsc", "tscinvbit", "tscinv", "avx", "avx2", "aes", "erms", "clmul", "bmi1", "bmi2", "rtm", "adx", "avx512f", "avx512dq" , "avx512pf", "avx512er", "avx512cd", "avx512bw", "avx512vl", "sha", "fma", "vzeroupper", "avx512_vpopcntdq", "avx512_vpclmulqdq" , "avx512_vaes", "avx512_vnni", "clflush", "clflushopt", "clwb" , "avx512_vbmi2", "avx512_vbmi", "hv", "serialize",}; | ||||
| 52 | #undef DECLARE_CPU_FEATURE_FLAG | ||||
| 53 | |||||
| 54 | // Address of instruction which causes SEGV | ||||
| 55 | address VM_Version::_cpuinfo_segv_addr = 0; | ||||
| 56 | // Address of instruction after the one which causes SEGV | ||||
| 57 | address VM_Version::_cpuinfo_cont_addr = 0; | ||||
| 58 | |||||
| 59 | static BufferBlob* stub_blob; | ||||
| 60 | static const int stub_size = 2000; | ||||
| 61 | |||||
| 62 | extern "C" { | ||||
| 63 | typedef void (*get_cpu_info_stub_t)(void*); | ||||
| 64 | typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*); | ||||
| 65 | } | ||||
| 66 | static get_cpu_info_stub_t get_cpu_info_stub = NULL__null; | ||||
| 67 | static detect_virt_stub_t detect_virt_stub = NULL__null; | ||||
| 68 | |||||
| 69 | #ifdef _LP641 | ||||
| 70 | |||||
| 71 | bool VM_Version::supports_clflush() { | ||||
| 72 | // clflush should always be available on x86_64 | ||||
| 73 | // if not we are in real trouble because we rely on it | ||||
| 74 | // to flush the code cache. | ||||
| 75 | // Unfortunately, Assembler::clflush is currently called as part | ||||
| 76 | // of generation of the code cache flush routine. This happens | ||||
| 77 | // under Universe::init before the processor features are set | ||||
| 78 | // up. Assembler::flush calls this routine to check that clflush | ||||
| 79 | // is allowed. So, we give the caller a free pass if Universe init | ||||
| 80 | // is still in progress. | ||||
| 81 | assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available")do { if (!((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 81, "assert(" "(!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0)" ") failed", "clflush should be available"); ::breakpoint(); } } while (0); | ||||
| 82 | return true; | ||||
| 83 | } | ||||
| 84 | #endif | ||||
| 85 | |||||
// CPUID leaf numbers used by the detection stubs below.
#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
| 98 | |||||
| 99 | class VM_Version_StubGenerator: public StubCodeGenerator { | ||||
| 100 | public: | ||||
| 101 | |||||
  // Stubs are emitted into the CodeBuffer supplied by the caller.
  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
| 103 | |||||
| 104 | address generate_get_cpu_info() { | ||||
| 105 | // Flags to test CPU type. | ||||
| 106 | const uint32_t HS_EFL_AC = 0x40000; | ||||
| 107 | const uint32_t HS_EFL_ID = 0x200000; | ||||
| 108 | // Values for when we don't have a CPUID instruction. | ||||
| 109 | const int CPU_FAMILY_SHIFT = 8; | ||||
| 110 | const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); | ||||
| 111 | const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); | ||||
| 112 | bool use_evex = FLAG_IS_DEFAULT(UseAVX)(JVMFlag::is_default(Flag_UseAVX_enum)) || (UseAVX > 2); | ||||
| 113 | |||||
| 114 | Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4; | ||||
| 115 | Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup; | ||||
| 116 | Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check; | ||||
| 117 | |||||
| 118 | StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); | ||||
| 119 | # define __ _masm-> | ||||
| 120 | |||||
| 121 | address start = __ pc(); | ||||
| 122 | |||||
| 123 | // | ||||
| 124 | // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info); | ||||
| 125 | // | ||||
| 126 | // LP64: rcx and rdx are first and second argument registers on windows | ||||
| 127 | |||||
| 128 | __ push(rbp); | ||||
| 129 | #ifdef _LP641 | ||||
| 130 | __ mov(rbp, c_rarg0); // cpuid_info address | ||||
| 131 | #else | ||||
| 132 | __ movptr(rbp, Address(rsp, 8)); // cpuid_info address | ||||
| 133 | #endif | ||||
| 134 | __ push(rbx); | ||||
| 135 | __ push(rsi); | ||||
| 136 | __ pushf(); // preserve rbx, and flags | ||||
| 137 | __ pop(rax); | ||||
| 138 | __ push(rax); | ||||
| 139 | __ mov(rcx, rax); | ||||
| 140 | // | ||||
| 141 | // if we are unable to change the AC flag, we have a 386 | ||||
| 142 | // | ||||
| 143 | __ xorl(rax, HS_EFL_AC); | ||||
| 144 | __ push(rax); | ||||
| 145 | __ popf(); | ||||
| 146 | __ pushf(); | ||||
| 147 | __ pop(rax); | ||||
| 148 | __ cmpptr(rax, rcx); | ||||
| 149 | __ jccb(Assembler::notEqual, detect_486)jccb_0(Assembler::notEqual, detect_486, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 149); | ||||
| 150 | |||||
| 151 | __ movl(rax, CPU_FAMILY_386); | ||||
| 152 | __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); | ||||
| 153 | __ jmp(done); | ||||
| 154 | |||||
| 155 | // | ||||
| 156 | // If we are unable to change the ID flag, we have a 486 which does | ||||
| 157 | // not support the "cpuid" instruction. | ||||
| 158 | // | ||||
| 159 | __ bind(detect_486); | ||||
| 160 | __ mov(rax, rcx); | ||||
| 161 | __ xorl(rax, HS_EFL_ID); | ||||
| 162 | __ push(rax); | ||||
| 163 | __ popf(); | ||||
| 164 | __ pushf(); | ||||
| 165 | __ pop(rax); | ||||
| 166 | __ cmpptr(rcx, rax); | ||||
| 167 | __ jccb(Assembler::notEqual, detect_586)jccb_0(Assembler::notEqual, detect_586, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 167); | ||||
| 168 | |||||
| 169 | __ bind(cpu486); | ||||
| 170 | __ movl(rax, CPU_FAMILY_486); | ||||
| 171 | __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); | ||||
| 172 | __ jmp(done); | ||||
| 173 | |||||
| 174 | // | ||||
| 175 | // At this point, we have a chip which supports the "cpuid" instruction | ||||
| 176 | // | ||||
| 177 | __ bind(detect_586); | ||||
| 178 | __ xorl(rax, rax); | ||||
| 179 | __ cpuid(); | ||||
| 180 | __ orl(rax, rax); | ||||
| 181 | __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input | ||||
| 182 | // value of at least 1, we give up and | ||||
| 183 | // assume a 486 | ||||
| 184 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); | ||||
| 185 | __ movl(Address(rsi, 0), rax); | ||||
| 186 | __ movl(Address(rsi, 4), rbx); | ||||
| 187 | __ movl(Address(rsi, 8), rcx); | ||||
| 188 | __ movl(Address(rsi,12), rdx); | ||||
| 189 | |||||
| 190 | __ cmpl(rax, 0xa); // Is cpuid(0xB) supported? | ||||
| 191 | __ jccb(Assembler::belowEqual, std_cpuid4)jccb_0(Assembler::belowEqual, std_cpuid4, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 191); | ||||
| 192 | |||||
| 193 | // | ||||
| 194 | // cpuid(0xB) Processor Topology | ||||
| 195 | // | ||||
| 196 | __ movl(rax, 0xb); | ||||
| 197 | __ xorl(rcx, rcx); // Threads level | ||||
| 198 | __ cpuid(); | ||||
| 199 | |||||
| 200 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset()))); | ||||
| 201 | __ movl(Address(rsi, 0), rax); | ||||
| 202 | __ movl(Address(rsi, 4), rbx); | ||||
| 203 | __ movl(Address(rsi, 8), rcx); | ||||
| 204 | __ movl(Address(rsi,12), rdx); | ||||
| 205 | |||||
| 206 | __ movl(rax, 0xb); | ||||
| 207 | __ movl(rcx, 1); // Cores level | ||||
| 208 | __ cpuid(); | ||||
| 209 | __ push(rax); | ||||
| 210 | __ andl(rax, 0x1f); // Determine if valid topology level | ||||
| 211 | __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level | ||||
| 212 | __ andl(rax, 0xffff); | ||||
| 213 | __ pop(rax); | ||||
| 214 | __ jccb(Assembler::equal, std_cpuid4)jccb_0(Assembler::equal, std_cpuid4, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 214); | ||||
| 215 | |||||
| 216 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset()))); | ||||
| 217 | __ movl(Address(rsi, 0), rax); | ||||
| 218 | __ movl(Address(rsi, 4), rbx); | ||||
| 219 | __ movl(Address(rsi, 8), rcx); | ||||
| 220 | __ movl(Address(rsi,12), rdx); | ||||
| 221 | |||||
| 222 | __ movl(rax, 0xb); | ||||
| 223 | __ movl(rcx, 2); // Packages level | ||||
| 224 | __ cpuid(); | ||||
| 225 | __ push(rax); | ||||
| 226 | __ andl(rax, 0x1f); // Determine if valid topology level | ||||
| 227 | __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level | ||||
| 228 | __ andl(rax, 0xffff); | ||||
| 229 | __ pop(rax); | ||||
| 230 | __ jccb(Assembler::equal, std_cpuid4)jccb_0(Assembler::equal, std_cpuid4, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 230); | ||||
| 231 | |||||
| 232 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset()))); | ||||
| 233 | __ movl(Address(rsi, 0), rax); | ||||
| 234 | __ movl(Address(rsi, 4), rbx); | ||||
| 235 | __ movl(Address(rsi, 8), rcx); | ||||
| 236 | __ movl(Address(rsi,12), rdx); | ||||
| 237 | |||||
| 238 | // | ||||
| 239 | // cpuid(0x4) Deterministic cache params | ||||
| 240 | // | ||||
| 241 | __ bind(std_cpuid4); | ||||
| 242 | __ movl(rax, 4); | ||||
| 243 | __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported? | ||||
| 244 | __ jccb(Assembler::greater, std_cpuid1)jccb_0(Assembler::greater, std_cpuid1, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 244); | ||||
| 245 | |||||
| 246 | __ xorl(rcx, rcx); // L1 cache | ||||
| 247 | __ cpuid(); | ||||
| 248 | __ push(rax); | ||||
| 249 | __ andl(rax, 0x1f); // Determine if valid cache parameters used | ||||
| 250 | __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache | ||||
| 251 | __ pop(rax); | ||||
| 252 | __ jccb(Assembler::equal, std_cpuid1)jccb_0(Assembler::equal, std_cpuid1, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 252); | ||||
| 253 | |||||
| 254 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); | ||||
| 255 | __ movl(Address(rsi, 0), rax); | ||||
| 256 | __ movl(Address(rsi, 4), rbx); | ||||
| 257 | __ movl(Address(rsi, 8), rcx); | ||||
| 258 | __ movl(Address(rsi,12), rdx); | ||||
| 259 | |||||
| 260 | // | ||||
| 261 | // Standard cpuid(0x1) | ||||
| 262 | // | ||||
| 263 | __ bind(std_cpuid1); | ||||
| 264 | __ movl(rax, 1); | ||||
| 265 | __ cpuid(); | ||||
| 266 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); | ||||
| 267 | __ movl(Address(rsi, 0), rax); | ||||
| 268 | __ movl(Address(rsi, 4), rbx); | ||||
| 269 | __ movl(Address(rsi, 8), rcx); | ||||
| 270 | __ movl(Address(rsi,12), rdx); | ||||
| 271 | |||||
| 272 | // | ||||
| 273 | // Check if OS has enabled XGETBV instruction to access XCR0 | ||||
| 274 | // (OSXSAVE feature flag) and CPU supports AVX | ||||
| 275 | // | ||||
| 276 | __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx | ||||
| 277 | __ cmpl(rcx, 0x18000000); | ||||
| 278 | __ jccb(Assembler::notEqual, sef_cpuid)jccb_0(Assembler::notEqual, sef_cpuid, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 278); // jump if AVX is not supported | ||||
| 279 | |||||
| 280 | // | ||||
| 281 | // XCR0, XFEATURE_ENABLED_MASK register | ||||
| 282 | // | ||||
| 283 | __ xorl(rcx, rcx); // zero for XCR0 register | ||||
| 284 | __ xgetbv(); | ||||
| 285 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); | ||||
| 286 | __ movl(Address(rsi, 0), rax); | ||||
| 287 | __ movl(Address(rsi, 4), rdx); | ||||
| 288 | |||||
| 289 | // | ||||
| 290 | // cpuid(0x7) Structured Extended Features | ||||
| 291 | // | ||||
| 292 | __ bind(sef_cpuid); | ||||
| 293 | __ movl(rax, 7); | ||||
| 294 | __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported? | ||||
| 295 | __ jccb(Assembler::greater, ext_cpuid)jccb_0(Assembler::greater, ext_cpuid, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 295); | ||||
| 296 | |||||
| 297 | __ xorl(rcx, rcx); | ||||
| 298 | __ cpuid(); | ||||
| 299 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); | ||||
| 300 | __ movl(Address(rsi, 0), rax); | ||||
| 301 | __ movl(Address(rsi, 4), rbx); | ||||
| 302 | __ movl(Address(rsi, 8), rcx); | ||||
| 303 | __ movl(Address(rsi, 12), rdx); | ||||
| 304 | |||||
| 305 | // | ||||
| 306 | // Extended cpuid(0x80000000) | ||||
| 307 | // | ||||
| 308 | __ bind(ext_cpuid); | ||||
| 309 | __ movl(rax, 0x80000000); | ||||
| 310 | __ cpuid(); | ||||
| 311 | __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? | ||||
| 312 | __ jcc(Assembler::belowEqual, done); | ||||
| 313 | __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? | ||||
| 314 | __ jcc(Assembler::belowEqual, ext_cpuid1); | ||||
| 315 | __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported? | ||||
| 316 | __ jccb(Assembler::belowEqual, ext_cpuid5)jccb_0(Assembler::belowEqual, ext_cpuid5, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 316); | ||||
| 317 | __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? | ||||
| 318 | __ jccb(Assembler::belowEqual, ext_cpuid7)jccb_0(Assembler::belowEqual, ext_cpuid7, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 318); | ||||
| 319 | __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported? | ||||
| 320 | __ jccb(Assembler::belowEqual, ext_cpuid8)jccb_0(Assembler::belowEqual, ext_cpuid8, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 320); | ||||
| 321 | __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported? | ||||
| 322 | __ jccb(Assembler::below, ext_cpuid8)jccb_0(Assembler::below, ext_cpuid8, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 322); | ||||
| 323 | // | ||||
| 324 | // Extended cpuid(0x8000001E) | ||||
| 325 | // | ||||
| 326 | __ movl(rax, 0x8000001E); | ||||
| 327 | __ cpuid(); | ||||
| 328 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset()))); | ||||
| 329 | __ movl(Address(rsi, 0), rax); | ||||
| 330 | __ movl(Address(rsi, 4), rbx); | ||||
| 331 | __ movl(Address(rsi, 8), rcx); | ||||
| 332 | __ movl(Address(rsi,12), rdx); | ||||
| 333 | |||||
| 334 | // | ||||
| 335 | // Extended cpuid(0x80000008) | ||||
| 336 | // | ||||
| 337 | __ bind(ext_cpuid8); | ||||
| 338 | __ movl(rax, 0x80000008); | ||||
| 339 | __ cpuid(); | ||||
| 340 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); | ||||
| 341 | __ movl(Address(rsi, 0), rax); | ||||
| 342 | __ movl(Address(rsi, 4), rbx); | ||||
| 343 | __ movl(Address(rsi, 8), rcx); | ||||
| 344 | __ movl(Address(rsi,12), rdx); | ||||
| 345 | |||||
| 346 | // | ||||
| 347 | // Extended cpuid(0x80000007) | ||||
| 348 | // | ||||
| 349 | __ bind(ext_cpuid7); | ||||
| 350 | __ movl(rax, 0x80000007); | ||||
| 351 | __ cpuid(); | ||||
| 352 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset()))); | ||||
| 353 | __ movl(Address(rsi, 0), rax); | ||||
| 354 | __ movl(Address(rsi, 4), rbx); | ||||
| 355 | __ movl(Address(rsi, 8), rcx); | ||||
| 356 | __ movl(Address(rsi,12), rdx); | ||||
| 357 | |||||
| 358 | // | ||||
| 359 | // Extended cpuid(0x80000005) | ||||
| 360 | // | ||||
| 361 | __ bind(ext_cpuid5); | ||||
| 362 | __ movl(rax, 0x80000005); | ||||
| 363 | __ cpuid(); | ||||
| 364 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); | ||||
| 365 | __ movl(Address(rsi, 0), rax); | ||||
| 366 | __ movl(Address(rsi, 4), rbx); | ||||
| 367 | __ movl(Address(rsi, 8), rcx); | ||||
| 368 | __ movl(Address(rsi,12), rdx); | ||||
| 369 | |||||
| 370 | // | ||||
| 371 | // Extended cpuid(0x80000001) | ||||
| 372 | // | ||||
| 373 | __ bind(ext_cpuid1); | ||||
| 374 | __ movl(rax, 0x80000001); | ||||
| 375 | __ cpuid(); | ||||
| 376 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); | ||||
| 377 | __ movl(Address(rsi, 0), rax); | ||||
| 378 | __ movl(Address(rsi, 4), rbx); | ||||
| 379 | __ movl(Address(rsi, 8), rcx); | ||||
| 380 | __ movl(Address(rsi,12), rdx); | ||||
| 381 | |||||
| 382 | // | ||||
| 383 | // Check if OS has enabled XGETBV instruction to access XCR0 | ||||
| 384 | // (OSXSAVE feature flag) and CPU supports AVX | ||||
| 385 | // | ||||
| 386 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); | ||||
| 387 | __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx | ||||
| 388 | __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx | ||||
| 389 | __ cmpl(rcx, 0x18000000); | ||||
| 390 | __ jccb(Assembler::notEqual, done)jccb_0(Assembler::notEqual, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 390); // jump if AVX is not supported | ||||
| 391 | |||||
| 392 | __ movl(rax, 0x6); | ||||
| 393 | __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm | ||||
| 394 | __ cmpl(rax, 0x6); | ||||
| 395 | __ jccb(Assembler::equal, start_simd_check)jccb_0(Assembler::equal, start_simd_check, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 395); // return if AVX is not supported | ||||
| 396 | |||||
| 397 | // we need to bridge farther than imm8, so we use this island as a thunk | ||||
| 398 | __ bind(done); | ||||
| 399 | __ jmp(wrapup); | ||||
| 400 | |||||
| 401 | __ bind(start_simd_check); | ||||
| 402 | // | ||||
| 403 | // Some OSs have a bug when upper 128/256bits of YMM/ZMM | ||||
| 404 | // registers are not restored after a signal processing. | ||||
| 405 | // Generate SEGV here (reference through NULL) | ||||
| 406 | // and check upper YMM/ZMM bits after it. | ||||
| 407 | // | ||||
| 408 | intx saved_useavx = UseAVX; | ||||
| 409 | intx saved_usesse = UseSSE; | ||||
| 410 | |||||
| 411 | // If UseAVX is unitialized or is set by the user to include EVEX | ||||
| 412 | if (use_evex
| ||||
| 413 | // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f | ||||
| 414 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); | ||||
| 415 | __ movl(rax, 0x10000); | ||||
| 416 | __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm | ||||
| 417 | __ cmpl(rax, 0x10000); | ||||
| 418 | __ jccb(Assembler::notEqual, legacy_setup)jccb_0(Assembler::notEqual, legacy_setup, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 418); // jump if EVEX is not supported | ||||
| 419 | // check _cpuid_info.xem_xcr0_eax.bits.opmask | ||||
| 420 | // check _cpuid_info.xem_xcr0_eax.bits.zmm512 | ||||
| 421 | // check _cpuid_info.xem_xcr0_eax.bits.zmm32 | ||||
| 422 | __ movl(rax, 0xE0); | ||||
| 423 | __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm | ||||
| 424 | __ cmpl(rax, 0xE0); | ||||
| 425 | __ jccb(Assembler::notEqual, legacy_setup)jccb_0(Assembler::notEqual, legacy_setup, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 425); // jump if EVEX is not supported | ||||
| 426 | |||||
| 427 | if (FLAG_IS_DEFAULT(UseAVX)(JVMFlag::is_default(Flag_UseAVX_enum))) { | ||||
| 428 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); | ||||
| 429 | __ movl(rax, Address(rsi, 0)); | ||||
| 430 | __ cmpl(rax, 0x50654); // If it is Skylake | ||||
| 431 | __ jcc(Assembler::equal, legacy_setup); | ||||
| 432 | } | ||||
| 433 | // EVEX setup: run in lowest evex mode | ||||
| 434 | VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts | ||||
| 435 | UseAVX = 3; | ||||
| 436 | UseSSE = 2; | ||||
| 437 | #ifdef _WINDOWS | ||||
| 438 | // xmm5-xmm15 are not preserved by caller on windows | ||||
| 439 | // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx | ||||
| 440 | __ subptr(rsp, 64); | ||||
| 441 | __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit); | ||||
| 442 | #ifdef _LP641 | ||||
| 443 | __ subptr(rsp, 64); | ||||
| 444 | __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit); | ||||
| 445 | __ subptr(rsp, 64); | ||||
| 446 | __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit); | ||||
| 447 | #endif // _LP64 | ||||
| 448 | #endif // _WINDOWS | ||||
| 449 | |||||
| 450 | // load value into all 64 bytes of zmm7 register | ||||
| 451 | __ movl(rcx, VM_Version::ymm_test_value()); | ||||
| 452 | __ movdl(xmm0, rcx); | ||||
| 453 | __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit); | ||||
| 454 | __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit); | ||||
| 455 | #ifdef _LP641 | ||||
| 456 | __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit); | ||||
| 457 | __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit); | ||||
| 458 | #endif | ||||
| 459 | VM_Version::clean_cpuFeatures(); | ||||
| 460 | __ jmp(save_restore_except); | ||||
| 461 | } | ||||
| 462 | |||||
| 463 | __ bind(legacy_setup); | ||||
| 464 | // AVX setup | ||||
| 465 | VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts | ||||
| 466 | UseAVX = 1; | ||||
| 467 | UseSSE = 2; | ||||
| 468 | #ifdef _WINDOWS | ||||
| 469 | __ subptr(rsp, 32); | ||||
| 470 | __ vmovdqu(Address(rsp, 0), xmm7); | ||||
| 471 | #ifdef _LP641 | ||||
| 472 | __ subptr(rsp, 32); | ||||
| 473 | __ vmovdqu(Address(rsp, 0), xmm8); | ||||
| 474 | __ subptr(rsp, 32); | ||||
| 475 | __ vmovdqu(Address(rsp, 0), xmm15); | ||||
| 476 | #endif // _LP64 | ||||
| 477 | #endif // _WINDOWS | ||||
| 478 | |||||
| 479 | // load value into all 32 bytes of ymm7 register | ||||
| 480 | __ movl(rcx, VM_Version::ymm_test_value()); | ||||
| 481 | |||||
| 482 | __ movdl(xmm0, rcx); | ||||
| 483 | __ pshufd(xmm0, xmm0, 0x00); | ||||
| 484 | __ vinsertf128_high(xmm0, xmm0); | ||||
| 485 | __ vmovdqu(xmm7, xmm0); | ||||
| 486 | #ifdef _LP641 | ||||
| 487 | __ vmovdqu(xmm8, xmm0); | ||||
| 488 | __ vmovdqu(xmm15, xmm0); | ||||
| 489 | #endif | ||||
| 490 | VM_Version::clean_cpuFeatures(); | ||||
| 491 | |||||
| 492 | __ bind(save_restore_except); | ||||
| 493 | __ xorl(rsi, rsi); | ||||
| 494 | VM_Version::set_cpuinfo_segv_addr(__ pc()); | ||||
| 495 | // Generate SEGV | ||||
| 496 | __ movl(rax, Address(rsi, 0)); | ||||
| 497 | |||||
| 498 | VM_Version::set_cpuinfo_cont_addr(__ pc()); | ||||
| 499 | // Returns here after signal. Save xmm0 to check it later. | ||||
| 500 | |||||
| 501 | // If UseAVX is unitialized or is set by the user to include EVEX | ||||
| 502 | if (use_evex) { | ||||
| 503 | // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f | ||||
| 504 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); | ||||
| 505 | __ movl(rax, 0x10000); | ||||
| 506 | __ andl(rax, Address(rsi, 4)); | ||||
| 507 | __ cmpl(rax, 0x10000); | ||||
| 508 | __ jcc(Assembler::notEqual, legacy_save_restore); | ||||
| 509 | // check _cpuid_info.xem_xcr0_eax.bits.opmask | ||||
| 510 | // check _cpuid_info.xem_xcr0_eax.bits.zmm512 | ||||
| 511 | // check _cpuid_info.xem_xcr0_eax.bits.zmm32 | ||||
| 512 | __ movl(rax, 0xE0); | ||||
| 513 | __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm | ||||
| 514 | __ cmpl(rax, 0xE0); | ||||
| 515 | __ jcc(Assembler::notEqual, legacy_save_restore); | ||||
| 516 | |||||
| 517 | if (FLAG_IS_DEFAULT(UseAVX)(JVMFlag::is_default(Flag_UseAVX_enum))) { | ||||
| 518 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); | ||||
| 519 | __ movl(rax, Address(rsi, 0)); | ||||
| 520 | __ cmpl(rax, 0x50654); // If it is Skylake | ||||
| 521 | __ jcc(Assembler::equal, legacy_save_restore); | ||||
| 522 | } | ||||
| 523 | // EVEX check: run in lowest evex mode | ||||
| 524 | VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts | ||||
| 525 | UseAVX = 3; | ||||
| 526 | UseSSE = 2; | ||||
| 527 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset()))); | ||||
| 528 | __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit); | ||||
| 529 | __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit); | ||||
| 530 | #ifdef _LP641 | ||||
| 531 | __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit); | ||||
| 532 | __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit); | ||||
| 533 | #endif | ||||
| 534 | |||||
| 535 | #ifdef _WINDOWS | ||||
| 536 | #ifdef _LP641 | ||||
| 537 | __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit); | ||||
| 538 | __ addptr(rsp, 64); | ||||
| 539 | __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit); | ||||
| 540 | __ addptr(rsp, 64); | ||||
| 541 | #endif // _LP64 | ||||
| 542 | __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit); | ||||
| 543 | __ addptr(rsp, 64); | ||||
| 544 | #endif // _WINDOWS | ||||
| 545 | generate_vzeroupper(wrapup); | ||||
| 546 | VM_Version::clean_cpuFeatures(); | ||||
| 547 | UseAVX = saved_useavx; | ||||
| 548 | UseSSE = saved_usesse; | ||||
| 549 | __ jmp(wrapup); | ||||
| 550 | } | ||||
| 551 | |||||
| 552 | __ bind(legacy_save_restore); | ||||
| 553 | // AVX check | ||||
| 554 | VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts | ||||
| 555 | UseAVX = 1; | ||||
| 556 | UseSSE = 2; | ||||
| 557 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset()))); | ||||
| 558 | __ vmovdqu(Address(rsi, 0), xmm0); | ||||
| 559 | __ vmovdqu(Address(rsi, 32), xmm7); | ||||
| 560 | #ifdef _LP641 | ||||
| 561 | __ vmovdqu(Address(rsi, 64), xmm8); | ||||
| 562 | __ vmovdqu(Address(rsi, 96), xmm15); | ||||
| 563 | #endif | ||||
| 564 | |||||
| 565 | #ifdef _WINDOWS | ||||
| 566 | #ifdef _LP641 | ||||
| 567 | __ vmovdqu(xmm15, Address(rsp, 0)); | ||||
| 568 | __ addptr(rsp, 32); | ||||
| 569 | __ vmovdqu(xmm8, Address(rsp, 0)); | ||||
| 570 | __ addptr(rsp, 32); | ||||
| 571 | #endif // _LP64 | ||||
| 572 | __ vmovdqu(xmm7, Address(rsp, 0)); | ||||
| 573 | __ addptr(rsp, 32); | ||||
| 574 | #endif // _WINDOWS | ||||
| 575 | generate_vzeroupper(wrapup); | ||||
| 576 | VM_Version::clean_cpuFeatures(); | ||||
| 577 | UseAVX = saved_useavx; | ||||
| 578 | UseSSE = saved_usesse; | ||||
| 579 | |||||
| 580 | __ bind(wrapup); | ||||
| 581 | __ popf(); | ||||
| 582 | __ pop(rsi); | ||||
| 583 | __ pop(rbx); | ||||
| 584 | __ pop(rbp); | ||||
| 585 | __ ret(0); | ||||
| 586 | |||||
| 587 | # undef __ | ||||
| 588 | |||||
| 589 | return start; | ||||
| 590 | }; | ||||
  // Emits a vzeroupper into the stub being generated, unless the CPU is a
  // Knights-family Xeon Phi (vendor 'GenuineIntel' with a matching family/
  // model signature), in which case control jumps straight to L_wrapup.
  // Expects rbp to hold the CpuidInfo pointer; clobbers rsi and rcx.
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG' — vendor string check (GenuineIntel)
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0); // mask family/model bits of cpuid(1).eax
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }
| 609 | address generate_detect_virt() { | ||||
| 610 | StubCodeMark mark(this, "VM_Version", "detect_virt_stub"); | ||||
| 611 | # define __ _masm-> | ||||
| 612 | |||||
| 613 | address start = __ pc(); | ||||
| 614 | |||||
| 615 | // Evacuate callee-saved registers | ||||
| 616 | __ push(rbp); | ||||
| 617 | __ push(rbx); | ||||
| 618 | __ push(rsi); // for Windows | ||||
| 619 | |||||
| 620 | #ifdef _LP641 | ||||
| 621 | __ mov(rax, c_rarg0); // CPUID leaf | ||||
| 622 | __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx) | ||||
| 623 | #else | ||||
| 624 | __ movptr(rax, Address(rsp, 16)); // CPUID leaf | ||||
| 625 | __ movptr(rsi, Address(rsp, 20)); // register array address | ||||
| 626 | #endif | ||||
| 627 | |||||
| 628 | __ cpuid(); | ||||
| 629 | |||||
| 630 | // Store result to register array | ||||
| 631 | __ movl(Address(rsi, 0), rax); | ||||
| 632 | __ movl(Address(rsi, 4), rbx); | ||||
| 633 | __ movl(Address(rsi, 8), rcx); | ||||
| 634 | __ movl(Address(rsi, 12), rdx); | ||||
| 635 | |||||
| 636 | // Epilogue | ||||
| 637 | __ pop(rsi); | ||||
| 638 | __ pop(rbx); | ||||
| 639 | __ pop(rbp); | ||||
| 640 | __ ret(0); | ||||
| 641 | |||||
| 642 | # undef __ | ||||
| 643 | |||||
| 644 | return start; | ||||
| 645 | }; | ||||
| 646 | |||||
| 647 | |||||
| 648 | address generate_getCPUIDBrandString(void) { | ||||
| 649 | // Flags to test CPU type. | ||||
| 650 | const uint32_t HS_EFL_AC = 0x40000; | ||||
| 651 | const uint32_t HS_EFL_ID = 0x200000; | ||||
| 652 | // Values for when we don't have a CPUID instruction. | ||||
| 653 | const int CPU_FAMILY_SHIFT = 8; | ||||
| 654 | const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); | ||||
| 655 | const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); | ||||
| 656 | |||||
| 657 | Label detect_486, cpu486, detect_586, done, ext_cpuid; | ||||
| 658 | |||||
| 659 | StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub"); | ||||
| 660 | # define __ _masm-> | ||||
| 661 | |||||
| 662 | address start = __ pc(); | ||||
| 663 | |||||
| 664 | // | ||||
| 665 | // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info); | ||||
| 666 | // | ||||
| 667 | // LP64: rcx and rdx are first and second argument registers on windows | ||||
| 668 | |||||
| 669 | __ push(rbp); | ||||
| 670 | #ifdef _LP641 | ||||
| 671 | __ mov(rbp, c_rarg0); // cpuid_info address | ||||
| 672 | #else | ||||
| 673 | __ movptr(rbp, Address(rsp, 8)); // cpuid_info address | ||||
| 674 | #endif | ||||
| 675 | __ push(rbx); | ||||
| 676 | __ push(rsi); | ||||
| 677 | __ pushf(); // preserve rbx, and flags | ||||
| 678 | __ pop(rax); | ||||
| 679 | __ push(rax); | ||||
| 680 | __ mov(rcx, rax); | ||||
| 681 | // | ||||
| 682 | // if we are unable to change the AC flag, we have a 386 | ||||
| 683 | // | ||||
| 684 | __ xorl(rax, HS_EFL_AC); | ||||
| 685 | __ push(rax); | ||||
| 686 | __ popf(); | ||||
| 687 | __ pushf(); | ||||
| 688 | __ pop(rax); | ||||
| 689 | __ cmpptr(rax, rcx); | ||||
| 690 | __ jccb(Assembler::notEqual, detect_486)jccb_0(Assembler::notEqual, detect_486, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 690); | ||||
| 691 | |||||
| 692 | __ movl(rax, CPU_FAMILY_386); | ||||
| 693 | __ jmp(done); | ||||
| 694 | |||||
| 695 | // | ||||
| 696 | // If we are unable to change the ID flag, we have a 486 which does | ||||
| 697 | // not support the "cpuid" instruction. | ||||
| 698 | // | ||||
| 699 | __ bind(detect_486); | ||||
| 700 | __ mov(rax, rcx); | ||||
| 701 | __ xorl(rax, HS_EFL_ID); | ||||
| 702 | __ push(rax); | ||||
| 703 | __ popf(); | ||||
| 704 | __ pushf(); | ||||
| 705 | __ pop(rax); | ||||
| 706 | __ cmpptr(rcx, rax); | ||||
| 707 | __ jccb(Assembler::notEqual, detect_586)jccb_0(Assembler::notEqual, detect_586, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 707); | ||||
| 708 | |||||
| 709 | __ bind(cpu486); | ||||
| 710 | __ movl(rax, CPU_FAMILY_486); | ||||
| 711 | __ jmp(done); | ||||
| 712 | |||||
| 713 | // | ||||
| 714 | // At this point, we have a chip which supports the "cpuid" instruction | ||||
| 715 | // | ||||
| 716 | __ bind(detect_586); | ||||
| 717 | __ xorl(rax, rax); | ||||
| 718 | __ cpuid(); | ||||
| 719 | __ orl(rax, rax); | ||||
| 720 | __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input | ||||
| 721 | // value of at least 1, we give up and | ||||
| 722 | // assume a 486 | ||||
| 723 | |||||
| 724 | // | ||||
| 725 | // Extended cpuid(0x80000000) for processor brand string detection | ||||
| 726 | // | ||||
| 727 | __ bind(ext_cpuid); | ||||
| 728 | __ movl(rax, CPUID_EXTENDED_FN0x80000000); | ||||
| 729 | __ cpuid(); | ||||
| 730 | __ cmpl(rax, CPUID_EXTENDED_FN_40x80000004); | ||||
| 731 | __ jcc(Assembler::below, done); | ||||
| 732 | |||||
| 733 | // | ||||
| 734 | // Extended cpuid(0x80000002) // first 16 bytes in brand string | ||||
| 735 | // | ||||
| 736 | __ movl(rax, CPUID_EXTENDED_FN_20x80000002); | ||||
| 737 | __ cpuid(); | ||||
| 738 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset()))); | ||||
| 739 | __ movl(Address(rsi, 0), rax); | ||||
| 740 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset()))); | ||||
| 741 | __ movl(Address(rsi, 0), rbx); | ||||
| 742 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset()))); | ||||
| 743 | __ movl(Address(rsi, 0), rcx); | ||||
| 744 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset()))); | ||||
| 745 | __ movl(Address(rsi,0), rdx); | ||||
| 746 | |||||
| 747 | // | ||||
| 748 | // Extended cpuid(0x80000003) // next 16 bytes in brand string | ||||
| 749 | // | ||||
| 750 | __ movl(rax, CPUID_EXTENDED_FN_30x80000003); | ||||
| 751 | __ cpuid(); | ||||
| 752 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset()))); | ||||
| 753 | __ movl(Address(rsi, 0), rax); | ||||
| 754 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset()))); | ||||
| 755 | __ movl(Address(rsi, 0), rbx); | ||||
| 756 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset()))); | ||||
| 757 | __ movl(Address(rsi, 0), rcx); | ||||
| 758 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset()))); | ||||
| 759 | __ movl(Address(rsi,0), rdx); | ||||
| 760 | |||||
| 761 | // | ||||
| 762 | // Extended cpuid(0x80000004) // last 16 bytes in brand string | ||||
| 763 | // | ||||
| 764 | __ movl(rax, CPUID_EXTENDED_FN_40x80000004); | ||||
| 765 | __ cpuid(); | ||||
| 766 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset()))); | ||||
| 767 | __ movl(Address(rsi, 0), rax); | ||||
| 768 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset()))); | ||||
| 769 | __ movl(Address(rsi, 0), rbx); | ||||
| 770 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset()))); | ||||
| 771 | __ movl(Address(rsi, 0), rcx); | ||||
| 772 | __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset()))); | ||||
| 773 | __ movl(Address(rsi,0), rdx); | ||||
| 774 | |||||
| 775 | // | ||||
| 776 | // return | ||||
| 777 | // | ||||
| 778 | __ bind(done); | ||||
| 779 | __ popf(); | ||||
| 780 | __ pop(rsi); | ||||
| 781 | __ pop(rbx); | ||||
| 782 | __ pop(rbp); | ||||
| 783 | __ ret(0); | ||||
| 784 | |||||
| 785 | # undef __ | ||||
| 786 | |||||
| 787 | return start; | ||||
| 788 | }; | ||||
| 789 | }; | ||||
| 790 | |||||
| 791 | void VM_Version::get_processor_features() { | ||||
| 792 | |||||
| 793 | _cpu = 4; // 486 by default | ||||
| 794 | _model = 0; | ||||
| 795 | _stepping = 0; | ||||
| 796 | _features = 0; | ||||
| 797 | _logical_processors_per_package = 1; | ||||
| 798 | // i486 internal cache is both I&D and has a 16-byte line size | ||||
| 799 | _L1_data_cache_line_size = 16; | ||||
| 800 | |||||
| 801 | // Get raw processor info | ||||
| 802 | |||||
| 803 | get_cpu_info_stub(&_cpuid_info); | ||||
| 804 | |||||
| 805 | assert_is_initialized(); | ||||
| 806 | _cpu = extended_cpu_family(); | ||||
| 807 | _model = extended_cpu_model(); | ||||
| 808 | _stepping = cpu_stepping(); | ||||
| 809 | |||||
| 810 | if (cpu_family() > 4) { // it supports CPUID | ||||
| 811 | _features = feature_flags(); | ||||
| 812 | // Logical processors are only available on P4s and above, | ||||
| 813 | // and only if hyperthreading is available. | ||||
| 814 | _logical_processors_per_package = logical_processor_count(); | ||||
| 815 | _L1_data_cache_line_size = L1_line_size(); | ||||
| 816 | } | ||||
| 817 | |||||
| 818 | _supports_cx8 = supports_cmpxchg8(); | ||||
| 819 | // xchg and xadd instructions | ||||
| 820 | _supports_atomic_getset4 = true; | ||||
| 821 | _supports_atomic_getadd4 = true; | ||||
| 822 | LP64_ONLY(_supports_atomic_getset8 = true)_supports_atomic_getset8 = true; | ||||
| 823 | LP64_ONLY(_supports_atomic_getadd8 = true)_supports_atomic_getadd8 = true; | ||||
| 824 | |||||
| 825 | #ifdef _LP641 | ||||
| 826 | // OS should support SSE for x64 and hardware should support at least SSE2. | ||||
| 827 | if (!VM_Version::supports_sse2()) { | ||||
| 828 | vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); | ||||
| 829 | } | ||||
| 830 | // in 64 bit the use of SSE2 is the minimum | ||||
| 831 | if (UseSSE < 2) UseSSE = 2; | ||||
| 832 | #endif | ||||
| 833 | |||||
| 834 | #ifdef AMD641 | ||||
| 835 | // flush_icache_stub have to be generated first. | ||||
| 836 | // That is why Icache line size is hard coded in ICache class, | ||||
| 837 | // see icache_x86.hpp. It is also the reason why we can't use | ||||
| 838 | // clflush instruction in 32-bit VM since it could be running | ||||
| 839 | // on CPU which does not support it. | ||||
| 840 | // | ||||
| 841 | // The only thing we can do is to verify that flushed | ||||
| 842 | // ICache::line_size has correct value. | ||||
| 843 | guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported")do { if (!(_cpuid_info.std_cpuid1_edx.bits.clflush != 0)) { ( *g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 843, "guarantee(" "_cpuid_info.std_cpuid1_edx.bits.clflush != 0" ") failed", "clflush is not supported"); ::breakpoint(); } } while (0); | ||||
| 844 | // clflush_size is size in quadwords (8 bytes). | ||||
| 845 | guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported")do { if (!(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8) ) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 845, "guarantee(" "_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8" ") failed", "such clflush size is not supported"); ::breakpoint (); } } while (0); | ||||
| 846 | #endif | ||||
| 847 | |||||
| 848 | #ifdef _LP641 | ||||
| 849 | // assigning this field effectively enables Unsafe.writebackMemory() | ||||
| 850 | // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero | ||||
| 851 | // that is only implemented on x86_64 and only if the OS plays ball | ||||
| 852 | if (os::supports_map_sync()) { | ||||
| 853 | // publish data cache line flush size to generic field, otherwise | ||||
| 854 | // let if default to zero thereby disabling writeback | ||||
| 855 | _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8; | ||||
| 856 | } | ||||
| 857 | #endif | ||||
| 858 | // If the OS doesn't support SSE, we can't use this feature even if the HW does | ||||
| 859 | if (!os::supports_sse()) | ||||
| 860 | _features &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2); | ||||
| 861 | |||||
| 862 | if (UseSSE < 4) { | ||||
| 863 | _features &= ~CPU_SSE4_1; | ||||
| 864 | _features &= ~CPU_SSE4_2; | ||||
| 865 | } | ||||
| 866 | |||||
| 867 | if (UseSSE < 3) { | ||||
| 868 | _features &= ~CPU_SSE3; | ||||
| 869 | _features &= ~CPU_SSSE3; | ||||
| 870 | _features &= ~CPU_SSE4A; | ||||
| 871 | } | ||||
| 872 | |||||
| 873 | if (UseSSE < 2) | ||||
| 874 | _features &= ~CPU_SSE2; | ||||
| 875 | |||||
| 876 | if (UseSSE < 1) | ||||
| 877 | _features &= ~CPU_SSE; | ||||
| 878 | |||||
| 879 | //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0. | ||||
| 880 | if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) { | ||||
| 881 | UseAVX = 0; | ||||
| 882 | } | ||||
| 883 | |||||
| 884 | // first try initial setting and detect what we can support | ||||
| 885 | int use_avx_limit = 0; | ||||
| 886 | if (UseAVX > 0) { | ||||
| 887 | if (UseAVX > 2 && supports_evex()) { | ||||
| 888 | use_avx_limit = 3; | ||||
| 889 | } else if (UseAVX > 1 && supports_avx2()) { | ||||
| 890 | use_avx_limit = 2; | ||||
| 891 | } else if (UseAVX > 0 && supports_avx()) { | ||||
| 892 | use_avx_limit = 1; | ||||
| 893 | } else { | ||||
| 894 | use_avx_limit = 0; | ||||
| 895 | } | ||||
| 896 | } | ||||
| 897 | if (FLAG_IS_DEFAULT(UseAVX)(JVMFlag::is_default(Flag_UseAVX_enum))) { | ||||
| 898 | // Don't use AVX-512 on older Skylakes unless explicitly requested. | ||||
| 899 | if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) { | ||||
| 900 | FLAG_SET_DEFAULT(UseAVX, 2)((UseAVX) = (2)); | ||||
| 901 | } else { | ||||
| 902 | FLAG_SET_DEFAULT(UseAVX, use_avx_limit)((UseAVX) = (use_avx_limit)); | ||||
| 903 | } | ||||
| 904 | } | ||||
| 905 | if (UseAVX > use_avx_limit) { | ||||
| 906 | warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", (int) UseAVX, use_avx_limit); | ||||
| 907 | FLAG_SET_DEFAULT(UseAVX, use_avx_limit)((UseAVX) = (use_avx_limit)); | ||||
| 908 | } else if (UseAVX < 0) { | ||||
| 909 | warning("UseAVX=%d is not valid, setting it to UseAVX=0", (int) UseAVX); | ||||
| 910 | FLAG_SET_DEFAULT(UseAVX, 0)((UseAVX) = (0)); | ||||
| 911 | } | ||||
| 912 | |||||
| 913 | if (UseAVX < 3) { | ||||
| 914 | _features &= ~CPU_AVX512F; | ||||
| 915 | _features &= ~CPU_AVX512DQ; | ||||
| 916 | _features &= ~CPU_AVX512CD; | ||||
| 917 | _features &= ~CPU_AVX512BW; | ||||
| 918 | _features &= ~CPU_AVX512VL; | ||||
| 919 | _features &= ~CPU_AVX512_VPOPCNTDQ; | ||||
| 920 | _features &= ~CPU_AVX512_VPCLMULQDQ; | ||||
| 921 | _features &= ~CPU_AVX512_VAES; | ||||
| 922 | _features &= ~CPU_AVX512_VNNI; | ||||
| 923 | _features &= ~CPU_AVX512_VBMI; | ||||
| 924 | _features &= ~CPU_AVX512_VBMI2; | ||||
| 925 | } | ||||
| 926 | |||||
| 927 | if (UseAVX < 2) | ||||
| 928 | _features &= ~CPU_AVX2; | ||||
| 929 | |||||
| 930 | if (UseAVX < 1) { | ||||
| 931 | _features &= ~CPU_AVX; | ||||
| 932 | _features &= ~CPU_VZEROUPPER; | ||||
| 933 | } | ||||
| 934 | |||||
| 935 | if (logical_processors_per_package() == 1) { | ||||
| 936 | // HT processor could be installed on a system which doesn't support HT. | ||||
| 937 | _features &= ~CPU_HT; | ||||
| 938 | } | ||||
| 939 | |||||
| 940 | if (is_intel()) { // Intel cpus specific settings | ||||
| 941 | if (is_knights_family()) { | ||||
| 942 | _features &= ~CPU_VZEROUPPER; | ||||
| 943 | _features &= ~CPU_AVX512BW; | ||||
| 944 | _features &= ~CPU_AVX512VL; | ||||
| 945 | _features &= ~CPU_AVX512DQ; | ||||
| 946 | _features &= ~CPU_AVX512_VNNI; | ||||
| 947 | _features &= ~CPU_AVX512_VAES; | ||||
| 948 | _features &= ~CPU_AVX512_VPOPCNTDQ; | ||||
| 949 | _features &= ~CPU_AVX512_VPCLMULQDQ; | ||||
| 950 | _features &= ~CPU_AVX512_VBMI; | ||||
| 951 | _features &= ~CPU_AVX512_VBMI2; | ||||
| 952 | _features &= ~CPU_CLWB; | ||||
| 953 | _features &= ~CPU_FLUSHOPT; | ||||
| 954 | } | ||||
| 955 | } | ||||
| 956 | |||||
| 957 | if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)(JVMFlag::is_default(Flag_IntelJccErratumMitigation_enum))) { | ||||
| 958 | _has_intel_jcc_erratum = compute_has_intel_jcc_erratum(); | ||||
| 959 | } else { | ||||
| 960 | _has_intel_jcc_erratum = IntelJccErratumMitigation; | ||||
| 961 | } | ||||
| 962 | |||||
| 963 | char buf[512]; | ||||
| 964 | int res = jio_snprintf( | ||||
| 965 | buf, sizeof(buf), | ||||
| 966 | "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x", | ||||
| 967 | cores_per_cpu(), threads_per_core(), | ||||
| 968 | cpu_family(), _model, _stepping, os::cpu_microcode_revision()); | ||||
| 969 | assert(res > 0, "not enough temporary space allocated")do { if (!(res > 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 969, "assert(" "res > 0" ") failed", "not enough temporary space allocated" ); ::breakpoint(); } } while (0); | ||||
| 970 | insert_features_names(buf + res, sizeof(buf) - res, _features_names); | ||||
| 971 | |||||
| 972 | _features_string = os::strdup(buf); | ||||
| 973 | |||||
| 974 | // UseSSE is set to the smaller of what hardware supports and what | ||||
| 975 | // the command line requires. I.e., you cannot set UseSSE to 2 on | ||||
| 976 | // older Pentiums which do not support it. | ||||
| 977 | int use_sse_limit = 0; | ||||
| 978 | if (UseSSE > 0) { | ||||
| 979 | if (UseSSE > 3 && supports_sse4_1()) { | ||||
| 980 | use_sse_limit = 4; | ||||
| 981 | } else if (UseSSE > 2 && supports_sse3()) { | ||||
| 982 | use_sse_limit = 3; | ||||
| 983 | } else if (UseSSE > 1 && supports_sse2()) { | ||||
| 984 | use_sse_limit = 2; | ||||
| 985 | } else if (UseSSE > 0 && supports_sse()) { | ||||
| 986 | use_sse_limit = 1; | ||||
| 987 | } else { | ||||
| 988 | use_sse_limit = 0; | ||||
| 989 | } | ||||
| 990 | } | ||||
| 991 | if (FLAG_IS_DEFAULT(UseSSE)(JVMFlag::is_default(Flag_UseSSE_enum))) { | ||||
| 992 | FLAG_SET_DEFAULT(UseSSE, use_sse_limit)((UseSSE) = (use_sse_limit)); | ||||
| 993 | } else if (UseSSE > use_sse_limit) { | ||||
| 994 | warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", (int) UseSSE, use_sse_limit); | ||||
| 995 | FLAG_SET_DEFAULT(UseSSE, use_sse_limit)((UseSSE) = (use_sse_limit)); | ||||
| 996 | } else if (UseSSE < 0) { | ||||
| 997 | warning("UseSSE=%d is not valid, setting it to UseSSE=0", (int) UseSSE); | ||||
| 998 | FLAG_SET_DEFAULT(UseSSE, 0)((UseSSE) = (0)); | ||||
| 999 | } | ||||
| 1000 | |||||
| 1001 | // Use AES instructions if available. | ||||
| 1002 | if (supports_aes()) { | ||||
| 1003 | if (FLAG_IS_DEFAULT(UseAES)(JVMFlag::is_default(Flag_UseAES_enum))) { | ||||
| 1004 | FLAG_SET_DEFAULT(UseAES, true)((UseAES) = (true)); | ||||
| 1005 | } | ||||
| 1006 | if (!UseAES) { | ||||
| 1007 | if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)(JVMFlag::is_default(Flag_UseAESIntrinsics_enum))) { | ||||
| 1008 | warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled."); | ||||
| 1009 | } | ||||
| 1010 | FLAG_SET_DEFAULT(UseAESIntrinsics, false)((UseAESIntrinsics) = (false)); | ||||
| 1011 | } else { | ||||
| 1012 | if (UseSSE > 2) { | ||||
| 1013 | if (FLAG_IS_DEFAULT(UseAESIntrinsics)(JVMFlag::is_default(Flag_UseAESIntrinsics_enum))) { | ||||
| 1014 | FLAG_SET_DEFAULT(UseAESIntrinsics, true)((UseAESIntrinsics) = (true)); | ||||
| 1015 | } | ||||
| 1016 | } else { | ||||
| 1017 | // The AES intrinsic stubs require AES instruction support (of course) | ||||
| 1018 | // but also require sse3 mode or higher for instructions it use. | ||||
| 1019 | if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)(JVMFlag::is_default(Flag_UseAESIntrinsics_enum))) { | ||||
| 1020 | warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled."); | ||||
| 1021 | } | ||||
| 1022 | FLAG_SET_DEFAULT(UseAESIntrinsics, false)((UseAESIntrinsics) = (false)); | ||||
| 1023 | } | ||||
| 1024 | |||||
| 1025 | // --AES-CTR begins-- | ||||
| 1026 | if (!UseAESIntrinsics) { | ||||
| 1027 | if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)(JVMFlag::is_default(Flag_UseAESCTRIntrinsics_enum))) { | ||||
| 1028 | warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled."); | ||||
| 1029 | FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false)((UseAESCTRIntrinsics) = (false)); | ||||
| 1030 | } | ||||
| 1031 | } else { | ||||
| 1032 | if (supports_sse4_1()) { | ||||
| 1033 | if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)(JVMFlag::is_default(Flag_UseAESCTRIntrinsics_enum))) { | ||||
| 1034 | FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true)((UseAESCTRIntrinsics) = (true)); | ||||
| 1035 | } | ||||
| 1036 | } else { | ||||
| 1037 | // The AES-CTR intrinsic stubs require AES instruction support (of course) | ||||
| 1038 | // but also require sse4.1 mode or higher for instructions it use. | ||||
| 1039 | if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)(JVMFlag::is_default(Flag_UseAESCTRIntrinsics_enum))) { | ||||
| 1040 | warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled."); | ||||
| 1041 | } | ||||
| 1042 | FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false)((UseAESCTRIntrinsics) = (false)); | ||||
| 1043 | } | ||||
| 1044 | } | ||||
| 1045 | // --AES-CTR ends-- | ||||
| 1046 | } | ||||
| 1047 | } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) { | ||||
| 1048 | if (UseAES && !FLAG_IS_DEFAULT(UseAES)(JVMFlag::is_default(Flag_UseAES_enum))) { | ||||
| 1049 | warning("AES instructions are not available on this CPU"); | ||||
| 1050 | FLAG_SET_DEFAULT(UseAES, false)((UseAES) = (false)); | ||||
| 1051 | } | ||||
| 1052 | if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)(JVMFlag::is_default(Flag_UseAESIntrinsics_enum))) { | ||||
| 1053 | warning("AES intrinsics are not available on this CPU"); | ||||
| 1054 | FLAG_SET_DEFAULT(UseAESIntrinsics, false)((UseAESIntrinsics) = (false)); | ||||
| 1055 | } | ||||
| 1056 | if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)(JVMFlag::is_default(Flag_UseAESCTRIntrinsics_enum))) { | ||||
| 1057 | warning("AES-CTR intrinsics are not available on this CPU"); | ||||
| 1058 | FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false)((UseAESCTRIntrinsics) = (false)); | ||||
| 1059 | } | ||||
| 1060 | } | ||||
| 1061 | |||||
| 1062 | // Use CLMUL instructions if available. | ||||
| 1063 | if (supports_clmul()) { | ||||
| 1064 | if (FLAG_IS_DEFAULT(UseCLMUL)(JVMFlag::is_default(Flag_UseCLMUL_enum))) { | ||||
| 1065 | UseCLMUL = true; | ||||
| 1066 | } | ||||
| 1067 | } else if (UseCLMUL) { | ||||
| 1068 | if (!FLAG_IS_DEFAULT(UseCLMUL)(JVMFlag::is_default(Flag_UseCLMUL_enum))) | ||||
| 1069 | warning("CLMUL instructions not available on this CPU (AVX may also be required)"); | ||||
| 1070 | FLAG_SET_DEFAULT(UseCLMUL, false)((UseCLMUL) = (false)); | ||||
| 1071 | } | ||||
| 1072 | |||||
| 1073 | if (UseCLMUL && (UseSSE > 2)) { | ||||
| 1074 | if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)(JVMFlag::is_default(Flag_UseCRC32Intrinsics_enum))) { | ||||
| 1075 | UseCRC32Intrinsics = true; | ||||
| 1076 | } | ||||
| 1077 | } else if (UseCRC32Intrinsics) { | ||||
| 1078 | if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)(JVMFlag::is_default(Flag_UseCRC32Intrinsics_enum))) | ||||
| 1079 | warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)"); | ||||
| 1080 | FLAG_SET_DEFAULT(UseCRC32Intrinsics, false)((UseCRC32Intrinsics) = (false)); | ||||
| 1081 | } | ||||
| 1082 | |||||
| 1083 | #ifdef _LP641 | ||||
| 1084 | if (supports_avx2()) { | ||||
| 1085 | if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)(JVMFlag::is_default(Flag_UseAdler32Intrinsics_enum))) { | ||||
| 1086 | UseAdler32Intrinsics = true; | ||||
| 1087 | } | ||||
| 1088 | } else if (UseAdler32Intrinsics) { | ||||
| 1089 | if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)(JVMFlag::is_default(Flag_UseAdler32Intrinsics_enum))) { | ||||
| 1090 | warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)"); | ||||
| 1091 | } | ||||
| 1092 | FLAG_SET_DEFAULT(UseAdler32Intrinsics, false)((UseAdler32Intrinsics) = (false)); | ||||
| 1093 | } | ||||
| 1094 | #else | ||||
| 1095 | if (UseAdler32Intrinsics) { | ||||
| 1096 | warning("Adler32Intrinsics not available on this CPU."); | ||||
| 1097 | FLAG_SET_DEFAULT(UseAdler32Intrinsics, false)((UseAdler32Intrinsics) = (false)); | ||||
| 1098 | } | ||||
| 1099 | #endif | ||||
| 1100 | |||||
| 1101 | if (supports_sse4_2() && supports_clmul()) { | ||||
| 1102 | if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)(JVMFlag::is_default(Flag_UseCRC32CIntrinsics_enum))) { | ||||
| 1103 | UseCRC32CIntrinsics = true; | ||||
| 1104 | } | ||||
| 1105 | } else if (UseCRC32CIntrinsics) { | ||||
| 1106 | if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)(JVMFlag::is_default(Flag_UseCRC32CIntrinsics_enum))) { | ||||
| 1107 | warning("CRC32C intrinsics are not available on this CPU"); | ||||
| 1108 | } | ||||
| 1109 | FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false)((UseCRC32CIntrinsics) = (false)); | ||||
| 1110 | } | ||||
| 1111 | |||||
| 1112 | // GHASH/GCM intrinsics | ||||
| 1113 | if (UseCLMUL && (UseSSE > 2)) { | ||||
| 1114 | if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)(JVMFlag::is_default(Flag_UseGHASHIntrinsics_enum))) { | ||||
| 1115 | UseGHASHIntrinsics = true; | ||||
| 1116 | } | ||||
| 1117 | } else if (UseGHASHIntrinsics) { | ||||
| 1118 | if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)(JVMFlag::is_default(Flag_UseGHASHIntrinsics_enum))) | ||||
| 1119 | warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU"); | ||||
| 1120 | FLAG_SET_DEFAULT(UseGHASHIntrinsics, false)((UseGHASHIntrinsics) = (false)); | ||||
| 1121 | } | ||||
| 1122 | |||||
| 1123 | // Base64 Intrinsics (Check the condition for which the intrinsic will be active) | ||||
| 1124 | if ((UseAVX > 2) && supports_avx512vl() && supports_avx512bw()) { | ||||
| 1125 | if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)(JVMFlag::is_default(Flag_UseBASE64Intrinsics_enum))) { | ||||
| 1126 | UseBASE64Intrinsics = true; | ||||
| 1127 | } | ||||
| 1128 | } else if (UseBASE64Intrinsics) { | ||||
| 1129 | if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics)(JVMFlag::is_default(Flag_UseBASE64Intrinsics_enum))) | ||||
| 1130 | warning("Base64 intrinsic requires EVEX instructions on this CPU"); | ||||
| 1131 | FLAG_SET_DEFAULT(UseBASE64Intrinsics, false)((UseBASE64Intrinsics) = (false)); | ||||
| 1132 | } | ||||
| 1133 | |||||
| 1134 | if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions | ||||
| 1135 | if (FLAG_IS_DEFAULT(UseFMA)(JVMFlag::is_default(Flag_UseFMA_enum))) { | ||||
| 1136 | UseFMA = true; | ||||
| 1137 | } | ||||
| 1138 | } else if (UseFMA) { | ||||
| 1139 | warning("FMA instructions are not available on this CPU"); | ||||
| 1140 | FLAG_SET_DEFAULT(UseFMA, false)((UseFMA) = (false)); | ||||
| 1141 | } | ||||
| 1142 | |||||
| 1143 | if (FLAG_IS_DEFAULT(UseMD5Intrinsics)(JVMFlag::is_default(Flag_UseMD5Intrinsics_enum))) { | ||||
| 1144 | UseMD5Intrinsics = true; | ||||
| 1145 | } | ||||
| 1146 | |||||
| 1147 | if (supports_sha() LP64_ONLY(|| supports_avx2() && supports_bmi2())|| supports_avx2() && supports_bmi2()) { | ||||
| 1148 | if (FLAG_IS_DEFAULT(UseSHA)(JVMFlag::is_default(Flag_UseSHA_enum))) { | ||||
| 1149 | UseSHA = true; | ||||
| 1150 | } | ||||
| 1151 | } else if (UseSHA) { | ||||
| 1152 | warning("SHA instructions are not available on this CPU"); | ||||
| 1153 | FLAG_SET_DEFAULT(UseSHA, false)((UseSHA) = (false)); | ||||
| 1154 | } | ||||
| 1155 | |||||
| 1156 | if (supports_sha() && supports_sse4_1() && UseSHA) { | ||||
| 1157 | if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)(JVMFlag::is_default(Flag_UseSHA1Intrinsics_enum))) { | ||||
| 1158 | FLAG_SET_DEFAULT(UseSHA1Intrinsics, true)((UseSHA1Intrinsics) = (true)); | ||||
| 1159 | } | ||||
| 1160 | } else if (UseSHA1Intrinsics) { | ||||
| 1161 | warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); | ||||
| 1162 | FLAG_SET_DEFAULT(UseSHA1Intrinsics, false)((UseSHA1Intrinsics) = (false)); | ||||
| 1163 | } | ||||
| 1164 | |||||
| 1165 | if (supports_sse4_1() && UseSHA) { | ||||
| 1166 | if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)(JVMFlag::is_default(Flag_UseSHA256Intrinsics_enum))) { | ||||
| 1167 | FLAG_SET_DEFAULT(UseSHA256Intrinsics, true)((UseSHA256Intrinsics) = (true)); | ||||
| 1168 | } | ||||
| 1169 | } else if (UseSHA256Intrinsics) { | ||||
| 1170 | warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); | ||||
| 1171 | FLAG_SET_DEFAULT(UseSHA256Intrinsics, false)((UseSHA256Intrinsics) = (false)); | ||||
| 1172 | } | ||||
| 1173 | |||||
| 1174 | #ifdef _LP641 | ||||
| 1175 | // These are only supported on 64-bit | ||||
| 1176 | if (UseSHA && supports_avx2() && supports_bmi2()) { | ||||
| 1177 | if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)(JVMFlag::is_default(Flag_UseSHA512Intrinsics_enum))) { | ||||
| 1178 | FLAG_SET_DEFAULT(UseSHA512Intrinsics, true)((UseSHA512Intrinsics) = (true)); | ||||
| 1179 | } | ||||
| 1180 | } else | ||||
| 1181 | #endif | ||||
| 1182 | if (UseSHA512Intrinsics) { | ||||
| 1183 | warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); | ||||
| 1184 | FLAG_SET_DEFAULT(UseSHA512Intrinsics, false)((UseSHA512Intrinsics) = (false)); | ||||
| 1185 | } | ||||
| 1186 | |||||
| 1187 | if (UseSHA3Intrinsics) { | ||||
| 1188 | warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); | ||||
| 1189 | FLAG_SET_DEFAULT(UseSHA3Intrinsics, false)((UseSHA3Intrinsics) = (false)); | ||||
| 1190 | } | ||||
| 1191 | |||||
| 1192 | if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { | ||||
| 1193 | FLAG_SET_DEFAULT(UseSHA, false)((UseSHA) = (false)); | ||||
| 1194 | } | ||||
| 1195 | |||||
| 1196 | if (!supports_rtm() && UseRTMLocking) { | ||||
| 1197 | vm_exit_during_initialization("RTM instructions are not available on this CPU"); | ||||
| 1198 | } | ||||
| 1199 | |||||
| 1200 | #if INCLUDE_RTM_OPT1 | ||||
| 1201 | if (UseRTMLocking) { | ||||
| 1202 | if (!CompilerConfig::is_c2_enabled()) { | ||||
| 1203 | // Only C2 does RTM locking optimization. | ||||
| 1204 | vm_exit_during_initialization("RTM locking optimization is not supported in this VM"); | ||||
| 1205 | } | ||||
| 1206 | if (is_intel_family_core()) { | ||||
| 1207 | if ((_model == CPU_MODEL_HASWELL_E3) || | ||||
| 1208 | (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) || | ||||
| 1209 | (_model == CPU_MODEL_BROADWELL && _stepping < 4)) { | ||||
| 1210 | // currently a collision between SKL and HSW_E3 | ||||
| 1211 | if (!UnlockExperimentalVMOptions && UseAVX < 3) { | ||||
| 1212 | vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this " | ||||
| 1213 | "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag."); | ||||
| 1214 | } else { | ||||
| 1215 | warning("UseRTMLocking is only available as experimental option on this platform."); | ||||
| 1216 | } | ||||
| 1217 | } | ||||
| 1218 | } | ||||
| 1219 | if (!FLAG_IS_CMDLINE(UseRTMLocking)(JVMFlag::is_cmdline(Flag_UseRTMLocking_enum))) { | ||||
| 1220 | // RTM locking should be used only for applications with | ||||
| 1221 | // high lock contention. For now we do not use it by default. | ||||
| 1222 | vm_exit_during_initialization("UseRTMLocking flag should be only set on command line"); | ||||
| 1223 | } | ||||
| 1224 | } else { // !UseRTMLocking | ||||
| 1225 | if (UseRTMForStackLocks) { | ||||
| 1226 | if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)(JVMFlag::is_default(Flag_UseRTMForStackLocks_enum))) { | ||||
| 1227 | warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off"); | ||||
| 1228 | } | ||||
| 1229 | FLAG_SET_DEFAULT(UseRTMForStackLocks, false)((UseRTMForStackLocks) = (false)); | ||||
| 1230 | } | ||||
| 1231 | if (UseRTMDeopt) { | ||||
| 1232 | FLAG_SET_DEFAULT(UseRTMDeopt, false)((UseRTMDeopt) = (false)); | ||||
| 1233 | } | ||||
| 1234 | if (PrintPreciseRTMLockingStatistics) { | ||||
| 1235 | FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false)((PrintPreciseRTMLockingStatistics) = (false)); | ||||
| 1236 | } | ||||
| 1237 | } | ||||
| 1238 | #else | ||||
| 1239 | if (UseRTMLocking) { | ||||
| 1240 | // Only C2 does RTM locking optimization. | ||||
| 1241 | vm_exit_during_initialization("RTM locking optimization is not supported in this VM"); | ||||
| 1242 | } | ||||
| 1243 | #endif | ||||
| 1244 | |||||
| 1245 | #ifdef COMPILER21 | ||||
| 1246 | if (UseFPUForSpilling) { | ||||
| 1247 | if (UseSSE < 2) { | ||||
| 1248 | // Only supported with SSE2+ | ||||
| 1249 | FLAG_SET_DEFAULT(UseFPUForSpilling, false)((UseFPUForSpilling) = (false)); | ||||
| 1250 | } | ||||
| 1251 | } | ||||
| 1252 | #endif | ||||
| 1253 | |||||
| 1254 | #if COMPILER2_OR_JVMCI1 | ||||
| 1255 | int max_vector_size = 0; | ||||
| 1256 | if (UseSSE < 2) { | ||||
| 1257 | // Vectors (in XMM) are only supported with SSE2+ | ||||
| 1258 | // SSE is always 2 on x64. | ||||
| 1259 | max_vector_size = 0; | ||||
| 1260 | } else if (UseAVX == 0 || !os_supports_avx_vectors()) { | ||||
| 1261 | // 16 byte vectors (in XMM) are supported with SSE2+ | ||||
| 1262 | max_vector_size = 16; | ||||
| 1263 | } else if (UseAVX == 1 || UseAVX == 2) { | ||||
| 1264 | // 32 bytes vectors (in YMM) are only supported with AVX+ | ||||
| 1265 | max_vector_size = 32; | ||||
| 1266 | } else if (UseAVX > 2) { | ||||
| 1267 | // 64 bytes vectors (in ZMM) are only supported with AVX 3 | ||||
| 1268 | max_vector_size = 64; | ||||
| 1269 | } | ||||
| 1270 | |||||
| 1271 | #ifdef _LP641 | ||||
| 1272 | int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit | ||||
| 1273 | #else | ||||
| 1274 | int min_vector_size = 0; | ||||
| 1275 | #endif | ||||
| 1276 | |||||
| 1277 | if (!FLAG_IS_DEFAULT(MaxVectorSize)(JVMFlag::is_default(Flag_MaxVectorSize_enum))) { | ||||
| 1278 | if (MaxVectorSize < min_vector_size) { | ||||
| 1279 | warning("MaxVectorSize must be at least %i on this platform", min_vector_size); | ||||
| 1280 | FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size)((MaxVectorSize) = (min_vector_size)); | ||||
| 1281 | } | ||||
| 1282 | if (MaxVectorSize > max_vector_size) { | ||||
| 1283 | warning("MaxVectorSize must be at most %i on this platform", max_vector_size); | ||||
| 1284 | FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size)((MaxVectorSize) = (max_vector_size)); | ||||
| 1285 | } | ||||
| 1286 | if (!is_power_of_2(MaxVectorSize)) { | ||||
| 1287 | warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size); | ||||
| 1288 | FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size)((MaxVectorSize) = (max_vector_size)); | ||||
| 1289 | } | ||||
| 1290 | } else { | ||||
| 1291 | // If default, use highest supported configuration | ||||
| 1292 | FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size)((MaxVectorSize) = (max_vector_size)); | ||||
| 1293 | } | ||||
| 1294 | |||||
| 1295 | #if defined(COMPILER21) && defined(ASSERT1) | ||||
| 1296 | if (MaxVectorSize > 0) { | ||||
| 1297 | if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) { | ||||
| 1298 | tty->print_cr("State of YMM registers after signal handle:"); | ||||
| 1299 | int nreg = 2 LP64_ONLY(+2)+2; | ||||
| 1300 | const char* ymm_name[4] = {"0", "7", "8", "15"}; | ||||
| 1301 | for (int i = 0; i < nreg; i++) { | ||||
| 1302 | tty->print("YMM%s:", ymm_name[i]); | ||||
| 1303 | for (int j = 7; j >=0; j--) { | ||||
| 1304 | tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]); | ||||
| 1305 | } | ||||
| 1306 | tty->cr(); | ||||
| 1307 | } | ||||
| 1308 | } | ||||
| 1309 | } | ||||
| 1310 | #endif // COMPILER2 && ASSERT | ||||
| 1311 | |||||
| 1312 | #ifdef _LP641 | ||||
| 1313 | if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)(JVMFlag::is_default(Flag_UseMultiplyToLenIntrinsic_enum))) { | ||||
| 1314 | UseMultiplyToLenIntrinsic = true; | ||||
| 1315 | } | ||||
| 1316 | if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)(JVMFlag::is_default(Flag_UseSquareToLenIntrinsic_enum))) { | ||||
| 1317 | UseSquareToLenIntrinsic = true; | ||||
| 1318 | } | ||||
| 1319 | if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)(JVMFlag::is_default(Flag_UseMulAddIntrinsic_enum))) { | ||||
| 1320 | UseMulAddIntrinsic = true; | ||||
| 1321 | } | ||||
| 1322 | if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)(JVMFlag::is_default(Flag_UseMontgomeryMultiplyIntrinsic_enum ))) { | ||||
| 1323 | UseMontgomeryMultiplyIntrinsic = true; | ||||
| 1324 | } | ||||
| 1325 | if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)(JVMFlag::is_default(Flag_UseMontgomerySquareIntrinsic_enum))) { | ||||
| 1326 | UseMontgomerySquareIntrinsic = true; | ||||
| 1327 | } | ||||
| 1328 | #else | ||||
| 1329 | if (UseMultiplyToLenIntrinsic) { | ||||
| 1330 | if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)(JVMFlag::is_default(Flag_UseMultiplyToLenIntrinsic_enum))) { | ||||
| 1331 | warning("multiplyToLen intrinsic is not available in 32-bit VM"); | ||||
| 1332 | } | ||||
| 1333 | FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false)((UseMultiplyToLenIntrinsic) = (false)); | ||||
| 1334 | } | ||||
| 1335 | if (UseMontgomeryMultiplyIntrinsic) { | ||||
| 1336 | if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)(JVMFlag::is_default(Flag_UseMontgomeryMultiplyIntrinsic_enum ))) { | ||||
| 1337 | warning("montgomeryMultiply intrinsic is not available in 32-bit VM"); | ||||
| 1338 | } | ||||
| 1339 | FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false)((UseMontgomeryMultiplyIntrinsic) = (false)); | ||||
| 1340 | } | ||||
| 1341 | if (UseMontgomerySquareIntrinsic) { | ||||
| 1342 | if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)(JVMFlag::is_default(Flag_UseMontgomerySquareIntrinsic_enum))) { | ||||
| 1343 | warning("montgomerySquare intrinsic is not available in 32-bit VM"); | ||||
| 1344 | } | ||||
| 1345 | FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false)((UseMontgomerySquareIntrinsic) = (false)); | ||||
| 1346 | } | ||||
| 1347 | if (UseSquareToLenIntrinsic) { | ||||
| 1348 | if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)(JVMFlag::is_default(Flag_UseSquareToLenIntrinsic_enum))) { | ||||
| 1349 | warning("squareToLen intrinsic is not available in 32-bit VM"); | ||||
| 1350 | } | ||||
| 1351 | FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false)((UseSquareToLenIntrinsic) = (false)); | ||||
| 1352 | } | ||||
| 1353 | if (UseMulAddIntrinsic) { | ||||
| 1354 | if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)(JVMFlag::is_default(Flag_UseMulAddIntrinsic_enum))) { | ||||
| 1355 | warning("mulAdd intrinsic is not available in 32-bit VM"); | ||||
| 1356 | } | ||||
| 1357 | FLAG_SET_DEFAULT(UseMulAddIntrinsic, false)((UseMulAddIntrinsic) = (false)); | ||||
| 1358 | } | ||||
| 1359 | #endif // _LP64 | ||||
| 1360 | #endif // COMPILER2_OR_JVMCI | ||||
| 1361 | |||||
| 1362 | // On new cpus instructions which update whole XMM register should be used | ||||
| 1363 | // to prevent partial register stall due to dependencies on high half. | ||||
| 1364 | // | ||||
| 1365 | // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) | ||||
| 1366 | // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) | ||||
| 1367 | // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). | ||||
| 1368 | // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). | ||||
| 1369 | |||||
| 1370 | |||||
| 1371 | if (is_zx()) { // ZX cpus specific settings | ||||
| 1372 | if (FLAG_IS_DEFAULT(UseStoreImmI16)(JVMFlag::is_default(Flag_UseStoreImmI16_enum))) { | ||||
| 1373 | UseStoreImmI16 = false; // don't use it on ZX cpus | ||||
| 1374 | } | ||||
| 1375 | if ((cpu_family() == 6) || (cpu_family() == 7)) { | ||||
| 1376 | if (FLAG_IS_DEFAULT(UseAddressNop)(JVMFlag::is_default(Flag_UseAddressNop_enum))) { | ||||
| 1377 | // Use it on all ZX cpus | ||||
| 1378 | UseAddressNop = true; | ||||
| 1379 | } | ||||
| 1380 | } | ||||
| 1381 | if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)(JVMFlag::is_default(Flag_UseXmmLoadAndClearUpper_enum))) { | ||||
| 1382 | UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus | ||||
| 1383 | } | ||||
| 1384 | if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)(JVMFlag::is_default(Flag_UseXmmRegToRegMoveAll_enum))) { | ||||
| 1385 | if (supports_sse3()) { | ||||
| 1386 | UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus | ||||
| 1387 | } else { | ||||
| 1388 | UseXmmRegToRegMoveAll = false; | ||||
| 1389 | } | ||||
| 1390 | } | ||||
| 1391 | if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus | ||||
| 1392 | #ifdef COMPILER21 | ||||
| 1393 | if (FLAG_IS_DEFAULT(MaxLoopPad)(JVMFlag::is_default(Flag_MaxLoopPad_enum))) { | ||||
| 1394 | // For new ZX cpus do the next optimization: | ||||
| 1395 | // don't align the beginning of a loop if there are enough instructions | ||||
| 1396 | // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) | ||||
| 1397 | // in current fetch line (OptoLoopAlignment) or the padding | ||||
| 1398 | // is big (> MaxLoopPad). | ||||
| 1399 | // Set MaxLoopPad to 11 for new ZX cpus to reduce number of | ||||
| 1400 | // generated NOP instructions. 11 is the largest size of one | ||||
| 1401 | // address NOP instruction '0F 1F' (see Assembler::nop(i)). | ||||
| 1402 | MaxLoopPad = 11; | ||||
| 1403 | } | ||||
| 1404 | #endif // COMPILER2 | ||||
| 1405 | if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)(JVMFlag::is_default(Flag_UseXMMForArrayCopy_enum))) { | ||||
| 1406 | UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus | ||||
| 1407 | } | ||||
| 1408 | if (supports_sse4_2()) { // new ZX cpus | ||||
| 1409 | if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)(JVMFlag::is_default(Flag_UseUnalignedLoadStores_enum))) { | ||||
| 1410 | UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus | ||||
| 1411 | } | ||||
| 1412 | } | ||||
| 1413 | if (supports_sse4_2()) { | ||||
| 1414 | if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)(JVMFlag::is_default(Flag_UseSSE42Intrinsics_enum))) { | ||||
| 1415 | FLAG_SET_DEFAULT(UseSSE42Intrinsics, true)((UseSSE42Intrinsics) = (true)); | ||||
| 1416 | } | ||||
| 1417 | } else { | ||||
| 1418 | if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)(JVMFlag::is_default(Flag_UseAESIntrinsics_enum))) { | ||||
| 1419 | warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); | ||||
| 1420 | } | ||||
| 1421 | FLAG_SET_DEFAULT(UseSSE42Intrinsics, false)((UseSSE42Intrinsics) = (false)); | ||||
| 1422 | } | ||||
| 1423 | } | ||||
| 1424 | |||||
| 1425 | if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)(JVMFlag::is_default(Flag_AllocatePrefetchInstr_enum)) && supports_3dnow_prefetch()) { | ||||
| 1426 | FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3)((AllocatePrefetchInstr) = (3)); | ||||
| 1427 | } | ||||
| 1428 | } | ||||
| 1429 | |||||
| 1430 | if (is_amd_family()) { // AMD cpus specific settings | ||||
| 1431 | if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)(JVMFlag::is_default(Flag_UseAddressNop_enum))) { | ||||
| 1432 | // Use it on new AMD cpus starting from Opteron. | ||||
| 1433 | UseAddressNop = true; | ||||
| 1434 | } | ||||
| 1435 | if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)(JVMFlag::is_default(Flag_UseNewLongLShift_enum))) { | ||||
| 1436 | // Use it on new AMD cpus starting from Opteron. | ||||
| 1437 | UseNewLongLShift = true; | ||||
| 1438 | } | ||||
| 1439 | if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)(JVMFlag::is_default(Flag_UseXmmLoadAndClearUpper_enum))) { | ||||
| 1440 | if (supports_sse4a()) { | ||||
| 1441 | UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron | ||||
| 1442 | } else { | ||||
| 1443 | UseXmmLoadAndClearUpper = false; | ||||
| 1444 | } | ||||
| 1445 | } | ||||
| 1446 | if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)(JVMFlag::is_default(Flag_UseXmmRegToRegMoveAll_enum))) { | ||||
| 1447 | if (supports_sse4a()) { | ||||
| 1448 | UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' | ||||
| 1449 | } else { | ||||
| 1450 | UseXmmRegToRegMoveAll = false; | ||||
| 1451 | } | ||||
| 1452 | } | ||||
| 1453 | if (FLAG_IS_DEFAULT(UseXmmI2F)(JVMFlag::is_default(Flag_UseXmmI2F_enum))) { | ||||
| 1454 | if (supports_sse4a()) { | ||||
| 1455 | UseXmmI2F = true; | ||||
| 1456 | } else { | ||||
| 1457 | UseXmmI2F = false; | ||||
| 1458 | } | ||||
| 1459 | } | ||||
| 1460 | if (FLAG_IS_DEFAULT(UseXmmI2D)(JVMFlag::is_default(Flag_UseXmmI2D_enum))) { | ||||
| 1461 | if (supports_sse4a()) { | ||||
| 1462 | UseXmmI2D = true; | ||||
| 1463 | } else { | ||||
| 1464 | UseXmmI2D = false; | ||||
| 1465 | } | ||||
| 1466 | } | ||||
| 1467 | if (supports_sse4_2()) { | ||||
| 1468 | if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)(JVMFlag::is_default(Flag_UseSSE42Intrinsics_enum))) { | ||||
| 1469 | FLAG_SET_DEFAULT(UseSSE42Intrinsics, true)((UseSSE42Intrinsics) = (true)); | ||||
| 1470 | } | ||||
| 1471 | } else { | ||||
| 1472 | if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)(JVMFlag::is_default(Flag_UseAESIntrinsics_enum))) { | ||||
| 1473 | warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); | ||||
| 1474 | } | ||||
| 1475 | FLAG_SET_DEFAULT(UseSSE42Intrinsics, false)((UseSSE42Intrinsics) = (false)); | ||||
| 1476 | } | ||||
| 1477 | |||||
| 1478 | // some defaults for AMD family 15h | ||||
| 1479 | if (cpu_family() == 0x15) { | ||||
| 1480 | // On family 15h processors default is no sw prefetch | ||||
| 1481 | if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)(JVMFlag::is_default(Flag_AllocatePrefetchStyle_enum))) { | ||||
| 1482 | FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0)((AllocatePrefetchStyle) = (0)); | ||||
| 1483 | } | ||||
| 1484 | // Also, if some other prefetch style is specified, default instruction type is PREFETCHW | ||||
| 1485 | if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)(JVMFlag::is_default(Flag_AllocatePrefetchInstr_enum))) { | ||||
| 1486 | FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3)((AllocatePrefetchInstr) = (3)); | ||||
| 1487 | } | ||||
| 1488 | // On family 15h processors use XMM and UnalignedLoadStores for Array Copy | ||||
| 1489 | if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)(JVMFlag::is_default(Flag_UseXMMForArrayCopy_enum))) { | ||||
| 1490 | FLAG_SET_DEFAULT(UseXMMForArrayCopy, true)((UseXMMForArrayCopy) = (true)); | ||||
| 1491 | } | ||||
| 1492 | if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)(JVMFlag::is_default(Flag_UseUnalignedLoadStores_enum))) { | ||||
| 1493 | FLAG_SET_DEFAULT(UseUnalignedLoadStores, true)((UseUnalignedLoadStores) = (true)); | ||||
| 1494 | } | ||||
| 1495 | } | ||||
| 1496 | |||||
| 1497 | #ifdef COMPILER21 | ||||
| 1498 | if (cpu_family() < 0x17 && MaxVectorSize > 16) { | ||||
| 1499 | // Limit vectors size to 16 bytes on AMD cpus < 17h. | ||||
| 1500 | FLAG_SET_DEFAULT(MaxVectorSize, 16)((MaxVectorSize) = (16)); | ||||
| 1501 | } | ||||
| 1502 | #endif // COMPILER2 | ||||
| 1503 | |||||
| 1504 | // Some defaults for AMD family >= 17h && Hygon family 18h | ||||
| 1505 | if (cpu_family() >= 0x17) { | ||||
| 1506 | // On family >=17h processors use XMM and UnalignedLoadStores | ||||
| 1507 | // for Array Copy | ||||
| 1508 | if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)(JVMFlag::is_default(Flag_UseXMMForArrayCopy_enum))) { | ||||
| 1509 | FLAG_SET_DEFAULT(UseXMMForArrayCopy, true)((UseXMMForArrayCopy) = (true)); | ||||
| 1510 | } | ||||
| 1511 | if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)(JVMFlag::is_default(Flag_UseUnalignedLoadStores_enum))) { | ||||
| 1512 | FLAG_SET_DEFAULT(UseUnalignedLoadStores, true)((UseUnalignedLoadStores) = (true)); | ||||
| 1513 | } | ||||
| 1514 | #ifdef COMPILER21 | ||||
| 1515 | if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)(JVMFlag::is_default(Flag_UseFPUForSpilling_enum))) { | ||||
| 1516 | FLAG_SET_DEFAULT(UseFPUForSpilling, true)((UseFPUForSpilling) = (true)); | ||||
| 1517 | } | ||||
| 1518 | #endif | ||||
| 1519 | } | ||||
| 1520 | } | ||||
| 1521 | |||||
| 1522 | if (is_intel()) { // Intel cpus specific settings | ||||
| 1523 | if (FLAG_IS_DEFAULT(UseStoreImmI16)(JVMFlag::is_default(Flag_UseStoreImmI16_enum))) { | ||||
| 1524 | UseStoreImmI16 = false; // don't use it on Intel cpus | ||||
| 1525 | } | ||||
| 1526 | if (cpu_family() == 6 || cpu_family() == 15) { | ||||
| 1527 | if (FLAG_IS_DEFAULT(UseAddressNop)(JVMFlag::is_default(Flag_UseAddressNop_enum))) { | ||||
| 1528 | // Use it on all Intel cpus starting from PentiumPro | ||||
| 1529 | UseAddressNop = true; | ||||
| 1530 | } | ||||
| 1531 | } | ||||
| 1532 | if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)(JVMFlag::is_default(Flag_UseXmmLoadAndClearUpper_enum))) { | ||||
| 1533 | UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus | ||||
| 1534 | } | ||||
| 1535 | if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)(JVMFlag::is_default(Flag_UseXmmRegToRegMoveAll_enum))) { | ||||
| 1536 | if (supports_sse3()) { | ||||
| 1537 | UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus | ||||
| 1538 | } else { | ||||
| 1539 | UseXmmRegToRegMoveAll = false; | ||||
| 1540 | } | ||||
| 1541 | } | ||||
| 1542 | if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus | ||||
| 1543 | #ifdef COMPILER21 | ||||
| 1544 | if (FLAG_IS_DEFAULT(MaxLoopPad)(JVMFlag::is_default(Flag_MaxLoopPad_enum))) { | ||||
| 1545 | // For new Intel cpus do the next optimization: | ||||
| 1546 | // don't align the beginning of a loop if there are enough instructions | ||||
| 1547 | // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) | ||||
| 1548 | // in current fetch line (OptoLoopAlignment) or the padding | ||||
| 1549 | // is big (> MaxLoopPad). | ||||
| 1550 | // Set MaxLoopPad to 11 for new Intel cpus to reduce number of | ||||
| 1551 | // generated NOP instructions. 11 is the largest size of one | ||||
| 1552 | // address NOP instruction '0F 1F' (see Assembler::nop(i)). | ||||
| 1553 | MaxLoopPad = 11; | ||||
| 1554 | } | ||||
| 1555 | #endif // COMPILER2 | ||||
| 1556 | |||||
| 1557 | if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)(JVMFlag::is_default(Flag_UseXMMForArrayCopy_enum))) { | ||||
| 1558 | UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus | ||||
| 1559 | } | ||||
| 1560 | if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus | ||||
| 1561 | if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)(JVMFlag::is_default(Flag_UseUnalignedLoadStores_enum))) { | ||||
| 1562 | UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus | ||||
| 1563 | } | ||||
| 1564 | } | ||||
| 1565 | if (supports_sse4_2()) { | ||||
| 1566 | if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)(JVMFlag::is_default(Flag_UseSSE42Intrinsics_enum))) { | ||||
| 1567 | FLAG_SET_DEFAULT(UseSSE42Intrinsics, true)((UseSSE42Intrinsics) = (true)); | ||||
| 1568 | } | ||||
| 1569 | } else { | ||||
| 1570 | if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)(JVMFlag::is_default(Flag_UseAESIntrinsics_enum))) { | ||||
| 1571 | warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); | ||||
| 1572 | } | ||||
| 1573 | FLAG_SET_DEFAULT(UseSSE42Intrinsics, false)((UseSSE42Intrinsics) = (false)); | ||||
| 1574 | } | ||||
| 1575 | } | ||||
| 1576 | if (is_atom_family() || is_knights_family()) { | ||||
| 1577 | #ifdef COMPILER21 | ||||
| 1578 | if (FLAG_IS_DEFAULT(OptoScheduling)(JVMFlag::is_default(Flag_OptoScheduling_enum))) { | ||||
| 1579 | OptoScheduling = true; | ||||
| 1580 | } | ||||
| 1581 | #endif | ||||
| 1582 | if (supports_sse4_2()) { // Silvermont | ||||
| 1583 | if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)(JVMFlag::is_default(Flag_UseUnalignedLoadStores_enum))) { | ||||
| 1584 | UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus | ||||
| 1585 | } | ||||
| 1586 | } | ||||
| 1587 | if (FLAG_IS_DEFAULT(UseIncDec)(JVMFlag::is_default(Flag_UseIncDec_enum))) { | ||||
| 1588 | FLAG_SET_DEFAULT(UseIncDec, false)((UseIncDec) = (false)); | ||||
| 1589 | } | ||||
| 1590 | } | ||||
| 1591 | if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)(JVMFlag::is_default(Flag_AllocatePrefetchInstr_enum)) && supports_3dnow_prefetch()) { | ||||
| 1592 | FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3)((AllocatePrefetchInstr) = (3)); | ||||
| 1593 | } | ||||
| 1594 | #ifdef COMPILER21 | ||||
| 1595 | if (UseAVX > 2) { | ||||
| 1596 | if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)(JVMFlag::is_default(Flag_ArrayOperationPartialInlineSize_enum )) || | ||||
| 1597 | (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)(JVMFlag::is_default(Flag_ArrayOperationPartialInlineSize_enum )) && | ||||
| 1598 | ArrayOperationPartialInlineSize != 0 && | ||||
| 1599 | ArrayOperationPartialInlineSize != 16 && | ||||
| 1600 | ArrayOperationPartialInlineSize != 32 && | ||||
| 1601 | ArrayOperationPartialInlineSize != 64)) { | ||||
| 1602 | int inline_size = 0; | ||||
| 1603 | if (MaxVectorSize >= 64 && AVX3Threshold == 0) { | ||||
| 1604 | inline_size = 64; | ||||
| 1605 | } else if (MaxVectorSize >= 32) { | ||||
| 1606 | inline_size = 32; | ||||
| 1607 | } else if (MaxVectorSize >= 16) { | ||||
| 1608 | inline_size = 16; | ||||
| 1609 | } | ||||
| 1610 | if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)(JVMFlag::is_default(Flag_ArrayOperationPartialInlineSize_enum ))) { | ||||
| 1611 | warning("Setting ArrayOperationPartialInlineSize as %d", inline_size); | ||||
| 1612 | } | ||||
| 1613 | ArrayOperationPartialInlineSize = inline_size; | ||||
| 1614 | } | ||||
| 1615 | |||||
| 1616 | if (ArrayOperationPartialInlineSize > MaxVectorSize) { | ||||
| 1617 | ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0; | ||||
| 1618 | if (ArrayOperationPartialInlineSize) { | ||||
| 1619 | warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize" INTX_FORMAT"%" "l" "d" ")", MaxVectorSize); | ||||
| 1620 | } else { | ||||
| 1621 | warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT"%" "l" "d", ArrayOperationPartialInlineSize); | ||||
| 1622 | } | ||||
| 1623 | } | ||||
| 1624 | } | ||||
| 1625 | #endif | ||||
| 1626 | } | ||||
| 1627 | |||||
| 1628 | #ifdef COMPILER21 | ||||
| 1629 | if (FLAG_IS_DEFAULT(OptimizeFill)(JVMFlag::is_default(Flag_OptimizeFill_enum))) { | ||||
| 1630 | if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) { | ||||
| 1631 | OptimizeFill = false; | ||||
| 1632 | } | ||||
| 1633 | } | ||||
| 1634 | #endif | ||||
| 1635 | |||||
| 1636 | #ifdef _LP641 | ||||
| 1637 | if (UseSSE42Intrinsics) { | ||||
| 1638 | if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)(JVMFlag::is_default(Flag_UseVectorizedMismatchIntrinsic_enum ))) { | ||||
| 1639 | UseVectorizedMismatchIntrinsic = true; | ||||
| 1640 | } | ||||
| 1641 | } else if (UseVectorizedMismatchIntrinsic) { | ||||
| 1642 | if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)(JVMFlag::is_default(Flag_UseVectorizedMismatchIntrinsic_enum ))) | ||||
| 1643 | warning("vectorizedMismatch intrinsics are not available on this CPU"); | ||||
| 1644 | FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false)((UseVectorizedMismatchIntrinsic) = (false)); | ||||
| 1645 | } | ||||
| 1646 | #else | ||||
| 1647 | if (UseVectorizedMismatchIntrinsic) { | ||||
| 1648 | if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)(JVMFlag::is_default(Flag_UseVectorizedMismatchIntrinsic_enum ))) { | ||||
| 1649 | warning("vectorizedMismatch intrinsic is not available in 32-bit VM"); | ||||
| 1650 | } | ||||
| 1651 | FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false)((UseVectorizedMismatchIntrinsic) = (false)); | ||||
| 1652 | } | ||||
| 1653 | #endif // _LP64 | ||||
| 1654 | |||||
| 1655 | // Use count leading zeros count instruction if available. | ||||
| 1656 | if (supports_lzcnt()) { | ||||
| 1657 | if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)(JVMFlag::is_default(Flag_UseCountLeadingZerosInstruction_enum ))) { | ||||
| 1658 | UseCountLeadingZerosInstruction = true; | ||||
| 1659 | } | ||||
| 1660 | } else if (UseCountLeadingZerosInstruction) { | ||||
| 1661 | warning("lzcnt instruction is not available on this CPU"); | ||||
| 1662 | FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false)((UseCountLeadingZerosInstruction) = (false)); | ||||
| 1663 | } | ||||
| 1664 | |||||
| 1665 | // Use count trailing zeros instruction if available | ||||
| 1666 | if (supports_bmi1()) { | ||||
| 1667 | // tzcnt does not require VEX prefix | ||||
| 1668 | if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)(JVMFlag::is_default(Flag_UseCountTrailingZerosInstruction_enum ))) { | ||||
| 1669 | if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)(JVMFlag::is_default(Flag_UseBMI1Instructions_enum))) { | ||||
| 1670 | // Don't use tzcnt if BMI1 is switched off on command line. | ||||
| 1671 | UseCountTrailingZerosInstruction = false; | ||||
| 1672 | } else { | ||||
| 1673 | UseCountTrailingZerosInstruction = true; | ||||
| 1674 | } | ||||
| 1675 | } | ||||
| 1676 | } else if (UseCountTrailingZerosInstruction) { | ||||
| 1677 | warning("tzcnt instruction is not available on this CPU"); | ||||
| 1678 | FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false)((UseCountTrailingZerosInstruction) = (false)); | ||||
| 1679 | } | ||||
| 1680 | |||||
| 1681 | // BMI instructions (except tzcnt) use an encoding with VEX prefix. | ||||
| 1682 | // VEX prefix is generated only when AVX > 0. | ||||
| 1683 | if (supports_bmi1() && supports_avx()) { | ||||
| 1684 | if (FLAG_IS_DEFAULT(UseBMI1Instructions)(JVMFlag::is_default(Flag_UseBMI1Instructions_enum))) { | ||||
| 1685 | UseBMI1Instructions = true; | ||||
| 1686 | } | ||||
| 1687 | } else if (UseBMI1Instructions) { | ||||
| 1688 | warning("BMI1 instructions are not available on this CPU (AVX is also required)"); | ||||
| 1689 | FLAG_SET_DEFAULT(UseBMI1Instructions, false)((UseBMI1Instructions) = (false)); | ||||
| 1690 | } | ||||
| 1691 | |||||
| 1692 | if (supports_bmi2() && supports_avx()) { | ||||
| 1693 | if (FLAG_IS_DEFAULT(UseBMI2Instructions)(JVMFlag::is_default(Flag_UseBMI2Instructions_enum))) { | ||||
| 1694 | UseBMI2Instructions = true; | ||||
| 1695 | } | ||||
| 1696 | } else if (UseBMI2Instructions) { | ||||
| 1697 | warning("BMI2 instructions are not available on this CPU (AVX is also required)"); | ||||
| 1698 | FLAG_SET_DEFAULT(UseBMI2Instructions, false)((UseBMI2Instructions) = (false)); | ||||
| 1699 | } | ||||
| 1700 | |||||
| 1701 | // Use population count instruction if available. | ||||
| 1702 | if (supports_popcnt()) { | ||||
| 1703 | if (FLAG_IS_DEFAULT(UsePopCountInstruction)(JVMFlag::is_default(Flag_UsePopCountInstruction_enum))) { | ||||
| 1704 | UsePopCountInstruction = true; | ||||
| 1705 | } | ||||
| 1706 | } else if (UsePopCountInstruction) { | ||||
| 1707 | warning("POPCNT instruction is not available on this CPU"); | ||||
| 1708 | FLAG_SET_DEFAULT(UsePopCountInstruction, false)((UsePopCountInstruction) = (false)); | ||||
| 1709 | } | ||||
| 1710 | |||||
| 1711 | // Use fast-string operations if available. | ||||
| 1712 | if (supports_erms()) { | ||||
| 1713 | if (FLAG_IS_DEFAULT(UseFastStosb)(JVMFlag::is_default(Flag_UseFastStosb_enum))) { | ||||
| 1714 | UseFastStosb = true; | ||||
| 1715 | } | ||||
| 1716 | } else if (UseFastStosb) { | ||||
| 1717 | warning("fast-string operations are not available on this CPU"); | ||||
| 1718 | FLAG_SET_DEFAULT(UseFastStosb, false)((UseFastStosb) = (false)); | ||||
| 1719 | } | ||||
| 1720 | |||||
| 1721 | // For AMD Processors use XMM/YMM MOVDQU instructions | ||||
| 1722 | // for Object Initialization as default | ||||
| 1723 | if (is_amd() && cpu_family() >= 0x19) { | ||||
| 1724 | if (FLAG_IS_DEFAULT(UseFastStosb)(JVMFlag::is_default(Flag_UseFastStosb_enum))) { | ||||
| 1725 | UseFastStosb = false; | ||||
| 1726 | } | ||||
| 1727 | } | ||||
| 1728 | |||||
| 1729 | #ifdef COMPILER21 | ||||
| 1730 | if (is_intel() && MaxVectorSize > 16) { | ||||
| 1731 | if (FLAG_IS_DEFAULT(UseFastStosb)(JVMFlag::is_default(Flag_UseFastStosb_enum))) { | ||||
| 1732 | UseFastStosb = false; | ||||
| 1733 | } | ||||
| 1734 | } | ||||
| 1735 | #endif | ||||
| 1736 | |||||
| 1737 | // Use XMM/YMM MOVDQU instruction for Object Initialization | ||||
| 1738 | if (!UseFastStosb && UseSSE >= 2 && UseUnalignedLoadStores) { | ||||
| 1739 | if (FLAG_IS_DEFAULT(UseXMMForObjInit)(JVMFlag::is_default(Flag_UseXMMForObjInit_enum))) { | ||||
| 1740 | UseXMMForObjInit = true; | ||||
| 1741 | } | ||||
| 1742 | } else if (UseXMMForObjInit) { | ||||
| 1743 | warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off."); | ||||
| 1744 | FLAG_SET_DEFAULT(UseXMMForObjInit, false)((UseXMMForObjInit) = (false)); | ||||
| 1745 | } | ||||
| 1746 | |||||
| 1747 | #ifdef COMPILER21 | ||||
| 1748 | if (FLAG_IS_DEFAULT(AlignVector)(JVMFlag::is_default(Flag_AlignVector_enum))) { | ||||
| 1749 | // Modern processors allow misaligned memory operations for vectors. | ||||
| 1750 | AlignVector = !UseUnalignedLoadStores; | ||||
| 1751 | } | ||||
| 1752 | #endif // COMPILER2 | ||||
| 1753 | |||||
| 1754 | if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)(JVMFlag::is_default(Flag_AllocatePrefetchInstr_enum))) { | ||||
| 1755 | if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) { | ||||
| 1756 | FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0)((AllocatePrefetchInstr) = (0)); | ||||
| 1757 | } else if (!supports_sse() && supports_3dnow_prefetch()) { | ||||
| 1758 | FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3)((AllocatePrefetchInstr) = (3)); | ||||
| 1759 | } | ||||
| 1760 | } | ||||
| 1761 | |||||
| 1762 | // Allocation prefetch settings | ||||
| 1763 | intx cache_line_size = prefetch_data_size(); | ||||
| 1764 | if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)(JVMFlag::is_default(Flag_AllocatePrefetchStepSize_enum)) && | ||||
| 1765 | (cache_line_size > AllocatePrefetchStepSize)) { | ||||
| 1766 | FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size)((AllocatePrefetchStepSize) = (cache_line_size)); | ||||
| 1767 | } | ||||
| 1768 | |||||
| 1769 | if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) { | ||||
| 1770 | assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0")do { if (!(!(JVMFlag::is_default(Flag_AllocatePrefetchDistance_enum )))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 1770, "assert(" "!(JVMFlag::is_default(Flag_AllocatePrefetchDistance_enum))" ") failed", "default value should not be 0"); ::breakpoint() ; } } while (0); | ||||
| 1771 | if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)(JVMFlag::is_default(Flag_AllocatePrefetchStyle_enum))) { | ||||
| 1772 | warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag."); | ||||
| 1773 | } | ||||
| 1774 | FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0)((AllocatePrefetchStyle) = (0)); | ||||
| 1775 | } | ||||
| 1776 | |||||
| 1777 | if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)(JVMFlag::is_default(Flag_AllocatePrefetchDistance_enum))) { | ||||
| 1778 | bool use_watermark_prefetch = (AllocatePrefetchStyle == 2); | ||||
| 1779 | FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch))((AllocatePrefetchDistance) = (allocate_prefetch_distance(use_watermark_prefetch ))); | ||||
| 1780 | } | ||||
| 1781 | |||||
| 1782 | if (is_intel() && cpu_family() == 6 && supports_sse3()) { | ||||
| 1783 | if (FLAG_IS_DEFAULT(AllocatePrefetchLines)(JVMFlag::is_default(Flag_AllocatePrefetchLines_enum)) && | ||||
| 1784 | supports_sse4_2() && supports_ht()) { // Nehalem based cpus | ||||
| 1785 | FLAG_SET_DEFAULT(AllocatePrefetchLines, 4)((AllocatePrefetchLines) = (4)); | ||||
| 1786 | } | ||||
| 1787 | #ifdef COMPILER21 | ||||
| 1788 | if (FLAG_IS_DEFAULT(UseFPUForSpilling)(JVMFlag::is_default(Flag_UseFPUForSpilling_enum)) && supports_sse4_2()) { | ||||
| 1789 | FLAG_SET_DEFAULT(UseFPUForSpilling, true)((UseFPUForSpilling) = (true)); | ||||
| 1790 | } | ||||
| 1791 | #endif | ||||
| 1792 | } | ||||
| 1793 | |||||
| 1794 | if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) { | ||||
| 1795 | #ifdef COMPILER21 | ||||
| 1796 | if (FLAG_IS_DEFAULT(UseFPUForSpilling)(JVMFlag::is_default(Flag_UseFPUForSpilling_enum))) { | ||||
| 1797 | FLAG_SET_DEFAULT(UseFPUForSpilling, true)((UseFPUForSpilling) = (true)); | ||||
| 1798 | } | ||||
| 1799 | #endif | ||||
| 1800 | } | ||||
| 1801 | |||||
| 1802 | #ifdef _LP641 | ||||
| 1803 | // Prefetch settings | ||||
| 1804 | |||||
| 1805 | // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from | ||||
| 1806 | // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. | ||||
| 1807 | // Tested intervals from 128 to 2048 in increments of 64 == one cache line. | ||||
| 1808 | // 256 bytes (4 dcache lines) was the nearest runner-up to 576. | ||||
| 1809 | |||||
| 1810 | // gc copy/scan is disabled if prefetchw isn't supported, because | ||||
| 1811 | // Prefetch::write emits an inlined prefetchw on Linux. | ||||
| 1812 | // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. | ||||
| 1813 | // The used prefetcht0 instruction works for both amd64 and em64t. | ||||
| 1814 | |||||
| 1815 | if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)(JVMFlag::is_default(Flag_PrefetchCopyIntervalInBytes_enum))) { | ||||
| 1816 | FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576)((PrefetchCopyIntervalInBytes) = (576)); | ||||
| 1817 | } | ||||
| 1818 | if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)(JVMFlag::is_default(Flag_PrefetchScanIntervalInBytes_enum))) { | ||||
| 1819 | FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576)((PrefetchScanIntervalInBytes) = (576)); | ||||
| 1820 | } | ||||
| 1821 | if (FLAG_IS_DEFAULT(PrefetchFieldsAhead)(JVMFlag::is_default(Flag_PrefetchFieldsAhead_enum))) { | ||||
| 1822 | FLAG_SET_DEFAULT(PrefetchFieldsAhead, 1)((PrefetchFieldsAhead) = (1)); | ||||
| 1823 | } | ||||
| 1824 | #endif | ||||
| 1825 | |||||
| 1826 | if (FLAG_IS_DEFAULT(ContendedPaddingWidth)(JVMFlag::is_default(Flag_ContendedPaddingWidth_enum)) && | ||||
| 1827 | (cache_line_size > ContendedPaddingWidth)) | ||||
| 1828 | ContendedPaddingWidth = cache_line_size; | ||||
| 1829 | |||||
| 1830 | // This machine allows unaligned memory accesses | ||||
| 1831 | if (FLAG_IS_DEFAULT(UseUnalignedAccesses)(JVMFlag::is_default(Flag_UseUnalignedAccesses_enum))) { | ||||
| 1832 | FLAG_SET_DEFAULT(UseUnalignedAccesses, true)((UseUnalignedAccesses) = (true)); | ||||
| 1833 | } | ||||
| 1834 | |||||
| 1835 | #ifndef PRODUCT | ||||
| 1836 | if (log_is_enabled(Info, os, cpu)(LogImpl<(LogTag::_os), (LogTag::_cpu), (LogTag::__NO_TAG) , (LogTag::__NO_TAG), (LogTag::__NO_TAG), (LogTag::__NO_TAG)> ::is_level(LogLevel::Info))) { | ||||
| 1837 | LogStream ls(Log(os, cpu)LogImpl<(LogTag::_os), (LogTag::_cpu), (LogTag::__NO_TAG), (LogTag::__NO_TAG), (LogTag::__NO_TAG), (LogTag::__NO_TAG)>::info()); | ||||
| 1838 | outputStream* log = &ls; | ||||
| 1839 | log->print_cr("Logical CPUs per core: %u", | ||||
| 1840 | logical_processors_per_package()); | ||||
| 1841 | log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); | ||||
| 1842 | log->print("UseSSE=%d", (int) UseSSE); | ||||
| 1843 | if (UseAVX > 0) { | ||||
| 1844 | log->print(" UseAVX=%d", (int) UseAVX); | ||||
| 1845 | } | ||||
| 1846 | if (UseAES) { | ||||
| 1847 | log->print(" UseAES=1"); | ||||
| 1848 | } | ||||
| 1849 | #ifdef COMPILER21 | ||||
| 1850 | if (MaxVectorSize > 0) { | ||||
| 1851 | log->print(" MaxVectorSize=%d", (int) MaxVectorSize); | ||||
| 1852 | } | ||||
| 1853 | #endif | ||||
| 1854 | log->cr(); | ||||
| 1855 | log->print("Allocation"); | ||||
| 1856 | if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) { | ||||
| 1857 | log->print_cr(": no prefetching"); | ||||
| 1858 | } else { | ||||
| 1859 | log->print(" prefetching: "); | ||||
| 1860 | if (UseSSE == 0 && supports_3dnow_prefetch()) { | ||||
| 1861 | log->print("PREFETCHW"); | ||||
| 1862 | } else if (UseSSE >= 1) { | ||||
| 1863 | if (AllocatePrefetchInstr == 0) { | ||||
| 1864 | log->print("PREFETCHNTA"); | ||||
| 1865 | } else if (AllocatePrefetchInstr == 1) { | ||||
| 1866 | log->print("PREFETCHT0"); | ||||
| 1867 | } else if (AllocatePrefetchInstr == 2) { | ||||
| 1868 | log->print("PREFETCHT2"); | ||||
| 1869 | } else if (AllocatePrefetchInstr == 3) { | ||||
| 1870 | log->print("PREFETCHW"); | ||||
| 1871 | } | ||||
| 1872 | } | ||||
| 1873 | if (AllocatePrefetchLines > 1) { | ||||
| 1874 | log->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize); | ||||
| 1875 | } else { | ||||
| 1876 | log->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize); | ||||
| 1877 | } | ||||
| 1878 | } | ||||
| 1879 | |||||
| 1880 | if (PrefetchCopyIntervalInBytes > 0) { | ||||
| 1881 | log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes); | ||||
| 1882 | } | ||||
| 1883 | if (PrefetchScanIntervalInBytes > 0) { | ||||
| 1884 | log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes); | ||||
| 1885 | } | ||||
| 1886 | if (PrefetchFieldsAhead > 0) { | ||||
| 1887 | log->print_cr("PrefetchFieldsAhead %d", (int) PrefetchFieldsAhead); | ||||
| 1888 | } | ||||
| 1889 | if (ContendedPaddingWidth > 0) { | ||||
| 1890 | log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth); | ||||
| 1891 | } | ||||
| 1892 | } | ||||
| 1893 | #endif // !PRODUCT | ||||
| 1894 | if (FLAG_IS_DEFAULT(UseSignumIntrinsic)(JVMFlag::is_default(Flag_UseSignumIntrinsic_enum))) { | ||||
| 1895 | FLAG_SET_DEFAULT(UseSignumIntrinsic, true)((UseSignumIntrinsic) = (true)); | ||||
| 1896 | } | ||||
| 1897 | if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)(JVMFlag::is_default(Flag_UseCopySignIntrinsic_enum))) { | ||||
| 1898 | FLAG_SET_DEFAULT(UseCopySignIntrinsic, true)((UseCopySignIntrinsic) = (true)); | ||||
| 1899 | } | ||||
| 1900 | } | ||||
| 1901 | |||||
| 1902 | void VM_Version::print_platform_virtualization_info(outputStream* st) { | ||||
| 1903 | VirtualizationType vrt = VM_Version::get_detected_virtualization(); | ||||
| 1904 | if (vrt == XenHVM) { | ||||
| 1905 | st->print_cr("Xen hardware-assisted virtualization detected"); | ||||
| 1906 | } else if (vrt == KVM) { | ||||
| 1907 | st->print_cr("KVM virtualization detected"); | ||||
| 1908 | } else if (vrt == VMWare) { | ||||
| 1909 | st->print_cr("VMWare virtualization detected"); | ||||
| 1910 | VirtualizationSupport::print_virtualization_info(st); | ||||
| 1911 | } else if (vrt == HyperV) { | ||||
| 1912 | st->print_cr("Hyper-V virtualization detected"); | ||||
| 1913 | } else if (vrt == HyperVRole) { | ||||
| 1914 | st->print_cr("Hyper-V role detected"); | ||||
| 1915 | } | ||||
| 1916 | } | ||||
| 1917 | |||||
// Returns true iff this CPU is an Intel Core-family part affected by the
// Jump Conditional Code (JCC) erratum, keyed off the CPUID model (_model)
// and stepping (_stepping) values gathered earlier.
bool VM_Version::compute_has_intel_jcc_erratum() {
  if (!is_intel_family_core()) {
    // Only Intel CPUs are affected.
    return false;
  }
  // The following table of affected CPUs is based on the following document released by Intel:
  // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
  switch (_model) {
  case 0x8E:
    // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
    // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
    // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
  case 0x4E:
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
    return _stepping == 0x3;
  case 0x55:
    // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
    // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
    // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
    // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
    // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
    // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
    return _stepping == 0x4 || _stepping == 0x7;
  case 0x5E:
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
    return _stepping == 0x3;
  case 0x9E:
    // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
    // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
    // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
  case 0xA5:
    // Not in Intel documentation.
    // 06_A5H |   | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
    return true;
  case 0xA6:
    // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
    return _stepping == 0x0;
  case 0xAE:
    // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
    return _stepping == 0xA;
  default:
    // If we are running on another intel machine not recognized in the table, we are okay.
    return false;
  }
}
| 1985 | |||||
| 1986 | // On Xen, the cpuid instruction returns | ||||
| 1987 | // eax / registers[0]: Version of Xen | ||||
| 1988 | // ebx / registers[1]: chars 'XenV' | ||||
| 1989 | // ecx / registers[2]: chars 'MMXe' | ||||
| 1990 | // edx / registers[3]: chars 'nVMM' | ||||
| 1991 | // | ||||
| 1992 | // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns | ||||
| 1993 | // ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr' | ||||
| 1994 | // ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof' | ||||
| 1995 | // edx / registers[3]: chars 'M' / 'ware' / 't Hv' | ||||
| 1996 | // | ||||
| 1997 | // more information : | ||||
| 1998 | // https://kb.vmware.com/s/article/1009458 | ||||
| 1999 | // | ||||
// Probe hypervisor CPUID leaves (0x40000000 upward) and record the detected
// hypervisor in Abstract_VM_Version::_detected_virtualization. See the vendor
// signature layout documented in the comment block above.
void VM_Version::check_virtualizations() {
  uint32_t registers[4] = {0};
  // 12 signature bytes (EBX, ECX, EDX) plus a NUL terminator.
  char signature[13] = {0};

  // Xen cpuid leaves can be found 0x100 aligned boundary starting
  // from 0x40000000 until 0x40010000.
  // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
  for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
    detect_virt_stub(leaf, registers);
    // registers[1..3] hold the vendor signature characters (see above).
    memcpy(signature, &registers[1], 12);

    if (strncmp("VMwareVMware", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = VMWare;
      // check for extended metrics from guestlib
      VirtualizationSupport::initialize();
    } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = HyperV;
#ifdef _WINDOWS
      // CPUID leaf 0x40000007 is available to the root partition only.
      // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
      // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
      detect_virt_stub(0x40000007, registers);
      if ((registers[0] != 0x0) ||
          (registers[1] != 0x0) ||
          (registers[2] != 0x0) ||
          (registers[3] != 0x0)) {
        Abstract_VM_Version::_detected_virtualization = HyperVRole;
      }
#endif
    } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
      Abstract_VM_Version::_detected_virtualization = KVM;
    } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = XenHVM;
    }
  }
}
| 2036 | |||||
| 2037 | // avx3_threshold() sets the threshold at which 64-byte instructions are used | ||||
| 2038 | // for implementing the array copy and clear operations. | ||||
| 2039 | // The Intel platforms that supports the serialize instruction | ||||
| 2040 | // has improved implementation of 64-byte load/stores and so the default | ||||
| 2041 | // threshold is set to 0 for these platforms. | ||||
| 2042 | int VM_Version::avx3_threshold() { | ||||
| 2043 | return (is_intel_family_core() && | ||||
| 2044 | supports_serialize() && | ||||
| 2045 | FLAG_IS_DEFAULT(AVX3Threshold)(JVMFlag::is_default(Flag_AVX3Threshold_enum))) ? 0 : AVX3Threshold; | ||||
| 2046 | } | ||||
| 2047 | |||||
// Set to true at the end of VM_Version::initialize().
static bool _vm_version_initialized = false;
| 2049 | |||||
| 2050 | void VM_Version::initialize() { | ||||
| 2051 | ResourceMark rm; | ||||
| 2052 | // Making this stub must be FIRST use of assembler | ||||
| 2053 | stub_blob = BufferBlob::create("VM_Version stub", stub_size); | ||||
| 2054 | if (stub_blob == NULL__null) { | ||||
| |||||
| 2055 | vm_exit_during_initialization("Unable to allocate stub for VM_Version"); | ||||
| 2056 | } | ||||
| 2057 | CodeBuffer c(stub_blob); | ||||
| 2058 | VM_Version_StubGenerator g(&c); | ||||
| 2059 | |||||
| 2060 | get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,(reinterpret_cast<get_cpu_info_stub_t>(g.generate_get_cpu_info ())) | ||||
| 2061 | g.generate_get_cpu_info())(reinterpret_cast<get_cpu_info_stub_t>(g.generate_get_cpu_info ())); | ||||
| 2062 | detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,(reinterpret_cast<detect_virt_stub_t>(g.generate_detect_virt ())) | ||||
| 2063 | g.generate_detect_virt())(reinterpret_cast<detect_virt_stub_t>(g.generate_detect_virt ())); | ||||
| 2064 | |||||
| 2065 | get_processor_features(); | ||||
| 2066 | |||||
| 2067 | LP64_ONLY(Assembler::precompute_instructions();)Assembler::precompute_instructions(); | ||||
| 2068 | |||||
| 2069 | if (VM_Version::supports_hv()) { // Supports hypervisor | ||||
| 2070 | check_virtualizations(); | ||||
| 2071 | } | ||||
| 2072 | _vm_version_initialized = true; | ||||
| 2073 | } | ||||
| 2074 | |||||
// x86 family ids as reported by CPUID; compared against extended_cpu_family()
// (e.g. in cpu_model_description() and cpu_family_description()).
typedef enum {
  CPU_FAMILY_8086_8088  = 0,
  CPU_FAMILY_INTEL_286  = 2,
  CPU_FAMILY_INTEL_386  = 3,
  CPU_FAMILY_INTEL_486  = 4,
  CPU_FAMILY_PENTIUM    = 5,
  CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
  CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;
| 2084 | |||||
// Bit masks for the extended-feature EDX word (see cpu_is_em64t()).
typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;
| 2089 | |||||
// Single-bit masks for the EDX feature-flag word; the bit positions line up
// with the human-readable names in _feature_edx_id below.
typedef enum {
  FPU_FLAG     = 0x00000001,
  VME_FLAG     = 0x00000002,
  DE_FLAG      = 0x00000004,
  PSE_FLAG     = 0x00000008,
  TSC_FLAG     = 0x00000010,
  MSR_FLAG     = 0x00000020,
  PAE_FLAG     = 0x00000040,
  MCE_FLAG     = 0x00000080,
  CX8_FLAG     = 0x00000100,
  APIC_FLAG    = 0x00000200,
  SEP_FLAG     = 0x00000800,
  MTRR_FLAG    = 0x00001000,
  PGE_FLAG     = 0x00002000,
  MCA_FLAG     = 0x00004000,
  CMOV_FLAG    = 0x00008000,
  PAT_FLAG     = 0x00010000,
  PSE36_FLAG   = 0x00020000,
  PSNUM_FLAG   = 0x00040000,
  CLFLUSH_FLAG = 0x00080000,
  DTS_FLAG     = 0x00200000,
  ACPI_FLAG    = 0x00400000,
  MMX_FLAG     = 0x00800000,
  FXSR_FLAG    = 0x01000000,
  SSE_FLAG     = 0x02000000,
  SSE2_FLAG    = 0x04000000,
  SS_FLAG      = 0x08000000,
  HTT_FLAG     = 0x10000000,
  TM_FLAG      = 0x20000000
} FeatureEdxFlag;
| 2120 | |||||
| 2121 | static BufferBlob* cpuid_brand_string_stub_blob; | ||||
| 2122 | static const int cpuid_brand_string_stub_size = 550; | ||||
| 2123 | |||||
| 2124 | extern "C" { | ||||
| 2125 | typedef void (*getCPUIDBrandString_stub_t)(void*); | ||||
| 2126 | } | ||||
| 2127 | |||||
| 2128 | static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = NULL__null; | ||||
| 2129 | |||||
| 2130 | // VM_Version statics | ||||
| 2131 | enum { | ||||
| 2132 | ExtendedFamilyIdLength_INTEL = 16, | ||||
| 2133 | ExtendedFamilyIdLength_AMD = 24 | ||||
| 2134 | }; | ||||
| 2135 | |||||
| 2136 | const size_t VENDOR_LENGTH = 13; | ||||
| 2137 | const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1); | ||||
| 2138 | static char* _cpu_brand_string = NULL__null; | ||||
| 2139 | static int64_t _max_qualified_cpu_frequency = 0; | ||||
| 2140 | |||||
| 2141 | static int _no_of_threads = 0; | ||||
| 2142 | static int _no_of_cores = 0; | ||||
| 2143 | |||||
// Intel family names indexed by extended_cpu_family();
// used by cpu_family_description().
const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",
  "",
  "286",
  "386",
  "486",
  "Pentium",
  "Pentium Pro",   //or Pentium-M/Woodcrest depeding on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"
};
| 2162 | |||||
// AMD family names indexed by extended_cpu_family();
// used by cpu_family_description().
const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",
  "K5/K6",
  "Athlon/AthlonXP",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",
  "Opteron QC/Phenom",  // Barcelona et.al.
  "",
  "",
  "",
  "",
  "",
  "",
  "Zen"
};
| 2189 | // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual, | ||||
| 2190 | // September 2013, Vol 3C Table 35-1 | ||||
| 2191 | const char* const _model_id_pentium_pro[] = { | ||||
| 2192 | "", | ||||
| 2193 | "Pentium Pro", | ||||
| 2194 | "", | ||||
| 2195 | "Pentium II model 3", | ||||
| 2196 | "", | ||||
| 2197 | "Pentium II model 5/Xeon/Celeron", | ||||
| 2198 | "Celeron", | ||||
| 2199 | "Pentium III/Pentium III Xeon", | ||||
| 2200 | "Pentium III/Pentium III Xeon", | ||||
| 2201 | "Pentium M model 9", // Yonah | ||||
| 2202 | "Pentium III, model A", | ||||
| 2203 | "Pentium III, model B", | ||||
| 2204 | "", | ||||
| 2205 | "Pentium M model D", // Dothan | ||||
| 2206 | "", | ||||
| 2207 | "Core 2", // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown | ||||
| 2208 | "", | ||||
| 2209 | "", | ||||
| 2210 | "", | ||||
| 2211 | "", | ||||
| 2212 | "", | ||||
| 2213 | "", | ||||
| 2214 | "Celeron", // 0x16 Celeron 65nm | ||||
| 2215 | "Core 2", // 0x17 Penryn / Harpertown | ||||
| 2216 | "", | ||||
| 2217 | "", | ||||
| 2218 | "Core i7", // 0x1A CPU_MODEL_NEHALEM_EP | ||||
| 2219 | "Atom", // 0x1B Z5xx series Silverthorn | ||||
| 2220 | "", | ||||
| 2221 | "Core 2", // 0x1D Dunnington (6-core) | ||||
| 2222 | "Nehalem", // 0x1E CPU_MODEL_NEHALEM | ||||
| 2223 | "", | ||||
| 2224 | "", | ||||
| 2225 | "", | ||||
| 2226 | "", | ||||
| 2227 | "", | ||||
| 2228 | "", | ||||
| 2229 | "Westmere", // 0x25 CPU_MODEL_WESTMERE | ||||
| 2230 | "", | ||||
| 2231 | "", | ||||
| 2232 | "", // 0x28 | ||||
| 2233 | "", | ||||
| 2234 | "Sandy Bridge", // 0x2a "2nd Generation Intel Core i7, i5, i3" | ||||
| 2235 | "", | ||||
| 2236 | "Westmere-EP", // 0x2c CPU_MODEL_WESTMERE_EP | ||||
| 2237 | "Sandy Bridge-EP", // 0x2d CPU_MODEL_SANDYBRIDGE_EP | ||||
| 2238 | "Nehalem-EX", // 0x2e CPU_MODEL_NEHALEM_EX | ||||
| 2239 | "Westmere-EX", // 0x2f CPU_MODEL_WESTMERE_EX | ||||
| 2240 | "", | ||||
| 2241 | "", | ||||
| 2242 | "", | ||||
| 2243 | "", | ||||
| 2244 | "", | ||||
| 2245 | "", | ||||
| 2246 | "", | ||||
| 2247 | "", | ||||
| 2248 | "", | ||||
| 2249 | "", | ||||
| 2250 | "Ivy Bridge", // 0x3a | ||||
| 2251 | "", | ||||
| 2252 | "Haswell", // 0x3c "4th Generation Intel Core Processor" | ||||
| 2253 | "", // 0x3d "Next Generation Intel Core Processor" | ||||
| 2254 | "Ivy Bridge-EP", // 0x3e "Next Generation Intel Xeon Processor E7 Family" | ||||
| 2255 | "", // 0x3f "Future Generation Intel Xeon Processor" | ||||
| 2256 | "", | ||||
| 2257 | "", | ||||
| 2258 | "", | ||||
| 2259 | "", | ||||
| 2260 | "", | ||||
| 2261 | "Haswell", // 0x45 "4th Generation Intel Core Processor" | ||||
| 2262 | "Haswell", // 0x46 "4th Generation Intel Core Processor" | ||||
| 2263 | NULL__null | ||||
| 2264 | }; | ||||
| 2265 | |||||
| 2266 | /* Brand ID is for back compability | ||||
| 2267 | * Newer CPUs uses the extended brand string */ | ||||
| 2268 | const char* const _brand_id[] = { | ||||
| 2269 | "", | ||||
| 2270 | "Celeron processor", | ||||
| 2271 | "Pentium III processor", | ||||
| 2272 | "Intel Pentium III Xeon processor", | ||||
| 2273 | "", | ||||
| 2274 | "", | ||||
| 2275 | "", | ||||
| 2276 | "", | ||||
| 2277 | "Intel Pentium 4 processor", | ||||
| 2278 | NULL__null | ||||
| 2279 | }; | ||||
| 2280 | |||||
| 2281 | |||||
// Human-readable names for the EDX feature-flag bits, in bit order
// (index i names bit i); positions mirror the FeatureEdxFlag masks above.
const char* const _feature_edx_id[] = {
  "On-Chip FPU",
  "Virtual Mode Extensions",
  "Debugging Extensions",
  "Page Size Extensions",
  "Time Stamp Counter",
  "Model Specific Registers",
  "Physical Address Extension",
  "Machine Check Exceptions",
  "CMPXCHG8B Instruction",
  "On-Chip APIC",
  "",
  "Fast System Call",
  "Memory Type Range Registers",
  "Page Global Enable",
  "Machine Check Architecture",
  "Conditional Mov Instruction",
  "Page Attribute Table",
  "36-bit Page Size Extension",
  "Processor Serial Number",
  "CLFLUSH Instruction",
  "",
  "Debug Trace Store feature",
  "ACPI registers in MSR space",
  "Intel Architecture MMX Technology",
  "Fast Float Point Save and Restore",
  "Streaming SIMD extensions",
  "Streaming SIMD extensions 2",
  "Self-Snoop",
  "Hyper Threading",
  "Thermal Monitor",
  "",
  "Pending Break Enable"
};
| 2316 | |||||
// Human-readable names for the extended-feature EDX bits, in bit order
// (index i names bit i; empty string = unnamed/reserved bit).
const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",
  "",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",
  "",
  "Intel 64 Architecture",
  "",
  ""
};
| 2351 | |||||
// Human-readable names for the ECX feature-flag bits, in bit order
// (index i names bit i; empty string = unnamed/reserved bit).
const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",
  "PCLMULQDQ",
  "64-bit DS Area",
  "MONITOR/MWAIT instructions",
  "CPL Qualified Debug Store",
  "Virtual Machine Extensions",
  "Safer Mode Extensions",
  "Enhanced Intel SpeedStep technology",
  "Thermal Monitor 2",
  "Supplemental Streaming SIMD Extensions 3",
  "L1 Context ID",
  "",
  "Fused Multiply-Add",
  "CMPXCHG16B",
  "xTPR Update Control",
  "Perfmon and Debug Capability",
  "",
  "Process-context identifiers",
  "Direct Cache Access",
  "Streaming SIMD extensions 4.1",
  "Streaming SIMD extensions 4.2",
  "x2APIC",
  "MOVBE",
  "Popcount instruction",
  "TSC-Deadline",
  "AESNI",
  "XSAVE",
  "OSXSAVE",
  "AVX",
  "F16C",
  "RDRAND",
  ""
};
| 2386 | |||||
// Human-readable names for the extended-feature ECX bits, in bit order
// (index i names bit i; empty string = unnamed/reserved bit).
const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",
  "Core multi-processor legacy mode",
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
  "Misaligned SSE mode",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};
| 2421 | |||||
| 2422 | void VM_Version::initialize_tsc(void) { | ||||
| 2423 | ResourceMark rm; | ||||
| 2424 | |||||
| 2425 | cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size); | ||||
| 2426 | if (cpuid_brand_string_stub_blob == NULL__null) { | ||||
| 2427 | vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub"); | ||||
| 2428 | } | ||||
| 2429 | CodeBuffer c(cpuid_brand_string_stub_blob); | ||||
| 2430 | VM_Version_StubGenerator g(&c); | ||||
| 2431 | getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,(reinterpret_cast<getCPUIDBrandString_stub_t>(g.generate_getCPUIDBrandString ())) | ||||
| 2432 | g.generate_getCPUIDBrandString())(reinterpret_cast<getCPUIDBrandString_stub_t>(g.generate_getCPUIDBrandString ())); | ||||
| 2433 | } | ||||
| 2434 | |||||
| 2435 | const char* VM_Version::cpu_model_description(void) { | ||||
| 2436 | uint32_t cpu_family = extended_cpu_family(); | ||||
| 2437 | uint32_t cpu_model = extended_cpu_model(); | ||||
| 2438 | const char* model = NULL__null; | ||||
| 2439 | |||||
| 2440 | if (cpu_family == CPU_FAMILY_PENTIUMPRO) { | ||||
| 2441 | for (uint32_t i = 0; i <= cpu_model; i++) { | ||||
| 2442 | model = _model_id_pentium_pro[i]; | ||||
| 2443 | if (model == NULL__null) { | ||||
| 2444 | break; | ||||
| 2445 | } | ||||
| 2446 | } | ||||
| 2447 | } | ||||
| 2448 | return model; | ||||
| 2449 | } | ||||
| 2450 | |||||
| 2451 | const char* VM_Version::cpu_brand_string(void) { | ||||
| 2452 | if (_cpu_brand_string == NULL__null) { | ||||
| 2453 | _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal)(char*) (AllocateHeap(((CPU_EBS_MAX_LENGTH)) * sizeof(char), mtInternal , AllocFailStrategy::RETURN_NULL)); | ||||
| 2454 | if (NULL__null == _cpu_brand_string) { | ||||
| 2455 | return NULL__null; | ||||
| 2456 | } | ||||
| 2457 | int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH); | ||||
| 2458 | if (ret_val != OS_OK) { | ||||
| 2459 | FREE_C_HEAP_ARRAY(char, _cpu_brand_string)FreeHeap((char*)(_cpu_brand_string)); | ||||
| 2460 | _cpu_brand_string = NULL__null; | ||||
| 2461 | } | ||||
| 2462 | } | ||||
| 2463 | return _cpu_brand_string; | ||||
| 2464 | } | ||||
| 2465 | |||||
| 2466 | const char* VM_Version::cpu_brand(void) { | ||||
| 2467 | const char* brand = NULL__null; | ||||
| 2468 | |||||
| 2469 | if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) { | ||||
| 2470 | int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF; | ||||
| 2471 | brand = _brand_id[0]; | ||||
| 2472 | for (int i = 0; brand != NULL__null && i <= brand_num; i += 1) { | ||||
| 2473 | brand = _brand_id[i]; | ||||
| 2474 | } | ||||
| 2475 | } | ||||
| 2476 | return brand; | ||||
| 2477 | } | ||||
| 2478 | |||||
| 2479 | bool VM_Version::cpu_is_em64t(void) { | ||||
| 2480 | return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG); | ||||
| 2481 | } | ||||
| 2482 | |||||
| 2483 | bool VM_Version::is_netburst(void) { | ||||
| 2484 | return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4)); | ||||
| 2485 | } | ||||
| 2486 | |||||
| 2487 | bool VM_Version::supports_tscinv_ext(void) { | ||||
| 2488 | if (!supports_tscinv_bit()) { | ||||
| 2489 | return false; | ||||
| 2490 | } | ||||
| 2491 | |||||
| 2492 | if (is_intel()) { | ||||
| 2493 | return true; | ||||
| 2494 | } | ||||
| 2495 | |||||
| 2496 | if (is_amd()) { | ||||
| 2497 | return !is_amd_Barcelona(); | ||||
| 2498 | } | ||||
| 2499 | |||||
| 2500 | if (is_hygon()) { | ||||
| 2501 | return true; | ||||
| 2502 | } | ||||
| 2503 | |||||
| 2504 | return false; | ||||
| 2505 | } | ||||
| 2506 | |||||
// Estimate thread, socket and core counts from the OS-visible processor
// count and the per-package topology reported by CPUID.
void VM_Version::resolve_cpu_information_details(void) {

  // in future we want to base this information on proper cpu
  // and cache topology enumeration such as:
  // Intel 64 Architecture Processor Topology Enumeration
  // which supports system cpu and cache topology enumeration
  // either using 2xAPICIDs or initial APICIDs

  // currently only rough cpu information estimates
  // which will not necessarily reflect the exact configuration of the system

  // this is the number of logical hardware threads
  // visible to the operating system
  _no_of_threads = os::processor_count();

  // find out number of threads per cpu package
  int threads_per_package = threads_per_core() * cores_per_cpu();

  // use amount of threads visible to the process in order to guess number of sockets
  _no_of_sockets = _no_of_threads / threads_per_package;

  // process might only see a subset of the total number of threads
  // from a single processor package. Virtualization/resource management for example.
  // If so then just write a hard 1 as num of pkgs.
  if (0 == _no_of_sockets) {
    _no_of_sockets = 1;
  }

  // estimate the number of cores
  _no_of_cores = cores_per_cpu() * _no_of_sockets;
}
| 2538 | |||||
| 2539 | |||||
| 2540 | const char* VM_Version::cpu_family_description(void) { | ||||
| 2541 | int cpu_family_id = extended_cpu_family(); | ||||
| 2542 | if (is_amd()) { | ||||
| 2543 | if (cpu_family_id < ExtendedFamilyIdLength_AMD) { | ||||
| 2544 | return _family_id_amd[cpu_family_id]; | ||||
| 2545 | } | ||||
| 2546 | } | ||||
| 2547 | if (is_intel()) { | ||||
| 2548 | if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) { | ||||
| 2549 | return cpu_model_description(); | ||||
| 2550 | } | ||||
| 2551 | if (cpu_family_id < ExtendedFamilyIdLength_INTEL) { | ||||
| 2552 | return _family_id_intel[cpu_family_id]; | ||||
| 2553 | } | ||||
| 2554 | } | ||||
| 2555 | if (is_hygon()) { | ||||
| 2556 | return "Dhyana"; | ||||
| 2557 | } | ||||
| 2558 | return "Unknown x86"; | ||||
| 2559 | } | ||||
| 2560 | |||||
| 2561 | int VM_Version::cpu_type_description(char* const buf, size_t buf_len) { | ||||
| 2562 | assert(buf != NULL, "buffer is NULL!")do { if (!(buf != __null)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 2562, "assert(" "buf != __null" ") failed", "buffer is NULL!" ); ::breakpoint(); } } while (0); | ||||
| 2563 | assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!")do { if (!(buf_len >= CPU_TYPE_DESC_BUF_SIZE)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 2563, "assert(" "buf_len >= CPU_TYPE_DESC_BUF_SIZE" ") failed" , "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!") ; ::breakpoint(); } } while (0); | ||||
| 2564 | |||||
| 2565 | const char* cpu_type = NULL__null; | ||||
| 2566 | const char* x64 = NULL__null; | ||||
| 2567 | |||||
| 2568 | if (is_intel()) { | ||||
| 2569 | cpu_type = "Intel"; | ||||
| 2570 | x64 = cpu_is_em64t() ? " Intel64" : ""; | ||||
| 2571 | } else if (is_amd()) { | ||||
| 2572 | cpu_type = "AMD"; | ||||
| 2573 | x64 = cpu_is_em64t() ? " AMD64" : ""; | ||||
| 2574 | } else if (is_hygon()) { | ||||
| 2575 | cpu_type = "Hygon"; | ||||
| 2576 | x64 = cpu_is_em64t() ? " AMD64" : ""; | ||||
| 2577 | } else { | ||||
| 2578 | cpu_type = "Unknown x86"; | ||||
| 2579 | x64 = cpu_is_em64t() ? " x86_64" : ""; | ||||
| 2580 | } | ||||
| 2581 | |||||
| 2582 | jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s", | ||||
| 2583 | cpu_type, | ||||
| 2584 | cpu_family_description(), | ||||
| 2585 | supports_ht() ? " (HT)" : "", | ||||
| 2586 | supports_sse3() ? " SSE3" : "", | ||||
| 2587 | supports_ssse3() ? " SSSE3" : "", | ||||
| 2588 | supports_sse4_1() ? " SSE4.1" : "", | ||||
| 2589 | supports_sse4_2() ? " SSE4.2" : "", | ||||
| 2590 | supports_sse4a() ? " SSE4A" : "", | ||||
| 2591 | is_netburst() ? " Netburst" : "", | ||||
| 2592 | is_intel_family_core() ? " Core" : "", | ||||
| 2593 | x64); | ||||
| 2594 | |||||
| 2595 | return OS_OK; | ||||
| 2596 | } | ||||
| 2597 | |||||
| 2598 | int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) { | ||||
| 2599 | assert(buf != NULL, "buffer is NULL!")do { if (!(buf != __null)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 2599, "assert(" "buf != __null" ") failed", "buffer is NULL!" ); ::breakpoint(); } } while (0); | ||||
| 2600 | assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!")do { if (!(buf_len >= CPU_EBS_MAX_LENGTH)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 2600, "assert(" "buf_len >= CPU_EBS_MAX_LENGTH" ") failed" , "buffer len should at least be == CPU_EBS_MAX_LENGTH!"); :: breakpoint(); } } while (0); | ||||
| 2601 | assert(getCPUIDBrandString_stub != NULL, "not initialized")do { if (!(getCPUIDBrandString_stub != __null)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp" , 2601, "assert(" "getCPUIDBrandString_stub != __null" ") failed" , "not initialized"); ::breakpoint(); } } while (0); | ||||
| 2602 | |||||
| 2603 | // invoke newly generated asm code to fetch CPU Brand String | ||||
| 2604 | getCPUIDBrandString_stub(&_cpuid_info); | ||||
| 2605 | |||||
| 2606 | // fetch results into buffer | ||||
| 2607 | *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0; | ||||
| 2608 | *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1; | ||||
| 2609 | *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2; | ||||
| 2610 | *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3; | ||||
| 2611 | *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4; | ||||
| 2612 | *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5; | ||||
| 2613 | *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6; | ||||
| 2614 | *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7; | ||||
| 2615 | *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8; | ||||
| 2616 | *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9; | ||||
| 2617 | *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10; | ||||
| 2618 | *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11; | ||||
| 2619 | |||||
| 2620 | return OS_OK; | ||||
| 2621 | } | ||||
| 2622 | |||||
// Write a comma-separated list of supported feature names into 'buf'.
// Returns the number of characters written (or buf_len - 1 on a
// formatting error — note the early return hidden inside WRITE_TO_BUF).
size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
  guarantee(buf != NULL, "buffer is NULL!");
  guarantee(buf_len > 0, "buffer len not enough!");

  unsigned int flag = 0;
  unsigned int fi = 0;
  size_t       written = 0;
  const char*  prefix = "";

// Appends 'string' to buf, preceded by ", " for every entry after the
// first.  On a jio_snprintf failure this returns from the enclosing
// function with the truncated length.
#define WRITE_TO_BUF(string)                                                          \
  {                                                                                   \
    int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
    if (res < 0) {                                                                    \
      return buf_len - 1;                                                             \
    }                                                                                 \
    written += res;                                                                   \
    if (prefix[0] == '\0') {                                                          \
      prefix = ", ";                                                                  \
    }                                                                                 \
  }

  // Standard feature flags (std_cpuid1, EDX).  Two bits need suppressing
  // when the raw flag is misleading on the running part.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
      continue; /* no hyperthreading */
    } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
      continue; /* no fast system call */
    }
    if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_edx_id[fi]);
    }
  }

  // Standard feature flags (std_cpuid1, ECX).
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_ecx_id[fi]);
    }
  }

  // Extended feature flags (ext_cpuid1, ECX).
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
    }
  }

  // Extended feature flags (ext_cpuid1, EDX).
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_edx_id[fi]);
    }
  }

  // Not a CPUID-leaf-1 feature; reported separately.
  if (supports_tscinv_bit()) {
    WRITE_TO_BUF("Invariant TSC");
  }

  return written;
}
| 2679 | |||||
| 2680 | /** | ||||
| 2681 | * Write a detailed description of the cpu to a given buffer, including | ||||
| 2682 | * feature set. | ||||
| 2683 | */ | ||||
int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
  assert(buf != NULL, "buffer is NULL!");
  assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");

  static const char* unknown = "<unknown>";
  char               vendor_id[VENDOR_LENGTH];
  const char*        family = NULL;
  const char*        model = NULL;
  const char*        brand = NULL;
  int                outputLen = 0;

  // Family/model descriptions may be unavailable for unrecognized parts;
  // substitute a placeholder rather than passing NULL to jio_snprintf.
  family = cpu_family_description();
  if (family == NULL) {
    family = unknown;
  }

  model = cpu_model_description();
  if (model == NULL) {
    model = unknown;
  }

  // Prefer the full CPUID brand string; fall back to the legacy brand id,
  // then to the placeholder.
  brand = cpu_brand_string();

  if (brand == NULL) {
    brand = cpu_brand();
    if (brand == NULL) {
      brand = unknown;
    }
  }

  // Assemble the 12-byte vendor id.  NOTE(review): the 0, 2, 1 field
  // order is deliberate — presumably it matches the register order in
  // which the cpuid stub stored the vendor words; confirm against the
  // stub layout before changing.
  *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
  *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
  *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
  vendor_id[VENDOR_LENGTH-1] = '\0';

  // Fixed header: identification plus the raw CPUID words, ending with
  // "Supports: " so the feature list can be appended directly after it.
  outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
    "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
    "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
    "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
    "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
    "Supports: ",
    brand,
    vendor_id,
    family,
    extended_cpu_family(),
    model,
    extended_cpu_model(),
    cpu_stepping(),
    _cpuid_info.std_cpuid1_eax.bits.ext_family,
    _cpuid_info.std_cpuid1_eax.bits.ext_model,
    _cpuid_info.std_cpuid1_eax.bits.proc_type,
    _cpuid_info.std_cpuid1_eax.value,
    _cpuid_info.std_cpuid1_ebx.value,
    _cpuid_info.std_cpuid1_ecx.value,
    _cpuid_info.std_cpuid1_edx.value,
    _cpuid_info.ext_cpuid1_eax,
    _cpuid_info.ext_cpuid1_ebx,
    _cpuid_info.ext_cpuid1_ecx,
    _cpuid_info.ext_cpuid1_edx);

  // Error or truncation: terminate the buffer and report failure.
  if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
    if (buf_len > 0) { buf[buf_len-1] = '\0'; }
    return OS_ERR;
  }

  // Append the feature list after the fixed header.
  cpu_write_support_string(&buf[outputLen], buf_len - outputLen);

  return OS_OK;
}
| 2753 | |||||
| 2754 | |||||
| 2755 | // Fill in Abstract_VM_Version statics | ||||
void VM_Version::initialize_cpu_information() {
  // Must run after VM_Version::initialize() (the cpuid info it consumes
  // is gathered there) and at most once.
  assert(_vm_version_initialized, "should have initialized VM_Version long ago");
  assert(!_initialized, "shouldn't be initialized yet");
  // Estimate thread/core/socket counts before formatting descriptions.
  resolve_cpu_information_details();

  // initialize cpu_name and cpu_desc
  cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
  cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
  _initialized = true;
}
| 2766 | |||||
| 2767 | /** | ||||
| 2768 | * For information about extracting the frequency from the cpu brand string, please see: | ||||
| 2769 | * | ||||
| 2770 | * Intel Processor Identification and the CPUID Instruction | ||||
| 2771 | * Application Note 485 | ||||
| 2772 | * May 2012 | ||||
| 2773 | * | ||||
| 2774 | * The return value is the frequency in Hz. | ||||
| 2775 | */ | ||||
| 2776 | int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) { | ||||
| 2777 | const char* const brand_string = cpu_brand_string(); | ||||
| 2778 | if (brand_string == NULL__null) { | ||||
| 2779 | return 0; | ||||
| 2780 | } | ||||
| 2781 | const int64_t MEGA = 1000000; | ||||
| 2782 | int64_t multiplier = 0; | ||||
| 2783 | int64_t frequency = 0; | ||||
| 2784 | uint8_t idx = 0; | ||||
| 2785 | // The brand string buffer is at most 48 bytes. | ||||
| 2786 | // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y. | ||||
| 2787 | for (; idx < 48-2; ++idx) { | ||||
| 2788 | // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits. | ||||
| 2789 | // Search brand string for "yHz" where y is M, G, or T. | ||||
| 2790 | if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') { | ||||
| 2791 | if (brand_string[idx] == 'M') { | ||||
| 2792 | multiplier = MEGA; | ||||
| 2793 | } else if (brand_string[idx] == 'G') { | ||||
| 2794 | multiplier = MEGA * 1000; | ||||
| 2795 | } else if (brand_string[idx] == 'T') { | ||||
| 2796 | multiplier = MEGA * MEGA; | ||||
| 2797 | } | ||||
| 2798 | break; | ||||
| 2799 | } | ||||
| 2800 | } | ||||
| 2801 | if (multiplier > 0) { | ||||
| 2802 | // Compute freqency (in Hz) from brand string. | ||||
| 2803 | if (brand_string[idx-3] == '.') { // if format is "x.xx" | ||||
| 2804 | frequency = (brand_string[idx-4] - '0') * multiplier; | ||||
| 2805 | frequency += (brand_string[idx-2] - '0') * multiplier / 10; | ||||
| 2806 | frequency += (brand_string[idx-1] - '0') * multiplier / 100; | ||||
| 2807 | } else { // format is "xxxx" | ||||
| 2808 | frequency = (brand_string[idx-4] - '0') * 1000; | ||||
| 2809 | frequency += (brand_string[idx-3] - '0') * 100; | ||||
| 2810 | frequency += (brand_string[idx-2] - '0') * 10; | ||||
| 2811 | frequency += (brand_string[idx-1] - '0'); | ||||
| 2812 | frequency *= multiplier; | ||||
| 2813 | } | ||||
| 2814 | } | ||||
| 2815 | return frequency; | ||||
| 2816 | } | ||||
| 2817 | |||||
| 2818 | |||||
| 2819 | int64_t VM_Version::maximum_qualified_cpu_frequency(void) { | ||||
| 2820 | if (_max_qualified_cpu_frequency == 0) { | ||||
| 2821 | _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string(); | ||||
| 2822 | } | ||||
| 2823 | return _max_qualified_cpu_frequency; | ||||
| 2824 | } | ||||
| 2825 |
| 1 | /* | |||
| 2 | * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. | |||
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |||
| 4 | * | |||
| 5 | * This code is free software; you can redistribute it and/or modify it | |||
| 6 | * under the terms of the GNU General Public License version 2 only, as | |||
| 7 | * published by the Free Software Foundation. | |||
| 8 | * | |||
| 9 | * This code is distributed in the hope that it will be useful, but WITHOUT | |||
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |||
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |||
| 12 | * version 2 for more details (a copy is included in the LICENSE file that | |||
| 13 | * accompanied this code). | |||
| 14 | * | |||
| 15 | * You should have received a copy of the GNU General Public License version | |||
| 16 | * 2 along with this work; if not, write to the Free Software Foundation, | |||
| 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |||
| 18 | * | |||
| 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | |||
| 20 | * or visit www.oracle.com if you need additional information or have any | |||
| 21 | * questions. | |||
| 22 | * | |||
| 23 | */ | |||
| 24 | ||||
| 25 | #ifndef CPU_X86_MACROASSEMBLER_X86_HPP | |||
| 26 | #define CPU_X86_MACROASSEMBLER_X86_HPP | |||
| 27 | ||||
| 28 | #include "asm/assembler.hpp" | |||
| 29 | #include "code/vmreg.inline.hpp" | |||
| 30 | #include "compiler/oopMap.hpp" | |||
| 31 | #include "utilities/macros.hpp" | |||
| 32 | #include "runtime/rtmLocking.hpp" | |||
| 33 | #include "runtime/vm_version.hpp" | |||
| 34 | ||||
| 35 | // MacroAssembler extends Assembler by frequently used macros. | |||
| 36 | // | |||
| 37 | // Instructions for which a 'better' code sequence exists depending | |||
| 38 | // on arguments should also go in here. | |||
| 39 | ||||
| 40 | class MacroAssembler: public Assembler { | |||
| 41 | friend class LIR_Assembler; | |||
| 42 | friend class Runtime1; // as_Address() | |||
| 43 | ||||
| 44 | public: | |||
| 45 | // Support for VM calls | |||
| 46 | // | |||
| 47 | // This is the base routine called by the different versions of call_VM_leaf. The interpreter | |||
| 48 | // may customize this version by overriding it for its purposes (e.g., to save/restore | |||
| 49 | // additional registers when doing a VM call). | |||
| 50 | ||||
| 51 | virtual void call_VM_leaf_base( | |||
| 52 | address entry_point, // the entry point | |||
| 53 | int number_of_arguments // the number of arguments to pop after the call | |||
| 54 | ); | |||
| 55 | ||||
| 56 | protected: | |||
| 57 | // This is the base routine called by the different versions of call_VM. The interpreter | |||
| 58 | // may customize this version by overriding it for its purposes (e.g., to save/restore | |||
| 59 | // additional registers when doing a VM call). | |||
| 60 | // | |||
| 61 | // If no java_thread register is specified (noreg) than rdi will be used instead. call_VM_base | |||
| 62 | // returns the register which contains the thread upon return. If a thread register has been | |||
| 63 | // specified, the return value will correspond to that register. If no last_java_sp is specified | |||
| 64 | // (noreg) than rsp will be used instead. | |||
| 65 | virtual void call_VM_base( // returns the register containing the thread upon return | |||
| 66 | Register oop_result, // where an oop-result ends up if any; use noreg otherwise | |||
| 67 | Register java_thread, // the thread if computed before ; use noreg otherwise | |||
| 68 | Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise | |||
| 69 | address entry_point, // the entry point | |||
| 70 | int number_of_arguments, // the number of arguments (w/o thread) to pop after the call | |||
| 71 | bool check_exceptions // whether to check for pending exceptions after return | |||
| 72 | ); | |||
| 73 | ||||
| 74 | void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); | |||
| 75 | ||||
| 76 | // helpers for FPU flag access | |||
| 77 | // tmp is a temporary register, if none is available use noreg | |||
| 78 | void save_rax (Register tmp); | |||
| 79 | void restore_rax(Register tmp); | |||
| 80 | ||||
| 81 | public: | |||
| 82 | MacroAssembler(CodeBuffer* code) : Assembler(code) {} | |||
| 83 | ||||
| 84 | // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. | |||
| 85 | // The implementation is only non-empty for the InterpreterMacroAssembler, | |||
| 86 | // as only the interpreter handles PopFrame and ForceEarlyReturn requests. | |||
| 87 | virtual void check_and_handle_popframe(Register java_thread); | |||
| 88 | virtual void check_and_handle_earlyret(Register java_thread); | |||
| 89 | ||||
| 90 | Address as_Address(AddressLiteral adr); | |||
| 91 | Address as_Address(ArrayAddress adr); | |||
| 92 | ||||
| 93 | // Support for NULL-checks | |||
| 94 | // | |||
| 95 | // Generates code that causes a NULL OS exception if the content of reg is NULL. | |||
| 96 | // If the accessed location is M[reg + offset] and the offset is known, provide the | |||
| 97 | // offset. No explicit code generation is needed if the offset is within a certain | |||
| 98 | // range (0 <= offset <= page_size). | |||
| 99 | ||||
| 100 | void null_check(Register reg, int offset = -1); | |||
| 101 | static bool needs_explicit_null_check(intptr_t offset); | |||
| 102 | static bool uses_implicit_null_check(void* address); | |||
| 103 | ||||
| 104 | // Required platform-specific helpers for Label::patch_instructions. | |||
| 105 | // They _shadow_ the declarations in AbstractAssembler, which are undefined. | |||
  // Patch the branch/call instruction starting at 'branch' so that it
  // transfers control to 'target'.  'file'/'line' identify the patch site
  // for error reporting.  Only the instruction forms this assembler emits
  // for label-based branches are accepted (checked by the assert below).
  void pd_patch_instruction(address branch, address target, const char* file, int line) {
    unsigned char op = branch[0];
    assert(op == 0xE8 /* call */ ||
           op == 0xE9 /* jmp */ ||
           op == 0xEB /* short jmp */ ||
           (op & 0xF0) == 0x70 /* short jcc */ ||
           op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */ ||
           op == 0xC7 && branch[1] == 0xF8 /* xbegin */,
           "Invalid opcode at patch point");

    if (op == 0xEB || (op & 0xF0) == 0x70) {
      // short offset operators (jmp and jcc): an 8-bit displacement
      // relative to the end of the instruction.
      char* disp = (char*) &branch[1];
      int imm8 = target - (address) &disp[1];
      guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset at %s:%d",
                file == NULL ? "<NULL>" : file, line);
      *disp = imm8;
    } else {
      // 32-bit displacement; two-byte opcodes (0x0F jcc, 0xC7 xbegin)
      // place it one byte later than the single-byte forms.
      int* disp = (int*) &branch[(op == 0x0F || op == 0xC7)? 2: 1];
      int imm32 = target - (address) &disp[1];
      *disp = imm32;
    }
  }
| 129 | ||||
| 130 | // The following 4 methods return the offset of the appropriate move instruction | |||
| 131 | ||||
| 132 | // Support for fast byte/short loading with zero extension (depending on particular CPU) | |||
| 133 | int load_unsigned_byte(Register dst, Address src); | |||
| 134 | int load_unsigned_short(Register dst, Address src); | |||
| 135 | ||||
| 136 | // Support for fast byte/short loading with sign extension (depending on particular CPU) | |||
| 137 | int load_signed_byte(Register dst, Address src); | |||
| 138 | int load_signed_short(Register dst, Address src); | |||
| 139 | ||||
| 140 | // Support for sign-extension (hi:lo = extend_sign(lo)) | |||
| 141 | void extend_sign(Register hi, Register lo); | |||
| 142 | ||||
| 143 | // Load and store values by size and signed-ness | |||
| 144 | void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); | |||
| 145 | void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); | |||
| 146 | ||||
| 147 | // Support for inc/dec with optimal instruction selection depending on value | |||
| 148 | ||||
  // On LP64 these operate on the full 64-bit register; on 32-bit builds
  // they fall back to the 32-bit forms.
  void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; }
  void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; }
| 151 | ||||
| 152 | void decrementl(Address dst, int value = 1); | |||
| 153 | void decrementl(Register reg, int value = 1); | |||
| 154 | ||||
| 155 | void decrementq(Register reg, int value = 1); | |||
| 156 | void decrementq(Address dst, int value = 1); | |||
| 157 | ||||
| 158 | void incrementl(Address dst, int value = 1); | |||
| 159 | void incrementl(Register reg, int value = 1); | |||
| 160 | ||||
| 161 | void incrementq(Register reg, int value = 1); | |||
| 162 | void incrementq(Address dst, int value = 1); | |||
| 163 | ||||
| 164 | // Support optimal SSE move instructions. | |||
  // Single-precision float move; instruction choice is governed by the
  // UseXmmRegToRegMoveAll tuning flag.
  void movflt(XMMRegister dst, XMMRegister src) {
    // Same register: nothing to do.
    if (dst-> encoding() == src->encoding()) return;
    if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
    else                       { movss (dst, src); return; }
  }
  void movflt(XMMRegister dst, Address src) { movss(dst, src); }
  void movflt(XMMRegister dst, AddressLiteral src);
  void movflt(Address dst, XMMRegister src) { movss(dst, src); }

  // Move with zero extension
  void movfltz(XMMRegister dst, XMMRegister src) { movss(dst, src); }

  // Double-precision float move; mirrors movflt above.
  void movdbl(XMMRegister dst, XMMRegister src) {
    if (dst-> encoding() == src->encoding()) return;
    if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; }
    else                       { movsd (dst, src); return; }
  }

  void movdbl(XMMRegister dst, AddressLiteral src);

  // Load from memory: movsd clears the upper half of the destination
  // register, movlpd leaves it unchanged; selection is governed by the
  // UseXmmLoadAndClearUpper tuning flag.
  void movdbl(XMMRegister dst, Address src) {
    if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; }
    else                         { movlpd(dst, src); return; }
  }
  void movdbl(Address dst, XMMRegister src) { movsd(dst, src); }
| 190 | ||||
| 191 | void incrementl(AddressLiteral dst); | |||
| 192 | void incrementl(ArrayAddress dst); | |||
| 193 | ||||
| 194 | void incrementq(AddressLiteral dst); | |||
| 195 | ||||
| 196 | // Alignment | |||
| 197 | void align32(); | |||
| 198 | void align64(); | |||
| 199 | void align(int modulus); | |||
| 200 | void align(int modulus, int target); | |||
| 201 | ||||
| 202 | // A 5 byte nop that is safe for patching (see patch_verified_entry) | |||
| 203 | void fat_nop(); | |||
| 204 | ||||
| 205 | // Stack frame creation/removal | |||
| 206 | void enter(); | |||
| 207 | void leave(); | |||
| 208 | ||||
| 209 | // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) | |||
| 210 | // The pointer will be loaded into the thread register. | |||
| 211 | void get_thread(Register thread); | |||
| 212 | ||||
| 213 | #ifdef _LP641 | |||
| 214 | // Support for argument shuffling | |||
| 215 | ||||
| 216 | void move32_64(VMRegPair src, VMRegPair dst); | |||
| 217 | void long_move(VMRegPair src, VMRegPair dst); | |||
| 218 | void float_move(VMRegPair src, VMRegPair dst); | |||
| 219 | void double_move(VMRegPair src, VMRegPair dst); | |||
| 220 | void move_ptr(VMRegPair src, VMRegPair dst); | |||
| 221 | void object_move(OopMap* map, | |||
| 222 | int oop_handle_offset, | |||
| 223 | int framesize_in_slots, | |||
| 224 | VMRegPair src, | |||
| 225 | VMRegPair dst, | |||
| 226 | bool is_receiver, | |||
| 227 | int* receiver_offset); | |||
| 228 | #endif // _LP64 | |||
| 229 | ||||
| 230 | // Support for VM calls | |||
| 231 | // | |||
| 232 | // It is imperative that all calls into the VM are handled via the call_VM macros. | |||
| 233 | // They make sure that the stack linkage is setup correctly. call_VM's correspond | |||
| 234 | // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. | |||
| 235 | ||||
| 236 | ||||
| 237 | void call_VM(Register oop_result, | |||
| 238 | address entry_point, | |||
| 239 | bool check_exceptions = true); | |||
| 240 | void call_VM(Register oop_result, | |||
| 241 | address entry_point, | |||
| 242 | Register arg_1, | |||
| 243 | bool check_exceptions = true); | |||
| 244 | void call_VM(Register oop_result, | |||
| 245 | address entry_point, | |||
| 246 | Register arg_1, Register arg_2, | |||
| 247 | bool check_exceptions = true); | |||
| 248 | void call_VM(Register oop_result, | |||
| 249 | address entry_point, | |||
| 250 | Register arg_1, Register arg_2, Register arg_3, | |||
| 251 | bool check_exceptions = true); | |||
| 252 | ||||
| 253 | // Overloadings with last_Java_sp | |||
| 254 | void call_VM(Register oop_result, | |||
| 255 | Register last_java_sp, | |||
| 256 | address entry_point, | |||
| 257 | int number_of_arguments = 0, | |||
| 258 | bool check_exceptions = true); | |||
| 259 | void call_VM(Register oop_result, | |||
| 260 | Register last_java_sp, | |||
| 261 | address entry_point, | |||
| 262 | Register arg_1, bool | |||
| 263 | check_exceptions = true); | |||
| 264 | void call_VM(Register oop_result, | |||
| 265 | Register last_java_sp, | |||
| 266 | address entry_point, | |||
| 267 | Register arg_1, Register arg_2, | |||
| 268 | bool check_exceptions = true); | |||
| 269 | void call_VM(Register oop_result, | |||
| 270 | Register last_java_sp, | |||
| 271 | address entry_point, | |||
| 272 | Register arg_1, Register arg_2, Register arg_3, | |||
| 273 | bool check_exceptions = true); | |||
| 274 | ||||
| 275 | void get_vm_result (Register oop_result, Register thread); | |||
| 276 | void get_vm_result_2(Register metadata_result, Register thread); | |||
| 277 | ||||
| 278 | // These always tightly bind to MacroAssembler::call_VM_base | |||
| 279 | // bypassing the virtual implementation | |||
| 280 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true); | |||
| 281 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true); | |||
| 282 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); | |||
| 283 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); | |||
| 284 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true); | |||
| 285 | ||||
| 286 | void call_VM_leaf0(address entry_point); | |||
| 287 | void call_VM_leaf(address entry_point, | |||
| 288 | int number_of_arguments = 0); | |||
| 289 | void call_VM_leaf(address entry_point, | |||
| 290 | Register arg_1); | |||
| 291 | void call_VM_leaf(address entry_point, | |||
| 292 | Register arg_1, Register arg_2); | |||
| 293 | void call_VM_leaf(address entry_point, | |||
| 294 | Register arg_1, Register arg_2, Register arg_3); | |||
| 295 | ||||
| 296 | // These always tightly bind to MacroAssembler::call_VM_leaf_base | |||
| 297 | // bypassing the virtual implementation | |||
| 298 | void super_call_VM_leaf(address entry_point); | |||
| 299 | void super_call_VM_leaf(address entry_point, Register arg_1); | |||
| 300 | void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); | |||
| 301 | void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); | |||
| 302 | void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4); | |||
| 303 | ||||
| 304 | // last Java Frame (fills frame anchor) | |||
| 305 | void set_last_Java_frame(Register thread, | |||
| 306 | Register last_java_sp, | |||
| 307 | Register last_java_fp, | |||
| 308 | address last_java_pc); | |||
| 309 | ||||
| 310 | // thread in the default location (r15_thread on 64bit) | |||
| 311 | void set_last_Java_frame(Register last_java_sp, | |||
| 312 | Register last_java_fp, | |||
| 313 | address last_java_pc); | |||
| 314 | ||||
| 315 | void reset_last_Java_frame(Register thread, bool clear_fp); | |||
| 316 | ||||
| 317 | // thread in the default location (r15_thread on 64bit) | |||
| 318 | void reset_last_Java_frame(bool clear_fp); | |||
| 319 | ||||
| 320 | // jobjects | |||
| 321 | void clear_jweak_tag(Register possibly_jweak); | |||
| 322 | void resolve_jobject(Register value, Register thread, Register tmp); | |||
| 323 | ||||
| 324 | // C 'boolean' to Java boolean: x == 0 ? 0 : 1 | |||
| 325 | void c2bool(Register x); | |||
| 326 | ||||
| 327 | // C++ bool manipulation | |||
| 328 | ||||
| 329 | void movbool(Register dst, Address src); | |||
| 330 | void movbool(Address dst, bool boolconst); | |||
| 331 | void movbool(Address dst, Register src); | |||
| 332 | void testbool(Register dst); | |||
| 333 | ||||
| 334 | void resolve_oop_handle(Register result, Register tmp = rscratch2); | |||
| 335 | void resolve_weak_handle(Register result, Register tmp); | |||
| 336 | void load_mirror(Register mirror, Register method, Register tmp = rscratch2); | |||
| 337 | void load_method_holder_cld(Register rresult, Register rmethod); | |||
| 338 | ||||
| 339 | void load_method_holder(Register holder, Register method); | |||
| 340 | ||||
| 341 | // oop manipulations | |||
| 342 | void load_klass(Register dst, Register src, Register tmp); | |||
| 343 | void store_klass(Register dst, Register src, Register tmp); | |||
| 344 | ||||
| 345 | void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, | |||
| 346 | Register tmp1, Register thread_tmp); | |||
| 347 | void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, | |||
| 348 | Register tmp1, Register tmp2); | |||
| 349 | ||||
| 350 | void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, | |||
| 351 | Register thread_tmp = noreg, DecoratorSet decorators = 0); | |||
| 352 | void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, | |||
| 353 | Register thread_tmp = noreg, DecoratorSet decorators = 0); | |||
| 354 | void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, | |||
| 355 | Register tmp2 = noreg, DecoratorSet decorators = 0); | |||
| 356 | ||||
| 357 | // Used for storing NULL. All other oop constants should be | |||
| 358 | // stored using routines that take a jobject. | |||
| 359 | void store_heap_oop_null(Address dst); | |||
| 360 | ||||
| 361 | #ifdef _LP641 | |||
| 362 | void store_klass_gap(Register dst, Register src); | |||
| 363 | ||||
| 364 | // This dummy is to prevent a call to store_heap_oop from | |||
| 365 | // converting a zero (like NULL) into a Register by giving | |||
| 366 | // the compiler two choices it can't resolve | |||
| 367 | ||||
| 368 | void store_heap_oop(Address dst, void* dummy); | |||
| 369 | ||||
| 370 | void encode_heap_oop(Register r); | |||
| 371 | void decode_heap_oop(Register r); | |||
| 372 | void encode_heap_oop_not_null(Register r); | |||
| 373 | void decode_heap_oop_not_null(Register r); | |||
| 374 | void encode_heap_oop_not_null(Register dst, Register src); | |||
| 375 | void decode_heap_oop_not_null(Register dst, Register src); | |||
| 376 | ||||
| 377 | void set_narrow_oop(Register dst, jobject obj); | |||
| 378 | void set_narrow_oop(Address dst, jobject obj); | |||
| 379 | void cmp_narrow_oop(Register dst, jobject obj); | |||
| 380 | void cmp_narrow_oop(Address dst, jobject obj); | |||
| 381 | ||||
| 382 | void encode_klass_not_null(Register r, Register tmp); | |||
| 383 | void decode_klass_not_null(Register r, Register tmp); | |||
| 384 | void encode_and_move_klass_not_null(Register dst, Register src); | |||
| 385 | void decode_and_move_klass_not_null(Register dst, Register src); | |||
| 386 | void set_narrow_klass(Register dst, Klass* k); | |||
| 387 | void set_narrow_klass(Address dst, Klass* k); | |||
| 388 | void cmp_narrow_klass(Register dst, Klass* k); | |||
| 389 | void cmp_narrow_klass(Address dst, Klass* k); | |||
| 390 | ||||
| 391 | // if heap base register is used - reinit it with the correct value | |||
| 392 | void reinit_heapbase(); | |||
| 393 | ||||
| 394 | DEBUG_ONLY(void verify_heapbase(const char* msg);)void verify_heapbase(const char* msg); | |||
| 395 | ||||
| 396 | #endif // _LP64 | |||
| 397 | ||||
| 398 | // Int division/remainder for Java | |||
| 399 | // (as idivl, but checks for special case as described in JVM spec.) | |||
| 400 | // returns idivl instruction offset for implicit exception handling | |||
| 401 | int corrected_idivl(Register reg); | |||
| 402 | ||||
| 403 | // Long division/remainder for Java | |||
| 404 | // (as idivq, but checks for special case as described in JVM spec.) | |||
| 405 | // returns idivq instruction offset for implicit exception handling | |||
| 406 | int corrected_idivq(Register reg); | |||
| 407 | ||||
| 408 | void int3(); | |||
| 409 | ||||
| 410 | // Long operation macros for a 32bit cpu | |||
| 411 | // Long negation for Java | |||
| 412 | void lneg(Register hi, Register lo); | |||
| 413 | ||||
| 414 | // Long multiplication for Java | |||
| 415 | // (destroys contents of eax, ebx, ecx and edx) | |||
| 416 | void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y | |||
| 417 | ||||
| 418 | // Long shifts for Java | |||
| 419 | // (semantics as described in JVM spec.) | |||
| 420 | void lshl(Register hi, Register lo); // hi:lo << (rcx & 0x3f) | |||
| 421 | void lshr(Register hi, Register lo, bool sign_extension = false); // hi:lo >> (rcx & 0x3f) | |||
| 422 | ||||
| 423 | // Long compare for Java | |||
| 424 | // (semantics as described in JVM spec.) | |||
| 425 | void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y) | |||
| 426 | ||||
| 427 | ||||
| 428 | // misc | |||
| 429 | ||||
| 430 | // Sign extension | |||
| 431 | void sign_extend_short(Register reg); | |||
| 432 | void sign_extend_byte(Register reg); | |||
| 433 | ||||
| 434 | // Division by power of 2, rounding towards 0 | |||
| 435 | void division_with_shift(Register reg, int shift_value); | |||
| 436 | ||||
| 437 | #ifndef _LP641 | |||
| 438 | // Compares the top-most stack entries on the FPU stack and sets the eflags as follows: | |||
| 439 | // | |||
| 440 | // CF (corresponds to C0) if x < y | |||
| 441 | // PF (corresponds to C2) if unordered | |||
| 442 | // ZF (corresponds to C3) if x = y | |||
| 443 | // | |||
| 444 | // The arguments are in reversed order on the stack (i.e., top of stack is first argument). | |||
| 445 | // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code) | |||
| 446 | void fcmp(Register tmp); | |||
| 447 | // Variant of the above which allows y to be further down the stack | |||
| 448 | // and which only pops x and y if specified. If pop_right is | |||
| 449 | // specified then pop_left must also be specified. | |||
| 450 | void fcmp(Register tmp, int index, bool pop_left, bool pop_right); | |||
| 451 | ||||
| 452 | // Floating-point comparison for Java | |||
| 453 | // Compares the top-most stack entries on the FPU stack and stores the result in dst. | |||
| 454 | // The arguments are in reversed order on the stack (i.e., top of stack is first argument). | |||
| 455 | // (semantics as described in JVM spec.) | |||
| 456 | void fcmp2int(Register dst, bool unordered_is_less); | |||
| 457 | // Variant of the above which allows y to be further down the stack | |||
| 458 | // and which only pops x and y if specified. If pop_right is | |||
| 459 | // specified then pop_left must also be specified. | |||
| 460 | void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right); | |||
| 461 | ||||
| 462 | // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards) | |||
| 463 | // tmp is a temporary register, if none is available use noreg | |||
| 464 | void fremr(Register tmp); | |||
| 465 | ||||
| 466 | // only if +VerifyFPU | |||
| 467 | void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); | |||
| 468 | #endif // !LP64 | |||
| 469 | ||||
| 470 | // dst = c = a * b + c | |||
| 471 | void fmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c); | |||
| 472 | void fmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c); | |||
| 473 | ||||
| 474 | void vfmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len); | |||
| 475 | void vfmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len); | |||
| 476 | void vfmad(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len); | |||
| 477 | void vfmaf(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len); | |||
| 478 | ||||
| 479 | ||||
| 480 | // same as fcmp2int, but using SSE2 | |||
| 481 | void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); | |||
| 482 | void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); | |||
| 483 | ||||
| 484 | // branch to L if FPU flag C2 is set/not set | |||
| 485 | // tmp is a temporary register, if none is available use noreg | |||
| 486 | void jC2 (Register tmp, Label& L); | |||
| 487 | void jnC2(Register tmp, Label& L); | |||
| 488 | ||||
| 489 | // Load float value from 'address'. If UseSSE >= 1, the value is loaded into | |||
| 490 | // register xmm0. Otherwise, the value is loaded onto the FPU stack. | |||
| 491 | void load_float(Address src); | |||
| 492 | ||||
| 493 | // Store float value to 'address'. If UseSSE >= 1, the value is stored | |||
| 494 | // from register xmm0. Otherwise, the value is stored from the FPU stack. | |||
| 495 | void store_float(Address dst); | |||
| 496 | ||||
| 497 | // Load double value from 'address'. If UseSSE >= 2, the value is loaded into | |||
| 498 | // register xmm0. Otherwise, the value is loaded onto the FPU stack. | |||
| 499 | void load_double(Address src); | |||
| 500 | ||||
| 501 | // Store double value to 'address'. If UseSSE >= 2, the value is stored | |||
| 502 | // from register xmm0. Otherwise, the value is stored from the FPU stack. | |||
| 503 | void store_double(Address dst); | |||
| 504 | ||||
| 505 | #ifndef _LP641 | |||
| 506 | // Pop ST (ffree & fincstp combined) | |||
| 507 | void fpop(); | |||
| 508 | ||||
| 509 | void empty_FPU_stack(); | |||
| 510 | #endif // !_LP64 | |||
| 511 | ||||
| 512 | void push_IU_state(); | |||
| 513 | void pop_IU_state(); | |||
| 514 | ||||
| 515 | void push_FPU_state(); | |||
| 516 | void pop_FPU_state(); | |||
| 517 | ||||
| 518 | void push_CPU_state(); | |||
| 519 | void pop_CPU_state(); | |||
| 520 | ||||
| 521 | // Round up to a power of two | |||
| 522 | void round_to(Register reg, int modulus); | |||
| 523 | ||||
| 524 | // Callee saved registers handling | |||
| 525 | void push_callee_saved_registers(); | |||
| 526 | void pop_callee_saved_registers(); | |||
| 527 | ||||
| 528 | // allocation | |||
| 529 | void eden_allocate( | |||
| 530 | Register thread, // Current thread | |||
| 531 | Register obj, // result: pointer to object after successful allocation | |||
| 532 | Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise | |||
| 533 | int con_size_in_bytes, // object size in bytes if known at compile time | |||
| 534 | Register t1, // temp register | |||
| 535 | Label& slow_case // continuation point if fast allocation fails | |||
| 536 | ); | |||
| 537 | void tlab_allocate( | |||
| 538 | Register thread, // Current thread | |||
| 539 | Register obj, // result: pointer to object after successful allocation | |||
| 540 | Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise | |||
| 541 | int con_size_in_bytes, // object size in bytes if known at compile time | |||
| 542 | Register t1, // temp register | |||
| 543 | Register t2, // temp register | |||
| 544 | Label& slow_case // continuation point if fast allocation fails | |||
| 545 | ); | |||
| 546 | void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp); | |||
| 547 | ||||
| 548 | // interface method calling | |||
| 549 | void lookup_interface_method(Register recv_klass, | |||
| 550 | Register intf_klass, | |||
| 551 | RegisterOrConstant itable_index, | |||
| 552 | Register method_result, | |||
| 553 | Register scan_temp, | |||
| 554 | Label& no_such_interface, | |||
| 555 | bool return_method = true); | |||
| 556 | ||||
| 557 | // virtual method calling | |||
| 558 | void lookup_virtual_method(Register recv_klass, | |||
| 559 | RegisterOrConstant vtable_index, | |||
| 560 | Register method_result); | |||
| 561 | ||||
| 562 | // Test sub_klass against super_klass, with fast and slow paths. | |||
| 563 | ||||
| 564 | // The fast path produces a tri-state answer: yes / no / maybe-slow. | |||
| 565 | // One of the three labels can be NULL, meaning take the fall-through. | |||
| 566 | // If super_check_offset is -1, the value is loaded up from super_klass. | |||
| 567 | // No registers are killed, except temp_reg. | |||
| 568 | void check_klass_subtype_fast_path(Register sub_klass, | |||
| 569 | Register super_klass, | |||
| 570 | Register temp_reg, | |||
| 571 | Label* L_success, | |||
| 572 | Label* L_failure, | |||
| 573 | Label* L_slow_path, | |||
| 574 | RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); | |||
| 575 | ||||
| 576 | // The rest of the type check; must be wired to a corresponding fast path. | |||
| 577 | // It does not repeat the fast path logic, so don't use it standalone. | |||
| 578 | // The temp_reg and temp2_reg can be noreg, if no temps are available. | |||
| 579 | // Updates the sub's secondary super cache as necessary. | |||
| 580 | // If set_cond_codes, condition codes will be Z on success, NZ on failure. | |||
| 581 | void check_klass_subtype_slow_path(Register sub_klass, | |||
| 582 | Register super_klass, | |||
| 583 | Register temp_reg, | |||
| 584 | Register temp2_reg, | |||
| 585 | Label* L_success, | |||
| 586 | Label* L_failure, | |||
| 587 | bool set_cond_codes = false); | |||
| 588 | ||||
| 589 | // Simplified, combined version, good for typical uses. | |||
| 590 | // Falls through on failure. | |||
| 591 | void check_klass_subtype(Register sub_klass, | |||
| 592 | Register super_klass, | |||
| 593 | Register temp_reg, | |||
| 594 | Label& L_success); | |||
| 595 | ||||
| 596 | void clinit_barrier(Register klass, | |||
| 597 | Register thread, | |||
| 598 | Label* L_fast_path = NULL__null, | |||
| 599 | Label* L_slow_path = NULL__null); | |||
| 600 | ||||
| 601 | // method handles (JSR 292) | |||
| 602 | Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); | |||
| 603 | ||||
| 604 | // Debugging | |||
| 605 | ||||
| 606 | // only if +VerifyOops | |||
| 607 | void _verify_oop(Register reg, const char* s, const char* file, int line); | |||
| 608 | void _verify_oop_addr(Address addr, const char* s, const char* file, int line); | |||
| 609 | ||||
| 610 | void _verify_oop_checked(Register reg, const char* s, const char* file, int line) { | |||
| 611 | if (VerifyOops) { | |||
| 612 | _verify_oop(reg, s, file, line); | |||
| 613 | } | |||
| 614 | } | |||
| 615 | void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) { | |||
| 616 | if (VerifyOops) { | |||
| 617 | _verify_oop_addr(reg, s, file, line); | |||
| 618 | } | |||
| 619 | } | |||
| 620 | ||||
| 621 | // TODO: verify method and klass metadata (compare against vptr?) | |||
| 622 | void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} | |||
| 623 | void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} | |||
| 624 | ||||
| 625 | #define verify_oop(reg)_verify_oop_checked(reg, "broken oop " "reg", "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp" , 625) _verify_oop_checked(reg, "broken oop " #reg, __FILE__"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp", __LINE__625) | |||
| 626 | #define verify_oop_msg(reg, msg)_verify_oop_checked(reg, "broken oop " "reg" ", " "msg", "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp" , 626) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp", __LINE__626) | |||
| 627 | #define verify_oop_addr(addr)_verify_oop_addr_checked(addr, "broken oop addr " "addr", "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp" , 627) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp", __LINE__627) | |||
| 628 | #define verify_method_ptr(reg)_verify_method_ptr(reg, "broken method " "reg", "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp" , 628) _verify_method_ptr(reg, "broken method " #reg, __FILE__"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp", __LINE__628) | |||
| 629 | #define verify_klass_ptr(reg)_verify_klass_ptr(reg, "broken klass " "reg", "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp" , 629) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp", __LINE__629) | |||
| 630 | ||||
| 631 | // Verify or restore cpu control state after JNI call | |||
| 632 | void restore_cpu_control_state_after_jni(); | |||
| 633 | ||||
| 634 | // prints msg, dumps registers and stops execution | |||
| 635 | void stop(const char* msg); | |||
| 636 | ||||
| 637 | // prints msg and continues | |||
| 638 | void warn(const char* msg); | |||
| 639 | ||||
| 640 | // dumps registers and other state | |||
| 641 | void print_state(); | |||
| 642 | ||||
| 643 | static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg); | |||
| 644 | static void debug64(char* msg, int64_t pc, int64_t regs[]); | |||
| 645 | static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip); | |||
| 646 | static void print_state64(int64_t pc, int64_t regs[]); | |||
| 647 | ||||
| 648 | void os_breakpoint(); | |||
| 649 | ||||
| 650 | void untested() { stop("untested"); } | |||
| 651 | ||||
| 652 | void unimplemented(const char* what = ""); | |||
| 653 | ||||
| 654 | void should_not_reach_here() { stop("should not reach here"); } | |||
| 655 | ||||
| 656 | void print_CPU_state(); | |||
| 657 | ||||
| 658 | // Stack overflow checking | |||
| 659 | void bang_stack_with_offset(int offset) { | |||
| 660 | // stack grows down, caller passes positive offset | |||
| 661 | assert(offset > 0, "must bang with negative offset")do { if (!(offset > 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp" , 661, "assert(" "offset > 0" ") failed", "must bang with negative offset" ); ::breakpoint(); } } while (0); | |||
| 662 | movl(Address(rsp, (-offset)), rax); | |||
| 663 | } | |||
| 664 | ||||
| 665 | // Writes to stack successive pages until offset reached to check for | |||
| 666 | // stack overflow + shadow pages. Also, clobbers tmp | |||
| 667 | void bang_stack_size(Register size, Register tmp); | |||
| 668 | ||||
| 669 | // Check for reserved stack access in method being exited (for JIT) | |||
| 670 | void reserved_stack_check(); | |||
| 671 | ||||
| 672 | void safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool in_nmethod); | |||
| 673 | ||||
| 674 | void verify_tlab(); | |||
| 675 | ||||
| 676 | Condition negate_condition(Condition cond); | |||
| 677 | ||||
| 678 | // Instructions that use AddressLiteral operands. These instruction can handle 32bit/64bit | |||
| 679 | // operands. In general the names are modified to avoid hiding the instruction in Assembler | |||
| 680 | // so that we don't need to implement all the varieties in the Assembler with trivial wrappers | |||
| 681 | // here in MacroAssembler. The major exception to this rule is call | |||
| 682 | ||||
| 683 | // Arithmetics | |||
| 684 | ||||
| 685 | ||||
| 686 | void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src))addq(dst, src) NOT_LP64(addl(dst, src)) ; } | |||
| 687 | void addptr(Address dst, Register src); | |||
| 688 | ||||
| 689 | void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src))addq(dst, src) NOT_LP64(addl(dst, src)); } | |||
| 690 | void addptr(Register dst, int32_t src); | |||
| 691 | void addptr(Register dst, Register src); | |||
| 692 | void addptr(Register dst, RegisterOrConstant src) { | |||
| 693 | if (src.is_constant()) addptr(dst, (int) src.as_constant()); | |||
| 694 | else addptr(dst, src.as_register()); | |||
| 695 | } | |||
| 696 | ||||
| 697 | void andptr(Register dst, int32_t src); | |||
| 698 | void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2))andq(src1, src2) NOT_LP64(andl(src1, src2)) ; } | |||
| 699 | ||||
| 700 | void cmp8(AddressLiteral src1, int imm); | |||
| 701 | ||||
| 702 | // renamed to drag out the casting of address to int32_t/intptr_t | |||
| 703 | void cmp32(Register src1, int32_t imm); | |||
| 704 | ||||
| 705 | void cmp32(AddressLiteral src1, int32_t imm); | |||
| 706 | // compare reg - mem, or reg - &mem | |||
| 707 | void cmp32(Register src1, AddressLiteral src2); | |||
| 708 | ||||
| 709 | void cmp32(Register src1, Address src2); | |||
| 710 | ||||
| 711 | #ifndef _LP641 | |||
| 712 | void cmpklass(Address dst, Metadata* obj); | |||
| 713 | void cmpklass(Register dst, Metadata* obj); | |||
| 714 | void cmpoop(Address dst, jobject obj); | |||
| 715 | #endif // _LP64 | |||
| 716 | ||||
| 717 | void cmpoop(Register src1, Register src2); | |||
| 718 | void cmpoop(Register src1, Address src2); | |||
| 719 | void cmpoop(Register dst, jobject obj); | |||
| 720 | ||||
| 721 | // NOTE src2 must be the lval. This is NOT an mem-mem compare | |||
| 722 | void cmpptr(Address src1, AddressLiteral src2); | |||
| 723 | ||||
| 724 | void cmpptr(Register src1, AddressLiteral src2); | |||
| 725 | ||||
| 726 | void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2))cmpq(src1, src2) NOT_LP64(cmpl(src1, src2)) ; } | |||
| 727 | void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2))cmpq(src1, src2) NOT_LP64(cmpl(src1, src2)) ; } | |||
| 728 | // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } | |||
| 729 | ||||
| 730 | void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2))cmpq(src1, src2) NOT_LP64(cmpl(src1, src2)) ; } | |||
| 731 | void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2))cmpq(src1, src2) NOT_LP64(cmpl(src1, src2)) ; } | |||
| 732 | ||||
| 733 | // cmp64 to avoild hiding cmpq | |||
| 734 | void cmp64(Register src1, AddressLiteral src); | |||
| 735 | ||||
| 736 | void cmpxchgptr(Register reg, Address adr); | |||
| 737 | ||||
| 738 | void locked_cmpxchgptr(Register reg, AddressLiteral adr); | |||
| 739 | ||||
| 740 | ||||
| 741 | void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src))imulq(dst, src) NOT_LP64(imull(dst, src)); } | |||
| 742 | void imulptr(Register dst, Register src, int imm32) { LP64_ONLY(imulq(dst, src, imm32))imulq(dst, src, imm32) NOT_LP64(imull(dst, src, imm32)); } | |||
| 743 | ||||
| 744 | ||||
| 745 | void negptr(Register dst) { LP64_ONLY(negq(dst))negq(dst) NOT_LP64(negl(dst)); } | |||
| 746 | ||||
| 747 | void notptr(Register dst) { LP64_ONLY(notq(dst))notq(dst) NOT_LP64(notl(dst)); } | |||
| 748 | ||||
| 749 | void shlptr(Register dst, int32_t shift); | |||
| 750 | void shlptr(Register dst) { LP64_ONLY(shlq(dst))shlq(dst) NOT_LP64(shll(dst)); } | |||
| 751 | ||||
| 752 | void shrptr(Register dst, int32_t shift); | |||
| 753 | void shrptr(Register dst) { LP64_ONLY(shrq(dst))shrq(dst) NOT_LP64(shrl(dst)); } | |||
| 754 | ||||
| 755 | void sarptr(Register dst) { LP64_ONLY(sarq(dst))sarq(dst) NOT_LP64(sarl(dst)); } | |||
| 756 | void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src))sarq(dst, src) NOT_LP64(sarl(dst, src)); } | |||
| 757 | ||||
| 758 | void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src))subq(dst, src) NOT_LP64(subl(dst, src)); } | |||
| 759 | ||||
| 760 | void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src))subq(dst, src) NOT_LP64(subl(dst, src)); } | |||
| 761 | void subptr(Register dst, int32_t src); | |||
| 762 | // Force generation of a 4 byte immediate value even if it fits into 8bit | |||
| 763 | void subptr_imm32(Register dst, int32_t src); | |||
| 764 | void subptr(Register dst, Register src); | |||
| 765 | void subptr(Register dst, RegisterOrConstant src) { | |||
| 766 | if (src.is_constant()) subptr(dst, (int) src.as_constant()); | |||
| 767 | else subptr(dst, src.as_register()); | |||
| 768 | } | |||
| 769 | ||||
| 770 | void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src))sbbq(dst, src) NOT_LP64(sbbl(dst, src)); } | |||
| 771 | void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src))sbbq(dst, src) NOT_LP64(sbbl(dst, src)); } | |||
| 772 | ||||
| 773 | void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2))xchgq(src1, src2) NOT_LP64(xchgl(src1, src2)) ; } | |||
| 774 | void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2))xchgq(src1, src2) NOT_LP64(xchgl(src1, src2)) ; } | |||
| 775 | ||||
| 776 | void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2))xaddq(src1, src2) NOT_LP64(xaddl(src1, src2)) ; } | |||
| 777 | ||||
| 778 | ||||
| 779 | ||||
| 780 | // Helper functions for statistics gathering. | |||
| 781 | // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes. | |||
| 782 | void cond_inc32(Condition cond, AddressLiteral counter_addr); | |||
| 783 | // Unconditional atomic increment. | |||
| 784 | void atomic_incl(Address counter_addr); | |||
| 785 | void atomic_incl(AddressLiteral counter_addr, Register scr = rscratch1); | |||
| 786 | #ifdef _LP641 | |||
| 787 | void atomic_incq(Address counter_addr); | |||
| 788 | void atomic_incq(AddressLiteral counter_addr, Register scr = rscratch1); | |||
| 789 | #endif | |||
| 790 | void atomic_incptr(AddressLiteral counter_addr, Register scr = rscratch1) { LP64_ONLY(atomic_incq(counter_addr, scr))atomic_incq(counter_addr, scr) NOT_LP64(atomic_incl(counter_addr, scr)) ; } | |||
| 791 | void atomic_incptr(Address counter_addr) { LP64_ONLY(atomic_incq(counter_addr))atomic_incq(counter_addr) NOT_LP64(atomic_incl(counter_addr)) ; } | |||
| 792 | ||||
| 793 | void lea(Register dst, AddressLiteral adr); | |||
| 794 | void lea(Address dst, AddressLiteral adr); | |||
| 795 | void lea(Register dst, Address adr) { Assembler::lea(dst, adr); } | |||
| 796 | ||||
| 797 | void leal32(Register dst, Address src) { leal(dst, src); } | |||
| 798 | ||||
| 799 | // Import other testl() methods from the parent class or else | |||
| 800 | // they will be hidden by the following overriding declaration. | |||
| 801 | using Assembler::testl; | |||
| 802 | void testl(Register dst, AddressLiteral src); | |||
| 803 | ||||
| 804 | void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src))orq(dst, src) NOT_LP64(orl(dst, src)); } | |||
| 805 | void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src))orq(dst, src) NOT_LP64(orl(dst, src)); } | |||
| 806 | void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src))orq(dst, src) NOT_LP64(orl(dst, src)); } | |||
| 807 | void orptr(Address dst, int32_t imm32) { LP64_ONLY(orq(dst, imm32))orq(dst, imm32) NOT_LP64(orl(dst, imm32)); } | |||
| 808 | ||||
| 809 | void testptr(Register src, int32_t imm32) { LP64_ONLY(testq(src, imm32))testq(src, imm32) NOT_LP64(testl(src, imm32)); } | |||
| 810 | void testptr(Register src1, Address src2) { LP64_ONLY(testq(src1, src2))testq(src1, src2) NOT_LP64(testl(src1, src2)); } | |||
| 811 | void testptr(Register src1, Register src2); | |||
| 812 | ||||
| 813 | void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src))xorq(dst, src) NOT_LP64(xorl(dst, src)); } | |||
| 814 | void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src))xorq(dst, src) NOT_LP64(xorl(dst, src)); } | |||
| 815 | ||||
| 816 | // Calls | |||
| 817 | ||||
| 818 | void call(Label& L, relocInfo::relocType rtype); | |||
| 819 | void call(Register entry); | |||
| 820 | void call(Address addr) { Assembler::call(addr); } | |||
| 821 | ||||
| 822 | // NOTE: this call transfers to the effective address of entry NOT | |||
| 823 | // the address contained by entry. This is because this is more natural | |||
| 824 | // for jumps/calls. | |||
| 825 | void call(AddressLiteral entry); | |||
| 826 | ||||
| 827 | // Emit the CompiledIC call idiom | |||
| 828 | void ic_call(address entry, jint method_index = 0); | |||
| 829 | ||||
| 830 | // Jumps | |||
| 831 | ||||
| 832 | // NOTE: these jumps tranfer to the effective address of dst NOT | |||
| 833 | // the address contained by dst. This is because this is more natural | |||
| 834 | // for jumps/calls. | |||
| 835 | void jump(AddressLiteral dst); | |||
| 836 | void jump_cc(Condition cc, AddressLiteral dst); | |||
| 837 | ||||
| 838 | // 32bit can do a case table jump in one instruction but we no longer allow the base | |||
| 839 | // to be installed in the Address class. This jump will tranfers to the address | |||
| 840 | // contained in the location described by entry (not the address of entry) | |||
| 841 | void jump(ArrayAddress entry); | |||
| 842 | ||||
| 843 | // Floating | |||
| 844 | ||||
| 845 | void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); } | |||
| 846 | void andpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); | |||
| 847 | void andpd(XMMRegister dst, XMMRegister src) { Assembler::andpd(dst, src); } | |||
| 848 | ||||
| 849 | void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); } | |||
| 850 | void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); } | |||
| 851 | void andps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); | |||
| 852 | ||||
| 853 | void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); } | |||
| 854 | void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); } | |||
| 855 | void comiss(XMMRegister dst, AddressLiteral src); | |||
| 856 | ||||
| 857 | void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); } | |||
| 858 | void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); } | |||
| 859 | void comisd(XMMRegister dst, AddressLiteral src); | |||
| 860 | ||||
| 861 | #ifndef _LP641 | |||
| 862 | void fadd_s(Address src) { Assembler::fadd_s(src); } | |||
| 863 | void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); } | |||
| 864 | ||||
| 865 | void fldcw(Address src) { Assembler::fldcw(src); } | |||
| 866 | void fldcw(AddressLiteral src); | |||
| 867 | ||||
| 868 | void fld_s(int index) { Assembler::fld_s(index); } | |||
| 869 | void fld_s(Address src) { Assembler::fld_s(src); } | |||
| 870 | void fld_s(AddressLiteral src); | |||
| 871 | ||||
| 872 | void fld_d(Address src) { Assembler::fld_d(src); } | |||
| 873 | void fld_d(AddressLiteral src); | |||
| 874 | ||||
| 875 | void fmul_s(Address src) { Assembler::fmul_s(src); } | |||
| 876 | void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); } | |||
| 877 | #endif // _LP64 | |||
| 878 | ||||
| 879 | void fld_x(Address src) { Assembler::fld_x(src); } | |||
| 880 | void fld_x(AddressLiteral src); | |||
| 881 | ||||
| 882 | void ldmxcsr(Address src) { Assembler::ldmxcsr(src); } | |||
| 883 | void ldmxcsr(AddressLiteral src); | |||
| 884 | ||||
| 885 | #ifdef _LP641 | |||
| 886 | private: | |||
| 887 | void sha256_AVX2_one_round_compute( | |||
| 888 | Register reg_old_h, | |||
| 889 | Register reg_a, | |||
| 890 | Register reg_b, | |||
| 891 | Register reg_c, | |||
| 892 | Register reg_d, | |||
| 893 | Register reg_e, | |||
| 894 | Register reg_f, | |||
| 895 | Register reg_g, | |||
| 896 | Register reg_h, | |||
| 897 | int iter); | |||
| 898 | void sha256_AVX2_four_rounds_compute_first(int start); | |||
| 899 | void sha256_AVX2_four_rounds_compute_last(int start); | |||
| 900 | void sha256_AVX2_one_round_and_sched( | |||
| 901 | XMMRegister xmm_0, /* == ymm4 on 0, 1, 2, 3 iterations, then rotate 4 registers left on 4, 8, 12 iterations */ | |||
| 902 | XMMRegister xmm_1, /* ymm5 */ /* full cycle is 16 iterations */ | |||
| 903 | XMMRegister xmm_2, /* ymm6 */ | |||
| 904 | XMMRegister xmm_3, /* ymm7 */ | |||
| 905 | Register reg_a, /* == eax on 0 iteration, then rotate 8 register right on each next iteration */ | |||
| 906 | Register reg_b, /* ebx */ /* full cycle is 8 iterations */ | |||
| 907 | Register reg_c, /* edi */ | |||
| 908 | Register reg_d, /* esi */ | |||
| 909 | Register reg_e, /* r8d */ | |||
| 910 | Register reg_f, /* r9d */ | |||
| 911 | Register reg_g, /* r10d */ | |||
| 912 | Register reg_h, /* r11d */ | |||
| 913 | int iter); | |||
| 914 | ||||
| 915 | void addm(int disp, Register r1, Register r2); | |||
| 916 | void gfmul(XMMRegister tmp0, XMMRegister t); | |||
| 917 | void schoolbookAAD(int i, Register subkeyH, XMMRegister data, XMMRegister tmp0, | |||
| 918 | XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3); | |||
| 919 | void generateHtbl_one_block(Register htbl); | |||
| 920 | void generateHtbl_eight_blocks(Register htbl); | |||
| 921 | public: | |||
| 922 | void sha256_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, | |||
| 923 | XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, | |||
| 924 | Register buf, Register state, Register ofs, Register limit, Register rsp, | |||
| 925 | bool multi_block, XMMRegister shuf_mask); | |||
| 926 | void avx_ghash(Register state, Register htbl, Register data, Register blocks); | |||
| 927 | #endif | |||
| 928 | ||||
| 929 | #ifdef _LP641 | |||
| 930 | private: | |||
| 931 | void sha512_AVX2_one_round_compute(Register old_h, Register a, Register b, Register c, Register d, | |||
| 932 | Register e, Register f, Register g, Register h, int iteration); | |||
| 933 | ||||
| 934 | void sha512_AVX2_one_round_and_schedule(XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
| 935 | Register a, Register b, Register c, Register d, Register e, Register f, | |||
| 936 | Register g, Register h, int iteration); | |||
| 937 | ||||
| 938 | void addmq(int disp, Register r1, Register r2); | |||
| 939 | public: | |||
| 940 | void sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, | |||
| 941 | XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, | |||
| 942 | Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block, | |||
| 943 | XMMRegister shuf_mask); | |||
| 944 | private: | |||
| 945 | void roundEnc(XMMRegister key, int rnum); | |||
| 946 | void lastroundEnc(XMMRegister key, int rnum); | |||
| 947 | void roundDec(XMMRegister key, int rnum); | |||
| 948 | void lastroundDec(XMMRegister key, int rnum); | |||
| 949 | void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask); | |||
| 950 | void gfmul_avx512(XMMRegister ghash, XMMRegister hkey); | |||
| 951 | void generateHtbl_48_block_zmm(Register htbl, Register avx512_subkeyHtbl); | |||
| 952 | void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx, | |||
| 953 | XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool reduction, | |||
| 954 | XMMRegister addmask, bool no_ghash_input, Register rounds, Register ghash_pos, | |||
| 955 | bool final_reduction, int index, XMMRegister counter_inc_mask); | |||
| 956 | public: | |||
| 957 | void aesecb_encrypt(Register source_addr, Register dest_addr, Register key, Register len); | |||
| 958 | void aesecb_decrypt(Register source_addr, Register dest_addr, Register key, Register len); | |||
| 959 | void aesctr_encrypt(Register src_addr, Register dest_addr, Register key, Register counter, | |||
| 960 | Register len_reg, Register used, Register used_addr, Register saved_encCounter_start); | |||
| 961 | void aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key, | |||
| 962 | Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter); | |||
| 963 | ||||
| 964 | #endif | |||
| 965 | ||||
| 966 | void fast_md5(Register buf, Address state, Address ofs, Address limit, | |||
| 967 | bool multi_block); | |||
| 968 | ||||
| 969 | void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0, | |||
| 970 | XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask, | |||
| 971 | Register buf, Register state, Register ofs, Register limit, Register rsp, | |||
| 972 | bool multi_block); | |||
| 973 | ||||
| 974 | #ifdef _LP641 | |||
| 975 | void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, | |||
| 976 | XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, | |||
| 977 | Register buf, Register state, Register ofs, Register limit, Register rsp, | |||
| 978 | bool multi_block, XMMRegister shuf_mask); | |||
| 979 | #else | |||
| 980 | void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, | |||
| 981 | XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, | |||
| 982 | Register buf, Register state, Register ofs, Register limit, Register rsp, | |||
| 983 | bool multi_block); | |||
| 984 | #endif | |||
| 985 | ||||
| 986 | void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
| 987 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
| 988 | Register rax, Register rcx, Register rdx, Register tmp); | |||
| 989 | ||||
| 990 | #ifdef _LP641 | |||
| 991 | void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
| 992 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
| 993 | Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2); | |||
| 994 | ||||
| 995 | void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
| 996 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
| 997 | Register rax, Register rcx, Register rdx, Register r11); | |||
| 998 | ||||
| 999 | void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, | |||
| 1000 | XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, | |||
| 1001 | Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4); | |||
| 1002 | ||||
| 1003 | void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
| 1004 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
| 1005 | Register rax, Register rbx, Register rcx, Register rdx, Register tmp1, Register tmp2, | |||
| 1006 | Register tmp3, Register tmp4); | |||
| 1007 | ||||
| 1008 | void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
| 1009 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
| 1010 | Register rax, Register rcx, Register rdx, Register tmp1, | |||
| 1011 | Register tmp2, Register tmp3, Register tmp4); | |||
| 1012 | void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
| 1013 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
| 1014 | Register rax, Register rcx, Register rdx, Register tmp1, | |||
| 1015 | Register tmp2, Register tmp3, Register tmp4); | |||
| 1016 | #else | |||
| 1017 | void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
| 1018 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
| 1019 | Register rax, Register rcx, Register rdx, Register tmp1); | |||
| 1020 | ||||
| 1021 | void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
| 1022 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
| 1023 | Register rax, Register rcx, Register rdx, Register tmp); | |||
| 1024 | ||||
| 1025 | void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, | |||
| 1026 | XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, | |||
| 1027 | Register rdx, Register tmp); | |||
| 1028 | ||||
| 1029 | void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
| 1030 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
| 1031 | Register rax, Register rbx, Register rdx); | |||
| 1032 | ||||
| 1033 | void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
| 1034 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
| 1035 | Register rax, Register rcx, Register rdx, Register tmp); | |||
| 1036 | ||||
| 1037 | void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, | |||
| 1038 | Register edx, Register ebx, Register esi, Register edi, | |||
| 1039 | Register ebp, Register esp); | |||
| 1040 | ||||
| 1041 | void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx, | |||
| 1042 | Register esi, Register edi, Register ebp, Register esp); | |||
| 1043 | ||||
| 1044 | void libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, | |||
| 1045 | Register edx, Register ebx, Register esi, Register edi, | |||
| 1046 | Register ebp, Register esp); | |||
| 1047 | ||||
| 1048 | void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, | |||
| 1049 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, | |||
| 1050 | Register rax, Register rcx, Register rdx, Register tmp); | |||
| 1051 | #endif | |||
| 1052 | ||||
| 1053 | private: | |||
| 1054 | ||||
| 1055 | // these are private because users should be doing movflt/movdbl | |||
| 1056 | ||||
| 1057 | void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); } | |||
| 1058 | void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); } | |||
| 1059 | void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); } | |||
| 1060 | void movss(XMMRegister dst, AddressLiteral src); | |||
| 1061 | ||||
| 1062 | void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); } | |||
| 1063 | void movlpd(XMMRegister dst, AddressLiteral src); | |||
| 1064 | ||||
| 1065 | public: | |||
| 1066 | ||||
| 1067 | void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); } | |||
| 1068 | void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); } | |||
| 1069 | void addsd(XMMRegister dst, AddressLiteral src); | |||
| 1070 | ||||
| 1071 | void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); } | |||
| 1072 | void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); } | |||
| 1073 | void addss(XMMRegister dst, AddressLiteral src); | |||
| 1074 | ||||
| 1075 | void addpd(XMMRegister dst, XMMRegister src) { Assembler::addpd(dst, src); } | |||
| 1076 | void addpd(XMMRegister dst, Address src) { Assembler::addpd(dst, src); } | |||
| 1077 | void addpd(XMMRegister dst, AddressLiteral src); | |||
| 1078 | ||||
| 1079 | void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); } | |||
| 1080 | void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); } | |||
| 1081 | void divsd(XMMRegister dst, AddressLiteral src); | |||
| 1082 | ||||
| 1083 | void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); } | |||
| 1084 | void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); } | |||
| 1085 | void divss(XMMRegister dst, AddressLiteral src); | |||
| 1086 | ||||
| 1087 | // Move Unaligned Double Quadword | |||
| 1088 | void movdqu(Address dst, XMMRegister src); | |||
| 1089 | void movdqu(XMMRegister dst, Address src); | |||
| 1090 | void movdqu(XMMRegister dst, XMMRegister src); | |||
| 1091 | void movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg = rscratch1); | |||
| 1092 | ||||
| 1093 | void kmovwl(KRegister dst, Register src) { Assembler::kmovwl(dst, src); } | |||
| 1094 | void kmovwl(Register dst, KRegister src) { Assembler::kmovwl(dst, src); } | |||
| 1095 | void kmovwl(KRegister dst, Address src) { Assembler::kmovwl(dst, src); } | |||
| 1096 | void kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); | |||
| 1097 | void kmovwl(Address dst, KRegister src) { Assembler::kmovwl(dst, src); } | |||
| 1098 | void kmovwl(KRegister dst, KRegister src) { Assembler::kmovwl(dst, src); } | |||
| 1099 | ||||
| 1100 | void kmovql(KRegister dst, KRegister src) { Assembler::kmovql(dst, src); } | |||
| 1101 | void kmovql(KRegister dst, Register src) { Assembler::kmovql(dst, src); } | |||
| 1102 | void kmovql(Register dst, KRegister src) { Assembler::kmovql(dst, src); } | |||
| 1103 | void kmovql(KRegister dst, Address src) { Assembler::kmovql(dst, src); } | |||
| 1104 | void kmovql(Address dst, KRegister src) { Assembler::kmovql(dst, src); } | |||
| 1105 | void kmovql(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); | |||
| 1106 | ||||
| 1107 | // Safe move operation, lowers down to 16bit moves for targets supporting | |||
| 1108 | // AVX512F feature and 64bit moves for targets supporting AVX512BW feature. | |||
| 1109 | void kmov(Address dst, KRegister src); | |||
| 1110 | void kmov(KRegister dst, Address src); | |||
| 1111 | void kmov(KRegister dst, KRegister src); | |||
| 1112 | void kmov(Register dst, KRegister src); | |||
| 1113 | void kmov(KRegister dst, Register src); | |||
| 1114 | ||||
| 1115 | // AVX Unaligned forms | |||
| 1116 | void vmovdqu(Address dst, XMMRegister src); | |||
| 1117 | void vmovdqu(XMMRegister dst, Address src); | |||
| 1118 | void vmovdqu(XMMRegister dst, XMMRegister src); | |||
| 1119 | void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); | |||
| 1120 | void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg, int vector_len); | |||
| 1121 | ||||
| 1122 | ||||
| 1123 | // AVX512 Unaligned | |||
| 1124 | void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len); | |||
| 1125 | void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len); | |||
| 1126 | ||||
| 1127 | void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); } | |||
| 1128 | void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); } | |||
| 1129 | void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); } | |||
| 1130 | void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); } | |||
| 1131 | void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); } | |||
| 1132 | void evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg); | |||
| 1133 | ||||
| 1134 | void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); } | |||
| 1135 | void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); } | |||
| 1136 | void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); } | |||
| 1137 | void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); } | |||
| 1138 | void evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg); | |||
| 1139 | ||||
| 1140 | void evmovdqul(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); } | |||
| 1141 | void evmovdqul(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); } | |||
| 1142 | void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) { | |||
| 1143 | if (dst->encoding() == src->encoding()) return; | |||
| ||||
| 1144 | Assembler::evmovdqul(dst, src, vector_len); | |||
| 1145 | } | |||
| 1146 | void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); } | |||
| 1147 | void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); } | |||
| 1148 | void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { | |||
| 1149 | if (dst->encoding() == src->encoding() && mask == k0) return; | |||
| 1150 | Assembler::evmovdqul(dst, mask, src, merge, vector_len); | |||
| 1151 | } | |||
| 1152 | void evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg); | |||
| 1153 | ||||
| 1154 | void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } | |||
| 1155 | void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } | |||
| 1156 | void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch); | |||
| 1157 | void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) { | |||
| 1158 | if (dst->encoding() == src->encoding()) return; | |||
| 1159 | Assembler::evmovdquq(dst, src, vector_len); | |||
| 1160 | } | |||
| 1161 | void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); } | |||
| 1162 | void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); } | |||
| 1163 | void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { | |||
| 1164 | if (dst->encoding() == src->encoding() && mask == k0) return; | |||
| 1165 | Assembler::evmovdquq(dst, mask, src, merge, vector_len); | |||
| 1166 | } | |||
| 1167 | void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg); | |||
| 1168 | ||||
| 1169 | // Move Aligned Double Quadword | |||
| 1170 | void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); } | |||
| 1171 | void movdqa(XMMRegister dst, XMMRegister src) { Assembler::movdqa(dst, src); } | |||
| 1172 | void movdqa(XMMRegister dst, AddressLiteral src); | |||
| 1173 | ||||
| 1174 | void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); } | |||
| 1175 | void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); } | |||
| 1176 | void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); } | |||
| 1177 | void movsd(XMMRegister dst, AddressLiteral src); | |||
| 1178 | ||||
| 1179 | void mulpd(XMMRegister dst, XMMRegister src) { Assembler::mulpd(dst, src); } | |||
| 1180 | void mulpd(XMMRegister dst, Address src) { Assembler::mulpd(dst, src); } | |||
| 1181 | void mulpd(XMMRegister dst, AddressLiteral src); | |||
| 1182 | ||||
| 1183 | void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); } | |||
| 1184 | void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); } | |||
| 1185 | void mulsd(XMMRegister dst, AddressLiteral src); | |||
| 1186 | ||||
| 1187 | void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); } | |||
| 1188 | void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); } | |||
| 1189 | void mulss(XMMRegister dst, AddressLiteral src); | |||
| 1190 | ||||
| 1191 | // Carry-Less Multiplication Quadword | |||
| 1192 | void pclmulldq(XMMRegister dst, XMMRegister src) { | |||
| 1193 | // 0x00 - multiply lower 64 bits [0:63] | |||
| 1194 | Assembler::pclmulqdq(dst, src, 0x00); | |||
| 1195 | } | |||
| 1196 | void pclmulhdq(XMMRegister dst, XMMRegister src) { | |||
| 1197 | // 0x11 - multiply upper 64 bits [64:127] | |||
| 1198 | Assembler::pclmulqdq(dst, src, 0x11); | |||
| 1199 | } | |||
| 1200 | ||||
| 1201 | void pcmpeqb(XMMRegister dst, XMMRegister src); | |||
| 1202 | void pcmpeqw(XMMRegister dst, XMMRegister src); | |||
| 1203 | ||||
| 1204 | void pcmpestri(XMMRegister dst, Address src, int imm8); | |||
| 1205 | void pcmpestri(XMMRegister dst, XMMRegister src, int imm8); | |||
| 1206 | ||||
| 1207 | void pmovzxbw(XMMRegister dst, XMMRegister src); | |||
| 1208 | void pmovzxbw(XMMRegister dst, Address src); | |||
| 1209 | ||||
| 1210 | void pmovmskb(Register dst, XMMRegister src); | |||
| 1211 | ||||
| 1212 | void ptest(XMMRegister dst, XMMRegister src); | |||
| 1213 | ||||
| 1214 | void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); } | |||
| 1215 | void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); } | |||
| 1216 | void sqrtsd(XMMRegister dst, AddressLiteral src); | |||
| 1217 | ||||
| 1218 | void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); } | |||
| 1219 | void roundsd(XMMRegister dst, Address src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); } | |||
| 1220 | void roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register scratch_reg); | |||
| 1221 | ||||
| 1222 | void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); } | |||
| 1223 | void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); } | |||
| 1224 | void sqrtss(XMMRegister dst, AddressLiteral src); | |||
| 1225 | ||||
| 1226 | void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); } | |||
| 1227 | void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); } | |||
| 1228 | void subsd(XMMRegister dst, AddressLiteral src); | |||
| 1229 | ||||
| 1230 | void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); } | |||
| 1231 | void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); } | |||
| 1232 | void subss(XMMRegister dst, AddressLiteral src); | |||
| 1233 | ||||
| 1234 | void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); } | |||
| 1235 | void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); } | |||
| 1236 | void ucomiss(XMMRegister dst, AddressLiteral src); | |||
| 1237 | ||||
| 1238 | void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); } | |||
| 1239 | void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); } | |||
| 1240 | void ucomisd(XMMRegister dst, AddressLiteral src); | |||
| 1241 | ||||
| 1242 | // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values | |||
| 1243 | void xorpd(XMMRegister dst, XMMRegister src); | |||
| 1244 | void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); } | |||
| 1245 | void xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); | |||
| 1246 | ||||
| 1247 | // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values | |||
| 1248 | void xorps(XMMRegister dst, XMMRegister src); | |||
| 1249 | void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); } | |||
| 1250 | void xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); | |||
| 1251 | ||||
| 1252 | // Shuffle Bytes | |||
| 1253 | void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); } | |||
| 1254 | void pshufb(XMMRegister dst, Address src) { Assembler::pshufb(dst, src); } | |||
| 1255 | void pshufb(XMMRegister dst, AddressLiteral src); | |||
  // AVX 3-operands instructions
  // Register/Address forms forward straight to Assembler; AddressLiteral
  // forms are defined out of line (they may need to materialize the address).

  void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
  void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); }
  void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); }
  void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); }
  void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  // Absolute value via AND with a sign-bit-clearing mask loaded from negate_field.
  void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
  void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);

  void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpaddb(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch);

  void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); }
  void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); }
  void vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch);

  void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
  void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
  void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);

  void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
  void vpbroadcastw(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastw(dst, src, vector_len); }

  void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
| 1291 | ||||
  // Vector compares
  // 'comparison' is the EVEX comparison predicate immediate; 'is_signed'
  // selects the signed vs unsigned compare encoding.
  void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
               int comparison, bool is_signed, int vector_len) { Assembler::evpcmpd(kdst, mask, nds, src, comparison, is_signed, vector_len); }
  void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
               int comparison, bool is_signed, int vector_len, Register scratch_reg);
  void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
               int comparison, bool is_signed, int vector_len) { Assembler::evpcmpq(kdst, mask, nds, src, comparison, is_signed, vector_len); }
  void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
               int comparison, bool is_signed, int vector_len, Register scratch_reg);
  void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
               int comparison, bool is_signed, int vector_len) { Assembler::evpcmpb(kdst, mask, nds, src, comparison, is_signed, vector_len); }
  void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
               int comparison, bool is_signed, int vector_len, Register scratch_reg);
  void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
               int comparison, bool is_signed, int vector_len) { Assembler::evpcmpw(kdst, mask, nds, src, comparison, is_signed, vector_len); }
  void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
               int comparison, bool is_signed, int vector_len, Register scratch_reg);

  // Broadcast a GPR value to all lanes of the given element type.
  void evpbroadcast(BasicType type, XMMRegister dst, Register src, int vector_len);
| 1311 | ||||
  // Emit comparison instruction for the specified comparison predicate.
  void vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg);
  void vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len);

  void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
  void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpmovzxbw(dst, src, vector_len); }

  // Extract byte sign-bits into a GPR mask; defaults to the 256-bit form.
  void vpmovmskb(Register dst, XMMRegister src, int vector_len = Assembler::AVX_256bit);

  void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
    Assembler::vpmulld(dst, nds, src, vector_len);
  };
  void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
    Assembler::vpmulld(dst, nds, src, vector_len);
  }
  void vpmulld(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);

  void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Arithmetic right shift: register-count and immediate-count forms.
  void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
  void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);

  void evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
  void evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
| 1342 | ||||
  // EVEX shift dispatchers: when 'is_varshift' is set, emit the
  // variable-per-element shift (ev*v* encoding, one shift count per lane);
  // otherwise emit the uniform-count form where 'src' supplies one count
  // applied to all lanes.
  void evpsllw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
    if (!is_varshift) {
      Assembler::evpsllw(dst, mask, nds, src, merge, vector_len);
    } else {
      Assembler::evpsllvw(dst, mask, nds, src, merge, vector_len);
    }
  }
  void evpslld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
    if (!is_varshift) {
      Assembler::evpslld(dst, mask, nds, src, merge, vector_len);
    } else {
      Assembler::evpsllvd(dst, mask, nds, src, merge, vector_len);
    }
  }
  void evpsllq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
    if (!is_varshift) {
      Assembler::evpsllq(dst, mask, nds, src, merge, vector_len);
    } else {
      Assembler::evpsllvq(dst, mask, nds, src, merge, vector_len);
    }
  }
  void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
    if (!is_varshift) {
      Assembler::evpsrlw(dst, mask, nds, src, merge, vector_len);
    } else {
      Assembler::evpsrlvw(dst, mask, nds, src, merge, vector_len);
    }
  }
  void evpsrld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
    if (!is_varshift) {
      Assembler::evpsrld(dst, mask, nds, src, merge, vector_len);
    } else {
      Assembler::evpsrlvd(dst, mask, nds, src, merge, vector_len);
    }
  }
  void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
    if (!is_varshift) {
      Assembler::evpsrlq(dst, mask, nds, src, merge, vector_len);
    } else {
      Assembler::evpsrlvq(dst, mask, nds, src, merge, vector_len);
    }
  }
  void evpsraw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
    if (!is_varshift) {
      Assembler::evpsraw(dst, mask, nds, src, merge, vector_len);
    } else {
      Assembler::evpsravw(dst, mask, nds, src, merge, vector_len);
    }
  }
  void evpsrad(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
    if (!is_varshift) {
      Assembler::evpsrad(dst, mask, nds, src, merge, vector_len);
    } else {
      Assembler::evpsravd(dst, mask, nds, src, merge, vector_len);
    }
  }
  void evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
    if (!is_varshift) {
      Assembler::evpsraq(dst, mask, nds, src, merge, vector_len);
    } else {
      Assembler::evpsravq(dst, mask, nds, src, merge, vector_len);
    }
  }
| 1406 | ||||
  // Masked min/max selected by element type.
  void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
  void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  // Word shifts: register-count and immediate-count forms.
  void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
  void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);

  void vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
  void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);

  void vptest(XMMRegister dst, XMMRegister src);
  void vptest(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vptest(dst, src, vector_len); }

  void punpcklbw(XMMRegister dst, XMMRegister src);
  void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); }

  void pshufd(XMMRegister dst, Address src, int mode);
  void pshufd(XMMRegister dst, XMMRegister src, int mode) { Assembler::pshufd(dst, src, mode); }

  void pshuflw(XMMRegister dst, XMMRegister src, int mode);
  void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); }
| 1429 | ||||
  // Packed FP AND; AddressLiteral forms take an optional scratch register.
  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
  void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
  void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);

  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
  void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
  void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);

  void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);

  // Scalar FP arithmetic wrappers (div/mul/sub), single and double precision.
  void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
  void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); }
  void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); }
  void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); }
  void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); }
  void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); }
  void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); }
  void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); }
  void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); }
  void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); }
  void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); }
  void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); }
  void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  // Scalar negate via XOR with a sign-bit constant at 'src'.
  void vnegatess(XMMRegister dst, XMMRegister nds, AddressLiteral src);
  void vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
| 1466 | ||||
  // AVX Vector instructions

  void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }
  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }
  void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);

  void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); }
  void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); }
  void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);

  // Integer XOR, falling back to the FP-domain vxorpd when 256-bit vpxor
  // is not available (pre-AVX2).
  void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
    if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2
      Assembler::vpxor(dst, nds, src, vector_len);
    else
      Assembler::vxorpd(dst, nds, src, vector_len);
  }
  void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
    if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2
      Assembler::vpxor(dst, nds, src, vector_len);
    else
      Assembler::vxorpd(dst, nds, src, vector_len);
  }
  void vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);

  // Simple version for AVX2 256bit vectors
  // NOTE(review): 'true' promotes to vector_len 1, presumably AVX_256bit —
  // passing the enum explicitly would be clearer; confirm before changing.
  void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); }
  void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); }
| 1494 | ||||
  // Full dword permute; AddressLiteral form is defined out of line.
  void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpermd(dst, nds, src, vector_len); }
  void vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
| 1497 | ||||
| 1498 | void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) { | |||
| 1499 | if (UseAVX > 2 && VM_Version::supports_avx512novl()) { | |||
| 1500 | Assembler::vinserti32x4(dst, nds, src, imm8); | |||
| 1501 | } else if (UseAVX > 1) { | |||
| 1502 | // vinserti128 is available only in AVX2 | |||
| 1503 | Assembler::vinserti128(dst, nds, src, imm8); | |||
| 1504 | } else { | |||
| 1505 | Assembler::vinsertf128(dst, nds, src, imm8); | |||
| 1506 | } | |||
| 1507 | } | |||
| 1508 | ||||
| 1509 | void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) { | |||
| 1510 | if (UseAVX > 2 && VM_Version::supports_avx512novl()) { | |||
| 1511 | Assembler::vinserti32x4(dst, nds, src, imm8); | |||
| 1512 | } else if (UseAVX > 1) { | |||
| 1513 | // vinserti128 is available only in AVX2 | |||
| 1514 | Assembler::vinserti128(dst, nds, src, imm8); | |||
| 1515 | } else { | |||
| 1516 | Assembler::vinsertf128(dst, nds, src, imm8); | |||
| 1517 | } | |||
| 1518 | } | |||
| 1519 | ||||
| 1520 | void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) { | |||
| 1521 | if (UseAVX > 2 && VM_Version::supports_avx512novl()) { | |||
| 1522 | Assembler::vextracti32x4(dst, src, imm8); | |||
| 1523 | } else if (UseAVX > 1) { | |||
| 1524 | // vextracti128 is available only in AVX2 | |||
| 1525 | Assembler::vextracti128(dst, src, imm8); | |||
| 1526 | } else { | |||
| 1527 | Assembler::vextractf128(dst, src, imm8); | |||
| 1528 | } | |||
| 1529 | } | |||
| 1530 | ||||
| 1531 | void vextracti128(Address dst, XMMRegister src, uint8_t imm8) { | |||
| 1532 | if (UseAVX > 2 && VM_Version::supports_avx512novl()) { | |||
| 1533 | Assembler::vextracti32x4(dst, src, imm8); | |||
| 1534 | } else if (UseAVX > 1) { | |||
| 1535 | // vextracti128 is available only in AVX2 | |||
| 1536 | Assembler::vextracti128(dst, src, imm8); | |||
| 1537 | } else { | |||
| 1538 | Assembler::vextractf128(dst, src, imm8); | |||
| 1539 | } | |||
| 1540 | } | |||
| 1541 | ||||
  // 128bit copy to/from high 128 bits of 256bit (YMM) vector registers
  // (thin wrappers passing lane index 1 to the selectors above).
  void vinserti128_high(XMMRegister dst, XMMRegister src) {
    vinserti128(dst, dst, src, 1);
  }
  void vinserti128_high(XMMRegister dst, Address src) {
    vinserti128(dst, dst, src, 1);
  }
  void vextracti128_high(XMMRegister dst, XMMRegister src) {
    vextracti128(dst, src, 1);
  }
  void vextracti128_high(Address dst, XMMRegister src) {
    vextracti128(dst, src, 1);
  }
| 1555 | ||||
  // FP-domain high-lane insert/extract; uses the EVEX 32x4 form on
  // AVX-512 without VL, the VEX 128 form otherwise.
  void vinsertf128_high(XMMRegister dst, XMMRegister src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vinsertf32x4(dst, dst, src, 1);
    } else {
      Assembler::vinsertf128(dst, dst, src, 1);
    }
  }

  void vinsertf128_high(XMMRegister dst, Address src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vinsertf32x4(dst, dst, src, 1);
    } else {
      Assembler::vinsertf128(dst, dst, src, 1);
    }
  }

  void vextractf128_high(XMMRegister dst, XMMRegister src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vextractf32x4(dst, src, 1);
    } else {
      Assembler::vextractf128(dst, src, 1);
    }
  }

  void vextractf128_high(Address dst, XMMRegister src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vextractf32x4(dst, src, 1);
    } else {
      Assembler::vextractf128(dst, src, 1);
    }
  }
| 1587 | ||||
  // 256bit copy to/from high 256 bits of 512bit (ZMM) vector registers
  void vinserti64x4_high(XMMRegister dst, XMMRegister src) {
    Assembler::vinserti64x4(dst, dst, src, 1);
  }
  void vinsertf64x4_high(XMMRegister dst, XMMRegister src) {
    Assembler::vinsertf64x4(dst, dst, src, 1);
  }
  void vextracti64x4_high(XMMRegister dst, XMMRegister src) {
    Assembler::vextracti64x4(dst, src, 1);
  }
  void vextractf64x4_high(XMMRegister dst, XMMRegister src) {
    Assembler::vextractf64x4(dst, src, 1);
  }
  void vextractf64x4_high(Address dst, XMMRegister src) {
    Assembler::vextractf64x4(dst, src, 1);
  }
  void vinsertf64x4_high(XMMRegister dst, Address src) {
    Assembler::vinsertf64x4(dst, dst, src, 1);
  }
| 1607 | ||||
  // 128bit copy to/from low 128 bits of 256bit (YMM) vector registers
  // (lane index 0 variants of the _high wrappers).
  void vinserti128_low(XMMRegister dst, XMMRegister src) {
    vinserti128(dst, dst, src, 0);
  }
  void vinserti128_low(XMMRegister dst, Address src) {
    vinserti128(dst, dst, src, 0);
  }
  void vextracti128_low(XMMRegister dst, XMMRegister src) {
    vextracti128(dst, src, 0);
  }
  void vextracti128_low(Address dst, XMMRegister src) {
    vextracti128(dst, src, 0);
  }
| 1621 | ||||
  // FP-domain low-lane insert/extract; EVEX 32x4 form on AVX-512 without
  // VL, the VEX 128 form otherwise.
  void vinsertf128_low(XMMRegister dst, XMMRegister src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vinsertf32x4(dst, dst, src, 0);
    } else {
      Assembler::vinsertf128(dst, dst, src, 0);
    }
  }

  void vinsertf128_low(XMMRegister dst, Address src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vinsertf32x4(dst, dst, src, 0);
    } else {
      Assembler::vinsertf128(dst, dst, src, 0);
    }
  }

  void vextractf128_low(XMMRegister dst, XMMRegister src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vextractf32x4(dst, src, 0);
    } else {
      Assembler::vextractf128(dst, src, 0);
    }
  }

  void vextractf128_low(Address dst, XMMRegister src) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vextractf32x4(dst, src, 0);
    } else {
      Assembler::vextractf128(dst, src, 0);
    }
  }
| 1653 | ||||
  // 256bit copy to/from low 256 bits of 512bit (ZMM) vector registers
  void vinserti64x4_low(XMMRegister dst, XMMRegister src) {
    Assembler::vinserti64x4(dst, dst, src, 0);
  }
  void vinsertf64x4_low(XMMRegister dst, XMMRegister src) {
    Assembler::vinsertf64x4(dst, dst, src, 0);
  }
  void vextracti64x4_low(XMMRegister dst, XMMRegister src) {
    Assembler::vextracti64x4(dst, src, 0);
  }
  void vextractf64x4_low(XMMRegister dst, XMMRegister src) {
    Assembler::vextractf64x4(dst, src, 0);
  }
  void vextractf64x4_low(Address dst, XMMRegister src) {
    Assembler::vextractf64x4(dst, src, 0);
  }
  void vinsertf64x4_low(XMMRegister dst, Address src) {
    Assembler::vinsertf64x4(dst, dst, src, 0);
  }
| 1673 | ||||
  // Carry-Less Multiplication Quadword
  // Named wrappers over vpclmulqdq that bake in the immediate selecting
  // which 64-bit halves of the operands are multiplied.
  void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    // 0x00 - multiply lower 64 bits [0:63]
    Assembler::vpclmulqdq(dst, nds, src, 0x00);
  }
  void vpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    // 0x11 - multiply upper 64 bits [64:127]
    Assembler::vpclmulqdq(dst, nds, src, 0x11);
  }
  void vpclmullqhqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    // 0x10 - multiply nds[0:63] and src[64:127]
    Assembler::vpclmulqdq(dst, nds, src, 0x10);
  }
  void vpclmulhqlqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    //0x01 - multiply nds[64:127] and src[0:63]
    Assembler::vpclmulqdq(dst, nds, src, 0x01);
  }

  // EVEX (wider-vector) variants of the same.
  void evpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
    // 0x00 - multiply lower 64 bits [0:63]
    Assembler::evpclmulqdq(dst, nds, src, 0x00, vector_len);
  }
  void evpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
    // 0x11 - multiply upper 64 bits [64:127]
    Assembler::evpclmulqdq(dst, nds, src, 0x11, vector_len);
  }
| 1700 | ||||
  // AVX-512 mask operations.
  void kand(BasicType etype, KRegister dst, KRegister src1, KRegister src2);
  void kor(BasicType type, KRegister dst, KRegister src1, KRegister src2);
  void knot(uint masklen, KRegister dst, KRegister src, KRegister ktmp = knoreg, Register rtmp = noreg);
  void kxor(BasicType type, KRegister dst, KRegister src1, KRegister src2);
  void kortest(uint masklen, KRegister src1, KRegister src2);
  void ktest(uint masklen, KRegister src1, KRegister src2);

  // Masked permute/or/and/xor selected by element type.
  void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  // Masked rotate left/right: immediate-count and register-count forms.
  void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc);
  void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc);
  void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc);
  void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc);

  // Reduce a mask compare to a boolean in 'dst'.
  void alltrue(Register dst, uint masklen, KRegister src1, KRegister src2, KRegister kscratch);
  void anytrue(Register dst, uint masklen, KRegister src, KRegister kscratch);
| 1728 | ||||
| 1729 | void cmov32( Condition cc, Register dst, Address src); | |||
| 1730 | void cmov32( Condition cc, Register dst, Register src); | |||
| 1731 | ||||
| 1732 | void cmov( Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); } | |||
| 1733 | ||||
| 1734 | void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src))cmovq(cc, dst, src) NOT_LP64(cmov32(cc, dst, src)); } | |||
| 1735 | void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src))cmovq(cc, dst, src) NOT_LP64(cmov32(cc, dst, src)); } | |||
| 1736 | ||||
| 1737 | void movoop(Register dst, jobject obj); | |||
| 1738 | void movoop(Address dst, jobject obj); | |||
| 1739 | ||||
| 1740 | void mov_metadata(Register dst, Metadata* obj); | |||
| 1741 | void mov_metadata(Address dst, Metadata* obj); | |||
| 1742 | ||||
| 1743 | void movptr(ArrayAddress dst, Register src); | |||
| 1744 | // can this do an lea? | |||
| 1745 | void movptr(Register dst, ArrayAddress src); | |||
| 1746 | ||||
| 1747 | void movptr(Register dst, Address src); | |||
| 1748 | ||||
| 1749 | #ifdef _LP641 | |||
| 1750 | void movptr(Register dst, AddressLiteral src, Register scratch=rscratch1); | |||
| 1751 | #else | |||
| 1752 | void movptr(Register dst, AddressLiteral src, Register scratch=noreg); // Scratch reg is ignored in 32-bit | |||
| 1753 | #endif | |||
| 1754 | ||||
| 1755 | void movptr(Register dst, intptr_t src); | |||
| 1756 | void movptr(Register dst, Register src); | |||
| 1757 | void movptr(Address dst, intptr_t src); | |||
| 1758 | ||||
| 1759 | void movptr(Address dst, Register src); | |||
| 1760 | ||||
| 1761 | void movptr(Register dst, RegisterOrConstant src) { | |||
| 1762 | if (src.is_constant()) movptr(dst, src.as_constant()); | |||
| 1763 | else movptr(dst, src.as_register()); | |||
| 1764 | } | |||
| 1765 | ||||
| 1766 | #ifdef _LP641 | |||
| 1767 | // Generally the next two are only used for moving NULL | |||
| 1768 | // Although there are situations in initializing the mark word where | |||
| 1769 | // they could be used. They are dangerous. | |||
| 1770 | ||||
| 1771 | // They only exist on LP64 so that int32_t and intptr_t are not the same | |||
| 1772 | // and we have ambiguous declarations. | |||
| 1773 | ||||
| 1774 | void movptr(Address dst, int32_t imm32); | |||
| 1775 | void movptr(Register dst, int32_t imm32); | |||
| 1776 | #endif // _LP64 | |||
| 1777 | ||||
  // to avoid hiding movl
  void mov32(AddressLiteral dst, Register src);
  void mov32(Register dst, AddressLiteral src);

  // to avoid hiding movb
  void movbyte(ArrayAddress dst, int src);

  // Import other mov() methods from the parent class or else
  // they will be hidden by the following overriding declaration.
  using Assembler::movdl;
  using Assembler::movq;
  void movdl(XMMRegister dst, AddressLiteral src);
  void movq(XMMRegister dst, AddressLiteral src);
| 1791 | ||||
| 1792 | // Can push value or effective address | |||
| 1793 | void pushptr(AddressLiteral src); | |||
| 1794 | ||||
| 1795 | void pushptr(Address src) { LP64_ONLY(pushq(src))pushq(src) NOT_LP64(pushl(src)); } | |||
| 1796 | void popptr(Address src) { LP64_ONLY(popq(src))popq(src) NOT_LP64(popl(src)); } | |||
| 1797 | ||||
| 1798 | void pushoop(jobject obj); | |||
| 1799 | void pushklass(Metadata* obj); | |||
| 1800 | ||||
| 1801 | // sign extend as need a l to ptr sized element | |||
| 1802 | void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src))movslq(dst, src) NOT_LP64(movl(dst, src)); } | |||
| 1803 | void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src))movslq(dst, src) NOT_LP64(if (dst != src) movl(dst, src)); } | |||
| 1804 | ||||
| 1805 | ||||
| 1806 | public: | |||
| 1807 | // C2 compiled method's prolog code. | |||
| 1808 | void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub); | |||
| 1809 | ||||
| 1810 | // clear memory of size 'cnt' qwords, starting at 'base'; | |||
| 1811 | // if 'is_large' is set, do not try to produce short loop | |||
| 1812 | void clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, bool is_large, KRegister mask=knoreg); | |||
| 1813 | ||||
| 1814 | // clear memory initialization sequence for constant size; | |||
| 1815 | void clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg); | |||
| 1816 | ||||
| 1817 | // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers | |||
| 1818 | void xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg); | |||
| 1819 | ||||
| 1820 | // Fill primitive arrays | |||
| 1821 | void generate_fill(BasicType t, bool aligned, | |||
| 1822 | Register to, Register value, Register count, | |||
| 1823 | Register rtmp, XMMRegister xtmp); | |||
| 1824 | ||||
| 1825 | void encode_iso_array(Register src, Register dst, Register len, | |||
| 1826 | XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3, | |||
| 1827 | XMMRegister tmp4, Register tmp5, Register result, bool ascii); | |||
| 1828 | ||||
| 1829 | #ifdef _LP641 | |||
| 1830 | void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2); | |||
| 1831 | void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, | |||
| 1832 | Register y, Register y_idx, Register z, | |||
| 1833 | Register carry, Register product, | |||
| 1834 | Register idx, Register kdx); | |||
| 1835 | void multiply_add_128_x_128(Register x_xstart, Register y, Register z, | |||
| 1836 | Register yz_idx, Register idx, | |||
| 1837 | Register carry, Register product, int offset); | |||
| 1838 | void multiply_128_x_128_bmi2_loop(Register y, Register z, | |||
| 1839 | Register carry, Register carry2, | |||
| 1840 | Register idx, Register jdx, | |||
| 1841 | Register yz_idx1, Register yz_idx2, | |||
| 1842 | Register tmp, Register tmp3, Register tmp4); | |||
| 1843 | void multiply_128_x_128_loop(Register x_xstart, Register y, Register z, | |||
| 1844 | Register yz_idx, Register idx, Register jdx, | |||
| 1845 | Register carry, Register product, | |||
| 1846 | Register carry2); | |||
| 1847 | void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen, | |||
| 1848 | Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5); | |||
| 1849 | void square_rshift(Register x, Register len, Register z, Register tmp1, Register tmp3, | |||
| 1850 | Register tmp4, Register tmp5, Register rdxReg, Register raxReg); | |||
| 1851 | void multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry, | |||
| 1852 | Register tmp2); | |||
| 1853 | void multiply_add_64(Register sum, Register op1, Register op2, Register carry, | |||
| 1854 | Register rdxReg, Register raxReg); | |||
| 1855 | void add_one_64(Register z, Register zlen, Register carry, Register tmp1); | |||
| 1856 | void lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, | |||
| 1857 | Register tmp3, Register tmp4); | |||
| 1858 | void square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, | |||
| 1859 | Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg); | |||
| 1860 | ||||
| 1861 | void mul_add_128_x_32_loop(Register out, Register in, Register offset, Register len, Register tmp1, | |||
| 1862 | Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, | |||
| 1863 | Register raxReg); | |||
| 1864 | void mul_add(Register out, Register in, Register offset, Register len, Register k, Register tmp1, | |||
| 1865 | Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, | |||
| 1866 | Register raxReg); | |||
| 1867 | void vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale, | |||
| 1868 | Register result, Register tmp1, Register tmp2, | |||
| 1869 | XMMRegister vec1, XMMRegister vec2, XMMRegister vec3); | |||
| 1870 | #endif | |||
| 1871 | ||||
| 1872 | // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic. | |||
| 1873 | void update_byte_crc32(Register crc, Register val, Register table); | |||
| 1874 | void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp); | |||
| 1875 | ||||
| 1876 | ||||
| 1877 | #ifdef _LP641 | |||
| 1878 | void kernel_crc32_avx512(Register crc, Register buf, Register len, Register table, Register tmp1, Register tmp2); | |||
| 1879 | void kernel_crc32_avx512_256B(Register crc, Register buf, Register len, Register key, Register pos, | |||
| 1880 | Register tmp1, Register tmp2, Label& L_barrett, Label& L_16B_reduction_loop, | |||
| 1881 | Label& L_get_last_two_xmms, Label& L_128_done, Label& L_cleanup); | |||
| 1882 | void updateBytesAdler32(Register adler32, Register buf, Register length, XMMRegister shuf0, XMMRegister shuf1, ExternalAddress scale); | |||
| 1883 | #endif // _LP64 | |||
| 1884 | ||||
| 1885 | // CRC32C code for java.util.zip.CRC32C::updateBytes() intrinsic | |||
| 1886 | // Note on a naming convention: | |||
| 1887 | // Prefix w = register only used on a Westmere+ architecture | |||
| 1888 | // Prefix n = register only used on a Nehalem architecture | |||
| 1889 | #ifdef _LP641 | |||
| 1890 | void crc32c_ipl_alg4(Register in_out, uint32_t n, | |||
| 1891 | Register tmp1, Register tmp2, Register tmp3); | |||
| 1892 | #else | |||
| 1893 | void crc32c_ipl_alg4(Register in_out, uint32_t n, | |||
| 1894 | Register tmp1, Register tmp2, Register tmp3, | |||
| 1895 | XMMRegister xtmp1, XMMRegister xtmp2); | |||
| 1896 | #endif | |||
| 1897 | void crc32c_pclmulqdq(XMMRegister w_xtmp1, | |||
| 1898 | Register in_out, | |||
| 1899 | uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported, | |||
| 1900 | XMMRegister w_xtmp2, | |||
| 1901 | Register tmp1, | |||
| 1902 | Register n_tmp2, Register n_tmp3); | |||
| 1903 | void crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2, | |||
| 1904 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, | |||
| 1905 | Register tmp1, Register tmp2, | |||
| 1906 | Register n_tmp3); | |||
| 1907 | void crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, | |||
| 1908 | Register in_out1, Register in_out2, Register in_out3, | |||
| 1909 | Register tmp1, Register tmp2, Register tmp3, | |||
| 1910 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, | |||
| 1911 | Register tmp4, Register tmp5, | |||
| 1912 | Register n_tmp6); | |||
| 1913 | void crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2, | |||
| 1914 | Register tmp1, Register tmp2, Register tmp3, | |||
| 1915 | Register tmp4, Register tmp5, Register tmp6, | |||
| 1916 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, | |||
| 1917 | bool is_pclmulqdq_supported); | |||
| 1918 | // Fold 128-bit data chunk | |||
| 1919 | void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset); | |||
| 1920 | void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf); | |||
| 1921 | #ifdef _LP641 | |||
| 1922 | // Fold 512-bit data chunk | |||
| 1923 | void fold512bit_crc32_avx512(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, Register pos, int offset); | |||
| 1924 | #endif // _LP64 | |||
| 1925 | // Fold 8-bit data | |||
| 1926 | void fold_8bit_crc32(Register crc, Register table, Register tmp); | |||
| 1927 | void fold_8bit_crc32(XMMRegister crc, Register table, XMMRegister xtmp, Register tmp); | |||
| 1928 | ||||
| 1929 | // Compress char[] array to byte[]. | |||
| 1930 | void char_array_compress(Register src, Register dst, Register len, | |||
| 1931 | XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3, | |||
| 1932 | XMMRegister tmp4, Register tmp5, Register result, | |||
| 1933 | KRegister mask1 = knoreg, KRegister mask2 = knoreg); | |||
| 1934 | ||||
| 1935 | // Inflate byte[] array to char[]. | |||
| 1936 | void byte_array_inflate(Register src, Register dst, Register len, | |||
| 1937 | XMMRegister tmp1, Register tmp2, KRegister mask = knoreg); | |||
| 1938 | ||||
| 1939 | void fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRegister mask, | |||
| 1940 | Register length, Register temp, int vec_enc); | |||
| 1941 | ||||
| 1942 | void fill64_masked(uint shift, Register dst, int disp, | |||
| 1943 | XMMRegister xmm, KRegister mask, Register length, | |||
| 1944 | Register temp, bool use64byteVector = false); | |||
| 1945 | ||||
| 1946 | void fill32_masked(uint shift, Register dst, int disp, | |||
| 1947 | XMMRegister xmm, KRegister mask, Register length, | |||
| 1948 | Register temp); | |||
| 1949 | ||||
| 1950 | void fill32(Register dst, int disp, XMMRegister xmm); | |||
| 1951 | ||||
| 1952 | void fill64(Register dst, int dis, XMMRegister xmm, bool use64byteVector = false); | |||
| 1953 | ||||
| 1954 | #ifdef _LP641 | |||
| 1955 | void convert_f2i(Register dst, XMMRegister src); | |||
| 1956 | void convert_d2i(Register dst, XMMRegister src); | |||
| 1957 | void convert_f2l(Register dst, XMMRegister src); | |||
| 1958 | void convert_d2l(Register dst, XMMRegister src); | |||
| 1959 | ||||
| 1960 | void cache_wb(Address line); | |||
| 1961 | void cache_wbsync(bool is_pre); | |||
| 1962 | ||||
| 1963 | #if COMPILER2_OR_JVMCI1 | |||
| 1964 | void arraycopy_avx3_special_cases(XMMRegister xmm, KRegister mask, Register from, | |||
| 1965 | Register to, Register count, int shift, | |||
| 1966 | Register index, Register temp, | |||
| 1967 | bool use64byteVector, Label& L_entry, Label& L_exit); | |||
| 1968 | ||||
| 1969 | void arraycopy_avx3_special_cases_conjoint(XMMRegister xmm, KRegister mask, Register from, | |||
| 1970 | Register to, Register start_index, Register end_index, | |||
| 1971 | Register count, int shift, Register temp, | |||
| 1972 | bool use64byteVector, Label& L_entry, Label& L_exit); | |||
| 1973 | ||||
| 1974 | void copy64_masked_avx(Register dst, Register src, XMMRegister xmm, | |||
| 1975 | KRegister mask, Register length, Register index, | |||
| 1976 | Register temp, int shift = Address::times_1, int offset = 0, | |||
| 1977 | bool use64byteVector = false); | |||
| 1978 | ||||
| 1979 | void copy32_masked_avx(Register dst, Register src, XMMRegister xmm, | |||
| 1980 | KRegister mask, Register length, Register index, | |||
| 1981 | Register temp, int shift = Address::times_1, int offset = 0); | |||
| 1982 | ||||
| 1983 | void copy32_avx(Register dst, Register src, Register index, XMMRegister xmm, | |||
| 1984 | int shift = Address::times_1, int offset = 0); | |||
| 1985 | ||||
| 1986 | void copy64_avx(Register dst, Register src, Register index, XMMRegister xmm, | |||
| 1987 | bool conjoint, int shift = Address::times_1, int offset = 0, | |||
| 1988 | bool use64byteVector = false); | |||
| 1989 | ||||
| 1990 | void generate_fill_avx3(BasicType type, Register to, Register value, | |||
| 1991 | Register count, Register rtmp, XMMRegister xtmp); | |||
| 1992 | ||||
| 1993 | #endif // COMPILER2_OR_JVMCI | |||
| 1994 | ||||
| 1995 | #endif // _LP64 | |||
| 1996 | ||||
| 1997 | void vallones(XMMRegister dst, int vector_len); | |||
| 1998 | }; | |||
| 1999 | ||||
| 2000 | /** | |||
| 2001 | * class SkipIfEqual: | |||
| 2002 | * | |||
| 2003 | * Instantiating this class will result in assembly code being output that will | |||
| 2004 | * jump around any code emitted between the creation of the instance and it's | |||
| 2005 | * automatic destruction at the end of a scope block, depending on the value of | |||
| 2006 | * the flag passed to the constructor, which will be checked at run-time. | |||
| 2007 | */ | |||
| 2008 | class SkipIfEqual { | |||
| 2009 | private: | |||
| 2010 | MacroAssembler* _masm; | |||
| 2011 | Label _label; | |||
| 2012 | ||||
| 2013 | public: | |||
| 2014 | SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); | |||
| 2015 | ~SkipIfEqual(); | |||
| 2016 | }; | |||
| 2017 | ||||
| 2018 | #endif // CPU_X86_MACROASSEMBLER_X86_HPP |