Bug Summary

File: jdk/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp
Warning: line 218, column 3
1st function call argument is an uninitialized value

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name macroAssembler_x86_arrayCopy_avx3.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -mthread-model posix -fno-delete-null-pointer-checks -mframe-pointer=all -relaxed-aliasing -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/libjvm/objs/precompiled -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D _GNU_SOURCE -D _REENTRANT -D LIBC=gnu -D LINUX -D VM_LITTLE_ENDIAN -D _LP64=1 -D ASSERT -D CHECK_UNHANDLED_OOPS -D TARGET_ARCH_x86 -D INCLUDE_SUFFIX_OS=_linux -D INCLUDE_SUFFIX_CPU=_x86 -D INCLUDE_SUFFIX_COMPILER=_gcc -D TARGET_COMPILER_gcc -D AMD64 -D HOTSPOT_LIB_ARCH="amd64" -D COMPILER1 -D COMPILER2 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc/adfiles -I /home/daniel/Projects/java/jdk/src/hotspot/share -I /home/daniel/Projects/java/jdk/src/hotspot/os/linux -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix -I /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86 -I 
/home/daniel/Projects/java/jdk/src/hotspot/os_cpu/linux_x86 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc -I /home/daniel/Projects/java/jdk/src/hotspot/share/precompiled -I /home/daniel/Projects/java/jdk/src/hotspot/share/include -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix/include -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base/linux -I /home/daniel/Projects/java/jdk/src/java.base/share/native/libjimage -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc/adfiles -I /home/daniel/Projects/java/jdk/src/hotspot/share -I /home/daniel/Projects/java/jdk/src/hotspot/os/linux -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix -I /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86 -I /home/daniel/Projects/java/jdk/src/hotspot/os_cpu/linux_x86 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc -D _FORTIFY_SOURCE=2 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-format-zero-length -Wno-unused-parameter -Wno-unused -Wno-parentheses -Wno-comment -Wno-unknown-pragmas -Wno-address -Wno-delete-non-virtual-dtor -Wno-char-subscripts -Wno-array-bounds -Wno-int-in-bool-context -Wno-ignored-qualifiers 
-Wno-missing-field-initializers -Wno-implicit-fallthrough -Wno-empty-body -Wno-strict-overflow -Wno-sequence-point -Wno-maybe-uninitialized -Wno-misleading-indentation -Wno-cast-function-type -Wno-shift-negative-value -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /home/daniel/Projects/java/jdk/make/hotspot -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -stack-protector 1 -fno-rtti -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -o /home/daniel/Projects/java/scan/2021-12-21-193737-8510-1 -x c++ /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp

/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp

1/*
2* Copyright (c) 2020, 2021, Intel Corporation. All rights reserved.
3*
4* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5*
6* This code is free software; you can redistribute it and/or modify it
7* under the terms of the GNU General Public License version 2 only, as
8* published by the Free Software Foundation.
9*
10* This code is distributed in the hope that it will be useful, but WITHOUT
11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13* version 2 for more details (a copy is included in the LICENSE file that
14* accompanied this code).
15*
16* You should have received a copy of the GNU General Public License version
17* 2 along with this work; if not, write to the Free Software Foundation,
18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19*
20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21* or visit www.oracle.com if you need additional information or have any
22* questions.
23*
24*/
25
26#include "precompiled.hpp"
27#include "asm/macroAssembler.hpp"
28#include "asm/macroAssembler.inline.hpp"
29#include "compiler/compiler_globals.hpp"
30
31#ifdef PRODUCT
32#define BLOCK_COMMENT(str)block_comment(str) /* nothing */
33#else
34#define BLOCK_COMMENT(str)block_comment(str) block_comment(str)
35#endif
36
37#define BIND(label)bind(label); block_comment("label" ":") bind(label); BLOCK_COMMENT(#label ":")block_comment(#label ":")
38
39#ifdef _LP641
40
41#if COMPILER2_OR_JVMCI1
42
43void MacroAssembler::arraycopy_avx3_special_cases(XMMRegister xmm, KRegister mask, Register from,
44 Register to, Register count, int shift,
45 Register index, Register temp,
46 bool use64byteVector, Label& L_entry, Label& L_exit) {
47 Label L_entry_64, L_entry_96, L_entry_128;
48 Label L_entry_160, L_entry_192;
49
50 int size_mat[][6] = {
51 /* T_BYTE */ {32 , 64, 96 , 128 , 160 , 192 },
52 /* T_SHORT*/ {16 , 32, 48 , 64 , 80 , 96 },
53 /* T_INT */ {8 , 16, 24 , 32 , 40 , 48 },
54 /* T_LONG */ {4 , 8, 12 , 16 , 20 , 24 }
55 };
56
57 // Case A) Special case for length less than equal to 32 bytes.
58 cmpq(count, size_mat[shift][0]);
59 jccb(Assembler::greater, L_entry_64)jccb_0(Assembler::greater, L_entry_64, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp"
, 59)
;
60 copy32_masked_avx(to, from, xmm, mask, count, index, temp, shift);
1
Passing value via 8th parameter 'shift'
2
Calling 'MacroAssembler::copy32_masked_avx'
61 jmp(L_exit);
62
63 // Case B) Special case for length less than equal to 64 bytes.
64 BIND(L_entry_64)bind(L_entry_64); block_comment("L_entry_64" ":");
65 cmpq(count, size_mat[shift][1]);
66 jccb(Assembler::greater, L_entry_96)jccb_0(Assembler::greater, L_entry_96, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp"
, 66)
;
67 copy64_masked_avx(to, from, xmm, mask, count, index, temp, shift, 0, use64byteVector);
68 jmp(L_exit);
69
70 // Case C) Special case for length less than equal to 96 bytes.
71 BIND(L_entry_96)bind(L_entry_96); block_comment("L_entry_96" ":");
72 cmpq(count, size_mat[shift][2]);
73 jccb(Assembler::greater, L_entry_128)jccb_0(Assembler::greater, L_entry_128, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp"
, 73)
;
74 copy64_avx(to, from, index, xmm, false, shift, 0, use64byteVector);
75 subq(count, 64 >> shift);
76 copy32_masked_avx(to, from, xmm, mask, count, index, temp, shift, 64);
77 jmp(L_exit);
78
79 // Case D) Special case for length less than equal to 128 bytes.
80 BIND(L_entry_128)bind(L_entry_128); block_comment("L_entry_128" ":");
81 cmpq(count, size_mat[shift][3]);
82 jccb(Assembler::greater, L_entry_160)jccb_0(Assembler::greater, L_entry_160, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp"
, 82)
;
83 copy64_avx(to, from, index, xmm, false, shift, 0, use64byteVector);
84 copy32_avx(to, from, index, xmm, shift, 64);
85 subq(count, 96 >> shift);
86 copy32_masked_avx(to, from, xmm, mask, count, index, temp, shift, 96);
87 jmp(L_exit);
88
89 // Case E) Special case for length less than equal to 160 bytes.
90 BIND(L_entry_160)bind(L_entry_160); block_comment("L_entry_160" ":");
91 cmpq(count, size_mat[shift][4]);
92 jccb(Assembler::greater, L_entry_192)jccb_0(Assembler::greater, L_entry_192, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp"
, 92)
;
93 copy64_avx(to, from, index, xmm, false, shift, 0, use64byteVector);
94 copy64_avx(to, from, index, xmm, false, shift, 64, use64byteVector);
95 subq(count, 128 >> shift);
96 copy32_masked_avx(to, from, xmm, mask, count, index, temp, shift, 128);
97 jmp(L_exit);
98
99 // Case F) Special case for length less than equal to 192 bytes.
100 BIND(L_entry_192)bind(L_entry_192); block_comment("L_entry_192" ":");
101 cmpq(count, size_mat[shift][5]);
102 jcc(Assembler::greater, L_entry);
103 copy64_avx(to, from, index, xmm, false, shift, 0, use64byteVector);
104 copy64_avx(to, from, index, xmm, false, shift, 64, use64byteVector);
105 copy32_avx(to, from, index, xmm, shift, 128);
106 subq(count, 160 >> shift);
107 copy32_masked_avx(to, from, xmm, mask, count, index, temp, shift, 160);
108 jmp(L_exit);
109}
110
111void MacroAssembler::arraycopy_avx3_special_cases_conjoint(XMMRegister xmm, KRegister mask, Register from,
112 Register to, Register start_index, Register end_index,
113 Register count, int shift, Register temp,
114 bool use64byteVector, Label& L_entry, Label& L_exit) {
115 Label L_entry_64, L_entry_96, L_entry_128;
116 Label L_entry_160, L_entry_192;
117 bool avx3 = (MaxVectorSize > 32) && (VM_Version::avx3_threshold() == 0);
118
119 int size_mat[][6] = {
120 /* T_BYTE */ {32 , 64, 96 , 128 , 160 , 192 },
121 /* T_SHORT*/ {16 , 32, 48 , 64 , 80 , 96 },
122 /* T_INT */ {8 , 16, 24 , 32 , 40 , 48 },
123 /* T_LONG */ {4 , 8, 12 , 16 , 20 , 24 }
124 };
125
126 // Case A) Special case for length less than equal to 32 bytes.
127 cmpq(count, size_mat[shift][0]);
128 jccb(Assembler::greater, L_entry_64)jccb_0(Assembler::greater, L_entry_64, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp"
, 128)
;
129 copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift);
130 jmp(L_exit);
131
132 // Case B) Special case for length less than equal to 64 bytes.
133 BIND(L_entry_64)bind(L_entry_64); block_comment("L_entry_64" ":");
134 cmpq(count, size_mat[shift][1]);
135 jccb(Assembler::greater, L_entry_96)jccb_0(Assembler::greater, L_entry_96, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp"
, 135)
;
136 if (avx3) {
137 copy64_masked_avx(to, from, xmm, mask, count, start_index, temp, shift, 0, true);
138 } else {
139 copy32_avx(to, from, end_index, xmm, shift, -32);
140 subq(count, 32 >> shift);
141 copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift);
142 }
143 jmp(L_exit);
144
145 // Case C) Special case for length less than equal to 96 bytes.
146 BIND(L_entry_96)bind(L_entry_96); block_comment("L_entry_96" ":");
147 cmpq(count, size_mat[shift][2]);
148 jccb(Assembler::greater, L_entry_128)jccb_0(Assembler::greater, L_entry_128, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp"
, 148)
;
149 copy64_avx(to, from, end_index, xmm, true, shift, -64, use64byteVector);
150 subq(count, 64 >> shift);
151 copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift);
152 jmp(L_exit);
153
154 // Case D) Special case for length less than equal to 128 bytes.
155 BIND(L_entry_128)bind(L_entry_128); block_comment("L_entry_128" ":");
156 cmpq(count, size_mat[shift][3]);
157 jccb(Assembler::greater, L_entry_160)jccb_0(Assembler::greater, L_entry_160, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp"
, 157)
;
158 copy64_avx(to, from, end_index, xmm, true, shift, -64, use64byteVector);
159 copy32_avx(to, from, end_index, xmm, shift, -96);
160 subq(count, 96 >> shift);
161 copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift);
162 jmp(L_exit);
163
164 // Case E) Special case for length less than equal to 160 bytes.
165 BIND(L_entry_160)bind(L_entry_160); block_comment("L_entry_160" ":");
166 cmpq(count, size_mat[shift][4]);
167 jccb(Assembler::greater, L_entry_192)jccb_0(Assembler::greater, L_entry_192, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp"
, 167)
;
168 copy64_avx(to, from, end_index, xmm, true, shift, -64, use64byteVector);
169 copy64_avx(to, from, end_index, xmm, true, shift, -128, use64byteVector);
170 subq(count, 128 >> shift);
171 copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift);
172 jmp(L_exit);
173
174 // Case F) Special case for length less than equal to 192 bytes.
175 BIND(L_entry_192)bind(L_entry_192); block_comment("L_entry_192" ":");
176 cmpq(count, size_mat[shift][5]);
177 jcc(Assembler::greater, L_entry);
178 copy64_avx(to, from, end_index, xmm, true, shift, -64, use64byteVector);
179 copy64_avx(to, from, end_index, xmm, true, shift, -128, use64byteVector);
180 copy32_avx(to, from, end_index, xmm, shift, -160);
181 subq(count, 160 >> shift);
182 copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift);
183 jmp(L_exit);
184}
185
186void MacroAssembler::copy64_masked_avx(Register dst, Register src, XMMRegister xmm,
187 KRegister mask, Register length, Register index,
188 Register temp, int shift, int offset,
189 bool use64byteVector) {
190 BasicType type[] = { T_BYTE, T_SHORT, T_INT, T_LONG};
191 assert(MaxVectorSize >= 32, "vector length should be >= 32")do { if (!(MaxVectorSize >= 32)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp"
, 191, "assert(" "MaxVectorSize >= 32" ") failed", "vector length should be >= 32"
); ::breakpoint(); } } while (0)
;
192 if (!use64byteVector) {
193 copy32_avx(dst, src, index, xmm, shift, offset);
194 subptr(length, 32 >> shift);
195 copy32_masked_avx(dst, src, xmm, mask, length, index, temp, shift, offset+32);
196 } else {
197 Address::ScaleFactor scale = (Address::ScaleFactor)(shift);
198 assert(MaxVectorSize == 64, "vector length != 64")do { if (!(MaxVectorSize == 64)) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp"
, 198, "assert(" "MaxVectorSize == 64" ") failed", "vector length != 64"
); ::breakpoint(); } } while (0)
;
199 mov64(temp, -1L);
200 bzhiq(temp, temp, length);
201 kmovql(mask, temp);
202 evmovdqu(type[shift], mask, xmm, Address(src, index, scale, offset), Assembler::AVX_512bit);
203 evmovdqu(type[shift], mask, Address(dst, index, scale, offset), xmm, Assembler::AVX_512bit);
204 }
205}
206
207
208void MacroAssembler::copy32_masked_avx(Register dst, Register src, XMMRegister xmm,
209 KRegister mask, Register length, Register index,
210 Register temp, int shift, int offset) {
211 assert(MaxVectorSize >= 32, "vector length should be >= 32")do { if (!(MaxVectorSize >= 32)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp"
, 211, "assert(" "MaxVectorSize >= 32" ") failed", "vector length should be >= 32"
); ::breakpoint(); } } while (0)
;
3
Assuming 'MaxVectorSize' is >= 32
4
Taking false branch
5
Loop condition is false. Exiting loop
212 BasicType type[] = { T_BYTE, T_SHORT, T_INT, T_LONG};
213 Address::ScaleFactor scale = (Address::ScaleFactor)(shift);
214 mov64(temp, -1L);
215 bzhiq(temp, temp, length);
216 kmovql(mask, temp);
217 evmovdqu(type[shift], mask, xmm, Address(src, index, scale, offset), Assembler::AVX_256bit);
6
Calling constructor for 'Address'
10
Returning from constructor for 'Address'
218 evmovdqu(type[shift], mask, Address(dst, index, scale, offset), xmm, Assembler::AVX_256bit);
11
1st function call argument is an uninitialized value
219}
220
221
222void MacroAssembler::copy32_avx(Register dst, Register src, Register index, XMMRegister xmm,
223 int shift, int offset) {
224 assert(MaxVectorSize >= 32, "vector length should be >= 32")do { if (!(MaxVectorSize >= 32)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp"
, 224, "assert(" "MaxVectorSize >= 32" ") failed", "vector length should be >= 32"
); ::breakpoint(); } } while (0)
;
225 Address::ScaleFactor scale = (Address::ScaleFactor)(shift);
226 vmovdqu(xmm, Address(src, index, scale, offset));
227 vmovdqu(Address(dst, index, scale, offset), xmm);
228}
229
230
231void MacroAssembler::copy64_avx(Register dst, Register src, Register index, XMMRegister xmm,
232 bool conjoint, int shift, int offset, bool use64byteVector) {
233 assert(MaxVectorSize == 64 || MaxVectorSize == 32, "vector length mismatch")do { if (!(MaxVectorSize == 64 || MaxVectorSize == 32)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp"
, 233, "assert(" "MaxVectorSize == 64 || MaxVectorSize == 32"
") failed", "vector length mismatch"); ::breakpoint(); } } while
(0)
;
234 if (!use64byteVector) {
235 if (conjoint) {
236 copy32_avx(dst, src, index, xmm, shift, offset+32);
237 copy32_avx(dst, src, index, xmm, shift, offset);
238 } else {
239 copy32_avx(dst, src, index, xmm, shift, offset);
240 copy32_avx(dst, src, index, xmm, shift, offset+32);
241 }
242 } else {
243 Address::ScaleFactor scale = (Address::ScaleFactor)(shift);
244 evmovdquq(xmm, Address(src, index, scale, offset), Assembler::AVX_512bit);
245 evmovdquq(Address(dst, index, scale, offset), xmm, Assembler::AVX_512bit);
246 }
247}
248
249#endif // COMPILER2_OR_JVMCI
250
251#endif

/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp

1/*
2 * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#ifndef CPU_X86_ASSEMBLER_X86_HPP
26#define CPU_X86_ASSEMBLER_X86_HPP
27
28#include "asm/register.hpp"
29#include "utilities/powerOfTwo.hpp"
30
31// Contains all the definitions needed for x86 assembly code generation.
32
33// Calling convention
34class Argument {
35 public:
36 enum {
37#ifdef _LP641
38#ifdef _WIN64
39 n_int_register_parameters_c = 4, // rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
40 n_float_register_parameters_c = 4, // xmm0 - xmm3 (c_farg0, c_farg1, ... )
41 n_int_register_returns_c = 1, // rax
42 n_float_register_returns_c = 1, // xmm0
43#else
44 n_int_register_parameters_c = 6, // rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
45 n_float_register_parameters_c = 8, // xmm0 - xmm7 (c_farg0, c_farg1, ... )
46 n_int_register_returns_c = 2, // rax, rdx
47 n_float_register_returns_c = 2, // xmm0, xmm1
48#endif // _WIN64
49 n_int_register_parameters_j = 6, // j_rarg0, j_rarg1, ...
50 n_float_register_parameters_j = 8 // j_farg0, j_farg1, ...
51#else
52 n_register_parameters = 0 // 0 registers used to pass arguments
53#endif // _LP64
54 };
55};
56
57
58#ifdef _LP641
59// Symbolically name the register arguments used by the c calling convention.
60// Windows is different from linux/solaris. So much for standards...
61
62#ifdef _WIN64
63
64REGISTER_DECLARATION(Register, c_rarg0, rcx)const Register c_rarg0 = ((Register)rcx);
65REGISTER_DECLARATION(Register, c_rarg1, rdx)const Register c_rarg1 = ((Register)rdx);
66REGISTER_DECLARATION(Register, c_rarg2, r8)const Register c_rarg2 = ((Register)r8);
67REGISTER_DECLARATION(Register, c_rarg3, r9)const Register c_rarg3 = ((Register)r9);
68
69REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0)const XMMRegister c_farg0 = ((XMMRegister)xmm0);
70REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1)const XMMRegister c_farg1 = ((XMMRegister)xmm1);
71REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2)const XMMRegister c_farg2 = ((XMMRegister)xmm2);
72REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3)const XMMRegister c_farg3 = ((XMMRegister)xmm3);
73
74#else
75
76REGISTER_DECLARATION(Register, c_rarg0, rdi)const Register c_rarg0 = ((Register)rdi);
77REGISTER_DECLARATION(Register, c_rarg1, rsi)const Register c_rarg1 = ((Register)rsi);
78REGISTER_DECLARATION(Register, c_rarg2, rdx)const Register c_rarg2 = ((Register)rdx);
79REGISTER_DECLARATION(Register, c_rarg3, rcx)const Register c_rarg3 = ((Register)rcx);
80REGISTER_DECLARATION(Register, c_rarg4, r8)const Register c_rarg4 = ((Register)r8);
81REGISTER_DECLARATION(Register, c_rarg5, r9)const Register c_rarg5 = ((Register)r9);
82
83REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0)const XMMRegister c_farg0 = ((XMMRegister)xmm0);
84REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1)const XMMRegister c_farg1 = ((XMMRegister)xmm1);
85REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2)const XMMRegister c_farg2 = ((XMMRegister)xmm2);
86REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3)const XMMRegister c_farg3 = ((XMMRegister)xmm3);
87REGISTER_DECLARATION(XMMRegister, c_farg4, xmm4)const XMMRegister c_farg4 = ((XMMRegister)xmm4);
88REGISTER_DECLARATION(XMMRegister, c_farg5, xmm5)const XMMRegister c_farg5 = ((XMMRegister)xmm5);
89REGISTER_DECLARATION(XMMRegister, c_farg6, xmm6)const XMMRegister c_farg6 = ((XMMRegister)xmm6);
90REGISTER_DECLARATION(XMMRegister, c_farg7, xmm7)const XMMRegister c_farg7 = ((XMMRegister)xmm7);
91
92#endif // _WIN64
93
94// Symbolically name the register arguments used by the Java calling convention.
95// We have control over the convention for java so we can do what we please.
96// What pleases us is to offset the java calling convention so that when
97// we call a suitable jni method the arguments are lined up and we don't
98// have to do little shuffling. A suitable jni method is non-static and a
99// small number of arguments (two fewer args on windows)
100//
101// |-------------------------------------------------------|
102// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 |
103// |-------------------------------------------------------|
104// | rcx rdx r8 r9 rdi* rsi* | windows (* not a c_rarg)
105// | rdi rsi rdx rcx r8 r9 | solaris/linux
106// |-------------------------------------------------------|
107// | j_rarg5 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 |
108// |-------------------------------------------------------|
109
110REGISTER_DECLARATION(Register, j_rarg0, c_rarg1)const Register j_rarg0 = ((Register)c_rarg1);
111REGISTER_DECLARATION(Register, j_rarg1, c_rarg2)const Register j_rarg1 = ((Register)c_rarg2);
112REGISTER_DECLARATION(Register, j_rarg2, c_rarg3)const Register j_rarg2 = ((Register)c_rarg3);
113// Windows runs out of register args here
114#ifdef _WIN64
115REGISTER_DECLARATION(Register, j_rarg3, rdi)const Register j_rarg3 = ((Register)rdi);
116REGISTER_DECLARATION(Register, j_rarg4, rsi)const Register j_rarg4 = ((Register)rsi);
117#else
118REGISTER_DECLARATION(Register, j_rarg3, c_rarg4)const Register j_rarg3 = ((Register)c_rarg4);
119REGISTER_DECLARATION(Register, j_rarg4, c_rarg5)const Register j_rarg4 = ((Register)c_rarg5);
120#endif /* _WIN64 */
121REGISTER_DECLARATION(Register, j_rarg5, c_rarg0)const Register j_rarg5 = ((Register)c_rarg0);
122
123REGISTER_DECLARATION(XMMRegister, j_farg0, xmm0)const XMMRegister j_farg0 = ((XMMRegister)xmm0);
124REGISTER_DECLARATION(XMMRegister, j_farg1, xmm1)const XMMRegister j_farg1 = ((XMMRegister)xmm1);
125REGISTER_DECLARATION(XMMRegister, j_farg2, xmm2)const XMMRegister j_farg2 = ((XMMRegister)xmm2);
126REGISTER_DECLARATION(XMMRegister, j_farg3, xmm3)const XMMRegister j_farg3 = ((XMMRegister)xmm3);
127REGISTER_DECLARATION(XMMRegister, j_farg4, xmm4)const XMMRegister j_farg4 = ((XMMRegister)xmm4);
128REGISTER_DECLARATION(XMMRegister, j_farg5, xmm5)const XMMRegister j_farg5 = ((XMMRegister)xmm5);
129REGISTER_DECLARATION(XMMRegister, j_farg6, xmm6)const XMMRegister j_farg6 = ((XMMRegister)xmm6);
130REGISTER_DECLARATION(XMMRegister, j_farg7, xmm7)const XMMRegister j_farg7 = ((XMMRegister)xmm7);
131
132REGISTER_DECLARATION(Register, rscratch1, r10)const Register rscratch1 = ((Register)r10); // volatile
133REGISTER_DECLARATION(Register, rscratch2, r11)const Register rscratch2 = ((Register)r11); // volatile
134
135REGISTER_DECLARATION(Register, r12_heapbase, r12)const Register r12_heapbase = ((Register)r12); // callee-saved
136REGISTER_DECLARATION(Register, r15_thread, r15)const Register r15_thread = ((Register)r15); // callee-saved
137
138#else
139// rscratch1 will apear in 32bit code that is dead but of course must compile
140// Using noreg ensures if the dead code is incorrectly live and executed it
141// will cause an assertion failure
142#define rscratch1 noreg
143#define rscratch2 noreg
144
145#endif // _LP64
146
147// JSR 292
148// On x86, the SP does not have to be saved when invoking method handle intrinsics
149// or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg.
150REGISTER_DECLARATION(Register, rbp_mh_SP_save, noreg)const Register rbp_mh_SP_save = ((Register)noreg);
151
152// Address is an abstraction used to represent a memory location
153// using any of the amd64 addressing modes with one object.
154//
155// Note: A register location is represented via a Register, not
156// via an address for efficiency & simplicity reasons.
157
158class ArrayAddress;
159
160class Address {
161 public:
162 enum ScaleFactor {
163 no_scale = -1,
164 times_1 = 0,
165 times_2 = 1,
166 times_4 = 2,
167 times_8 = 3,
168 times_ptr = LP64_ONLY(times_8)times_8 NOT_LP64(times_4)
169 };
170 static ScaleFactor times(int size) {
171 assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size")do { if (!(size >= 1 && size <= 8 && is_power_of_2
(size))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 171, "assert(" "size >= 1 && size <= 8 && is_power_of_2(size)"
") failed", "bad scale size"); ::breakpoint(); } } while (0)
;
172 if (size == 8) return times_8;
173 if (size == 4) return times_4;
174 if (size == 2) return times_2;
175 return times_1;
176 }
177 static int scale_size(ScaleFactor scale) {
178 assert(scale != no_scale, "")do { if (!(scale != no_scale)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 178, "assert(" "scale != no_scale" ") failed", ""); ::breakpoint
(); } } while (0)
;
179 assert(((1 << (int)times_1) == 1 &&do { if (!(((1 << (int)times_1) == 1 && (1 <<
(int)times_2) == 2 && (1 << (int)times_4) == 4
&& (1 << (int)times_8) == 8))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 182, "assert(" "((1 << (int)times_1) == 1 && (1 << (int)times_2) == 2 && (1 << (int)times_4) == 4 && (1 << (int)times_8) == 8)"
") failed", ""); ::breakpoint(); } } while (0)
180 (1 << (int)times_2) == 2 &&do { if (!(((1 << (int)times_1) == 1 && (1 <<
(int)times_2) == 2 && (1 << (int)times_4) == 4
&& (1 << (int)times_8) == 8))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 182, "assert(" "((1 << (int)times_1) == 1 && (1 << (int)times_2) == 2 && (1 << (int)times_4) == 4 && (1 << (int)times_8) == 8)"
") failed", ""); ::breakpoint(); } } while (0)
181 (1 << (int)times_4) == 4 &&do { if (!(((1 << (int)times_1) == 1 && (1 <<
(int)times_2) == 2 && (1 << (int)times_4) == 4
&& (1 << (int)times_8) == 8))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 182, "assert(" "((1 << (int)times_1) == 1 && (1 << (int)times_2) == 2 && (1 << (int)times_4) == 4 && (1 << (int)times_8) == 8)"
") failed", ""); ::breakpoint(); } } while (0)
182 (1 << (int)times_8) == 8), "")do { if (!(((1 << (int)times_1) == 1 && (1 <<
(int)times_2) == 2 && (1 << (int)times_4) == 4
&& (1 << (int)times_8) == 8))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 182, "assert(" "((1 << (int)times_1) == 1 && (1 << (int)times_2) == 2 && (1 << (int)times_4) == 4 && (1 << (int)times_8) == 8)"
") failed", ""); ::breakpoint(); } } while (0)
;
183 return (1 << (int)scale);
184 }
185
186 private:
187 Register _base;
188 Register _index;
189 XMMRegister _xmmindex;
190 ScaleFactor _scale;
191 int _disp;
192 bool _isxmmindex;
193 RelocationHolder _rspec;
194
195 // Easily misused constructors make them private
196 // %%% can we make these go away?
197 NOT_LP64(Address(address loc, RelocationHolder spec);)
198 Address(int disp, address loc, relocInfo::relocType rtype);
199 Address(int disp, address loc, RelocationHolder spec);
200
201 public:
202
203 int disp() { return _disp; }
204 // creation
205 Address()
206 : _base(noreg),
207 _index(noreg),
208 _xmmindex(xnoreg),
209 _scale(no_scale),
210 _disp(0),
211 _isxmmindex(false){
212 }
213
214 // No default displacement otherwise Register can be implicitly
215 // converted to 0(Register) which is quite a different animal.
216
217 Address(Register base, int disp)
218 : _base(base),
219 _index(noreg),
220 _xmmindex(xnoreg),
221 _scale(no_scale),
222 _disp(disp),
223 _isxmmindex(false){
224 }
225
226 Address(Register base, Register index, ScaleFactor scale, int disp = 0)
227 : _base (base),
228 _index(index),
229 _xmmindex(xnoreg),
230 _scale(scale),
231 _disp (disp),
232 _isxmmindex(false) {
233 assert(!index->is_valid() == (scale == Address::no_scale),do { if (!(!index->is_valid() == (scale == Address::no_scale
))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 234, "assert(" "!index->is_valid() == (scale == Address::no_scale)"
") failed", "inconsistent address"); ::breakpoint(); } } while
(0)
7
Assuming 'scale' is equal to no_scale
8
Taking false branch
9
Loop condition is false. Exiting loop
234 "inconsistent address")do { if (!(!index->is_valid() == (scale == Address::no_scale
))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 234, "assert(" "!index->is_valid() == (scale == Address::no_scale)"
") failed", "inconsistent address"); ::breakpoint(); } } while
(0)
;
235 }
236
237 Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0)
238 : _base (base),
239 _index(index.register_or_noreg()),
240 _xmmindex(xnoreg),
241 _scale(scale),
242 _disp (disp + (index.constant_or_zero() * scale_size(scale))),
243 _isxmmindex(false){
244 if (!index.is_register()) scale = Address::no_scale;
245 assert(!_index->is_valid() == (scale == Address::no_scale),do { if (!(!_index->is_valid() == (scale == Address::no_scale
))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 246, "assert(" "!_index->is_valid() == (scale == Address::no_scale)"
") failed", "inconsistent address"); ::breakpoint(); } } while
(0)
246 "inconsistent address")do { if (!(!_index->is_valid() == (scale == Address::no_scale
))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 246, "assert(" "!_index->is_valid() == (scale == Address::no_scale)"
") failed", "inconsistent address"); ::breakpoint(); } } while
(0)
;
247 }
248
249 Address(Register base, XMMRegister index, ScaleFactor scale, int disp = 0)
250 : _base (base),
251 _index(noreg),
252 _xmmindex(index),
253 _scale(scale),
254 _disp(disp),
255 _isxmmindex(true) {
256 assert(!index->is_valid() == (scale == Address::no_scale),do { if (!(!index->is_valid() == (scale == Address::no_scale
))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 257, "assert(" "!index->is_valid() == (scale == Address::no_scale)"
") failed", "inconsistent address"); ::breakpoint(); } } while
(0)
257 "inconsistent address")do { if (!(!index->is_valid() == (scale == Address::no_scale
))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 257, "assert(" "!index->is_valid() == (scale == Address::no_scale)"
") failed", "inconsistent address"); ::breakpoint(); } } while
(0)
;
258 }
259
260 // The following overloads are used in connection with the
261 // ByteSize type (see sizes.hpp). They simplify the use of
262 // ByteSize'd arguments in assembly code.
263
264 Address(Register base, ByteSize disp)
265 : Address(base, in_bytes(disp)) {}
266
267 Address(Register base, Register index, ScaleFactor scale, ByteSize disp)
268 : Address(base, index, scale, in_bytes(disp)) {}
269
270 Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp)
271 : Address(base, index, scale, in_bytes(disp)) {}
272
273 Address plus_disp(int disp) const {
274 Address a = (*this);
275 a._disp += disp;
276 return a;
277 }
278 Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const {
279 Address a = (*this);
280 a._disp += disp.constant_or_zero() * scale_size(scale);
281 if (disp.is_register()) {
282 assert(!a.index()->is_valid(), "competing indexes")do { if (!(!a.index()->is_valid())) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 282, "assert(" "!a.index()->is_valid()" ") failed", "competing indexes"
); ::breakpoint(); } } while (0)
;
283 a._index = disp.as_register();
284 a._scale = scale;
285 }
286 return a;
287 }
288 bool is_same_address(Address a) const {
289 // disregard _rspec
290 return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale;
291 }
292
293 // accessors
294 bool uses(Register reg) const { return _base == reg || _index == reg; }
295 Register base() const { return _base; }
296 Register index() const { return _index; }
297 XMMRegister xmmindex() const { return _xmmindex; }
298 ScaleFactor scale() const { return _scale; }
299 int disp() const { return _disp; }
300 bool isxmmindex() const { return _isxmmindex; }
301
302 // Convert the raw encoding form into the form expected by the constructor for
303 // Address. An index of 4 (rsp) corresponds to having no index, so convert
304 // that to noreg for the Address constructor.
305 static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
306
// Build an Address from an ArrayAddress (platform-dependent; see the
// ArrayAddress class comment below — TODO confirm against the .cpp).
307 static Address make_array(ArrayAddress);
308
309 private:
310 bool base_needs_rex() const {
311 return _base->is_valid() && _base->encoding() >= 8;
312 }
313
314 bool index_needs_rex() const {
315 return _index->is_valid() &&_index->encoding() >= 8;
316 }
317
318 bool xmmindex_needs_rex() const {
319 return _xmmindex->is_valid() && _xmmindex->encoding() >= 8;
320 }
321
322 relocInfo::relocType reloc() const { return _rspec.type(); }
323
324 friend class Assembler;
325 friend class MacroAssembler;
326 friend class LIR_Assembler; // base/index/scale/disp
327};
328
329//
330// AddressLiteral has been split out from Address because operands of this type
331// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
332// the few instructions that need to deal with address literals are unique and the
333// MacroAssembler does not have to implement every instruction in the Assembler
334// in order to search for address literals that may need special handling depending
335// on the instruction and the platform. As small step on the way to merging i486/amd64
336// directories.
337//
338class AddressLiteral {
339 friend class ArrayAddress;
340 RelocationHolder _rspec;
341 // Typically we use AddressLiterals we want to use their rval
342 // However in some situations we want the lval (effect address) of the item.
343 // We provide a special factory for making those lvals.
344 bool _is_lval;
345
346 // If the target is far we'll need to load the ea of this to
347 // a register to reach it. Otherwise if near we can do rip
348 // relative addressing.
349
350 address _target;
351
352 protected:
353 // creation
354 AddressLiteral()
355 : _is_lval(false),
356 _target(NULL__null)
357 {}
358
359 public:
360
361
362 AddressLiteral(address target, relocInfo::relocType rtype);
363
364 AddressLiteral(address target, RelocationHolder const& rspec)
365 : _rspec(rspec),
366 _is_lval(false),
367 _target(target)
368 {}
369
370 AddressLiteral addr() {
371 AddressLiteral ret = *this;
372 ret._is_lval = true;
373 return ret;
374 }
375
376
377 private:
378
379 address target() { return _target; }
380 bool is_lval() { return _is_lval; }
381
382 relocInfo::relocType reloc() const { return _rspec.type(); }
383 const RelocationHolder& rspec() const { return _rspec; }
384
385 friend class Assembler;
386 friend class MacroAssembler;
387 friend class Address;
388 friend class LIR_Assembler;
389};
390
 391// Convenience classes
392class RuntimeAddress: public AddressLiteral {
393
394 public:
395
396 RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {}
397
398};
399
400class ExternalAddress: public AddressLiteral {
401 private:
402 static relocInfo::relocType reloc_for_target(address target) {
403 // Sometimes ExternalAddress is used for values which aren't
404 // exactly addresses, like the card table base.
405 // external_word_type can't be used for values in the first page
406 // so just skip the reloc in that case.
407 return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
408 }
409
410 public:
411
412 ExternalAddress(address target) : AddressLiteral(target, reloc_for_target(target)) {}
413
414};
415
416class InternalAddress: public AddressLiteral {
417
418 public:
419
420 InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {}
421
422};
423
424// x86 can do array addressing as a single operation since disp can be an absolute
425// address amd64 can't. We create a class that expresses the concept but does extra
426// magic on amd64 to get the final result
427
428class ArrayAddress {
429 private:
430
431 AddressLiteral _base;
432 Address _index;
433
434 public:
435
436 ArrayAddress() {};
437 ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {};
438 AddressLiteral base() { return _base; }
439 Address index() { return _index; }
440
441};
442
443class InstructionAttr;
444
445// 64-bit reflects the fxsave size which is 512 bytes and the new xsave area on EVEX which is another 2176 bytes
446// See fxsave and xsave(EVEX enabled) documentation for layout
447const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY(2688 / wordSize)2688 / wordSize;
448
449// The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction
450// level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
451// is what you get. The Assembler is generating code into a CodeBuffer.
452
453class Assembler : public AbstractAssembler {
454 friend class AbstractAssembler; // for the non-virtual hack
455 friend class LIR_Assembler; // as_Address()
456 friend class StubGenerator;
457
458 public:
459 enum Condition { // The x86 condition codes used for conditional jumps/moves.
460 zero = 0x4,
461 notZero = 0x5,
462 equal = 0x4,
463 notEqual = 0x5,
464 less = 0xc,
465 lessEqual = 0xe,
466 greater = 0xf,
467 greaterEqual = 0xd,
468 below = 0x2,
469 belowEqual = 0x6,
470 above = 0x7,
471 aboveEqual = 0x3,
472 overflow = 0x0,
473 noOverflow = 0x1,
474 carrySet = 0x2,
475 carryClear = 0x3,
476 negative = 0x8,
477 positive = 0x9,
478 parity = 0xa,
479 noParity = 0xb
480 };
481
482 enum Prefix {
483 // segment overrides
484 CS_segment = 0x2e,
485 SS_segment = 0x36,
486 DS_segment = 0x3e,
487 ES_segment = 0x26,
488 FS_segment = 0x64,
489 GS_segment = 0x65,
490
491 REX = 0x40,
492
493 REX_B = 0x41,
494 REX_X = 0x42,
495 REX_XB = 0x43,
496 REX_R = 0x44,
497 REX_RB = 0x45,
498 REX_RX = 0x46,
499 REX_RXB = 0x47,
500
501 REX_W = 0x48,
502
503 REX_WB = 0x49,
504 REX_WX = 0x4A,
505 REX_WXB = 0x4B,
506 REX_WR = 0x4C,
507 REX_WRB = 0x4D,
508 REX_WRX = 0x4E,
509 REX_WRXB = 0x4F,
510
511 VEX_3bytes = 0xC4,
512 VEX_2bytes = 0xC5,
513 EVEX_4bytes = 0x62,
514 Prefix_EMPTY = 0x0
515 };
516
517 enum VexPrefix {
518 VEX_B = 0x20,
519 VEX_X = 0x40,
520 VEX_R = 0x80,
521 VEX_W = 0x80
522 };
523
524 enum ExexPrefix {
525 EVEX_F = 0x04,
526 EVEX_V = 0x08,
527 EVEX_Rb = 0x10,
528 EVEX_X = 0x40,
529 EVEX_Z = 0x80
530 };
531
532 enum VexSimdPrefix {
533 VEX_SIMD_NONE = 0x0,
534 VEX_SIMD_66 = 0x1,
535 VEX_SIMD_F3 = 0x2,
536 VEX_SIMD_F2 = 0x3
537 };
538
539 enum VexOpcode {
540 VEX_OPCODE_NONE = 0x0,
541 VEX_OPCODE_0F = 0x1,
542 VEX_OPCODE_0F_38 = 0x2,
543 VEX_OPCODE_0F_3A = 0x3,
544 VEX_OPCODE_MASK = 0x1F
545 };
546
547 enum AvxVectorLen {
548 AVX_128bit = 0x0,
549 AVX_256bit = 0x1,
550 AVX_512bit = 0x2,
551 AVX_NoVec = 0x4
552 };
553
554 enum EvexTupleType {
555 EVEX_FV = 0,
556 EVEX_HV = 4,
557 EVEX_FVM = 6,
558 EVEX_T1S = 7,
559 EVEX_T1F = 11,
560 EVEX_T2 = 13,
561 EVEX_T4 = 15,
562 EVEX_T8 = 17,
563 EVEX_HVM = 18,
564 EVEX_QVM = 19,
565 EVEX_OVM = 20,
566 EVEX_M128 = 21,
567 EVEX_DUP = 22,
568 EVEX_ETUP = 23
569 };
570
571 enum EvexInputSizeInBits {
572 EVEX_8bit = 0,
573 EVEX_16bit = 1,
574 EVEX_32bit = 2,
575 EVEX_64bit = 3,
576 EVEX_NObit = 4
577 };
578
579 enum WhichOperand {
580 // input to locate_operand, and format code for relocations
581 imm_operand = 0, // embedded 32-bit|64-bit immediate operand
582 disp32_operand = 1, // embedded 32-bit displacement or address
583 call32_operand = 2, // embedded 32-bit self-relative displacement
584#ifndef _LP641
585 _WhichOperand_limit = 3
586#else
587 narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop
588 _WhichOperand_limit = 4
589#endif
590 };
591
592 // Comparison predicates for integral types & FP types when using SSE
593 enum ComparisonPredicate {
594 eq = 0,
595 lt = 1,
596 le = 2,
597 _false = 3,
598 neq = 4,
599 nlt = 5,
600 nle = 6,
601 _true = 7
602 };
603
604 // Comparison predicates for FP types when using AVX
605 // O means ordered. U is unordered. When using ordered, any NaN comparison is false. Otherwise, it is true.
606 // S means signaling. Q means non-signaling. When signaling is true, instruction signals #IA on NaN.
607 enum ComparisonPredicateFP {
608 EQ_OQ = 0,
609 LT_OS = 1,
610 LE_OS = 2,
611 UNORD_Q = 3,
612 NEQ_UQ = 4,
613 NLT_US = 5,
614 NLE_US = 6,
615 ORD_Q = 7,
616 EQ_UQ = 8,
617 NGE_US = 9,
618 NGT_US = 0xA,
619 FALSE_OQ = 0XB,
620 NEQ_OQ = 0xC,
621 GE_OS = 0xD,
622 GT_OS = 0xE,
623 TRUE_UQ = 0xF,
624 EQ_OS = 0x10,
625 LT_OQ = 0x11,
626 LE_OQ = 0x12,
627 UNORD_S = 0x13,
628 NEQ_US = 0x14,
629 NLT_UQ = 0x15,
630 NLE_UQ = 0x16,
631 ORD_S = 0x17,
632 EQ_US = 0x18,
633 NGE_UQ = 0x19,
634 NGT_UQ = 0x1A,
635 FALSE_OS = 0x1B,
636 NEQ_OS = 0x1C,
637 GE_OQ = 0x1D,
638 GT_OQ = 0x1E,
639 TRUE_US =0x1F
640 };
641
642 enum Width {
643 B = 0,
644 W = 1,
645 D = 2,
646 Q = 3
647 };
648
649 //---< calculate length of instruction >---
650 // As instruction size can't be found out easily on x86/x64,
651 // we just use '4' for len and maxlen.
652 // instruction must start at passed address
653 static unsigned int instr_len(unsigned char *instr) { return 4; }
654
655 //---< longest instructions >---
656 // Max instruction length is not specified in architecture documentation.
657 // We could use a "safe enough" estimate (15), but just default to
658 // instruction length guess from above.
659 static unsigned int instr_maxlen() { return 4; }
660
661 // NOTE: The general philopsophy of the declarations here is that 64bit versions
662 // of instructions are freely declared without the need for wrapping them an ifdef.
663 // (Some dangerous instructions are ifdef's out of inappropriate jvm's.)
664 // In the .cpp file the implementations are wrapped so that they are dropped out
665 // of the resulting jvm. This is done mostly to keep the footprint of MINIMAL
666 // to the size it was prior to merging up the 32bit and 64bit assemblers.
667 //
668 // This does mean you'll get a linker/runtime error if you use a 64bit only instruction
669 // in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down.
670
671private:
672
673 bool _legacy_mode_bw;
674 bool _legacy_mode_dq;
675 bool _legacy_mode_vl;
676 bool _legacy_mode_vlbw;
677 NOT_LP64(bool _is_managed;)
678
679 class InstructionAttr *_attributes;
680
681 // 64bit prefixes
682 void prefix(Register reg);
683 void prefix(Register dst, Register src, Prefix p);
684 void prefix(Register dst, Address adr, Prefix p);
685
686 void prefix(Address adr);
687 void prefix(Address adr, Register reg, bool byteinst = false);
688 void prefix(Address adr, XMMRegister reg);
689
690 int prefix_and_encode(int reg_enc, bool byteinst = false);
691 int prefix_and_encode(int dst_enc, int src_enc) {
692 return prefix_and_encode(dst_enc, false, src_enc, false);
693 }
694 int prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte);
695
696 // Some prefixq variants always emit exactly one prefix byte, so besides a
697 // prefix-emitting method we provide a method to get the prefix byte to emit,
698 // which can then be folded into a byte stream.
699 int8_t get_prefixq(Address adr);
700 int8_t get_prefixq(Address adr, Register reg);
701
702 void prefixq(Address adr);
703 void prefixq(Address adr, Register reg);
704 void prefixq(Address adr, XMMRegister reg);
705
706 int prefixq_and_encode(int reg_enc);
707 int prefixq_and_encode(int dst_enc, int src_enc);
708
709 void rex_prefix(Address adr, XMMRegister xreg,
710 VexSimdPrefix pre, VexOpcode opc, bool rex_w);
711 int rex_prefix_and_encode(int dst_enc, int src_enc,
712 VexSimdPrefix pre, VexOpcode opc, bool rex_w);
713
714 void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);
715
716 void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v,
717 int nds_enc, VexSimdPrefix pre, VexOpcode opc);
718
719 void vex_prefix(Address adr, int nds_enc, int xreg_enc,
720 VexSimdPrefix pre, VexOpcode opc,
721 InstructionAttr *attributes);
722
723 int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
724 VexSimdPrefix pre, VexOpcode opc,
725 InstructionAttr *attributes);
726
727 void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
728 VexOpcode opc, InstructionAttr *attributes);
729
730 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
731 VexOpcode opc, InstructionAttr *attributes);
732
733 // Helper functions for groups of instructions
734 void emit_arith_b(int op1, int op2, Register dst, int imm8);
735
736 void emit_arith(int op1, int op2, Register dst, int32_t imm32);
737 // Force generation of a 4 byte immediate value even if it fits into 8bit
738 void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
739 void emit_arith(int op1, int op2, Register dst, Register src);
740
741 bool emit_compressed_disp_byte(int &disp);
742
743 void emit_modrm(int mod, int dst_enc, int src_enc);
744 void emit_modrm_disp8(int mod, int dst_enc, int src_enc,
745 int disp);
746 void emit_modrm_sib(int mod, int dst_enc, int src_enc,
747 Address::ScaleFactor scale, int index_enc, int base_enc);
748 void emit_modrm_sib_disp8(int mod, int dst_enc, int src_enc,
749 Address::ScaleFactor scale, int index_enc, int base_enc,
750 int disp);
751
752 void emit_operand_helper(int reg_enc,
753 int base_enc, int index_enc, Address::ScaleFactor scale,
754 int disp,
755 RelocationHolder const& rspec,
756 int rip_relative_correction = 0);
757
758 void emit_operand(Register reg,
759 Register base, Register index, Address::ScaleFactor scale,
760 int disp,
761 RelocationHolder const& rspec,
762 int rip_relative_correction = 0);
763
764 void emit_operand(Register reg,
765 Register base, XMMRegister index, Address::ScaleFactor scale,
766 int disp,
767 RelocationHolder const& rspec);
768
769 void emit_operand(XMMRegister xreg,
770 Register base, XMMRegister xindex, Address::ScaleFactor scale,
771 int disp,
772 RelocationHolder const& rspec);
773
774 void emit_operand(Register reg, Address adr,
775 int rip_relative_correction = 0);
776
777 void emit_operand(XMMRegister reg,
778 Register base, Register index, Address::ScaleFactor scale,
779 int disp,
780 RelocationHolder const& rspec);
781
782 void emit_operand(XMMRegister reg, Address adr);
783
784 // Immediate-to-memory forms
785 void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);
786
787 protected:
788 #ifdef ASSERT1
789 void check_relocation(RelocationHolder const& rspec, int format);
790 #endif
791
792 void emit_data(jint data, relocInfo::relocType rtype, int format);
793 void emit_data(jint data, RelocationHolder const& rspec, int format);
794 void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
795 void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
796
797 bool reachable(AddressLiteral adr) NOT_LP64({ return true;});
798
799 // These are all easily abused and hence protected
800
801 // 32BIT ONLY SECTION
802#ifndef _LP641
803 // Make these disappear in 64bit mode since they would never be correct
804 void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
805 void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
806
807 void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
808 void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
809
810 void push_literal32(int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
811#else
812 // 64BIT ONLY SECTION
813 void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec); // 64BIT ONLY
814
815 void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec);
816 void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec);
817
818 void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec);
819 void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec);
820#endif // _LP64
821
822 // These are unique in that we are ensured by the caller that the 32bit
823 // relative in these instructions will always be able to reach the potentially
824 // 64bit address described by entry. Since they can take a 64bit address they
825 // don't have the 32 suffix like the other instructions in this class.
826
827 void call_literal(address entry, RelocationHolder const& rspec);
828 void jmp_literal(address entry, RelocationHolder const& rspec);
829
830 // Avoid using directly section
831 // Instructions in this section are actually usable by anyone without danger
832 // of failure but have performance issues that are addressed my enhanced
833 // instructions which will do the proper thing base on the particular cpu.
834 // We protect them because we don't trust you...
835
836 // Don't use next inc() and dec() methods directly. INC & DEC instructions
837 // could cause a partial flag stall since they don't set CF flag.
838 // Use MacroAssembler::decrement() & MacroAssembler::increment() methods
839 // which call inc() & dec() or add() & sub() in accordance with
840 // the product flag UseIncDec value.
841
842 void decl(Register dst);
843 void decl(Address dst);
844 void decq(Address dst);
845
846 void incl(Register dst);
847 void incl(Address dst);
848 void incq(Register dst);
849 void incq(Address dst);
850
851 // New cpus require use of movsd and movss to avoid partial register stall
852 // when loading from memory. But for old Opteron use movlpd instead of movsd.
853 // The selection is done in MacroAssembler::movdbl() and movflt().
854
855 // Move Scalar Single-Precision Floating-Point Values
856 void movss(XMMRegister dst, Address src);
857 void movss(XMMRegister dst, XMMRegister src);
858 void movss(Address dst, XMMRegister src);
859
860 // Move Scalar Double-Precision Floating-Point Values
861 void movsd(XMMRegister dst, Address src);
862 void movsd(XMMRegister dst, XMMRegister src);
863 void movsd(Address dst, XMMRegister src);
864 void movlpd(XMMRegister dst, Address src);
865
866 // New cpus require use of movaps and movapd to avoid partial register stall
867 // when moving between registers.
868 void movaps(XMMRegister dst, XMMRegister src);
869 void movapd(XMMRegister dst, XMMRegister src);
870
871 // End avoid using directly
872
873
874 // Instruction prefixes
875 void prefix(Prefix p);
876
877 public:
878
879 // Creation
880 Assembler(CodeBuffer* code) : AbstractAssembler(code) {
881 init_attributes();
882 }
883
884 // Decoding
885 static address locate_operand(address inst, WhichOperand which);
886 static address locate_next_instruction(address inst);
887
888 // Utilities
889 static bool query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
890 int cur_tuple_type, int in_size_in_bits, int cur_encoding);
891
892 // Generic instructions
893 // Does 32bit or 64bit as needed for the platform. In some sense these
894 // belong in macro assembler but there is no need for both varieties to exist
895
896 void init_attributes(void);
897
898 void set_attributes(InstructionAttr *attributes) { _attributes = attributes; }
899 void clear_attributes(void) { _attributes = NULL__null; }
900
901 void set_managed(void) { NOT_LP64(_is_managed = true;) }
902 void clear_managed(void) { NOT_LP64(_is_managed = false;) }
903 bool is_managed(void) {
904 NOT_LP64(return _is_managed;)
905 LP64_ONLY(return false;)return false; }
906
907 void lea(Register dst, Address src);
908
909 void mov(Register dst, Register src);
910
911#ifdef _LP641
912 // support caching the result of some routines
913
914 // must be called before pusha(), popa(), vzeroupper() - checked with asserts
915 static void precompute_instructions();
916
917 void pusha_uncached();
918 void popa_uncached();
919#endif
920 void vzeroupper_uncached();
921 void decq(Register dst);
922
923 void pusha();
924 void popa();
925
926 void pushf();
927 void popf();
928
929 void push(int32_t imm32);
930
931 void push(Register src);
932
933 void pop(Register dst);
934
935 // These are dummies to prevent surprise implicit conversions to Register
936 void push(void* v);
937 void pop(void* v);
938
939 // These do register sized moves/scans
940 void rep_mov();
941 void rep_stos();
942 void rep_stosb();
943 void repne_scan();
944#ifdef _LP641
945 void repne_scanl();
946#endif
947
948 // Vanilla instructions in lexical order
949
950 void adcl(Address dst, int32_t imm32);
951 void adcl(Address dst, Register src);
952 void adcl(Register dst, int32_t imm32);
953 void adcl(Register dst, Address src);
954 void adcl(Register dst, Register src);
955
956 void adcq(Register dst, int32_t imm32);
957 void adcq(Register dst, Address src);
958 void adcq(Register dst, Register src);
959
960 void addb(Address dst, int imm8);
961 void addw(Register dst, Register src);
962 void addw(Address dst, int imm16);
963
964 void addl(Address dst, int32_t imm32);
965 void addl(Address dst, Register src);
966 void addl(Register dst, int32_t imm32);
967 void addl(Register dst, Address src);
968 void addl(Register dst, Register src);
969
970 void addq(Address dst, int32_t imm32);
971 void addq(Address dst, Register src);
972 void addq(Register dst, int32_t imm32);
973 void addq(Register dst, Address src);
974 void addq(Register dst, Register src);
975
976#ifdef _LP641
977 //Add Unsigned Integers with Carry Flag
978 void adcxq(Register dst, Register src);
979
980 //Add Unsigned Integers with Overflow Flag
981 void adoxq(Register dst, Register src);
982#endif
983
984 void addr_nop_4();
985 void addr_nop_5();
986 void addr_nop_7();
987 void addr_nop_8();
988
989 // Add Scalar Double-Precision Floating-Point Values
990 void addsd(XMMRegister dst, Address src);
991 void addsd(XMMRegister dst, XMMRegister src);
992
993 // Add Scalar Single-Precision Floating-Point Values
994 void addss(XMMRegister dst, Address src);
995 void addss(XMMRegister dst, XMMRegister src);
996
997 // AES instructions
998 void aesdec(XMMRegister dst, Address src);
999 void aesdec(XMMRegister dst, XMMRegister src);
1000 void aesdeclast(XMMRegister dst, Address src);
1001 void aesdeclast(XMMRegister dst, XMMRegister src);
1002 void aesenc(XMMRegister dst, Address src);
1003 void aesenc(XMMRegister dst, XMMRegister src);
1004 void aesenclast(XMMRegister dst, Address src);
1005 void aesenclast(XMMRegister dst, XMMRegister src);
1006 // Vector AES instructions
1007 void vaesenc(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1008 void vaesenclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1009 void vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1010 void vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1011
1012 void andw(Register dst, Register src);
1013 void andb(Address dst, Register src);
1014
1015 void andl(Address dst, int32_t imm32);
1016 void andl(Register dst, int32_t imm32);
1017 void andl(Register dst, Address src);
1018 void andl(Register dst, Register src);
1019 void andl(Address dst, Register src);
1020
1021 void andq(Address dst, int32_t imm32);
1022 void andq(Register dst, int32_t imm32);
1023 void andq(Register dst, Address src);
1024 void andq(Register dst, Register src);
1025 void andq(Address dst, Register src);
1026
1027 // BMI instructions
1028 void andnl(Register dst, Register src1, Register src2);
1029 void andnl(Register dst, Register src1, Address src2);
1030 void andnq(Register dst, Register src1, Register src2);
1031 void andnq(Register dst, Register src1, Address src2);
1032
1033 void blsil(Register dst, Register src);
1034 void blsil(Register dst, Address src);
1035 void blsiq(Register dst, Register src);
1036 void blsiq(Register dst, Address src);
1037
1038 void blsmskl(Register dst, Register src);
1039 void blsmskl(Register dst, Address src);
1040 void blsmskq(Register dst, Register src);
1041 void blsmskq(Register dst, Address src);
1042
1043 void blsrl(Register dst, Register src);
1044 void blsrl(Register dst, Address src);
1045 void blsrq(Register dst, Register src);
1046 void blsrq(Register dst, Address src);
1047
1048 void bsfl(Register dst, Register src);
1049 void bsrl(Register dst, Register src);
1050
1051#ifdef _LP641
1052 void bsfq(Register dst, Register src);
1053 void bsrq(Register dst, Register src);
1054#endif
1055
1056 void bswapl(Register reg);
1057
1058 void bswapq(Register reg);
1059
1060 void call(Label& L, relocInfo::relocType rtype);
1061 void call(Register reg); // push pc; pc <- reg
1062 void call(Address adr); // push pc; pc <- adr
1063
1064 void cdql();
1065
1066 void cdqq();
1067
1068 void cld();
1069
1070 void clflush(Address adr);
1071 void clflushopt(Address adr);
1072 void clwb(Address adr);
1073
1074 void cmovl(Condition cc, Register dst, Register src);
1075 void cmovl(Condition cc, Register dst, Address src);
1076
1077 void cmovq(Condition cc, Register dst, Register src);
1078 void cmovq(Condition cc, Register dst, Address src);
1079
1080
1081 void cmpb(Address dst, int imm8);
1082
1083 void cmpl(Address dst, int32_t imm32);
1084
1085 void cmp(Register dst, int32_t imm32);
1086 void cmpl(Register dst, int32_t imm32);
1087 void cmpl(Register dst, Register src);
1088 void cmpl(Register dst, Address src);
1089
1090 void cmpq(Address dst, int32_t imm32);
1091 void cmpq(Address dst, Register src);
1092
1093 void cmpq(Register dst, int32_t imm32);
1094 void cmpq(Register dst, Register src);
1095 void cmpq(Register dst, Address src);
1096
1097 // these are dummies used to catch attempting to convert NULL to Register
1098 void cmpl(Register dst, void* junk); // dummy
1099 void cmpq(Register dst, void* junk); // dummy
1100
1101 void cmpw(Address dst, int imm16);
1102
1103 void cmpxchg8 (Address adr);
1104
1105 void cmpxchgb(Register reg, Address adr);
1106 void cmpxchgl(Register reg, Address adr);
1107
1108 void cmpxchgq(Register reg, Address adr);
1109 void cmpxchgw(Register reg, Address adr);
1110
1111 // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
1112 void comisd(XMMRegister dst, Address src);
1113 void comisd(XMMRegister dst, XMMRegister src);
1114
1115 // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
1116 void comiss(XMMRegister dst, Address src);
1117 void comiss(XMMRegister dst, XMMRegister src);
1118
1119 // Identify processor type and features
1120 void cpuid();
1121
1122 // CRC32C
1123 void crc32(Register crc, Register v, int8_t sizeInBytes);
1124 void crc32(Register crc, Address adr, int8_t sizeInBytes);
1125
1126 // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
1127 void cvtsd2ss(XMMRegister dst, XMMRegister src);
1128 void cvtsd2ss(XMMRegister dst, Address src);
1129
1130 // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
1131 void cvtsi2sdl(XMMRegister dst, Register src);
1132 void cvtsi2sdl(XMMRegister dst, Address src);
1133 void cvtsi2sdq(XMMRegister dst, Register src);
1134 void cvtsi2sdq(XMMRegister dst, Address src);
1135
1136 // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
1137 void cvtsi2ssl(XMMRegister dst, Register src);
1138 void cvtsi2ssl(XMMRegister dst, Address src);
1139 void cvtsi2ssq(XMMRegister dst, Register src);
1140 void cvtsi2ssq(XMMRegister dst, Address src);
1141
1142 // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
1143 void cvtdq2pd(XMMRegister dst, XMMRegister src);
1144 void vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len);
1145
1146 // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
1147 void cvtdq2ps(XMMRegister dst, XMMRegister src);
1148 void vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len);
1149
1150 // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
1151 void cvtss2sd(XMMRegister dst, XMMRegister src);
1152 void cvtss2sd(XMMRegister dst, Address src);
1153
1154 // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
1155 void cvttsd2sil(Register dst, Address src);
1156 void cvttsd2sil(Register dst, XMMRegister src);
1157 void cvttsd2siq(Register dst, Address src);
1158 void cvttsd2siq(Register dst, XMMRegister src);
1159
1160 // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
1161 void cvttss2sil(Register dst, XMMRegister src);
1162 void cvttss2siq(Register dst, XMMRegister src);
1163
1164 // Convert vector double to int
1165 void cvttpd2dq(XMMRegister dst, XMMRegister src);
1166
1167 // Convert vector float and double
1168 void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len);
1169 void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);
1170
1171 // Convert vector float and int
1172 void vcvttps2dq(XMMRegister dst, XMMRegister src, int vector_len);
1173
1174 // Convert vector long to vector FP
1175 void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len);
1176 void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len);
1177
1178 // Convert vector double to long
1179 void evcvttpd2qq(XMMRegister dst, XMMRegister src, int vector_len);
1180
1181 // Evex casts with truncation
1182 void evpmovwb(XMMRegister dst, XMMRegister src, int vector_len);
1183 void evpmovdw(XMMRegister dst, XMMRegister src, int vector_len);
1184 void evpmovdb(XMMRegister dst, XMMRegister src, int vector_len);
1185 void evpmovqd(XMMRegister dst, XMMRegister src, int vector_len);
1186 void evpmovqb(XMMRegister dst, XMMRegister src, int vector_len);
1187 void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len);
1188
1189 //Abs of packed Integer values
1190 void pabsb(XMMRegister dst, XMMRegister src);
1191 void pabsw(XMMRegister dst, XMMRegister src);
1192 void pabsd(XMMRegister dst, XMMRegister src);
1193 void vpabsb(XMMRegister dst, XMMRegister src, int vector_len);
1194 void vpabsw(XMMRegister dst, XMMRegister src, int vector_len);
1195 void vpabsd(XMMRegister dst, XMMRegister src, int vector_len);
1196 void evpabsq(XMMRegister dst, XMMRegister src, int vector_len);
1197
1198 // Divide Scalar Double-Precision Floating-Point Values
1199 void divsd(XMMRegister dst, Address src);
1200 void divsd(XMMRegister dst, XMMRegister src);
1201
1202 // Divide Scalar Single-Precision Floating-Point Values
1203 void divss(XMMRegister dst, Address src);
1204 void divss(XMMRegister dst, XMMRegister src);
1205
1206
1207#ifndef _LP64
1208 private:
1209
1210 void emit_farith(int b1, int b2, int i);
1211
1212 public:
1213 void emms();
1214
1215 void fabs();
1216
1217 void fadd(int i);
1218
1219 void fadd_d(Address src);
1220 void fadd_s(Address src);
1221
1222 // "Alternate" versions of x87 instructions place result down in FPU
1223 // stack instead of on TOS
1224
1225 void fadda(int i); // "alternate" fadd
1226 void faddp(int i = 1);
1227
1228 void fchs();
1229
1230 void fcom(int i);
1231
1232 void fcomp(int i = 1);
1233 void fcomp_d(Address src);
1234 void fcomp_s(Address src);
1235
1236 void fcompp();
1237
1238 void fcos();
1239
1240 void fdecstp();
1241
1242 void fdiv(int i);
1243 void fdiv_d(Address src);
1244 void fdivr_s(Address src);
1245 void fdiva(int i); // "alternate" fdiv
1246 void fdivp(int i = 1);
1247
1248 void fdivr(int i);
1249 void fdivr_d(Address src);
1250 void fdiv_s(Address src);
1251
1252 void fdivra(int i); // "alternate" reversed fdiv
1253
1254 void fdivrp(int i = 1);
1255
1256 void ffree(int i = 0);
1257
1258 void fild_d(Address adr);
1259 void fild_s(Address adr);
1260
1261 void fincstp();
1262
1263 void finit();
1264
1265 void fist_s (Address adr);
1266 void fistp_d(Address adr);
1267 void fistp_s(Address adr);
1268
1269 void fld1();
1270
1271 void fld_d(Address adr);
1272 void fld_s(Address adr);
1273 void fld_s(int index);
1274
1275 void fldcw(Address src);
1276
1277 void fldenv(Address src);
1278
1279 void fldlg2();
1280
1281 void fldln2();
1282
1283 void fldz();
1284
1285 void flog();
1286 void flog10();
1287
1288 void fmul(int i);
1289
1290 void fmul_d(Address src);
1291 void fmul_s(Address src);
1292
1293 void fmula(int i); // "alternate" fmul
1294
1295 void fmulp(int i = 1);
1296
1297 void fnsave(Address dst);
1298
1299 void fnstcw(Address src);
1300
1301 void fnstsw_ax();
1302
1303 void fprem();
1304 void fprem1();
1305
1306 void frstor(Address src);
1307
1308 void fsin();
1309
1310 void fsqrt();
1311
1312 void fst_d(Address adr);
1313 void fst_s(Address adr);
1314
1315 void fstp_d(Address adr);
1316 void fstp_d(int index);
1317 void fstp_s(Address adr);
1318
1319 void fsub(int i);
1320 void fsub_d(Address src);
1321 void fsub_s(Address src);
1322
1323 void fsuba(int i); // "alternate" fsub
1324
1325 void fsubp(int i = 1);
1326
1327 void fsubr(int i);
1328 void fsubr_d(Address src);
1329 void fsubr_s(Address src);
1330
1331 void fsubra(int i); // "alternate" reversed fsub
1332
1333 void fsubrp(int i = 1);
1334
1335 void ftan();
1336
1337 void ftst();
1338
1339 void fucomi(int i = 1);
1340 void fucomip(int i = 1);
1341
1342 void fwait();
1343
1344 void fxch(int i = 1);
1345
1346 void fyl2x();
1347 void frndint();
1348 void f2xm1();
1349 void fldl2e();
1350#endif // !_LP64
1351
1352 // operands that only take the original 32bit registers
1353 void emit_operand32(Register reg, Address adr);
1354
1355 void fld_x(Address adr); // extended-precision (80-bit) format
1356 void fstp_x(Address adr); // extended-precision (80-bit) format
1357 void fxrstor(Address src);
1358 void xrstor(Address src);
1359
1360 void fxsave(Address dst);
1361 void xsave(Address dst);
1362
1363 void hlt();
1364
1365 void idivl(Register src);
1366 void divl(Register src); // Unsigned division
1367
1368#ifdef _LP64
1369 void idivq(Register src);
1370#endif
1371
1372 void imull(Register src);
1373 void imull(Register dst, Register src);
1374 void imull(Register dst, Register src, int value);
1375 void imull(Register dst, Address src, int value);
1376 void imull(Register dst, Address src);
1377
1378#ifdef _LP64
1379 void imulq(Register dst, Register src);
1380 void imulq(Register dst, Register src, int value);
1381 void imulq(Register dst, Address src, int value);
1382 void imulq(Register dst, Address src);
1383 void imulq(Register dst);
1384#endif
1385
1386 // jcc is the generic conditional branch generator to run-
1387 // time routines, jcc is used for branches to labels. jcc
1388 // takes a branch opcode (cc) and a label (L) and generates
1389 // either a backward branch or a forward branch and links it
1390 // to the label fixup chain. Usage:
1391 //
1392 // Label L; // unbound label
1393 // jcc(cc, L); // forward branch to unbound label
1394 // bind(L); // bind label to the current pc
1395 // jcc(cc, L); // backward branch to bound label
1396 // bind(L); // illegal: a label may be bound only once
1397 //
1398 // Note: The same Label can be used for forward and backward branches
1399 // but it may be bound only once.
1400
1401 void jcc(Condition cc, Label& L, bool maybe_short = true);
1402
1403 // Conditional jump to a 8-bit offset to L.
1404 // WARNING: be very careful using this for forward jumps. If the label is
1405 // not bound within an 8-bit offset of this instruction, a run-time error
1406 // will occur.
1407
1408 // Use macro to record file and line number.
1409 #define jccb(cc, L) jccb_0(cc, L, __FILE__, __LINE__)
1410
1411 void jccb_0(Condition cc, Label& L, const char* file, int line);
1412
1413 void jmp(Address entry); // pc <- entry
1414
1415 // Label operations & relative jumps (PPUM Appendix D)
1416 void jmp(Label& L, bool maybe_short = true); // unconditional jump to L
1417
1418 void jmp(Register entry); // pc <- entry
1419
1420 // Unconditional 8-bit offset jump to L.
1421 // WARNING: be very careful using this for forward jumps. If the label is
1422 // not bound within an 8-bit offset of this instruction, a run-time error
1423 // will occur.
1424
1425 // Use macro to record file and line number.
1426 #define jmpb(L) jmpb_0(L, __FILE__, __LINE__)
1427
1428 void jmpb_0(Label& L, const char* file, int line);
1429
1430 void ldmxcsr( Address src );
1431
1432 void leal(Register dst, Address src);
1433
1434 void leaq(Register dst, Address src);
1435
1436 void lfence();
1437
1438 void lock();
1439 void size_prefix();
1440
1441 void lzcntl(Register dst, Register src);
1442
1443#ifdef _LP64
1444 void lzcntq(Register dst, Register src);
1445#endif
1446
1447 enum Membar_mask_bits {   // barrier-constraint flags for membar() below; values are distinct bits so they can be OR-ed together
1448 StoreStore = 1 << 3,      // earlier stores must complete before later stores
1449 LoadStore = 1 << 2,       // earlier loads must complete before later stores
1450 StoreLoad = 1 << 1,       // earlier stores must complete before later loads (the expensive one on x86)
1451 LoadLoad = 1 << 0         // earlier loads must complete before later loads
1452 };
1453
1454 // Serializes memory and blows flags
1455 void membar(Membar_mask_bits order_constraint);
1456
1457 void mfence();
1458 void sfence();
1459
1460 // Moves
1461
1462 void mov64(Register dst, int64_t imm64);
1463 void mov64(Register dst, int64_t imm64, relocInfo::relocType rtype, int format);
1464
1465 void movb(Address dst, Register src);
1466 void movb(Address dst, int imm8);
1467 void movb(Register dst, Address src);
1468
1469 void movddup(XMMRegister dst, XMMRegister src);
1470
1471 void kandbl(KRegister dst, KRegister src1, KRegister src2);
1472 void kandwl(KRegister dst, KRegister src1, KRegister src2);
1473 void kanddl(KRegister dst, KRegister src1, KRegister src2);
1474 void kandql(KRegister dst, KRegister src1, KRegister src2);
1475
1476 void korbl(KRegister dst, KRegister src1, KRegister src2);
1477 void korwl(KRegister dst, KRegister src1, KRegister src2);
1478 void kordl(KRegister dst, KRegister src1, KRegister src2);
1479 void korql(KRegister dst, KRegister src1, KRegister src2);
1480
1481 void kxorbl(KRegister dst, KRegister src1, KRegister src2);
1482 void kxorwl(KRegister dst, KRegister src1, KRegister src2);
1483 void kxordl(KRegister dst, KRegister src1, KRegister src2);
1484 void kxorql(KRegister dst, KRegister src1, KRegister src2);
1485 void kmovbl(KRegister dst, Register src);
1486 void kmovbl(Register dst, KRegister src);
1487 void kmovbl(KRegister dst, KRegister src);
1488 void kmovwl(KRegister dst, Register src);
1489 void kmovwl(KRegister dst, Address src);
1490 void kmovwl(Register dst, KRegister src);
1491 void kmovwl(Address dst, KRegister src);
1492 void kmovwl(KRegister dst, KRegister src);
1493 void kmovdl(KRegister dst, Register src);
1494 void kmovdl(Register dst, KRegister src);
1495 void kmovql(KRegister dst, KRegister src);
1496 void kmovql(Address dst, KRegister src);
1497 void kmovql(KRegister dst, Address src);
1498 void kmovql(KRegister dst, Register src);
1499 void kmovql(Register dst, KRegister src);
1500
1501 void knotbl(KRegister dst, KRegister src);
1502 void knotwl(KRegister dst, KRegister src);
1503 void knotdl(KRegister dst, KRegister src);
1504 void knotql(KRegister dst, KRegister src);
1505
1506 void kortestbl(KRegister dst, KRegister src);
1507 void kortestwl(KRegister dst, KRegister src);
1508 void kortestdl(KRegister dst, KRegister src);
1509 void kortestql(KRegister dst, KRegister src);
1510
1511 void kxnorbl(KRegister dst, KRegister src1, KRegister src2);
1512 void kshiftlbl(KRegister dst, KRegister src, int imm8);
1513 void kshiftrbl(KRegister dst, KRegister src, int imm8);
1514 void kshiftrwl(KRegister dst, KRegister src, int imm8);
1515 void kshiftrdl(KRegister dst, KRegister src, int imm8);
1516 void kshiftrql(KRegister dst, KRegister src, int imm8);
1517 void ktestq(KRegister src1, KRegister src2);
1518 void ktestd(KRegister src1, KRegister src2);
1519
1520 void ktestql(KRegister dst, KRegister src);
1521 void ktestdl(KRegister dst, KRegister src);
1522 void ktestwl(KRegister dst, KRegister src);
1523 void ktestbl(KRegister dst, KRegister src);
1524
1525 void movdl(XMMRegister dst, Register src);
1526 void movdl(Register dst, XMMRegister src);
1527 void movdl(XMMRegister dst, Address src);
1528 void movdl(Address dst, XMMRegister src);
1529
1530 // Move Double Quadword
1531 void movdq(XMMRegister dst, Register src);
1532 void movdq(Register dst, XMMRegister src);
1533
1534 // Move Aligned Double Quadword
1535 void movdqa(XMMRegister dst, XMMRegister src);
1536 void movdqa(XMMRegister dst, Address src);
1537
1538 // Move Unaligned Double Quadword
1539 void movdqu(Address dst, XMMRegister src);
1540 void movdqu(XMMRegister dst, Address src);
1541 void movdqu(XMMRegister dst, XMMRegister src);
1542
1543 // Move Unaligned 256bit Vector
1544 void vmovdqu(Address dst, XMMRegister src);
1545 void vmovdqu(XMMRegister dst, Address src);
1546 void vmovdqu(XMMRegister dst, XMMRegister src);
1547
1548 // Move Unaligned 512bit Vector
1549 void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len);
1550 void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len);
1551 void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len);
1552 void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1553 void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1554 void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len);
1555 void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1556 void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len);
1557 void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1558 void evmovdqul(Address dst, XMMRegister src, int vector_len);
1559 void evmovdqul(XMMRegister dst, Address src, int vector_len);
1560 void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
1561 void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1562 void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1563 void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1564 void evmovdquq(Address dst, XMMRegister src, int vector_len);
1565 void evmovdquq(XMMRegister dst, Address src, int vector_len);
1566 void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len);
1567 void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1568 void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1569 void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1570
1571 // Move lower 64bit to high 64bit in 128bit register
1572 void movlhps(XMMRegister dst, XMMRegister src);
1573
1574 void movl(Register dst, int32_t imm32);
1575 void movl(Address dst, int32_t imm32);
1576 void movl(Register dst, Register src);
1577 void movl(Register dst, Address src);
1578 void movl(Address dst, Register src);
1579
1580 // These dummies prevent using movl from converting a zero (like NULL) into Register
1581 // by giving the compiler two choices it can't resolve
1582
1583 void movl(Address dst, void* junk);
1584 void movl(Register dst, void* junk);
1585
1586#ifdef _LP64
1587 void movq(Register dst, Register src);
1588 void movq(Register dst, Address src);
1589 void movq(Address dst, Register src);
1590 void movq(Address dst, int32_t imm32);
1591 void movq(Register dst, int32_t imm32);
1592
1593 // These dummies prevent using movq from converting a zero (like NULL) into Register
1594 // by giving the compiler two choices it can't resolve
1595
1596 void movq(Address dst, void* dummy);
1597 void movq(Register dst, void* dummy);
1598#endif
1599
1600 // Move Quadword
1601 void movq(Address dst, XMMRegister src);
1602 void movq(XMMRegister dst, Address src);
1603 void movq(XMMRegister dst, XMMRegister src);
1604 void movq(Register dst, XMMRegister src);
1605 void movq(XMMRegister dst, Register src);
1606
1607 void movsbl(Register dst, Address src);
1608 void movsbl(Register dst, Register src);
1609
1610#ifdef _LP64
1611 void movsbq(Register dst, Address src);
1612 void movsbq(Register dst, Register src);
1613
1614 // Move signed 32bit immediate to 64bit extending sign
1615 void movslq(Address dst, int32_t imm64);
1616 void movslq(Register dst, int32_t imm64);
1617
1618 void movslq(Register dst, Address src);
1619 void movslq(Register dst, Register src);
1620 void movslq(Register dst, void* src); // Dummy declaration to cause NULL to be ambiguous
1621#endif
1622
1623 void movswl(Register dst, Address src);
1624 void movswl(Register dst, Register src);
1625
1626#ifdef _LP64
1627 void movswq(Register dst, Address src);
1628 void movswq(Register dst, Register src);
1629#endif
1630
1631 void movw(Address dst, int imm16);
1632 void movw(Register dst, Address src);
1633 void movw(Address dst, Register src);
1634
1635 void movzbl(Register dst, Address src);
1636 void movzbl(Register dst, Register src);
1637
1638#ifdef _LP64
1639 void movzbq(Register dst, Address src);
1640 void movzbq(Register dst, Register src);
1641#endif
1642
1643 void movzwl(Register dst, Address src);
1644 void movzwl(Register dst, Register src);
1645
1646#ifdef _LP64
1647 void movzwq(Register dst, Address src);
1648 void movzwq(Register dst, Register src);
1649#endif
1650
1651 // Unsigned multiply with RAX destination register
1652 void mull(Address src);
1653 void mull(Register src);
1654
1655#ifdef _LP64
1656 void mulq(Address src);
1657 void mulq(Register src);
1658 void mulxq(Register dst1, Register dst2, Register src);
1659#endif
1660
1661 // Multiply Scalar Double-Precision Floating-Point Values
1662 void mulsd(XMMRegister dst, Address src);
1663 void mulsd(XMMRegister dst, XMMRegister src);
1664
1665 // Multiply Scalar Single-Precision Floating-Point Values
1666 void mulss(XMMRegister dst, Address src);
1667 void mulss(XMMRegister dst, XMMRegister src);
1668
1669 void negl(Register dst);
1670 void negl(Address dst);
1671
1672#ifdef _LP64
1673 void negq(Register dst);
1674 void negq(Address dst);
1675#endif
1676
1677 void nop(int i = 1);
1678
1679 void notl(Register dst);
1680
1681#ifdef _LP64
1682 void notq(Register dst);
1683
1684 void btsq(Address dst, int imm8);
1685 void btrq(Address dst, int imm8);
1686#endif
1687
1688 void orw(Register dst, Register src);
1689
1690 void orl(Address dst, int32_t imm32);
1691 void orl(Register dst, int32_t imm32);
1692 void orl(Register dst, Address src);
1693 void orl(Register dst, Register src);
1694 void orl(Address dst, Register src);
1695
1696 void orb(Address dst, int imm8);
1697 void orb(Address dst, Register src);
1698
1699 void orq(Address dst, int32_t imm32);
1700 void orq(Address dst, Register src);
1701 void orq(Register dst, int32_t imm32);
1702 void orq(Register dst, Address src);
1703 void orq(Register dst, Register src);
1704
1705 // Pack with signed saturation
1706 void packsswb(XMMRegister dst, XMMRegister src);
1707 void vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1708 void packssdw(XMMRegister dst, XMMRegister src);
1709 void vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1710
1711 // Pack with unsigned saturation
1712 void packuswb(XMMRegister dst, XMMRegister src);
1713 void packuswb(XMMRegister dst, Address src);
1714 void packusdw(XMMRegister dst, XMMRegister src);
1715 void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1716 void vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1717
1718 // Permutations
1719 void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1720 void vpermq(XMMRegister dst, XMMRegister src, int imm8);
1721 void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1722 void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1723 void vpermb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1724 void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1725 void vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1726 void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1727 void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1728 void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1729 void vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1730 void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1731 void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1732 void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1733 void evpermt2b(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1734 void evpmultishiftqb(XMMRegister dst, XMMRegister ctl, XMMRegister src, int vector_len);
1735
1736 void pause();
1737
1738 // Undefined Instruction
1739 void ud2();
1740
1741 // SSE4.2 string instructions
1742 void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1743 void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1744
1745 void pcmpeqb(XMMRegister dst, XMMRegister src);
1746 void vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
1747
1748 void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1749 void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1750 void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1751 void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1752
1753 void vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1754 void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1755 void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1756
1757 void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
1758 void evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len);
1759
1760 void pcmpeqw(XMMRegister dst, XMMRegister src);
1761 void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1762 void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1763 void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1764
1765 void vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1766
1767 void pcmpeqd(XMMRegister dst, XMMRegister src);
1768 void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1769 void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len);
1770 void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1771
1772 void pcmpeqq(XMMRegister dst, XMMRegister src);
1773 void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
1774 void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1775 void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1776 void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1777
1778 void pcmpgtq(XMMRegister dst, XMMRegister src);
1779 void vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1780
1781 void pmovmskb(Register dst, XMMRegister src);
1782 void vpmovmskb(Register dst, XMMRegister src, int vec_enc);
1783 void vmovmskps(Register dst, XMMRegister src, int vec_enc);
1784 void vmovmskpd(Register dst, XMMRegister src, int vec_enc);
1785 void vpmaskmovd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1786
1787 // SSE 4.1 extract
1788 void pextrd(Register dst, XMMRegister src, int imm8);
1789 void pextrq(Register dst, XMMRegister src, int imm8);
1790 void pextrd(Address dst, XMMRegister src, int imm8);
1791 void pextrq(Address dst, XMMRegister src, int imm8);
1792 void pextrb(Register dst, XMMRegister src, int imm8);
1793 void pextrb(Address dst, XMMRegister src, int imm8);
1794 // SSE 2 extract
1795 void pextrw(Register dst, XMMRegister src, int imm8);
1796 void pextrw(Address dst, XMMRegister src, int imm8);
1797
1798 // SSE 4.1 insert
1799 void pinsrd(XMMRegister dst, Register src, int imm8);
1800 void pinsrq(XMMRegister dst, Register src, int imm8);
1801 void pinsrb(XMMRegister dst, Register src, int imm8);
1802 void pinsrd(XMMRegister dst, Address src, int imm8);
1803 void pinsrq(XMMRegister dst, Address src, int imm8);
1804 void pinsrb(XMMRegister dst, Address src, int imm8);
1805 void insertps(XMMRegister dst, XMMRegister src, int imm8);
1806 // SSE 2 insert
1807 void pinsrw(XMMRegister dst, Register src, int imm8);
1808 void pinsrw(XMMRegister dst, Address src, int imm8);
1809
1810 // AVX insert
1811 void vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1812 void vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1813 void vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1814 void vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1815 void vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1816
1817 // Zero extend moves
1818 void pmovzxbw(XMMRegister dst, XMMRegister src);
1819 void pmovzxbw(XMMRegister dst, Address src);
1820 void pmovzxbd(XMMRegister dst, XMMRegister src);
1821 void vpmovzxbw( XMMRegister dst, Address src, int vector_len);
1822 void pmovzxdq(XMMRegister dst, XMMRegister src);
1823 void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
1824 void vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len);
1825 void vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len);
1826 void vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len);
1827 void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
1828
1829 // Sign extend moves
1830 void pmovsxbd(XMMRegister dst, XMMRegister src);
1831 void pmovsxbq(XMMRegister dst, XMMRegister src);
1832 void pmovsxbw(XMMRegister dst, XMMRegister src);
1833 void pmovsxwd(XMMRegister dst, XMMRegister src);
1834 void vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len);
1835 void vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len);
1836 void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
1837 void vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len);
1838 void vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len);
1839 void vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len);
1840
1841 void evpmovwb(Address dst, XMMRegister src, int vector_len);
1842 void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
1843
1844 void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len);
1845
1846 void evpmovdb(Address dst, XMMRegister src, int vector_len);
1847
1848 // Multiply add
1849 void pmaddwd(XMMRegister dst, XMMRegister src);
1850 void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1851 void vpmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
1852
1853 // Multiply add accumulate
1854 void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1855
1856#ifndef _LP64 // no 32bit push/pop on amd64
1857 void popl(Address dst);
1858#endif
1859
1860#ifdef _LP64
1861 void popq(Address dst);
1862 void popq(Register dst);
1863#endif
1864
1865 void popcntl(Register dst, Address src);
1866 void popcntl(Register dst, Register src);
1867
1868 void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);
1869
1870#ifdef _LP64
1871 void popcntq(Register dst, Address src);
1872 void popcntq(Register dst, Register src);
1873#endif
1874
1875 // Prefetches (SSE, SSE2, 3DNOW only)
1876
1877 void prefetchnta(Address src);
1878 void prefetchr(Address src);
1879 void prefetcht0(Address src);
1880 void prefetcht1(Address src);
1881 void prefetcht2(Address src);
1882 void prefetchw(Address src);
1883
1884 // Shuffle Bytes
1885 void pshufb(XMMRegister dst, XMMRegister src);
1886 void pshufb(XMMRegister dst, Address src);
1887 void vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1888
1889 // Shuffle Packed Doublewords
1890 void pshufd(XMMRegister dst, XMMRegister src, int mode);
1891 void pshufd(XMMRegister dst, Address src, int mode);
1892 void vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len);
1893
1894 // Shuffle Packed High/Low Words
1895 void pshufhw(XMMRegister dst, XMMRegister src, int mode);
1896 void pshuflw(XMMRegister dst, XMMRegister src, int mode);
1897 void pshuflw(XMMRegister dst, Address src, int mode);
1898
1899 //shuffle floats and doubles
1900 void pshufps(XMMRegister, XMMRegister, int);
1901 void pshufpd(XMMRegister, XMMRegister, int);
1902 void vpshufps(XMMRegister, XMMRegister, XMMRegister, int, int);
1903 void vpshufpd(XMMRegister, XMMRegister, XMMRegister, int, int);
1904
1905 // Shuffle packed values at 128 bit granularity
1906 void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
1907
1908 // Shift Right by bytes Logical DoubleQuadword Immediate
1909 void psrldq(XMMRegister dst, int shift);
1910 // Shift Left by bytes Logical DoubleQuadword Immediate
1911 void pslldq(XMMRegister dst, int shift);
1912
1913 // Logical Compare 128bit
1914 void ptest(XMMRegister dst, XMMRegister src);
1915 void ptest(XMMRegister dst, Address src);
1916 // Logical Compare 256bit
1917 void vptest(XMMRegister dst, XMMRegister src);
1918 void vptest(XMMRegister dst, Address src);
1919
1920 void evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1921
1922 // Vector compare
1923 void vptest(XMMRegister dst, XMMRegister src, int vector_len);
1924
1925 // Interleave Low Bytes
1926 void punpcklbw(XMMRegister dst, XMMRegister src);
1927 void punpcklbw(XMMRegister dst, Address src);
1928
1929 // Interleave Low Doublewords
1930 void punpckldq(XMMRegister dst, XMMRegister src);
1931 void punpckldq(XMMRegister dst, Address src);
1932
1933 // Interleave Low Quadwords
1934 void punpcklqdq(XMMRegister dst, XMMRegister src);
1935
1936#ifndef _LP64 // no 32bit push/pop on amd64
1937 void pushl(Address src);
1938#endif
1939
1940 void pushq(Address src);
1941
1942 void rcll(Register dst, int imm8);
1943
1944 void rclq(Register dst, int imm8);
1945
1946 void rcrq(Register dst, int imm8);
1947
1948 void rcpps(XMMRegister dst, XMMRegister src);
1949
1950 void rcpss(XMMRegister dst, XMMRegister src);
1951
1952 void rdtsc();
1953
1954 void ret(int imm16);
1955
1956 void roll(Register dst);
1957
1958 void roll(Register dst, int imm8);
1959
1960 void rorl(Register dst);
1961
1962 void rorl(Register dst, int imm8);
1963
1964#ifdef _LP64
1965 void rolq(Register dst);
1966 void rolq(Register dst, int imm8);
1967 void rorq(Register dst);
1968 void rorq(Register dst, int imm8);
1969 void rorxq(Register dst, Register src, int imm8);
1970 void rorxd(Register dst, Register src, int imm8);
1971#endif
1972
1973 void sahf();
1974
1975 void sall(Register dst, int imm8);
1976 void sall(Register dst);
1977 void sall(Address dst, int imm8);
1978 void sall(Address dst);
1979
1980 void sarl(Address dst, int imm8);
1981 void sarl(Address dst);
1982 void sarl(Register dst, int imm8);
1983 void sarl(Register dst);
1984
1985#ifdef _LP641
1986 void salq(Register dst, int imm8);
1987 void salq(Register dst);
1988 void salq(Address dst, int imm8);
1989 void salq(Address dst);
1990
1991 void sarq(Address dst, int imm8);
1992 void sarq(Address dst);
1993 void sarq(Register dst, int imm8);
1994 void sarq(Register dst);
1995#endif
1996
1997 void sbbl(Address dst, int32_t imm32);
1998 void sbbl(Register dst, int32_t imm32);
1999 void sbbl(Register dst, Address src);
2000 void sbbl(Register dst, Register src);
2001
2002 void sbbq(Address dst, int32_t imm32);
2003 void sbbq(Register dst, int32_t imm32);
2004 void sbbq(Register dst, Address src);
2005 void sbbq(Register dst, Register src);
2006
2007 void setb(Condition cc, Register dst);
2008
2009 void sete(Register dst);
2010 void setl(Register dst);
2011 void setne(Register dst);
2012
2013 void palignr(XMMRegister dst, XMMRegister src, int imm8);
2014 void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
2015 void evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2016
2017 void pblendw(XMMRegister dst, XMMRegister src, int imm8);
2018 void vblendps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
2019
2020 void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8);
2021 void sha1nexte(XMMRegister dst, XMMRegister src);
2022 void sha1msg1(XMMRegister dst, XMMRegister src);
2023 void sha1msg2(XMMRegister dst, XMMRegister src);
2024 // xmm0 is implicit additional source to the following instruction.
2025 void sha256rnds2(XMMRegister dst, XMMRegister src);
2026 void sha256msg1(XMMRegister dst, XMMRegister src);
2027 void sha256msg2(XMMRegister dst, XMMRegister src);
2028
2029 void shldl(Register dst, Register src);
2030 void shldl(Register dst, Register src, int8_t imm8);
2031 void shrdl(Register dst, Register src);
2032 void shrdl(Register dst, Register src, int8_t imm8);
2033
2034 void shll(Register dst, int imm8);
2035 void shll(Register dst);
2036
2037 void shlq(Register dst, int imm8);
2038 void shlq(Register dst);
2039
2040 void shrl(Register dst, int imm8);
2041 void shrl(Register dst);
2042 void shrl(Address dst);
2043 void shrl(Address dst, int imm8);
2044
2045 void shrq(Register dst, int imm8);
2046 void shrq(Register dst);
2047 void shrq(Address dst);
2048 void shrq(Address dst, int imm8);
2049
2050 void smovl(); // QQQ generic?
2051
2052 // Compute Square Root of Scalar Double-Precision Floating-Point Value
2053 void sqrtsd(XMMRegister dst, Address src);
2054 void sqrtsd(XMMRegister dst, XMMRegister src);
2055
2056 void roundsd(XMMRegister dst, Address src, int32_t rmode);
2057 void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode);
2058
2059 // Compute Square Root of Scalar Single-Precision Floating-Point Value
2060 void sqrtss(XMMRegister dst, Address src);
2061 void sqrtss(XMMRegister dst, XMMRegister src);
2062
2063 void std();
2064
2065 void stmxcsr( Address dst );
2066
2067 void subl(Address dst, int32_t imm32);
2068 void subl(Address dst, Register src);
2069 void subl(Register dst, int32_t imm32);
2070 void subl(Register dst, Address src);
2071 void subl(Register dst, Register src);
2072
2073 void subq(Address dst, int32_t imm32);
2074 void subq(Address dst, Register src);
2075 void subq(Register dst, int32_t imm32);
2076 void subq(Register dst, Address src);
2077 void subq(Register dst, Register src);
2078
2079 // Force generation of a 4 byte immediate value even if it fits into 8bit
2080 void subl_imm32(Register dst, int32_t imm32);
2081 void subq_imm32(Register dst, int32_t imm32);
2082
2083 // Subtract Scalar Double-Precision Floating-Point Values
2084 void subsd(XMMRegister dst, Address src);
2085 void subsd(XMMRegister dst, XMMRegister src);
2086
2087 // Subtract Scalar Single-Precision Floating-Point Values
2088 void subss(XMMRegister dst, Address src);
2089 void subss(XMMRegister dst, XMMRegister src);
2090
2091 void testb(Register dst, int imm8);
2092 void testb(Address dst, int imm8);
2093
2094 void testl(Register dst, int32_t imm32);
2095 void testl(Register dst, Register src);
2096 void testl(Register dst, Address src);
2097
2098 void testq(Address dst, int32_t imm32);
2099 void testq(Register dst, int32_t imm32);
2100 void testq(Register dst, Register src);
2101 void testq(Register dst, Address src);
2102
2103 // BMI - count trailing zeros
2104 void tzcntl(Register dst, Register src);
2105 void tzcntq(Register dst, Register src);
2106
2107 // Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
2108 void ucomisd(XMMRegister dst, Address src);
2109 void ucomisd(XMMRegister dst, XMMRegister src);
2110
2111 // Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
2112 void ucomiss(XMMRegister dst, Address src);
2113 void ucomiss(XMMRegister dst, XMMRegister src);
2114
2115 void xabort(int8_t imm8);
2116
2117 void xaddb(Address dst, Register src);
2118 void xaddw(Address dst, Register src);
2119 void xaddl(Address dst, Register src);
2120 void xaddq(Address dst, Register src);
2121
2122 void xbegin(Label& abort, relocInfo::relocType rtype = relocInfo::none);
2123
2124 void xchgb(Register reg, Address adr);
2125 void xchgw(Register reg, Address adr);
2126 void xchgl(Register reg, Address adr);
2127 void xchgl(Register dst, Register src);
2128
2129 void xchgq(Register reg, Address adr);
2130 void xchgq(Register dst, Register src);
2131
2132 void xend();
2133
2134 // Get Value of Extended Control Register
2135 void xgetbv();
2136
2137 void xorl(Register dst, int32_t imm32);
2138 void xorl(Address dst, int32_t imm32);
2139 void xorl(Register dst, Address src);
2140 void xorl(Register dst, Register src);
2141 void xorl(Address dst, Register src);
2142
2143 void xorb(Address dst, Register src);
2144 void xorb(Register dst, Address src);
2145 void xorw(Register dst, Register src);
2146
2147 void xorq(Register dst, Address src);
2148 void xorq(Address dst, int32_t imm32);
2149 void xorq(Register dst, Register src);
2150 void xorq(Register dst, int32_t imm32);
2151 void xorq(Address dst, Register src);
2152
2153 void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
2154
2155 // AVX 3-operands scalar instructions (encoded with VEX prefix)
2156
2157 void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
2158 void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2159 void vaddss(XMMRegister dst, XMMRegister nds, Address src);
2160 void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2161 void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
2162 void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2163 void vdivss(XMMRegister dst, XMMRegister nds, Address src);
2164 void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2165 void vfmadd231sd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2166 void vfmadd231ss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2167 void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
2168 void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2169 void vmulss(XMMRegister dst, XMMRegister nds, Address src);
2170 void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2171 void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
2172 void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2173 void vsubss(XMMRegister dst, XMMRegister nds, Address src);
2174 void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2175
2176 void vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2177 void vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2178 void vminss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2179 void vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2180
2181 void shlxl(Register dst, Register src1, Register src2);
2182 void shlxq(Register dst, Register src1, Register src2);
2183 void shrxl(Register dst, Register src1, Register src2);
2184 void shrxq(Register dst, Register src1, Register src2);
2185
2186 void bzhiq(Register dst, Register src1, Register src2);
2187 void pdep(Register dst, Register src1, Register src2);
2188 void pext(Register dst, Register src1, Register src2);
2189
2190
2191 //====================VECTOR ARITHMETIC=====================================
2192 // Add Packed Floating-Point Values
2193 void addpd(XMMRegister dst, XMMRegister src);
2194 void addpd(XMMRegister dst, Address src);
2195 void addps(XMMRegister dst, XMMRegister src);
2196 void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2197 void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2198 void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2199 void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2200
2201 // Subtract Packed Floating-Point Values
2202 void subpd(XMMRegister dst, XMMRegister src);
2203 void subps(XMMRegister dst, XMMRegister src);
2204 void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2205 void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2206 void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2207 void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2208
2209 // Multiply Packed Floating-Point Values
2210 void mulpd(XMMRegister dst, XMMRegister src);
2211 void mulpd(XMMRegister dst, Address src);
2212 void mulps(XMMRegister dst, XMMRegister src);
2213 void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2214 void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2215 void vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2216 void vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2217
2218 void vfmadd231pd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2219 void vfmadd231ps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2220 void vfmadd231pd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2221 void vfmadd231ps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2222
2223 // Divide Packed Floating-Point Values
2224 void divpd(XMMRegister dst, XMMRegister src);
2225 void divps(XMMRegister dst, XMMRegister src);
2226 void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2227 void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2228 void vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2229 void vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2230
2231 // Sqrt Packed Floating-Point Values
2232 void vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len);
2233 void vsqrtpd(XMMRegister dst, Address src, int vector_len);
2234 void vsqrtps(XMMRegister dst, XMMRegister src, int vector_len);
2235 void vsqrtps(XMMRegister dst, Address src, int vector_len);
2236
2237 // Round Packed Double precision value.
2238 void vroundpd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len);
2239 void vroundpd(XMMRegister dst, Address src, int32_t rmode, int vector_len);
2240 void vrndscalepd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len);
2241 void vrndscalepd(XMMRegister dst, Address src, int32_t rmode, int vector_len);
2242
2243 // Bitwise Logical AND of Packed Floating-Point Values
2244 void andpd(XMMRegister dst, XMMRegister src);
2245 void andps(XMMRegister dst, XMMRegister src);
2246 void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2247 void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2248 void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2249 void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2250
2251 void unpckhpd(XMMRegister dst, XMMRegister src);
2252 void unpcklpd(XMMRegister dst, XMMRegister src);
2253
2254 // Bitwise Logical XOR of Packed Floating-Point Values
2255 void xorpd(XMMRegister dst, XMMRegister src);
2256 void xorps(XMMRegister dst, XMMRegister src);
2257 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2258 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2259 void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2260 void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2261
2262 // Add horizontal packed integers
2263 void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2264 void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2265 void phaddw(XMMRegister dst, XMMRegister src);
2266 void phaddd(XMMRegister dst, XMMRegister src);
2267
2268 // Add packed integers
2269 void paddb(XMMRegister dst, XMMRegister src);
2270 void paddw(XMMRegister dst, XMMRegister src);
2271 void paddd(XMMRegister dst, XMMRegister src);
2272 void paddd(XMMRegister dst, Address src);
2273 void paddq(XMMRegister dst, XMMRegister src);
2274 void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2275 void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2276 void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2277 void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2278 void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2279 void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2280 void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2281 void vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2282
2283 // Leaf level assembler routines for masked operations.
2284 void evpaddb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2285 void evpaddb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2286 void evpaddw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2287 void evpaddw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2288 void evpaddd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2289 void evpaddd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2290 void evpaddq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2291 void evpaddq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2292 void evaddps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2293 void evaddps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2294 void evaddpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2295 void evaddpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2296 void evpsubb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2297 void evpsubb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2298 void evpsubw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2299 void evpsubw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2300 void evpsubd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2301 void evpsubd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2302 void evpsubq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2303 void evpsubq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2304 void evsubps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2305 void evsubps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2306 void evsubpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2307 void evsubpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2308 void evpmullw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2309 void evpmullw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2310 void evpmulld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2311 void evpmulld(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2312 void evpmullq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2313 void evpmullq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2314 void evmulps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2315 void evmulps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2316 void evmulpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2317 void evmulpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2318 void evdivps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2319 void evdivps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2320 void evdivpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2321 void evdivpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2322 void evpabsb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2323 void evpabsb(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
2324 void evpabsw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2325 void evpabsw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
2326 void evpabsd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2327 void evpabsd(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
2328 void evpabsq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2329 void evpabsq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
2330 void evpfma213ps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2331 void evpfma213ps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2332 void evpfma213pd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2333 void evpfma213pd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2334 void evpermb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2335 void evpermb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2336 void evpermw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2337 void evpermw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2338 void evpermd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2339 void evpermd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2340 void evpermq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2341 void evpermq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2342 void evpsllw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2343 void evpslld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2344 void evpsllq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2345 void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2346 void evpsrld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2347 void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2348 void evpsraw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2349 void evpsrad(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2350 void evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2351 void evsqrtps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2352 void evsqrtps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2353 void evsqrtpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2354 void evsqrtpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2355
2356 void evpsllw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2357 void evpslld(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2358 void evpsllq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2359 void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2360 void evpsrld(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2361 void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2362 void evpsraw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2363 void evpsrad(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2364 void evpsraq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2365
2366 void evpsllvw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2367 void evpsllvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2368 void evpsllvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2369 void evpsrlvw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2370 void evpsrlvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2371 void evpsrlvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2372 void evpsravw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2373 void evpsravd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2374 void evpsravq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2375 void evpmaxsb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2376 void evpmaxsw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2377 void evpmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2378 void evpmaxsq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2379 void evpminsb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2380 void evpminsw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2381 void evpminsd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2382 void evpminsq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2383 void evpmaxsb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2384 void evpmaxsw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2385 void evpmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2386 void evpmaxsq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2387 void evpminsb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2388 void evpminsw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2389 void evpminsd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2390 void evpminsq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2391 void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2392 void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2393 void evporq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2394 void evporq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2395 void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2396 void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2397 void evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2398 void evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2399 void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2400 void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2401 void evpxorq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2402 void evpxorq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2403
2404 void evprold(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2405 void evprolq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2406 void evprolvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2407 void evprolvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2408 void evprord(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2409 void evprorq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2410 void evprorvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2411 void evprorvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2412
2413 // Sub packed integers
2414 void psubb(XMMRegister dst, XMMRegister src);
2415 void psubw(XMMRegister dst, XMMRegister src);
2416 void psubd(XMMRegister dst, XMMRegister src);
2417 void psubq(XMMRegister dst, XMMRegister src);
2418 void vpsubusb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2419 void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2420 void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2421 void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2422 void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2423 void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2424 void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2425 void vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2426 void vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2427
2428 // Multiply packed integers (only shorts and ints)
2429 void pmullw(XMMRegister dst, XMMRegister src);
2430 void pmulld(XMMRegister dst, XMMRegister src);
2431 void pmuludq(XMMRegister dst, XMMRegister src);
2432 void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2433 void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2434 void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2435 void vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2436 void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2437 void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2438 void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2439 void vpmulhuw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2440
2441 // Minimum of packed integers
2442 void pminsb(XMMRegister dst, XMMRegister src);
2443 void vpminsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2444 void pminsw(XMMRegister dst, XMMRegister src);
2445 void vpminsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2446 void pminsd(XMMRegister dst, XMMRegister src);
2447 void vpminsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2448 void vpminsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2449 void minps(XMMRegister dst, XMMRegister src);
2450 void vminps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2451 void minpd(XMMRegister dst, XMMRegister src);
2452 void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2453
2454 // Maximum of packed integers
2455 void pmaxsb(XMMRegister dst, XMMRegister src);
2456 void vpmaxsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2457 void pmaxsw(XMMRegister dst, XMMRegister src);
2458 void vpmaxsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2459 void pmaxsd(XMMRegister dst, XMMRegister src);
2460 void vpmaxsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2461 void vpmaxsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2462 void maxps(XMMRegister dst, XMMRegister src);
2463 void vmaxps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2464 void maxpd(XMMRegister dst, XMMRegister src);
2465 void vmaxpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2466
2467 // Shift left packed integers
2468 void psllw(XMMRegister dst, int shift);
2469 void pslld(XMMRegister dst, int shift);
2470 void psllq(XMMRegister dst, int shift);
2471 void psllw(XMMRegister dst, XMMRegister shift);
2472 void pslld(XMMRegister dst, XMMRegister shift);
2473 void psllq(XMMRegister dst, XMMRegister shift);
2474 void vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2475 void vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2476 void vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2477 void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2478 void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2479 void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2480 void vpslldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2481
2482 // Logical shift right packed integers
2483 void psrlw(XMMRegister dst, int shift);
2484 void psrld(XMMRegister dst, int shift);
2485 void psrlq(XMMRegister dst, int shift);
2486 void psrlw(XMMRegister dst, XMMRegister shift);
2487 void psrld(XMMRegister dst, XMMRegister shift);
2488 void psrlq(XMMRegister dst, XMMRegister shift);
2489 void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2490 void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2491 void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2492 void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2493 void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2494 void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2495 void vpsrldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2496 void evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2497 void evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2498
2499 // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
2500 void psraw(XMMRegister dst, int shift);
2501 void psrad(XMMRegister dst, int shift);
2502 void psraw(XMMRegister dst, XMMRegister shift);
2503 void psrad(XMMRegister dst, XMMRegister shift);
2504 void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2505 void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2506 void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2507 void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2508 void evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2509 void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2510 void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2511
2512 // Variable shift left packed integers
2513 void vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2514 void vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2515
2516 // Variable shift right packed integers
2517 void vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2518 void vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2519
2520 // Variable shift right arithmetic packed integers
2521 void vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2522 void evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2523
2524 void vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2525 void vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2526
2527 // And packed integers
2528 void pand(XMMRegister dst, XMMRegister src);
2529 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2530 void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2531 void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2532
2533 // Andn packed integers
2534 void pandn(XMMRegister dst, XMMRegister src);
2535 void vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2536
2537 // Or packed integers
2538 void por(XMMRegister dst, XMMRegister src);
2539 void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2540 void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2541 void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2542
2543 // Xor packed integers
2544 void pxor(XMMRegister dst, XMMRegister src);
2545 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2546 void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2547 void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2548 void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2549 void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2550
2551 // Ternary logic instruction.
2552 void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
2553 void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len);
2554 void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
2555
2556 // Vector Rotate Left/Right instruction.
2557 void evprolvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2558 void evprolvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2559 void evprorvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2560 void evprorvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2561 void evprold(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2562 void evprolq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2563 void evprord(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2564 void evprorq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2565
2566 // vinserti forms
2567 void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2568 void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2569 void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2570 void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2571 void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2572
2573 // vinsertf forms
2574 void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2575 void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2576 void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2577 void vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2578 void vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2579 void vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2580
2581 // vextracti forms
2582 void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8);
2583 void vextracti128(Address dst, XMMRegister src, uint8_t imm8);
2584 void vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
2585 void vextracti32x4(Address dst, XMMRegister src, uint8_t imm8);
2586 void vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
2587 void vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
2588 void vextracti64x4(Address dst, XMMRegister src, uint8_t imm8);
2589
2590 // vextractf forms
2591 void vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8);
2592 void vextractf128(Address dst, XMMRegister src, uint8_t imm8);
2593 void vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
2594 void vextractf32x4(Address dst, XMMRegister src, uint8_t imm8);
2595 void vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
2596 void vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
2597 void vextractf64x4(Address dst, XMMRegister src, uint8_t imm8);
2598
2599 // xmm/mem sourced byte/word/dword/qword replicate
2600 void vpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
2601 void vpbroadcastb(XMMRegister dst, Address src, int vector_len);
2602 void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
2603 void vpbroadcastw(XMMRegister dst, Address src, int vector_len);
2604 void vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
2605 void vpbroadcastd(XMMRegister dst, Address src, int vector_len);
2606 void vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
2607 void vpbroadcastq(XMMRegister dst, Address src, int vector_len);
2608
2609 void evbroadcasti32x4(XMMRegister dst, Address src, int vector_len);
2610 void evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len);
2611 void evbroadcasti64x2(XMMRegister dst, Address src, int vector_len);
2612
2613 // scalar single/double/128bit precision replicate
2614 void vbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
2615 void vbroadcastss(XMMRegister dst, Address src, int vector_len);
2616 void vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
2617 void vbroadcastsd(XMMRegister dst, Address src, int vector_len);
2618 void vbroadcastf128(XMMRegister dst, Address src, int vector_len);
2619
2620 // gpr sourced byte/word/dword/qword replicate
2621 void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
2622 void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
2623 void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
2624 void evpbroadcastq(XMMRegister dst, Register src, int vector_len);
2625
2626 // Gather AVX2 and AVX3
2627 void vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
2628 void vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
2629 void vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
2630 void vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
2631 void evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len);
2632 void evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len);
2633 void evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len);
2634 void evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len);
2635
2636 //Scatter AVX3 only
2637 void evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len);
2638 void evpscatterdq(Address dst, KRegister mask, XMMRegister src, int vector_len);
2639 void evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len);
2640 void evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len);
2641
2642 // Carry-Less Multiplication Quadword
2643 void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
2644 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
2645 void evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len);
2646 // AVX instruction which is used to clear upper 128 bits of YMM registers and
2647 // to avoid transaction penalty between AVX and SSE states. There is no
2648 // penalty if legacy SSE instructions are encoded using VEX prefix because
2649 // they always clear upper 128 bits. It should be used before calling
2650 // runtime code and native libraries.
2651 void vzeroupper();
2652
2653 // Vector double compares
2654 void vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
2655 void evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
2656 ComparisonPredicateFP comparison, int vector_len);
2657
2658 // Vector float compares
2659 void vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len);
2660 void evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
2661 ComparisonPredicateFP comparison, int vector_len);
2662
2663 // Vector integer compares
2664 void vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2665 void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
2666 int comparison, bool is_signed, int vector_len);
2667 void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
2668 int comparison, bool is_signed, int vector_len);
2669
2670 // Vector long compares
2671 void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
2672 int comparison, bool is_signed, int vector_len);
2673 void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
2674 int comparison, bool is_signed, int vector_len);
2675
2676 // Vector byte compares
2677 void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
2678 int comparison, bool is_signed, int vector_len);
2679 void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
2680 int comparison, bool is_signed, int vector_len);
2681
2682 // Vector short compares
2683 void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
2684 int comparison, bool is_signed, int vector_len);
2685 void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
2686 int comparison, bool is_signed, int vector_len);
2687
2688 void evpmovb2m(KRegister dst, XMMRegister src, int vector_len);
2689 void evpmovw2m(KRegister dst, XMMRegister src, int vector_len);
2690 void evpmovd2m(KRegister dst, XMMRegister src, int vector_len);
2691 void evpmovq2m(KRegister dst, XMMRegister src, int vector_len);
2692 void evpmovm2b(XMMRegister dst, KRegister src, int vector_len);
2693 void evpmovm2w(XMMRegister dst, KRegister src, int vector_len);
2694 void evpmovm2d(XMMRegister dst, KRegister src, int vector_len);
2695 void evpmovm2q(XMMRegister dst, KRegister src, int vector_len);
2696
2697 // Vector blends
2698 void blendvps(XMMRegister dst, XMMRegister src);
2699 void blendvpd(XMMRegister dst, XMMRegister src);
2700 void pblendvb(XMMRegister dst, XMMRegister src);
2701 void blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2702 void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
2703 void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2704 void vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
2705 void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
2706 void evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2707 void evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2708 void evpblendmb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2709 void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2710 void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2711 void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2712 protected:
2713 // Next instructions require address alignment 16 bytes SSE mode.
2714 // They should be called only from corresponding MacroAssembler instructions.
2715 void andpd(XMMRegister dst, Address src);
2716 void andps(XMMRegister dst, Address src);
2717 void xorpd(XMMRegister dst, Address src);
2718 void xorps(XMMRegister dst, Address src);
2719
2720};
2721
// The Intel x86/Amd64 Assembler attributes: All fields enclosed here are to guide encoding level decisions.
// Specific set functions are for specialized use, else defaults or whatever was supplied to object construction
// are applied.
2725class InstructionAttr {
2726public:
2727 InstructionAttr(
2728 int vector_len, // The length of vector to be applied in encoding - for both AVX and EVEX
2729 bool rex_vex_w, // Width of data: if 32-bits or less, false, else if 64-bit or specially defined, true
2730 bool legacy_mode, // Details if either this instruction is conditionally encoded to AVX or earlier if true else possibly EVEX
2731 bool no_reg_mask, // when true, k0 is used when EVEX encoding is chosen, else embedded_opmask_register_specifier is used
2732 bool uses_vl) // This instruction may have legacy constraints based on vector length for EVEX
2733 :
2734 _rex_vex_w(rex_vex_w),
2735 _legacy_mode(legacy_mode || UseAVX < 3),
2736 _no_reg_mask(no_reg_mask),
2737 _uses_vl(uses_vl),
2738 _rex_vex_w_reverted(false),
2739 _is_evex_instruction(false),
2740 _is_clear_context(true),
2741 _is_extended_context(false),
2742 _avx_vector_len(vector_len),
2743 _tuple_type(Assembler::EVEX_ETUP),
2744 _input_size_in_bits(Assembler::EVEX_NObit),
2745 _evex_encoding(0),
2746 _embedded_opmask_register_specifier(0), // hard code k0
2747 _current_assembler(NULL__null) { }
2748
2749 ~InstructionAttr() {
2750 if (_current_assembler != NULL__null) {
2751 _current_assembler->clear_attributes();
2752 }
2753 _current_assembler = NULL__null;
2754 }
2755
2756private:
2757 bool _rex_vex_w;
2758 bool _legacy_mode;
2759 bool _no_reg_mask;
2760 bool _uses_vl;
2761 bool _rex_vex_w_reverted;
2762 bool _is_evex_instruction;
2763 bool _is_clear_context;
2764 bool _is_extended_context;
2765 int _avx_vector_len;
2766 int _tuple_type;
2767 int _input_size_in_bits;
2768 int _evex_encoding;
2769 int _embedded_opmask_register_specifier;
2770
2771 Assembler *_current_assembler;
2772
2773public:
2774 // query functions for field accessors
2775 bool is_rex_vex_w(void) const { return _rex_vex_w; }
2776 bool is_legacy_mode(void) const { return _legacy_mode; }
2777 bool is_no_reg_mask(void) const { return _no_reg_mask; }
2778 bool uses_vl(void) const { return _uses_vl; }
2779 bool is_rex_vex_w_reverted(void) { return _rex_vex_w_reverted; }
2780 bool is_evex_instruction(void) const { return _is_evex_instruction; }
2781 bool is_clear_context(void) const { return _is_clear_context; }
2782 bool is_extended_context(void) const { return _is_extended_context; }
2783 int get_vector_len(void) const { return _avx_vector_len; }
2784 int get_tuple_type(void) const { return _tuple_type; }
2785 int get_input_size(void) const { return _input_size_in_bits; }
2786 int get_evex_encoding(void) const { return _evex_encoding; }
2787 int get_embedded_opmask_register_specifier(void) const { return _embedded_opmask_register_specifier; }
2788
2789 // Set the vector len manually
2790 void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
2791
2792 // Set revert rex_vex_w for avx encoding
2793 void set_rex_vex_w_reverted(void) { _rex_vex_w_reverted = true; }
2794
2795 // Set rex_vex_w based on state
2796 void set_rex_vex_w(bool state) { _rex_vex_w = state; }
2797
2798 // Set the instruction to be encoded in AVX mode
2799 void set_is_legacy_mode(void) { _legacy_mode = true; }
2800
2801 // Set the current instuction to be encoded as an EVEX instuction
2802 void set_is_evex_instruction(void) { _is_evex_instruction = true; }
2803
2804 // Internal encoding data used in compressed immediate offset programming
2805 void set_evex_encoding(int value) { _evex_encoding = value; }
2806
2807 // When the Evex.Z field is set (true), it is used to clear all non directed XMM/YMM/ZMM components.
2808 // This method unsets it so that merge semantics are used instead.
2809 void reset_is_clear_context(void) { _is_clear_context = false; }
2810
2811 // Map back to current asembler so that we can manage object level assocation
2812 void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; }
2813
2814 // Address modifiers used for compressed displacement calculation
2815 void set_address_attributes(int tuple_type, int input_size_in_bits);
2816
2817 // Set embedded opmask register specifier.
2818 void set_embedded_opmask_register_specifier(KRegister mask) {
2819 _embedded_opmask_register_specifier = (*mask).encoding() & 0x7;
2820 }
2821
2822};
2823
2824#endif // CPU_X86_ASSEMBLER_X86_HPP