Bug Summary

File:    jdk/src/hotspot/share/opto/superword.cpp
Warning: line 4602, column 9
Called C++ object pointer is null
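
This class of warning comes from the analyzer's core checkers (the recorded invocation below enables -analyzer-checker=core): a member function is invoked through a pointer that may be null on at least one feasible path. The flagged statement (superword.cpp:4602) lies outside the excerpt reproduced below, so the following minimal sketch is a hypothetical illustration of the defect pattern, not the HotSpot code itself:

  struct Pack {
    int size() const { return 4; }
  };

  // May return a null pointer on one path.
  Pack* find_pack(bool found) {
    return found ? new Pack() : nullptr;
  }

  int pack_size(bool found) {
    Pack* p = find_pack(found);
    return p->size();  // analyzer: "Called C++ object pointer is null" when p == nullptr
  }

A null check (or an assert visible to the analyzer) on the pointer before the call is the usual way such a report is resolved or suppressed.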

Annotated Source Code

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name superword.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -mthread-model posix -fno-delete-null-pointer-checks -mframe-pointer=all -relaxed-aliasing -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/libjvm/objs/precompiled -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D _GNU_SOURCE -D _REENTRANT -D LIBC=gnu -D LINUX -D VM_LITTLE_ENDIAN -D _LP64=1 -D ASSERT -D CHECK_UNHANDLED_OOPS -D TARGET_ARCH_x86 -D INCLUDE_SUFFIX_OS=_linux -D INCLUDE_SUFFIX_CPU=_x86 -D INCLUDE_SUFFIX_COMPILER=_gcc -D TARGET_COMPILER_gcc -D AMD64 -D HOTSPOT_LIB_ARCH="amd64" -D COMPILER1 -D COMPILER2 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc/adfiles -I /home/daniel/Projects/java/jdk/src/hotspot/share -I /home/daniel/Projects/java/jdk/src/hotspot/os/linux -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix -I /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86 -I /home/daniel/Projects/java/jdk/src/hotspot/os_cpu/linux_x86 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc -I /home/daniel/Projects/java/jdk/src/hotspot/share/precompiled -I /home/daniel/Projects/java/jdk/src/hotspot/share/include -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix/include -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base/linux -I /home/daniel/Projects/java/jdk/src/java.base/share/native/libjimage -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc/adfiles -I /home/daniel/Projects/java/jdk/src/hotspot/share -I /home/daniel/Projects/java/jdk/src/hotspot/os/linux -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix -I /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86 -I /home/daniel/Projects/java/jdk/src/hotspot/os_cpu/linux_x86 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc -D _FORTIFY_SOURCE=2 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include 
-internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-format-zero-length -Wno-unused-parameter -Wno-unused -Wno-parentheses -Wno-comment -Wno-unknown-pragmas -Wno-address -Wno-delete-non-virtual-dtor -Wno-char-subscripts -Wno-array-bounds -Wno-int-in-bool-context -Wno-ignored-qualifiers -Wno-missing-field-initializers -Wno-implicit-fallthrough -Wno-empty-body -Wno-strict-overflow -Wno-sequence-point -Wno-maybe-uninitialized -Wno-misleading-indentation -Wno-cast-function-type -Wno-shift-negative-value -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /home/daniel/Projects/java/jdk/make/hotspot -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -stack-protector 1 -fno-rtti -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -o /home/daniel/Projects/java/scan/2021-12-21-193737-8510-1 -x c++ /home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp

/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp

1/*
2 * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24#include "precompiled.hpp"
25#include "compiler/compileLog.hpp"
26#include "libadt/vectset.hpp"
27#include "memory/allocation.inline.hpp"
28#include "memory/resourceArea.hpp"
29#include "opto/addnode.hpp"
30#include "opto/callnode.hpp"
31#include "opto/castnode.hpp"
32#include "opto/convertnode.hpp"
33#include "opto/divnode.hpp"
34#include "opto/matcher.hpp"
35#include "opto/memnode.hpp"
36#include "opto/mulnode.hpp"
37#include "opto/opcodes.hpp"
38#include "opto/opaquenode.hpp"
39#include "opto/superword.hpp"
40#include "opto/vectornode.hpp"
41#include "opto/movenode.hpp"
42#include "utilities/powerOfTwo.hpp"
43
44//
45// S U P E R W O R D T R A N S F O R M
46//=============================================================================
47
48//------------------------------SuperWord---------------------------
49SuperWord::SuperWord(PhaseIdealLoop* phase) :
50 _phase(phase),
51 _arena(phase->C->comp_arena()),
52 _igvn(phase->_igvn),
53 _packset(arena(), 8, 0, NULL), // packs for the current block
54 _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb
55 _block(arena(), 8, 0, NULL), // nodes in current block
56 _post_block(arena(), 8, 0, NULL), // nodes common to current block which are marked as post loop vectorizable
57 _data_entry(arena(), 8, 0, NULL), // nodes with all inputs from outside
58 _mem_slice_head(arena(), 8, 0, NULL), // memory slice heads
59 _mem_slice_tail(arena(), 8, 0, NULL), // memory slice tails
60 _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node
61 _clone_map(phase->C->clone_map()), // map of nodes created in cloning
62 _cmovev_kit(_arena, this), // map to facilitate CMoveV creation
63 _align_to_ref(NULL), // memory reference to align vectors to
64 _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs
65 _dg(_arena), // dependence graph
66 _visited(arena()), // visited node set
67 _post_visited(arena()), // post visited node set
68 _n_idx_list(arena(), 8), // scratch list of (node,index) pairs
69 _nlist(arena(), 8, 0, NULL), // scratch list of nodes
70 _stk(arena(), 8, 0, NULL), // scratch stack of nodes
71 _lpt(NULL), // loop tree node
72 _lp(NULL), // CountedLoopNode
73 _pre_loop_end(NULL), // Pre loop CountedLoopEndNode
74 _bb(NULL), // basic block
75 _iv(NULL), // induction var
76 _race_possible(false), // cases where SDMU is true
77 _early_return(true), // analysis evaluations routine
78 _do_vector_loop(phase->C->do_vector_loop()), // whether to do vectorization/simd style
79 _do_reserve_copy(DoReserveCopyInSuperWord),
80 _num_work_vecs(0), // amount of vector work we have
81 _num_reductions(0), // amount of reduction work we have
82 _ii_first(-1), // first loop generation index - only if do_vector_loop()
83 _ii_last(-1), // last loop generation index - only if do_vector_loop()
84 _ii_order(arena(), 8, 0, 0)
85{
86#ifndef PRODUCT
87 _vector_loop_debug = 0;
88 if (_phase->C->method() != NULL) {
89 _vector_loop_debug = phase->C->directive()->VectorizeDebugOption;
90 }
91
92#endif
93}
94
95static const bool _do_vector_loop_experimental = false; // Experimental vectorization which uses data from loop unrolling.
96
97//------------------------------transform_loop---------------------------
98void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
99 assert(UseSuperWord, "should be");
100 // SuperWord only works with power of two vector sizes.
101 int vector_width = Matcher::vector_width_in_bytes(T_BYTE);
102 if (vector_width < 2 || !is_power_of_2(vector_width)) {
103 return;
104 }
105
106 assert(lpt->_head->is_CountedLoop(), "must be");
107 CountedLoopNode *cl = lpt->_head->as_CountedLoop();
108
109 if (!cl->is_valid_counted_loop(T_INT)) return; // skip malformed counted loop
110
111 bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
112 if (post_loop_allowed) {
113 if (cl->is_reduction_loop()) return; // no predication mapping
114 Node *limit = cl->limit();
115 if (limit->is_Con()) return; // non constant limits only
116 // Now check the limit for expressions we do not handle
117 if (limit->is_Add()) {
118 Node *in2 = limit->in(2);
119 if (in2->is_Con()) {
120 int val = in2->get_int();
121 // should not try to program these cases
122 if (val < 0) return;
123 }
124 }
125 }
126
127 // skip any loop that has not been assigned max unroll by analysis
128 if (do_optimization) {
129 if (SuperWordLoopUnrollAnalysis && cl->slp_max_unroll() == 0) return;
130 }
131
132 // Check for no control flow in body (other than exit)
133 Node *cl_exit = cl->loopexit();
134 if (cl->is_main_loop() && (cl_exit->in(0) != lpt->_head)) {
135 #ifndef PRODUCT
136 if (TraceSuperWord) {
137 tty->print_cr("SuperWord::transform_loop: loop too complicated, cl_exit->in(0) != lpt->_head");
138 tty->print("cl_exit %d", cl_exit->_idx); cl_exit->dump();
139 tty->print("cl_exit->in(0) %d", cl_exit->in(0)->_idx); cl_exit->in(0)->dump();
140 tty->print("lpt->_head %d", lpt->_head->_idx); lpt->_head->dump();
141 lpt->dump_head();
142 }
143 #endif
144 return;
145 }
146
147 // Make sure there are no extra control users of the loop backedge
148 if (cl->back_control()->outcnt() != 1) {
149 return;
150 }
151
152 // Skip any loops already optimized by slp
153 if (cl->is_vectorized_loop()) return;
154
155 if (cl->is_unroll_only()) return;
156
157 if (cl->is_main_loop()) {
158 // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
159 CountedLoopEndNode* pre_end = find_pre_loop_end(cl);
160 if (pre_end == NULL) {
161 return;
162 }
163 Node* pre_opaq1 = pre_end->limit();
164 if (pre_opaq1->Opcode() != Op_Opaque1) {
165 return;
166 }
167 set_pre_loop_end(pre_end);
168 }
169
170 init(); // initialize data structures
171
172 set_lpt(lpt);
173 set_lp(cl);
174
175 // For now, define one block which is the entire loop body
176 set_bb(cl);
177
178 if (do_optimization) {
179 assert(_packset.length() == 0, "packset must be empty");
180 SLP_extract();
181 if (PostLoopMultiversioning && Matcher::has_predicated_vectors()) {
182 if (cl->is_vectorized_loop() && cl->is_main_loop() && !cl->is_reduction_loop()) {
183 IdealLoopTree *lpt_next = lpt->_next;
184 CountedLoopNode *cl_next = lpt_next->_head->as_CountedLoop();
185 _phase->has_range_checks(lpt_next);
186 if (cl_next->is_post_loop() && !cl_next->range_checks_present()) {
187 if (!cl_next->is_vectorized_loop()) {
188 int slp_max_unroll_factor = cl->slp_max_unroll();
189 cl_next->set_slp_max_unroll(slp_max_unroll_factor);
190 }
191 }
192 }
193 }
194 }
195}
196
197//------------------------------early unrolling analysis------------------------------
198void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
199 bool is_slp = true;
200 ResourceMark rm;
201 size_t ignored_size = lpt()->_body.size();
202 int *ignored_loop_nodes = NEW_RESOURCE_ARRAY(int, ignored_size);
203 Node_Stack nstack((int)ignored_size);
204 CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
205 Node *cl_exit = cl->loopexit_or_null();
206 int rpo_idx = _post_block.length();
207
208 assert(rpo_idx == 0, "post loop block is empty");
209
210 // First clear the entries
211 for (uint i = 0; i < lpt()->_body.size(); i++) {
212 ignored_loop_nodes[i] = -1;
213 }
214
215 int max_vector = Matcher::max_vector_size(T_BYTE);
216 bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
217
218 // Process the loop, some/all of the stack entries will not be in order, ergo
219 // need to preprocess the ignored initial state before we process the loop
220 for (uint i = 0; i < lpt()->_body.size(); i++) {
221 Node* n = lpt()->_body.at(i);
222 if (n == cl->incr() ||
223 n->is_reduction() ||
224 n->is_AddP() ||
225 n->is_Cmp() ||
226 n->is_IfTrue() ||
227 n->is_CountedLoop() ||
228 (n == cl_exit)) {
229 ignored_loop_nodes[i] = n->_idx;
230 continue;
231 }
232
233 if (n->is_If()) {
234 IfNode *iff = n->as_If();
235 if (iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN) {
236 if (lpt()->is_loop_exit(iff)) {
237 ignored_loop_nodes[i] = n->_idx;
238 continue;
239 }
240 }
241 }
242
243 if (n->is_Phi() && (n->bottom_type() == Type::MEMORY)) {
244 Node* n_tail = n->in(LoopNode::LoopBackControl);
245 if (n_tail != n->in(LoopNode::EntryControl)) {
246 if (!n_tail->is_Mem()) {
247 is_slp = false;
248 break;
249 }
250 }
251 }
252
253 // This must happen after check of phi/if
254 if (n->is_Phi() || n->is_If()) {
255 ignored_loop_nodes[i] = n->_idx;
256 continue;
257 }
258
259 if (n->is_LoadStore() || n->is_MergeMem() ||
260 (n->is_Proj() && !n->as_Proj()->is_CFG())) {
261 is_slp = false;
262 break;
263 }
264
265 // Ignore nodes with non-primitive type.
266 BasicType bt;
267 if (n->is_Mem()) {
268 bt = n->as_Mem()->memory_type();
269 } else {
270 bt = n->bottom_type()->basic_type();
271 }
272 if (is_java_primitive(bt) == false) {
273 ignored_loop_nodes[i] = n->_idx;
274 continue;
275 }
276
277 if (n->is_Mem()) {
278 MemNode* current = n->as_Mem();
279 Node* adr = n->in(MemNode::Address);
280 Node* n_ctrl = _phase->get_ctrl(adr);
281
282 // save a queue of post process nodes
283 if (n_ctrl != NULL && lpt()->is_member(_phase->get_loop(n_ctrl))) {
284 // Process the memory expression
285 int stack_idx = 0;
286 bool have_side_effects = true;
287 if (adr->is_AddP() == false) {
288 nstack.push(adr, stack_idx++);
289 } else {
290 // Mark the components of the memory operation in nstack
291 SWPointer p1(current, this, &nstack, true);
292 have_side_effects = p1.node_stack()->is_nonempty();
293 }
294
295 // Process the pointer stack
296 while (have_side_effects) {
297 Node* pointer_node = nstack.node();
298 for (uint j = 0; j < lpt()->_body.size(); j++) {
299 Node* cur_node = lpt()->_body.at(j);
300 if (cur_node == pointer_node) {
301 ignored_loop_nodes[j] = cur_node->_idx;
302 break;
303 }
304 }
305 nstack.pop();
306 have_side_effects = nstack.is_nonempty();
307 }
308 }
309 }
310 }
311
312 if (is_slp) {
313 // Now we try to find the maximum supported consistent vector which the machine
314 // description can use
315 bool small_basic_type = false;
316 bool flag_small_bt = false;
317 for (uint i = 0; i < lpt()->_body.size(); i++) {
318 if (ignored_loop_nodes[i] != -1) continue;
319
320 BasicType bt;
321 Node* n = lpt()->_body.at(i);
322 if (n->is_Mem()) {
323 bt = n->as_Mem()->memory_type();
324 } else {
325 bt = n->bottom_type()->basic_type();
326 }
327
328 if (post_loop_allowed) {
329 if (!small_basic_type) {
330 switch (bt) {
331 case T_CHAR:
332 case T_BYTE:
333 case T_SHORT:
334 small_basic_type = true;
335 break;
336
337 case T_LONG:
338 // TODO: Remove when support completed for mask context with LONG.
339 // Support needs to be augmented for logical qword operations, currently we map to dword
340 // buckets for vectors on logicals as these were legacy.
341 small_basic_type = true;
342 break;
343
344 default:
345 break;
346 }
347 }
348 }
349
350 if (is_java_primitive(bt) == false) continue;
351
352 int cur_max_vector = Matcher::max_vector_size(bt);
353
354 // If a max vector exists which is not larger than _local_loop_unroll_factor
355 // stop looking, we already have the max vector to map to.
356 if (cur_max_vector < local_loop_unroll_factor) {
357 is_slp = false;
358 if (TraceSuperWordLoopUnrollAnalysis) {
359 tty->print_cr("slp analysis fails: unroll limit greater than max vector\n");
360 }
361 break;
362 }
363
364 // Map the maximal common vector
365 if (VectorNode::implemented(n->Opcode(), cur_max_vector, bt)) {
366 if (cur_max_vector < max_vector && !flag_small_bt) {
367 max_vector = cur_max_vector;
368 } else if (cur_max_vector > max_vector && UseSubwordForMaxVector) {
369 // Analyse subword in the loop to set maximum vector size to take advantage of full vector width for subword types.
370 // Here we analyze if narrowing is likely to happen and if it is we set vector size more aggressively.
371 // We check for possibility of narrowing by looking through chain operations using subword types.
372 if (is_subword_type(bt)) {
373 uint start, end;
374 VectorNode::vector_operands(n, &start, &end);
375
376 for (uint j = start; j < end; j++) {
377 Node* in = n->in(j);
378 // Don't propagate through a memory
379 if (!in->is_Mem() && in_bb(in) && in->bottom_type()->basic_type() == T_INT) {
380 bool same_type = true;
381 for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
382 Node *use = in->fast_out(k);
383 if (!in_bb(use) && use->bottom_type()->basic_type() != bt) {
384 same_type = false;
385 break;
386 }
387 }
388 if (same_type) {
389 max_vector = cur_max_vector;
390 flag_small_bt = true;
391 cl->mark_subword_loop();
392 }
393 }
394 }
395 }
396 }
397 // We only process post loops on predicated targets where we want to
398 // mask map the loop to a single iteration
399 if (post_loop_allowed) {
400 _post_block.at_put_grow(rpo_idx++, n);
401 }
402 }
403 }
404 if (is_slp) {
405 local_loop_unroll_factor = max_vector;
406 cl->mark_passed_slp();
407 }
408 cl->mark_was_slp();
409 if (cl->is_main_loop()) {
410 cl->set_slp_max_unroll(local_loop_unroll_factor);
411 } else if (post_loop_allowed) {
412 if (!small_basic_type) {
413 // avoid replication context for small basic types in programmable masked loops
414 cl->set_slp_max_unroll(local_loop_unroll_factor);
415 }
416 }
417 }
418}
419
420//------------------------------SLP_extract---------------------------
421// Extract the superword level parallelism
422//
423// 1) A reverse post-order of nodes in the block is constructed. By scanning
424// this list from first to last, all definitions are visited before their uses.
425//
426// 2) A point-to-point dependence graph is constructed between memory references.
427// This simplifies the upcoming "independence" checker.
428//
429// 3) The maximum depth in the node graph from the beginning of the block
430// to each node is computed. This is used to prune the graph search
431// in the independence checker.
432//
433// 4) For integer types, the necessary bit width is propagated backwards
434// from stores to allow packed operations on byte, char, and short
435// integers. This reverses the promotion to type "int" that javac
436// did for operations like: char c1,c2,c3; c1 = c2 + c3.
437//
438// 5) One of the memory references is picked to be an aligned vector reference.
439// The pre-loop trip count is adjusted to align this reference in the
440// unrolled body.
441//
442// 6) The initial set of pack pairs is seeded with memory references.
443//
444// 7) The set of pack pairs is extended by following use->def and def->use links.
445//
446// 8) The pairs are combined into vector sized packs.
447//
448// 9) Reorder the memory slices to co-locate members of the memory packs.
449//
450// 10) Generate ideal vector nodes for the final set of packs and where necessary,
451// inserting scalar promotion, vector creation from multiple scalars, and
452// extraction of scalar values from vectors.
453//
454void SuperWord::SLP_extract() {
455
456#ifndef PRODUCT
457 if (_do_vector_loop && TraceSuperWord) {
458 tty->print("SuperWord::SLP_extract\n");
459 tty->print("input loop\n");
460 _lpt->dump_head();
461 _lpt->dump();
462 for (uint i = 0; i < _lpt->_body.size(); i++) {
463 _lpt->_body.at(i)->dump();
464 }
465 }
466#endif
467 // Ready the block
468 if (!construct_bb()) {
469 return; // Exit if no interesting nodes or complex graph.
470 }
471
472 // build _dg, _disjoint_ptrs
473 dependence_graph();
474
475 // compute function depth(Node*)
476 compute_max_depth();
477
478 CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
479 bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
480 if (cl->is_main_loop()) {
481 if (_do_vector_loop_experimental) {
482 if (mark_generations() != -1) {
483 hoist_loads_in_graph(); // this only rebuild the graph; all basic structs need rebuild explicitly
484
485 if (!construct_bb()) {
486 return; // Exit if no interesting nodes or complex graph.
487 }
488 dependence_graph();
489 compute_max_depth();
490 }
491
492#ifndef PRODUCT
493 if (TraceSuperWord) {
494 tty->print_cr("\nSuperWord::_do_vector_loop: graph after hoist_loads_in_graph");
495 _lpt->dump_head();
496 for (int j = 0; j < _block.length(); j++) {
497 Node* n = _block.at(j);
498 int d = depth(n);
499 for (int i = 0; i < d; i++) tty->print("%s", " ");
500 tty->print("%d :", d);
501 n->dump();
502 }
503 }
504#endif
505 }
506
507 compute_vector_element_type();
508
509 // Attempt vectorization
510
511 find_adjacent_refs();
512
513 if (align_to_ref() == NULL) {
514 return; // Did not find memory reference to align vectors
515 }
516
517 extend_packlist();
518
519 if (_do_vector_loop_experimental) {
520 if (_packset.length() == 0) {
521#ifndef PRODUCT
522 if (TraceSuperWord) {
523 tty->print_cr("\nSuperWord::_do_vector_loop DFA could not build packset, now trying to build anyway");
524 }
525#endif
526 pack_parallel();
527 }
528 }
529
530 combine_packs();
531
532 construct_my_pack_map();
533 if (UseVectorCmov) {
534 merge_packs_to_cmovd();
535 }
536
537 filter_packs();
538
539 schedule();
540 } else if (post_loop_allowed) {
541 int saved_mapped_unroll_factor = cl->slp_max_unroll();
542 if (saved_mapped_unroll_factor) {
543 int vector_mapped_unroll_factor = saved_mapped_unroll_factor;
544
545 // now reset the slp_unroll_factor so that we can check the analysis mapped
546 // what the vector loop was mapped to
547 cl->set_slp_max_unroll(0);
548
549 // do the analysis on the post loop
550 unrolling_analysis(vector_mapped_unroll_factor);
551
552 // if our analyzed loop is a canonical fit, start processing it
553 if (vector_mapped_unroll_factor == saved_mapped_unroll_factor) {
554 // now add the vector nodes to packsets
555 for (int i = 0; i < _post_block.length(); i++) {
556 Node* n = _post_block.at(i);
557 Node_List* singleton = new Node_List();
558 singleton->push(n);
559 _packset.append(singleton);
560 set_my_pack(n, singleton);
561 }
562
563 // map base types for vector usage
564 compute_vector_element_type();
565 } else {
566 return;
567 }
568 } else {
569 // for some reason we could not map the slp analysis state of the vectorized loop
570 return;
571 }
572 }
573
574 output();
575}
576
577//------------------------------find_adjacent_refs---------------------------
578// Find the adjacent memory references and create pack pairs for them.
579// This is the initial set of packs that will then be extended by
580// following use->def and def->use links. The align positions are
581// assigned relative to the reference "align_to_ref"
582void SuperWord::find_adjacent_refs() {
583 // Get list of memory operations
584 Node_List memops;
585 for (int i = 0; i < _block.length(); i++) {
586 Node* n = _block.at(i);
587 if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) &&
588 is_java_primitive(n->as_Mem()->memory_type())) {
589 int align = memory_alignment(n->as_Mem(), 0);
590 if (align != bottom_align) {
591 memops.push(n);
592 }
593 }
594 }
595 if (TraceSuperWord) {
596 tty->print_cr("\nfind_adjacent_refs found %d memops", memops.size());
597 }
598
599 Node_List align_to_refs;
600 int max_idx;
601 int best_iv_adjustment = 0;
602 MemNode* best_align_to_mem_ref = NULL;
603
604 while (memops.size() != 0) {
605 // Find a memory reference to align to.
606 MemNode* mem_ref = find_align_to_ref(memops, max_idx);
607 if (mem_ref == NULL) break;
608 align_to_refs.push(mem_ref);
609 int iv_adjustment = get_iv_adjustment(mem_ref);
610
611 if (best_align_to_mem_ref == NULL) {
612 // Set memory reference which is the best from all memory operations
613 // to be used for alignment. The pre-loop trip count is modified to align
614 // this reference to a vector-aligned address.
615 best_align_to_mem_ref = mem_ref;
616 best_iv_adjustment = iv_adjustment;
617 NOT_PRODUCT(find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment);)
618 }
619
620 SWPointer align_to_ref_p(mem_ref, this, NULL, false);
621 // Set alignment relative to "align_to_ref" for all related memory operations.
622 for (int i = memops.size() - 1; i >= 0; i--) {
623 MemNode* s = memops.at(i)->as_Mem();
624 if (isomorphic(s, mem_ref) &&
625 (!_do_vector_loop || same_origin_idx(s, mem_ref))) {
626 SWPointer p2(s, this, NULL, false);
627 if (p2.comparable(align_to_ref_p)) {
628 int align = memory_alignment(s, iv_adjustment);
629 set_alignment(s, align);
630 }
631 }
632 }
633
634 // Create initial pack pairs of memory operations for which
635 // alignment is set and vectors will be aligned.
636 bool create_pack = true;
637 if (memory_alignment(mem_ref, best_iv_adjustment) == 0 || _do_vector_loop) {
638 if (vectors_should_be_aligned()) {
639 int vw = vector_width(mem_ref);
640 int vw_best = vector_width(best_align_to_mem_ref);
641 if (vw > vw_best) {
642 // Do not vectorize a memory access with more elements per vector
643 // if unaligned memory access is not allowed because number of
644 // iterations in pre-loop will be not enough to align it.
645 create_pack = false;
646 } else {
647 SWPointer p2(best_align_to_mem_ref, this, NULL, false);
648 if (!align_to_ref_p.invar_equals(p2)) {
649 // Do not vectorize memory accesses with different invariants
650 // if unaligned memory accesses are not allowed.
651 create_pack = false;
652 }
653 }
654 }
655 } else {
656 if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
657 // Can't allow vectorization of unaligned memory accesses with the
658 // same type since it could be overlapped accesses to the same array.
659 create_pack = false;
660 } else {
661 // Allow independent (different type) unaligned memory operations
662 // if HW supports them.
663 if (vectors_should_be_aligned()) {
664 create_pack = false;
665 } else {
666 // Check if packs of the same memory type but
667 // with a different alignment were created before.
668 for (uint i = 0; i < align_to_refs.size(); i++) {
669 MemNode* mr = align_to_refs.at(i)->as_Mem();
670 if (mr == mem_ref) {
671 // Skip when we are looking at same memory operation.
672 continue;
673 }
674 if (same_velt_type(mr, mem_ref) &&
675 memory_alignment(mr, iv_adjustment) != 0)
676 create_pack = false;
677 }
678 }
679 }
680 }
681 if (create_pack) {
682 for (uint i = 0; i < memops.size(); i++) {
683 Node* s1 = memops.at(i);
684 int align = alignment(s1);
685 if (align == top_align) continue;
686 for (uint j = 0; j < memops.size(); j++) {
687 Node* s2 = memops.at(j);
688 if (alignment(s2) == top_align) continue;
689 if (s1 != s2 && are_adjacent_refs(s1, s2)) {
690 if (stmts_can_pack(s1, s2, align)) {
691 Node_List* pair = new Node_List();
692 pair->push(s1);
693 pair->push(s2);
694 if (!_do_vector_loop || same_origin_idx(s1, s2)) {
695 _packset.append(pair);
696 }
697 }
698 }
699 }
700 }
701 } else { // Don't create unaligned pack
702 // First, remove remaining memory ops of the same type from the list.
703 for (int i = memops.size() - 1; i >= 0; i--) {
704 MemNode* s = memops.at(i)->as_Mem();
705 if (same_velt_type(s, mem_ref)) {
706 memops.remove(i);
707 }
708 }
709
710 // Second, remove already constructed packs of the same type.
711 for (int i = _packset.length() - 1; i >= 0; i--) {
712 Node_List* p = _packset.at(i);
713 MemNode* s = p->at(0)->as_Mem();
714 if (same_velt_type(s, mem_ref)) {
715 remove_pack_at(i);
716 }
717 }
718
719 // If needed find the best memory reference for loop alignment again.
720 if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
721 // Put memory ops from remaining packs back on memops list for
722 // the best alignment search.
723 uint orig_msize = memops.size();
724 for (int i = 0; i < _packset.length(); i++) {
725 Node_List* p = _packset.at(i);
726 MemNode* s = p->at(0)->as_Mem();
727 assert(!same_velt_type(s, mem_ref), "sanity");
728 memops.push(s);
729 }
730 best_align_to_mem_ref = find_align_to_ref(memops, max_idx);
731 if (best_align_to_mem_ref == NULL) {
732 if (TraceSuperWord) {
733 tty->print_cr("SuperWord::find_adjacent_refs(): best_align_to_mem_ref == NULL");
734 }
735 // best_align_to_mem_ref will be used for adjusting the pre-loop limit in
736 // SuperWord::align_initial_loop_index. Find one with the biggest vector size,
737 // smallest data size and smallest iv offset from memory ops from remaining packs.
738 if (_packset.length() > 0) {
739 if (orig_msize == 0) {
740 best_align_to_mem_ref = memops.at(max_idx)->as_Mem();
741 } else {
742 for (uint i = 0; i < orig_msize; i++) {
743 memops.remove(0);
744 }
745 best_align_to_mem_ref = find_align_to_ref(memops, max_idx);
746 assert(best_align_to_mem_ref == NULL, "sanity");
747 best_align_to_mem_ref = memops.at(max_idx)->as_Mem();
748 }
749 assert(best_align_to_mem_ref != NULL, "sanity");
750 }
751 break;
752 }
753 best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref);
754 NOT_PRODUCT(find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment);)
755 // Restore list.
756 while (memops.size() > orig_msize)
757 (void)memops.pop();
758 }
759 } // unaligned memory accesses
760
761 // Remove used mem nodes.
762 for (int i = memops.size() - 1; i >= 0; i--) {
763 MemNode* m = memops.at(i)->as_Mem();
764 if (alignment(m) != top_align) {
765 memops.remove(i);
766 }
767 }
768
769 } // while (memops.size() != 0)
770 set_align_to_ref(best_align_to_mem_ref);
771
772 if (TraceSuperWord) {
773 tty->print_cr("\nAfter find_adjacent_refs");
774 print_packset();
775 }
776}
777
778#ifndef PRODUCT
779void SuperWord::find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment) {
780 if (is_trace_adjacent()) {
781 tty->print("SuperWord::find_adjacent_refs best_align_to_mem_ref = %d, best_iv_adjustment = %d",
782 best_align_to_mem_ref->_idx, best_iv_adjustment);
783 best_align_to_mem_ref->dump();
784 }
785}
786#endif
787
788//------------------------------find_align_to_ref---------------------------
789// Find a memory reference to align the loop induction variable to.
790// Looks first at stores then at loads, looking for a memory reference
791// with the largest number of references similar to it.
792MemNode* SuperWord::find_align_to_ref(Node_List &memops, int &idx) {
793 GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0);
794
795 // Count number of comparable memory ops
796 for (uint i = 0; i < memops.size(); i++) {
797 MemNode* s1 = memops.at(i)->as_Mem();
798 SWPointer p1(s1, this, NULL, false);
799 // Only discard unalignable memory references if vector memory references
800 // should be aligned on this platform.
801 if (vectors_should_be_aligned() && !ref_is_alignable(p1)) {
802 *cmp_ct.adr_at(i) = 0;
803 continue;
804 }
805 for (uint j = i+1; j < memops.size(); j++) {
806 MemNode* s2 = memops.at(j)->as_Mem();
807 if (isomorphic(s1, s2)) {
808 SWPointer p2(s2, this, NULL, false);
809 if (p1.comparable(p2)) {
810 (*cmp_ct.adr_at(i))++;
811 (*cmp_ct.adr_at(j))++;
812 }
813 }
814 }
815 }
816
817 // Find Store (or Load) with the greatest number of "comparable" references,
818 // biggest vector size, smallest data size and smallest iv offset.
819 int max_ct = 0;
820 int max_vw = 0;
821 int max_idx = -1;
822 int min_size = max_jint;
823 int min_iv_offset = max_jint;
824 for (uint j = 0; j < memops.size(); j++) {
825 MemNode* s = memops.at(j)->as_Mem();
826 if (s->is_Store()) {
827 int vw = vector_width_in_bytes(s);
828 assert(vw > 1, "sanity");
829 SWPointer p(s, this, NULL, false);
830 if ( cmp_ct.at(j) > max_ct ||
831 (cmp_ct.at(j) == max_ct &&
832 ( vw > max_vw ||
833 (vw == max_vw &&
834 ( data_size(s) < min_size ||
835 (data_size(s) == min_size &&
836 p.offset_in_bytes() < min_iv_offset)))))) {
837 max_ct = cmp_ct.at(j);
838 max_vw = vw;
839 max_idx = j;
840 min_size = data_size(s);
841 min_iv_offset = p.offset_in_bytes();
842 }
843 }
844 }
845 // If no stores, look at loads
846 if (max_ct == 0) {
847 for (uint j = 0; j < memops.size(); j++) {
848 MemNode* s = memops.at(j)->as_Mem();
849 if (s->is_Load()) {
850 int vw = vector_width_in_bytes(s);
851 assert(vw > 1, "sanity");
852 SWPointer p(s, this, NULL, false);
853 if ( cmp_ct.at(j) > max_ct ||
854 (cmp_ct.at(j) == max_ct &&
855 ( vw > max_vw ||
856 (vw == max_vw &&
857 ( data_size(s) < min_size ||
858 (data_size(s) == min_size &&
859 p.offset_in_bytes() < min_iv_offset)))))) {
860 max_ct = cmp_ct.at(j);
861 max_vw = vw;
862 max_idx = j;
863 min_size = data_size(s);
864 min_iv_offset = p.offset_in_bytes();
865 }
866 }
867 }
868 }
869
870#ifdef ASSERT
871 if (TraceSuperWord && Verbose) {
872 tty->print_cr("\nVector memops after find_align_to_ref");
873 for (uint i = 0; i < memops.size(); i++) {
874 MemNode* s = memops.at(i)->as_Mem();
875 s->dump();
876 }
877 }
878#endif
879
880 idx = max_idx;
881 if (max_ct > 0) {
882#ifdef ASSERT
883 if (TraceSuperWord) {
884 tty->print("\nVector align to node: ");
885 memops.at(max_idx)->as_Mem()->dump();
886 }
887#endif
888 return memops.at(max_idx)->as_Mem();
889 }
890 return NULL;
891}
892
893//------------------span_works_for_memory_size-----------------------------
894static bool span_works_for_memory_size(MemNode* mem, int span, int mem_size, int offset) {
895 bool span_matches_memory = false;
896 if ((mem_size == type2aelembytes(T_BYTE) || mem_size == type2aelembytes(T_SHORT))
897 && ABS(span) == type2aelembytes(T_INT)) {
898 // There is a mismatch on span size compared to memory.
899 for (DUIterator_Fast jmax, j = mem->fast_outs(jmax); j < jmax; j++) {
900 Node* use = mem->fast_out(j);
901 if (!VectorNode::is_type_transition_to_int(use)) {
902 return false;
903 }
904 }
905 // If all uses transition to integer, it means that we can successfully align even on mismatch.
906 return true;
907 }
908 else {
909 span_matches_memory = ABS(span) == mem_size;
910 }
911 return span_matches_memory && (ABS(offset) % mem_size) == 0;
912}
913
914//------------------------------ref_is_alignable---------------------------
915// Can the preloop align the reference to position zero in the vector?
916bool SuperWord::ref_is_alignable(SWPointer& p) {
917 if (!p.has_iv()) {
918 return true; // no induction variable
919 }
920 CountedLoopEndNode* pre_end = pre_loop_end();
921 assert(pre_end->stride_is_con(), "pre loop stride is constant");
922 int preloop_stride = pre_end->stride_con();
923
924 int span = preloop_stride * p.scale_in_bytes();
925 int mem_size = p.memory_size();
926 int offset = p.offset_in_bytes();
927 // Stride one accesses are alignable if offset is aligned to memory operation size.
928 // Offset can be unaligned when UseUnalignedAccesses is used.
929 if (span_works_for_memory_size(p.mem(), span, mem_size, offset)) {
930 return true;
931 }
932 // If the initial offset from start of the object is computable,
933 // check if the pre-loop can align the final offset accordingly.
934 //
935 // In other words: Can we find an i such that the offset
936 // after i pre-loop iterations is aligned to vw?
937 // (init_offset + pre_loop) % vw == 0 (1)
938 // where
939 // pre_loop = i * span
940 // is the number of bytes added to the offset by i pre-loop iterations.
941 //
942 // For this to hold we need pre_loop to increase init_offset by
943 // pre_loop = vw - (init_offset % vw)
944 //
945 // This is only possible if pre_loop is divisible by span because each
946 // pre-loop iteration increases the initial offset by 'span' bytes:
947 // (vw - (init_offset % vw)) % span == 0
948 //
949 int vw = vector_width_in_bytes(p.mem());
950 assert(vw > 1, "sanity");
951 Node* init_nd = pre_end->init_trip();
952 if (init_nd->is_Con() && p.invar() == NULL) {
953 int init = init_nd->bottom_type()->is_int()->get_con();
954 int init_offset = init * p.scale_in_bytes() + offset;
955 if (init_offset < 0) { // negative offset from object start?
956 return false; // may happen in dead loop
957 }
958 if (vw % span == 0) {
959 // If vm is a multiple of span, we use formula (1).
960 if (span > 0) {
961 return (vw - (init_offset % vw)) % span == 0;
962 } else {
963 assert(span < 0, "nonzero stride * scale");
964 return (init_offset % vw) % -span == 0;
965 }
966 } else if (span % vw == 0) {
967 // If span is a multiple of vw, we can simplify formula (1) to:
968 // (init_offset + i * span) % vw == 0
969 // =>
970 // (init_offset % vw) + ((i * span) % vw) == 0
971 // =>
972 // init_offset % vw == 0
973 //
974 // Because we add a multiple of vw to the initial offset, the final
975 // offset is a multiple of vw if and only if init_offset is a multiple.
976 //
977 return (init_offset % vw) == 0;
978 }
979 }
980 return false;
981}
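
As a worked example of condition (1) above (numbers chosen purely for illustration, not taken from the report): with vw = 16 bytes, 4-byte elements and pre-loop stride 1, span = 4. For init_offset = 6 the pre-loop would need (vw - (init_offset % vw)) % span = (16 - 6) % 4 = 2 to be zero, so the reference is not alignable; for init_offset = 8 the same expression is (16 - 8) % 4 = 0 and the pre-loop can align it.
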
982//---------------------------get_vw_bytes_special------------------------
983int SuperWord::get_vw_bytes_special(MemNode* s) {
984 // Get the vector width in bytes.
985 int vw = vector_width_in_bytes(s);
986
987 // Check for special case where there is an MulAddS2I usage where short vectors are going to need combined.
988 BasicType btype = velt_basic_type(s);
989 if (type2aelembytes(btype) == 2) {
990 bool should_combine_adjacent = true;
991 for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
992 Node* user = s->fast_out(i);
993 if (!VectorNode::is_muladds2i(user)) {
994 should_combine_adjacent = false;
995 }
996 }
997 if (should_combine_adjacent) {
998 vw = MIN2(Matcher::max_vector_size(btype)*type2aelembytes(btype), vw * 2);
999 }
1000 }
1001
1002 return vw;
1003}
1004
1005//---------------------------get_iv_adjustment---------------------------
1006// Calculate loop's iv adjustment for this memory ops.
1007int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
1008 SWPointer align_to_ref_p(mem_ref, this, NULL, false);
1009 int offset = align_to_ref_p.offset_in_bytes();
1010 int scale = align_to_ref_p.scale_in_bytes();
1011 int elt_size = align_to_ref_p.memory_size();
1012 int vw = get_vw_bytes_special(mem_ref);
1013 assert(vw > 1, "sanity");
1014 int iv_adjustment;
1015 if (scale != 0) {
1016 int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
1017 // At least one iteration is executed in pre-loop by default. As result
1018 // several iterations are needed to align memory operations in main-loop even
1019 // if offset is 0.
1020 int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));
1021 // iv_adjustment_in_bytes must be a multiple of elt_size if vector memory
1022 // references should be aligned on this platform.
1023 assert((ABS(iv_adjustment_in_bytes) % elt_size) == 0 || !vectors_should_be_aligned(),
1024 "(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size);
1025 iv_adjustment = iv_adjustment_in_bytes/elt_size;
1026 } else {
1027 // This memory op is not dependent on iv (scale == 0)
1028 iv_adjustment = 0;
1029 }
1030
1031#ifndef PRODUCT
1032 if (TraceSuperWord) {
1033 tty->print("SuperWord::get_iv_adjustment: n = %d, noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d: ",
1034 mem_ref->_idx, offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
1035 mem_ref->dump();
1036 }
1037#endif
1038 return iv_adjustment;
1039}
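
For illustration only (values not taken from the report): a 4-byte store with offset = 4, vw = 16 and a positive stride gives iv_adjustment_in_bytes = 1 * 16 - (4 % 16) = 12 and iv_adjustment = 12 / 4 = 3, i.e. the pre-loop must run three more iterations before the main-loop access becomes 16-byte aligned.
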
1040
1041//---------------------------dependence_graph---------------------------
1042// Construct dependency graph.
1043// Add dependence edges to load/store nodes for memory dependence
1044// A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
1045void SuperWord::dependence_graph() {
1046 CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
1047 // First, assign a dependence node to each memory node
1048 for (int i = 0; i < _block.length(); i++ ) {
1049 Node *n = _block.at(i);
1050 if (n->is_Mem() || (n->is_Phi() && n->bottom_type() == Type::MEMORY)) {
1051 _dg.make_node(n);
1052 }
1053 }
1054
1055 // For each memory slice, create the dependences
1056 for (int i = 0; i < _mem_slice_head.length(); i++) {
1057 Node* n = _mem_slice_head.at(i);
1058 Node* n_tail = _mem_slice_tail.at(i);
1059
1060 // Get slice in predecessor order (last is first)
1061 if (cl->is_main_loop()) {
1062 mem_slice_preds(n_tail, n, _nlist);
1063 }
1064
1065#ifndef PRODUCT
1066 if(TraceSuperWord && Verbose) {
1067 tty->print_cr("SuperWord::dependence_graph: built a new mem slice");
1068 for (int j = _nlist.length() - 1; j >= 0 ; j--) {
1069 _nlist.at(j)->dump();
1070 }
1071 }
1072#endif
1073 // Make the slice dependent on the root
1074 DepMem* slice = _dg.dep(n);
1075 _dg.make_edge(_dg.root(), slice);
1076
1077 // Create a sink for the slice
1078 DepMem* slice_sink = _dg.make_node(NULL);
1079 _dg.make_edge(slice_sink, _dg.tail());
1080
1081 // Now visit each pair of memory ops, creating the edges
1082 for (int j = _nlist.length() - 1; j >= 0 ; j--) {
1083 Node* s1 = _nlist.at(j);
1084
1085 // If no dependency yet, use slice
1086 if (_dg.dep(s1)->in_cnt() == 0) {
1087 _dg.make_edge(slice, s1);
1088 }
1089 SWPointer p1(s1->as_Mem(), this, NULL, false);
1090 bool sink_dependent = true;
1091 for (int k = j - 1; k >= 0; k--) {
1092 Node* s2 = _nlist.at(k);
1093 if (s1->is_Load() && s2->is_Load())
1094 continue;
1095 SWPointer p2(s2->as_Mem(), this, NULL, false);
1096
1097 int cmp = p1.cmp(p2);
1098 if (SuperWordRTDepCheck &&
1099 p1.base() != p2.base() && p1.valid() && p2.valid()) {
1100 // Create a runtime check to disambiguate
1101 OrderedPair pp(p1.base(), p2.base());
1102 _disjoint_ptrs.append_if_missing(pp);
1103 } else if (!SWPointer::not_equal(cmp)) {
1104 // Possibly same address
1105 _dg.make_edge(s1, s2);
1106 sink_dependent = false;
1107 }
1108 }
1109 if (sink_dependent) {
1110 _dg.make_edge(s1, slice_sink);
1111 }
1112 }
1113
1114 if (TraceSuperWord) {
1115 tty->print_cr("\nDependence graph for slice: %d", n->_idx);
1116 for (int q = 0; q < _nlist.length(); q++) {
1117 _dg.print(_nlist.at(q));
1118 }
1119 tty->cr();
1120 }
1121
1122 _nlist.clear();
1123 }
1124
1125 if (TraceSuperWord) {
1126 tty->print_cr("\ndisjoint_ptrs: %s", _disjoint_ptrs.length() > 0 ? "" : "NONE");
1127 for (int r = 0; r < _disjoint_ptrs.length(); r++) {
1128 _disjoint_ptrs.at(r).print();
1129 tty->cr();
1130 }
1131 tty->cr();
1132 }
1133
1134}
1135
1136//---------------------------mem_slice_preds---------------------------
1137// Return a memory slice (node list) in predecessor order starting at "start"
1138void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds) {
1139 assert(preds.length() == 0, "start empty");
1140 Node* n = start;
1141 Node* prev = NULL;
1142 while (true) {
1143 NOT_PRODUCT( if(is_trace_mem_slice()) tty->print_cr("SuperWord::mem_slice_preds: n %d", n->_idx);)
1144 assert(in_bb(n), "must be in block");
1145 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
1146 Node* out = n->fast_out(i);
1147 if (out->is_Load()) {
1148 if (in_bb(out)) {
1149 preds.push(out);
1150 if (TraceSuperWord && Verbose) {
1151 tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", out->_idx);
1152 }
1153 }
1154 } else {
1155 // FIXME
1156 if (out->is_MergeMem() && !in_bb(out)) {
1157 // Either unrolling is causing a memory edge not to disappear,
1158 // or need to run igvn.optimize() again before SLP
1159 } else if (out->is_Phi() && out->bottom_type() == Type::MEMORY && !in_bb(out)) {
1160 // Ditto. Not sure what else to check further.
1161 } else if (out->Opcode() == Op_StoreCM && out->in(MemNode::OopStore) == n) {
1162 // StoreCM has an input edge used as a precedence edge.
1163 // Maybe an issue when oop stores are vectorized.
1164 } else {
1165 assert(out == prev || prev == NULL, "no branches off of store slice");
1166 }
1167 }//else
1168 }//for
1169 if (n == stop) break;
1170 preds.push(n);
1171 if (TraceSuperWord && Verbose) {
1172 tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", n->_idx);
1173 }
1174 prev = n;
1175 assert(n->is_Mem(), "unexpected node %s", n->Name());
1176 n = n->in(MemNode::Memory);
1177 }
1178}
1179
1180//------------------------------stmts_can_pack---------------------------
1181// Can s1 and s2 be in a pack with s1 immediately preceding s2 and
1182// s1 aligned at "align"
1183bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {
1184
1185 // Do not use superword for non-primitives
1186 BasicType bt1 = velt_basic_type(s1);
1187 BasicType bt2 = velt_basic_type(s2);
1188 if(!is_java_primitive(bt1) || !is_java_primitive(bt2))
1189 return false;
1190 if (Matcher::max_vector_size(bt1) < 2) {
1191 return false; // No vectors for this type
1192 }
1193
1194 if (isomorphic(s1, s2)) {
1195 if ((independent(s1, s2) && have_similar_inputs(s1, s2)) || reduction(s1, s2)) {
1196 if (!exists_at(s1, 0) && !exists_at(s2, 1)) {
1197 if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) {
1198 int s1_align = alignment(s1);
1199 int s2_align = alignment(s2);
1200 if (s1_align == top_align || s1_align == align) {
1201 if (s2_align == top_align || s2_align == align + data_size(s1)) {
1202 return true;
1203 }
1204 }
1205 }
1206 }
1207 }
1208 }
1209 return false;
1210}
1211
1212//------------------------------exists_at---------------------------
1213// Does s exist in a pack at position pos?
1214bool SuperWord::exists_at(Node* s, uint pos) {
1215 for (int i = 0; i < _packset.length(); i++) {
1216 Node_List* p = _packset.at(i);
1217 if (p->at(pos) == s) {
1218 return true;
1219 }
1220 }
1221 return false;
1222}
1223
1224//------------------------------are_adjacent_refs---------------------------
1225// Is s1 immediately before s2 in memory?
1226bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) {
1227 if (!s1->is_Mem() || !s2->is_Mem()) return false;
1228 if (!in_bb(s1) || !in_bb(s2)) return false;
1229
1230 // Do not use superword for non-primitives
1231 if (!is_java_primitive(s1->as_Mem()->memory_type()) ||
1232 !is_java_primitive(s2->as_Mem()->memory_type())) {
1233 return false;
1234 }
1235
1236 // FIXME - co_locate_pack fails on Stores in different mem-slices, so
1237 // only pack memops that are in the same alias set until that's fixed.
1238 if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) !=
1239 _phase->C->get_alias_index(s2->as_Mem()->adr_type()))
1240 return false;
1241 SWPointer p1(s1->as_Mem(), this, NULL, false);
1242 SWPointer p2(s2->as_Mem(), this, NULL, false);
1243 if (p1.base() != p2.base() || !p1.comparable(p2)) return false;
1244 int diff = p2.offset_in_bytes() - p1.offset_in_bytes();
1245 return diff == data_size(s1);
1246}
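
For example (hypothetical accesses, not taken from the report): loads of a[i] and a[i+1] from the same int array have SWPointer offsets that differ by 4 bytes, which equals data_size of the first load, so they are adjacent; a[i] and a[i+2] differ by 8 bytes and are rejected.
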
1247
1248//------------------------------isomorphic---------------------------
1249// Are s1 and s2 similar?
1250bool SuperWord::isomorphic(Node* s1, Node* s2) {
1251 if (s1->Opcode() != s2->Opcode()) return false;
1252 if (s1->req() != s2->req()) return false;
1253 if (!same_velt_type(s1, s2)) return false;
1254 Node* s1_ctrl = s1->in(0);
1255 Node* s2_ctrl = s2->in(0);
1256 // If the control nodes are equivalent, no further checks are required to test for isomorphism.
1257 if (s1_ctrl == s2_ctrl) {
1258 return true;
1259 } else {
1260 bool s1_ctrl_inv = ((s1_ctrl == NULL) ? true : lpt()->is_invariant(s1_ctrl));
1261 bool s2_ctrl_inv = ((s2_ctrl == NULL) ? true : lpt()->is_invariant(s2_ctrl));
1262 // If the control nodes are not invariant for the loop, fail isomorphism test.
1263 if (!s1_ctrl_inv || !s2_ctrl_inv) {
1264 return false;
1265 }
1266 if(s1_ctrl != NULL && s2_ctrl != NULL) {
1267 if (s1_ctrl->is_Proj()) {
1268 s1_ctrl = s1_ctrl->in(0);
1269 assert(lpt()->is_invariant(s1_ctrl), "must be invariant");
1270 }
1271 if (s2_ctrl->is_Proj()) {
1272 s2_ctrl = s2_ctrl->in(0);
1273 assert(lpt()->is_invariant(s2_ctrl), "must be invariant");
1274 }
1275 if (!s1_ctrl->is_RangeCheck() || !s2_ctrl->is_RangeCheck()) {
1276 return false;
1277 }
1278 }
1279 // Control nodes are invariant. However, we have no way of checking whether they resolve
1280 // in an equivalent manner. But, we know that invariant range checks are guaranteed to
1281 // throw before the loop (if they would have thrown). Thus, the loop would not have been reached.
1282 // Therefore, if the control nodes for both are range checks, we accept them to be isomorphic.
1283 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
1284 Node* t1 = s1->fast_out(i);
1285 for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
1286 Node* t2 = s2->fast_out(j);
1287 if (VectorNode::is_muladds2i(t1) && VectorNode::is_muladds2i(t2)) {
1288 return true;
1289 }
1290 }
1291 }
1292 }
1293 return false;
1294}
1295
1296//------------------------------independent---------------------------
1297// Is there no data path from s1 to s2 or s2 to s1?
1298bool SuperWord::independent(Node* s1, Node* s2) {
1299 // assert(s1->Opcode() == s2->Opcode(), "check isomorphic first");
1300 int d1 = depth(s1);
1301 int d2 = depth(s2);
1302 if (d1 == d2) return s1 != s2;
1303 Node* deep = d1 > d2 ? s1 : s2;
1304 Node* shallow = d1 > d2 ? s2 : s1;
1305
1306 visited_clear();
1307
1308 return independent_path(shallow, deep);
1309}
1310
1311//--------------------------have_similar_inputs-----------------------
1312// For a node pair (s1, s2) which is isomorphic and independent,
1313// do s1 and s2 have similar input edges?
1314bool SuperWord::have_similar_inputs(Node* s1, Node* s2) {
1315 // assert(isomorphic(s1, s2) == true, "check isomorphic");
1316 // assert(independent(s1, s2) == true, "check independent");
1317 if (s1->req() > 1 && !s1->is_Store() && !s1->is_Load()) {
1318 for (uint i = 1; i < s1->req(); i++) {
1319 if (s1->in(i)->Opcode() != s2->in(i)->Opcode()) return false;
1320 }
1321 }
1322 return true;
1323}
1324
1325//------------------------------reduction---------------------------
1326// Is there a data path between s1 and s2, and are both nodes reductions?
1327bool SuperWord::reduction(Node* s1, Node* s2) {
1328 bool retValue = false;
1329 int d1 = depth(s1);
1330 int d2 = depth(s2);
1331 if (d2 > d1) {
1332 if (s1->is_reduction() && s2->is_reduction()) {
1333 // This is an ordered set, so s1 should define s2
1334 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
1335 Node* t1 = s1->fast_out(i);
1336 if (t1 == s2) {
1337 // both nodes are reductions and connected
1338 retValue = true;
1339 }
1340 }
1341 }
1342 }
1343
1344 return retValue;
1345}
1346
1347//------------------------------independent_path------------------------------
1348// Helper for independent
1349bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) {
1350 if (dp >= 1000) return false; // stop deep recursion
1351 visited_set(deep);
1352 int shal_depth = depth(shallow);
1353 assert(shal_depth <= depth(deep), "must be");
1354 for (DepPreds preds(deep, _dg); !preds.done(); preds.next()) {
1355 Node* pred = preds.current();
1356 if (in_bb(pred) && !visited_test(pred)) {
1357 if (shallow == pred) {
1358 return false;
1359 }
1360 if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) {
1361 return false;
1362 }
1363 }
1364 }
1365 return true;
1366}
1367
1368//------------------------------set_alignment---------------------------
1369void SuperWord::set_alignment(Node* s1, Node* s2, int align) {
1370 set_alignment(s1, align);
1371 if (align == top_align || align == bottom_align) {
1372 set_alignment(s2, align);
1373 } else {
1374 set_alignment(s2, align + data_size(s1));
1375 }
1376}
1377
1378//------------------------------data_size---------------------------
1379int SuperWord::data_size(Node* s) {
1380 Node* use = NULL; //test if the node is a candidate for CMoveV optimization, then return the size of CMov
1381 if (UseVectorCmov) {
1382 use = _cmovev_kit.is_Bool_candidate(s);
1383 if (use != NULL) {
1384 return data_size(use);
1385 }
1386 use = _cmovev_kit.is_CmpD_candidate(s);
1387 if (use != NULL) {
1388 return data_size(use);
1389 }
1390 }
1391
1392 int bsize = type2aelembytes(velt_basic_type(s));
1393 assert(bsize != 0, "valid size");
1394 return bsize;
1395}
1396
1397//------------------------------extend_packlist---------------------------
1398// Extend packset by following use->def and def->use links from pack members.
1399void SuperWord::extend_packlist() {
1400 bool changed;
1401 do {
1402 packset_sort(_packset.length());
1403 changed = false;
1404 for (int i = 0; i < _packset.length(); i++) {
1405 Node_List* p = _packset.at(i);
1406 changed |= follow_use_defs(p);
1407 changed |= follow_def_uses(p);
1408 }
1409 } while (changed);
1410
1411 if (_race_possible) {
1412 for (int i = 0; i < _packset.length(); i++) {
1413 Node_List* p = _packset.at(i);
1414 order_def_uses(p);
1415 }
1416 }
1417
1418 if (TraceSuperWord) {
1419 tty->print_cr("\nAfter extend_packlist");
1420 print_packset();
1421 }
1422}
1423
1424//------------------------------follow_use_defs---------------------------
1425// Extend the packset by visiting operand definitions of nodes in pack p
1426bool SuperWord::follow_use_defs(Node_List* p) {
1427 assert(p->size() == 2, "just checking");
1428 Node* s1 = p->at(0);
1429 Node* s2 = p->at(1);
1430 assert(s1->req() == s2->req(), "just checking");
1431 assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
1432
1433 if (s1->is_Load()) return false;
1434
1435 int align = alignment(s1);
1436 NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_use_defs: s1 %d, align %d", s1->_idx, align);)
1437 bool changed = false;
1438 int start = s1->is_Store() ? MemNode::ValueIn : 1;
1439 int end = s1->is_Store() ? MemNode::ValueIn+1 : s1->req();
1440 for (int j = start; j < end; j++) {
1441 Node* t1 = s1->in(j);
1442 Node* t2 = s2->in(j);
1443 if (!in_bb(t1) || !in_bb(t2))
1444 continue;
1445 if (stmts_can_pack(t1, t2, align)) {
1446 if (est_savings(t1, t2) >= 0) {
1447 Node_List* pair = new Node_List();
1448 pair->push(t1);
1449 pair->push(t2);
1450 _packset.append(pair);
1451 NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_use_defs: set_alignment(%d, %d, %d)", t1->_idx, t2->_idx, align);)
1452 set_alignment(t1, t2, align);
1453 changed = true;
1454 }
1455 }
1456 }
1457 return changed;
1458}
1459
1460//------------------------------follow_def_uses---------------------------
1461// Extend the packset by visiting uses of nodes in pack p
1462bool SuperWord::follow_def_uses(Node_List* p) {
1463 bool changed = false;
1464 Node* s1 = p->at(0);
1465 Node* s2 = p->at(1);
1466 assert(p->size() == 2, "just checking");
1467 assert(s1->req() == s2->req(), "just checking");
1468 assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
1469
1470 if (s1->is_Store()) return false;
1471
1472 int align = alignment(s1);
1473 NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_def_uses: s1 %d, align %d", s1->_idx, align);)
1474 int savings = -1;
1475 int num_s1_uses = 0;
1476 Node* u1 = NULL;
1477 Node* u2 = NULL;
1478 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
1479 Node* t1 = s1->fast_out(i);
1480 num_s1_uses++;
1481 if (!in_bb(t1)) continue;
1482 for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
1483 Node* t2 = s2->fast_out(j);
1484 if (!in_bb(t2)) continue;
1485 if (t2->Opcode() == Op_AddI && t2 == _lp->as_CountedLoop()->incr()) continue; // don't mess with the iv
1486 if (!opnd_positions_match(s1, t1, s2, t2))
1487 continue;
1488 if (stmts_can_pack(t1, t2, align)) {
1489 int my_savings = est_savings(t1, t2);
1490 if (my_savings > savings) {
1491 savings = my_savings;
1492 u1 = t1;
1493 u2 = t2;
1494 }
1495 }
1496 }
1497 }
1498 if (num_s1_uses > 1) {
1499 _race_possible = true;
1500 }
1501 if (savings >= 0) {
1502 Node_List* pair = new Node_List();
1503 pair->push(u1);
1504 pair->push(u2);
1505 _packset.append(pair);
1506 NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_def_uses: set_alignment(%d, %d, %d)", u1->_idx, u2->_idx, align);)
1507 set_alignment(u1, u2, align);
1508 changed = true;
1509 }
1510 return changed;
1511}
1512
1513//------------------------------order_def_uses---------------------------
1514// For extended packsets, ordinally arrange uses packset by major component
1515void SuperWord::order_def_uses(Node_List* p) {
1516 Node* s1 = p->at(0);
1517
1518 if (s1->is_Store()) return;
1519
1520 // reductions are always managed beforehand
1521 if (s1->is_reduction()) return;
1522
1523 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
1524 Node* t1 = s1->fast_out(i);
1525
1526 // Only allow operand swap on commuting operations
1527 if (!t1->is_Add() && !t1->is_Mul() && !VectorNode::is_muladds2i(t1)) {
1528 break;
1529 }
1530
1531 // Now find t1's packset
1532 Node_List* p2 = NULL;
1533 for (int j = 0; j < _packset.length(); j++) {
1534 p2 = _packset.at(j);
1535 Node* first = p2->at(0);
1536 if (t1 == first) {
1537 break;
1538 }
1539 p2 = NULL;
1540 }
1541 // Arrange all sub components by the major component
1542 if (p2 != NULL) {
1543 for (uint j = 1; j < p->size(); j++) {
1544 Node* d1 = p->at(j);
1545 Node* u1 = p2->at(j);
1546 opnd_positions_match(s1, t1, d1, u1);
1547 }
1548 }
1549 }
1550}
1551
1552//---------------------------opnd_positions_match-------------------------
1553// Is the use of d1 in u1 at the same operand position as d2 in u2?
1554bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) {
1555 // check reductions to see if they are marshalled to represent the reduction
1556 // operator in a specified opnd
1557 if (u1->is_reduction() && u2->is_reduction()) {
1558 // ensure reductions have phis and reduction definitions feeding the 1st operand
1559 Node* first = u1->in(2);
1560 if (first->is_Phi() || first->is_reduction()) {
1561 u1->swap_edges(1, 2);
1562 }
1563 // ensure reductions have phis and reduction definitions feeding the 1st operand
1564 first = u2->in(2);
1565 if (first->is_Phi() || first->is_reduction()) {
1566 u2->swap_edges(1, 2);
1567 }
1568 return true;
1569 }
1570
1571 uint ct = u1->req();
1572 if (ct != u2->req()) return false;
1573 uint i1 = 0;
1574 uint i2 = 0;
1575 do {
1576 for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break;
1577 for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break;
1578 if (i1 != i2) {
1579 if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) {
1580 // Further analysis relies on operands position matching.
1581 u2->swap_edges(i1, i2);
1582 } else if (VectorNode::is_muladds2i(u2) && u1 != u2) {
1583 if (i1 == 5 - i2) { // ((i1 == 3 && i2 == 2) || (i1 == 2 && i2 == 3) || (i1 == 1 && i2 == 4) || (i1 == 4 && i2 == 1))
1584 u2->swap_edges(1, 2);
1585 u2->swap_edges(3, 4);
1586 }
1587 if (i1 == 3 - i2 || i1 == 7 - i2) { // ((i1 == 1 && i2 == 2) || (i1 == 2 && i2 == 1) || (i1 == 3 && i2 == 4) || (i1 == 4 && i2 == 3))
1588 u2->swap_edges(2, 3);
1589 u2->swap_edges(1, 4);
1590 }
1591 return false; // Just swap the edges, the muladds2i nodes get packed in follow_use_defs
1592 } else {
1593 return false;
1594 }
1595 } else if (i1 == i2 && VectorNode::is_muladds2i(u2) && u1 != u2) {
1596 u2->swap_edges(1, 3);
1597 u2->swap_edges(2, 4);
1598 return false; // Just swap the edges, the muladds2i nodes get packed in follow_use_defs
1599 }
1600 } while (i1 < ct);
1601 return true;
1602}
1603
1604//------------------------------est_savings---------------------------
1605// Estimate the savings from executing s1 and s2 as a pack
1606int SuperWord::est_savings(Node* s1, Node* s2) {
1607 int save_in = 2 - 1; // 2 operations per instruction in packed form
1608
1609 // inputs
1610 for (uint i = 1; i < s1->req(); i++) {
1611 Node* x1 = s1->in(i);
1612 Node* x2 = s2->in(i);
1613 if (x1 != x2) {
1614 if (are_adjacent_refs(x1, x2)) {
1615 save_in += adjacent_profit(x1, x2);
1616 } else if (!in_packset(x1, x2)) {
1617 save_in -= pack_cost(2);
1618 } else {
1619 save_in += unpack_cost(2);
1620 }
1621 }
1622 }
1623
1624 // uses of result
1625 uint ct = 0;
1626 int save_use = 0;
1627 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
1628 Node* s1_use = s1->fast_out(i);
1629 for (int j = 0; j < _packset.length(); j++) {
1630 Node_List* p = _packset.at(j);
1631 if (p->at(0) == s1_use) {
1632 for (DUIterator_Fast kmax, k = s2->fast_outs(kmax); k < kmax; k++) {
1633 Node* s2_use = s2->fast_out(k);
1634 if (p->at(p->size()-1) == s2_use) {
1635 ct++;
1636 if (are_adjacent_refs(s1_use, s2_use)) {
1637 save_use += adjacent_profit(s1_use, s2_use);
1638 }
1639 }
1640 }
1641 }
1642 }
1643 }
1644
1645 if (ct < s1->outcnt()) save_use += unpack_cost(1);
1646 if (ct < s2->outcnt()) save_use += unpack_cost(1);
1647
1648 return MAX2(save_in, save_use);
1649}
1650
1651//------------------------------costs---------------------------
1652int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; }
1653int SuperWord::pack_cost(int ct) { return ct; }
1654int SuperWord::unpack_cost(int ct) { return ct; }
1655
1656//------------------------------combine_packs---------------------------
1657// Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last
1658void SuperWord::combine_packs() {
1659 bool changed = true;
1660 // Combine packs regardless of max vector size.
1661 while (changed) {
1662 changed = false;
1663 for (int i = 0; i < _packset.length(); i++) {
1664 Node_List* p1 = _packset.at(i);
1665 if (p1 == NULL) continue;
1666 // Because of sorting we can start at i + 1
1667 for (int j = i + 1; j < _packset.length(); j++) {
1668 Node_List* p2 = _packset.at(j);
1669 if (p2 == NULL) continue;
1670 if (i == j) continue;
1671 if (p1->at(p1->size()-1) == p2->at(0)) {
1672 for (uint k = 1; k < p2->size(); k++) {
1673 p1->push(p2->at(k));
1674 }
1675 _packset.at_put(j, NULL);
1676 changed = true;
1677 }
1678 }
1679 }
1680 }
1681
1682 // Split packs which have size greater than max vector size.
1683 for (int i = 0; i < _packset.length(); i++) {
1684 Node_List* p1 = _packset.at(i);
1685 if (p1 != NULL) {
1686 BasicType bt = velt_basic_type(p1->at(0));
1687 uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector
1688 assert(is_power_of_2(max_vlen), "sanity");
1689 uint psize = p1->size();
1690 if (!is_power_of_2(psize)) {
1691 // Skip pack which can't be vector.
1692 // case1: for(...) { a[i] = i; } elements values are different (i+x)
1693 // case2: for(...) { a[i] = b[i+1]; } can't align both, load and store
1694 _packset.at_put(i, NULL);
1695 continue;
1696 }
1697 if (psize > max_vlen) {
1698 Node_List* pack = new Node_List();
1699 for (uint j = 0; j < psize; j++) {
1700 pack->push(p1->at(j));
1701 if (pack->size() >= max_vlen) {
1702 assert(is_power_of_2(pack->size()), "sanity");
1703 _packset.append(pack);
1704 pack = new Node_List();
1705 }
1706 }
1707 _packset.at_put(i, NULL);
1708 }
1709 }
1710 }
1711
1712 // Compress list.
1713 for (int i = _packset.length() - 1; i >= 0; i--) {
1714 Node_List* p1 = _packset.at(i);
1715 if (p1 == NULL) {
1716 _packset.remove_at(i);
1717 }
1718 }
1719
1720 if (TraceSuperWord) {
1721 tty->print_cr("\nAfter combine_packs");
1722 print_packset();
1723 }
1724}
1725
1726//-----------------------------construct_my_pack_map--------------------------
1727// Construct the map from nodes to packs. Only valid after the
1728// point where a node is only in one pack (after combine_packs).
1729void SuperWord::construct_my_pack_map() {
1730 Node_List* rslt = NULL;
1731 for (int i = 0; i < _packset.length(); i++) {
1732 Node_List* p = _packset.at(i);
1733 for (uint j = 0; j < p->size(); j++) {
1734 Node* s = p->at(j);
1735#ifdef ASSERT
1736 if (my_pack(s) != NULL) {
1737 s->dump(1);
1738 tty->print_cr("packs[%d]:", i);
1739 print_pack(p);
1740 assert(false, "only in one pack");
1741 }
1742#endif
1743 set_my_pack(s, p);
1744 }
1745 }
1746}
1747
1748//------------------------------filter_packs---------------------------
1749// Remove packs that are not implemented or not profitable.
1750void SuperWord::filter_packs() {
1751 // Remove packs that are not implemented
1752 for (int i = _packset.length() - 1; i >= 0; i--) {
1753 Node_List* pk = _packset.at(i);
1754 bool impl = implemented(pk);
1755 if (!impl) {
1756#ifndef PRODUCT
1757 if ((TraceSuperWord && Verbose) || _vector_loop_debug) {
1758 tty->print_cr("Unimplemented");
1759 pk->at(0)->dump();
1760 }
1761#endif
1762 remove_pack_at(i);
1763 }
1764 Node *n = pk->at(0);
1765 if (n->is_reduction()) {
1766 _num_reductions++;
1767 } else {
1768 _num_work_vecs++;
1769 }
1770 }
1771
1772 // Remove packs that are not profitable
1773 bool changed;
1774 do {
1775 changed = false;
1776 for (int i = _packset.length() - 1; i >= 0; i--) {
1777 Node_List* pk = _packset.at(i);
1778 bool prof = profitable(pk);
1779 if (!prof) {
1780#ifndef PRODUCT
1781 if ((TraceSuperWord && Verbose) || _vector_loop_debug) {
1782 tty->print_cr("Unprofitable");
1783 pk->at(0)->dump();
1784 }
1785#endif
1786 remove_pack_at(i);
1787 changed = true;
1788 }
1789 }
1790 } while (changed);
1791
1792#ifndef PRODUCT
1793 if (TraceSuperWord) {
1794 tty->print_cr("\nAfter filter_packs");
1795 print_packset();
1796 tty->cr();
1797 }
1798#endif
1799}
1800
1801//------------------------------merge_packs_to_cmovd---------------------------
1802// Merge CMoveD into new vector-nodes
1803// We want to catch this pattern and subsume CmpD and Bool into CMoveD
1804//
1805// SubD ConD
1806// / | /
1807// / | / /
1808// / | / /
1809// / | / /
1810// / / /
1811// / / | /
1812// v / | /
1813// CmpD | /
1814// | | /
1815// v | /
1816// Bool | /
1817// \ | /
1818// \ | /
1819// \ | /
1820// \ | /
1821// \ v /
1822// CMoveD
1823//
1824
1825void SuperWord::merge_packs_to_cmovd() {
1826 for (int i = _packset.length() - 1; i >= 0; i--) {
1827 _cmovev_kit.make_cmovevd_pack(_packset.at(i));
1828 }
1829 #ifndef PRODUCT
1830 if (TraceSuperWord) {
1831 tty->print_cr("\nSuperWord::merge_packs_to_cmovd(): After merge");
1832 print_packset();
1833 tty->cr();
1834 }
1835 #endif
1836}
1837
1838Node* CMoveKit::is_Bool_candidate(Node* def) const {
1839 Node* use = NULL;
1840 if (!def->is_Bool() || def->in(0) != NULL || def->outcnt() != 1) {
1841 return NULL;
1842 }
1843 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
1844 use = def->fast_out(j);
1845 if (!_sw->same_generation(def, use) || !use->is_CMove()) {
1846 return NULL;
1847 }
1848 }
1849 return use;
1850}
1851
1852Node* CMoveKit::is_CmpD_candidate(Node* def) const {
1853 Node* use = NULL;
1854 if (!def->is_Cmp() || def->in(0) != NULL || def->outcnt() != 1) {
1855 return NULL;
1856 }
1857 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
1858 use = def->fast_out(j);
1859 if (!_sw->same_generation(def, use) || (use = is_Bool_candidate(use)) == NULL || !_sw->same_generation(def, use)) {
1860 return NULL;
1861 }
1862 }
1863 return use;
1864}
1865
1866Node_List* CMoveKit::make_cmovevd_pack(Node_List* cmovd_pk) {
1867 Node *cmovd = cmovd_pk->at(0);
1868 if (!cmovd->is_CMove()) {
1869 return NULL;
1870 }
1871 if (cmovd->Opcode() != Op_CMoveF && cmovd->Opcode() != Op_CMoveD) {
1872 return NULL;
1873 }
1874 if (pack(cmovd) != NULL) { // already in the cmov pack
1875 return NULL;
1876 }
1877 if (cmovd->in(0) != NULL) {
1878 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: CMoveD %d has control flow, escaping...", cmovd->_idx); cmovd->dump();})
1879 return NULL;
1880 }
1881
1882 Node* bol = cmovd->as_CMove()->in(CMoveNode::Condition);
1883 if (!bol->is_Bool()
1884 || bol->outcnt() != 1
1885 || !_sw->same_generation(bol, cmovd)
1886 || bol->in(0) != NULL // BoolNode has control flow!!
1887 || _sw->my_pack(bol) == NULL) {
1888 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: Bool %d does not fit CMoveD %d for building vector, escaping...", bol->_idx, cmovd->_idx); bol->dump();})
1889 return NULL;
1890 }
1891 Node_List* bool_pk = _sw->my_pack(bol);
1892 if (bool_pk->size() != cmovd_pk->size() ) {
1893 return NULL;
1894 }
1895
1896 Node* cmpd = bol->in(1);
1897 if (!cmpd->is_Cmp()
1898 || cmpd->outcnt() != 1
1899 || !_sw->same_generation(cmpd, cmovd)
1900 || cmpd->in(0) != NULL // CmpDNode has control flow!!
1901 || _sw->my_pack(cmpd) == NULL) {
1902 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: CmpD %d does not fit CMoveD %d for building vector, escaping...", cmpd->_idx, cmovd->_idx); cmpd->dump();})
1903 return NULL;
1904 }
1905 Node_List* cmpd_pk = _sw->my_pack(cmpd);
1906 if (cmpd_pk->size() != cmovd_pk->size() ) {
1907 return NULL;
1908 }
1909
1910 if (!test_cmpd_pack(cmpd_pk, cmovd_pk)) {
1911 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: cmpd pack for CmpD %d failed vectorization test", cmpd->_idx); cmpd->dump();})
1912 return NULL;
1913 }
1914
1915 Node_List* new_cmpd_pk = new Node_List();
1916 uint sz = cmovd_pk->size() - 1;
1917 for (uint i = 0; i <= sz; ++i) {
1918 Node* cmov = cmovd_pk->at(i);
1919 Node* bol = bool_pk->at(i);
1920 Node* cmp = cmpd_pk->at(i);
1921
1922 new_cmpd_pk->insert(i, cmov);
1923
1924 map(cmov, new_cmpd_pk);
1925 map(bol, new_cmpd_pk);
1926 map(cmp, new_cmpd_pk);
1927
1928 _sw->set_my_pack(cmov, new_cmpd_pk); // and keep old packs for cmp and bool
1929 }
1930 _sw->_packset.remove(cmovd_pk);
1931 _sw->_packset.remove(bool_pk);
1932 _sw->_packset.remove(cmpd_pk);
1933 _sw->_packset.append(new_cmpd_pk);
1934 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print_cr("CMoveKit::make_cmovevd_pack: added syntactic CMoveD pack"); _sw->print_pack(new_cmpd_pk);})
1935 return new_cmpd_pk;
1936}
1937
1938bool CMoveKit::test_cmpd_pack(Node_List* cmpd_pk, Node_List* cmovd_pk) {
1939 Node* cmpd0 = cmpd_pk->at(0);
1940 assert(cmpd0->is_Cmp(), "CMoveKit::test_cmpd_pack: should be CmpDNode");
1941 assert(cmovd_pk->at(0)->is_CMove(), "CMoveKit::test_cmpd_pack: should be CMoveD");
1942 assert(cmpd_pk->size() == cmovd_pk->size(), "CMoveKit::test_cmpd_pack: should be same size");
1943 Node* in1 = cmpd0->in(1);
1944 Node* in2 = cmpd0->in(2);
1945 Node_List* in1_pk = _sw->my_pack(in1);
1946 Node_List* in2_pk = _sw->my_pack(in2);
1947
1948 if ( (in1_pk != NULL && in1_pk->size() != cmpd_pk->size())
1949 || (in2_pk != NULL && in2_pk->size() != cmpd_pk->size()) ) {
1950 return false;
1951 }
1952
1953 // test if "all" in1 are in the same pack or the same node
1954 if (in1_pk == NULL) {
1955 for (uint j = 1; j < cmpd_pk->size(); j++) {
1956 if (cmpd_pk->at(j)->in(1) != in1) {
1957 return false;
1958 }
1959 }//for: in1_pk is not pack but all CmpD nodes in the pack have the same in(1)
1960 }
1961 // test if "all" in2 are in the same pack or the same node
1962 if (in2_pk == NULL) {
1963 for (uint j = 1; j < cmpd_pk->size(); j++) {
1964 if (cmpd_pk->at(j)->in(2) != in2) {
1965 return false;
1966 }
1967 }//for: in2_pk is not pack but all CmpD nodes in the pack have the same in(2)
1968 }
1969 //now check if cmpd_pk may be subsumed in vector built for cmovd_pk
1970 int cmovd_ind1, cmovd_ind2;
1971 if (cmpd_pk->at(0)->in(1) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfFalse)
1972 && cmpd_pk->at(0)->in(2) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfTrue)) {
1973 cmovd_ind1 = CMoveNode::IfFalse;
1974 cmovd_ind2 = CMoveNode::IfTrue;
1975 } else if (cmpd_pk->at(0)->in(2) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfFalse)
1976 && cmpd_pk->at(0)->in(1) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfTrue)) {
1977 cmovd_ind2 = CMoveNode::IfFalse;
1978 cmovd_ind1 = CMoveNode::IfTrue;
1979 }
1980 else {
1981 return false;
1982 }
1983
1984 for (uint j = 1; j < cmpd_pk->size(); j++) {
1985 if (cmpd_pk->at(j)->in(1) != cmovd_pk->at(j)->as_CMove()->in(cmovd_ind1)
1986 || cmpd_pk->at(j)->in(2) != cmovd_pk->at(j)->as_CMove()->in(cmovd_ind2)) {
1987 return false;
1988 }//if
1989 }
1990 NOT_PRODUCT(if(_sw->is_trace_cmov()) { tty->print("CMoveKit::test_cmpd_pack: cmpd pack for 1st CmpD %d is OK for vectorization: ", cmpd0->_idx); cmpd0->dump(); })
1991 return true;
1992}
1993
1994//------------------------------implemented---------------------------
1995// Can code be generated for pack p?
1996bool SuperWord::implemented(Node_List* p) {
1997 bool retValue = false;
1998 Node* p0 = p->at(0);
1999 if (p0 != NULL) {
2000 int opc = p0->Opcode();
2001 uint size = p->size();
2002 if (p0->is_reduction()) {
2003 const Type *arith_type = p0->bottom_type();
2004 // Length 2 reductions of INT/LONG do not offer performance benefits
2005 if (((arith_type->basic_type() == T_INT) || (arith_type->basic_type() == T_LONG)) && (size == 2)) {
2006 retValue = false;
2007 } else {
2008 retValue = ReductionNode::implemented(opc, size, arith_type->basic_type());
2009 }
2010 } else {
2011 retValue = VectorNode::implemented(opc, size, velt_basic_type(p0));
2012 }
2013 if (!retValue) {
2014 if (is_cmov_pack(p)) {
2015 NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::implemented: found cmpd pack"); print_pack(p);})
2016 return true;
2017 }
2018 }
2019 }
2020 return retValue;
2021}
2022
2023bool SuperWord::is_cmov_pack(Node_List* p) {
2024 return _cmovev_kit.pack(p->at(0)) != NULL;
2025}
2026//------------------------------same_inputs--------------------------
2027// For pack p, are all idx operands the same?
2028bool SuperWord::same_inputs(Node_List* p, int idx) {
2029 Node* p0 = p->at(0);
2030 uint vlen = p->size();
2031 Node* p0_def = p0->in(idx);
2032 for (uint i = 1; i < vlen; i++) {
2033 Node* pi = p->at(i);
2034 Node* pi_def = pi->in(idx);
2035 if (p0_def != pi_def) {
2036 return false;
2037 }
2038 }
2039 return true;
2040}
2041
2042//------------------------------profitable---------------------------
2043// For pack p, are all operands and all uses (within the block) vector?
2044bool SuperWord::profitable(Node_List* p) {
2045 Node* p0 = p->at(0);
2046 uint start, end;
2047 VectorNode::vector_operands(p0, &start, &end);
2048
2049 // Return false if some inputs are not vectors or vectors with different
2050 // size or alignment.
2051 // Also, for now, return false if not scalar promotion case when inputs are
2052 // the same. Later, implement PackNode and allow differing, non-vector inputs
2053 // (maybe just the ones from outside the block.)
2054 for (uint i = start; i < end; i++) {
2055 if (!is_vector_use(p0, i)) {
2056 return false;
2057 }
2058 }
2059 // Check if reductions are connected
2060 if (p0->is_reduction()) {
2061 Node* second_in = p0->in(2);
2062 Node_List* second_pk = my_pack(second_in);
2063 if ((second_pk == NULL) || (_num_work_vecs == _num_reductions)) {
2064 // Remove reduction flag if no parent pack or if not enough work
2065 // to cover reduction expansion overhead
2066 p0->remove_flag(Node::Flag_is_reduction);
2067 return false;
2068 } else if (second_pk->size() != p->size()) {
2069 return false;
2070 }
2071 }
2072 if (VectorNode::is_shift(p0)) {
2073 // For now, return false if shift count is vector or not scalar promotion
2074 // case (different shift counts) because it is not supported yet.
2075 Node* cnt = p0->in(2);
2076 Node_List* cnt_pk = my_pack(cnt);
2077 if (cnt_pk != NULL)
2078 return false;
2079 if (!same_inputs(p, 2))
2080 return false;
2081 }
2082 if (!p0->is_Store()) {
2083 // For now, return false if not all uses are vector.
2084 // Later, implement ExtractNode and allow non-vector uses (maybe
2085 // just the ones outside the block.)
2086 for (uint i = 0; i < p->size(); i++) {
2087 Node* def = p->at(i);
2088 if (is_cmov_pack_internal_node(p, def)) {
2089 continue;
2090 }
2091 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2092 Node* use = def->fast_out(j);
2093 for (uint k = 0; k < use->req(); k++) {
2094 Node* n = use->in(k);
2095 if (def == n) {
2096 // Reductions should only have a Phi use at the loop head or a non-phi use
2097 // outside of the loop if it is the last element of the pack (e.g. SafePoint).
2098 if (def->is_reduction() &&
2099 ((use->is_Phi() && use->in(0) == _lpt->_head) ||
2100 (!_lpt->is_member(_phase->get_loop(_phase->ctrl_or_self(use))) && i == p->size()-1))) {
2101 continue;
2102 }
2103 if (!is_vector_use(use, k)) {
2104 return false;
2105 }
2106 }
2107 }
2108 }
2109 }
2110 }
2111 return true;
2112}
2113
2114//------------------------------schedule---------------------------
2115// Adjust the memory graph for the packed operations
2116void SuperWord::schedule() {
2117
2118 // Co-locate in the memory graph the members of each memory pack
2119 for (int i = 0; i < _packset.length(); i++) {
2120 co_locate_pack(_packset.at(i));
2121 }
2122}
2123
2124//-------------------------------remove_and_insert-------------------
2125// Remove "current" from its current position in the memory graph and insert
2126// it after the appropriate insertion point (lip or uip).
2127void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip,
2128 Node *uip, Unique_Node_List &sched_before) {
2129 Node* my_mem = current->in(MemNode::Memory);
2130 bool sched_up = sched_before.member(current);
2131
2132 // remove current_store from its current position in the memory graph
2133 for (DUIterator i = current->outs(); current->has_out(i); i++) {
2134 Node* use = current->out(i);
2135 if (use->is_Mem()) {
2136 assert(use->in(MemNode::Memory) == current, "must be");
2137 if (use == prev) { // connect prev to my_mem
2138 _igvn.replace_input_of(use, MemNode::Memory, my_mem);
2139 --i; //deleted this edge; rescan position
2140 } else if (sched_before.member(use)) {
2141 if (!sched_up) { // Will be moved together with current
2142 _igvn.replace_input_of(use, MemNode::Memory, uip);
2143 --i; //deleted this edge; rescan position
2144 }
2145 } else {
2146 if (sched_up) { // Will be moved together with current
2147 _igvn.replace_input_of(use, MemNode::Memory, lip);
2148 --i; //deleted this edge; rescan position
2149 }
2150 }
2151 }
2152 }
2153
2154 Node *insert_pt = sched_up ? uip : lip;
2155
2156 // all uses of insert_pt's memory state should use current's instead
2157 for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) {
2158 Node* use = insert_pt->out(i);
2159 if (use->is_Mem()) {
2160 assert(use->in(MemNode::Memory) == insert_pt, "must be");
2161 _igvn.replace_input_of(use, MemNode::Memory, current);
2162 --i; //deleted this edge; rescan position
2163 } else if (!sched_up && use->is_Phi() && use->bottom_type() == Type::MEMORY) {
2164 uint pos; //lip (lower insert point) must be the last one in the memory slice
2165 for (pos=1; pos < use->req(); pos++) {
2166 if (use->in(pos) == insert_pt) break;
2167 }
2168 _igvn.replace_input_of(use, pos, current);
2169 --i;
2170 }
2171 }
2172
2173 //connect current to insert_pt
2174 _igvn.replace_input_of(current, MemNode::Memory, insert_pt);
2175}
2176
2177//------------------------------co_locate_pack----------------------------------
2178// To schedule a store pack, we need to move any sandwiched memory ops either before
2179// or after the pack, based upon dependence information:
2180// (1) If any store in the pack depends on the sandwiched memory op, the
2181// sandwiched memory op must be scheduled BEFORE the pack;
2182// (2) If a sandwiched memory op depends on any store in the pack, the
2183// sandwiched memory op must be scheduled AFTER the pack;
2184// (3) If a sandwiched memory op (say, memA) depends on another sandwiched
2185// memory op (say memB), memB must be scheduled before memA. So, if memA is
2186// scheduled before the pack, memB must also be scheduled before the pack;
2187// (4) If there is no dependence restriction for a sandwiched memory op, we simply
2188// schedule this store AFTER the pack
2189// (5) We know there is no dependence cycle, so there is no other case;
2190// (6) Finally, all memory ops in another single pack should be moved in the same direction.
2191//
2192// To schedule a load pack, we use the memory state of either the first or the last load in
2193// the pack, based on the dependence constraint.
2194void SuperWord::co_locate_pack(Node_List* pk) {
2195 if (pk->at(0)->is_Store()) {
2196 MemNode* first = executed_first(pk)->as_Mem();
2197 MemNode* last = executed_last(pk)->as_Mem();
2198 Unique_Node_List schedule_before_pack;
2199 Unique_Node_List memops;
2200
2201 MemNode* current = last->in(MemNode::Memory)->as_Mem();
2202 MemNode* previous = last;
2203 while (true) {
2204 assert(in_bb(current), "stay in block");
2205 memops.push(previous);
2206 for (DUIterator i = current->outs(); current->has_out(i); i++) {
2207 Node* use = current->out(i);
2208 if (use->is_Mem() && use != previous)
2209 memops.push(use);
2210 }
2211 if (current == first) break;
2212 previous = current;
2213 current = current->in(MemNode::Memory)->as_Mem();
2214 }
2215
2216 // determine which memory operations should be scheduled before the pack
2217 for (uint i = 1; i < memops.size(); i++) {
2218 Node *s1 = memops.at(i);
2219 if (!in_pack(s1, pk) && !schedule_before_pack.member(s1)) {
2220 for (uint j = 0; j< i; j++) {
2221 Node *s2 = memops.at(j);
2222 if (!independent(s1, s2)) {
2223 if (in_pack(s2, pk) || schedule_before_pack.member(s2)) {
2224 schedule_before_pack.push(s1); // s1 must be scheduled before
2225 Node_List* mem_pk = my_pack(s1);
2226 if (mem_pk != NULL) {
2227 for (uint ii = 0; ii < mem_pk->size(); ii++) {
2228 Node* s = mem_pk->at(ii); // follow partner
2229 if (memops.member(s) && !schedule_before_pack.member(s))
2230 schedule_before_pack.push(s);
2231 }
2232 }
2233 break;
2234 }
2235 }
2236 }
2237 }
2238 }
2239
2240 Node* upper_insert_pt = first->in(MemNode::Memory);
2241 // Following code moves loads connected to upper_insert_pt below aliased stores.
2242 // Collect such loads here and reconnect them back to upper_insert_pt later.
2243 memops.clear();
2244 for (DUIterator i = upper_insert_pt->outs(); upper_insert_pt->has_out(i); i++) {
2245 Node* use = upper_insert_pt->out(i);
2246 if (use->is_Mem() && !use->is_Store()) {
2247 memops.push(use);
2248 }
2249 }
2250
2251 MemNode* lower_insert_pt = last;
2252 previous = last; //previous store in pk
2253 current = last->in(MemNode::Memory)->as_Mem();
2254
2255 // start scheduling from "last" to "first"
2256 while (true) {
2257 assert(in_bb(current), "stay in block");
2258 assert(in_pack(previous, pk), "previous stays in pack");
2259 Node* my_mem = current->in(MemNode::Memory);
2260
2261 if (in_pack(current, pk)) {
2262 // Forward users of my memory state (except "previous") to my input memory state
2263 for (DUIterator i = current->outs(); current->has_out(i); i++) {
2264 Node* use = current->out(i);
2265 if (use->is_Mem() && use != previous) {
2266 assert(use->in(MemNode::Memory) == current, "must be");
2267 if (schedule_before_pack.member(use)) {
2268 _igvn.replace_input_of(use, MemNode::Memory, upper_insert_pt);
2269 } else {
2270 _igvn.replace_input_of(use, MemNode::Memory, lower_insert_pt);
2271 }
2272 --i; // deleted this edge; rescan position
2273 }
2274 }
2275 previous = current;
2276 } else { // !in_pack(current, pk) ==> a sandwiched store
2277 remove_and_insert(current, previous, lower_insert_pt, upper_insert_pt, schedule_before_pack);
2278 }
2279
2280 if (current == first) break;
2281 current = my_mem->as_Mem();
2282 } // end while
2283
2284 // Reconnect loads back to upper_insert_pt.
2285 for (uint i = 0; i < memops.size(); i++) {
2286 Node *ld = memops.at(i);
2287 if (ld->in(MemNode::Memory) != upper_insert_pt) {
2288 _igvn.replace_input_of(ld, MemNode::Memory, upper_insert_pt);
2289 }
2290 }
2291 } else if (pk->at(0)->is_Load()) { // Load pack
2292 // All loads in the pack should have the same memory state. By default,
2293 // we use the memory state of the last load. However, if any load could
2294 // not be moved down due to the dependence constraint, we use the memory
2295 // state of the first load.
2296 Node* mem_input = pick_mem_state(pk);
2297 _igvn.hash_delete(mem_input);
2298 // Give each load the same memory state
2299 for (uint i = 0; i < pk->size(); i++) {
2300 LoadNode* ld = pk->at(i)->as_Load();
2301 _igvn.replace_input_of(ld, MemNode::Memory, mem_input);
2302 }
2303 }
2304}
2305
2306// Finds the first and last memory state and then picks either of them by checking dependence constraints.
2307// If a store is dependent on an earlier load then we need to pick the memory state of the first load and cannot
2308// pick the memory state of the last load.
2309Node* SuperWord::pick_mem_state(Node_List* pk) {
2310 Node* first_mem = find_first_mem_state(pk);
2311 Node* last_mem = find_last_mem_state(pk, first_mem);
2312
2313 for (uint i = 0; i < pk->size(); i++) {
2314 Node* ld = pk->at(i);
2315 for (Node* current = last_mem; current != ld->in(MemNode::Memory); current = current->in(MemNode::Memory)) {
2316 assert(current->is_Mem() && in_bb(current), "unexpected memory");
2317 assert(current != first_mem, "corrupted memory graph");
2318 if (!independent(current, ld)) {
2319 // A later store depends on this load, pick the memory state of the first load. This can happen, for example,
2320 // if a load pack has interleaving stores that are part of a store pack which, however, is removed at the pack
2321 // filtering stage. This leaves us with only a load pack for which we cannot take the memory state of the
2322 // last load as the remaining unvectorized stores could interfere since they have a dependency to the loads.
2323 // Some stores could be executed before the load vector resulting in a wrong result. We need to take the
2324 // memory state of the first load to prevent this.
2325 return first_mem;
2326 }
2327 }
2328 }
2329 return last_mem;
2330}
2331
2332// Walk the memory graph from the current first load until the
2333// start of the loop and check if nodes on the way are memory
2334// edges of loads in the pack. The last one we encounter is the
2335// first load.
2336Node* SuperWord::find_first_mem_state(Node_List* pk) {
2337 Node* first_mem = pk->at(0)->in(MemNode::Memory);
2338 for (Node* current = first_mem; in_bb(current); current = current->is_Phi() ? current->in(LoopNode::EntryControl) : current->in(MemNode::Memory)) {
2339 assert(current->is_Mem() || (current->is_Phi() && current->in(0) == bb()), "unexpected memory");
2340 for (uint i = 1; i < pk->size(); i++) {
2341 Node* ld = pk->at(i);
2342 if (ld->in(MemNode::Memory) == current) {
2343 first_mem = current;
2344 break;
2345 }
2346 }
2347 }
2348 return first_mem;
2349}
2350
2351// Find the last load by going over the pack again and walking
2352// the memory graph from the loads of the pack to the memory of
2353// the first load. If we encounter the memory of the current last
2354// load, then we started from further down in the memory graph and
2355// the load we started from is the last load.
2356Node* SuperWord::find_last_mem_state(Node_List* pk, Node* first_mem) {
2357 Node* last_mem = pk->at(0)->in(MemNode::Memory);
2358 for (uint i = 0; i < pk->size(); i++) {
2359 Node* ld = pk->at(i);
2360 for (Node* current = ld->in(MemNode::Memory); current != first_mem; current = current->in(MemNode::Memory)) {
2361 assert(current->is_Mem() && in_bb(current), "unexpected memory");
2362 if (current->in(MemNode::Memory) == last_mem) {
2363 last_mem = ld->in(MemNode::Memory);
2364 }
2365 }
2366 }
2367 return last_mem;
2368}
2369
2370#ifndef PRODUCT
2371void SuperWord::print_loop(bool whole) {
2372 Node_Stack stack(_arena, _phase->C->unique() >> 2);
2373 Node_List rpo_list;
2374 VectorSet visited(_arena);
2375 visited.set(lpt()->_head->_idx);
2376 _phase->rpo(lpt()->_head, stack, visited, rpo_list);
2377 _phase->dump(lpt(), rpo_list.size(), rpo_list );
2378 if(whole) {
2379 tty->print_cr("\n Whole loop tree");
2380 _phase->dump();
2381 tty->print_cr(" End of whole loop tree\n");
2382 }
2383}
2384#endif
2385
2386//------------------------------output---------------------------
2387// Convert packs into vector node operations
2388void SuperWord::output() {
2389 CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
2390 Compile* C = _phase->C;
2391 if (_packset.length() == 0) {
2392 if (cl->is_main_loop()) {
2393 // Instigate more unrolling for optimization when vectorization fails.
2394 C->set_major_progress();
2395 cl->set_notpassed_slp();
2396 cl->mark_do_unroll_only();
2397 }
2398 return;
2399 }
2400
2401#ifndef PRODUCT
2402 if (TraceLoopOpts) {
2403 tty->print("SuperWord::output ");
2404 lpt()->dump_head();
2405 }
2406#endif
2407
2408 if (cl->is_main_loop()) {
2409 // MUST ENSURE main loop's initial value is properly aligned:
2410 // (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0
2411
2412 align_initial_loop_index(align_to_ref());
2413
2414 // Insert extract (unpack) operations for scalar uses
2415 for (int i = 0; i < _packset.length(); i++) {
2416 insert_extracts(_packset.at(i));
2417 }
2418 }
2419
2420 uint max_vlen_in_bytes = 0;
2421 uint max_vlen = 0;
2422 bool can_process_post_loop = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
2423
2424 NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop before create_reserve_version_of_loop"); print_loop(true);})
2425
2426 CountedLoopReserveKit make_reversable(_phase, _lpt, do_reserve_copy());
2427
2428 NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop after create_reserve_version_of_loop"); print_loop(true);})
2429
2430 if (do_reserve_copy() && !make_reversable.has_reserved()) {
2431 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: loop was not reserved correctly, exiting SuperWord");})
2432 return;
2433 }
2434
2435 for (int i = 0; i < _block.length(); i++) {
2436 Node* n = _block.at(i);
2437 Node_List* p = my_pack(n);
2438 if (p && n == executed_last(p)) {
2439 uint vlen = p->size();
2440 uint vlen_in_bytes = 0;
2441 Node* vn = NULL;
2442 Node* low_adr = p->at(0);
2443 Node* first = executed_first(p);
2444 if (can_process_post_loop) {
2445 // override vlen with the main loop's vector length
2446 vlen = cl->slp_max_unroll();
2447 }
2448 NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d executed first, %d executed last in pack", first->_idx, n->_idx); print_pack(p);})
2449 int opc = n->Opcode();
2450 if (n->is_Load()) {
2451 Node* ctl = n->in(MemNode::Control);
2452 Node* mem = first->in(MemNode::Memory);
2453 SWPointer p1(n->as_Mem(), this, NULL, false);
2454 // Identify the memory dependency for the new loadVector node by
2455 // walking up through memory chain.
2456 // This is done to give flexibility to the new loadVector node so that
2457 // it can move above independent storeVector nodes.
2458 while (mem->is_StoreVector()) {
2459 SWPointer p2(mem->as_Mem(), this, NULL, false);
2460 int cmp = p1.cmp(p2);
2461 if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) {
2462 mem = mem->in(MemNode::Memory);
2463 } else {
2464 break; // dependent memory
2465 }
2466 }
2467 Node* adr = low_adr->in(MemNode::Address);
2468 const TypePtr* atyp = n->adr_type();
2469 vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n), control_dependency(p));
2470 vlen_in_bytes = vn->as_LoadVector()->memory_size();
2471 } else if (n->is_Store()) {
2472 // Promote value to be stored to vector
2473 Node* val = vector_opd(p, MemNode::ValueIn);
2474 if (val == NULL) {
2475 if (do_reserve_copy()) {
2476 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: val should not be NULL, exiting SuperWord");})
2477 return; //and reverse to backup IG
2478 }
2479 ShouldNotReachHere();
2480 }
2481
2482 Node* ctl = n->in(MemNode::Control);
2483 Node* mem = first->in(MemNode::Memory);
2484 Node* adr = low_adr->in(MemNode::Address);
2485 const TypePtr* atyp = n->adr_type();
2486 vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
2487 vlen_in_bytes = vn->as_StoreVector()->memory_size();
2488 } else if (VectorNode::is_scalar_rotate(n)) {
2489 Node* in1 = low_adr->in(1);
2490 Node* in2 = p->at(0)->in(2);
2491 // If rotation count is non-constant or greater than 8bit value create a vector.
2492 if (!in2->is_Con() || !Matcher::supports_vector_constant_rotates(in2->get_int())) {
2493 in2 = vector_opd(p, 2);
2494 }
2495 vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
2496 vlen_in_bytes = vn->as_Vector()->length_in_bytes();
2497 } else if (VectorNode::is_roundopD(n)) {
2498 Node* in1 = vector_opd(p, 1);
2499 Node* in2 = low_adr->in(2);
2500 assert(in2->is_Con(), "Constant rounding mode expected.");
2501 vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
2502 vlen_in_bytes = vn->as_Vector()->length_in_bytes();
2503 } else if (VectorNode::is_muladds2i(n)) {
2504 assert(n->req() == 5u, "MulAddS2I should have 4 operands.");
2505 Node* in1 = vector_opd(p, 1);
2506 Node* in2 = vector_opd(p, 2);
2507 vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
2508 vlen_in_bytes = vn->as_Vector()->length_in_bytes();
2509 } else if (n->req() == 3 && !is_cmov_pack(p)) {
2510 // Promote operands to vector
2511 Node* in1 = NULL;
2512 bool node_isa_reduction = n->is_reduction();
2513 if (node_isa_reduction) {
2514 // the input to the first reduction operation is retained
2515 in1 = low_adr->in(1);
2516 } else {
2517 in1 = vector_opd(p, 1);
2518 if (in1 == NULL) {
2519 if (do_reserve_copy()) {
2520 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: in1 should not be NULL, exiting SuperWord");})
2521 return; //and reverse to backup IG
2522 }
2523 ShouldNotReachHere();
2524 }
2525 }
2526 Node* in2 = vector_opd(p, 2);
2527 if (in2 == NULL) {
2528 if (do_reserve_copy()) {
2529 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: in2 should not be NULL, exiting SuperWord");})
2530 return; //and reverse to backup IG
2531 }
2532 ShouldNotReachHere();
2533 }
2534 if (VectorNode::is_invariant_vector(in1) && (node_isa_reduction == false) && (n->is_Add() || n->is_Mul())) {
2535 // Move invariant vector input into second position to avoid register spilling.
2536 Node* tmp = in1;
2537 in1 = in2;
2538 in2 = tmp;
2539 }
2540 if (node_isa_reduction) {
2541 const Type *arith_type = n->bottom_type();
2542 vn = ReductionNode::make(opc, NULL, in1, in2, arith_type->basic_type());
2543 if (in2->is_Load()) {
2544 vlen_in_bytes = in2->as_LoadVector()->memory_size();
2545 } else {
2546 vlen_in_bytes = in2->as_Vector()->length_in_bytes();
2547 }
2548 } else {
2549 vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
2550 vlen_in_bytes = vn->as_Vector()->length_in_bytes();
2551 }
2552 } else if (opc == Op_SqrtF || opc == Op_SqrtD ||
2553 opc == Op_AbsF || opc == Op_AbsD ||
2554 opc == Op_AbsI || opc == Op_AbsL ||
2555 opc == Op_NegF || opc == Op_NegD ||
2556 opc == Op_PopCountI) {
2557 assert(n->req() == 2, "only one input expected");
2558 Node* in = vector_opd(p, 1);
2559 vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n));
2560 vlen_in_bytes = vn->as_Vector()->length_in_bytes();
2561 } else if (opc == Op_ConvI2F || opc == Op_ConvL2D ||
2562 opc == Op_ConvF2I || opc == Op_ConvD2L) {
2563 assert(n->req() == 2, "only one input expected");
2564 BasicType bt = velt_basic_type(n);
2565 int vopc = VectorNode::opcode(opc, bt);
2566 Node* in = vector_opd(p, 1);
2567 vn = VectorCastNode::make(vopc, in, bt, vlen);
2568 vlen_in_bytes = vn->as_Vector()->length_in_bytes();
2569 } else if (is_cmov_pack(p)) {
2570 if (can_process_post_loop) {
2571 // do not refactor control flow in post loop context
2572 return;
2573 }
2574 if (!n->is_CMove()) {
2575 continue;
2576 }
2577 // place here CMoveVDNode
2578 NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: print before CMove vectorization"); print_loop(false);})
2579 Node* bol = n->in(CMoveNode::Condition);
2580 if (!bol->is_Bool() && bol->Opcode() == Op_ExtractI && bol->req() > 1 ) {
2581 NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d is not Bool node, trying its in(1) node %d", bol->_idx, bol->in(1)->_idx); bol->dump(); bol->in(1)->dump();})
2582 bol = bol->in(1); //may be ExtractNode
2583 }
2584
2585 assert(bol->is_Bool(), "should be BoolNode - too late to bail out!");
2586 if (!bol->is_Bool()) {
2587 if (do_reserve_copy()) {
2588 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: expected %d bool node, exiting SuperWord", bol->_idx); bol->dump();})
2589 return; //and reverse to backup IG
2590 }
2591 ShouldNotReachHere();
2592 }
2593
2594 int cond = (int)bol->as_Bool()->_test._test;
2595 Node* in_cc = _igvn.intcon(cond);
2596 NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created intcon in_cc node %d", in_cc->_idx); in_cc->dump();})
2597 Node* cc = bol->clone();
2598 cc->set_req(1, in_cc);
2599 NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created bool cc node %d", cc->_idx); cc->dump();})
2600
2601 Node* src1 = vector_opd(p, 2); //2=CMoveNode::IfFalse
2602 if (src1 == NULL) {
2603 if (do_reserve_copy()) {
2604 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src1 should not be NULL, exiting SuperWord");})
2605 return; //and reverse to backup IG
2606 }
2607 ShouldNotReachHere();
2608 }
2609 Node* src2 = vector_opd(p, 3); //3=CMoveNode::IfTrue
2610 if (src2 == NULL) {
2611 if (do_reserve_copy()) {
2612 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src2 should not be NULL, exiting SuperWord");})
2613 return; //and reverse to backup IG
2614 }
2615 ShouldNotReachHere();
2616 }
2617 BasicType bt = velt_basic_type(n);
2618 const TypeVect* vt = TypeVect::make(bt, vlen);
2619 assert(bt == T_FLOAT || bt == T_DOUBLE, "Only vectorization for FP cmovs is supported");
2620 if (bt == T_FLOAT) {
2621 vn = new CMoveVFNode(cc, src1, src2, vt);
2622 } else {
2623 assert(bt == T_DOUBLE, "Expected double");
2624 vn = new CMoveVDNode(cc, src1, src2, vt);
2625 }
2626 NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created new CMove node %d: ", vn->_idx); vn->dump();})
2627 } else if (opc == Op_FmaD || opc == Op_FmaF) {
2628 // Promote operands to vector
2629 Node* in1 = vector_opd(p, 1);
2630 Node* in2 = vector_opd(p, 2);
2631 Node* in3 = vector_opd(p, 3);
2632 vn = VectorNode::make(opc, in1, in2, in3, vlen, velt_basic_type(n));
2633 vlen_in_bytes = vn->as_Vector()->length_in_bytes();
2634 } else {
2635 if (do_reserve_copy()) {
2636 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: ShouldNotReachHere, exiting SuperWord");})
2637 return; //and reverse to backup IG
2638 }
2639 ShouldNotReachHere();
2640 }
2641
2642 assert(vn != NULL, "sanity");
2643 if (vn == NULL) {
2644 if (do_reserve_copy()){
2645 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: got NULL node, cannot proceed, exiting SuperWord");})
2646 return; //and reverse to backup IG
2647 }
2648 ShouldNotReachHere();
2649 }
2650
2651 _block.at_put(i, vn);
2652 _igvn.register_new_node_with_optimizer(vn);
2653 _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));
2654 for (uint j = 0; j < p->size(); j++) {
2655 Node* pm = p->at(j);
2656 _igvn.replace_node(pm, vn);
2657 }
2658 _igvn._worklist.push(vn);
2659
2660 if (can_process_post_loop) {
2661 // first check if the vector size is the maximum vector size which we can use on the machine,
2662 // other vector sizes have reduced values for predicated data mapping.
2663 if (vlen_in_bytes != (uint)MaxVectorSize) {
2664 return;
2665 }
2666 }
2667
2668 if (vlen > max_vlen) {
2669 max_vlen = vlen;
2670 }
2671 if (vlen_in_bytes > max_vlen_in_bytes) {
2672 max_vlen_in_bytes = vlen_in_bytes;
2673 }
2674#ifdef ASSERT
2675 if (TraceNewVectors) {
2676 tty->print("new Vector node: ");
2677 vn->dump();
2678 }
2679#endif
2680 }
2681 }//for (int i = 0; i < _block.length(); i++)
2682
2683 if (max_vlen_in_bytes > C->max_vector_size()) {
2684 C->set_max_vector_size(max_vlen_in_bytes);
2685 }
2686 if (max_vlen_in_bytes > 0) {
2687 cl->mark_loop_vectorized();
2688 }
2689
2690 if (SuperWordLoopUnrollAnalysis) {
2691 if (cl->has_passed_slp()) {
2692 uint slp_max_unroll_factor = cl->slp_max_unroll();
2693 if (slp_max_unroll_factor == max_vlen) {
2694 if (TraceSuperWordLoopUnrollAnalysis) {
2695 tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte);
2696 }
2697
2698 // For atomic unrolled loops which are vector mapped, instigate more unrolling
2699 cl->set_notpassed_slp();
2700 if (cl->is_main_loop()) {
2701 // if vector resources are limited, do not allow additional unrolling, also
2702 // do not unroll more on pure vector loops which were not reduced so that we can
2703 // program the post loop to single iteration execution.
2704 if (Matcher::float_pressure_limit() > 8) {
2705 C->set_major_progress();
2706 cl->mark_do_unroll_only();
2707 }
2708 }
2709
2710 if (do_reserve_copy()) {
2711 if (can_process_post_loop) {
2712 // Now create the difference of trip and limit and use it as our mask index.
2713 // Note: We limited the unroll of the vectorized loop so that
2714 // only vlen-1 size iterations can remain to be mask programmed.
2715 Node *incr = cl->incr();
2716 SubINode *index = new SubINode(cl->limit(), cl->init_trip());
2717 _igvn.register_new_node_with_optimizer(index);
2718 SetVectMaskINode *mask = new SetVectMaskINode(_phase->get_ctrl(cl->init_trip()), index);
2719 _igvn.register_new_node_with_optimizer(mask);
2720 // make this a single iteration loop
2721 AddINode *new_incr = new AddINode(incr->in(1), mask);
2722 _igvn.register_new_node_with_optimizer(new_incr);
2723 _phase->set_ctrl(new_incr, _phase->get_ctrl(incr));
2724 _igvn.replace_node(incr, new_incr);
2725 cl->mark_is_multiversioned();
2726 cl->loopexit()->add_flag(Node::Flag_has_vector_mask_set);
2727 }
2728 }
2729 }
2730 }
2731 }
2732
2733 if (do_reserve_copy()) {
2734 make_reversable.use_new();
2735 }
2736 NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("\n Final loop after SuperWord"); print_loop(true);})
2737 return;
2738}
2739
2740//------------------------------vector_opd---------------------------
2741// Create a vector operand for the nodes in pack p for operand: in(opd_idx)
2742Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
2743 Node* p0 = p->at(0);
2744 uint vlen = p->size();
2745 Node* opd = p0->in(opd_idx);
2746 CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
2747
2748 if (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop()) {
2749 // override vlen with the main loops vector length
2750 vlen = cl->slp_max_unroll();
2751 }
2752
2753 if (same_inputs(p, opd_idx)) {
2754 if (opd->is_Vector() || opd->is_LoadVector()) {
2755 assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector");
2756 if (opd_idx == 2 && VectorNode::is_shift(p0)) {
2757 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("shift's count can't be vector");})
2758 return NULL;
2759 }
2760 return opd; // input is matching vector
2761 }
2762 if ((opd_idx == 2) && VectorNode::is_shift(p0)) {
2763 Compile* C = _phase->C;
2764 Node* cnt = opd;
2765 // Vector instructions do not mask shift count, do it here.
2766 juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
2767 const TypeInt* t = opd->find_int_type();
2768 if (t != NULL && t->is_con()) {
2769 juint shift = t->get_con();
2770 if (shift > mask) { // Unsigned cmp
2771 cnt = ConNode::make(TypeInt::make(shift & mask));
2772 }
2773 } else {
2774 if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
2775 cnt = ConNode::make(TypeInt::make(mask));
2776 _igvn.register_new_node_with_optimizer(cnt);
2777 cnt = new AndINode(opd, cnt);
2778 _igvn.register_new_node_with_optimizer(cnt);
2779 _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
2780 }
2781 assert(opd->bottom_type()->isa_int(), "int type only");
2782 if (!opd->bottom_type()->isa_int()) {
2783 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should be int type only");})
2784 return NULL;
2785 }
2786 }
2787 // Move shift count into vector register.
2788 cnt = VectorNode::shift_count(p0->Opcode(), cnt, vlen, velt_basic_type(p0));
2789 _igvn.register_new_node_with_optimizer(cnt);
2790 _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
2791 return cnt;
2792 }
2793 assert(!opd->is_StoreVector(), "such vector is not expected here");
2794 if (opd->is_StoreVector()) {
2795 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("StoreVector is not expected here");})
2796 return NULL;
2797 }
2798 // Convert scalar input to vector with the same number of elements as
2799 // p0's vector. Use p0's type because size of operand's container in
2800 // vector should match p0's size regardless operand's size.
2801 const Type* p0_t = NULL;
2802 VectorNode* vn = NULL;
2803 if (opd_idx == 2 && VectorNode::is_scalar_rotate(p0)) {
2804 Node* conv = opd;
2805 p0_t = TypeInt::INT;
2806 if (p0->bottom_type()->isa_long()) {
2807 p0_t = TypeLong::LONG;
2808 conv = new ConvI2LNode(opd);
2809 _igvn.register_new_node_with_optimizer(conv);
2810 _phase->set_ctrl(conv, _phase->get_ctrl(opd));
2811 }
2812 vn = VectorNode::scalar2vector(conv, vlen, p0_t);
2813 } else {
2814 p0_t = velt_type(p0);
2815 vn = VectorNode::scalar2vector(opd, vlen, p0_t);
2816 }
2817
2818 _igvn.register_new_node_with_optimizer(vn);
2819 _phase->set_ctrl(vn, _phase->get_ctrl(opd));
2820#ifdef ASSERT
2821 if (TraceNewVectors) {
2822 tty->print("new Vector node: ");
2823 vn->dump();
2824 }
2825#endif
2826 return vn;
2827 }
2828
2829 // Insert pack operation
2830 BasicType bt = velt_basic_type(p0);
2831 PackNode* pk = PackNode::make(opd, vlen, bt);
2832 DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); )
2833
2834 for (uint i = 1; i < vlen; i++) {
2835 Node* pi = p->at(i);
2836 Node* in = pi->in(opd_idx);
2837 assert(my_pack(in) == NULL, "Should already have been unpacked");
2838 if (my_pack(in) != NULL) {
2839 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should already have been unpacked");})
2840 return NULL;
2841 }
2842 assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
2843 pk->add_opd(in);
2844 if (VectorNode::is_muladds2i(pi)) {
2845 Node* in2 = pi->in(opd_idx + 2);
2846 assert(my_pack(in2) == NULL, "Should already have been unpacked");
2847 if (my_pack(in2) != NULL) {
2848 NOT_PRODUCT(if (is_trace_loop_reverse() || TraceLoopOpts) { tty->print_cr("Should already have been unpacked"); })
2849 return NULL;
2850 }
2851 assert(opd_bt == in2->bottom_type()->basic_type(), "all same type");
2852 pk->add_opd(in2);
2853 }
2854 }
2855 _igvn.register_new_node_with_optimizer(pk);
2856 _phase->set_ctrl(pk, _phase->get_ctrl(opd));
2857#ifdef ASSERT
2858 if (TraceNewVectors) {
2859 tty->print("new Vector node: ");
2860 pk->dump();
2861 }
2862#endif
2863 return pk;
2864}
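//------------------------------------------------------------------------------------
// [Editorial note] Standalone sketch, not part of superword.cpp: vector_opd() above
// masks a variable shift count with (BitsPerInt-1) or (BitsPerLong-1) before moving it
// into a vector register, because Java's scalar shifts implicitly use only the low bits
// of the count while vector shift instructions do not. A minimal model of that rule
// (function names here are illustrative, not HotSpot APIs):
#include <cassert>
#include <cstdint>

static int32_t java_ishl(int32_t value, int32_t count) {
  // Scalar Java semantics: only the low 5 bits of an int shift count are used.
  return value << (count & 31);
}

static int32_t masked_vector_count(int32_t count) {
  // What the compiler materializes around the broadcast: cnt = AndI(cnt, 31).
  return count & 31;
}

int main() {
  // A count of 33 must behave like a count of 1 for 32-bit lanes.
  assert(java_ishl(1, 33) == (1 << masked_vector_count(33)));
  return 0;
}
//------------------------------------------------------------------------------------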
2865
2866//------------------------------insert_extracts---------------------------
2867// If a use of pack p is not a vector use, then replace the
2868// use with an extract operation.
2869void SuperWord::insert_extracts(Node_List* p) {
2870 if (p->at(0)->is_Store()) return;
2871 assert(_n_idx_list.is_empty(), "empty (node,index) list");
2872
2873 // Inspect each use of each pack member. For each use that is
2874 // not a vector use, replace the use with an extract operation.
2875
2876 for (uint i = 0; i < p->size(); i++) {
2877 Node* def = p->at(i);
2878 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2879 Node* use = def->fast_out(j);
2880 for (uint k = 0; k < use->req(); k++) {
2881 Node* n = use->in(k);
2882 if (def == n) {
2883 Node_List* u_pk = my_pack(use);
2884 if ((u_pk == NULL || !is_cmov_pack(u_pk) || use->is_CMove()) && !is_vector_use(use, k)) {
2885 _n_idx_list.push(use, k);
2886 }
2887 }
2888 }
2889 }
2890 }
2891
2892 while (_n_idx_list.is_nonempty()) {
2893 Node* use = _n_idx_list.node();
2894 int idx = _n_idx_list.index();
2895 _n_idx_list.pop();
2896 Node* def = use->in(idx);
2897
2898 if (def->is_reduction()) continue;
2899
2900 // Insert extract operation
2901 _igvn.hash_delete(def);
2902 int def_pos = alignment(def) / data_size(def);
2903
2904 Node* ex = ExtractNode::make(def, def_pos, velt_basic_type(def));
2905 _igvn.register_new_node_with_optimizer(ex);
2906 _phase->set_ctrl(ex, _phase->get_ctrl(def));
2907 _igvn.replace_input_of(use, idx, ex);
2908 _igvn._worklist.push(def);
2909
2910 bb_insert_after(ex, bb_idx(def));
2911 set_velt_type(ex, velt_type(def));
2912 }
2913}
2914
2915//------------------------------is_vector_use---------------------------
2916// Is use->in(u_idx) a vector use?
2917bool SuperWord::is_vector_use(Node* use, int u_idx) {
2918 Node_List* u_pk = my_pack(use);
2919 if (u_pk == NULL) return false;
2920 if (use->is_reduction()) return true;
2921 Node* def = use->in(u_idx);
2922 Node_List* d_pk = my_pack(def);
2923 if (d_pk == NULL) {
2924 // check for scalar promotion
2925 Node* n = u_pk->at(0)->in(u_idx);
2926 for (uint i = 1; i < u_pk->size(); i++) {
2927 if (u_pk->at(i)->in(u_idx) != n) return false;
2928 }
2929 return true;
2930 }
2931 if (VectorNode::is_muladds2i(use)) {
2932 // MulAddS2I takes shorts and produces ints - hence the special checks
2933 // on alignment and size.
2934 if (u_pk->size() * 2 != d_pk->size()) {
2935 return false;
2936 }
2937 for (uint i = 0; i < MIN2(d_pk->size(), u_pk->size()); i++) {
2938 Node* ui = u_pk->at(i);
2939 Node* di = d_pk->at(i);
2940 if (alignment(ui) != alignment(di) * 2) {
2941 return false;
2942 }
2943 }
2944 return true;
2945 }
2946 if (u_pk->size() != d_pk->size())
2947 return false;
2948 for (uint i = 0; i < u_pk->size(); i++) {
2949 Node* ui = u_pk->at(i);
2950 Node* di = d_pk->at(i);
2951 if (ui->in(u_idx) != di || alignment(ui) != alignment(di))
2952 return false;
2953 }
2954 return true;
2955}
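//------------------------------------------------------------------------------------
// [Editorial note] Standalone sketch, not part of superword.cpp: the "scalar promotion"
// branch of is_vector_use() above accepts an unpacked def only when every member of the
// use's pack reads the same scalar in that operand slot, so a single broadcast can feed
// the whole vector. Toy check over node ids (illustrative only):
#include <cassert>
#include <vector>

static bool same_scalar_input(const std::vector<int>& in_at_u_idx) {
  for (size_t i = 1; i < in_at_u_idx.size(); i++) {
    if (in_at_u_idx[i] != in_at_u_idx[0]) return false;  // mixed inputs: not a broadcast
  }
  return true;
}

int main() {
  assert(same_scalar_input({42, 42, 42, 42}));   // one invariant scalar feeds all lanes
  assert(!same_scalar_input({42, 42, 7, 42}));   // lane 2 reads a different node
  return 0;
}
//------------------------------------------------------------------------------------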
2956
2957//------------------------------construct_bb---------------------------
2958// Construct reverse postorder list of block members
2959bool SuperWord::construct_bb() {
2960 Node* entry = bb();
2961
2962 assert(_stk.length() == 0, "stk is empty");
2963 assert(_block.length() == 0, "block is empty");
2964 assert(_data_entry.length() == 0, "data_entry is empty");
2965 assert(_mem_slice_head.length() == 0, "mem_slice_head is empty");
2966 assert(_mem_slice_tail.length() == 0, "mem_slice_tail is empty");
2967
2968 // Find non-control nodes with no inputs from within block,
2969 // create a temporary map from node _idx to bb_idx for use
2970 // by the visited and post_visited sets,
2971 // and count number of nodes in block.
2972 int bb_ct = 0;
2973 for (uint i = 0; i < lpt()->_body.size(); i++) {
2974 Node *n = lpt()->_body.at(i);
2975 set_bb_idx(n, i); // Create a temporary map
2976 if (in_bb(n)) {
2977 if (n->is_LoadStore() || n->is_MergeMem() ||
2978 (n->is_Proj() && !n->as_Proj()->is_CFG())) {
2979 // Bailout if the loop has LoadStore, MergeMem or data Proj
2980 // nodes. Superword optimization does not work with them.
2981 return false;
2982 }
2983 bb_ct++;
2984 if (!n->is_CFG()) {
2985 bool found = false;
2986 for (uint j = 0; j < n->req(); j++) {
2987 Node* def = n->in(j);
2988 if (def && in_bb(def)) {
2989 found = true;
2990 break;
2991 }
2992 }
2993 if (!found) {
2994 assert(n != entry, "can't be entry");
2995 _data_entry.push(n);
2996 }
2997 }
2998 }
2999 }
3000
3001 // Find memory slices (head and tail)
3002 for (DUIterator_Fast imax, i = lp()->fast_outs(imax); i < imax; i++) {
3003 Node *n = lp()->fast_out(i);
3004 if (in_bb(n) && (n->is_Phi() && n->bottom_type() == Type::MEMORY)) {
3005 Node* n_tail = n->in(LoopNode::LoopBackControl);
3006 if (n_tail != n->in(LoopNode::EntryControl)) {
3007 if (!n_tail->is_Mem()) {
3008 assert(n_tail->is_Mem(), "unexpected node for memory slice: %s", n_tail->Name());
3009 return false; // Bailout
3010 }
3011 _mem_slice_head.push(n);
3012 _mem_slice_tail.push(n_tail);
3013 }
3014 }
3015 }
3016
3017 // Create an RPO list of nodes in block
3018
3019 visited_clear();
3020 post_visited_clear();
3021
3022 // Push all non-control nodes with no inputs from within block, then control entry
3023 for (int j = 0; j < _data_entry.length(); j++) {
3024 Node* n = _data_entry.at(j);
3025 visited_set(n);
3026 _stk.push(n);
3027 }
3028 visited_set(entry);
3029 _stk.push(entry);
3030
3031 // Do a depth first walk over out edges
3032 int rpo_idx = bb_ct - 1;
3033 int size;
3034 int reduction_uses = 0;
3035 while ((size = _stk.length()) > 0) {
3036 Node* n = _stk.top(); // Leave node on stack
3037 if (!visited_test_set(n)) {
3038 // forward arc in graph
3039 } else if (!post_visited_test(n)) {
3040 // cross or back arc
3041 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
3042 Node *use = n->fast_out(i);
3043 if (in_bb(use) && !visited_test(use) &&
3044 // Don't go around backedge
3045 (!use->is_Phi() || n == entry)) {
3046 if (use->is_reduction()) {
3047 // First see if we can map the reduction on the given system we are on, then
3048 // make a data entry operation for each reduction we see.
3049 BasicType bt = use->bottom_type()->basic_type();
3050 if (ReductionNode::implemented(use->Opcode(), Matcher::min_vector_size(bt), bt)) {
3051 reduction_uses++;
3052 }
3053 }
3054 _stk.push(use);
3055 }
3056 }
3057 if (_stk.length() == size) {
3058 // There were no additional uses, post visit node now
3059 _stk.pop(); // Remove node from stack
3060 assert(rpo_idx >= 0, "");
3061 _block.at_put_grow(rpo_idx, n);
3062 rpo_idx--;
3063 post_visited_set(n);
3064 assert(rpo_idx >= 0 || _stk.is_empty(), "");
3065 }
3066 } else {
3067 _stk.pop(); // Remove post-visited node from stack
3068 }
3069 }//while
3070
3071 int ii_current = -1;
3072 unsigned int load_idx = (unsigned int)-1;
3073 // Build iterations order if needed
3074 bool build_ii_order = _do_vector_loop_experimental && _ii_order.is_empty();
3075 // Create real map of block indices for nodes
3076 for (int j = 0; j < _block.length(); j++) {
3077 Node* n = _block.at(j);
3078 set_bb_idx(n, j);
3079 if (build_ii_order && n->is_Load()) {
3080 if (ii_current == -1) {
3081 ii_current = _clone_map.gen(n->_idx);
3082 _ii_order.push(ii_current);
3083 load_idx = _clone_map.idx(n->_idx);
3084 } else if (_clone_map.idx(n->_idx) == load_idx && _clone_map.gen(n->_idx) != ii_current) {
3085 ii_current = _clone_map.gen(n->_idx);
3086 _ii_order.push(ii_current);
3087 }
3088 }
3089 }//for
3090
3091 // Ensure extra info is allocated.
3092 initialize_bb();
3093
3094#ifndef PRODUCT
3095 if (_vector_loop_debug && _ii_order.length() > 0) {
3096 tty->print("SuperWord::construct_bb: List of generations: ");
3097 for (int jj = 0; jj < _ii_order.length(); ++jj) {
3098 tty->print(" %d:%d", jj, _ii_order.at(jj));
3099 }
3100 tty->print_cr(" ");
3101 }
3102 if (TraceSuperWord) {
3103 print_bb();
3104 tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE");
3105 for (int m = 0; m < _data_entry.length(); m++) {
3106 tty->print("%3d ", m);
3107 _data_entry.at(m)->dump();
3108 }
3109 tty->print_cr("\nmemory slices: %s", _mem_slice_head.length() > 0 ? "" : "NONE");
3110 for (int m = 0; m < _mem_slice_head.length(); m++) {
3111 tty->print("%3d ", m); _mem_slice_head.at(m)->dump();
3112 tty->print(" "); _mem_slice_tail.at(m)->dump();
3113 }
3114 }
3115#endif
3116 assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found");
3117 return (_mem_slice_head.length() > 0) || (reduction_uses > 0) || (_data_entry.length() > 0);
3118}
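//------------------------------------------------------------------------------------
// [Editorial note] Standalone sketch, not part of superword.cpp: construct_bb() above
// fills _block in reverse postorder using an explicit stack; the same ordering is shown
// here with a recursive DFS on a toy adjacency list. Writing nodes from the back
// (rpo_idx counts down) guarantees every definition precedes its uses in the block.
#include <vector>

static void dfs(int n, const std::vector<std::vector<int>>& succ,
                std::vector<bool>& visited, std::vector<int>& rpo, int& rpo_idx) {
  visited[n] = true;
  for (int s : succ[n]) {
    if (!visited[s]) dfs(s, succ, visited, rpo, rpo_idx);
  }
  rpo[rpo_idx--] = n;   // post-visit: emit node, then move toward the front
}

int main() {
  // 0 -> {1, 2}, 1 -> {3}, 2 -> {3}, 3 -> {}
  std::vector<std::vector<int>> succ = {{1, 2}, {3}, {3}, {}};
  std::vector<bool> visited(4, false);
  std::vector<int> rpo(4);
  int rpo_idx = 3;
  dfs(0, succ, visited, rpo, rpo_idx);
  // rpo == {0, 2, 1, 3}: the entry comes first, the join node 3 comes last.
  return 0;
}
//------------------------------------------------------------------------------------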
3119
3120//------------------------------initialize_bb---------------------------
3121// Initialize per node info
3122void SuperWord::initialize_bb() {
3123 Node* last = _block.at(_block.length() - 1);
3124 grow_node_info(bb_idx(last));
3125}
3126
3127//------------------------------bb_insert_after---------------------------
3128// Insert n into block after pos
3129void SuperWord::bb_insert_after(Node* n, int pos) {
3130 int n_pos = pos + 1;
3131 // Make room
3132 for (int i = _block.length() - 1; i >= n_pos; i--) {
3133 _block.at_put_grow(i+1, _block.at(i));
3134 }
3135 for (int j = _node_info.length() - 1; j >= n_pos; j--) {
3136 _node_info.at_put_grow(j+1, _node_info.at(j));
3137 }
3138 // Set value
3139 _block.at_put_grow(n_pos, n);
3140 _node_info.at_put_grow(n_pos, SWNodeInfo::initial);
3141 // Adjust map from node->_idx to _block index
3142 for (int i = n_pos; i < _block.length(); i++) {
3143 set_bb_idx(_block.at(i), i);
3144 }
3145}
3146
3147//------------------------------compute_max_depth---------------------------
3148// Compute max depth for expressions from beginning of block
3149// Use to prune search paths during test for independence.
3150void SuperWord::compute_max_depth() {
3151 int ct = 0;
3152 bool again;
3153 do {
3154 again = false;
3155 for (int i = 0; i < _block.length(); i++) {
3156 Node* n = _block.at(i);
3157 if (!n->is_Phi()) {
3158 int d_orig = depth(n);
3159 int d_in = 0;
3160 for (DepPreds preds(n, _dg); !preds.done(); preds.next()) {
3161 Node* pred = preds.current();
3162 if (in_bb(pred)) {
3163 d_in = MAX2(d_in, depth(pred));
3164 }
3165 }
3166 if (d_in + 1 != d_orig) {
3167 set_depth(n, d_in + 1);
3168 again = true;
3169 }
3170 }
3171 }
3172 ct++;
3173 } while (again);
3174
3175 if (TraceSuperWord && Verbose) {
3176 tty->print_cr("compute_max_depth iterated: %d times", ct);
3177 }
3178}
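//------------------------------------------------------------------------------------
// [Editorial note] Standalone sketch, not part of superword.cpp: compute_max_depth()
// above assigns depth(n) = 1 + max depth over n's in-block predecessors and repeats
// until nothing changes, i.e. a longest-path fixpoint over a DAG. Toy version:
#include <algorithm>
#include <vector>

int main() {
  // preds[n] lists the in-block predecessors of node n; depth starts at 0.
  std::vector<std::vector<int>> preds = {{}, {0}, {0}, {1, 2}};
  std::vector<int> depth(4, 0);
  bool again;
  do {
    again = false;
    for (size_t n = 0; n < preds.size(); n++) {
      int d_in = 0;
      for (int p : preds[n]) d_in = std::max(d_in, depth[p]);
      if (d_in + 1 != depth[n]) { depth[n] = d_in + 1; again = true; }
    }
  } while (again);
  // depth == {1, 2, 2, 3}: node 3 sits two levels below the entry nodes.
  return 0;
}
//------------------------------------------------------------------------------------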
3179
3180//-------------------------compute_vector_element_type-----------------------
3181// Compute necessary vector element type for expressions
3182// This propagates backwards a narrower integer type when the
3183// upper bits of the value are not needed.
3184// Example: char a,b,c; a = b + c;
3185// Normally the type of the add is integer, but for packed character
3186// operations the type of the add needs to be char.
3187void SuperWord::compute_vector_element_type() {
3188 if (TraceSuperWord && Verbose) {
3189 tty->print_cr("\ncompute_velt_type:");
3190 }
3191
3192 // Initial type
3193 for (int i = 0; i < _block.length(); i++) {
3194 Node* n = _block.at(i);
3195 set_velt_type(n, container_type(n));
3196 }
3197
3198 // Propagate integer narrowed type backwards through operations
3199 // that don't depend on higher order bits
3200 for (int i = _block.length() - 1; i >= 0; i--) {
3201 Node* n = _block.at(i);
3202 // Only integer types need be examined
3203 const Type* vtn = velt_type(n);
3204 if (vtn->basic_type() == T_INT) {
3205 uint start, end;
3206 VectorNode::vector_operands(n, &start, &end);
3207
3208 for (uint j = start; j < end; j++) {
3209 Node* in = n->in(j);
3210 // Don't propagate through a memory
3211 if (!in->is_Mem() && in_bb(in) && velt_type(in)->basic_type() == T_INT &&
3212 data_size(n) < data_size(in)) {
3213 bool same_type = true;
3214 for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
3215 Node *use = in->fast_out(k);
3216 if (!in_bb(use) || !same_velt_type(use, n)) {
3217 same_type = false;
3218 break;
3219 }
3220 }
3221 if (same_type) {
3222 // In any Java arithmetic operation, operands of small integer types
3223 // (boolean, byte, char & short) should be promoted to int first. As
3224 // vector elements of small types don't have upper bits of int, for
3225 // RShiftI or AbsI operations, the compiler has to know the precise
3226 // signedness info of the 1st operand. These operations shouldn't be
3227 // vectorized if the signedness info is imprecise.
3228 const Type* vt = vtn;
3229 int op = in->Opcode();
3230 if (VectorNode::is_shift_opcode(op) || op == Op_AbsI) {
3231 Node* load = in->in(1);
3232 if (load->is_Load() && in_bb(load) && (velt_type(load)->basic_type() == T_INT)) {
3233 // Only Load nodes distinguish signed (LoadS/LoadB) and unsigned
3234 // (LoadUS/LoadUB) values. Store nodes only have one version.
3235 vt = velt_type(load);
3236 } else if (op != Op_LShiftI) {
3237 // Widen type to int to avoid the creation of vector nodes. Note
3238 // that left shifts work regardless of the signedness.
3239 vt = TypeInt::INT;
3240 }
3241 }
3242 set_velt_type(in, vt);
3243 }
3244 }
3245 }
3246 }
3247 }
3248#ifndef PRODUCT
3249 if (TraceSuperWord && Verbose) {
3250 for (int i = 0; i < _block.length(); i++) {
3251 Node* n = _block.at(i);
3252 velt_type(n)->dump();
3253 tty->print("\t");
3254 n->dump();
3255 }
3256 }
3257#endif
3258}
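//------------------------------------------------------------------------------------
// [Editorial note] Standalone sketch, not part of superword.cpp: the signedness rule in
// compute_vector_element_type() above. The same stored byte 0xFF right-shifts to
// different values depending on whether it was loaded signed (LoadB) or unsigned
// (LoadUB), so a narrowed vector type must carry that information. (Assumes the usual
// arithmetic right shift of negative ints on the targets HotSpot supports.)
#include <cassert>
#include <cstdint>

int main() {
  uint8_t stored = 0xFF;
  int as_signed   = (int)(int8_t)stored;   // what a LoadB sees: -1
  int as_unsigned = (int)stored;           // what a LoadUB sees: 255
  assert((as_signed   >> 1) == -1);        // sign-extending shift
  assert((as_unsigned >> 1) == 127);
  return 0;
}
//------------------------------------------------------------------------------------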
3259
3260//------------------------------memory_alignment---------------------------
3261// Alignment within a vector memory reference
3262int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
3263#ifndef PRODUCT
3264 if ((TraceSuperWord && Verbose) || is_trace_alignment()) {
3265 tty->print("SuperWord::memory_alignment within a vector memory reference for %d: ", s->_idx); s->dump();
3266 }
3267#endif
3268 NOT_PRODUCT(SWPointer::Tracer::Depth ddd(0);)
3269 SWPointer p(s, this, NULL, false);
3270 if (!p.valid()) {
3271 NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: SWPointer p invalid, return bottom_align");)
3272 return bottom_align;
3273 }
3274 int vw = get_vw_bytes_special(s);
3275 if (vw < 2) {
3276 NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: vector_width_in_bytes < 2, return bottom_align");)
3277 return bottom_align; // No vectors for this type
3278 }
3279 int offset = p.offset_in_bytes();
3280 offset += iv_adjust*p.memory_size();
3281 int off_rem = offset % vw;
3282 int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
3283#ifndef PRODUCT
3284 if ((TraceSuperWord && Verbose) || is_trace_alignment()) {
3285 tty->print_cr("SWPointer::memory_alignment: off_rem = %d, off_mod = %d", off_rem, off_mod);
3286 }
3287#endif
3288 return off_mod;
3289}
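//------------------------------------------------------------------------------------
// [Editorial note] Standalone sketch, not part of superword.cpp: the off_mod
// computation in memory_alignment() above, which folds a possibly negative byte offset
// into the non-negative position of the reference within a vw-byte vector.
#include <cassert>

static int off_mod(int offset_in_bytes, int iv_adjust, int memory_size, int vw) {
  int offset  = offset_in_bytes + iv_adjust * memory_size;
  int off_rem = offset % vw;                      // may be negative in C++
  return off_rem >= 0 ? off_rem : off_rem + vw;   // normalize into [0, vw)
}

int main() {
  assert(off_mod(20, 0, 4, 32) == 20);   // 4-byte element at byte 20 of a 32-byte vector
  assert(off_mod(-4, 0, 4, 32) == 28);   // negative offsets still land in [0, vw)
  return 0;
}
//------------------------------------------------------------------------------------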
3290
3291//---------------------------container_type---------------------------
3292// Smallest type containing range of values
3293const Type* SuperWord::container_type(Node* n) {
3294 if (n->is_Mem()) {
3295 BasicType bt = n->as_Mem()->memory_type();
3296 if (n->is_Store() && (bt == T_CHAR)) {
3297 // Use T_SHORT type instead of T_CHAR for stored values because any
3298 // preceding arithmetic operation extends values to signed Int.
3299 bt = T_SHORT;
3300 }
3301 if (n->Opcode() == Op_LoadUB) {
3302 // Adjust type for unsigned byte loads, it is important for right shifts.
3303 // T_BOOLEAN is used because there is no basic type representing type
3304 // TypeInt::UBYTE. Use of T_BOOLEAN for vectors is fine because only
3305 // size (one byte) and sign is important.
3306 bt = T_BOOLEAN;
3307 }
3308 return Type::get_const_basic_type(bt);
3309 }
3310 const Type* t = _igvn.type(n);
3311 if (t->basic_type() == T_INT) {
3312 // A narrow type of arithmetic operations will be determined by
3313 // propagating the type of memory operations.
3314 return TypeInt::INT;
3315 }
3316 return t;
3317}
3318
3319bool SuperWord::same_velt_type(Node* n1, Node* n2) {
3320 const Type* vt1 = velt_type(n1);
3321 const Type* vt2 = velt_type(n2);
3322 if (vt1->basic_type() == T_INT && vt2->basic_type() == T_INT) {
3323 // Compare vectors element sizes for integer types.
3324 return data_size(n1) == data_size(n2);
3325 }
3326 return vt1 == vt2;
3327}
3328
3329//------------------------------in_packset---------------------------
3330// Are s1 and s2 in a pack pair and ordered as s1,s2?
3331bool SuperWord::in_packset(Node* s1, Node* s2) {
3332 for (int i = 0; i < _packset.length(); i++) {
3333 Node_List* p = _packset.at(i);
3334 assert(p->size() == 2, "must be");
3335 if (p->at(0) == s1 && p->at(p->size()-1) == s2) {
3336 return true;
3337 }
3338 }
3339 return false;
3340}
3341
3342//------------------------------in_pack---------------------------
3343// Is s in pack p?
3344Node_List* SuperWord::in_pack(Node* s, Node_List* p) {
3345 for (uint i = 0; i < p->size(); i++) {
3346 if (p->at(i) == s) {
3347 return p;
3348 }
3349 }
3350 return NULL;
3351}
3352
3353//------------------------------remove_pack_at---------------------------
3354// Remove the pack at position pos in the packset
3355void SuperWord::remove_pack_at(int pos) {
3356 Node_List* p = _packset.at(pos);
3357 for (uint i = 0; i < p->size(); i++) {
3358 Node* s = p->at(i);
3359 set_my_pack(s, NULL);
3360 }
3361 _packset.remove_at(pos);
3362}
3363
3364void SuperWord::packset_sort(int n) {
3365 // simple bubble sort so that we capitalize with O(n) when its already sorted
3366 while (n != 0) {
3367 bool swapped = false;
3368 for (int i = 1; i < n; i++) {
3369 Node_List* q_low = _packset.at(i-1);
3370 Node_List* q_i = _packset.at(i);
3371
3372 // only swap when we find something to swap
3373 if (alignment(q_low->at(0)) > alignment(q_i->at(0))) {
3374 Node_List* t = q_i;
3375 *(_packset.adr_at(i)) = q_low;
3376 *(_packset.adr_at(i-1)) = q_i;
3377 swapped = true;
3378 }
3379 }
3380 if (swapped == false) break;
3381 n--;
3382 }
3383}
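//------------------------------------------------------------------------------------
// [Editorial note] Standalone sketch, not part of superword.cpp: packset_sort() above is
// an early-exit bubble sort keyed on the alignment of each pack's first node, so an
// already-sorted packset costs only a single O(n) pass. Generic toy version:
#include <utility>
#include <vector>

static void bubble_sort(std::vector<int>& a) {
  size_t n = a.size();
  while (n != 0) {
    bool swapped = false;
    for (size_t i = 1; i < n; i++) {
      if (a[i - 1] > a[i]) {        // only swap when we find something out of order
        std::swap(a[i - 1], a[i]);
        swapped = true;
      }
    }
    if (!swapped) break;            // nothing moved: the list is sorted
    n--;
  }
}

int main() {
  std::vector<int> alignments = {0, 16, 8, 24};
  bubble_sort(alignments);          // -> {0, 8, 16, 24}
  return 0;
}
//------------------------------------------------------------------------------------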
3384
3385//------------------------------executed_first---------------------------
3386// Return the node executed first in pack p. Uses the RPO block list
3387// to determine order.
3388Node* SuperWord::executed_first(Node_List* p) {
3389 Node* n = p->at(0);
3390 int n_rpo = bb_idx(n);
3391 for (uint i = 1; i < p->size(); i++) {
3392 Node* s = p->at(i);
3393 int s_rpo = bb_idx(s);
3394 if (s_rpo < n_rpo) {
3395 n = s;
3396 n_rpo = s_rpo;
3397 }
3398 }
3399 return n;
3400}
3401
3402//------------------------------executed_last---------------------------
3403// Return the node executed last in pack p.
3404Node* SuperWord::executed_last(Node_List* p) {
3405 Node* n = p->at(0);
3406 int n_rpo = bb_idx(n);
3407 for (uint i = 1; i < p->size(); i++) {
3408 Node* s = p->at(i);
3409 int s_rpo = bb_idx(s);
3410 if (s_rpo > n_rpo) {
3411 n = s;
3412 n_rpo = s_rpo;
3413 }
3414 }
3415 return n;
3416}
3417
3418LoadNode::ControlDependency SuperWord::control_dependency(Node_List* p) {
3419 LoadNode::ControlDependency dep = LoadNode::DependsOnlyOnTest;
3420 for (uint i = 0; i < p->size(); i++) {
3421 Node* n = p->at(i);
3422 assert(n->is_Load(), "only meaningful for loads");
3423 if (!n->depends_only_on_test()) {
3424 if (n->as_Load()->has_unknown_control_dependency() &&
3425 dep != LoadNode::Pinned) {
3426 // Upgrade to unknown control...
3427 dep = LoadNode::UnknownControl;
3428 } else {
3429 // Otherwise, we must pin it.
3430 dep = LoadNode::Pinned;
3431 }
3432 }
3433 }
3434 return dep;
3435}
3436
3437
3438//----------------------------align_initial_loop_index---------------------------
3439// Adjust pre-loop limit so that in main loop, a load/store reference
3440// to align_to_ref will be a position zero in the vector.
3441// (iv + k) mod vector_align == 0
3442void SuperWord::align_initial_loop_index(MemNode* align_to_ref) {
3443 assert(lp()->is_main_loop(), "");
3444 CountedLoopEndNode* pre_end = pre_loop_end();
3445 Node* pre_opaq1 = pre_end->limit();
3446 assert(pre_opaq1->Opcode() == Op_Opaque1, "");
3447 Opaque1Node* pre_opaq = (Opaque1Node*)pre_opaq1;
3448 Node* lim0 = pre_opaq->in(1);
3449
3450 // Where we put new limit calculations
3451 Node* pre_ctrl = pre_loop_head()->in(LoopNode::EntryControl);
3452
3453 // Ensure the original loop limit is available from the
3454 // pre-loop Opaque1 node.
3455 Node* orig_limit = pre_opaq->original_loop_limit();
3456 assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");
3457
3458 SWPointer align_to_ref_p(align_to_ref, this, NULL, false);
3459 assert(align_to_ref_p.valid(), "sanity");
3460
3461 // Given:
3462 // lim0 == original pre loop limit
3463 // V == v_align (power of 2)
3464 // invar == extra invariant piece of the address expression
3465 // e == offset [ +/- invar ]
3466 //
3467 // When reassociating expressions involving '%' the basic rules are:
3468 // (a - b) % k == 0 => a % k == b % k
3469 // and:
3470 // (a + b) % k == 0 => a % k == (k - b) % k
3471 //
3472 // For stride > 0 && scale > 0,
3473 // Derive the new pre-loop limit "lim" such that the two constraints:
3474 // (1) lim = lim0 + N (where N is some positive integer < V)
3475 // (2) (e + lim) % V == 0
3476 // are true.
3477 //
3478 // Substituting (1) into (2),
3479 // (e + lim0 + N) % V == 0
3480 // solve for N:
3481 // N = (V - (e + lim0)) % V
3482 // substitute back into (1), so that new limit
3483 // lim = lim0 + (V - (e + lim0)) % V
3484 //
3485 // For stride > 0 && scale < 0
3486 // Constraints:
3487 // lim = lim0 + N
3488 // (e - lim) % V == 0
3489 // Solving for lim:
3490 // (e - lim0 - N) % V == 0
3491 // N = (e - lim0) % V
3492 // lim = lim0 + (e - lim0) % V
3493 //
3494 // For stride < 0 && scale > 0
3495 // Constraints:
3496 // lim = lim0 - N
3497 // (e + lim) % V == 0
3498 // Solving for lim:
3499 // (e + lim0 - N) % V == 0
3500 // N = (e + lim0) % V
3501 // lim = lim0 - (e + lim0) % V
3502 //
3503 // For stride < 0 && scale < 0
3504 // Constraints:
3505 // lim = lim0 - N
3506 // (e - lim) % V == 0
3507 // Solving for lim:
3508 // (e - lim0 + N) % V == 0
3509 // N = (V - (e - lim0)) % V
3510 // lim = lim0 - (V - (e - lim0)) % V
3511
3512 int vw = vector_width_in_bytes(align_to_ref);
3513 int stride = iv_stride();
3514 int scale = align_to_ref_p.scale_in_bytes();
3515 int elt_size = align_to_ref_p.memory_size();
3516 int v_align = vw / elt_size;
3517 assert(v_align > 1, "sanity");
3518 int offset = align_to_ref_p.offset_in_bytes() / elt_size;
3519 Node *offsn = _igvn.intcon(offset);
3520
3521 Node *e = offsn;
3522 if (align_to_ref_p.invar() != NULL) {
3523 // incorporate any extra invariant piece producing (offset +/- invar) >>> log2(elt)
3524 Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
3525 Node* invar = align_to_ref_p.invar();
3526 if (_igvn.type(invar)->isa_long()) {
3527 // Computations are done % (vector width/element size) so it's
3528 // safe to simply convert invar to an int and lose the upper 32
3529 // bit half.
3530 invar = new ConvL2INode(invar);
3531 _igvn.register_new_node_with_optimizer(invar);
3532 }
3533 Node* invar_scale = align_to_ref_p.invar_scale();
3534 if (invar_scale != NULL) {
3535 invar = new LShiftINode(invar, invar_scale);
3536 _igvn.register_new_node_with_optimizer(invar);
3537 }
3538 Node* aref = new URShiftINode(invar, log2_elt);
3539 _igvn.register_new_node_with_optimizer(aref);
3540 _phase->set_ctrl(aref, pre_ctrl);
3541 if (align_to_ref_p.negate_invar()) {
3542 e = new SubINode(e, aref);
3543 } else {
3544 e = new AddINode(e, aref);
3545 }
3546 _igvn.register_new_node_with_optimizer(e);
3547 _phase->set_ctrl(e, pre_ctrl);
3548 }
3549 if (vw > ObjectAlignmentInBytes || align_to_ref_p.base()->is_top()) {
3550 // incorporate base e +/- base && Mask >>> log2(elt)
3551 Node* xbase = new CastP2XNode(NULL, align_to_ref_p.adr());
3552 _igvn.register_new_node_with_optimizer(xbase);
3553#ifdef _LP64
3554 xbase = new ConvL2INode(xbase);
3555 _igvn.register_new_node_with_optimizer(xbase);
3556#endif
3557 Node* mask = _igvn.intcon(vw-1);
3558 Node* masked_xbase = new AndINode(xbase, mask);
3559 _igvn.register_new_node_with_optimizer(masked_xbase);
3560 Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
3561 Node* bref = new URShiftINode(masked_xbase, log2_elt);
3562 _igvn.register_new_node_with_optimizer(bref);
3563 _phase->set_ctrl(bref, pre_ctrl);
3564 e = new AddINode(e, bref);
3565 _igvn.register_new_node_with_optimizer(e);
3566 _phase->set_ctrl(e, pre_ctrl);
3567 }
3568
3569 // compute e +/- lim0
3570 if (scale < 0) {
3571 e = new SubINode(e, lim0);
3572 } else {
3573 e = new AddINode(e, lim0);
3574 }
3575 _igvn.register_new_node_with_optimizer(e);
3576 _phase->set_ctrl(e, pre_ctrl);
3577
3578 if (stride * scale > 0) {
3579 // compute V - (e +/- lim0)
3580 Node* va = _igvn.intcon(v_align);
3581 e = new SubINode(va, e);
3582 _igvn.register_new_node_with_optimizer(e);
3583 _phase->set_ctrl(e, pre_ctrl);
3584 }
3585 // compute N = (exp) % V
3586 Node* va_msk = _igvn.intcon(v_align - 1);
3587 Node* N = new AndINode(e, va_msk);
3588 _igvn.register_new_node_with_optimizer(N);
3589 _phase->set_ctrl(N, pre_ctrl);
3590
3591 // substitute back into (1), so that new limit
3592 // lim = lim0 + N
3593 Node* lim;
3594 if (stride < 0) {
3595 lim = new SubINode(lim0, N);
3596 } else {
3597 lim = new AddINode(lim0, N);
3598 }
3599 _igvn.register_new_node_with_optimizer(lim);
3600 _phase->set_ctrl(lim, pre_ctrl);
3601 Node* constrained =
3602 (stride > 0) ? (Node*) new MinINode(lim, orig_limit)
3603 : (Node*) new MaxINode(lim, orig_limit);
3604 _igvn.register_new_node_with_optimizer(constrained);
3605 _phase->set_ctrl(constrained, pre_ctrl);
3606 _igvn.replace_input_of(pre_opaq, 1, constrained);
3607}
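//------------------------------------------------------------------------------------
// [Editorial note] Standalone sketch, not part of superword.cpp: a worked instance of
// the stride > 0 && scale > 0 case derived in the comment of align_initial_loop_index()
// above. With e the element offset of align_to_ref and V = vw / elt_size, the new
// pre-loop limit lim = lim0 + (V - (e + lim0)) % V makes (e + lim) a multiple of V, so
// the first main-loop access is vector aligned. (The compiler computes the "% V" with
// an AndI against V-1 since V is a power of two; this sketch assumes e + lim0 >= 0.)
#include <cassert>

int main() {
  const int V    = 8;    // v_align: elements per vector
  const int e    = 3;    // offset of the align_to_ref access, in elements
  const int lim0 = 10;   // original pre-loop limit

  int N   = (V - (e + lim0) % V) % V;   // number of extra pre-loop iterations
  int lim = lim0 + N;

  assert(N >= 0 && N < V);              // constraint (1): 0 <= N < V
  assert((e + lim) % V == 0);           // constraint (2): first vector access aligned
  return 0;
}
//------------------------------------------------------------------------------------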
3608
3609//----------------------------get_pre_loop_end---------------------------
3610// Find pre loop end from main loop. Returns null if none.
3611CountedLoopEndNode* SuperWord::find_pre_loop_end(CountedLoopNode* cl) const {
3612 // The loop cannot be optimized if the graph shape at
3613 // the loop entry is inappropriate.
3614 if (cl->is_canonical_loop_entry() == NULL) {
3615 return NULL;
3616 }
3617
3618 Node* p_f = cl->skip_predicates()->in(0)->in(0);
3619 if (!p_f->is_IfFalse()) return NULL;
3620 if (!p_f->in(0)->is_CountedLoopEnd()) return NULL;
3621 CountedLoopEndNode* pre_end = p_f->in(0)->as_CountedLoopEnd();
3622 CountedLoopNode* loop_node = pre_end->loopnode();
3623 if (loop_node == NULL || !loop_node->is_pre_loop()) return NULL;
3624 return pre_end;
3625}
3626
3627//------------------------------init---------------------------
3628void SuperWord::init() {
3629 _dg.init();
3630 _packset.clear();
3631 _disjoint_ptrs.clear();
3632 _block.clear();
3633 _post_block.clear();
3634 _data_entry.clear();
3635 _mem_slice_head.clear();
3636 _mem_slice_tail.clear();
3637 _iteration_first.clear();
3638 _iteration_last.clear();
3639 _node_info.clear();
3640 _align_to_ref = NULL;
3641 _lpt = NULL;
3642 _lp = NULL;
3643 _bb = NULL;
3644 _iv = NULL;
3645 _race_possible = 0;
3646 _early_return = false;
3647 _num_work_vecs = 0;
3648 _num_reductions = 0;
3649}
3650
3651//------------------------------restart---------------------------
3652void SuperWord::restart() {
3653 _dg.init();
3654 _packset.clear();
3655 _disjoint_ptrs.clear();
3656 _block.clear();
3657 _post_block.clear();
3658 _data_entry.clear();
3659 _mem_slice_head.clear();
3660 _mem_slice_tail.clear();
3661 _node_info.clear();
3662}
3663
3664//------------------------------print_packset---------------------------
3665void SuperWord::print_packset() {
3666#ifndef PRODUCT
3667 tty->print_cr("packset");
3668 for (int i = 0; i < _packset.length(); i++) {
3669 tty->print_cr("Pack: %d", i);
3670 Node_List* p = _packset.at(i);
3671 print_pack(p);
3672 }
3673#endif
3674}
3675
3676//------------------------------print_pack---------------------------
3677void SuperWord::print_pack(Node_List* p) {
3678 for (uint i = 0; i < p->size(); i++) {
3679 print_stmt(p->at(i));
3680 }
3681}
3682
3683//------------------------------print_bb---------------------------
3684void SuperWord::print_bb() {
3685#ifndef PRODUCT
3686 tty->print_cr("\nBlock");
3687 for (int i = 0; i < _block.length(); i++) {
3688 Node* n = _block.at(i);
3689 tty->print("%d ", i);
3690 if (n) {
3691 n->dump();
3692 }
3693 }
3694#endif
3695}
3696
3697//------------------------------print_stmt---------------------------
3698void SuperWord::print_stmt(Node* s) {
3699#ifndef PRODUCT
3700 tty->print(" align: %d \t", alignment(s));
3701 s->dump();
3702#endif
3703}
3704
3705//------------------------------blank---------------------------
3706char* SuperWord::blank(uint depth) {
3707 static char blanks[101];
3708 assert(depth < 101, "too deep");
3709 for (uint i = 0; i < depth; i++) blanks[i] = ' ';
3710 blanks[depth] = '\0';
3711 return blanks;
3712}
3713
3714
3715//==============================SWPointer===========================
3716#ifndef PRODUCT
3717int SWPointer::Tracer::_depth = 0;
3718#endif
3719//----------------------------SWPointer------------------------
3720SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool analyze_only) :
3721 _mem(mem), _slp(slp), _base(NULL), _adr(NULL),
3722 _scale(0), _offset(0), _invar(NULL), _negate_invar(false),
3723 _invar_scale(NULL),
3724 _nstack(nstack), _analyze_only(analyze_only),
3725 _stack_idx(0)
3726#ifndef PRODUCT
3727 , _tracer(slp)
3728#endif
3729{
3730 NOT_PRODUCT(_tracer.ctor_1(mem);)
3731
3732 Node* adr = mem->in(MemNode::Address);
3733 if (!adr->is_AddP()) {
3734 assert(!valid(), "too complex");
3735 return;
3736 }
3737 // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant)
3738 Node* base = adr->in(AddPNode::Base);
3739 // The base address should be loop invariant
3740 if (is_main_loop_member(base)) {
3741 assert(!valid(), "base address is loop variant");
3742 return;
3743 }
3744 // unsafe references require misaligned vector access support
3745 if (base->is_top() && !Matcher::misaligned_vectors_ok()) {
3746 assert(!valid(), "unsafe access");
3747 return;
3748 }
3749
3750 NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.store_depth();)
3751 NOT_PRODUCT(_tracer.ctor_2(adr);)
3752
3753 int i;
3754 for (i = 0; i < 3; i++) {
3755 NOT_PRODUCT(_tracer.ctor_3(adr, i);)
3756
3757 if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) {
3758 assert(!valid(), "too complex");
3759 return;
3760 }
3761 adr = adr->in(AddPNode::Address);
3762 NOT_PRODUCT(_tracer.ctor_4(adr, i);)
3763
3764 if (base == adr || !adr->is_AddP()) {
3765 NOT_PRODUCT(_tracer.ctor_5(adr, base, i);)
3766 break; // stop looking at addp's
3767 }
3768 }
3769 if (is_main_loop_member(adr)) {
3770 assert(!valid(), "adr is loop variant");
3771 return;
3772 }
3773
3774 if (!base->is_top() && adr != base) {
3775 assert(!valid(), "adr and base differ");
3776 return;
3777 }
3778
3779 NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.restore_depth();)
3780 NOT_PRODUCT(_tracer.ctor_6(mem);)
3781
3782 _base = base;
3783 _adr = adr;
3784 assert(valid(), "Usable");
3785}
3786
3787// Following is used to create a temporary object during
3788// the pattern match of an address expression.
3789SWPointer::SWPointer(SWPointer* p) :
3790 _mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL),
3791 _scale(0), _offset(0), _invar(NULL), _negate_invar(false),
3792 _invar_scale(NULL),
3793 _nstack(p->_nstack), _analyze_only(p->_analyze_only),
3794 _stack_idx(p->_stack_idx)
3795 #ifndef PRODUCT
3796 , _tracer(p->_slp)
3797 #endif
3798{}
3799
3800bool SWPointer::is_main_loop_member(Node* n) const {
3801 Node* n_c = phase()->get_ctrl(n);
3802 return lpt()->is_member(phase()->get_loop(n_c));
3803}
3804
3805bool SWPointer::invariant(Node* n) const {
3806 NOT_PRODUCT(Tracer::Depth dd;)
3807 Node* n_c = phase()->get_ctrl(n);
3808 NOT_PRODUCT(_tracer.invariant_1(n, n_c);)
3809 bool is_not_member = !is_main_loop_member(n);
3810 if (is_not_member && _slp->lp()->is_main_loop()) {
3811 // Check that n_c dominates the pre loop head node. If it does not, then we cannot use n as invariant for the pre loop
3812 // CountedLoopEndNode check because n_c is either part of the pre loop or between the pre and the main loop (illegal
3813 // invariant: Happens, for example, when n_c is a CastII node that prevents data nodes to flow above the main loop).
3814 return phase()->is_dominator(n_c, _slp->pre_loop_head());
3815 }
3816 return is_not_member;
3817}
3818
3819//------------------------scaled_iv_plus_offset--------------------
3820// Match: k*iv + offset
3821// where: k is a constant that maybe zero, and
3822// offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional
3823bool SWPointer::scaled_iv_plus_offset(Node* n) {
3824 NOT_PRODUCT(Tracer::Depth ddd;)
3825 NOT_PRODUCT(_tracer.scaled_iv_plus_offset_1(n);)
3826
3827 if (scaled_iv(n)) {
3828 NOT_PRODUCT(_tracer.scaled_iv_plus_offset_2(n);)
3829 return true;
3830 }
3831
3832 if (offset_plus_k(n)) {
3833 NOT_PRODUCT(_tracer.scaled_iv_plus_offset_3(n);)_tracer.scaled_iv_plus_offset_3(n);
3834 return true;
3835 }
3836
3837 int opc = n->Opcode();
3838 if (opc == Op_AddI) {
3839 if (offset_plus_k(n->in(2)) && scaled_iv_plus_offset(n->in(1))) {
3840 NOT_PRODUCT(_tracer.scaled_iv_plus_offset_4(n);)_tracer.scaled_iv_plus_offset_4(n);
3841 return true;
3842 }
3843 if (offset_plus_k(n->in(1)) && scaled_iv_plus_offset(n->in(2))) {
3844 NOT_PRODUCT(_tracer.scaled_iv_plus_offset_5(n);)_tracer.scaled_iv_plus_offset_5(n);
3845 return true;
3846 }
3847 } else if (opc == Op_SubI) {
3848 if (offset_plus_k(n->in(2), true) && scaled_iv_plus_offset(n->in(1))) {
3849 NOT_PRODUCT(_tracer.scaled_iv_plus_offset_6(n);)_tracer.scaled_iv_plus_offset_6(n);
3850 return true;
3851 }
3852 if (offset_plus_k(n->in(1)) && scaled_iv_plus_offset(n->in(2))) {
3853 _scale *= -1;
3854 NOT_PRODUCT(_tracer.scaled_iv_plus_offset_7(n);)_tracer.scaled_iv_plus_offset_7(n);
3855 return true;
3856 }
3857 }
3858
3859 NOT_PRODUCT(_tracer.scaled_iv_plus_offset_8(n);)_tracer.scaled_iv_plus_offset_8(n);
3860 return false;
3861}
3862
3863//----------------------------scaled_iv------------------------
3864// Match: k*iv where k is a constant that's not zero
3865bool SWPointer::scaled_iv(Node* n) {
3866 NOT_PRODUCT(Tracer::Depth ddd;)Tracer::Depth ddd;
3867 NOT_PRODUCT(_tracer.scaled_iv_1(n);)_tracer.scaled_iv_1(n);
3868
3869 if (_scale != 0) { // already found a scale
3870 NOT_PRODUCT(_tracer.scaled_iv_2(n, _scale);)_tracer.scaled_iv_2(n, _scale);
3871 return false;
3872 }
3873
3874 if (n == iv()) {
3875 _scale = 1;
3876 NOT_PRODUCT(_tracer.scaled_iv_3(n, _scale);)_tracer.scaled_iv_3(n, _scale);
3877 return true;
3878 }
3879 if (_analyze_only && (is_main_loop_member(n))) {
3880 _nstack->push(n, _stack_idx++);
3881 }
3882
3883 int opc = n->Opcode();
3884 if (opc == Op_MulI) {
3885 if (n->in(1) == iv() && n->in(2)->is_Con()) {
3886 _scale = n->in(2)->get_int();
3887 NOT_PRODUCT(_tracer.scaled_iv_4(n, _scale);)_tracer.scaled_iv_4(n, _scale);
3888 return true;
3889 } else if (n->in(2) == iv() && n->in(1)->is_Con()) {
3890 _scale = n->in(1)->get_int();
3891 NOT_PRODUCT(_tracer.scaled_iv_5(n, _scale);)_tracer.scaled_iv_5(n, _scale);
3892 return true;
3893 }
3894 } else if (opc == Op_LShiftI) {
3895 if (n->in(1) == iv() && n->in(2)->is_Con()) {
3896 _scale = 1 << n->in(2)->get_int();
3897 NOT_PRODUCT(_tracer.scaled_iv_6(n, _scale);)_tracer.scaled_iv_6(n, _scale);
3898 return true;
3899 }
3900 } else if (opc == Op_ConvI2L || opc == Op_CastII) {
3901 if (scaled_iv_plus_offset(n->in(1))) {
3902 NOT_PRODUCT(_tracer.scaled_iv_7(n);)_tracer.scaled_iv_7(n);
3903 return true;
3904 }
3905 } else if (opc == Op_LShiftL && n->in(2)->is_Con()) {
3906 if (!has_iv() && _invar == NULL__null) {
3907 // Need to preserve the current _offset value, so
3908 // create a temporary object for this expression subtree.
3909 // Hacky, so should re-engineer the address pattern match.
3910 NOT_PRODUCT(Tracer::Depth dddd;)Tracer::Depth dddd;
3911 SWPointer tmp(this);
3912 NOT_PRODUCT(_tracer.scaled_iv_8(n, &tmp);)_tracer.scaled_iv_8(n, &tmp);
3913
3914 if (tmp.scaled_iv_plus_offset(n->in(1))) {
3915 int scale = n->in(2)->get_int();
3916 _scale = tmp._scale << scale;
3917 _offset += tmp._offset << scale;
3918 _invar = tmp._invar;
3919 if (_invar != NULL__null) {
3920 _negate_invar = tmp._negate_invar;
3921 _invar_scale = n->in(2);
3922 }
3923 NOT_PRODUCT(_tracer.scaled_iv_9(n, _scale, _offset, _invar, _negate_invar);)_tracer.scaled_iv_9(n, _scale, _offset, _invar, _negate_invar
);
3924 return true;
3925 }
3926 }
3927 }
3928 NOT_PRODUCT(_tracer.scaled_iv_10(n);)_tracer.scaled_iv_10(n);
3929 return false;
3930}
3931
3932//----------------------------offset_plus_k------------------------
3933// Match: offset is (k [+/- invariant])
3934// where k maybe zero and invariant is optional, but not both.
3935bool SWPointer::offset_plus_k(Node* n, bool negate) {
3936 NOT_PRODUCT(Tracer::Depth ddd;)Tracer::Depth ddd;
3937 NOT_PRODUCT(_tracer.offset_plus_k_1(n);)_tracer.offset_plus_k_1(n);
3938
3939 int opc = n->Opcode();
3940 if (opc == Op_ConI) {
3941 _offset += negate ? -(n->get_int()) : n->get_int();
3942 NOT_PRODUCT(_tracer.offset_plus_k_2(n, _offset);)_tracer.offset_plus_k_2(n, _offset);
3943 return true;
3944 } else if (opc == Op_ConL) {
3945 // Okay if value fits into an int
3946 const TypeLong* t = n->find_long_type();
3947 if (t->higher_equal(TypeLong::INT)) {
3948 jlong loff = n->get_long();
3949 jint off = (jint)loff;
3950 _offset += negate ? -off : loff;
3951 NOT_PRODUCT(_tracer.offset_plus_k_3(n, _offset);)_tracer.offset_plus_k_3(n, _offset);
3952 return true;
3953 }
3954 NOT_PRODUCT(_tracer.offset_plus_k_4(n);)_tracer.offset_plus_k_4(n);
3955 return false;
3956 }
3957 if (_invar != NULL__null) { // already has an invariant
3958 NOT_PRODUCT(_tracer.offset_plus_k_5(n, _invar);)_tracer.offset_plus_k_5(n, _invar);
3959 return false;
3960 }
3961
3962 if (_analyze_only && is_main_loop_member(n)) {
3963 _nstack->push(n, _stack_idx++);
3964 }
3965 if (opc == Op_AddI) {
3966 if (n->in(2)->is_Con() && invariant(n->in(1))) {
3967 _negate_invar = negate;
3968 _invar = n->in(1);
3969 _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
3970 NOT_PRODUCT(_tracer.offset_plus_k_6(n, _invar, _negate_invar, _offset);)_tracer.offset_plus_k_6(n, _invar, _negate_invar, _offset);
3971 return true;
3972 } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
3973 _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
3974 _negate_invar = negate;
3975 _invar = n->in(2);
3976 NOT_PRODUCT(_tracer.offset_plus_k_7(n, _invar, _negate_invar, _offset);)_tracer.offset_plus_k_7(n, _invar, _negate_invar, _offset);
3977 return true;
3978 }
3979 }
3980 if (opc == Op_SubI) {
3981 if (n->in(2)->is_Con() && invariant(n->in(1))) {
3982 _negate_invar = negate;
3983 _invar = n->in(1);
3984 _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
3985 NOT_PRODUCT(_tracer.offset_plus_k_8(n, _invar, _negate_invar, _offset);)_tracer.offset_plus_k_8(n, _invar, _negate_invar, _offset);
3986 return true;
3987 } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
3988 _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
3989 _negate_invar = !negate;
3990 _invar = n->in(2);
3991 NOT_PRODUCT(_tracer.offset_plus_k_9(n, _invar, _negate_invar, _offset);)_tracer.offset_plus_k_9(n, _invar, _negate_invar, _offset);
3992 return true;
3993 }
3994 }
3995
3996 if (!is_main_loop_member(n)) {
3997 // 'n' is loop invariant. Skip ConvI2L and CastII nodes before checking if 'n' is dominating the pre loop.
3998 if (opc == Op_ConvI2L) {
3999 n = n->in(1);
4000 }
4001 if (n->Opcode() == Op_CastII) {
4002 // Skip CastII nodes
4003 assert(!is_main_loop_member(n), "sanity")do { if (!(!is_main_loop_member(n))) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp"
, 4003, "assert(" "!is_main_loop_member(n)" ") failed", "sanity"
); ::breakpoint(); } } while (0)
;
4004 n = n->in(1);
4005 }
4006 // Check if 'n' can really be used as invariant (not in main loop and dominating the pre loop).
4007 if (invariant(n)) {
4008 _negate_invar = negate;
4009 _invar = n;
4010 NOT_PRODUCT(_tracer.offset_plus_k_10(n, _invar, _negate_invar, _offset);)_tracer.offset_plus_k_10(n, _invar, _negate_invar, _offset);
4011 return true;
4012 }
4013 }
4014
4015 NOT_PRODUCT(_tracer.offset_plus_k_11(n);)_tracer.offset_plus_k_11(n);
4016 return false;
4017}
4018
4019//----------------------------print------------------------
4020void SWPointer::print() {
4021#ifndef PRODUCT
4022 tty->print("base: [%d] adr: [%d] scale: %d offset: %d",
4023 _base != NULL__null ? _base->_idx : 0,
4024 _adr != NULL__null ? _adr->_idx : 0,
4025 _scale, _offset);
4026 if (_invar != NULL__null) {
4027 tty->print(" invar: %c[%d] << [%d]", _negate_invar?'-':'+', _invar->_idx, _invar_scale->_idx);
4028 }
4029 tty->cr();
4030#endif
4031}
4032
4033//----------------------------tracing------------------------
4034#ifndef PRODUCT
4035void SWPointer::Tracer::print_depth() const {
4036 for (int ii = 0; ii < _depth; ++ii) {
4037 tty->print(" ");
4038 }
4039}
4040
4041void SWPointer::Tracer::ctor_1 (Node* mem) {
4042 if(_slp->is_trace_alignment()) {
4043 print_depth(); tty->print(" %d SWPointer::SWPointer: start alignment analysis", mem->_idx); mem->dump();
4044 }
4045}
4046
4047void SWPointer::Tracer::ctor_2(Node* adr) {
4048 if(_slp->is_trace_alignment()) {
4049 //store_depth();
4050 inc_depth();
4051 print_depth(); tty->print(" %d (adr) SWPointer::SWPointer: ", adr->_idx); adr->dump();
4052 inc_depth();
4053 print_depth(); tty->print(" %d (base) SWPointer::SWPointer: ", adr->in(AddPNode::Base)->_idx); adr->in(AddPNode::Base)->dump();
4054 }
4055}
4056
4057void SWPointer::Tracer::ctor_3(Node* adr, int i) {
4058 if(_slp->is_trace_alignment()) {
4059 inc_depth();
4060 Node* offset = adr->in(AddPNode::Offset);
4061 print_depth(); tty->print(" %d (offset) SWPointer::SWPointer: i = %d: ", offset->_idx, i); offset->dump();
4062 }
4063}
4064
4065void SWPointer::Tracer::ctor_4(Node* adr, int i) {
4066 if(_slp->is_trace_alignment()) {
4067 inc_depth();
4068 print_depth(); tty->print(" %d (adr) SWPointer::SWPointer: i = %d: ", adr->_idx, i); adr->dump();
4069 }
4070}
4071
4072void SWPointer::Tracer::ctor_5(Node* adr, Node* base, int i) {
4073 if(_slp->is_trace_alignment()) {
4074 inc_depth();
4075 if (base == adr) {
4076 print_depth(); tty->print_cr(" \\ %d (adr) == %d (base) SWPointer::SWPointer: breaking analysis at i = %d", adr->_idx, base->_idx, i);
4077 } else if (!adr->is_AddP()) {
4078 print_depth(); tty->print_cr(" \\ %d (adr) is NOT Addp SWPointer::SWPointer: breaking analysis at i = %d", adr->_idx, i);
4079 }
4080 }
4081}
4082
4083void SWPointer::Tracer::ctor_6(Node* mem) {
4084 if(_slp->is_trace_alignment()) {
4085 //restore_depth();
4086 print_depth(); tty->print_cr(" %d (adr) SWPointer::SWPointer: stop analysis", mem->_idx);
4087 }
4088}
4089
4090void SWPointer::Tracer::invariant_1(Node *n, Node *n_c) const {
4091 if (_slp->do_vector_loop() && _slp->is_debug() && _slp->_lpt->is_member(_slp->_phase->get_loop(n_c)) != (int)_slp->in_bb(n)) {
4092 int is_member = _slp->_lpt->is_member(_slp->_phase->get_loop(n_c));
4093 int in_bb = _slp->in_bb(n);
4094 print_depth(); tty->print(" \\ "); tty->print_cr(" %d SWPointer::invariant conditions differ: n_c %d", n->_idx, n_c->_idx);
4095 print_depth(); tty->print(" \\ "); tty->print_cr("is_member %d, in_bb %d", is_member, in_bb);
4096 print_depth(); tty->print(" \\ "); n->dump();
4097 print_depth(); tty->print(" \\ "); n_c->dump();
4098 }
4099}
4100
4101void SWPointer::Tracer::scaled_iv_plus_offset_1(Node* n) {
4102 if(_slp->is_trace_alignment()) {
4103 print_depth(); tty->print(" %d SWPointer::scaled_iv_plus_offset testing node: ", n->_idx);
4104 n->dump();
4105 }
4106}
4107
4108void SWPointer::Tracer::scaled_iv_plus_offset_2(Node* n) {
4109 if(_slp->is_trace_alignment()) {
4110 print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: PASSED", n->_idx);
4111 }
4112}
4113
4114void SWPointer::Tracer::scaled_iv_plus_offset_3(Node* n) {
4115 if(_slp->is_trace_alignment()) {
4116 print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: PASSED", n->_idx);
4117 }
4118}
4119
4120void SWPointer::Tracer::scaled_iv_plus_offset_4(Node* n) {
4121 if(_slp->is_trace_alignment()) {
4122 print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_AddI PASSED", n->_idx);
4123 print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is scaled_iv: ", n->in(1)->_idx); n->in(1)->dump();
4124 print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is offset_plus_k: ", n->in(2)->_idx); n->in(2)->dump();
4125 }
4126}
4127
4128void SWPointer::Tracer::scaled_iv_plus_offset_5(Node* n) {
4129 if(_slp->is_trace_alignment()) {
4130 print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_AddI PASSED", n->_idx);
4131 print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is scaled_iv: ", n->in(2)->_idx); n->in(2)->dump();
4132 print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is offset_plus_k: ", n->in(1)->_idx); n->in(1)->dump();
4133 }
4134}
4135
4136void SWPointer::Tracer::scaled_iv_plus_offset_6(Node* n) {
4137 if(_slp->is_trace_alignment()) {
4138 print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_SubI PASSED", n->_idx);
4139 print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is scaled_iv: ", n->in(1)->_idx); n->in(1)->dump();
4140 print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is offset_plus_k: ", n->in(2)->_idx); n->in(2)->dump();
4141 }
4142}
4143
4144void SWPointer::Tracer::scaled_iv_plus_offset_7(Node* n) {
4145 if(_slp->is_trace_alignment()) {
4146 print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_SubI PASSED", n->_idx);
4147 print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is scaled_iv: ", n->in(2)->_idx); n->in(2)->dump();
4148 print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is offset_plus_k: ", n->in(1)->_idx); n->in(1)->dump();
4149 }
4150}
4151
4152void SWPointer::Tracer::scaled_iv_plus_offset_8(Node* n) {
4153 if(_slp->is_trace_alignment()) {
4154 print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: FAILED", n->_idx);
4155 }
4156}
4157
4158void SWPointer::Tracer::scaled_iv_1(Node* n) {
4159 if(_slp->is_trace_alignment()) {
4160 print_depth(); tty->print(" %d SWPointer::scaled_iv: testing node: ", n->_idx); n->dump();
4161 }
4162}
4163
4164void SWPointer::Tracer::scaled_iv_2(Node* n, int scale) {
4165 if(_slp->is_trace_alignment()) {
4166 print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: FAILED since another _scale has been detected before", n->_idx);
4167 print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: _scale (%d) != 0", scale);
4168 }
4169}
4170
4171void SWPointer::Tracer::scaled_iv_3(Node* n, int scale) {
4172 if(_slp->is_trace_alignment()) {
4173 print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: is iv, setting _scale = %d", n->_idx, scale);
4174 }
4175}
4176
4177void SWPointer::Tracer::scaled_iv_4(Node* n, int scale) {
4178 if(_slp->is_trace_alignment()) {
4179 print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_MulI PASSED, setting _scale = %d", n->_idx, scale);
4180 print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(1) is iv: ", n->in(1)->_idx); n->in(1)->dump();
4181 print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump();
4182 }
4183}
4184
4185void SWPointer::Tracer::scaled_iv_5(Node* n, int scale) {
4186 if(_slp->is_trace_alignment()) {
4187 print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_MulI PASSED, setting _scale = %d", n->_idx, scale);
4188 print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(2) is iv: ", n->in(2)->_idx); n->in(2)->dump();
4189 print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump();
4190 }
4191}
4192
4193void SWPointer::Tracer::scaled_iv_6(Node* n, int scale) {
4194 if(_slp->is_trace_alignment()) {
4195 print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_LShiftI PASSED, setting _scale = %d", n->_idx, scale);
4196 print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(1) is iv: ", n->in(1)->_idx); n->in(1)->dump();
4197 print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump();
4198 }
4199}
4200
4201void SWPointer::Tracer::scaled_iv_7(Node* n) {
4202 if(_slp->is_trace_alignment()) {
4203 print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_ConvI2L PASSED", n->_idx);
4204 print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: in(1) %d is scaled_iv_plus_offset: ", n->in(1)->_idx);
4205 inc_depth(); inc_depth();
4206 print_depth(); n->in(1)->dump();
4207 dec_depth(); dec_depth();
4208 }
4209}
4210
4211void SWPointer::Tracer::scaled_iv_8(Node* n, SWPointer* tmp) {
4212 if(_slp->is_trace_alignment()) {
4213 print_depth(); tty->print(" %d SWPointer::scaled_iv: Op_LShiftL, creating tmp SWPointer: ", n->_idx); tmp->print();
4214 }
4215}
4216
4217void SWPointer::Tracer::scaled_iv_9(Node* n, int scale, int offset, Node* invar, bool negate_invar) {
4218 if(_slp->is_trace_alignment()) {
4219 print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_LShiftL PASSED, setting _scale = %d, _offset = %d", n->_idx, scale, offset);
4220 print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: in(1) [%d] is scaled_iv_plus_offset, in(2) [%d] used to scale: _scale = %d, _offset = %d",
4221 n->in(1)->_idx, n->in(2)->_idx, scale, offset);
4222 if (invar != NULL__null) {
4223 print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: scaled invariant: %c[%d]", (negate_invar?'-':'+'), invar->_idx);
4224 }
4225 inc_depth(); inc_depth();
4226 print_depth(); n->in(1)->dump();
4227 print_depth(); n->in(2)->dump();
4228 if (invar != NULL__null) {
4229 print_depth(); invar->dump();
4230 }
4231 dec_depth(); dec_depth();
4232 }
4233}
4234
4235void SWPointer::Tracer::scaled_iv_10(Node* n) {
4236 if(_slp->is_trace_alignment()) {
4237 print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: FAILED", n->_idx);
4238 }
4239}
4240
4241void SWPointer::Tracer::offset_plus_k_1(Node* n) {
4242 if(_slp->is_trace_alignment()) {
4243 print_depth(); tty->print(" %d SWPointer::offset_plus_k: testing node: ", n->_idx); n->dump();
4244 }
4245}
4246
4247void SWPointer::Tracer::offset_plus_k_2(Node* n, int _offset) {
4248 if(_slp->is_trace_alignment()) {
4249 print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_ConI PASSED, setting _offset = %d", n->_idx, _offset);
4250 }
4251}
4252
4253void SWPointer::Tracer::offset_plus_k_3(Node* n, int _offset) {
4254 if(_slp->is_trace_alignment()) {
4255 print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_ConL PASSED, setting _offset = %d", n->_idx, _offset);
4256 }
4257}
4258
4259void SWPointer::Tracer::offset_plus_k_4(Node* n) {
4260 if(_slp->is_trace_alignment()) {
4261 print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED", n->_idx);
4262 print_depth(); tty->print_cr(" \\ " JLONG_FORMAT"%" "l" "d" " SWPointer::offset_plus_k: Op_ConL FAILED, k is too big", n->get_long());
4263 }
4264}
4265
4266void SWPointer::Tracer::offset_plus_k_5(Node* n, Node* _invar) {
4267 if(_slp->is_trace_alignment()) {
4268 print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED since another invariant has been detected before", n->_idx);
4269 print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: _invar != NULL: ", _invar->_idx); _invar->dump();
4270 }
4271}
4272
4273void SWPointer::Tracer::offset_plus_k_6(Node* n, Node* _invar, bool _negate_invar, int _offset) {
4274 if(_slp->is_trace_alignment()) {
4275 print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_AddI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d",
4276 n->_idx, _negate_invar, _invar->_idx, _offset);
4277 print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump();
4278 print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is invariant: ", _invar->_idx); _invar->dump();
4279 }
4280}
4281
4282void SWPointer::Tracer::offset_plus_k_7(Node* n, Node* _invar, bool _negate_invar, int _offset) {
4283 if(_slp->is_trace_alignment()) {
4284 print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_AddI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d",
4285 n->_idx, _negate_invar, _invar->_idx, _offset);
4286 print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump();
4287 print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is invariant: ", _invar->_idx); _invar->dump();
4288 }
4289}
4290
4291void SWPointer::Tracer::offset_plus_k_8(Node* n, Node* _invar, bool _negate_invar, int _offset) {
4292 if(_slp->is_trace_alignment()) {
4293 print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_SubI is PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d",
4294 n->_idx, _negate_invar, _invar->_idx, _offset);
4295 print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump();
4296 print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is invariant: ", _invar->_idx); _invar->dump();
4297 }
4298}
4299
4300void SWPointer::Tracer::offset_plus_k_9(Node* n, Node* _invar, bool _negate_invar, int _offset) {
4301 if(_slp->is_trace_alignment()) {
4302 print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_SubI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset);
4303 print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump();
4304 print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is invariant: ", _invar->_idx); _invar->dump();
4305 }
4306}
4307
4308void SWPointer::Tracer::offset_plus_k_10(Node* n, Node* _invar, bool _negate_invar, int _offset) {
4309 if(_slp->is_trace_alignment()) {
4310 print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset);
4311 print_depth(); tty->print_cr(" \\ %d SWPointer::offset_plus_k: is invariant", n->_idx);
4312 }
4313}
4314
4315void SWPointer::Tracer::offset_plus_k_11(Node* n) {
4316 if(_slp->is_trace_alignment()) {
4317 print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED", n->_idx);
4318 }
4319}
4320
4321#endif
4322// ========================= OrderedPair =====================
4323
4324const OrderedPair OrderedPair::initial;
4325
4326// ========================= SWNodeInfo =====================
4327
4328const SWNodeInfo SWNodeInfo::initial;
4329
4330
4331// ============================ DepGraph ===========================
4332
4333//------------------------------make_node---------------------------
4334// Make a new dependence graph node for an ideal node.
4335DepMem* DepGraph::make_node(Node* node) {
4336 DepMem* m = new (_arena) DepMem(node);
4337 if (node != NULL__null) {
4338 assert(_map.at_grow(node->_idx) == NULL, "one init only")do { if (!(_map.at_grow(node->_idx) == __null)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp"
, 4338, "assert(" "_map.at_grow(node->_idx) == __null" ") failed"
, "one init only"); ::breakpoint(); } } while (0)
;
4339 _map.at_put_grow(node->_idx, m);
4340 }
4341 return m;
4342}
4343
4344//------------------------------make_edge---------------------------
4345// Make a new dependence graph edge from dpred -> dsucc
4346DepEdge* DepGraph::make_edge(DepMem* dpred, DepMem* dsucc) {
4347 DepEdge* e = new (_arena) DepEdge(dpred, dsucc, dsucc->in_head(), dpred->out_head());
4348 dpred->set_out_head(e);
4349 dsucc->set_in_head(e);
4350 return e;
4351}
4352
4353// ========================== DepMem ========================
4354
4355//------------------------------in_cnt---------------------------
4356int DepMem::in_cnt() {
4357 int ct = 0;
4358 for (DepEdge* e = _in_head; e != NULL__null; e = e->next_in()) ct++;
4359 return ct;
4360}
4361
4362//------------------------------out_cnt---------------------------
4363int DepMem::out_cnt() {
4364 int ct = 0;
4365 for (DepEdge* e = _out_head; e != NULL__null; e = e->next_out()) ct++;
4366 return ct;
4367}
4368
4369//------------------------------print-----------------------------
4370void DepMem::print() {
4371#ifndef PRODUCT
4372 tty->print(" DepNode %d (", _node->_idx);
4373 for (DepEdge* p = _in_head; p != NULL__null; p = p->next_in()) {
4374 Node* pred = p->pred()->node();
4375 tty->print(" %d", pred != NULL__null ? pred->_idx : 0);
4376 }
4377 tty->print(") [");
4378 for (DepEdge* s = _out_head; s != NULL__null; s = s->next_out()) {
4379 Node* succ = s->succ()->node();
4380 tty->print(" %d", succ != NULL__null ? succ->_idx : 0);
4381 }
4382 tty->print_cr(" ]");
4383#endif
4384}
4385
4386// =========================== DepEdge =========================
4387
4388//------------------------------DepPreds---------------------------
4389void DepEdge::print() {
4390#ifndef PRODUCT
4391 tty->print_cr("DepEdge: %d [ %d ]", _pred->node()->_idx, _succ->node()->_idx);
4392#endif
4393}
4394
4395// =========================== DepPreds =========================
4396// Iterator over predecessor edges in the dependence graph.
4397
4398//------------------------------DepPreds---------------------------
4399DepPreds::DepPreds(Node* n, DepGraph& dg) {
4400 _n = n;
4401 _done = false;
4402 if (_n->is_Store() || _n->is_Load()) {
4403 _next_idx = MemNode::Address;
4404 _end_idx = n->req();
4405 _dep_next = dg.dep(_n)->in_head();
4406 } else if (_n->is_Mem()) {
4407 _next_idx = 0;
4408 _end_idx = 0;
4409 _dep_next = dg.dep(_n)->in_head();
4410 } else {
4411 _next_idx = 1;
4412 _end_idx = _n->req();
4413 _dep_next = NULL__null;
4414 }
4415 next();
4416}
4417
4418//------------------------------next---------------------------
4419void DepPreds::next() {
4420 if (_dep_next != NULL__null) {
4421 _current = _dep_next->pred()->node();
4422 _dep_next = _dep_next->next_in();
4423 } else if (_next_idx < _end_idx) {
4424 _current = _n->in(_next_idx++);
4425 } else {
4426 _done = true;
4427 }
4428}
4429
4430// =========================== DepSuccs =========================
4431// Iterator over successor edges in the dependence graph.
4432
4433//------------------------------DepSuccs---------------------------
4434DepSuccs::DepSuccs(Node* n, DepGraph& dg) {
4435 _n = n;
4436 _done = false;
4437 if (_n->is_Load()) {
4438 _next_idx = 0;
4439 _end_idx = _n->outcnt();
4440 _dep_next = dg.dep(_n)->out_head();
4441 } else if (_n->is_Mem() || (_n->is_Phi() && _n->bottom_type() == Type::MEMORY)) {
4442 _next_idx = 0;
4443 _end_idx = 0;
4444 _dep_next = dg.dep(_n)->out_head();
4445 } else {
4446 _next_idx = 0;
4447 _end_idx = _n->outcnt();
4448 _dep_next = NULL__null;
4449 }
4450 next();
4451}
4452
4453//-------------------------------next---------------------------
4454void DepSuccs::next() {
4455 if (_dep_next != NULL__null) {
4456 _current = _dep_next->succ()->node();
4457 _dep_next = _dep_next->next_out();
4458 } else if (_next_idx < _end_idx) {
4459 _current = _n->raw_out(_next_idx++);
4460 } else {
4461 _done = true;
4462 }
4463}
4464
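
The DepMem/DepEdge classes above keep every dependence edge on two intrusive singly linked lists, one anchored at the predecessor (out list) and one at the successor (in list); DepPreds and DepSuccs simply walk those lists. A small self-contained analogue of that layout, with illustrative names rather than HotSpot types:

#include <cstdio>

struct ToyEdge;
struct ToyNode {
  int      id;
  ToyEdge* in_head  = nullptr;   // edges where this node is the successor
  ToyEdge* out_head = nullptr;   // edges where this node is the predecessor
};
struct ToyEdge {
  ToyNode* pred;
  ToyNode* succ;
  ToyEdge* next_in;   // next edge sharing the same successor
  ToyEdge* next_out;  // next edge sharing the same predecessor
};

// Mirrors the shape of DepGraph::make_edge: the new edge is pushed onto the
// head of both endpoint lists.
ToyEdge* make_edge(ToyNode* pred, ToyNode* succ) {
  ToyEdge* e = new ToyEdge{pred, succ, succ->in_head, pred->out_head};
  pred->out_head = e;
  succ->in_head  = e;
  return e;
}

int main() {
  ToyNode a{1}, b{2};
  make_edge(&a, &b);
  for (ToyEdge* e = b.in_head; e != nullptr; e = e->next_in) {
    std::printf("dep %d -> %d\n", e->pred->id, e->succ->id);
  }
  delete b.in_head;   // single edge in this toy example
  return 0;
}
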
4465//
4466// --------------------------------- vectorization/simd -----------------------------------
4467//
4468bool SuperWord::same_origin_idx(Node* a, Node* b) const {
4469 return a != NULL__null && b != NULL__null && _clone_map.same_idx(a->_idx, b->_idx);
4470}
4471bool SuperWord::same_generation(Node* a, Node* b) const {
4472 return a != NULL__null && b != NULL__null && _clone_map.same_gen(a->_idx, b->_idx);
4473}
4474
4475Node* SuperWord::find_phi_for_mem_dep(LoadNode* ld) {
4476 assert(in_bb(ld), "must be in block")do { if (!(in_bb(ld))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp"
, 4476, "assert(" "in_bb(ld)" ") failed", "must be in block")
; ::breakpoint(); } } while (0)
;
4477 if (_clone_map.gen(ld->_idx) == _ii_first) {
4478#ifndef PRODUCT
4479 if (_vector_loop_debug) {
4480 tty->print_cr("SuperWord::find_phi_for_mem_dep _clone_map.gen(ld->_idx)=%d",
4481 _clone_map.gen(ld->_idx));
4482 }
4483#endif
4484 return NULL__null; //we think that any ld in the first gen being vectorizable
4485 }
4486
4487 Node* mem = ld->in(MemNode::Memory);
4488 if (mem->outcnt() <= 1) {
4489 // we don't want to remove the only edge from mem node to load
4490#ifndef PRODUCT
4491 if (_vector_loop_debug) {
4492 tty->print_cr("SuperWord::find_phi_for_mem_dep input node %d to load %d has no other outputs and edge mem->load cannot be removed",
4493 mem->_idx, ld->_idx);
4494 ld->dump();
4495 mem->dump();
4496 }
4497#endif
4498 return NULL__null;
4499 }
4500 if (!in_bb(mem) || same_generation(mem, ld)) {
4501#ifndef PRODUCT
4502 if (_vector_loop_debug) {
4503 tty->print_cr("SuperWord::find_phi_for_mem_dep _clone_map.gen(mem->_idx)=%d",
4504 _clone_map.gen(mem->_idx));
4505 }
4506#endif
4507 return NULL__null; // does not depend on loop volatile node or depends on the same generation
4508 }
4509
4510 //otherwise first node should depend on mem-phi
4511 Node* first = first_node(ld);
4512 assert(first->is_Load(), "must be Load")do { if (!(first->is_Load())) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp"
, 4512, "assert(" "first->is_Load()" ") failed", "must be Load"
); ::breakpoint(); } } while (0)
;
4513 Node* phi = first->as_Load()->in(MemNode::Memory);
4514 if (!phi->is_Phi() || phi->bottom_type() != Type::MEMORY) {
4515#ifndef PRODUCT
4516 if (_vector_loop_debug) {
4517 tty->print_cr("SuperWord::find_phi_for_mem_dep load is not vectorizable node, since it's `first` does not take input from mem phi");
4518 ld->dump();
4519 first->dump();
4520 }
4521#endif
4522 return NULL__null;
4523 }
4524
4525 Node* tail = 0;
4526 for (int m = 0; m < _mem_slice_head.length(); m++) {
4527 if (_mem_slice_head.at(m) == phi) {
4528 tail = _mem_slice_tail.at(m);
4529 }
4530 }
4531 if (tail == 0) { //test that found phi is in the list _mem_slice_head
4532#ifndef PRODUCT
4533 if (_vector_loop_debug) {
4534 tty->print_cr("SuperWord::find_phi_for_mem_dep load %d is not vectorizable node, its phi %d is not _mem_slice_head",
4535 ld->_idx, phi->_idx);
4536 ld->dump();
4537 phi->dump();
4538 }
4539#endif
4540 return NULL__null;
4541 }
4542
4543 // now all conditions are met
4544 return phi;
4545}
4546
4547Node* SuperWord::first_node(Node* nd) {
4548 for (int ii = 0; ii < _iteration_first.length(); ii++) {
4549 Node* nnn = _iteration_first.at(ii);
4550 if (same_origin_idx(nnn, nd)) {
4551#ifndef PRODUCT
4552 if (_vector_loop_debug) {
4553 tty->print_cr("SuperWord::first_node: %d is the first iteration node for %d (_clone_map.idx(nnn->_idx) = %d)",
4554 nnn->_idx, nd->_idx, _clone_map.idx(nnn->_idx));
4555 }
4556#endif
4557 return nnn;
4558 }
4559 }
4560
4561#ifndef PRODUCT
4562 if (_vector_loop_debug) {
4563 tty->print_cr("SuperWord::first_node: did not find first iteration node for %d (_clone_map.idx(nd->_idx)=%d)",
4564 nd->_idx, _clone_map.idx(nd->_idx));
4565 }
4566#endif
4567 return 0;
4568}
4569
4570Node* SuperWord::last_node(Node* nd) {
4571 for (int ii = 0; ii < _iteration_last.length(); ii++) {
4572 Node* nnn = _iteration_last.at(ii);
4573 if (same_origin_idx(nnn, nd)) {
4574#ifndef PRODUCT
4575 if (_vector_loop_debug) {
4576 tty->print_cr("SuperWord::last_node _clone_map.idx(nnn->_idx)=%d, _clone_map.idx(nd->_idx)=%d",
4577 _clone_map.idx(nnn->_idx), _clone_map.idx(nd->_idx));
4578 }
4579#endif
4580 return nnn;
4581 }
4582 }
4583 return 0;
4584}
4585
4586int SuperWord::mark_generations() {
4587 Node *ii_err = NULL__null, *tail_err = NULL__null;
1. 'tail_err' initialized to a null pointer value
4588 for (int i = 0; i < _mem_slice_head.length(); i++) {
2. Assuming the condition is true
3. Loop condition is true. Entering loop body
4589 Node* phi = _mem_slice_head.at(i);
4590 assert(phi->is_Phi(), "must be phi")do { if (!(phi->is_Phi())) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp"
, 4590, "assert(" "phi->is_Phi()" ") failed", "must be phi"
); ::breakpoint(); } } while (0)
;
4. Taking false branch
5. Loop condition is false. Exiting loop
4591
4592 Node* tail = _mem_slice_tail.at(i);
4593 if (_ii_last == -1) {
6. Assuming the condition is false
7. Taking false branch
4594 tail_err = tail;
4595 _ii_last = _clone_map.gen(tail->_idx);
4596 }
4597 else if (_ii_last != _clone_map.gen(tail->_idx)) {
8. Calling 'CloneMap::gen'
13. Returning from 'CloneMap::gen'
14. Assuming the condition is true
15. Taking true branch
4598#ifndef PRODUCT
4599 if (TraceSuperWord && Verbose) {
16. Assuming 'TraceSuperWord' is true
17. Assuming 'Verbose' is true
18. Taking true branch
4600 tty->print_cr("SuperWord::mark_generations _ii_last error - found different generations in two tail nodes ");
4601 tail->dump();
4602 tail_err->dump();
19. Called C++ object pointer is null
4603 }
4604#endif
4605 return -1;
4606 }
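
On the path reported above, _ii_last already differs from -1 when the loop body first executes, so the assignment at line 4594 is skipped, tail_err keeps the null value assigned at line 4587, and the trace-only dump at line 4602 dereferences it. A minimal standalone model of that path, together with the kind of null check that would avoid the flagged dereference (illustrative code, not HotSpot source):

#include <cstdio>

// Minimal model of the reported path: the sentinel is already set when the
// loop body runs, so 'tail_err' keeps its null initialization and is only
// safe to dump behind an explicit null check.
struct N { int idx; void dump() const { std::printf("node %d\n", idx); } };

int mark(int ii_last, int tail_gen, N* tail) {
  N* tail_err = nullptr;                 // cf. line 4587
  if (ii_last == -1) {
    tail_err = tail;                     // cf. line 4594: the only assignment
    ii_last = tail_gen;
  } else if (ii_last != tail_gen) {      // cf. line 4597
    tail->dump();
    if (tail_err != nullptr) {           // guard avoiding the flagged dereference
      tail_err->dump();                  // cf. line 4602
    }
    return -1;
  }
  return ii_last;
}

int main() {
  N n{42};
  return mark(7, 3, &n) == -1 ? 0 : 1;
}
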
4607
4608 // find first iteration in the loop
4609 for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
4610 Node* ii = phi->fast_out(i);
4611 if (in_bb(ii) && ii->is_Store()) { // we speculate that normally Stores of one and one only generation have deps from mem phi
4612 if (_ii_first == -1) {
4613 ii_err = ii;
4614 _ii_first = _clone_map.gen(ii->_idx);
4615 } else if (_ii_first != _clone_map.gen(ii->_idx)) {
4616#ifndef PRODUCT
4617 if (TraceSuperWord && Verbose) {
4618 tty->print_cr("SuperWord::mark_generations: _ii_first was found before and not equal to one in this node (%d)", _ii_first);
4619 ii->dump();
4620 if (ii_err!= 0) {
4621 ii_err->dump();
4622 }
4623 }
4624#endif
4625 return -1; // this phi has Stores from different generations of unroll and cannot be simd/vectorized
4626 }
4627 }
4628 }//for (DUIterator_Fast imax,
4629 }//for (int i...
4630
4631 if (_ii_first == -1 || _ii_last == -1) {
4632 if (TraceSuperWord && Verbose) {
4633 tty->print_cr("SuperWord::mark_generations unknown error, something vent wrong");
4634 }
4635 return -1; // something vent wrong
4636 }
4637 // collect nodes in the first and last generations
4638 assert(_iteration_first.length() == 0, "_iteration_first must be empty")do { if (!(_iteration_first.length() == 0)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp"
, 4638, "assert(" "_iteration_first.length() == 0" ") failed"
, "_iteration_first must be empty"); ::breakpoint(); } } while
(0)
;
4639 assert(_iteration_last.length() == 0, "_iteration_last must be empty")do { if (!(_iteration_last.length() == 0)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp"
, 4639, "assert(" "_iteration_last.length() == 0" ") failed",
"_iteration_last must be empty"); ::breakpoint(); } } while (
0)
;
4640 for (int j = 0; j < _block.length(); j++) {
4641 Node* n = _block.at(j);
4642 node_idx_t gen = _clone_map.gen(n->_idx);
4643 if ((signed)gen == _ii_first) {
4644 _iteration_first.push(n);
4645 } else if ((signed)gen == _ii_last) {
4646 _iteration_last.push(n);
4647 }
4648 }
4649
4650 // building order of iterations
4651 if (_ii_order.length() == 0 && ii_err != 0) {
4652 assert(in_bb(ii_err) && ii_err->is_Store(), "should be Store in bb")do { if (!(in_bb(ii_err) && ii_err->is_Store())) {
(*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp"
, 4652, "assert(" "in_bb(ii_err) && ii_err->is_Store()"
") failed", "should be Store in bb"); ::breakpoint(); } } while
(0)
;
4653 Node* nd = ii_err;
4654 while(_clone_map.gen(nd->_idx) != _ii_last) {
4655 _ii_order.push(_clone_map.gen(nd->_idx));
4656 bool found = false;
4657 for (DUIterator_Fast imax, i = nd->fast_outs(imax); i < imax; i++) {
4658 Node* use = nd->fast_out(i);
4659 if (same_origin_idx(use, nd) && use->as_Store()->in(MemNode::Memory) == nd) {
4660 found = true;
4661 nd = use;
4662 break;
4663 }
4664 }//for
4665
4666 if (found == false) {
4667 if (TraceSuperWord && Verbose) {
4668 tty->print_cr("SuperWord::mark_generations: Cannot build order of iterations - no dependent Store for %d", nd->_idx);
4669 }
4670 _ii_order.clear();
4671 return -1;
4672 }
4673 } //while
4674 _ii_order.push(_clone_map.gen(nd->_idx));
4675 }
4676
4677#ifndef PRODUCT
4678 if (_vector_loop_debug) {
4679 tty->print_cr("SuperWord::mark_generations");
4680 tty->print_cr("First generation (%d) nodes:", _ii_first);
4681 for (int ii = 0; ii < _iteration_first.length(); ii++) _iteration_first.at(ii)->dump();
4682 tty->print_cr("Last generation (%d) nodes:", _ii_last);
4683 for (int ii = 0; ii < _iteration_last.length(); ii++) _iteration_last.at(ii)->dump();
4684 tty->print_cr(" ");
4685
4686 tty->print("SuperWord::List of generations: ");
4687 for (int jj = 0; jj < _ii_order.length(); ++jj) {
4688 tty->print("%d:%d ", jj, _ii_order.at(jj));
4689 }
4690 tty->print_cr(" ");
4691 }
4692#endif
4693
4694 return _ii_first;
4695}
4696
4697bool SuperWord::fix_commutative_inputs(Node* gold, Node* fix) {
4698 assert(gold->is_Add() && fix->is_Add() || gold->is_Mul() && fix->is_Mul(), "should be only Add or Mul nodes")do { if (!(gold->is_Add() && fix->is_Add() || gold
->is_Mul() && fix->is_Mul())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp"
, 4698, "assert(" "gold->is_Add() && fix->is_Add() || gold->is_Mul() && fix->is_Mul()"
") failed", "should be only Add or Mul nodes"); ::breakpoint
(); } } while (0)
;
4699 assert(same_origin_idx(gold, fix), "should be clones of the same node")do { if (!(same_origin_idx(gold, fix))) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp"
, 4699, "assert(" "same_origin_idx(gold, fix)" ") failed", "should be clones of the same node"
); ::breakpoint(); } } while (0)
;
4700 Node* gin1 = gold->in(1);
4701 Node* gin2 = gold->in(2);
4702 Node* fin1 = fix->in(1);
4703 Node* fin2 = fix->in(2);
4704 bool swapped = false;
4705
4706 if (in_bb(gin1) && in_bb(gin2) && in_bb(fin1) && in_bb(fin2)) {
4707 if (same_origin_idx(gin1, fin1) &&
4708 same_origin_idx(gin2, fin2)) {
4709 return true; // nothing to fix
4710 }
4711 if (same_origin_idx(gin1, fin2) &&
4712 same_origin_idx(gin2, fin1)) {
4713 fix->swap_edges(1, 2);
4714 swapped = true;
4715 }
4716 }
4717 // at least one input comes from outside of bb
4718 if (gin1->_idx == fin1->_idx) {
4719 return true; // nothing to fix
4720 }
4721 if (!swapped && (gin1->_idx == fin2->_idx || gin2->_idx == fin1->_idx)) { //swapping is expensive, check condition first
4722 fix->swap_edges(1, 2);
4723 swapped = true;
4724 }
4725
4726 if (swapped) {
4727#ifndef PRODUCT
4728 if (_vector_loop_debug) {
4729 tty->print_cr("SuperWord::fix_commutative_inputs: fixed node %d", fix->_idx);
4730 }
4731#endif
4732 return true;
4733 }
4734
4735 if (TraceSuperWord && Verbose) {
4736 tty->print_cr("SuperWord::fix_commutative_inputs: cannot fix node %d", fix->_idx);
4737 }
4738
4739 return false;
4740}
4741
4742bool SuperWord::pack_parallel() {
4743#ifndef PRODUCT
4744 if (_vector_loop_debug) {
4745 tty->print_cr("SuperWord::pack_parallel: START");
4746 }
4747#endif
4748
4749 _packset.clear();
4750
4751 if (_ii_order.is_empty()) {
4752#ifndef PRODUCT
4753 if (_vector_loop_debug) {
4754 tty->print_cr("SuperWord::pack_parallel: EMPTY");
4755 }
4756#endif
4757 return false;
4758 }
4759
4760 for (int ii = 0; ii < _iteration_first.length(); ii++) {
4761 Node* nd = _iteration_first.at(ii);
4762 if (in_bb(nd) && (nd->is_Load() || nd->is_Store() || nd->is_Add() || nd->is_Mul())) {
4763 Node_List* pk = new Node_List();
4764 pk->push(nd);
4765 for (int gen = 1; gen < _ii_order.length(); ++gen) {
4766 for (int kk = 0; kk < _block.length(); kk++) {
4767 Node* clone = _block.at(kk);
4768 if (same_origin_idx(clone, nd) &&
4769 _clone_map.gen(clone->_idx) == _ii_order.at(gen)) {
4770 if (nd->is_Add() || nd->is_Mul()) {
4771 fix_commutative_inputs(nd, clone);
4772 }
4773 pk->push(clone);
4774 if (pk->size() == 4) {
4775 _packset.append(pk);
4776#ifndef PRODUCT
4777 if (_vector_loop_debug) {
4778 tty->print_cr("SuperWord::pack_parallel: added pack ");
4779 pk->dump();
4780 }
4781#endif
4782 if (_clone_map.gen(clone->_idx) != _ii_last) {
4783 pk = new Node_List();
4784 }
4785 }
4786 break;
4787 }
4788 }
4789 }//for
4790 }//if
4791 }//for
4792
4793#ifndef PRODUCT
4794 if (_vector_loop_debug) {
4795 tty->print_cr("SuperWord::pack_parallel: END");
4796 }
4797#endif
4798
4799 return true;
4800}
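
As a toy analogue of the pack construction in pack_parallel above (illustrative only, not HotSpot code): for every node of the first generation, its clones are appended in generation order and a pack is emitted once it holds four nodes.

#include <cstdio>
#include <vector>

struct Clone { int origin; int gen; };

int main() {
  // (origin index, generation) pairs, as CloneMap would report them (made up).
  std::vector<Clone> block = {{10,1},{11,1},{10,2},{11,2},{10,3},{11,3},{10,4},{11,4}};
  std::vector<int> gen_order = {2, 3, 4};       // generations after the first

  for (const Clone& first : block) {
    if (first.gen != 1) continue;               // packs start at first-generation nodes
    std::vector<Clone> pack = {first};
    for (int g : gen_order) {
      for (const Clone& c : block) {
        if (c.origin == first.origin && c.gen == g) { pack.push_back(c); break; }
      }
    }
    if (pack.size() == 4) {                     // keep only complete packs of four
      std::printf("pack for origin %d: gens", first.origin);
      for (const Clone& c : pack) std::printf(" %d", c.gen);
      std::printf("\n");
    }
  }
  return 0;
}
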
4801
4802bool SuperWord::hoist_loads_in_graph() {
4803 GrowableArray<Node*> loads;
4804
4805#ifndef PRODUCT
4806 if (_vector_loop_debug) {
4807 tty->print_cr("SuperWord::hoist_loads_in_graph: total number _mem_slice_head.length() = %d", _mem_slice_head.length());
4808 }
4809#endif
4810
4811 for (int i = 0; i < _mem_slice_head.length(); i++) {
4812 Node* n = _mem_slice_head.at(i);
4813 if ( !in_bb(n) || !n->is_Phi() || n->bottom_type() != Type::MEMORY) {
4814 if (TraceSuperWord && Verbose) {
4815 tty->print_cr("SuperWord::hoist_loads_in_graph: skipping unexpected node n=%d", n->_idx);
4816 }
4817 continue;
4818 }
4819
4820#ifndef PRODUCT
4821 if (_vector_loop_debug) {
4822 tty->print_cr("SuperWord::hoist_loads_in_graph: processing phi %d = _mem_slice_head.at(%d);", n->_idx, i);
4823 }
4824#endif
4825
4826 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
4827 Node* ld = n->fast_out(i);
4828 if (ld->is_Load() && ld->as_Load()->in(MemNode::Memory) == n && in_bb(ld)) {
4829 for (int i = 0; i < _block.length(); i++) {
4830 Node* ld2 = _block.at(i);
4831 if (ld2->is_Load() && same_origin_idx(ld, ld2) &&
4832 !same_generation(ld, ld2)) { // <= do not collect the first generation ld
4833#ifndef PRODUCT
4834 if (_vector_loop_debug) {
4835 tty->print_cr("SuperWord::hoist_loads_in_graph: will try to hoist load ld2->_idx=%d, cloned from %d (ld->_idx=%d)",
4836 ld2->_idx, _clone_map.idx(ld->_idx), ld->_idx);
4837 }
4838#endif
4839 // could not do on-the-fly, since iterator is immutable
4840 loads.push(ld2);
4841 }
4842 }// for
4843 }//if
4844 }//for (DUIterator_Fast imax,
4845 }//for (int i = 0; i
4846
4847 for (int i = 0; i < loads.length(); i++) {
4848 LoadNode* ld = loads.at(i)->as_Load();
4849 Node* phi = find_phi_for_mem_dep(ld);
4850 if (phi != NULL__null) {
4851#ifndef PRODUCT
4852 if (_vector_loop_debug) {
4853 tty->print_cr("SuperWord::hoist_loads_in_graph replacing MemNode::Memory(%d) edge in %d with one from %d",
4854 MemNode::Memory, ld->_idx, phi->_idx);
4855 }
4856#endif
4857 _igvn.replace_input_of(ld, MemNode::Memory, phi);
4858 }
4859 }//for
4860
4861 restart(); // invalidate all basic structures, since we rebuilt the graph
4862
4863 if (TraceSuperWord && Verbose) {
4864 tty->print_cr("\nSuperWord::hoist_loads_in_graph() the graph was rebuilt, all structures invalidated and need rebuild");
4865 }
4866
4867 return true;
4868}

/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp

1/*
2 * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#ifndef SHARE_OPTO_COMPILE_HPP
26#define SHARE_OPTO_COMPILE_HPP
27
28#include "asm/codeBuffer.hpp"
29#include "ci/compilerInterface.hpp"
30#include "code/debugInfoRec.hpp"
31#include "compiler/compiler_globals.hpp"
32#include "compiler/compilerOracle.hpp"
33#include "compiler/compileBroker.hpp"
34#include "compiler/compilerEvent.hpp"
35#include "libadt/dict.hpp"
36#include "libadt/vectset.hpp"
37#include "memory/resourceArea.hpp"
38#include "oops/methodData.hpp"
39#include "opto/idealGraphPrinter.hpp"
40#include "opto/phasetype.hpp"
41#include "opto/phase.hpp"
42#include "opto/regmask.hpp"
43#include "runtime/deoptimization.hpp"
44#include "runtime/sharedRuntime.hpp"
45#include "runtime/timerTrace.hpp"
46#include "runtime/vmThread.hpp"
47#include "utilities/ticks.hpp"
48
49class AbstractLockNode;
50class AddPNode;
51class Block;
52class Bundle;
53class CallGenerator;
54class CloneMap;
55class ConnectionGraph;
56class IdealGraphPrinter;
57class InlineTree;
58class Int_Array;
59class Matcher;
60class MachConstantNode;
61class MachConstantBaseNode;
62class MachNode;
63class MachOper;
64class MachSafePointNode;
65class Node;
66class Node_Array;
67class Node_List;
68class Node_Notes;
69class NodeCloneInfo;
70class OptoReg;
71class PhaseCFG;
72class PhaseGVN;
73class PhaseIterGVN;
74class PhaseRegAlloc;
75class PhaseCCP;
76class PhaseOutput;
77class RootNode;
78class relocInfo;
79class Scope;
80class StartNode;
81class SafePointNode;
82class JVMState;
83class Type;
84class TypeData;
85class TypeInt;
86class TypeInteger;
87class TypePtr;
88class TypeOopPtr;
89class TypeFunc;
90class TypeVect;
91class Unique_Node_List;
92class nmethod;
93class Node_Stack;
94struct Final_Reshape_Counts;
95
96enum LoopOptsMode {
97 LoopOptsDefault,
98 LoopOptsNone,
99 LoopOptsMaxUnroll,
100 LoopOptsShenandoahExpand,
101 LoopOptsShenandoahPostExpand,
102 LoopOptsSkipSplitIf,
103 LoopOptsVerify
104};
105
106typedef unsigned int node_idx_t;
107class NodeCloneInfo {
108 private:
109 uint64_t _idx_clone_orig;
110 public:
111
112 void set_idx(node_idx_t idx) {
113 _idx_clone_orig = (_idx_clone_orig & CONST64(0xFFFFFFFF00000000)(0xFFFFFFFF00000000LL)) | idx;
114 }
115 node_idx_t idx() const { return (node_idx_t)(_idx_clone_orig & 0xFFFFFFFF); }
116
117 void set_gen(int generation) {
118 uint64_t g = (uint64_t)generation << 32;
119 _idx_clone_orig = (_idx_clone_orig & 0xFFFFFFFF) | g;
120 }
121 int gen() const { return (int)(_idx_clone_orig >> 32); }
122
123 void set(uint64_t x) { _idx_clone_orig = x; }
124 void set(node_idx_t x, int g) { set_idx(x); set_gen(g); }
125 uint64_t get() const { return _idx_clone_orig; }
126
127 NodeCloneInfo(uint64_t idx_clone_orig) : _idx_clone_orig(idx_clone_orig) {}
128 NodeCloneInfo(node_idx_t x, int g) : _idx_clone_orig(0) { set(x, g); }
129
130 void dump() const;
131};
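
A standalone sketch of the bit layout used by NodeCloneInfo above, with the original node index in the low 32 bits and the unroll generation in the high 32 bits; it mirrors set_idx/set_gen/idx/gen with plain integers and is not part of compile.hpp:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t packed = 0;
  uint32_t idx = 4711;   // original node index (low 32 bits)
  int      gen = 3;      // cloning generation (high 32 bits)

  packed = (packed & 0xFFFFFFFF00000000ULL) | idx;                     // set_idx
  packed = (packed & 0x00000000FFFFFFFFULL) | ((uint64_t)gen << 32);   // set_gen

  std::printf("idx=%u gen=%d\n",
              (uint32_t)(packed & 0xFFFFFFFF),   // idx()
              (int)(packed >> 32));              // gen()
  return 0;
}
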
132
133class CloneMap {
134 friend class Compile;
135 private:
136 bool _debug;
137 Dict* _dict;
138 int _clone_idx; // current cloning iteration/generation in loop unroll
139 public:
140 void* _2p(node_idx_t key) const { return (void*)(intptr_t)key; } // 2 conversion functions to make gcc happy
141 node_idx_t _2_node_idx_t(const void* k) const { return (node_idx_t)(intptr_t)k; }
142 Dict* dict() const { return _dict; }
143 void insert(node_idx_t key, uint64_t val) { assert(_dict->operator[](_2p(key)) == NULL, "key existed")do { if (!(_dict->operator[](_2p(key)) == __null)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp"
, 143, "assert(" "_dict->operator[](_2p(key)) == __null" ") failed"
, "key existed"); ::breakpoint(); } } while (0)
; _dict->Insert(_2p(key), (void*)val); }
144 void insert(node_idx_t key, NodeCloneInfo& ci) { insert(key, ci.get()); }
145 void remove(node_idx_t key) { _dict->Delete(_2p(key)); }
146 uint64_t value(node_idx_t key) const { return (uint64_t)_dict->operator[](_2p(key)); }
10. Value assigned to 'TraceSuperWord', which participates in a condition later
11. Value assigned to 'Verbose', which participates in a condition later
147 node_idx_t idx(node_idx_t key) const { return NodeCloneInfo(value(key)).idx(); }
148 int gen(node_idx_t key) const { return NodeCloneInfo(value(key)).gen(); }
9. Calling 'CloneMap::value'
12. Returning from 'CloneMap::value'
149 int gen(const void* k) const { return gen(_2_node_idx_t(k)); }
150 int max_gen() const;
151 void clone(Node* old, Node* nnn, int gen);
152 void verify_insert_and_clone(Node* old, Node* nnn, int gen);
153 void dump(node_idx_t key) const;
154
155 int clone_idx() const { return _clone_idx; }
156 void set_clone_idx(int x) { _clone_idx = x; }
157 bool is_debug() const { return _debug; }
158 void set_debug(bool debug) { _debug = debug; }
159 static const char* debug_option_name;
160
161 bool same_idx(node_idx_t k1, node_idx_t k2) const { return idx(k1) == idx(k2); }
162 bool same_gen(node_idx_t k1, node_idx_t k2) const { return gen(k1) == gen(k2); }
163};
164
165class Options {
166 friend class Compile;
167 friend class VMStructs;
168 private:
169 const bool _subsume_loads; // Load can be matched as part of a larger op.
170 const bool _do_escape_analysis; // Do escape analysis.
171 const bool _do_iterative_escape_analysis; // Do iterative escape analysis.
172 const bool _eliminate_boxing; // Do boxing elimination.
173 const bool _do_locks_coarsening; // Do locks coarsening
174 const bool _install_code; // Install the code that was compiled
175 public:
176 Options(bool subsume_loads, bool do_escape_analysis,
177 bool do_iterative_escape_analysis,
178 bool eliminate_boxing, bool do_locks_coarsening,
179 bool install_code) :
180 _subsume_loads(subsume_loads),
181 _do_escape_analysis(do_escape_analysis),
182 _do_iterative_escape_analysis(do_iterative_escape_analysis),
183 _eliminate_boxing(eliminate_boxing),
184 _do_locks_coarsening(do_locks_coarsening),
185 _install_code(install_code) {
186 }
187
188 static Options for_runtime_stub() {
189 return Options(
190 /* subsume_loads = */ true,
191 /* do_escape_analysis = */ false,
192 /* do_iterative_escape_analysis = */ false,
193 /* eliminate_boxing = */ false,
194 /* do_lock_coarsening = */ false,
195 /* install_code = */ true
196 );
197 }
198};
199
200//------------------------------Compile----------------------------------------
201// This class defines a top-level Compiler invocation.
202
203class Compile : public Phase {
204 friend class VMStructs;
205
206 public:
207 // Fixed alias indexes. (See also MergeMemNode.)
208 enum {
209 AliasIdxTop = 1, // pseudo-index, aliases to nothing (used as sentinel value)
210 AliasIdxBot = 2, // pseudo-index, aliases to everything
211 AliasIdxRaw = 3 // hard-wired index for TypeRawPtr::BOTTOM
212 };
213
214 // Variant of TraceTime(NULL, &_t_accumulator, CITime);
215 // Integrated with logging. If logging is turned on, and CITimeVerbose is true,
216 // then brackets are put into the log, with time stamps and node counts.
217 // (The time collection itself is always conditionalized on CITime.)
218 class TracePhase : public TraceTime {
219 private:
220 Compile* C;
221 CompileLog* _log;
222 const char* _phase_name;
223 bool _dolog;
224 public:
225 TracePhase(const char* name, elapsedTimer* accumulator);
226 ~TracePhase();
227 };
228
229 // Information per category of alias (memory slice)
230 class AliasType {
231 private:
232 friend class Compile;
233
234 int _index; // unique index, used with MergeMemNode
235 const TypePtr* _adr_type; // normalized address type
236 ciField* _field; // relevant instance field, or null if none
237 const Type* _element; // relevant array element type, or null if none
238 bool _is_rewritable; // false if the memory is write-once only
239 int _general_index; // if this is type is an instance, the general
240 // type that this is an instance of
241
242 void Init(int i, const TypePtr* at);
243
244 public:
245 int index() const { return _index; }
246 const TypePtr* adr_type() const { return _adr_type; }
247 ciField* field() const { return _field; }
248 const Type* element() const { return _element; }
249 bool is_rewritable() const { return _is_rewritable; }
250 bool is_volatile() const { return (_field ? _field->is_volatile() : false); }
251 int general_index() const { return (_general_index != 0) ? _general_index : _index; }
252
253 void set_rewritable(bool z) { _is_rewritable = z; }
254 void set_field(ciField* f) {
255 assert(!_field,"")do { if (!(!_field)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp"
, 255, "assert(" "!_field" ") failed", ""); ::breakpoint(); }
} while (0)
;
256 _field = f;
257 if (f->is_final() || f->is_stable()) {
258 // In the case of @Stable, multiple writes are possible but may be assumed to be no-ops.
259 _is_rewritable = false;
260 }
261 }
262 void set_element(const Type* e) {
263 assert(_element == NULL, "")do { if (!(_element == __null)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp"
, 263, "assert(" "_element == __null" ") failed", ""); ::breakpoint
(); } } while (0)
;
264 _element = e;
265 }
266
267 BasicType basic_type() const;
268
269 void print_on(outputStream* st) PRODUCT_RETURN;
270 };
271
272 enum {
273 logAliasCacheSize = 6,
274 AliasCacheSize = (1<<logAliasCacheSize)
275 };
276 struct AliasCacheEntry { const TypePtr* _adr_type; int _index; }; // simple duple type
277 enum {
278 trapHistLength = MethodData::_trap_hist_limit
279 };
280
281 private:
282 // Fixed parameters to this compilation.
283 const int _compile_id;
284 const Options _options; // Compilation options
285 ciMethod* _method; // The method being compiled.
286 int _entry_bci; // entry bci for osr methods.
287 const TypeFunc* _tf; // My kind of signature
288 InlineTree* _ilt; // Ditto (temporary).
289 address _stub_function; // VM entry for stub being compiled, or NULL
290 const char* _stub_name; // Name of stub or adapter being compiled, or NULL
291 address _stub_entry_point; // Compile code entry for generated stub, or NULL
292
293 // Control of this compilation.
294 int _max_inline_size; // Max inline size for this compilation
295 int _freq_inline_size; // Max hot method inline size for this compilation
296 int _fixed_slots; // count of frame slots not allocated by the register
297 // allocator i.e. locks, original deopt pc, etc.
298 uintx _max_node_limit; // Max unique node count during a single compilation.
299
300 bool _post_loop_opts_phase; // Loop opts are finished.
301
302 int _major_progress; // Count of something big happening
303 bool _inlining_progress; // progress doing incremental inlining?
304 bool _inlining_incrementally;// Are we doing incremental inlining (post parse)
305 bool _do_cleanup; // Cleanup is needed before proceeding with incremental inlining
306 bool _has_loops; // True if the method _may_ have some loops
307 bool _has_split_ifs; // True if the method _may_ have some split-if
308 bool _has_unsafe_access; // True if the method _may_ produce faults in unsafe loads or stores.
309 bool _has_stringbuilder; // True StringBuffers or StringBuilders are allocated
310 bool _has_boxed_value; // True if a boxed object is allocated
311 bool _has_reserved_stack_access; // True if the method or an inlined method is annotated with ReservedStackAccess
312 uint _max_vector_size; // Maximum size of generated vectors
313 bool _clear_upper_avx; // Clear upper bits of ymm registers using vzeroupper
314 uint _trap_hist[trapHistLength]; // Cumulative traps
315 bool _trap_can_recompile; // Have we emitted a recompiling trap?
316 uint _decompile_count; // Cumulative decompilation counts.
317 bool _do_inlining; // True if we intend to do inlining
318 bool _do_scheduling; // True if we intend to do scheduling
319 bool _do_freq_based_layout; // True if we intend to do frequency based block layout
320 bool _do_vector_loop; // True if allowed to execute loop in parallel iterations
321 bool _use_cmove; // True if CMove should be used without profitability analysis
322 bool _age_code; // True if we need to profile code age (decrement the aging counter)
323 int _AliasLevel; // Locally-adjusted version of AliasLevel flag.
324 bool _print_assembly; // True if we should dump assembly code for this compilation
325 bool _print_inlining; // True if we should print inlining for this compilation
326 bool _print_intrinsics; // True if we should print intrinsics for this compilation
327#ifndef PRODUCT
328 uint _igv_idx; // Counter for IGV node identifiers
329 bool _trace_opto_output;
330 bool _print_ideal;
331 bool _parsed_irreducible_loop; // True if ciTypeFlow detected irreducible loops during parsing
332#endif
333 bool _has_irreducible_loop; // Found irreducible loops
334 // JSR 292
335 bool _has_method_handle_invokes; // True if this method has MethodHandle invokes.
336 RTMState _rtm_state; // State of Restricted Transactional Memory usage
337 int _loop_opts_cnt; // loop opts round
338 bool _clinit_barrier_on_entry; // True if clinit barrier is needed on nmethod entry
339 uint _stress_seed; // Seed for stress testing
340
341 // Compilation environment.
342 Arena _comp_arena; // Arena with lifetime equivalent to Compile
343 void* _barrier_set_state; // Potential GC barrier state for Compile
344 ciEnv* _env; // CI interface
345 DirectiveSet* _directive; // Compiler directive
346 CompileLog* _log; // from CompilerThread
347 const char* _failure_reason; // for record_failure/failing pattern
348 GrowableArray<CallGenerator*> _intrinsics; // List of intrinsics.
349 GrowableArray<Node*> _macro_nodes; // List of nodes which need to be expanded before matching.
350 GrowableArray<Node*> _predicate_opaqs; // List of Opaque1 nodes for the loop predicates.
351 GrowableArray<Node*> _skeleton_predicate_opaqs; // List of Opaque4 nodes for the loop skeleton predicates.
352 GrowableArray<Node*> _expensive_nodes; // List of nodes that are expensive to compute and that we'd better not let the GVN freely common
353 GrowableArray<Node*> _for_post_loop_igvn; // List of nodes for IGVN after loop opts are over
354 GrowableArray<Node_List*> _coarsened_locks; // List of coarsened Lock and Unlock nodes
355 ConnectionGraph* _congraph;
356#ifndef PRODUCT
357 IdealGraphPrinter* _printer;
358 static IdealGraphPrinter* _debug_file_printer;
359 static IdealGraphPrinter* _debug_network_printer;
360#endif
361
362
363 // Node management
364 uint _unique; // Counter for unique Node indices
365 VectorSet _dead_node_list; // Set of dead nodes
366 uint _dead_node_count; // Number of dead nodes; VectorSet::Size() is O(N).
367 // So use this to keep count and make the call O(1).
368 DEBUG_ONLY(Unique_Node_List* _modified_nodes;) // List of nodes whose inputs were modified
369 DEBUG_ONLY(bool _phase_optimize_finished;) // Used for live node verification while creating new nodes
370
371 debug_only(static int _debug_idx;) // Monotonic counter (not reset), use -XX:BreakAtNode=<idx>
372 Arena _node_arena; // Arena for new-space Nodes
373 Arena _old_arena; // Arena for old-space Nodes, lifetime during xform
374 RootNode* _root; // Unique root of compilation, or NULL after bail-out.
375 Node* _top; // Unique top node. (Reset by various phases.)
376
377 Node* _immutable_memory; // Initial memory state
378
379 Node* _recent_alloc_obj;
380 Node* _recent_alloc_ctl;
381
382 // Constant table
383 MachConstantBaseNode* _mach_constant_base_node; // Constant table base node singleton.
384
385
386 // Blocked array of debugging and profiling information,
387 // tracked per node.
388 enum { _log2_node_notes_block_size = 8,
389 _node_notes_block_size = (1<<_log2_node_notes_block_size)
390 };
391 GrowableArray<Node_Notes*>* _node_note_array;
392 Node_Notes* _default_node_notes; // default notes for new nodes
393
394 // After parsing and every bulk phase we hang onto the Root instruction.
395 // The RootNode instruction is where the whole program begins. It produces
396 // the initial Control and BOTTOM for everybody else.
397
398 // Type management
399 Arena _Compile_types; // Arena for all types
400 Arena* _type_arena; // Alias for _Compile_types except in Initialize_shared()
401 Dict* _type_dict; // Intern table
402 CloneMap _clone_map; // used for recording history of cloned nodes
403 size_t _type_last_size; // Last allocation size (see Type::operator new/delete)
404 ciMethod* _last_tf_m; // Cache for
405 const TypeFunc* _last_tf; // TypeFunc::make
406 AliasType** _alias_types; // List of alias types seen so far.
407 int _num_alias_types; // Logical length of _alias_types
408 int _max_alias_types; // Physical length of _alias_types
409 AliasCacheEntry _alias_cache[AliasCacheSize]; // Gets aliases w/o data structure walking
410
411 // Parsing, optimization
412 PhaseGVN* _initial_gvn; // Results of parse-time PhaseGVN
413 Unique_Node_List* _for_igvn; // Initial work-list for next round of Iterative GVN
414
415 GrowableArray<CallGenerator*> _late_inlines; // List of CallGenerators to be revisited after main parsing has finished.
416 GrowableArray<CallGenerator*> _string_late_inlines; // same but for string operations
417 GrowableArray<CallGenerator*> _boxing_late_inlines; // same but for boxing operations
418
419 GrowableArray<CallGenerator*> _vector_reboxing_late_inlines; // same but for vector reboxing operations
420
421 int _late_inlines_pos; // Where in the queue should the next late inlining candidate go (emulate depth first inlining)
422 uint _number_of_mh_late_inlines; // number of method handle late inlining still pending
423
424 GrowableArray<RuntimeStub*> _native_invokers;
425
426 // Inlining may not happen in parse order, which would make
427 // PrintInlining output confusing. Keep track of PrintInlining
428 // pieces in order.
429 class PrintInliningBuffer : public CHeapObj<mtCompiler> {
430 private:
431 CallGenerator* _cg;
432 stringStream _ss;
433 static const size_t default_stream_buffer_size = 128;
434
435 public:
436 PrintInliningBuffer()
437 : _cg(NULL), _ss(default_stream_buffer_size) {}
438
439 stringStream* ss() { return &_ss; }
440 CallGenerator* cg() { return _cg; }
441 void set_cg(CallGenerator* cg) { _cg = cg; }
442 };
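As the comment above notes, inlining messages can arrive out of parse order; each call site gets its own buffer and the buffered pieces are emitted later in a stable order. Below is a minimal sketch of that buffering idea in standard C++. It is illustrative only: it is not HotSpot's stringStream/GrowableArray machinery, and keying messages by call-site position is an assumption made for the example.

#include <iostream>
#include <map>
#include <string>

int main() {
  std::map<int, std::string> by_site;     // call-site position -> buffered text
  by_site[2] += "  inlined callee B\n";   // message produced first...
  by_site[1] += "  inlined callee A\n";   // ...but belongs to an earlier site
  for (const auto& entry : by_site) {     // std::map iterates in key order,
    std::cout << entry.second;            // so output follows parse order
  }
  return 0;
}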
443
444 stringStream* _print_inlining_stream;
445 GrowableArray<PrintInliningBuffer*>* _print_inlining_list;
446 int _print_inlining_idx;
447 char* _print_inlining_output;
448
449 // Only keep nodes in the expensive node list that need to be optimized
450 void cleanup_expensive_nodes(PhaseIterGVN &igvn);
451 // Use for sorting expensive nodes to bring similar nodes together
452 static int cmp_expensive_nodes(Node** n1, Node** n2);
453 // Expensive nodes list already sorted?
454 bool expensive_nodes_sorted() const;
455 // Remove the speculative part of types and clean up the graph
456 void remove_speculative_types(PhaseIterGVN &igvn);
457
458 void* _replay_inline_data; // Pointer to data loaded from file
459
460 void print_inlining_stream_free();
461 void print_inlining_init();
462 void print_inlining_reinit();
463 void print_inlining_commit();
464 void print_inlining_push();
465 PrintInliningBuffer* print_inlining_current();
466
467 void log_late_inline_failure(CallGenerator* cg, const char* msg);
468 DEBUG_ONLY(bool _exception_backedge;)
469
470 public:
471
472 void* barrier_set_state() const { return _barrier_set_state; }
473
474 outputStream* print_inlining_stream() const {
475 assert(print_inlining() || print_intrinsics(), "PrintInlining off?");
476 return _print_inlining_stream;
477 }
478
479 void print_inlining_update(CallGenerator* cg);
480 void print_inlining_update_delayed(CallGenerator* cg);
481 void print_inlining_move_to(CallGenerator* cg);
482 void print_inlining_assert_ready();
483 void print_inlining_reset();
484
485 void print_inlining(ciMethod* method, int inline_level, int bci, const char* msg = NULL) {
486 stringStream ss;
487 CompileTask::print_inlining_inner(&ss, method, inline_level, bci, msg);
488 print_inlining_stream()->print("%s", ss.as_string());
489 }
490
491#ifndef PRODUCT
492 IdealGraphPrinter* printer() { return _printer; }
493#endif
494
495 void log_late_inline(CallGenerator* cg);
496 void log_inline_id(CallGenerator* cg);
497 void log_inline_failure(const char* msg);
498
499 void* replay_inline_data() const { return _replay_inline_data; }
500
501 // Dump inlining replay data to the stream.
502 void dump_inline_data(outputStream* out);
503
504 private:
505 // Matching, CFG layout, allocation, code generation
506 PhaseCFG* _cfg; // Results of CFG finding
507 int _java_calls; // Number of java calls in the method
508 int _inner_loops; // Number of inner loops in the method
509 Matcher* _matcher; // Engine to map ideal to machine instructions
510 PhaseRegAlloc* _regalloc; // Results of register allocation.
511 RegMask _FIRST_STACK_mask; // All stack slots usable for spills (depends on frame layout)
512 Arena* _indexSet_arena; // control IndexSet allocation within PhaseChaitin
513 void* _indexSet_free_block_list; // free list of IndexSet bit blocks
514 int _interpreter_frame_size;
515
516 PhaseOutput* _output;
517
518 public:
519 // Accessors
520
521 // The Compile instance currently active in this (compiler) thread.
522 static Compile* current() {
523 return (Compile*) ciEnv::current()->compiler_data();
524 }
525
526 int interpreter_frame_size() const { return _interpreter_frame_size; }
527
528 PhaseOutput* output() const { return _output; }
529 void set_output(PhaseOutput* o) { _output = o; }
530
531 // ID for this compilation. Useful for setting breakpoints in the debugger.
532 int compile_id() const { return _compile_id; }
533 DirectiveSet* directive() const { return _directive; }
534
535 // Does this compilation allow instructions to subsume loads? User
536 // instructions that subsume a load may result in an unschedulable
537 // instruction sequence.
538 bool subsume_loads() const { return _options._subsume_loads; }
539 /** Do escape analysis. */
540 bool do_escape_analysis() const { return _options._do_escape_analysis; }
541 bool do_iterative_escape_analysis() const { return _options._do_iterative_escape_analysis; }
542 /** Do boxing elimination. */
543 bool eliminate_boxing() const { return _options._eliminate_boxing; }
544 /** Do aggressive boxing elimination. */
545 bool aggressive_unboxing() const { return _options._eliminate_boxing && AggressiveUnboxing; }
546 bool should_install_code() const { return _options._install_code; }
547 /** Do locks coarsening. */
548 bool do_locks_coarsening() const { return _options._do_locks_coarsening; }
549
550 // Other fixed compilation parameters.
551 ciMethod* method() const { return _method; }
552 int entry_bci() const { return _entry_bci; }
553 bool is_osr_compilation() const { return _entry_bci != InvocationEntryBci; }
554 bool is_method_compilation() const { return (_method != NULL && !_method->flags().is_native()); }
555 const TypeFunc* tf() const { assert(_tf!=NULL, ""); return _tf; }
556 void init_tf(const TypeFunc* tf) { assert(_tf==NULL, ""); _tf = tf; }
557 InlineTree* ilt() const { return _ilt; }
558 address stub_function() const { return _stub_function; }
559 const char* stub_name() const { return _stub_name; }
560 address stub_entry_point() const { return _stub_entry_point; }
561 void set_stub_entry_point(address z) { _stub_entry_point = z; }
562
563 // Control of this compilation.
564 int fixed_slots() const { assert(_fixed_slots >= 0, ""); return _fixed_slots; }
565 void set_fixed_slots(int n) { _fixed_slots = n; }
566 int major_progress() const { return _major_progress; }
567 void set_inlining_progress(bool z) { _inlining_progress = z; }
568 int inlining_progress() const { return _inlining_progress; }
569 void set_inlining_incrementally(bool z) { _inlining_incrementally = z; }
570 int inlining_incrementally() const { return _inlining_incrementally; }
571 void set_do_cleanup(bool z) { _do_cleanup = z; }
572 int do_cleanup() const { return _do_cleanup; }
573 void set_major_progress() { _major_progress++; }
574 void restore_major_progress(int progress) { _major_progress += progress; }
575 void clear_major_progress() { _major_progress = 0; }
576 int max_inline_size() const { return _max_inline_size; }
577 void set_freq_inline_size(int n) { _freq_inline_size = n; }
578 int freq_inline_size() const { return _freq_inline_size; }
579 void set_max_inline_size(int n) { _max_inline_size = n; }
580 bool has_loops() const { return _has_loops; }
581 void set_has_loops(bool z) { _has_loops = z; }
582 bool has_split_ifs() const { return _has_split_ifs; }
583 void set_has_split_ifs(bool z) { _has_split_ifs = z; }
584 bool has_unsafe_access() const { return _has_unsafe_access; }
585 void set_has_unsafe_access(bool z) { _has_unsafe_access = z; }
586 bool has_stringbuilder() const { return _has_stringbuilder; }
587 void set_has_stringbuilder(bool z) { _has_stringbuilder = z; }
588 bool has_boxed_value() const { return _has_boxed_value; }
589 void set_has_boxed_value(bool z) { _has_boxed_value = z; }
590 bool has_reserved_stack_access() const { return _has_reserved_stack_access; }
591 void set_has_reserved_stack_access(bool z) { _has_reserved_stack_access = z; }
592 uint max_vector_size() const { return _max_vector_size; }
593 void set_max_vector_size(uint s) { _max_vector_size = s; }
594 bool clear_upper_avx() const { return _clear_upper_avx; }
595 void set_clear_upper_avx(bool s) { _clear_upper_avx = s; }
596 void set_trap_count(uint r, uint c) { assert(r < trapHistLength, "oob"); _trap_hist[r] = c; }
597 uint trap_count(uint r) const { assert(r < trapHistLength, "oob"); return _trap_hist[r]; }
598 bool trap_can_recompile() const { return _trap_can_recompile; }
599 void set_trap_can_recompile(bool z) { _trap_can_recompile = z; }
600 uint decompile_count() const { return _decompile_count; }
601 void set_decompile_count(uint c) { _decompile_count = c; }
602 bool allow_range_check_smearing() const;
603 bool do_inlining() const { return _do_inlining; }
604 void set_do_inlining(bool z) { _do_inlining = z; }
605 bool do_scheduling() const { return _do_scheduling; }
606 void set_do_scheduling(bool z) { _do_scheduling = z; }
607 bool do_freq_based_layout() const{ return _do_freq_based_layout; }
608 void set_do_freq_based_layout(bool z){ _do_freq_based_layout = z; }
609 bool do_vector_loop() const { return _do_vector_loop; }
610 void set_do_vector_loop(bool z) { _do_vector_loop = z; }
611 bool use_cmove() const { return _use_cmove; }
612 void set_use_cmove(bool z) { _use_cmove = z; }
613 bool age_code() const { return _age_code; }
614 void set_age_code(bool z) { _age_code = z; }
615 int AliasLevel() const { return _AliasLevel; }
616 bool print_assembly() const { return _print_assembly; }
617 void set_print_assembly(bool z) { _print_assembly = z; }
618 bool print_inlining() const { return _print_inlining; }
619 void set_print_inlining(bool z) { _print_inlining = z; }
620 bool print_intrinsics() const { return _print_intrinsics; }
621 void set_print_intrinsics(bool z) { _print_intrinsics = z; }
622 RTMState rtm_state() const { return _rtm_state; }
623 void set_rtm_state(RTMState s) { _rtm_state = s; }
624 bool use_rtm() const { return (_rtm_state & NoRTM) == 0; }
625 bool profile_rtm() const { return _rtm_state == ProfileRTM; }
626 uint max_node_limit() const { return (uint)_max_node_limit; }
627 void set_max_node_limit(uint n) { _max_node_limit = n; }
628 bool clinit_barrier_on_entry() { return _clinit_barrier_on_entry; }
629 void set_clinit_barrier_on_entry(bool z) { _clinit_barrier_on_entry = z; }
630
631 // check the CompilerOracle for special behaviours for this compile
632 bool method_has_option(enum CompileCommand option) {
633 return method() != NULL && method()->has_option(option);
634 }
635
636#ifndef PRODUCT
637 uint next_igv_idx() { return _igv_idx++; }
638 bool trace_opto_output() const { return _trace_opto_output; }
639 bool print_ideal() const { return _print_ideal; }
640 bool parsed_irreducible_loop() const { return _parsed_irreducible_loop; }
641 void set_parsed_irreducible_loop(bool z) { _parsed_irreducible_loop = z; }
642 int _in_dump_cnt; // Required for dumping ir nodes.
643#endif
644 bool has_irreducible_loop() const { return _has_irreducible_loop; }
645 void set_has_irreducible_loop(bool z) { _has_irreducible_loop = z; }
646
647 // JSR 292
648 bool has_method_handle_invokes() const { return _has_method_handle_invokes; }
649 void set_has_method_handle_invokes(bool z) { _has_method_handle_invokes = z; }
650
651 Ticks _latest_stage_start_counter;
652
653 void begin_method(int level = 1) {
654#ifndef PRODUCT
655 if (_method != NULL && should_print(level)) {
656 _printer->begin_method();
657 }
658#endif
659 C->_latest_stage_start_counter.stamp();
660 }
661
662 bool should_print(int level = 1) {
663#ifndef PRODUCT
664 if (PrintIdealGraphLevel < 0) { // disabled by the user
665 return false;
666 }
667
668 bool need = directive()->IGVPrintLevelOption >= level;
669 if (need && !_printer) {
670 _printer = IdealGraphPrinter::printer();
671 assert(_printer != NULL, "_printer is NULL when we need it!");
672 _printer->set_compile(this);
673 }
674 return need;
675#else
676 return false;
677#endif
678 }
679
680 void print_method(CompilerPhaseType cpt, const char *name, int level = 1);
681 void print_method(CompilerPhaseType cpt, int level = 1, int idx = 0);
682 void print_method(CompilerPhaseType cpt, Node* n, int level = 3);
683
684#ifndef PRODUCT
685 void igv_print_method_to_file(const char* phase_name = "Debug", bool append = false);
686 void igv_print_method_to_network(const char* phase_name = "Debug");
687 static IdealGraphPrinter* debug_file_printer() { return _debug_file_printer; }
688 static IdealGraphPrinter* debug_network_printer() { return _debug_network_printer; }
689#endif
690
691 void end_method(int level = 1);
692
693 int macro_count() const { return _macro_nodes.length(); }
694 int predicate_count() const { return _predicate_opaqs.length(); }
695 int skeleton_predicate_count() const { return _skeleton_predicate_opaqs.length(); }
696 int expensive_count() const { return _expensive_nodes.length(); }
697 int coarsened_count() const { return _coarsened_locks.length(); }
698
699 Node* macro_node(int idx) const { return _macro_nodes.at(idx); }
700 Node* predicate_opaque1_node(int idx) const { return _predicate_opaqs.at(idx); }
701 Node* skeleton_predicate_opaque4_node(int idx) const { return _skeleton_predicate_opaqs.at(idx); }
702 Node* expensive_node(int idx) const { return _expensive_nodes.at(idx); }
703
704 ConnectionGraph* congraph() { return _congraph;}
705 void set_congraph(ConnectionGraph* congraph) { _congraph = congraph;}
706 void add_macro_node(Node * n) {
707 //assert(n->is_macro(), "must be a macro node");
708 assert(!_macro_nodes.contains(n), "duplicate entry in expand list");
709 _macro_nodes.append(n);
710 }
711 void remove_macro_node(Node* n) {
713 // this function may be called twice for a node, so we can only remove it
714 // if it still exists.
714 _macro_nodes.remove_if_existing(n);
715 // remove from _predicate_opaqs list also if it is there
716 if (predicate_count() > 0) {
717 _predicate_opaqs.remove_if_existing(n);
718 }
719 // Remove from coarsened locks list if present
720 if (coarsened_count() > 0) {
721 remove_coarsened_lock(n);
722 }
723 }
724 void add_expensive_node(Node* n);
725 void remove_expensive_node(Node* n) {
726 _expensive_nodes.remove_if_existing(n);
727 }
728 void add_predicate_opaq(Node* n) {
729 assert(!_predicate_opaqs.contains(n), "duplicate entry in predicate opaque1");
730 assert(_macro_nodes.contains(n), "should have already been in macro list");
731 _predicate_opaqs.append(n);
732 }
733 void add_skeleton_predicate_opaq(Node* n) {
734 assert(!_skeleton_predicate_opaqs.contains(n), "duplicate entry in skeleton predicate opaque4 list");
735 _skeleton_predicate_opaqs.append(n);
736 }
737 void remove_skeleton_predicate_opaq(Node* n) {
738 if (skeleton_predicate_count() > 0) {
739 _skeleton_predicate_opaqs.remove_if_existing(n);
740 }
741 }
742 void add_coarsened_locks(GrowableArray<AbstractLockNode*>& locks);
743 void remove_coarsened_lock(Node* n);
744 bool coarsened_locks_consistent();
745
746 bool post_loop_opts_phase() { return _post_loop_opts_phase; }
747 void set_post_loop_opts_phase() { _post_loop_opts_phase = true; }
748 void reset_post_loop_opts_phase() { _post_loop_opts_phase = false; }
749
750 void record_for_post_loop_opts_igvn(Node* n);
751 void remove_from_post_loop_opts_igvn(Node* n);
752 void process_for_post_loop_opts_igvn(PhaseIterGVN& igvn);
753
754 void sort_macro_nodes();
755
756 // remove the opaque nodes that protect the predicates so that the unused checks and
757 // uncommon traps will be eliminated from the graph.
758 void cleanup_loop_predicates(PhaseIterGVN &igvn);
759 bool is_predicate_opaq(Node* n) {
760 return _predicate_opaqs.contains(n);
761 }
762
763 // Are there candidate expensive nodes for optimization?
764 bool should_optimize_expensive_nodes(PhaseIterGVN &igvn);
765 // Check whether n1 and n2 are similar
766 static int cmp_expensive_nodes(Node* n1, Node* n2);
767 // Sort expensive nodes to locate similar expensive nodes
768 void sort_expensive_nodes();
769
770 // Compilation environment.
771 Arena* comp_arena() { return &_comp_arena; }
772 ciEnv* env() const { return _env; }
773 CompileLog* log() const { return _log; }
774 bool failing() const { return _env->failing() || _failure_reason != NULL; }
775 const char* failure_reason() const { return (_env->failing()) ? _env->failure_reason() : _failure_reason; }
776
777 bool failure_reason_is(const char* r) const {
778 return (r == _failure_reason) || (r != NULL && _failure_reason != NULL && strcmp(r, _failure_reason) == 0);
779 }
780
781 void record_failure(const char* reason);
782 void record_method_not_compilable(const char* reason) {
783 env()->record_method_not_compilable(reason);
784 // Record failure reason.
785 record_failure(reason);
786 }
787 bool check_node_count(uint margin, const char* reason) {
788 if (live_nodes() + margin > max_node_limit()) {
789 record_method_not_compilable(reason);
790 return true;
791 } else {
792 return false;
793 }
794 }
795
796 // Node management
797 uint unique() const { return _unique; }
798 uint next_unique() { return _unique++; }
799 void set_unique(uint i) { _unique = i; }
800 static int debug_idx() { return debug_only(_debug_idx)+0; }
801 static void set_debug_idx(int i) { debug_only(_debug_idx = i); }
802 Arena* node_arena() { return &_node_arena; }
803 Arena* old_arena() { return &_old_arena; }
804 RootNode* root() const { return _root; }
805 void set_root(RootNode* r) { _root = r; }
806 StartNode* start() const; // (Derived from root.)
807 void init_start(StartNode* s);
808 Node* immutable_memory();
809
810 Node* recent_alloc_ctl() const { return _recent_alloc_ctl; }
811 Node* recent_alloc_obj() const { return _recent_alloc_obj; }
812 void set_recent_alloc(Node* ctl, Node* obj) {
813 _recent_alloc_ctl = ctl;
814 _recent_alloc_obj = obj;
815 }
816 void record_dead_node(uint idx) { if (_dead_node_list.test_set(idx)) return;
817 _dead_node_count++;
818 }
819 void reset_dead_node_list() { _dead_node_list.reset();
820 _dead_node_count = 0;
821 }
822 uint live_nodes() const {
823 int val = _unique - _dead_node_count;
824 assert (val >= 0, "number of tracked dead nodes %d more than created nodes %d", _unique, _dead_node_count);
825 return (uint) val;
826 }
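The accessors above rely on the bookkeeping described at the _dead_node_count declaration: record_dead_node() only bumps the counter when the bitset insertion reports a new entry, so live_nodes() is a constant-time subtraction instead of a scan of the set. A hedged sketch of the same pattern in plain C++ follows; std::vector<bool> stands in for VectorSet and all names are illustrative, not HotSpot code.

#include <cassert>
#include <vector>

struct DeadNodeTracker {
  std::vector<bool> dead;     // stand-in for the dead-node VectorSet
  unsigned dead_count = 0;    // kept alongside the set, like _dead_node_count
  unsigned unique = 0;        // total nodes ever created, like _unique

  unsigned new_node()            { dead.push_back(false); return unique++; }
  void record_dead(unsigned idx) {
    if (dead[idx]) return;      // test_set semantics: count each node once
    dead[idx] = true;
    dead_count++;
  }
  unsigned live_nodes() const    { return unique - dead_count; }  // O(1)
};

int main() {
  DeadNodeTracker t;
  unsigned a = t.new_node();
  t.new_node();
  t.new_node();
  t.record_dead(a);
  t.record_dead(a);             // a double report is ignored
  assert(t.live_nodes() == 2);
  return 0;
}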
827#ifdef ASSERT
828 void set_phase_optimize_finished() { _phase_optimize_finished = true; }
829 bool phase_optimize_finished() const { return _phase_optimize_finished; }
830 uint count_live_nodes_by_graph_walk();
831 void print_missing_nodes();
832#endif
833
834 // Record modified nodes to check that they are put on IGVN worklist
835 void record_modified_node(Node* n) NOT_DEBUG_RETURN;
836 void remove_modified_node(Node* n) NOT_DEBUG_RETURN;
837 DEBUG_ONLY( Unique_Node_List* modified_nodes() const { return _modified_nodes; } )
838
839 MachConstantBaseNode* mach_constant_base_node();
840 bool has_mach_constant_base_node() const { return _mach_constant_base_node != NULL; }
841 // Generated by adlc, true if CallNode requires MachConstantBase.
842 bool needs_deep_clone_jvms();
843
844 // Handy undefined Node
845 Node* top() const { return _top; }
846
847 // these are used by guys who need to know about creation and transformation of top:
848 Node* cached_top_node() { return _top; }
849 void set_cached_top_node(Node* tn);
850
851 GrowableArray<Node_Notes*>* node_note_array() const { return _node_note_array; }
852 void set_node_note_array(GrowableArray<Node_Notes*>* arr) { _node_note_array = arr; }
853 Node_Notes* default_node_notes() const { return _default_node_notes; }
854 void set_default_node_notes(Node_Notes* n) { _default_node_notes = n; }
855
856 Node_Notes* node_notes_at(int idx) {
857 return locate_node_notes(_node_note_array, idx, false);
858 }
859 inline bool set_node_notes_at(int idx, Node_Notes* value);
860
861 // Copy notes from source to dest, if they exist.
862 // Overwrite dest only if source provides something.
863 // Return true if information was moved.
864 bool copy_node_notes_to(Node* dest, Node* source);
865
866 // Workhorse function to sort out the blocked Node_Notes array:
867 inline Node_Notes* locate_node_notes(GrowableArray<Node_Notes*>* arr,
868 int idx, bool can_grow = false);
869
870 void grow_node_notes(GrowableArray<Node_Notes*>* arr, int grow_by);
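Node notes live in the blocked array declared earlier (_log2_node_notes_block_size = 8), so looking one up only needs a shift and a mask to split the node index into a block number and an offset within a 256-entry block. The short sketch below illustrates that addressing arithmetic with a made-up node index; it is an assumption about the scheme behind locate_node_notes(), not the actual HotSpot routine.

#include <cstdio>

int main() {
  const unsigned log2_block_size = 8;                 // mirrors _log2_node_notes_block_size
  const unsigned block_size = 1u << log2_block_size;  // 256 notes per block
  unsigned node_idx = 1000;                           // hypothetical node index
  unsigned block  = node_idx >> log2_block_size;      // which block: 1000 / 256 = 3
  unsigned offset = node_idx & (block_size - 1);      // slot in block: 1000 % 256 = 232
  std::printf("note for node %u: block %u, offset %u\n", node_idx, block, offset);
  return 0;
}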
871
872 // Type management
873 Arena* type_arena() { return _type_arena; }
874 Dict* type_dict() { return _type_dict; }
875 size_t type_last_size() { return _type_last_size; }
876 int num_alias_types() { return _num_alias_types; }
877
878 void init_type_arena() { _type_arena = &_Compile_types; }
879 void set_type_arena(Arena* a) { _type_arena = a; }
880 void set_type_dict(Dict* d) { _type_dict = d; }
881 void set_type_last_size(size_t sz) { _type_last_size = sz; }
882
883 const TypeFunc* last_tf(ciMethod* m) {
884 return (m == _last_tf_m) ? _last_tf : NULL;
885 }
886 void set_last_tf(ciMethod* m, const TypeFunc* tf) {
887 assert(m != NULL || tf == NULL, "");
888 _last_tf_m = m;
889 _last_tf = tf;
890 }
891
892 AliasType* alias_type(int idx) { assert(idx < num_alias_types(), "oob"); return _alias_types[idx]; }
893 AliasType* alias_type(const TypePtr* adr_type, ciField* field = NULL) { return find_alias_type(adr_type, false, field); }
894 bool have_alias_type(const TypePtr* adr_type);
895 AliasType* alias_type(ciField* field);
896
897 int get_alias_index(const TypePtr* at) { return alias_type(at)->index(); }
898 const TypePtr* get_adr_type(uint aidx) { return alias_type(aidx)->adr_type(); }
899 int get_general_index(uint aidx) { return alias_type(aidx)->general_index(); }
900
901 // Building nodes
902 void rethrow_exceptions(JVMState* jvms);
903 void return_values(JVMState* jvms);
904 JVMState* build_start_state(StartNode* start, const TypeFunc* tf);
905
906 // Decide how to build a call.
907 // The profile factor is a discount to apply to this site's interp. profile.
908 CallGenerator* call_generator(ciMethod* call_method, int vtable_index, bool call_does_dispatch,
909 JVMState* jvms, bool allow_inline, float profile_factor, ciKlass* speculative_receiver_type = NULL,
910 bool allow_intrinsics = true);
911 bool should_delay_inlining(ciMethod* call_method, JVMState* jvms) {
912 return should_delay_string_inlining(call_method, jvms) ||
913 should_delay_boxing_inlining(call_method, jvms) ||
914 should_delay_vector_inlining(call_method, jvms);
915 }
916 bool should_delay_string_inlining(ciMethod* call_method, JVMState* jvms);
917 bool should_delay_boxing_inlining(ciMethod* call_method, JVMState* jvms);
918 bool should_delay_vector_inlining(ciMethod* call_method, JVMState* jvms);
919 bool should_delay_vector_reboxing_inlining(ciMethod* call_method, JVMState* jvms);
920
921 // Helper functions to identify inlining potential at call-site
922 ciMethod* optimize_virtual_call(ciMethod* caller, ciInstanceKlass* klass,
923 ciKlass* holder, ciMethod* callee,
924 const TypeOopPtr* receiver_type, bool is_virtual,
925 bool &call_does_dispatch, int &vtable_index,
926 bool check_access = true);
927 ciMethod* optimize_inlining(ciMethod* caller, ciInstanceKlass* klass, ciKlass* holder,
928 ciMethod* callee, const TypeOopPtr* receiver_type,
929 bool check_access = true);
930
931 // Report if there were too many traps at a current method and bci.
932 // Report if a trap was recorded, and/or PerMethodTrapLimit was exceeded.
933 // If there is no MDO at all, report no trap unless told to assume it.
934 bool too_many_traps(ciMethod* method, int bci, Deoptimization::DeoptReason reason);
935 // This version, unspecific to a particular bci, asks if
936 // PerMethodTrapLimit was exceeded for all inlined methods seen so far.
937 bool too_many_traps(Deoptimization::DeoptReason reason,
938 // Privately used parameter for logging:
939 ciMethodData* logmd = NULL);
940 // Report if there were too many recompiles at a method and bci.
941 bool too_many_recompiles(ciMethod* method, int bci, Deoptimization::DeoptReason reason);
942 // Report if there were too many traps or recompiles at a method and bci.
943 bool too_many_traps_or_recompiles(ciMethod* method, int bci, Deoptimization::DeoptReason reason) {
944 return too_many_traps(method, bci, reason) ||
945 too_many_recompiles(method, bci, reason);
946 }
947 // Return a bitset with the reasons where deoptimization is allowed,
948 // i.e., where there were not too many uncommon traps.
949 int _allowed_reasons;
950 int allowed_deopt_reasons() { return _allowed_reasons; }
951 void set_allowed_deopt_reasons();
952
953 // Parsing, optimization
954 PhaseGVN* initial_gvn() { return _initial_gvn; }
955 Unique_Node_List* for_igvn() { return _for_igvn; }
956 inline void record_for_igvn(Node* n); // Body is after class Unique_Node_List.
957 void set_initial_gvn(PhaseGVN *gvn) { _initial_gvn = gvn; }
958 void set_for_igvn(Unique_Node_List *for_igvn) { _for_igvn = for_igvn; }
959
960 // Replace n by nn using initial_gvn, calling hash_delete and
961 // record_for_igvn as needed.
962 void gvn_replace_by(Node* n, Node* nn);
963
964
965 void identify_useful_nodes(Unique_Node_List &useful);
966 void update_dead_node_list(Unique_Node_List &useful);
967 void remove_useless_nodes (Unique_Node_List &useful);
968
969 void remove_useless_node(Node* dead);
970
971 // Record this CallGenerator for inlining at the end of parsing.
972 void add_late_inline(CallGenerator* cg) {
973 _late_inlines.insert_before(_late_inlines_pos, cg);
974 _late_inlines_pos++;
975 }
976
977 void prepend_late_inline(CallGenerator* cg) {
978 _late_inlines.insert_before(0, cg);
979 }
980
981 void add_string_late_inline(CallGenerator* cg) {
982 _string_late_inlines.push(cg);
983 }
984
985 void add_boxing_late_inline(CallGenerator* cg) {
986 _boxing_late_inlines.push(cg);
987 }
988
989 void add_vector_reboxing_late_inline(CallGenerator* cg) {
990 _vector_reboxing_late_inlines.push(cg);
991 }
992
993 void add_native_invoker(RuntimeStub* stub);
994
995 const GrowableArray<RuntimeStub*> native_invokers() const { return _native_invokers; }
996
997 void remove_useless_nodes (GrowableArray<Node*>& node_list, Unique_Node_List &useful);
998
999 void remove_useless_late_inlines(GrowableArray<CallGenerator*>* inlines, Unique_Node_List &useful);
1000 void remove_useless_late_inlines(GrowableArray<CallGenerator*>* inlines, Node* dead);
1001
1002 void remove_useless_coarsened_locks(Unique_Node_List& useful);
1003
1004 void process_print_inlining();
1005 void dump_print_inlining();
1006
1007 bool over_inlining_cutoff() const {
1008 if (!inlining_incrementally()) {
1009 return unique() > (uint)NodeCountInliningCutoff;
1010 } else {
1011 // Give some room for incremental inlining algorithm to "breathe"
1012 // and avoid thrashing when live node count is close to the limit.
1013 // Keep in mind that live_nodes() isn't accurate during inlining until
1014 // dead node elimination step happens (see Compile::inline_incrementally).
1015 return live_nodes() > (uint)LiveNodeCountInliningCutoff * 11 / 10;
1016 }
1017 }
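The 11/10 factor above gives incremental inlining roughly 10% of headroom over the live-node cutoff before inlining stops, since live_nodes() can overestimate until dead-node elimination runs. A tiny worked example of that arithmetic with made-up numbers; the constant below is a stand-in, not the real LiveNodeCountInliningCutoff value.

#include <cstdio>

int main() {
  unsigned live_nodes = 82000;                  // hypothetical live node count
  unsigned cutoff     = 80000;                  // stand-in for LiveNodeCountInliningCutoff
  unsigned stretched  = cutoff * 11 / 10;       // 88000: cutoff plus ~10% breathing room
  bool over = live_nodes > stretched;           // false here, so inlining may continue
  std::printf("stretched cutoff %u, over: %s\n", stretched, over ? "yes" : "no");
  return 0;
}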
1018
1019 void inc_number_of_mh_late_inlines() { _number_of_mh_late_inlines++; }
1020 void dec_number_of_mh_late_inlines() { assert(_number_of_mh_late_inlines > 0, "_number_of_mh_late_inlines < 0 !"); _number_of_mh_late_inlines--; }
1021 bool has_mh_late_inlines() const { return _number_of_mh_late_inlines > 0; }
1022
1023 bool inline_incrementally_one();
1024 void inline_incrementally_cleanup(PhaseIterGVN& igvn);
1025 void inline_incrementally(PhaseIterGVN& igvn);
1026 void inline_string_calls(bool parse_time);
1027 void inline_boxing_calls(PhaseIterGVN& igvn);
1028 bool optimize_loops(PhaseIterGVN& igvn, LoopOptsMode mode);
1029 void remove_root_to_sfpts_edges(PhaseIterGVN& igvn);
1030
1031 void inline_vector_reboxing_calls();
1032 bool has_vbox_nodes();
1033
1034 void process_late_inline_calls_no_inline(PhaseIterGVN& igvn);
1035
1036 // Matching, CFG layout, allocation, code generation
1037 PhaseCFG* cfg() { return _cfg; }
1038 bool has_java_calls() const { return _java_calls > 0; }
1039 int java_calls() const { return _java_calls; }
1040 int inner_loops() const { return _inner_loops; }
1041 Matcher* matcher() { return _matcher; }
1042 PhaseRegAlloc* regalloc() { return _regalloc; }
1043 RegMask& FIRST_STACK_mask() { return _FIRST_STACK_mask; }
1044 Arena* indexSet_arena() { return _indexSet_arena; }
1045 void* indexSet_free_block_list() { return _indexSet_free_block_list; }
1046 DebugInformationRecorder* debug_info() { return env()->debug_info(); }
1047
1048 void update_interpreter_frame_size(int size) {
1049 if (_interpreter_frame_size < size) {
1050 _interpreter_frame_size = size;
1051 }
1052 }
1053
1054 void set_matcher(Matcher* m) { _matcher = m; }
1055//void set_regalloc(PhaseRegAlloc* ra) { _regalloc = ra; }
1056 void set_indexSet_arena(Arena* a) { _indexSet_arena = a; }
1057 void set_indexSet_free_block_list(void* p) { _indexSet_free_block_list = p; }
1058
1059 void set_java_calls(int z) { _java_calls = z; }
1060 void set_inner_loops(int z) { _inner_loops = z; }
1061
1062 Dependencies* dependencies() { return env()->dependencies(); }
1063
1064 // Major entry point. Given a Scope, compile the associated method.
1065 // For normal compilations, entry_bci is InvocationEntryBci. For on stack
1066 // replacement, entry_bci indicates the bytecode for which to compile a
1067 // continuation.
1068 Compile(ciEnv* ci_env, ciMethod* target,
1069 int entry_bci, Options options, DirectiveSet* directive);
1070
1071 // Second major entry point. From the TypeFunc signature, generate code
1072 // to pass arguments from the Java calling convention to the C calling
1073 // convention.
1074 Compile(ciEnv* ci_env, const TypeFunc *(*gen)(),
1075 address stub_function, const char *stub_name,
1076 int is_fancy_jump, bool pass_tls,
1077 bool return_pc, DirectiveSet* directive);
1078
1079 // Are we compiling a method?
1080 bool has_method() { return method() != NULL; }
1081
1082 // Maybe print some information about this compile.
1083 void print_compile_messages();
1084
1085 // Final graph reshaping, a post-pass after the regular optimizer is done.
1086 bool final_graph_reshaping();
1087
1088 // returns true if adr is completely contained in the given alias category
1089 bool must_alias(const TypePtr* adr, int alias_idx);
1090
1091 // returns true if adr overlaps with the given alias category
1092 bool can_alias(const TypePtr* adr, int alias_idx);
1093
1094 // Stack slots that may be unused by the calling convention but must
1095 // otherwise be preserved. On Intel this includes the return address.
1096 // On PowerPC it includes the 4 words holding the old TOC & LR glue.
1097 uint in_preserve_stack_slots() {
1098 return SharedRuntime::in_preserve_stack_slots();
1099 }
1100
1101 // "Top of Stack" slots that may be unused by the calling convention but must
1102 // otherwise be preserved.
1103 // On Intel these are not necessary and the value can be zero.
1104 static uint out_preserve_stack_slots() {
1105 return SharedRuntime::out_preserve_stack_slots();
1106 }
1107
1108 // Number of outgoing stack slots killed above the out_preserve_stack_slots
1109 // for calls to C. Supports the var-args backing area for register parms.
1110 uint varargs_C_out_slots_killed() const;
1111
1112 // Number of Stack Slots consumed by a synchronization entry
1113 int sync_stack_slots() const;
1114
1115 // Compute the name of old_SP. See <arch>.ad for frame layout.
1116 OptoReg::Name compute_old_SP();
1117
1118 private:
1119 // Phase control:
1120 void Init(int aliaslevel); // Prepare for a single compilation
1121 int Inline_Warm(); // Find more inlining work.
1122 void Finish_Warm(); // Give up on further inlines.
1123 void Optimize(); // Given a graph, optimize it
1124 void Code_Gen(); // Generate code from a graph
1125
1126 // Management of the AliasType table.
1127 void grow_alias_types();
1128 AliasCacheEntry* probe_alias_cache(const TypePtr* adr_type);
1129 const TypePtr *flatten_alias_type(const TypePtr* adr_type) const;
1130 AliasType* find_alias_type(const TypePtr* adr_type, bool no_create, ciField* field);
1131
1132 void verify_top(Node*) const PRODUCT_RETURN;
1133
1134 // Intrinsic setup.
1135 CallGenerator* make_vm_intrinsic(ciMethod* m, bool is_virtual); // constructor
1136 int intrinsic_insertion_index(ciMethod* m, bool is_virtual, bool& found); // helper
1137 CallGenerator* find_intrinsic(ciMethod* m, bool is_virtual); // query fn
1138 void register_intrinsic(CallGenerator* cg); // update fn
1139
1140#ifndef PRODUCT
1141 static juint _intrinsic_hist_count[];
1142 static jubyte _intrinsic_hist_flags[];
1143#endif
1144 // Function calls made by the public function final_graph_reshaping.
1145 // No need to be made public as they are not called elsewhere.
1146 void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc);
1147 void final_graph_reshaping_main_switch(Node* n, Final_Reshape_Counts& frc, uint nop);
1148 void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc );
1149 void eliminate_redundant_card_marks(Node* n);
1150
1151 // Logic cone optimization.
1152 void optimize_logic_cones(PhaseIterGVN &igvn);
1153 void collect_logic_cone_roots(Unique_Node_List& list);
1154 void process_logic_cone_root(PhaseIterGVN &igvn, Node* n, VectorSet& visited);
1155 bool compute_logic_cone(Node* n, Unique_Node_List& partition, Unique_Node_List& inputs);
1156 uint compute_truth_table(Unique_Node_List& partition, Unique_Node_List& inputs);
1157 uint eval_macro_logic_op(uint func, uint op1, uint op2, uint op3);
1158 Node* xform_to_MacroLogicV(PhaseIterGVN &igvn, const TypeVect* vt, Unique_Node_List& partitions, Unique_Node_List& inputs);
1159 void check_no_dead_use() const NOT_DEBUG_RETURN;
1160
1161 public:
1162
1163 // Note: Histogram array size is about 1 Kb.
1164 enum { // flag bits:
1165 _intrinsic_worked = 1, // succeeded at least once
1166 _intrinsic_failed = 2, // tried it but it failed
1167 _intrinsic_disabled = 4, // was requested but disabled (e.g., -XX:-InlineUnsafeOps)
1168 _intrinsic_virtual = 8, // was seen in the virtual form (rare)
1169 _intrinsic_both = 16 // was seen in the non-virtual form (usual)
1170 };
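These are bit flags, so a single histogram entry can record several outcomes at once, and checking an outcome is a mask test. A short illustration in standard C++; the numeric values are copied from the enum above, while the underscore-free names and the printout are made up for the example.

#include <cstdio>

enum IntrinsicFlagBits {
  intrinsic_worked   = 1,
  intrinsic_failed   = 2,
  intrinsic_disabled = 4,
  intrinsic_virtual  = 8,
  intrinsic_both     = 16
};

int main() {
  int flags = 0;
  flags |= intrinsic_worked;                    // succeeded at least once
  flags |= intrinsic_both;                      // seen in the non-virtual form
  bool worked = (flags & intrinsic_worked) != 0;
  bool failed = (flags & intrinsic_failed) != 0;
  std::printf("worked=%d failed=%d\n", worked ? 1 : 0, failed ? 1 : 0);
  return 0;
}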
1171 // Update histogram. Return boolean if this is a first-time occurrence.
1172 static bool gather_intrinsic_statistics(vmIntrinsics::ID id,
1173 bool is_virtual, int flags) PRODUCT_RETURN0;
1174 static void print_intrinsic_statistics() PRODUCT_RETURN;
1175
1176 // Graph verification code
1177 // Walk the node list, verifying that there is a one-to-one
1178 // correspondence between Use-Def edges and Def-Use edges
1179 // The option no_dead_code enables stronger checks that the
1180 // graph is strongly connected from root in both directions.
1181 void verify_graph_edges(bool no_dead_code = false) PRODUCT_RETURN;
1182
1183 // End-of-run dumps.
1184 static void print_statistics() PRODUCT_RETURN;
1185
1186 // Verify ADLC assumptions during startup
1187 static void adlc_verification() PRODUCT_RETURN;
1188
1189 // Definitions of pd methods
1190 static void pd_compiler2_init();
1191
1192 // Static parse-time type checking logic for gen_subtype_check:
1193 enum { SSC_always_false, SSC_always_true, SSC_easy_test, SSC_full_test };
1194 int static_subtype_check(ciKlass* superk, ciKlass* subk);
1195
1196 static Node* conv_I2X_index(PhaseGVN* phase, Node* offset, const TypeInt* sizetype,
1197 // Optional control dependency (for example, on range check)
1198 Node* ctrl = NULL);
1199
1200 // Convert integer value to a narrowed long type dependent on ctrl (for example, a range check)
1201 static Node* constrained_convI2L(PhaseGVN* phase, Node* value, const TypeInt* itype, Node* ctrl, bool carry_dependency = false);
1202
1203 // Auxiliary methods for randomized fuzzing/stressing
1204 int random();
1205 bool randomized_select(int count);
1206
1207 // supporting clone_map
1208 CloneMap& clone_map();
1209 void set_clone_map(Dict* d);
1210
1211 bool needs_clinit_barrier(ciField* ik, ciMethod* accessing_method);
1212 bool needs_clinit_barrier(ciMethod* ik, ciMethod* accessing_method);
1213 bool needs_clinit_barrier(ciInstanceKlass* ik, ciMethod* accessing_method);
1214
1215#ifdef IA32
1216 private:
1217 bool _select_24_bit_instr; // We selected an instruction with a 24-bit result
1218 bool _in_24_bit_fp_mode; // We are emitting instructions with 24-bit results
1219
1220 // Remember if this compilation changes hardware mode to 24-bit precision.
1221 void set_24_bit_selection_and_mode(bool selection, bool mode) {
1222 _select_24_bit_instr = selection;
1223 _in_24_bit_fp_mode = mode;
1224 }
1225
1226 public:
1227 bool select_24_bit_instr() const { return _select_24_bit_instr; }
1228 bool in_24_bit_fp_mode() const { return _in_24_bit_fp_mode; }
1229#endif // IA32
1230#ifdef ASSERT
1231 bool _type_verify_symmetry;
1232 void set_exception_backedge() { _exception_backedge = true; }
1233 bool has_exception_backedge() const { return _exception_backedge; }
1234#endif
1235
1236 static bool push_thru_add(PhaseGVN* phase, Node* z, const TypeInteger* tz, const TypeInteger*& rx, const TypeInteger*& ry,
1237 BasicType bt);
1238
1239 static Node* narrow_value(BasicType bt, Node* value, const Type* type, PhaseGVN* phase, bool transform_res);
1240};
1241
1242#endif // SHARE_OPTO_COMPILE_HPP