Bug Summary

File:jdk/src/hotspot/share/opto/loopTransform.cpp
Warning:line 1525, column 9
Value stored to 'pre_header' during its initialization is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name loopTransform.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -mthread-model posix -fno-delete-null-pointer-checks -mframe-pointer=all -relaxed-aliasing -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/libjvm/objs/precompiled -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D _GNU_SOURCE -D _REENTRANT -D LIBC=gnu -D LINUX -D VM_LITTLE_ENDIAN -D _LP64=1 -D ASSERT -D CHECK_UNHANDLED_OOPS -D TARGET_ARCH_x86 -D INCLUDE_SUFFIX_OS=_linux -D INCLUDE_SUFFIX_CPU=_x86 -D INCLUDE_SUFFIX_COMPILER=_gcc -D TARGET_COMPILER_gcc -D AMD64 -D HOTSPOT_LIB_ARCH="amd64" -D COMPILER1 -D COMPILER2 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc/adfiles -I /home/daniel/Projects/java/jdk/src/hotspot/share -I /home/daniel/Projects/java/jdk/src/hotspot/os/linux -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix -I /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86 -I /home/daniel/Projects/java/jdk/src/hotspot/os_cpu/linux_x86 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc -I /home/daniel/Projects/java/jdk/src/hotspot/share/precompiled -I /home/daniel/Projects/java/jdk/src/hotspot/share/include -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix/include -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base/linux -I /home/daniel/Projects/java/jdk/src/java.base/share/native/libjimage -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc/adfiles -I /home/daniel/Projects/java/jdk/src/hotspot/share -I /home/daniel/Projects/java/jdk/src/hotspot/os/linux -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix -I /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86 -I /home/daniel/Projects/java/jdk/src/hotspot/os_cpu/linux_x86 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc -D _FORTIFY_SOURCE=2 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-format-zero-length -Wno-unused-parameter -Wno-unused -Wno-parentheses -Wno-comment -Wno-unknown-pragmas -Wno-address -Wno-delete-non-virtual-dtor -Wno-char-subscripts -Wno-array-bounds -Wno-int-in-bool-context -Wno-ignored-qualifiers -Wno-missing-field-initializers -Wno-implicit-fallthrough -Wno-empty-body -Wno-strict-overflow -Wno-sequence-point -Wno-maybe-uninitialized -Wno-misleading-indentation -Wno-cast-function-type -Wno-shift-negative-value -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /home/daniel/Projects/java/jdk/make/hotspot -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -stack-protector 1 -fno-rtti -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -o /home/daniel/Projects/java/scan/2021-12-21-193737-8510-1 -x c++ /home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp
1/*
2 * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "compiler/compileLog.hpp"
27#include "memory/allocation.inline.hpp"
28#include "opto/addnode.hpp"
29#include "opto/callnode.hpp"
30#include "opto/castnode.hpp"
31#include "opto/connode.hpp"
32#include "opto/convertnode.hpp"
33#include "opto/divnode.hpp"
34#include "opto/loopnode.hpp"
35#include "opto/mulnode.hpp"
36#include "opto/movenode.hpp"
37#include "opto/opaquenode.hpp"
38#include "opto/rootnode.hpp"
39#include "opto/runtime.hpp"
40#include "opto/subnode.hpp"
41#include "opto/superword.hpp"
42#include "opto/vectornode.hpp"
43#include "runtime/globals_extension.hpp"
44#include "runtime/stubRoutines.hpp"
45
46//------------------------------is_loop_exit-----------------------------------
47// Given an IfNode, return the loop-exiting projection or NULL if both
48// arms remain in the loop.
49Node *IdealLoopTree::is_loop_exit(Node *iff) const {
50 if (iff->outcnt() != 2) return NULL__null; // Ignore partially dead tests
51 PhaseIdealLoop *phase = _phase;
52 // Test is an IfNode, has 2 projections. If BOTH are in the loop
53 // we need loop unswitching instead of peeling.
54 if (!is_member(phase->get_loop(iff->raw_out(0))))
55 return iff->raw_out(0);
56 if (!is_member(phase->get_loop(iff->raw_out(1))))
57 return iff->raw_out(1);
58 return NULL__null;
59}
60
61
62//=============================================================================
63
64
65//------------------------------record_for_igvn----------------------------
66// Put loop body on igvn work list
67void IdealLoopTree::record_for_igvn() {
68 for (uint i = 0; i < _body.size(); i++) {
69 Node *n = _body.at(i);
70 _phase->_igvn._worklist.push(n);
71 }
72 // put body of outer strip mined loop on igvn work list as well
73 if (_head->is_CountedLoop() && _head->as_Loop()->is_strip_mined()) {
74 CountedLoopNode* l = _head->as_CountedLoop();
75 Node* outer_loop = l->outer_loop();
76 assert(outer_loop != NULL, "missing piece of strip mined loop")do { if (!(outer_loop != __null)) { (*g_assert_poison) = 'X';
; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 76, "assert(" "outer_loop != __null" ") failed", "missing piece of strip mined loop"
); ::breakpoint(); } } while (0)
;
77 _phase->_igvn._worklist.push(outer_loop);
78 Node* outer_loop_tail = l->outer_loop_tail();
79 assert(outer_loop_tail != NULL, "missing piece of strip mined loop")do { if (!(outer_loop_tail != __null)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 79, "assert(" "outer_loop_tail != __null" ") failed", "missing piece of strip mined loop"
); ::breakpoint(); } } while (0)
;
80 _phase->_igvn._worklist.push(outer_loop_tail);
81 Node* outer_loop_end = l->outer_loop_end();
82 assert(outer_loop_end != NULL, "missing piece of strip mined loop")do { if (!(outer_loop_end != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 82, "assert(" "outer_loop_end != __null" ") failed", "missing piece of strip mined loop"
); ::breakpoint(); } } while (0)
;
83 _phase->_igvn._worklist.push(outer_loop_end);
84 Node* outer_safepoint = l->outer_safepoint();
85 assert(outer_safepoint != NULL, "missing piece of strip mined loop")do { if (!(outer_safepoint != __null)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 85, "assert(" "outer_safepoint != __null" ") failed", "missing piece of strip mined loop"
); ::breakpoint(); } } while (0)
;
86 _phase->_igvn._worklist.push(outer_safepoint);
87 Node* cle_out = _head->as_CountedLoop()->loopexit()->proj_out(false);
88 assert(cle_out != NULL, "missing piece of strip mined loop")do { if (!(cle_out != __null)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 88, "assert(" "cle_out != __null" ") failed", "missing piece of strip mined loop"
); ::breakpoint(); } } while (0)
;
89 _phase->_igvn._worklist.push(cle_out);
90 }
91}
92
93//------------------------------compute_exact_trip_count-----------------------
94// Compute loop trip count if possible. Do not recalculate trip count for
95// split loops (pre-main-post) which have their limits and inits behind Opaque node.
96void IdealLoopTree::compute_trip_count(PhaseIdealLoop* phase) {
97 if (!_head->as_Loop()->is_valid_counted_loop(T_INT)) {
98 return;
99 }
100 CountedLoopNode* cl = _head->as_CountedLoop();
101 // Trip count may become nonexact for iteration split loops since
102 // RCE modifies limits. Note, _trip_count value is not reset since
103 // it is used to limit unrolling of main loop.
104 cl->set_nonexact_trip_count();
105
106 // Loop's test should be part of loop.
107 if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue))))
108 return; // Infinite loop
109
110#ifdef ASSERT1
111 BoolTest::mask bt = cl->loopexit()->test_trip();
112 assert(bt == BoolTest::lt || bt == BoolTest::gt ||do { if (!(bt == BoolTest::lt || bt == BoolTest::gt || bt == BoolTest
::ne)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 113, "assert(" "bt == BoolTest::lt || bt == BoolTest::gt || bt == BoolTest::ne"
") failed", "canonical test is expected"); ::breakpoint(); }
} while (0)
113 bt == BoolTest::ne, "canonical test is expected")do { if (!(bt == BoolTest::lt || bt == BoolTest::gt || bt == BoolTest
::ne)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 113, "assert(" "bt == BoolTest::lt || bt == BoolTest::gt || bt == BoolTest::ne"
") failed", "canonical test is expected"); ::breakpoint(); }
} while (0)
;
114#endif
115
116 Node* init_n = cl->init_trip();
117 Node* limit_n = cl->limit();
118 if (init_n != NULL__null && limit_n != NULL__null) {
119 // Use longs to avoid integer overflow.
120 int stride_con = cl->stride_con();
121 const TypeInt* init_type = phase->_igvn.type(init_n)->is_int();
122 const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int();
123 jlong init_con = (stride_con > 0) ? init_type->_lo : init_type->_hi;
124 jlong limit_con = (stride_con > 0) ? limit_type->_hi : limit_type->_lo;
125 int stride_m = stride_con - (stride_con > 0 ? 1 : -1);
126 jlong trip_count = (limit_con - init_con + stride_m)/stride_con;
127 // The loop body is always executed at least once even if init >= limit (for stride_con > 0) or
128 // init <= limit (for stride_con < 0).
129 trip_count = MAX2(trip_count, (jlong)1);
130 if (trip_count < (jlong)max_juint) {
131 if (init_n->is_Con() && limit_n->is_Con()) {
132 // Set exact trip count.
133 cl->set_exact_trip_count((uint)trip_count);
134 } else if (cl->unrolled_count() == 1) {
135 // Set maximum trip count before unrolling.
136 cl->set_trip_count((uint)trip_count);
137 }
138 }
139 }
140}
141
142//------------------------------compute_profile_trip_cnt----------------------------
143// Compute loop trip count from profile data as
144// (backedge_count + loop_exit_count) / loop_exit_count
145
146float IdealLoopTree::compute_profile_trip_cnt_helper(Node* n) {
147 if (n->is_If()) {
148 IfNode *iff = n->as_If();
149 if (iff->_fcnt != COUNT_UNKNOWN(-1.0f) && iff->_prob != PROB_UNKNOWN(-1.0f)) {
150 Node *exit = is_loop_exit(iff);
151 if (exit) {
152 float exit_prob = iff->_prob;
153 if (exit->Opcode() == Op_IfFalse) {
154 exit_prob = 1.0 - exit_prob;
155 }
156 if (exit_prob > PROB_MIN(1e-6f)) {
157 float exit_cnt = iff->_fcnt * exit_prob;
158 return exit_cnt;
159 }
160 }
161 }
162 }
163 if (n->is_Jump()) {
164 JumpNode *jmp = n->as_Jump();
165 if (jmp->_fcnt != COUNT_UNKNOWN(-1.0f)) {
166 float* probs = jmp->_probs;
167 float exit_prob = 0;
168 PhaseIdealLoop *phase = _phase;
169 for (DUIterator_Fast imax, i = jmp->fast_outs(imax); i < imax; i++) {
170 JumpProjNode* u = jmp->fast_out(i)->as_JumpProj();
171 if (!is_member(_phase->get_loop(u))) {
172 exit_prob += probs[u->_con];
173 }
174 }
175 return exit_prob * jmp->_fcnt;
176 }
177 }
178 return 0;
179}
180
181void IdealLoopTree::compute_profile_trip_cnt(PhaseIdealLoop *phase) {
182 if (!_head->is_Loop()) {
183 return;
184 }
185 LoopNode* head = _head->as_Loop();
186 if (head->profile_trip_cnt() != COUNT_UNKNOWN(-1.0f)) {
187 return; // Already computed
188 }
189 float trip_cnt = (float)max_jint; // default is big
190
191 Node* back = head->in(LoopNode::LoopBackControl);
192 while (back != head) {
193 if ((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) &&
194 back->in(0) &&
195 back->in(0)->is_If() &&
196 back->in(0)->as_If()->_fcnt != COUNT_UNKNOWN(-1.0f) &&
197 back->in(0)->as_If()->_prob != PROB_UNKNOWN(-1.0f) &&
198 (back->Opcode() == Op_IfTrue ? 1-back->in(0)->as_If()->_prob : back->in(0)->as_If()->_prob) > PROB_MIN(1e-6f)) {
199 break;
200 }
201 back = phase->idom(back);
202 }
203 if (back != head) {
204 assert((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) &&do { if (!((back->Opcode() == Op_IfTrue || back->Opcode
() == Op_IfFalse) && back->in(0))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 205, "assert(" "(back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) && back->in(0)"
") failed", "if-projection exists"); ::breakpoint(); } } while
(0)
205 back->in(0), "if-projection exists")do { if (!((back->Opcode() == Op_IfTrue || back->Opcode
() == Op_IfFalse) && back->in(0))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 205, "assert(" "(back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) && back->in(0)"
") failed", "if-projection exists"); ::breakpoint(); } } while
(0)
;
206 IfNode* back_if = back->in(0)->as_If();
207 float loop_back_cnt = back_if->_fcnt * (back->Opcode() == Op_IfTrue ? back_if->_prob : (1 - back_if->_prob));
208
209 // Now compute a loop exit count
210 float loop_exit_cnt = 0.0f;
211 if (_child == NULL__null) {
212 for (uint i = 0; i < _body.size(); i++) {
213 Node *n = _body[i];
214 loop_exit_cnt += compute_profile_trip_cnt_helper(n);
215 }
216 } else {
217 ResourceMark rm;
218 Unique_Node_List wq;
219 wq.push(back);
220 for (uint i = 0; i < wq.size(); i++) {
221 Node *n = wq.at(i);
222 assert(n->is_CFG(), "only control nodes")do { if (!(n->is_CFG())) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 222, "assert(" "n->is_CFG()" ") failed", "only control nodes"
); ::breakpoint(); } } while (0)
;
223 if (n != head) {
224 if (n->is_Region()) {
225 for (uint j = 1; j < n->req(); j++) {
226 wq.push(n->in(j));
227 }
228 } else {
229 loop_exit_cnt += compute_profile_trip_cnt_helper(n);
230 wq.push(n->in(0));
231 }
232 }
233 }
234
235 }
236 if (loop_exit_cnt > 0.0f) {
237 trip_cnt = (loop_back_cnt + loop_exit_cnt) / loop_exit_cnt;
238 } else {
239 // No exit count so use
240 trip_cnt = loop_back_cnt;
241 }
242 } else {
243 head->mark_profile_trip_failed();
244 }
245#ifndef PRODUCT
246 if (TraceProfileTripCount) {
247 tty->print_cr("compute_profile_trip_cnt lp: %d cnt: %f\n", head->_idx, trip_cnt);
248 }
249#endif
250 head->set_profile_trip_cnt(trip_cnt);
251}
252
253//---------------------find_invariant-----------------------------
254// Return nonzero index of invariant operand for an associative
255// binary operation of (nonconstant) invariant and variant values.
256// Helper for reassociate_invariants.
257int IdealLoopTree::find_invariant(Node* n, PhaseIdealLoop *phase) {
258 bool in1_invar = this->is_invariant(n->in(1));
259 bool in2_invar = this->is_invariant(n->in(2));
260 if (in1_invar && !in2_invar) return 1;
261 if (!in1_invar && in2_invar) return 2;
262 return 0;
263}
264
265//---------------------is_associative-----------------------------
266// Return TRUE if "n" is an associative binary node. If "base" is
267// not NULL, "n" must be re-associative with it.
268bool IdealLoopTree::is_associative(Node* n, Node* base) {
269 int op = n->Opcode();
270 if (base != NULL__null) {
271 assert(is_associative(base), "Base node should be associative")do { if (!(is_associative(base))) { (*g_assert_poison) = 'X';
; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 271, "assert(" "is_associative(base)" ") failed", "Base node should be associative"
); ::breakpoint(); } } while (0)
;
272 int base_op = base->Opcode();
273 if (base_op == Op_AddI || base_op == Op_SubI) {
274 return op == Op_AddI || op == Op_SubI;
275 }
276 if (base_op == Op_AddL || base_op == Op_SubL) {
277 return op == Op_AddL || op == Op_SubL;
278 }
279 return op == base_op;
280 } else {
281 // Integer "add/sub/mul/and/or/xor" operations are associative.
282 return op == Op_AddI || op == Op_AddL
283 || op == Op_SubI || op == Op_SubL
284 || op == Op_MulI || op == Op_MulL
285 || op == Op_AndI || op == Op_AndL
286 || op == Op_OrI || op == Op_OrL
287 || op == Op_XorI || op == Op_XorL;
288 }
289}
290
291//---------------------reassociate_add_sub------------------------
292// Reassociate invariant add and subtract expressions:
293//
294// inv1 + (x + inv2) => ( inv1 + inv2) + x
295// (x + inv2) + inv1 => ( inv1 + inv2) + x
296// inv1 + (x - inv2) => ( inv1 - inv2) + x
297// inv1 - (inv2 - x) => ( inv1 - inv2) + x
298// (x + inv2) - inv1 => (-inv1 + inv2) + x
299// (x - inv2) + inv1 => ( inv1 - inv2) + x
300// (x - inv2) - inv1 => (-inv1 - inv2) + x
301// inv1 + (inv2 - x) => ( inv1 + inv2) - x
302// inv1 - (x - inv2) => ( inv1 + inv2) - x
303// (inv2 - x) + inv1 => ( inv1 + inv2) - x
304// (inv2 - x) - inv1 => (-inv1 + inv2) - x
305// inv1 - (x + inv2) => ( inv1 - inv2) - x
306//
307Node* IdealLoopTree::reassociate_add_sub(Node* n1, int inv1_idx, int inv2_idx, PhaseIdealLoop *phase) {
308 assert(n1->is_Add() || n1->is_Sub(), "Target node should be add or subtract")do { if (!(n1->is_Add() || n1->is_Sub())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 308, "assert(" "n1->is_Add() || n1->is_Sub()" ") failed"
, "Target node should be add or subtract"); ::breakpoint(); }
} while (0)
;
309 Node* n2 = n1->in(3 - inv1_idx);
310 Node* inv1 = n1->in(inv1_idx);
311 Node* inv2 = n2->in(inv2_idx);
312 Node* x = n2->in(3 - inv2_idx);
313
314 bool neg_x = n2->is_Sub() && inv2_idx == 1;
315 bool neg_inv2 = n2->is_Sub() && inv2_idx == 2;
316 bool neg_inv1 = n1->is_Sub() && inv1_idx == 2;
317 if (n1->is_Sub() && inv1_idx == 1) {
318 neg_x = !neg_x;
319 neg_inv2 = !neg_inv2;
320 }
321
322 bool is_int = n1->bottom_type()->isa_int() != NULL__null;
323 Node* inv1_c = phase->get_ctrl(inv1);
324 Node* n_inv1;
325 if (neg_inv1) {
326 Node* zero;
327 if (is_int) {
328 zero = phase->_igvn.intcon(0);
329 n_inv1 = new SubINode(zero, inv1);
330 } else {
331 zero = phase->_igvn.longcon(0L);
332 n_inv1 = new SubLNode(zero, inv1);
333 }
334 phase->set_ctrl(zero, phase->C->root());
335 phase->register_new_node(n_inv1, inv1_c);
336 } else {
337 n_inv1 = inv1;
338 }
339
340 Node* inv;
341 if (is_int) {
342 if (neg_inv2) {
343 inv = new SubINode(n_inv1, inv2);
344 } else {
345 inv = new AddINode(n_inv1, inv2);
346 }
347 phase->register_new_node(inv, phase->get_early_ctrl(inv));
348 if (neg_x) {
349 return new SubINode(inv, x);
350 } else {
351 return new AddINode(x, inv);
352 }
353 } else {
354 if (neg_inv2) {
355 inv = new SubLNode(n_inv1, inv2);
356 } else {
357 inv = new AddLNode(n_inv1, inv2);
358 }
359 phase->register_new_node(inv, phase->get_early_ctrl(inv));
360 if (neg_x) {
361 return new SubLNode(inv, x);
362 } else {
363 return new AddLNode(x, inv);
364 }
365 }
366}
367
368//---------------------reassociate-----------------------------
369// Reassociate invariant binary expressions with add/sub/mul/
370// and/or/xor operators.
371// For add/sub expressions: see "reassociate_add_sub"
372//
373// For mul/and/or/xor expressions:
374//
375// inv1 op (x op inv2) => (inv1 op inv2) op x
376//
377Node* IdealLoopTree::reassociate(Node* n1, PhaseIdealLoop *phase) {
378 if (!is_associative(n1) || n1->outcnt() == 0) return NULL__null;
379 if (is_invariant(n1)) return NULL__null;
380 // Don't mess with add of constant (igvn moves them to expression tree root.)
381 if (n1->is_Add() && n1->in(2)->is_Con()) return NULL__null;
382
383 int inv1_idx = find_invariant(n1, phase);
384 if (!inv1_idx) return NULL__null;
385 Node* n2 = n1->in(3 - inv1_idx);
386 if (!is_associative(n2, n1)) return NULL__null;
387 int inv2_idx = find_invariant(n2, phase);
388 if (!inv2_idx) return NULL__null;
389
390 if (!phase->may_require_nodes(10, 10)) return NULL__null;
391
392 Node* result = NULL__null;
393 switch (n1->Opcode()) {
394 case Op_AddI:
395 case Op_AddL:
396 case Op_SubI:
397 case Op_SubL:
398 result = reassociate_add_sub(n1, inv1_idx, inv2_idx, phase);
399 break;
400 case Op_MulI:
401 case Op_MulL:
402 case Op_AndI:
403 case Op_AndL:
404 case Op_OrI:
405 case Op_OrL:
406 case Op_XorI:
407 case Op_XorL: {
408 Node* inv1 = n1->in(inv1_idx);
409 Node* inv2 = n2->in(inv2_idx);
410 Node* x = n2->in(3 - inv2_idx);
411 Node* inv = n2->clone_with_data_edge(inv1, inv2);
412 phase->register_new_node(inv, phase->get_early_ctrl(inv));
413 result = n1->clone_with_data_edge(x, inv);
414 break;
415 }
416 default:
417 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 417); ::breakpoint(); } while (0)
;
418 }
419
420 assert(result != NULL, "")do { if (!(result != __null)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 420, "assert(" "result != __null" ") failed", ""); ::breakpoint
(); } } while (0)
;
421 phase->register_new_node(result, phase->get_ctrl(n1));
422 phase->_igvn.replace_node(n1, result);
423 assert(phase->get_loop(phase->get_ctrl(n1)) == this, "")do { if (!(phase->get_loop(phase->get_ctrl(n1)) == this
)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 423, "assert(" "phase->get_loop(phase->get_ctrl(n1)) == this"
") failed", ""); ::breakpoint(); } } while (0)
;
424 _body.yank(n1);
425 return result;
426}
427
428//---------------------reassociate_invariants-----------------------------
429// Reassociate invariant expressions:
430void IdealLoopTree::reassociate_invariants(PhaseIdealLoop *phase) {
431 for (int i = _body.size() - 1; i >= 0; i--) {
432 Node *n = _body.at(i);
433 for (int j = 0; j < 5; j++) {
434 Node* nn = reassociate(n, phase);
435 if (nn == NULL__null) break;
436 n = nn; // again
437 }
438 }
439}
440
441//------------------------------policy_peeling---------------------------------
442// Return TRUE if the loop should be peeled, otherwise return FALSE. Peeling
443// is applicable if we can make a loop-invariant test (usually a null-check)
444// execute before we enter the loop. When TRUE, the estimated node budget is
445// also requested.
446bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) {
447 uint estimate = estimate_peeling(phase);
448
449 return estimate == 0 ? false : phase->may_require_nodes(estimate);
450}
451
452// Perform actual policy and size estimate for the loop peeling transform, and
453// return the estimated loop size if peeling is applicable, otherwise return
454// zero. No node budget is allocated.
455uint IdealLoopTree::estimate_peeling(PhaseIdealLoop *phase) {
456
457 // If nodes are depleted, some transform has miscalculated its needs.
458 assert(!phase->exceeding_node_budget(), "sanity")do { if (!(!phase->exceeding_node_budget())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 458, "assert(" "!phase->exceeding_node_budget()" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
459
460 // Peeling does loop cloning which can result in O(N^2) node construction.
461 if (_body.size() > 255) {
462 return 0; // Suppress too large body size.
463 }
464 // Optimistic estimate that approximates loop body complexity via data and
465 // control flow fan-out (instead of using the more pessimistic: BodySize^2).
466 uint estimate = est_loop_clone_sz(2);
467
468 if (phase->exceeding_node_budget(estimate)) {
469 return 0; // Too large to safely clone.
470 }
471
472 // Check for vectorized loops, any peeling done was already applied.
473 if (_head->is_CountedLoop()) {
474 CountedLoopNode* cl = _head->as_CountedLoop();
475 if (cl->is_unroll_only() || cl->trip_count() == 1) {
476 return 0;
477 }
478 }
479
480 Node* test = tail();
481
482 while (test != _head) { // Scan till run off top of loop
483 if (test->is_If()) { // Test?
484 Node *ctrl = phase->get_ctrl(test->in(1));
485 if (ctrl->is_top()) {
486 return 0; // Found dead test on live IF? No peeling!
487 }
488 // Standard IF only has one input value to check for loop invariance.
489 assert(test->Opcode() == Op_If ||do { if (!(test->Opcode() == Op_If || test->Opcode() ==
Op_CountedLoopEnd || test->Opcode() == Op_LongCountedLoopEnd
|| test->Opcode() == Op_RangeCheck)) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 493, "assert(" "test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd || test->Opcode() == Op_LongCountedLoopEnd || test->Opcode() == Op_RangeCheck"
") failed", "Check this code when new subtype is added"); ::
breakpoint(); } } while (0)
490 test->Opcode() == Op_CountedLoopEnd ||do { if (!(test->Opcode() == Op_If || test->Opcode() ==
Op_CountedLoopEnd || test->Opcode() == Op_LongCountedLoopEnd
|| test->Opcode() == Op_RangeCheck)) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 493, "assert(" "test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd || test->Opcode() == Op_LongCountedLoopEnd || test->Opcode() == Op_RangeCheck"
") failed", "Check this code when new subtype is added"); ::
breakpoint(); } } while (0)
491 test->Opcode() == Op_LongCountedLoopEnd ||do { if (!(test->Opcode() == Op_If || test->Opcode() ==
Op_CountedLoopEnd || test->Opcode() == Op_LongCountedLoopEnd
|| test->Opcode() == Op_RangeCheck)) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 493, "assert(" "test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd || test->Opcode() == Op_LongCountedLoopEnd || test->Opcode() == Op_RangeCheck"
") failed", "Check this code when new subtype is added"); ::
breakpoint(); } } while (0)
492 test->Opcode() == Op_RangeCheck,do { if (!(test->Opcode() == Op_If || test->Opcode() ==
Op_CountedLoopEnd || test->Opcode() == Op_LongCountedLoopEnd
|| test->Opcode() == Op_RangeCheck)) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 493, "assert(" "test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd || test->Opcode() == Op_LongCountedLoopEnd || test->Opcode() == Op_RangeCheck"
") failed", "Check this code when new subtype is added"); ::
breakpoint(); } } while (0)
493 "Check this code when new subtype is added")do { if (!(test->Opcode() == Op_If || test->Opcode() ==
Op_CountedLoopEnd || test->Opcode() == Op_LongCountedLoopEnd
|| test->Opcode() == Op_RangeCheck)) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 493, "assert(" "test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd || test->Opcode() == Op_LongCountedLoopEnd || test->Opcode() == Op_RangeCheck"
") failed", "Check this code when new subtype is added"); ::
breakpoint(); } } while (0)
;
494 // Condition is not a member of this loop?
495 if (!is_member(phase->get_loop(ctrl)) && is_loop_exit(test)) {
496 return estimate; // Found reason to peel!
497 }
498 }
499 // Walk up dominators to loop _head looking for test which is executed on
500 // every path through the loop.
501 test = phase->idom(test);
502 }
503 return 0;
504}
505
506//------------------------------peeled_dom_test_elim---------------------------
507// If we got the effect of peeling, either by actually peeling or by making
508// a pre-loop which must execute at least once, we can remove all
509// loop-invariant dominated tests in the main body.
510void PhaseIdealLoop::peeled_dom_test_elim(IdealLoopTree* loop, Node_List& old_new) {
511 bool progress = true;
512 while (progress) {
513 progress = false; // Reset for next iteration
514 Node* prev = loop->_head->in(LoopNode::LoopBackControl); // loop->tail();
515 Node* test = prev->in(0);
516 while (test != loop->_head) { // Scan till run off top of loop
517 int p_op = prev->Opcode();
518 assert(test != NULL, "test cannot be NULL")do { if (!(test != __null)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 518, "assert(" "test != __null" ") failed", "test cannot be NULL"
); ::breakpoint(); } } while (0)
;
519 Node* test_cond = NULL__null;
520 if ((p_op == Op_IfFalse || p_op == Op_IfTrue) && test->is_If()) {
521 test_cond = test->in(1);
522 }
523 if (test_cond != NULL__null && // Test?
524 !test_cond->is_Con() && // And not already obvious?
525 // And condition is not a member of this loop?
526 !loop->is_member(get_loop(get_ctrl(test_cond)))) {
527 // Walk loop body looking for instances of this test
528 for (uint i = 0; i < loop->_body.size(); i++) {
529 Node* n = loop->_body.at(i);
530 // Check against cached test condition because dominated_by()
531 // replaces the test condition with a constant.
532 if (n->is_If() && n->in(1) == test_cond) {
533 // IfNode was dominated by version in peeled loop body
534 progress = true;
535 dominated_by(old_new[prev->_idx], n);
536 }
537 }
538 }
539 prev = test;
540 test = idom(test);
541 } // End of scan tests in loop
542 } // End of while (progress)
543}
544
545//------------------------------do_peeling-------------------------------------
546// Peel the first iteration of the given loop.
547// Step 1: Clone the loop body. The clone becomes the peeled iteration.
548// The pre-loop illegally has 2 control users (old & new loops).
549// Step 2: Make the old-loop fall-in edges point to the peeled iteration.
550// Do this by making the old-loop fall-in edges act as if they came
551// around the loopback from the prior iteration (follow the old-loop
552// backedges) and then map to the new peeled iteration. This leaves
553// the pre-loop with only 1 user (the new peeled iteration), but the
554// peeled-loop backedge has 2 users.
555// Step 3: Cut the backedge on the clone (so its not a loop) and remove the
556// extra backedge user.
557//
558// orig
559//
560// stmt1
561// |
562// v
563// loop predicate
564// |
565// v
566// loop<----+
567// | |
568// stmt2 |
569// | |
570// v |
571// if ^
572// / \ |
573// / \ |
574// v v |
575// false true |
576// / \ |
577// / ----+
578// |
579// v
580// exit
581//
582//
583// after clone loop
584//
585// stmt1
586// |
587// v
588// loop predicate
589// / \
590// clone / \ orig
591// / \
592// / \
593// v v
594// +---->loop clone loop<----+
595// | | | |
596// | stmt2 clone stmt2 |
597// | | | |
598// | v v |
599// ^ if clone If ^
600// | / \ / \ |
601// | / \ / \ |
602// | v v v v |
603// | true false false true |
604// | / \ / \ |
605// +---- \ / ----+
606// \ /
607// 1v v2
608// region
609// |
610// v
611// exit
612//
613//
614// after peel and predicate move
615//
616// stmt1
617// /
618// /
619// clone / orig
620// /
621// / +----------+
622// / | |
623// / loop predicate |
624// / | |
625// v v |
626// TOP-->loop clone loop<----+ |
627// | | | |
628// stmt2 clone stmt2 | |
629// | | | ^
630// v v | |
631// if clone If ^ |
632// / \ / \ | |
633// / \ / \ | |
634// v v v v | |
635// true false false true | |
636// | \ / \ | |
637// | \ / ----+ ^
638// | \ / |
639// | 1v v2 |
640// v region |
641// | | |
642// | v |
643// | exit |
644// | |
645// +--------------->-----------------+
646//
647//
648// final graph
649//
650// stmt1
651// |
652// v
653// stmt2 clone
654// |
655// v
656// if clone
657// / |
658// / |
659// v v
660// false true
661// | |
662// | v
663// | loop predicate
664// | |
665// | v
666// | loop<----+
667// | | |
668// | stmt2 |
669// | | |
670// | v |
671// v if ^
672// | / \ |
673// | / \ |
674// | v v |
675// | false true |
676// | | \ |
677// v v --+
678// region
679// |
680// v
681// exit
682//
683void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
684
685 C->set_major_progress();
686 // Peeling a 'main' loop in a pre/main/post situation obfuscates the
687 // 'pre' loop from the main and the 'pre' can no longer have its
688 // iterations adjusted. Therefore, we need to declare this loop as
689 // no longer a 'main' loop; it will need new pre and post loops before
690 // we can do further RCE.
691#ifndef PRODUCT
692 if (TraceLoopOpts) {
693 tty->print("Peel ");
694 loop->dump_head();
695 }
696#endif
697 LoopNode* head = loop->_head->as_Loop();
698 bool counted_loop = head->is_CountedLoop();
699 if (counted_loop) {
700 CountedLoopNode *cl = head->as_CountedLoop();
701 assert(cl->trip_count() > 0, "peeling a fully unrolled loop")do { if (!(cl->trip_count() > 0)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 701, "assert(" "cl->trip_count() > 0" ") failed", "peeling a fully unrolled loop"
); ::breakpoint(); } } while (0)
;
702 cl->set_trip_count(cl->trip_count() - 1);
703 if (cl->is_main_loop()) {
704 cl->set_normal_loop();
705#ifndef PRODUCT
706 if (PrintOpto && VerifyLoopOptimizations) {
707 tty->print("Peeling a 'main' loop; resetting to 'normal' ");
708 loop->dump_head();
709 }
710#endif
711 }
712 }
713 Node* entry = head->in(LoopNode::EntryControl);
714
715 // Step 1: Clone the loop body. The clone becomes the peeled iteration.
716 // The pre-loop illegally has 2 control users (old & new loops).
717 clone_loop(loop, old_new, dom_depth(head->skip_strip_mined()), ControlAroundStripMined);
718
719 // Step 2: Make the old-loop fall-in edges point to the peeled iteration.
720 // Do this by making the old-loop fall-in edges act as if they came
721 // around the loopback from the prior iteration (follow the old-loop
722 // backedges) and then map to the new peeled iteration. This leaves
723 // the pre-loop with only 1 user (the new peeled iteration), but the
724 // peeled-loop backedge has 2 users.
725 Node* new_entry = old_new[head->in(LoopNode::LoopBackControl)->_idx];
726 _igvn.hash_delete(head->skip_strip_mined());
727 head->skip_strip_mined()->set_req(LoopNode::EntryControl, new_entry);
728 for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
729 Node* old = head->fast_out(j);
730 if (old->in(0) == loop->_head && old->req() == 3 && old->is_Phi()) {
731 Node* new_exit_value = old_new[old->in(LoopNode::LoopBackControl)->_idx];
732 if (!new_exit_value) // Backedge value is ALSO loop invariant?
733 // Then loop body backedge value remains the same.
734 new_exit_value = old->in(LoopNode::LoopBackControl);
735 _igvn.hash_delete(old);
736 old->set_req(LoopNode::EntryControl, new_exit_value);
737 }
738 }
739
740
741 // Step 3: Cut the backedge on the clone (so its not a loop) and remove the
742 // extra backedge user.
743 Node* new_head = old_new[head->_idx];
744 _igvn.hash_delete(new_head);
745 new_head->set_req(LoopNode::LoopBackControl, C->top());
746 for (DUIterator_Fast j2max, j2 = new_head->fast_outs(j2max); j2 < j2max; j2++) {
747 Node* use = new_head->fast_out(j2);
748 if (use->in(0) == new_head && use->req() == 3 && use->is_Phi()) {
749 _igvn.hash_delete(use);
750 use->set_req(LoopNode::LoopBackControl, C->top());
751 }
752 }
753
754 // Step 4: Correct dom-depth info. Set to loop-head depth.
755
756 int dd = dom_depth(head->skip_strip_mined());
757 set_idom(head->skip_strip_mined(), head->skip_strip_mined()->in(LoopNode::EntryControl), dd);
758 for (uint j3 = 0; j3 < loop->_body.size(); j3++) {
759 Node *old = loop->_body.at(j3);
760 Node *nnn = old_new[old->_idx];
761 if (!has_ctrl(nnn)) {
762 set_idom(nnn, idom(nnn), dd-1);
763 }
764 }
765
766 // Now force out all loop-invariant dominating tests. The optimizer
767 // finds some, but we _know_ they are all useless.
768 peeled_dom_test_elim(loop,old_new);
769
770 loop->record_for_igvn();
771}
772
773//------------------------------policy_maximally_unroll------------------------
774// Calculate the exact loop trip-count and return TRUE if loop can be fully,
775// i.e. maximally, unrolled, otherwise return FALSE. When TRUE, the estimated
776// node budget is also requested.
777bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop* phase) const {
778 CountedLoopNode* cl = _head->as_CountedLoop();
779 assert(cl->is_normal_loop(), "")do { if (!(cl->is_normal_loop())) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 779, "assert(" "cl->is_normal_loop()" ") failed", ""); ::
breakpoint(); } } while (0)
;
780 if (!cl->is_valid_counted_loop(T_INT)) {
781 return false; // Malformed counted loop.
782 }
783 if (!cl->has_exact_trip_count()) {
784 return false; // Trip count is not exact.
785 }
786
787 uint trip_count = cl->trip_count();
788 // Note, max_juint is used to indicate unknown trip count.
789 assert(trip_count > 1, "one iteration loop should be optimized out already")do { if (!(trip_count > 1)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 789, "assert(" "trip_count > 1" ") failed", "one iteration loop should be optimized out already"
); ::breakpoint(); } } while (0)
;
790 assert(trip_count < max_juint, "exact trip_count should be less than max_juint.")do { if (!(trip_count < max_juint)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 790, "assert(" "trip_count < max_juint" ") failed", "exact trip_count should be less than max_juint."
); ::breakpoint(); } } while (0)
;
791
792 // If nodes are depleted, some transform has miscalculated its needs.
793 assert(!phase->exceeding_node_budget(), "sanity")do { if (!(!phase->exceeding_node_budget())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 793, "assert(" "!phase->exceeding_node_budget()" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
794
795 // Allow the unrolled body to get larger than the standard loop size limit.
796 uint unroll_limit = (uint)LoopUnrollLimit * 4;
797 assert((intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits")do { if (!((intx)unroll_limit == LoopUnrollLimit * 4)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 797, "assert(" "(intx)unroll_limit == LoopUnrollLimit * 4" ") failed"
, "LoopUnrollLimit must fit in 32bits"); ::breakpoint(); } } while
(0)
;
798 if (trip_count > unroll_limit || _body.size() > unroll_limit) {
799 return false;
800 }
801
802 uint new_body_size = est_loop_unroll_sz(trip_count);
803
804 if (new_body_size == UINT_MAX(2147483647 *2U +1U)) { // Check for bad estimate (overflow).
805 return false;
806 }
807
808 // Fully unroll a loop with few iterations, regardless of other conditions,
809 // since the following (general) loop optimizations will split such loop in
810 // any case (into pre-main-post).
811 if (trip_count <= 3) {
812 return phase->may_require_nodes(new_body_size);
813 }
814
815 // Reject if unrolling will result in too much node construction.
816 if (new_body_size > unroll_limit || phase->exceeding_node_budget(new_body_size)) {
817 return false;
818 }
819
820 // Do not unroll a loop with String intrinsics code.
821 // String intrinsics are large and have loops.
822 for (uint k = 0; k < _body.size(); k++) {
823 Node* n = _body.at(k);
824 switch (n->Opcode()) {
825 case Op_StrComp:
826 case Op_StrEquals:
827 case Op_StrIndexOf:
828 case Op_StrIndexOfChar:
829 case Op_EncodeISOArray:
830 case Op_AryEq:
831 case Op_HasNegatives: {
832 return false;
833 }
834#if INCLUDE_RTM_OPT1
835 case Op_FastLock:
836 case Op_FastUnlock: {
837 // Don't unroll RTM locking code because it is large.
838 if (UseRTMLocking) {
839 return false;
840 }
841 }
842#endif
843 } // switch
844 }
845
846 return phase->may_require_nodes(new_body_size);
847}
848
849
850//------------------------------policy_unroll----------------------------------
851// Return TRUE or FALSE if the loop should be unrolled or not. Apply unroll if
852// the loop is a counted loop and the loop body is small enough. When TRUE,
853// the estimated node budget is also requested.
854bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
855
856 CountedLoopNode *cl = _head->as_CountedLoop();
857 assert(cl->is_normal_loop() || cl->is_main_loop(), "")do { if (!(cl->is_normal_loop() || cl->is_main_loop()))
{ (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 857, "assert(" "cl->is_normal_loop() || cl->is_main_loop()"
") failed", ""); ::breakpoint(); } } while (0)
;
858
859 if (!cl->is_valid_counted_loop(T_INT)) {
860 return false; // Malformed counted loop
861 }
862
863 // If nodes are depleted, some transform has miscalculated its needs.
864 assert(!phase->exceeding_node_budget(), "sanity")do { if (!(!phase->exceeding_node_budget())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 864, "assert(" "!phase->exceeding_node_budget()" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
865
866 // Protect against over-unrolling.
867 // After split at least one iteration will be executed in pre-loop.
868 if (cl->trip_count() <= (cl->is_normal_loop() ? 2u : 1u)) {
869 return false;
870 }
871 _local_loop_unroll_limit = LoopUnrollLimit;
872 _local_loop_unroll_factor = 4;
873 int future_unroll_cnt = cl->unrolled_count() * 2;
874 if (!cl->is_vectorized_loop()) {
875 if (future_unroll_cnt > LoopMaxUnroll) return false;
876 } else {
877 // obey user constraints on vector mapped loops with additional unrolling applied
878 int unroll_constraint = (cl->slp_max_unroll()) ? cl->slp_max_unroll() : 1;
879 if ((future_unroll_cnt / unroll_constraint) > LoopMaxUnroll) return false;
880 }
881
882 const int stride_con = cl->stride_con();
883
884 // Check for initial stride being a small enough constant
885 const int initial_stride_sz = MAX2(1<<2, Matcher::max_vector_size(T_BYTE) / 2);
886 // Maximum stride size should protect against overflow, when doubling stride unroll_count times
887 const int max_stride_size = MIN2<int>(max_jint / 2 - 2, initial_stride_sz * future_unroll_cnt);
888 // No abs() use; abs(min_jint) = min_jint
889 if (stride_con < -max_stride_size || stride_con > max_stride_size) return false;
890
891 // Don't unroll if the next round of unrolling would push us
892 // over the expected trip count of the loop. One is subtracted
893 // from the expected trip count because the pre-loop normally
894 // executes 1 iteration.
895 if (UnrollLimitForProfileCheck > 0 &&
896 cl->profile_trip_cnt() != COUNT_UNKNOWN(-1.0f) &&
897 future_unroll_cnt > UnrollLimitForProfileCheck &&
898 (float)future_unroll_cnt > cl->profile_trip_cnt() - 1.0) {
899 return false;
900 }
901
902 // When unroll count is greater than LoopUnrollMin, don't unroll if:
903 // the residual iterations are more than 10% of the trip count
904 // and rounds of "unroll,optimize" are not making significant progress
905 // Progress defined as current size less than 20% larger than previous size.
906 if (UseSuperWord && cl->node_count_before_unroll() > 0 &&
907 future_unroll_cnt > LoopUnrollMin &&
908 (future_unroll_cnt - 1) * (100.0 / LoopPercentProfileLimit) > cl->profile_trip_cnt() &&
909 1.2 * cl->node_count_before_unroll() < (double)_body.size()) {
910 return false;
911 }
912
913 Node *init_n = cl->init_trip();
914 Node *limit_n = cl->limit();
915 if (limit_n == NULL__null) return false; // We will dereference it below.
916
917 // Non-constant bounds.
918 // Protect against over-unrolling when init or/and limit are not constant
919 // (so that trip_count's init value is maxint) but iv range is known.
920 if (init_n == NULL__null || !init_n->is_Con() || !limit_n->is_Con()) {
921 Node* phi = cl->phi();
922 if (phi != NULL__null) {
923 assert(phi->is_Phi() && phi->in(0) == _head, "Counted loop should have iv phi.")do { if (!(phi->is_Phi() && phi->in(0) == _head
)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 923, "assert(" "phi->is_Phi() && phi->in(0) == _head"
") failed", "Counted loop should have iv phi."); ::breakpoint
(); } } while (0)
;
924 const TypeInt* iv_type = phase->_igvn.type(phi)->is_int();
925 int next_stride = stride_con * 2; // stride after this unroll
926 if (next_stride > 0) {
927 if (iv_type->_lo > max_jint - next_stride || // overflow
928 iv_type->_lo + next_stride > iv_type->_hi) {
929 return false; // over-unrolling
930 }
931 } else if (next_stride < 0) {
932 if (iv_type->_hi < min_jint - next_stride || // overflow
933 iv_type->_hi + next_stride < iv_type->_lo) {
934 return false; // over-unrolling
935 }
936 }
937 }
938 }
939
940 // After unroll limit will be adjusted: new_limit = limit-stride.
941 // Bailout if adjustment overflow.
942 const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int();
943 if ((stride_con > 0 && ((min_jint + stride_con) > limit_type->_hi)) ||
944 (stride_con < 0 && ((max_jint + stride_con) < limit_type->_lo)))
945 return false; // overflow
946
947 // Adjust body_size to determine if we unroll or not
948 uint body_size = _body.size();
949 // Key test to unroll loop in CRC32 java code
950 int xors_in_loop = 0;
951 // Also count ModL, DivL and MulL which expand mightly
952 for (uint k = 0; k < _body.size(); k++) {
953 Node* n = _body.at(k);
954 switch (n->Opcode()) {
955 case Op_XorI: xors_in_loop++; break; // CRC32 java code
956 case Op_ModL: body_size += 30; break;
957 case Op_DivL: body_size += 30; break;
958 case Op_MulL: body_size += 10; break;
959 case Op_StrComp:
960 case Op_StrEquals:
961 case Op_StrIndexOf:
962 case Op_StrIndexOfChar:
963 case Op_EncodeISOArray:
964 case Op_AryEq:
965 case Op_HasNegatives: {
966 // Do not unroll a loop with String intrinsics code.
967 // String intrinsics are large and have loops.
968 return false;
969 }
970#if INCLUDE_RTM_OPT1
971 case Op_FastLock:
972 case Op_FastUnlock: {
973 // Don't unroll RTM locking code because it is large.
974 if (UseRTMLocking) {
975 return false;
976 }
977 }
978#endif
979 } // switch
980 }
981
982 if (UseSuperWord) {
983 if (!cl->is_reduction_loop()) {
984 phase->mark_reductions(this);
985 }
986
987 // Only attempt slp analysis when user controls do not prohibit it
988 if (LoopMaxUnroll > _local_loop_unroll_factor) {
989 // Once policy_slp_analysis succeeds, mark the loop with the
990 // maximal unroll factor so that we minimize analysis passes
991 if (future_unroll_cnt >= _local_loop_unroll_factor) {
992 policy_unroll_slp_analysis(cl, phase, future_unroll_cnt);
993 }
994 }
995 }
996
997 int slp_max_unroll_factor = cl->slp_max_unroll();
998 if ((LoopMaxUnroll < slp_max_unroll_factor) && FLAG_IS_DEFAULT(LoopMaxUnroll)(JVMFlag::is_default(Flag_LoopMaxUnroll_enum)) && UseSubwordForMaxVector) {
999 LoopMaxUnroll = slp_max_unroll_factor;
1000 }
1001
1002 uint estimate = est_loop_clone_sz(2);
1003
1004 if (cl->has_passed_slp()) {
1005 if (slp_max_unroll_factor >= future_unroll_cnt) {
1006 return phase->may_require_nodes(estimate);
1007 }
1008 return false; // Loop too big.
1009 }
1010
1011 // Check for being too big
1012 if (body_size > (uint)_local_loop_unroll_limit) {
1013 if ((cl->is_subword_loop() || xors_in_loop >= 4) && body_size < 4u * LoopUnrollLimit) {
1014 return phase->may_require_nodes(estimate);
1015 }
1016 return false; // Loop too big.
1017 }
1018
1019 if (cl->is_unroll_only()) {
1020 if (TraceSuperWordLoopUnrollAnalysis) {
1021 tty->print_cr("policy_unroll passed vector loop(vlen=%d, factor=%d)\n",
1022 slp_max_unroll_factor, future_unroll_cnt);
1023 }
1024 }
1025
1026 // Unroll once! (Each trip will soon do double iterations)
1027 return phase->may_require_nodes(estimate);
1028}
1029
1030void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_cnt) {
1031
1032 // If nodes are depleted, some transform has miscalculated its needs.
1033 assert(!phase->exceeding_node_budget(), "sanity")do { if (!(!phase->exceeding_node_budget())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1033, "assert(" "!phase->exceeding_node_budget()" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
1034
1035 // Enable this functionality target by target as needed
1036 if (SuperWordLoopUnrollAnalysis) {
1037 if (!cl->was_slp_analyzed()) {
1038 SuperWord sw(phase);
1039 sw.transform_loop(this, false);
1040
1041 // If the loop is slp canonical analyze it
1042 if (sw.early_return() == false) {
1043 sw.unrolling_analysis(_local_loop_unroll_factor);
1044 }
1045 }
1046
1047 if (cl->has_passed_slp()) {
1048 int slp_max_unroll_factor = cl->slp_max_unroll();
1049 if (slp_max_unroll_factor >= future_unroll_cnt) {
1050 int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor;
1051 if (new_limit > LoopUnrollLimit) {
1052 if (TraceSuperWordLoopUnrollAnalysis) {
1053 tty->print_cr("slp analysis unroll=%d, default limit=%d\n", new_limit, _local_loop_unroll_limit);
1054 }
1055 _local_loop_unroll_limit = new_limit;
1056 }
1057 }
1058 }
1059 }
1060}
1061
1062
1063//------------------------------policy_range_check-----------------------------
1064// Return TRUE or FALSE if the loop should be range-check-eliminated or not.
1065// When TRUE, the estimated node budget is also requested.
1066//
1067// We will actually perform iteration-splitting, a more powerful form of RCE.
1068bool IdealLoopTree::policy_range_check(PhaseIdealLoop* phase, bool provisional, BasicType bt) const {
1069 if (!provisional && !RangeCheckElimination) return false;
1070
1071 // If nodes are depleted, some transform has miscalculated its needs.
1072 assert(provisional || !phase->exceeding_node_budget(), "sanity")do { if (!(provisional || !phase->exceeding_node_budget())
) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1072, "assert(" "provisional || !phase->exceeding_node_budget()"
") failed", "sanity"); ::breakpoint(); } } while (0)
;
1073
1074 if (_head->is_CountedLoop()) {
1075 CountedLoopNode *cl = _head->as_CountedLoop();
1076 // If we unrolled with no intention of doing RCE and we later changed our
1077 // minds, we got no pre-loop. Either we need to make a new pre-loop, or we
1078 // have to disallow RCE.
1079 if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
1080
1081 // check for vectorized loops, some opts are no longer needed
1082 // RCE needs pre/main/post loops. Don't apply it on a single iteration loop.
1083 if (cl->is_unroll_only() || (cl->is_normal_loop() && cl->trip_count() == 1)) return false;
1084 } else {
1085 assert(provisional, "no long counted loop expected")do { if (!(provisional)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1085, "assert(" "provisional" ") failed", "no long counted loop expected"
); ::breakpoint(); } } while (0)
;
1086 }
1087
1088 BaseCountedLoopNode* cl = _head->as_BaseCountedLoop();
1089 Node *trip_counter = cl->phi();
1090 assert(!cl->is_LongCountedLoop() || bt == T_LONG, "only long range checks in long counted loops")do { if (!(!cl->is_LongCountedLoop() || bt == T_LONG)) { (
*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1090, "assert(" "!cl->is_LongCountedLoop() || bt == T_LONG"
") failed", "only long range checks in long counted loops");
::breakpoint(); } } while (0)
;
1091
1092 // Check loop body for tests of trip-counter plus loop-invariant vs
1093 // loop-invariant.
1094 for (uint i = 0; i < _body.size(); i++) {
1095 Node *iff = _body[i];
1096 if (iff->Opcode() == Op_If ||
1097 iff->Opcode() == Op_RangeCheck) { // Test?
1098
1099 // Comparing trip+off vs limit
1100 Node *bol = iff->in(1);
1101 if (bol->req() != 2) {
1102 continue; // dead constant test
1103 }
1104 if (!bol->is_Bool()) {
1105 assert(bol->Opcode() == Op_Conv2B, "predicate check only")do { if (!(bol->Opcode() == Op_Conv2B)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1105, "assert(" "bol->Opcode() == Op_Conv2B" ") failed",
"predicate check only"); ::breakpoint(); } } while (0)
;
1106 continue;
1107 }
1108 if (bol->as_Bool()->_test._test == BoolTest::ne) {
1109 continue; // not RC
1110 }
1111 Node *cmp = bol->in(1);
1112 Node *rc_exp = cmp->in(1);
1113 Node *limit = cmp->in(2);
1114
1115 if (provisional) {
1116 // Try to pattern match with either cmp inputs, do not check
1117 // whether one of the inputs is loop independent as it may not
1118 // have had a chance to be hoisted yet.
1119 if (!phase->is_scaled_iv_plus_offset(cmp->in(1), trip_counter, NULL__null, NULL__null, bt) &&
1120 !phase->is_scaled_iv_plus_offset(cmp->in(2), trip_counter, NULL__null, NULL__null, bt)) {
1121 continue;
1122 }
1123 } else {
1124 Node *limit_c = phase->get_ctrl(limit);
1125 if (limit_c == phase->C->top()) {
1126 return false; // Found dead test on live IF? No RCE!
1127 }
1128 if (is_member(phase->get_loop(limit_c))) {
1129 // Compare might have operands swapped; commute them
1130 rc_exp = cmp->in(2);
1131 limit = cmp->in(1);
1132 limit_c = phase->get_ctrl(limit);
1133 if (is_member(phase->get_loop(limit_c))) {
1134 continue; // Both inputs are loop varying; cannot RCE
1135 }
1136 }
1137
1138 if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL__null, NULL__null, bt)) {
1139 continue;
1140 }
1141 }
1142 // Found a test like 'trip+off vs limit'. Test is an IfNode, has two (2)
1143 // projections. If BOTH are in the loop we need loop unswitching instead
1144 // of iteration splitting.
1145 if (is_loop_exit(iff)) {
1146 // Found valid reason to split iterations (if there is room).
1147 // NOTE: Usually a gross overestimate.
1148 // Long range checks cause the loop to be transformed in a loop nest which only causes a fixed number of nodes
1149 // to be added
1150 return provisional || bt == T_LONG || phase->may_require_nodes(est_loop_clone_sz(2));
1151 }
1152 } // End of is IF
1153 }
1154
1155 return false;
1156}
1157
1158//------------------------------policy_peel_only-------------------------------
1159// Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned. Useful
1160// for unrolling loops with NO array accesses.
1161bool IdealLoopTree::policy_peel_only(PhaseIdealLoop *phase) const {
1162
1163 // If nodes are depleted, some transform has miscalculated its needs.
1164 assert(!phase->exceeding_node_budget(), "sanity")do { if (!(!phase->exceeding_node_budget())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1164, "assert(" "!phase->exceeding_node_budget()" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
1165
1166 // check for vectorized loops, any peeling done was already applied
1167 if (_head->is_CountedLoop() && _head->as_CountedLoop()->is_unroll_only()) {
1168 return false;
1169 }
1170
1171 for (uint i = 0; i < _body.size(); i++) {
1172 if (_body[i]->is_Mem()) {
1173 return false;
1174 }
1175 }
1176 // No memory accesses at all!
1177 return true;
1178}
1179
1180//------------------------------clone_up_backedge_goo--------------------------
1181// If Node n lives in the back_ctrl block and cannot float, we clone a private
1182// version of n in preheader_ctrl block and return that, otherwise return n.
1183Node *PhaseIdealLoop::clone_up_backedge_goo(Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones) {
1184 if (get_ctrl(n) != back_ctrl) return n;
1185
1186 // Only visit once
1187 if (visited.test_set(n->_idx)) {
1188 Node *x = clones.find(n->_idx);
1189 return (x != NULL__null) ? x : n;
1190 }
1191
1192 Node *x = NULL__null; // If required, a clone of 'n'
1193 // Check for 'n' being pinned in the backedge.
1194 if (n->in(0) && n->in(0) == back_ctrl) {
1195 assert(clones.find(n->_idx) == NULL, "dead loop")do { if (!(clones.find(n->_idx) == __null)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1195, "assert(" "clones.find(n->_idx) == __null" ") failed"
, "dead loop"); ::breakpoint(); } } while (0)
;
1196 x = n->clone(); // Clone a copy of 'n' to preheader
1197 clones.push(x, n->_idx);
1198 x->set_req(0, preheader_ctrl); // Fix x's control input to preheader
1199 }
1200
1201 // Recursive fixup any other input edges into x.
1202 // If there are no changes we can just return 'n', otherwise
1203 // we need to clone a private copy and change it.
1204 for (uint i = 1; i < n->req(); i++) {
1205 Node *g = clone_up_backedge_goo(back_ctrl, preheader_ctrl, n->in(i), visited, clones);
1206 if (g != n->in(i)) {
1207 if (!x) {
1208 assert(clones.find(n->_idx) == NULL, "dead loop")do { if (!(clones.find(n->_idx) == __null)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1208, "assert(" "clones.find(n->_idx) == __null" ") failed"
, "dead loop"); ::breakpoint(); } } while (0)
;
1209 x = n->clone();
1210 clones.push(x, n->_idx);
1211 }
1212 x->set_req(i, g);
1213 }
1214 }
1215 if (x) { // x can legally float to pre-header location
1216 register_new_node(x, preheader_ctrl);
1217 return x;
1218 } else { // raise n to cover LCA of uses
1219 set_ctrl(n, find_non_split_ctrl(back_ctrl->in(0)));
1220 }
1221 return n;
1222}
1223
1224Node* PhaseIdealLoop::cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop) {
1225 Node* castii = new CastIINode(incr, TypeInt::INT, ConstraintCastNode::StrongDependency);
1226 castii->set_req(0, ctrl);
1227 register_new_node(castii, ctrl);
1228 for (DUIterator_Fast imax, i = incr->fast_outs(imax); i < imax; i++) {
1229 Node* n = incr->fast_out(i);
1230 if (n->is_Phi() && n->in(0) == loop) {
1231 int nrep = n->replace_edge(incr, castii, &_igvn);
1232 return castii;
1233 }
1234 }
1235 return NULL__null;
1236}
1237
1238#ifdef ASSERT1
1239void PhaseIdealLoop::ensure_zero_trip_guard_proj(Node* node, bool is_main_loop) {
1240 assert(node->is_IfProj(), "must be the zero trip guard If node")do { if (!(node->is_IfProj())) { (*g_assert_poison) = 'X';
; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1240, "assert(" "node->is_IfProj()" ") failed", "must be the zero trip guard If node"
); ::breakpoint(); } } while (0)
;
1241 Node* zer_bol = node->in(0)->in(1);
1242 assert(zer_bol != NULL && zer_bol->is_Bool(), "must be Bool")do { if (!(zer_bol != __null && zer_bol->is_Bool()
)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1242, "assert(" "zer_bol != __null && zer_bol->is_Bool()"
") failed", "must be Bool"); ::breakpoint(); } } while (0)
;
1243 Node* zer_cmp = zer_bol->in(1);
1244 assert(zer_cmp != NULL && zer_cmp->Opcode() == Op_CmpI, "must be CmpI")do { if (!(zer_cmp != __null && zer_cmp->Opcode() ==
Op_CmpI)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1244, "assert(" "zer_cmp != __null && zer_cmp->Opcode() == Op_CmpI"
") failed", "must be CmpI"); ::breakpoint(); } } while (0)
;
1245 // For the main loop, the opaque node is the second input to zer_cmp, for the post loop it's the first input node
1246 Node* zer_opaq = zer_cmp->in(is_main_loop ? 2 : 1);
1247 assert(zer_opaq != NULL && zer_opaq->Opcode() == Op_Opaque1, "must be Opaque1")do { if (!(zer_opaq != __null && zer_opaq->Opcode(
) == Op_Opaque1)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1247, "assert(" "zer_opaq != __null && zer_opaq->Opcode() == Op_Opaque1"
") failed", "must be Opaque1"); ::breakpoint(); } } while (0
)
;
1248}
1249#endif
1250
1251// Make a copy of the skeleton range check predicates before the main
1252// loop and set the initial value of loop as input. After unrolling,
1253// the range of values for the induction variable in the main loop can
1254// fall outside the allowed range of values by the array access (main
1255// loop is never executed). When that happens, range check
1256// CastII/ConvI2L nodes cause some data paths to die. For consistency,
1257// the control paths must die too but the range checks were removed by
1258// predication. The range checks that we add here guarantee that they do.
1259void PhaseIdealLoop::copy_skeleton_predicates_to_main_loop_helper(Node* predicate, Node* init, Node* stride,
1260 IdealLoopTree* outer_loop, LoopNode* outer_main_head,
1261 uint dd_main_head, const uint idx_before_pre_post,
1262 const uint idx_after_post_before_pre, Node* zero_trip_guard_proj_main,
1263 Node* zero_trip_guard_proj_post, const Node_List &old_new) {
1264 if (predicate != NULL__null) {
1265#ifdef ASSERT1
1266 ensure_zero_trip_guard_proj(zero_trip_guard_proj_main, true);
1267 ensure_zero_trip_guard_proj(zero_trip_guard_proj_post, false);
1268#endif
1269 IfNode* iff = predicate->in(0)->as_If();
1270 ProjNode* uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con);
1271 Node* rgn = uncommon_proj->unique_ctrl_out();
1272 assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct")do { if (!(rgn->is_Region() || rgn->is_Call())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1272, "assert(" "rgn->is_Region() || rgn->is_Call()" ") failed"
, "must be a region or call uct"); ::breakpoint(); } } while (
0)
;
1273 assert(iff->in(1)->in(1)->Opcode() == Op_Opaque1, "unexpected predicate shape")do { if (!(iff->in(1)->in(1)->Opcode() == Op_Opaque1
)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1273, "assert(" "iff->in(1)->in(1)->Opcode() == Op_Opaque1"
") failed", "unexpected predicate shape"); ::breakpoint(); }
} while (0)
;
1274 predicate = iff->in(0);
1275 Node* current_proj = outer_main_head->in(LoopNode::EntryControl);
1276 Node* prev_proj = current_proj;
1277 Node* opaque_init = new OpaqueLoopInitNode(C, init);
1278 register_new_node(opaque_init, outer_main_head->in(LoopNode::EntryControl));
1279 Node* opaque_stride = new OpaqueLoopStrideNode(C, stride);
1280 register_new_node(opaque_stride, outer_main_head->in(LoopNode::EntryControl));
1281
1282 while (predicate != NULL__null && predicate->is_Proj() && predicate->in(0)->is_If()) {
1283 iff = predicate->in(0)->as_If();
1284 uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con);
1285 if (uncommon_proj->unique_ctrl_out() != rgn)
1286 break;
1287 if (iff->in(1)->Opcode() == Op_Opaque4) {
1288 assert(skeleton_predicate_has_opaque(iff), "unexpected")do { if (!(skeleton_predicate_has_opaque(iff))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1288, "assert(" "skeleton_predicate_has_opaque(iff)" ") failed"
, "unexpected"); ::breakpoint(); } } while (0)
;
1289 // Clone the skeleton predicate twice and initialize one with the initial
1290 // value of the loop induction variable. Leave the other predicate
1291 // to be initialized when increasing the stride during loop unrolling.
1292 prev_proj = clone_skeleton_predicate_for_main_or_post_loop(iff, opaque_init, NULL__null, predicate, uncommon_proj,
1293 current_proj, outer_loop, prev_proj);
1294 assert(skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), "")do { if (!(skeleton_predicate_has_opaque(prev_proj->in(0)->
as_If()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1294, "assert(" "skeleton_predicate_has_opaque(prev_proj->in(0)->as_If())"
") failed", ""); ::breakpoint(); } } while (0)
;
1295
1296 prev_proj = clone_skeleton_predicate_for_main_or_post_loop(iff, init, stride, predicate, uncommon_proj,
1297 current_proj, outer_loop, prev_proj);
1298 assert(!skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), "")do { if (!(!skeleton_predicate_has_opaque(prev_proj->in(0)
->as_If()))) { (*g_assert_poison) = 'X';; report_vm_error(
"/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1298, "assert(" "!skeleton_predicate_has_opaque(prev_proj->in(0)->as_If())"
") failed", ""); ::breakpoint(); } } while (0)
;
1299
1300 // Rewire any control inputs from the cloned skeleton predicates down to the main and post loop for data nodes that are part of the
1301 // main loop (and were cloned to the pre and post loop).
1302 for (DUIterator i = predicate->outs(); predicate->has_out(i); i++) {
1303 Node* loop_node = predicate->out(i);
1304 Node* pre_loop_node = old_new[loop_node->_idx];
1305 // Change the control if 'loop_node' is part of the main loop. If there is an old->new mapping and the index of
1306 // 'pre_loop_node' is greater than idx_before_pre_post, then we know that 'loop_node' was cloned and is part of
1307 // the main loop (and 'pre_loop_node' is part of the pre loop).
1308 if (!loop_node->is_CFG() && (pre_loop_node != NULL__null && pre_loop_node->_idx > idx_after_post_before_pre)) {
1309 // 'loop_node' is a data node and part of the main loop. Rewire the control to the projection of the zero-trip guard if node
1310 // of the main loop that is immediately preceding the cloned predicates.
1311 _igvn.replace_input_of(loop_node, 0, zero_trip_guard_proj_main);
1312 --i;
1313 } else if (loop_node->_idx > idx_before_pre_post && loop_node->_idx < idx_after_post_before_pre) {
1314 // 'loop_node' is a data node and part of the post loop. Rewire the control to the projection of the zero-trip guard if node
1315 // of the post loop that is immediately preceding the post loop header node (there are no cloned predicates for the post loop).
1316 assert(pre_loop_node == NULL, "a node belonging to the post loop should not have an old_new mapping at this stage")do { if (!(pre_loop_node == __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1316, "assert(" "pre_loop_node == __null" ") failed", "a node belonging to the post loop should not have an old_new mapping at this stage"
); ::breakpoint(); } } while (0)
;
1317 _igvn.replace_input_of(loop_node, 0, zero_trip_guard_proj_post);
1318 --i;
1319 }
1320 }
1321
1322 // Remove the skeleton predicate from the pre-loop
1323 _igvn.replace_input_of(iff, 1, _igvn.intcon(1));
1324 }
1325 predicate = predicate->in(0)->in(0);
1326 }
1327 _igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj);
1328 set_idom(outer_main_head, prev_proj, dd_main_head);
1329 }
1330}
1331
1332static bool skeleton_follow_inputs(Node* n, int op) {
1333 return (n->is_Bool() ||
1334 n->is_Cmp() ||
1335 op == Op_AndL ||
1336 op == Op_OrL ||
1337 op == Op_RShiftL ||
1338 op == Op_LShiftL ||
1339 op == Op_AddL ||
1340 op == Op_AddI ||
1341 op == Op_MulL ||
1342 op == Op_MulI ||
1343 op == Op_SubL ||
1344 op == Op_SubI ||
1345 op == Op_ConvI2L);
1346}
1347
1348bool PhaseIdealLoop::skeleton_predicate_has_opaque(IfNode* iff) {
1349 ResourceMark rm;
1350 Unique_Node_List wq;
1351 wq.push(iff->in(1)->in(1));
1352 for (uint i = 0; i < wq.size(); i++) {
1353 Node* n = wq.at(i);
1354 int op = n->Opcode();
1355 if (skeleton_follow_inputs(n, op)) {
1356 for (uint j = 1; j < n->req(); j++) {
1357 Node* m = n->in(j);
1358 if (m != NULL__null) {
1359 wq.push(m);
1360 }
1361 }
1362 continue;
1363 }
1364 if (n->is_Opaque1()) {
1365 return true;
1366 }
1367 }
1368 return false;
1369}
1370
1371// Clone the skeleton predicate bool for a main or unswitched loop:
1372// Main loop: Set new_init and new_stride nodes as new inputs.
1373// Unswitched loop: new_init and new_stride are both NULL. Clone OpaqueLoopInit and OpaqueLoopStride instead.
1374Node* PhaseIdealLoop::clone_skeleton_predicate_bool(Node* iff, Node* new_init, Node* new_stride, Node* control) {
1375 Node_Stack to_clone(2);
1376 to_clone.push(iff->in(1), 1);
1377 uint current = C->unique();
1378 Node* result = NULL__null;
1379 bool is_unswitched_loop = new_init == NULL__null && new_stride == NULL__null;
1380 assert(new_init != NULL || is_unswitched_loop, "new_init must be set when new_stride is non-null")do { if (!(new_init != __null || is_unswitched_loop)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1380, "assert(" "new_init != __null || is_unswitched_loop" ") failed"
, "new_init must be set when new_stride is non-null"); ::breakpoint
(); } } while (0)
;
1381 // Look for the opaque node to replace with the new value
1382 // and clone everything in between. We keep the Opaque4 node
1383 // so the duplicated predicates are eliminated once loop
1384 // opts are over: they are here only to keep the IR graph
1385 // consistent.
1386 do {
1387 Node* n = to_clone.node();
1388 uint i = to_clone.index();
1389 Node* m = n->in(i);
1390 int op = m->Opcode();
1391 if (skeleton_follow_inputs(m, op)) {
1392 to_clone.push(m, 1);
1393 continue;
1394 }
1395 if (m->is_Opaque1()) {
1396 if (n->_idx < current) {
1397 n = n->clone();
1398 register_new_node(n, control);
1399 }
1400 if (op == Op_OpaqueLoopInit) {
1401 if (is_unswitched_loop && m->_idx < current && new_init == NULL__null) {
1402 new_init = m->clone();
1403 register_new_node(new_init, control);
1404 }
1405 n->set_req(i, new_init);
1406 } else {
1407 assert(op == Op_OpaqueLoopStride, "unexpected opaque node")do { if (!(op == Op_OpaqueLoopStride)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1407, "assert(" "op == Op_OpaqueLoopStride" ") failed", "unexpected opaque node"
); ::breakpoint(); } } while (0)
;
1408 if (is_unswitched_loop && m->_idx < current && new_stride == NULL__null) {
1409 new_stride = m->clone();
1410 register_new_node(new_stride, control);
1411 }
1412 if (new_stride != NULL__null) {
1413 n->set_req(i, new_stride);
1414 }
1415 }
1416 to_clone.set_node(n);
1417 }
1418 while (true) {
1419 Node* cur = to_clone.node();
1420 uint j = to_clone.index();
1421 if (j+1 < cur->req()) {
1422 to_clone.set_index(j+1);
1423 break;
1424 }
1425 to_clone.pop();
1426 if (to_clone.size() == 0) {
1427 result = cur;
1428 break;
1429 }
1430 Node* next = to_clone.node();
1431 j = to_clone.index();
1432 if (next->in(j) != cur) {
1433 assert(cur->_idx >= current || next->in(j)->Opcode() == Op_Opaque1, "new node or Opaque1 being replaced")do { if (!(cur->_idx >= current || next->in(j)->Opcode
() == Op_Opaque1)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1433, "assert(" "cur->_idx >= current || next->in(j)->Opcode() == Op_Opaque1"
") failed", "new node or Opaque1 being replaced"); ::breakpoint
(); } } while (0)
;
1434 if (next->_idx < current) {
1435 next = next->clone();
1436 register_new_node(next, control);
1437 to_clone.set_node(next);
1438 }
1439 next->set_req(j, cur);
1440 }
1441 }
1442 } while (result == NULL__null);
1443 assert(result->_idx >= current, "new node expected")do { if (!(result->_idx >= current)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1443, "assert(" "result->_idx >= current" ") failed",
"new node expected"); ::breakpoint(); } } while (0)
;
1444 assert(!is_unswitched_loop || new_init != NULL, "new_init must always be found and cloned")do { if (!(!is_unswitched_loop || new_init != __null)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1444, "assert(" "!is_unswitched_loop || new_init != __null"
") failed", "new_init must always be found and cloned"); ::breakpoint
(); } } while (0)
;
1445 return result;
1446}
1447
1448// Clone a skeleton predicate for the main loop. new_init and new_stride are set as new inputs. Since the predicates cannot fail at runtime,
1449// Halt nodes are inserted instead of uncommon traps.
1450Node* PhaseIdealLoop::clone_skeleton_predicate_for_main_or_post_loop(Node* iff, Node* new_init, Node* new_stride, Node* predicate, Node* uncommon_proj,
1451 Node* control, IdealLoopTree* outer_loop, Node* input_proj) {
1452 Node* result = clone_skeleton_predicate_bool(iff, new_init, new_stride, control);
1453 Node* proj = predicate->clone();
1454 Node* other_proj = uncommon_proj->clone();
1455 Node* new_iff = iff->clone();
1456 new_iff->set_req(1, result);
1457 proj->set_req(0, new_iff);
1458 other_proj->set_req(0, new_iff);
1459 Node* frame = new ParmNode(C->start(), TypeFunc::FramePtr);
1460 register_new_node(frame, C->start());
1461 // It's impossible for the predicate to fail at runtime. Use an Halt node.
1462 Node* halt = new HaltNode(other_proj, frame, "duplicated predicate failed which is impossible");
1463 C->root()->add_req(halt);
1464 new_iff->set_req(0, input_proj);
1465
1466 register_control(new_iff, outer_loop == _ltree_root ? _ltree_root : outer_loop->_parent, input_proj);
1467 register_control(proj, outer_loop == _ltree_root ? _ltree_root : outer_loop->_parent, new_iff);
1468 register_control(other_proj, _ltree_root, new_iff);
1469 register_control(halt, _ltree_root, other_proj);
1470 return proj;
1471}
1472
1473void PhaseIdealLoop::copy_skeleton_predicates_to_main_loop(CountedLoopNode* pre_head, Node* init, Node* stride,
1474 IdealLoopTree* outer_loop, LoopNode* outer_main_head,
1475 uint dd_main_head, const uint idx_before_pre_post,
1476 const uint idx_after_post_before_pre, Node* zero_trip_guard_proj_main,
1477 Node* zero_trip_guard_proj_post, const Node_List &old_new) {
1478 if (UseLoopPredicate) {
1479 Node* entry = pre_head->in(LoopNode::EntryControl);
1480 Node* predicate = NULL__null;
1481 predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
1482 if (predicate != NULL__null) {
1483 entry = skip_loop_predicates(entry);
1484 }
1485 Node* profile_predicate = NULL__null;
1486 if (UseProfiledLoopPredicate) {
1487 profile_predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
1488 if (profile_predicate != NULL__null) {
1489 entry = skip_loop_predicates(entry);
1490 }
1491 }
1492 predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
1493 copy_skeleton_predicates_to_main_loop_helper(predicate, init, stride, outer_loop, outer_main_head, dd_main_head,
1494 idx_before_pre_post, idx_after_post_before_pre, zero_trip_guard_proj_main,
1495 zero_trip_guard_proj_post, old_new);
1496 copy_skeleton_predicates_to_main_loop_helper(profile_predicate, init, stride, outer_loop, outer_main_head, dd_main_head,
1497 idx_before_pre_post, idx_after_post_before_pre, zero_trip_guard_proj_main,
1498 zero_trip_guard_proj_post, old_new);
1499 }
1500}
1501
1502//------------------------------insert_pre_post_loops--------------------------
1503// Insert pre and post loops. If peel_only is set, the pre-loop can not have
1504// more iterations added. It acts as a 'peel' only, no lower-bound RCE, no
1505// alignment. Useful to unroll loops that do no array accesses.
1506void PhaseIdealLoop::insert_pre_post_loops(IdealLoopTree *loop, Node_List &old_new, bool peel_only) {
1507
1508#ifndef PRODUCT
1509 if (TraceLoopOpts) {
1510 if (peel_only)
1511 tty->print("PeelMainPost ");
1512 else
1513 tty->print("PreMainPost ");
1514 loop->dump_head();
1515 }
1516#endif
1517 C->set_major_progress();
1518
1519 // Find common pieces of the loop being guarded with pre & post loops
1520 CountedLoopNode *main_head = loop->_head->as_CountedLoop();
1521 assert(main_head->is_normal_loop(), "")do { if (!(main_head->is_normal_loop())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1521, "assert(" "main_head->is_normal_loop()" ") failed"
, ""); ::breakpoint(); } } while (0)
;
1522 CountedLoopEndNode *main_end = main_head->loopexit();
1523 assert(main_end->outcnt() == 2, "1 true, 1 false path only")do { if (!(main_end->outcnt() == 2)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1523, "assert(" "main_end->outcnt() == 2" ") failed", "1 true, 1 false path only"
); ::breakpoint(); } } while (0)
;
1524
1525 Node *pre_header= main_head->in(LoopNode::EntryControl);
Value stored to 'pre_header' during its initialization is never read
1526 Node *init = main_head->init_trip();
1527 Node *incr = main_end ->incr();
1528 Node *limit = main_end ->limit();
1529 Node *stride = main_end ->stride();
1530 Node *cmp = main_end ->cmp_node();
1531 BoolTest::mask b_test = main_end->test_trip();
1532
1533 // Need only 1 user of 'bol' because I will be hacking the loop bounds.
1534 Node *bol = main_end->in(CountedLoopEndNode::TestValue);
1535 if (bol->outcnt() != 1) {
1536 bol = bol->clone();
1537 register_new_node(bol,main_end->in(CountedLoopEndNode::TestControl));
1538 _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, bol);
1539 }
1540 // Need only 1 user of 'cmp' because I will be hacking the loop bounds.
1541 if (cmp->outcnt() != 1) {
1542 cmp = cmp->clone();
1543 register_new_node(cmp,main_end->in(CountedLoopEndNode::TestControl));
1544 _igvn.replace_input_of(bol, 1, cmp);
1545 }
1546
1547 // Add the post loop
1548 const uint idx_before_pre_post = Compile::current()->unique();
1549 CountedLoopNode *post_head = NULL__null;
1550 Node* post_incr = incr;
1551 Node* main_exit = insert_post_loop(loop, old_new, main_head, main_end, post_incr, limit, post_head);
1552 const uint idx_after_post_before_pre = Compile::current()->unique();
1553
1554 //------------------------------
1555 // Step B: Create Pre-Loop.
1556
1557 // Step B1: Clone the loop body. The clone becomes the pre-loop. The main
1558 // loop pre-header illegally has 2 control users (old & new loops).
1559 LoopNode* outer_main_head = main_head;
1560 IdealLoopTree* outer_loop = loop;
1561 if (main_head->is_strip_mined()) {
1562 main_head->verify_strip_mined(1);
1563 outer_main_head = main_head->outer_loop();
1564 outer_loop = loop->_parent;
1565 assert(outer_loop->_head == outer_main_head, "broken loop tree")do { if (!(outer_loop->_head == outer_main_head)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1565, "assert(" "outer_loop->_head == outer_main_head" ") failed"
, "broken loop tree"); ::breakpoint(); } } while (0)
;
1566 }
1567 uint dd_main_head = dom_depth(outer_main_head);
1568 clone_loop(loop, old_new, dd_main_head, ControlAroundStripMined);
1569 CountedLoopNode* pre_head = old_new[main_head->_idx]->as_CountedLoop();
1570 CountedLoopEndNode* pre_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
1571 pre_head->set_pre_loop(main_head);
1572 Node *pre_incr = old_new[incr->_idx];
1573
1574 // Reduce the pre-loop trip count.
1575 pre_end->_prob = PROB_FAIR(0.5f);
1576
1577 // Find the pre-loop normal exit.
1578 Node* pre_exit = pre_end->proj_out(false);
1579 assert(pre_exit->Opcode() == Op_IfFalse, "")do { if (!(pre_exit->Opcode() == Op_IfFalse)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1579, "assert(" "pre_exit->Opcode() == Op_IfFalse" ") failed"
, ""); ::breakpoint(); } } while (0)
;
1580 IfFalseNode *new_pre_exit = new IfFalseNode(pre_end);
1581 _igvn.register_new_node_with_optimizer(new_pre_exit);
1582 set_idom(new_pre_exit, pre_end, dd_main_head);
1583 set_loop(new_pre_exit, outer_loop->_parent);
1584
1585 // Step B2: Build a zero-trip guard for the main-loop. After leaving the
1586 // pre-loop, the main-loop may not execute at all. Later in life this
1587 // zero-trip guard will become the minimum-trip guard when we unroll
1588 // the main-loop.
1589 Node *min_opaq = new Opaque1Node(C, limit);
1590 Node *min_cmp = new CmpINode(pre_incr, min_opaq);
1591 Node *min_bol = new BoolNode(min_cmp, b_test);
1592 register_new_node(min_opaq, new_pre_exit);
1593 register_new_node(min_cmp , new_pre_exit);
1594 register_new_node(min_bol , new_pre_exit);
1595
1596 // Build the IfNode (assume the main-loop is executed always).
1597 IfNode *min_iff = new IfNode(new_pre_exit, min_bol, PROB_ALWAYS(1.0f-(1e-6f)), COUNT_UNKNOWN(-1.0f));
1598 _igvn.register_new_node_with_optimizer(min_iff);
1599 set_idom(min_iff, new_pre_exit, dd_main_head);
1600 set_loop(min_iff, outer_loop->_parent);
1601
1602 // Plug in the false-path, taken if we need to skip main-loop
1603 _igvn.hash_delete(pre_exit);
1604 pre_exit->set_req(0, min_iff);
1605 set_idom(pre_exit, min_iff, dd_main_head);
1606 set_idom(pre_exit->unique_ctrl_out(), min_iff, dd_main_head);
1607 // Make the true-path, must enter the main loop
1608 Node *min_taken = new IfTrueNode(min_iff);
1609 _igvn.register_new_node_with_optimizer(min_taken);
1610 set_idom(min_taken, min_iff, dd_main_head);
1611 set_loop(min_taken, outer_loop->_parent);
1612 // Plug in the true path
1613 _igvn.hash_delete(outer_main_head);
1614 outer_main_head->set_req(LoopNode::EntryControl, min_taken);
1615 set_idom(outer_main_head, min_taken, dd_main_head);
1616
1617 VectorSet visited;
1618 Node_Stack clones(main_head->back_control()->outcnt());
1619 // Step B3: Make the fall-in values to the main-loop come from the
1620 // fall-out values of the pre-loop.
1621 for (DUIterator i2 = main_head->outs(); main_head->has_out(i2); i2++) {
1622 Node* main_phi = main_head->out(i2);
1623 if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0) {
1624 Node* pre_phi = old_new[main_phi->_idx];
1625 Node* fallpre = clone_up_backedge_goo(pre_head->back_control(),
1626 main_head->skip_strip_mined()->in(LoopNode::EntryControl),
1627 pre_phi->in(LoopNode::LoopBackControl),
1628 visited, clones);
1629 _igvn.hash_delete(main_phi);
1630 main_phi->set_req(LoopNode::EntryControl, fallpre);
1631 }
1632 }
1633
1634 // Nodes inside the loop may be control dependent on a predicate
1635 // that was moved before the preloop. If the back branch of the main
1636 // or post loops becomes dead, those nodes won't be dependent on the
1637 // test that guards that loop nest anymore which could lead to an
1638 // incorrect array access because it executes independently of the
1639 // test that was guarding the loop nest. We add a special CastII on
1640 // the if branch that enters the loop, between the input induction
1641 // variable value and the induction variable Phi to preserve correct
1642 // dependencies.
1643
1644 // CastII for the main loop:
1645 Node* castii = cast_incr_before_loop(pre_incr, min_taken, main_head);
1646 assert(castii != NULL, "no castII inserted")do { if (!(castii != __null)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1646, "assert(" "castii != __null" ") failed", "no castII inserted"
); ::breakpoint(); } } while (0)
;
1647 assert(post_head->in(1)->is_IfProj(), "must be zero-trip guard If node projection of the post loop")do { if (!(post_head->in(1)->is_IfProj())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1647, "assert(" "post_head->in(1)->is_IfProj()" ") failed"
, "must be zero-trip guard If node projection of the post loop"
); ::breakpoint(); } } while (0)
;
1648 copy_skeleton_predicates_to_main_loop(pre_head, castii, stride, outer_loop, outer_main_head, dd_main_head,
1649 idx_before_pre_post, idx_after_post_before_pre, min_taken, post_head->in(1), old_new);
1650 copy_skeleton_predicates_to_post_loop(outer_main_head, post_head, post_incr, stride);
1651
1652 // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
1653 // RCE and alignment may change this later.
1654 Node *cmp_end = pre_end->cmp_node();
1655 assert(cmp_end->in(2) == limit, "")do { if (!(cmp_end->in(2) == limit)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1655, "assert(" "cmp_end->in(2) == limit" ") failed", ""
); ::breakpoint(); } } while (0)
;
1656 Node *pre_limit = new AddINode(init, stride);
1657
1658 // Save the original loop limit in this Opaque1 node for
1659 // use by range check elimination.
1660 Node *pre_opaq = new Opaque1Node(C, pre_limit, limit);
1661
1662 register_new_node(pre_limit, pre_head->in(0));
1663 register_new_node(pre_opaq , pre_head->in(0));
1664
1665 // Since no other users of pre-loop compare, I can hack limit directly
1666 assert(cmp_end->outcnt() == 1, "no other users")do { if (!(cmp_end->outcnt() == 1)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1666, "assert(" "cmp_end->outcnt() == 1" ") failed", "no other users"
); ::breakpoint(); } } while (0)
;
1667 _igvn.hash_delete(cmp_end);
1668 cmp_end->set_req(2, peel_only ? pre_limit : pre_opaq);
1669
1670 // Special case for not-equal loop bounds:
1671 // Change pre loop test, main loop test, and the
1672 // main loop guard test to use lt or gt depending on stride
1673 // direction:
1674 // positive stride use <
1675 // negative stride use >
1676 //
1677 // not-equal test is kept for post loop to handle case
1678 // when init > limit when stride > 0 (and reverse).
1679
1680 if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) {
1681
1682 BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt;
1683 // Modify pre loop end condition
1684 Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool();
1685 BoolNode* new_bol0 = new BoolNode(pre_bol->in(1), new_test);
1686 register_new_node(new_bol0, pre_head->in(0));
1687 _igvn.replace_input_of(pre_end, CountedLoopEndNode::TestValue, new_bol0);
1688 // Modify main loop guard condition
1689 assert(min_iff->in(CountedLoopEndNode::TestValue) == min_bol, "guard okay")do { if (!(min_iff->in(CountedLoopEndNode::TestValue) == min_bol
)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1689, "assert(" "min_iff->in(CountedLoopEndNode::TestValue) == min_bol"
") failed", "guard okay"); ::breakpoint(); } } while (0)
;
1690 BoolNode* new_bol1 = new BoolNode(min_bol->in(1), new_test);
1691 register_new_node(new_bol1, new_pre_exit);
1692 _igvn.hash_delete(min_iff);
1693 min_iff->set_req(CountedLoopEndNode::TestValue, new_bol1);
1694 // Modify main loop end condition
1695 BoolNode* main_bol = main_end->in(CountedLoopEndNode::TestValue)->as_Bool();
1696 BoolNode* new_bol2 = new BoolNode(main_bol->in(1), new_test);
1697 register_new_node(new_bol2, main_end->in(CountedLoopEndNode::TestControl));
1698 _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, new_bol2);
1699 }
1700
1701 // Flag main loop
1702 main_head->set_main_loop();
1703 if (peel_only) {
1704 main_head->set_main_no_pre_loop();
1705 }
1706
1707 // Subtract a trip count for the pre-loop.
1708 main_head->set_trip_count(main_head->trip_count() - 1);
1709
1710 // It's difficult to be precise about the trip-counts
1711 // for the pre/post loops. They are usually very short,
1712 // so guess that 4 trips is a reasonable value.
1713 post_head->set_profile_trip_cnt(4.0);
1714 pre_head->set_profile_trip_cnt(4.0);
1715
1716 // Now force out all loop-invariant dominating tests. The optimizer
1717 // finds some, but we _know_ they are all useless.
1718 peeled_dom_test_elim(loop,old_new);
1719 loop->record_for_igvn();
1720}
1721
1722//------------------------------insert_vector_post_loop------------------------
1723// Insert a copy of the atomic unrolled vectorized main loop as a post loop,
1724// unroll_policy has already informed us that more unrolling is about to
1725// happen to the main loop. The resultant post loop will serve as a
1726// vectorized drain loop.
1727void PhaseIdealLoop::insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new) {
1728 if (!loop->_head->is_CountedLoop()) return;
1729
1730 CountedLoopNode *cl = loop->_head->as_CountedLoop();
1731
1732 // only process vectorized main loops
1733 if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return;
1734
1735 int slp_max_unroll_factor = cl->slp_max_unroll();
1736 int cur_unroll = cl->unrolled_count();
1737
1738 if (slp_max_unroll_factor == 0) return;
1739
1740 // only process atomic unroll vector loops (not super unrolled after vectorization)
1741 if (cur_unroll != slp_max_unroll_factor) return;
1742
1743 // we only ever process this one time
1744 if (cl->has_atomic_post_loop()) return;
1745
1746 if (!may_require_nodes(loop->est_loop_clone_sz(2))) {
1747 return;
1748 }
1749
1750#ifndef PRODUCT
1751 if (TraceLoopOpts) {
1752 tty->print("PostVector ");
1753 loop->dump_head();
1754 }
1755#endif
1756 C->set_major_progress();
1757
1758 // Find common pieces of the loop being guarded with pre & post loops
1759 CountedLoopNode *main_head = loop->_head->as_CountedLoop();
1760 CountedLoopEndNode *main_end = main_head->loopexit();
1761 // diagnostic to show loop end is not properly formed
1762 assert(main_end->outcnt() == 2, "1 true, 1 false path only")do { if (!(main_end->outcnt() == 2)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1762, "assert(" "main_end->outcnt() == 2" ") failed", "1 true, 1 false path only"
); ::breakpoint(); } } while (0)
;
1763
1764 // mark this loop as processed
1765 main_head->mark_has_atomic_post_loop();
1766
1767 Node *incr = main_end->incr();
1768 Node *limit = main_end->limit();
1769
1770 // In this case we throw away the result as we are not using it to connect anything else.
1771 CountedLoopNode *post_head = NULL__null;
1772 insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_head);
1773 copy_skeleton_predicates_to_post_loop(main_head->skip_strip_mined(), post_head, incr, main_head->stride());
1774
1775 // It's difficult to be precise about the trip-counts
1776 // for post loops. They are usually very short,
1777 // so guess that unit vector trips is a reasonable value.
1778 post_head->set_profile_trip_cnt(cur_unroll);
1779
1780 // Now force out all loop-invariant dominating tests. The optimizer
1781 // finds some, but we _know_ they are all useless.
1782 peeled_dom_test_elim(loop, old_new);
1783 loop->record_for_igvn();
1784}
1785
1786
1787//-------------------------insert_scalar_rced_post_loop------------------------
1788// Insert a copy of the rce'd main loop as a post loop,
1789// We have not unrolled the main loop, so this is the right time to inject this.
1790// Later we will examine the partner of this post loop pair which still has range checks
1791// to see inject code which tests at runtime if the range checks are applicable.
1792void PhaseIdealLoop::insert_scalar_rced_post_loop(IdealLoopTree *loop, Node_List &old_new) {
1793 if (!loop->_head->is_CountedLoop()) return;
1794
1795 CountedLoopNode *cl = loop->_head->as_CountedLoop();
1796
1797 // only process RCE'd main loops
1798 if (!cl->is_main_loop() || cl->range_checks_present()) return;
1799
1800#ifndef PRODUCT
1801 if (TraceLoopOpts) {
1802 tty->print("PostScalarRce ");
1803 loop->dump_head();
1804 }
1805#endif
1806 C->set_major_progress();
1807
1808 // Find common pieces of the loop being guarded with pre & post loops
1809 CountedLoopNode *main_head = loop->_head->as_CountedLoop();
1810 CountedLoopEndNode *main_end = main_head->loopexit();
1811 // diagnostic to show loop end is not properly formed
1812 assert(main_end->outcnt() == 2, "1 true, 1 false path only")do { if (!(main_end->outcnt() == 2)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1812, "assert(" "main_end->outcnt() == 2" ") failed", "1 true, 1 false path only"
); ::breakpoint(); } } while (0)
;
1813
1814 Node *incr = main_end->incr();
1815 Node *limit = main_end->limit();
1816
1817 // In this case we throw away the result as we are not using it to connect anything else.
1818 CountedLoopNode *post_head = NULL__null;
1819 insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_head);
1820 copy_skeleton_predicates_to_post_loop(main_head->skip_strip_mined(), post_head, incr, main_head->stride());
1821
1822 // It's difficult to be precise about the trip-counts
1823 // for post loops. They are usually very short,
1824 // so guess that unit vector trips is a reasonable value.
1825 post_head->set_profile_trip_cnt(4.0);
1826 post_head->set_is_rce_post_loop();
1827
1828 // Now force out all loop-invariant dominating tests. The optimizer
1829 // finds some, but we _know_ they are all useless.
1830 peeled_dom_test_elim(loop, old_new);
1831 loop->record_for_igvn();
1832}
1833
1834
1835//------------------------------insert_post_loop-------------------------------
1836// Insert post loops. Add a post loop to the given loop passed.
1837Node *PhaseIdealLoop::insert_post_loop(IdealLoopTree* loop, Node_List& old_new,
1838 CountedLoopNode* main_head, CountedLoopEndNode* main_end,
1839 Node*& incr, Node* limit, CountedLoopNode*& post_head) {
1840 IfNode* outer_main_end = main_end;
1841 IdealLoopTree* outer_loop = loop;
1842 if (main_head->is_strip_mined()) {
1843 main_head->verify_strip_mined(1);
1844 outer_main_end = main_head->outer_loop_end();
1845 outer_loop = loop->_parent;
1846 assert(outer_loop->_head == main_head->in(LoopNode::EntryControl), "broken loop tree")do { if (!(outer_loop->_head == main_head->in(LoopNode::
EntryControl))) { (*g_assert_poison) = 'X';; report_vm_error(
"/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1846, "assert(" "outer_loop->_head == main_head->in(LoopNode::EntryControl)"
") failed", "broken loop tree"); ::breakpoint(); } } while (
0)
;
1847 }
1848
1849 //------------------------------
1850 // Step A: Create a new post-Loop.
1851 Node* main_exit = outer_main_end->proj_out(false);
1852 assert(main_exit->Opcode() == Op_IfFalse, "")do { if (!(main_exit->Opcode() == Op_IfFalse)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1852, "assert(" "main_exit->Opcode() == Op_IfFalse" ") failed"
, ""); ::breakpoint(); } } while (0)
;
1853 int dd_main_exit = dom_depth(main_exit);
1854
1855 // Step A1: Clone the loop body of main. The clone becomes the post-loop.
1856 // The main loop pre-header illegally has 2 control users (old & new loops).
1857 clone_loop(loop, old_new, dd_main_exit, ControlAroundStripMined);
1858 assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, "")do { if (!(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd
)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1858, "assert(" "old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd"
") failed", ""); ::breakpoint(); } } while (0)
;
1859 post_head = old_new[main_head->_idx]->as_CountedLoop();
1860 post_head->set_normal_loop();
1861 post_head->set_post_loop(main_head);
1862
1863 // Reduce the post-loop trip count.
1864 CountedLoopEndNode* post_end = old_new[main_end->_idx]->as_CountedLoopEnd();
1865 post_end->_prob = PROB_FAIR(0.5f);
1866
1867 // Build the main-loop normal exit.
1868 IfFalseNode *new_main_exit = new IfFalseNode(outer_main_end);
1869 _igvn.register_new_node_with_optimizer(new_main_exit);
1870 set_idom(new_main_exit, outer_main_end, dd_main_exit);
1871 set_loop(new_main_exit, outer_loop->_parent);
1872
1873 // Step A2: Build a zero-trip guard for the post-loop. After leaving the
1874 // main-loop, the post-loop may not execute at all. We 'opaque' the incr
1875 // (the previous loop trip-counter exit value) because we will be changing
1876 // the exit value (via additional unrolling) so we cannot constant-fold away the zero
1877 // trip guard until all unrolling is done.
1878 Node *zer_opaq = new Opaque1Node(C, incr);
1879 Node *zer_cmp = new CmpINode(zer_opaq, limit);
1880 Node *zer_bol = new BoolNode(zer_cmp, main_end->test_trip());
1881 register_new_node(zer_opaq, new_main_exit);
1882 register_new_node(zer_cmp, new_main_exit);
1883 register_new_node(zer_bol, new_main_exit);
1884
1885 // Build the IfNode
1886 IfNode *zer_iff = new IfNode(new_main_exit, zer_bol, PROB_FAIR(0.5f), COUNT_UNKNOWN(-1.0f));
1887 _igvn.register_new_node_with_optimizer(zer_iff);
1888 set_idom(zer_iff, new_main_exit, dd_main_exit);
1889 set_loop(zer_iff, outer_loop->_parent);
1890
1891 // Plug in the false-path, taken if we need to skip this post-loop
1892 _igvn.replace_input_of(main_exit, 0, zer_iff);
1893 set_idom(main_exit, zer_iff, dd_main_exit);
1894 set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
1895 // Make the true-path, must enter this post loop
1896 Node *zer_taken = new IfTrueNode(zer_iff);
1897 _igvn.register_new_node_with_optimizer(zer_taken);
1898 set_idom(zer_taken, zer_iff, dd_main_exit);
1899 set_loop(zer_taken, outer_loop->_parent);
1900 // Plug in the true path
1901 _igvn.hash_delete(post_head);
1902 post_head->set_req(LoopNode::EntryControl, zer_taken);
1903 set_idom(post_head, zer_taken, dd_main_exit);
1904
1905 VectorSet visited;
1906 Node_Stack clones(main_head->back_control()->outcnt());
1907 // Step A3: Make the fall-in values to the post-loop come from the
1908 // fall-out values of the main-loop.
1909 for (DUIterator i = main_head->outs(); main_head->has_out(i); i++) {
1910 Node* main_phi = main_head->out(i);
1911 if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0) {
1912 Node* cur_phi = old_new[main_phi->_idx];
1913 Node* fallnew = clone_up_backedge_goo(main_head->back_control(),
1914 post_head->init_control(),
1915 main_phi->in(LoopNode::LoopBackControl),
1916 visited, clones);
1917 _igvn.hash_delete(cur_phi);
1918 cur_phi->set_req(LoopNode::EntryControl, fallnew);
1919 }
1920 }
1921
1922 // CastII for the new post loop:
1923 incr = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
1924 assert(incr != NULL, "no castII inserted")do { if (!(incr != __null)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1924, "assert(" "incr != __null" ") failed", "no castII inserted"
); ::breakpoint(); } } while (0)
;
1925
1926 return new_main_exit;
1927}
1928
1929//------------------------------is_invariant-----------------------------
1930// Return true if n is invariant
1931bool IdealLoopTree::is_invariant(Node* n) const {
1932 Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n;
1933 if (n_c->is_top()) return false;
1934 return !is_member(_phase->get_loop(n_c));
1935}
1936
1937void PhaseIdealLoop::update_main_loop_skeleton_predicates(Node* ctrl, CountedLoopNode* loop_head, Node* init, int stride_con) {
1938 // Search for skeleton predicates and update them according to the new stride
1939 Node* entry = ctrl;
1940 Node* prev_proj = ctrl;
1941 LoopNode* outer_loop_head = loop_head->skip_strip_mined();
1942 IdealLoopTree* outer_loop = get_loop(outer_loop_head);
1943
1944 // Compute the value of the loop induction variable at the end of the
1945 // first iteration of the unrolled loop: init + new_stride_con - init_inc
1946 int new_stride_con = stride_con * 2;
1947 Node* max_value = _igvn.intcon(new_stride_con);
1948 set_ctrl(max_value, C->root());
1949
1950 while (entry != NULL__null && entry->is_Proj() && entry->in(0)->is_If()) {
1951 IfNode* iff = entry->in(0)->as_If();
1952 ProjNode* proj = iff->proj_out(1 - entry->as_Proj()->_con);
1953 if (proj->unique_ctrl_out()->Opcode() != Op_Halt) {
1954 break;
1955 }
1956 if (iff->in(1)->Opcode() == Op_Opaque4) {
1957 // Look for predicate with an Opaque1 node that can be used as a template
1958 if (!skeleton_predicate_has_opaque(iff)) {
1959 // No Opaque1 node? It's either the check for the first value
1960 // of the first iteration or the check for the last value of
1961 // the first iteration of an unrolled loop. We can't
1962 // tell. Kill it in any case.
1963 _igvn.replace_input_of(iff, 1, iff->in(1)->in(2));
1964 } else {
1965 // Add back predicates updated for the new stride.
1966 prev_proj = clone_skeleton_predicate_for_main_or_post_loop(iff, init, max_value, entry, proj, ctrl, outer_loop,
1967 prev_proj);
1968 assert(!skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), "unexpected")do { if (!(!skeleton_predicate_has_opaque(prev_proj->in(0)
->as_If()))) { (*g_assert_poison) = 'X';; report_vm_error(
"/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1968, "assert(" "!skeleton_predicate_has_opaque(prev_proj->in(0)->as_If())"
") failed", "unexpected"); ::breakpoint(); } } while (0)
;
1969 }
1970 }
1971 entry = entry->in(0)->in(0);
1972 }
1973 if (prev_proj != ctrl) {
1974 _igvn.replace_input_of(outer_loop_head, LoopNode::EntryControl, prev_proj);
1975 set_idom(outer_loop_head, prev_proj, dom_depth(outer_loop_head));
1976 }
1977}
1978
1979void PhaseIdealLoop::copy_skeleton_predicates_to_post_loop(LoopNode* main_loop_head, CountedLoopNode* post_loop_head, Node* init, Node* stride) {
1980 // Go over the skeleton predicates of the main loop and make a copy for the post loop with its initial iv value and
1981 // stride as inputs.
1982 Node* post_loop_entry = post_loop_head->in(LoopNode::EntryControl);
1983 Node* main_loop_entry = main_loop_head->in(LoopNode::EntryControl);
1984 IdealLoopTree* post_loop = get_loop(post_loop_head);
1985
1986 Node* ctrl = main_loop_entry;
1987 Node* prev_proj = post_loop_entry;
1988 while (ctrl != NULL__null && ctrl->is_Proj() && ctrl->in(0)->is_If()) {
1989 IfNode* iff = ctrl->in(0)->as_If();
1990 ProjNode* proj = iff->proj_out(1 - ctrl->as_Proj()->_con);
1991 if (proj->unique_ctrl_out()->Opcode() != Op_Halt) {
1992 break;
1993 }
1994 if (iff->in(1)->Opcode() == Op_Opaque4 && skeleton_predicate_has_opaque(iff)) {
1995 prev_proj = clone_skeleton_predicate_for_main_or_post_loop(iff, init, stride, ctrl, proj, post_loop_entry,
1996 post_loop, prev_proj);
1997 assert(!skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), "unexpected")do { if (!(!skeleton_predicate_has_opaque(prev_proj->in(0)
->as_If()))) { (*g_assert_poison) = 'X';; report_vm_error(
"/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 1997, "assert(" "!skeleton_predicate_has_opaque(prev_proj->in(0)->as_If())"
") failed", "unexpected"); ::breakpoint(); } } while (0)
;
1998 }
1999 ctrl = ctrl->in(0)->in(0);
2000 }
2001 if (prev_proj != post_loop_entry) {
2002 _igvn.replace_input_of(post_loop_head, LoopNode::EntryControl, prev_proj);
2003 set_idom(post_loop_head, prev_proj, dom_depth(post_loop_head));
2004 }
2005}
2006
2007//------------------------------do_unroll--------------------------------------
2008// Unroll the loop body one step - make each trip do 2 iterations.
2009void PhaseIdealLoop::do_unroll(IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip) {
2010 assert(LoopUnrollLimit, "")do { if (!(LoopUnrollLimit)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2010, "assert(" "LoopUnrollLimit" ") failed", ""); ::breakpoint
(); } } while (0)
;
2011 CountedLoopNode *loop_head = loop->_head->as_CountedLoop();
2012 CountedLoopEndNode *loop_end = loop_head->loopexit();
2013#ifndef PRODUCT
2014 if (PrintOpto && VerifyLoopOptimizations) {
2015 tty->print("Unrolling ");
2016 loop->dump_head();
2017 } else if (TraceLoopOpts) {
2018 if (loop_head->trip_count() < (uint)LoopUnrollLimit) {
2019 tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count());
2020 } else {
2021 tty->print("Unroll %d ", loop_head->unrolled_count()*2);
2022 }
2023 loop->dump_head();
2024 }
2025
2026 if (C->do_vector_loop() && (PrintOpto && (VerifyLoopOptimizations || TraceLoopOpts))) {
2027 Node_Stack stack(C->live_nodes() >> 2);
2028 Node_List rpo_list;
2029 VectorSet visited;
2030 visited.set(loop_head->_idx);
2031 rpo(loop_head, stack, visited, rpo_list);
2032 dump(loop, rpo_list.size(), rpo_list);
2033 }
2034#endif
2035
2036 // Remember loop node count before unrolling to detect
2037 // if rounds of unroll,optimize are making progress
2038 loop_head->set_node_count_before_unroll(loop->_body.size());
2039
2040 Node *ctrl = loop_head->skip_strip_mined()->in(LoopNode::EntryControl);
2041 Node *limit = loop_head->limit();
2042 Node *init = loop_head->init_trip();
2043 Node *stride = loop_head->stride();
2044
2045 Node *opaq = NULL__null;
2046 if (adjust_min_trip) { // If not maximally unrolling, need adjustment
2047 // Search for zero-trip guard.
2048
2049 // Check the shape of the graph at the loop entry. If an inappropriate
2050 // graph shape is encountered, the compiler bails out loop unrolling;
2051 // compilation of the method will still succeed.
2052 opaq = loop_head->is_canonical_loop_entry();
2053 if (opaq == NULL__null) {
2054 return;
2055 }
2056 // Zero-trip test uses an 'opaque' node which is not shared.
2057 assert(opaq->outcnt() == 1 && opaq->in(1) == limit, "")do { if (!(opaq->outcnt() == 1 && opaq->in(1) ==
limit)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2057, "assert(" "opaq->outcnt() == 1 && opaq->in(1) == limit"
") failed", ""); ::breakpoint(); } } while (0)
;
2058 }
2059
2060 C->set_major_progress();
2061
2062 Node* new_limit = NULL__null;
2063 int stride_con = stride->get_int();
2064 int stride_p = (stride_con > 0) ? stride_con : -stride_con;
2065 uint old_trip_count = loop_head->trip_count();
2066 // Verify that unroll policy result is still valid.
2067 assert(old_trip_count > 1 && (!adjust_min_trip || stride_p <=do { if (!(old_trip_count > 1 && (!adjust_min_trip
|| stride_p <= MIN2<int>(max_jint / 2 - 2, MAX2(1<<
3, Matcher::max_vector_size(T_BYTE)) * loop_head->unrolled_count
())))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2068, "assert(" "old_trip_count > 1 && (!adjust_min_trip || stride_p <= MIN2<int>(max_jint / 2 - 2, MAX2(1<<3, Matcher::max_vector_size(T_BYTE)) * loop_head->unrolled_count()))"
") failed", "sanity"); ::breakpoint(); } } while (0)
2068 MIN2<int>(max_jint / 2 - 2, MAX2(1<<3, Matcher::max_vector_size(T_BYTE)) * loop_head->unrolled_count())), "sanity")do { if (!(old_trip_count > 1 && (!adjust_min_trip
|| stride_p <= MIN2<int>(max_jint / 2 - 2, MAX2(1<<
3, Matcher::max_vector_size(T_BYTE)) * loop_head->unrolled_count
())))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2068, "assert(" "old_trip_count > 1 && (!adjust_min_trip || stride_p <= MIN2<int>(max_jint / 2 - 2, MAX2(1<<3, Matcher::max_vector_size(T_BYTE)) * loop_head->unrolled_count()))"
") failed", "sanity"); ::breakpoint(); } } while (0)
;
2069
2070 update_main_loop_skeleton_predicates(ctrl, loop_head, init, stride_con);
2071
2072 // Adjust loop limit to keep valid iterations number after unroll.
2073 // Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride
2074 // which may overflow.
2075 if (!adjust_min_trip) {
2076 assert(old_trip_count > 1 && (old_trip_count & 1) == 0,do { if (!(old_trip_count > 1 && (old_trip_count &
1) == 0)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2077, "assert(" "old_trip_count > 1 && (old_trip_count & 1) == 0"
") failed", "odd trip count for maximally unroll"); ::breakpoint
(); } } while (0)
2077 "odd trip count for maximally unroll")do { if (!(old_trip_count > 1 && (old_trip_count &
1) == 0)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2077, "assert(" "old_trip_count > 1 && (old_trip_count & 1) == 0"
") failed", "odd trip count for maximally unroll"); ::breakpoint
(); } } while (0)
;
2078 // Don't need to adjust limit for maximally unroll since trip count is even.
2079 } else if (loop_head->has_exact_trip_count() && init->is_Con()) {
2080 // Loop's limit is constant. Loop's init could be constant when pre-loop
2081 // become peeled iteration.
2082 jlong init_con = init->get_int();
2083 // We can keep old loop limit if iterations count stays the same:
2084 // old_trip_count == new_trip_count * 2
2085 // Note: since old_trip_count >= 2 then new_trip_count >= 1
2086 // so we also don't need to adjust zero trip test.
2087 jlong limit_con = limit->get_int();
2088 // (stride_con*2) not overflow since stride_con <= 8.
2089 int new_stride_con = stride_con * 2;
2090 int stride_m = new_stride_con - (stride_con > 0 ? 1 : -1);
2091 jlong trip_count = (limit_con - init_con + stride_m)/new_stride_con;
2092 // New trip count should satisfy next conditions.
2093 assert(trip_count > 0 && (julong)trip_count < (julong)max_juint/2, "sanity")do { if (!(trip_count > 0 && (julong)trip_count <
(julong)max_juint/2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2093, "assert(" "trip_count > 0 && (julong)trip_count < (julong)max_juint/2"
") failed", "sanity"); ::breakpoint(); } } while (0)
;
2094 uint new_trip_count = (uint)trip_count;
2095 adjust_min_trip = (old_trip_count != new_trip_count*2);
2096 }
2097
2098 if (adjust_min_trip) {
2099 // Step 2: Adjust the trip limit if it is called for.
2100 // The adjustment amount is -stride. Need to make sure if the
2101 // adjustment underflows or overflows, then the main loop is skipped.
2102 Node* cmp = loop_end->cmp_node();
2103 assert(cmp->in(2) == limit, "sanity")do { if (!(cmp->in(2) == limit)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2103, "assert(" "cmp->in(2) == limit" ") failed", "sanity"
); ::breakpoint(); } } while (0)
;
2104 assert(opaq != NULL && opaq->in(1) == limit, "sanity")do { if (!(opaq != __null && opaq->in(1) == limit)
) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2104, "assert(" "opaq != __null && opaq->in(1) == limit"
") failed", "sanity"); ::breakpoint(); } } while (0)
;
2105
2106 // Verify that policy_unroll result is still valid.
2107 const TypeInt* limit_type = _igvn.type(limit)->is_int();
2108 assert(stride_con > 0 && ((limit_type->_hi - stride_con) < limit_type->_hi) ||do { if (!(stride_con > 0 && ((limit_type->_hi -
stride_con) < limit_type->_hi) || stride_con < 0 &&
((limit_type->_lo - stride_con) > limit_type->_lo))
) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2110, "assert(" "stride_con > 0 && ((limit_type->_hi - stride_con) < limit_type->_hi) || stride_con < 0 && ((limit_type->_lo - stride_con) > limit_type->_lo)"
") failed", "sanity"); ::breakpoint(); } } while (0)
2109 stride_con < 0 && ((limit_type->_lo - stride_con) > limit_type->_lo),do { if (!(stride_con > 0 && ((limit_type->_hi -
stride_con) < limit_type->_hi) || stride_con < 0 &&
((limit_type->_lo - stride_con) > limit_type->_lo))
) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2110, "assert(" "stride_con > 0 && ((limit_type->_hi - stride_con) < limit_type->_hi) || stride_con < 0 && ((limit_type->_lo - stride_con) > limit_type->_lo)"
") failed", "sanity"); ::breakpoint(); } } while (0)
2110 "sanity")do { if (!(stride_con > 0 && ((limit_type->_hi -
stride_con) < limit_type->_hi) || stride_con < 0 &&
((limit_type->_lo - stride_con) > limit_type->_lo))
) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2110, "assert(" "stride_con > 0 && ((limit_type->_hi - stride_con) < limit_type->_hi) || stride_con < 0 && ((limit_type->_lo - stride_con) > limit_type->_lo)"
") failed", "sanity"); ::breakpoint(); } } while (0)
;
2111
2112 if (limit->is_Con()) {
2113 // The check in policy_unroll and the assert above guarantee
2114 // no underflow if limit is constant.
2115 new_limit = _igvn.intcon(limit->get_int() - stride_con);
2116 set_ctrl(new_limit, C->root());
2117 } else {
2118 // Limit is not constant.
2119 if (loop_head->unrolled_count() == 1) { // only for first unroll
2120 // Separate limit by Opaque node in case it is an incremented
2121 // variable from previous loop to avoid using pre-incremented
2122 // value which could increase register pressure.
2123 // Otherwise reorg_offsets() optimization will create a separate
2124 // Opaque node for each use of trip-counter and as result
2125 // zero trip guard limit will be different from loop limit.
2126 assert(has_ctrl(opaq), "should have it")do { if (!(has_ctrl(opaq))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2126, "assert(" "has_ctrl(opaq)" ") failed", "should have it"
); ::breakpoint(); } } while (0)
;
2127 Node* opaq_ctrl = get_ctrl(opaq);
2128 limit = new Opaque2Node(C, limit);
2129 register_new_node(limit, opaq_ctrl);
2130 }
2131 if ((stride_con > 0 && (java_subtract(limit_type->_lo, stride_con) < limit_type->_lo)) ||
2132 (stride_con < 0 && (java_subtract(limit_type->_hi, stride_con) > limit_type->_hi))) {
2133 // No underflow.
2134 new_limit = new SubINode(limit, stride);
2135 } else {
2136 // (limit - stride) may underflow.
2137 // Clamp the adjustment value with MININT or MAXINT:
2138 //
2139 // new_limit = limit-stride
2140 // if (stride > 0)
2141 // new_limit = (limit < new_limit) ? MININT : new_limit;
2142 // else
2143 // new_limit = (limit > new_limit) ? MAXINT : new_limit;
2144 //
2145 BoolTest::mask bt = loop_end->test_trip();
2146 assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected")do { if (!(bt == BoolTest::lt || bt == BoolTest::gt)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2146, "assert(" "bt == BoolTest::lt || bt == BoolTest::gt" ") failed"
, "canonical test is expected"); ::breakpoint(); } } while (0
)
;
2147 Node* adj_max = _igvn.intcon((stride_con > 0) ? min_jint : max_jint);
2148 set_ctrl(adj_max, C->root());
2149 Node* old_limit = NULL__null;
2150 Node* adj_limit = NULL__null;
2151 Node* bol = limit->is_CMove() ? limit->in(CMoveNode::Condition) : NULL__null;
2152 if (loop_head->unrolled_count() > 1 &&
2153 limit->is_CMove() && limit->Opcode() == Op_CMoveI &&
2154 limit->in(CMoveNode::IfTrue) == adj_max &&
2155 bol->as_Bool()->_test._test == bt &&
2156 bol->in(1)->Opcode() == Op_CmpI &&
2157 bol->in(1)->in(2) == limit->in(CMoveNode::IfFalse)) {
2158 // Loop was unrolled before.
2159 // Optimize the limit to avoid nested CMove:
2160 // use original limit as old limit.
2161 old_limit = bol->in(1)->in(1);
2162 // Adjust previous adjusted limit.
2163 adj_limit = limit->in(CMoveNode::IfFalse);
2164 adj_limit = new SubINode(adj_limit, stride);
2165 } else {
2166 old_limit = limit;
2167 adj_limit = new SubINode(limit, stride);
2168 }
2169 assert(old_limit != NULL && adj_limit != NULL, "")do { if (!(old_limit != __null && adj_limit != __null
)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2169, "assert(" "old_limit != __null && adj_limit != __null"
") failed", ""); ::breakpoint(); } } while (0)
;
2170 register_new_node(adj_limit, ctrl); // adjust amount
2171 Node* adj_cmp = new CmpINode(old_limit, adj_limit);
2172 register_new_node(adj_cmp, ctrl);
2173 Node* adj_bool = new BoolNode(adj_cmp, bt);
2174 register_new_node(adj_bool, ctrl);
2175 new_limit = new CMoveINode(adj_bool, adj_limit, adj_max, TypeInt::INT);
2176 }
2177 register_new_node(new_limit, ctrl);
2178 }
2179
2180 assert(new_limit != NULL, "")do { if (!(new_limit != __null)) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2180, "assert(" "new_limit != __null" ") failed", ""); ::breakpoint
(); } } while (0)
;
2181 // Replace in loop test.
2182 assert(loop_end->in(1)->in(1) == cmp, "sanity")do { if (!(loop_end->in(1)->in(1) == cmp)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2182, "assert(" "loop_end->in(1)->in(1) == cmp" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
2183 if (cmp->outcnt() == 1 && loop_end->in(1)->outcnt() == 1) {
2184 // Don't need to create new test since only one user.
2185 _igvn.hash_delete(cmp);
2186 cmp->set_req(2, new_limit);
2187 } else {
2188 // Create new test since it is shared.
2189 Node* ctrl2 = loop_end->in(0);
2190 Node* cmp2 = cmp->clone();
2191 cmp2->set_req(2, new_limit);
2192 register_new_node(cmp2, ctrl2);
2193 Node* bol2 = loop_end->in(1)->clone();
2194 bol2->set_req(1, cmp2);
2195 register_new_node(bol2, ctrl2);
2196 _igvn.replace_input_of(loop_end, 1, bol2);
2197 }
2198 // Step 3: Find the min-trip test guaranteed before a 'main' loop.
2199 // Make it a 1-trip test (means at least 2 trips).
2200
2201 // Guard test uses an 'opaque' node which is not shared. Hence I
2202 // can edit it's inputs directly. Hammer in the new limit for the
2203 // minimum-trip guard.
2204 assert(opaq->outcnt() == 1, "")do { if (!(opaq->outcnt() == 1)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2204, "assert(" "opaq->outcnt() == 1" ") failed", ""); ::
breakpoint(); } } while (0)
;
2205 _igvn.replace_input_of(opaq, 1, new_limit);
2206 }
2207
2208 // Adjust max trip count. The trip count is intentionally rounded
2209 // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
2210 // the main, unrolled, part of the loop will never execute as it is protected
2211 // by the min-trip test. See bug 4834191 for a case where we over-unrolled
2212 // and later determined that part of the unrolled loop was dead.
2213 loop_head->set_trip_count(old_trip_count / 2);
2214
2215 // Double the count of original iterations in the unrolled loop body.
2216 loop_head->double_unrolled_count();
2217
2218 // ---------
2219 // Step 4: Clone the loop body. Move it inside the loop. This loop body
2220 // represents the odd iterations; since the loop trips an even number of
2221 // times its backedge is never taken. Kill the backedge.
2222 uint dd = dom_depth(loop_head);
2223 clone_loop(loop, old_new, dd, IgnoreStripMined);
2224
2225 // Make backedges of the clone equal to backedges of the original.
2226 // Make the fall-in from the original come from the fall-out of the clone.
2227 for (DUIterator_Fast jmax, j = loop_head->fast_outs(jmax); j < jmax; j++) {
2228 Node* phi = loop_head->fast_out(j);
2229 if (phi->is_Phi() && phi->in(0) == loop_head && phi->outcnt() > 0) {
2230 Node *newphi = old_new[phi->_idx];
2231 _igvn.hash_delete(phi);
2232 _igvn.hash_delete(newphi);
2233
2234 phi ->set_req(LoopNode:: EntryControl, newphi->in(LoopNode::LoopBackControl));
2235 newphi->set_req(LoopNode::LoopBackControl, phi ->in(LoopNode::LoopBackControl));
2236 phi ->set_req(LoopNode::LoopBackControl, C->top());
2237 }
2238 }
2239 Node *clone_head = old_new[loop_head->_idx];
2240 _igvn.hash_delete(clone_head);
2241 loop_head ->set_req(LoopNode:: EntryControl, clone_head->in(LoopNode::LoopBackControl));
2242 clone_head->set_req(LoopNode::LoopBackControl, loop_head ->in(LoopNode::LoopBackControl));
2243 loop_head ->set_req(LoopNode::LoopBackControl, C->top());
2244 loop->_head = clone_head; // New loop header
2245
2246 set_idom(loop_head, loop_head ->in(LoopNode::EntryControl), dd);
2247 set_idom(clone_head, clone_head->in(LoopNode::EntryControl), dd);
2248
2249 // Kill the clone's backedge
2250 Node *newcle = old_new[loop_end->_idx];
2251 _igvn.hash_delete(newcle);
2252 Node *one = _igvn.intcon(1);
2253 set_ctrl(one, C->root());
2254 newcle->set_req(1, one);
2255 // Force clone into same loop body
2256 uint max = loop->_body.size();
2257 for (uint k = 0; k < max; k++) {
2258 Node *old = loop->_body.at(k);
2259 Node *nnn = old_new[old->_idx];
2260 loop->_body.push(nnn);
2261 if (!has_ctrl(old)) {
2262 set_loop(nnn, loop);
2263 }
2264 }
2265
2266 loop->record_for_igvn();
2267 loop_head->clear_strip_mined();
2268
2269#ifndef PRODUCT
2270 if (C->do_vector_loop() && (PrintOpto && (VerifyLoopOptimizations || TraceLoopOpts))) {
2271 tty->print("\nnew loop after unroll\n"); loop->dump_head();
2272 for (uint i = 0; i < loop->_body.size(); i++) {
2273 loop->_body.at(i)->dump();
2274 }
2275 if (C->clone_map().is_debug()) {
2276 tty->print("\nCloneMap\n");
2277 Dict* dict = C->clone_map().dict();
2278 DictI i(dict);
2279 tty->print_cr("Dict@%p[%d] = ", dict, dict->Size());
2280 for (int ii = 0; i.test(); ++i, ++ii) {
2281 NodeCloneInfo cl((uint64_t)dict->operator[]((void*)i._key));
2282 tty->print("%d->%d:%d,", (int)(intptr_t)i._key, cl.idx(), cl.gen());
2283 if (ii % 10 == 9) {
2284 tty->print_cr(" ");
2285 }
2286 }
2287 tty->print_cr(" ");
2288 }
2289 }
2290#endif
2291}
2292
2293//------------------------------do_maximally_unroll----------------------------
2294
2295void PhaseIdealLoop::do_maximally_unroll(IdealLoopTree *loop, Node_List &old_new) {
2296 CountedLoopNode *cl = loop->_head->as_CountedLoop();
2297 assert(cl->has_exact_trip_count(), "trip count is not exact")do { if (!(cl->has_exact_trip_count())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2297, "assert(" "cl->has_exact_trip_count()" ") failed",
"trip count is not exact"); ::breakpoint(); } } while (0)
;
2298 assert(cl->trip_count() > 0, "")do { if (!(cl->trip_count() > 0)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2298, "assert(" "cl->trip_count() > 0" ") failed", ""
); ::breakpoint(); } } while (0)
;
2299#ifndef PRODUCT
2300 if (TraceLoopOpts) {
2301 tty->print("MaxUnroll %d ", cl->trip_count());
2302 loop->dump_head();
2303 }
2304#endif
2305
2306 // If loop is tripping an odd number of times, peel odd iteration
2307 if ((cl->trip_count() & 1) == 1) {
2308 do_peeling(loop, old_new);
2309 }
2310
2311 // Now its tripping an even number of times remaining. Double loop body.
2312 // Do not adjust pre-guards; they are not needed and do not exist.
2313 if (cl->trip_count() > 0) {
2314 assert((cl->trip_count() & 1) == 0, "missed peeling")do { if (!((cl->trip_count() & 1) == 0)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2314, "assert(" "(cl->trip_count() & 1) == 0" ") failed"
, "missed peeling"); ::breakpoint(); } } while (0)
;
2315 do_unroll(loop, old_new, false);
2316 }
2317}
2318
2319void PhaseIdealLoop::mark_reductions(IdealLoopTree *loop) {
2320 if (SuperWordReductions == false) return;
2321
2322 CountedLoopNode* loop_head = loop->_head->as_CountedLoop();
2323 if (loop_head->unrolled_count() > 1) {
2324 return;
2325 }
2326
2327 Node* trip_phi = loop_head->phi();
2328 for (DUIterator_Fast imax, i = loop_head->fast_outs(imax); i < imax; i++) {
2329 Node* phi = loop_head->fast_out(i);
2330 if (phi->is_Phi() && phi->outcnt() > 0 && phi != trip_phi) {
2331 // For definitions which are loop inclusive and not tripcounts.
2332 Node* def_node = phi->in(LoopNode::LoopBackControl);
2333
2334 if (def_node != NULL__null) {
2335 Node* n_ctrl = get_ctrl(def_node);
2336 if (n_ctrl != NULL__null && loop->is_member(get_loop(n_ctrl))) {
2337 // Now test it to see if it fits the standard pattern for a reduction operator.
2338 int opc = def_node->Opcode();
2339 if (opc != ReductionNode::opcode(opc, def_node->bottom_type()->basic_type())
2340 || opc == Op_MinD || opc == Op_MinF || opc == Op_MaxD || opc == Op_MaxF) {
2341 if (!def_node->is_reduction()) { // Not marked yet
2342 // To be a reduction, the arithmetic node must have the phi as input and provide a def to it
2343 bool ok = false;
2344 for (unsigned j = 1; j < def_node->req(); j++) {
2345 Node* in = def_node->in(j);
2346 if (in == phi) {
2347 ok = true;
2348 break;
2349 }
2350 }
2351
2352 // do nothing if we did not match the initial criteria
2353 if (ok == false) {
2354 continue;
2355 }
2356
2357 // The result of the reduction must not be used in the loop
2358 for (DUIterator_Fast imax, i = def_node->fast_outs(imax); i < imax && ok; i++) {
2359 Node* u = def_node->fast_out(i);
2360 if (!loop->is_member(get_loop(ctrl_or_self(u)))) {
2361 continue;
2362 }
2363 if (u == phi) {
2364 continue;
2365 }
2366 ok = false;
2367 }
2368
2369 // iff the uses conform
2370 if (ok) {
2371 def_node->add_flag(Node::Flag_is_reduction);
2372 loop_head->mark_has_reductions();
2373 }
2374 }
2375 }
2376 }
2377 }
2378 }
2379 }
2380}
2381
2382//------------------------------adjust_limit-----------------------------------
2383// Helper function that computes new loop limit as (rc_limit-offset)/scale
2384Node* PhaseIdealLoop::adjust_limit(bool is_positive_stride, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round) {
2385 Node* sub = new SubLNode(rc_limit, offset);
2386 register_new_node(sub, pre_ctrl);
2387 Node* limit = new DivLNode(NULL__null, sub, scale);
2388 register_new_node(limit, pre_ctrl);
2389
2390 // When the absolute value of scale is greater than one, the division
2391 // may round limit down/up, so add/sub one to/from the limit.
2392 if (round) {
2393 limit = new AddLNode(limit, _igvn.longcon(is_positive_stride ? -1 : 1));
2394 register_new_node(limit, pre_ctrl);
2395 }
2396
2397 // Clamp the limit to handle integer under-/overflows by using long values.
2398 // We only convert the limit back to int when we handled under-/overflows.
2399 // Note that all values are longs in the following computations.
2400 // When reducing the limit, clamp to [min_jint, old_limit]:
2401 // INT(MINL(old_limit, MAXL(limit, min_jint)))
2402 // - integer underflow of limit: MAXL chooses min_jint.
2403 // - integer overflow of limit: MINL chooses old_limit (<= MAX_INT < limit)
2404 // When increasing the limit, clamp to [old_limit, max_jint]:
2405 // INT(MAXL(old_limit, MINL(limit, max_jint)))
2406 // - integer overflow of limit: MINL chooses max_jint.
2407 // - integer underflow of limit: MAXL chooses old_limit (>= MIN_INT > limit)
2408 // INT() is finally converting the limit back to an integer value.
2409
2410 // We use CMove nodes to implement long versions of min/max (MINL/MAXL).
2411 // We use helper methods for inner MINL/MAXL which return CMoveL nodes to keep a long value for the outer MINL/MAXL comparison:
2412 Node* inner_result_long;
2413 if (is_positive_stride) {
2414 inner_result_long = MaxNode::signed_max(limit, _igvn.longcon(min_jint), TypeLong::LONG, _igvn);
2415 } else {
2416 inner_result_long = MaxNode::signed_min(limit, _igvn.longcon(max_jint), TypeLong::LONG, _igvn);
2417 }
2418 set_subtree_ctrl(inner_result_long, false);
2419
2420 // Outer MINL/MAXL:
2421 // The comparison is done with long values but the result is the converted back to int by using CmovI.
2422 Node* old_limit_long = new ConvI2LNode(old_limit);
2423 register_new_node(old_limit_long, pre_ctrl);
2424 Node* cmp = new CmpLNode(old_limit_long, limit);
2425 register_new_node(cmp, pre_ctrl);
2426 Node* bol = new BoolNode(cmp, is_positive_stride ? BoolTest::gt : BoolTest::lt);
2427 register_new_node(bol, pre_ctrl);
2428 Node* inner_result_int = new ConvL2INode(inner_result_long); // Could under-/overflow but that's fine as comparison was done with CmpL
2429 register_new_node(inner_result_int, pre_ctrl);
2430 limit = new CMoveINode(bol, old_limit, inner_result_int, TypeInt::INT);
2431 register_new_node(limit, pre_ctrl);
2432 return limit;
2433}
2434
2435//------------------------------add_constraint---------------------------------
2436// Constrain the main loop iterations so the conditions:
2437// low_limit <= scale_con*I + offset < upper_limit
2438// always hold true. That is, either increase the number of iterations in the
2439// pre-loop or reduce the number of iterations in the main-loop until the condition
2440// holds true in the main-loop. Stride, scale, offset and limit are all loop
2441// invariant. Further, stride and scale are constants (offset and limit often are).
2442void PhaseIdealLoop::add_constraint(jlong stride_con, jlong scale_con, Node* offset, Node* low_limit, Node* upper_limit, Node* pre_ctrl, Node** pre_limit, Node** main_limit) {
2443 assert(_igvn.type(offset)->isa_long() != NULL && _igvn.type(low_limit)->isa_long() != NULL &&do { if (!(_igvn.type(offset)->isa_long() != __null &&
_igvn.type(low_limit)->isa_long() != __null && _igvn
.type(upper_limit)->isa_long() != __null)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2444, "assert(" "_igvn.type(offset)->isa_long() != __null && _igvn.type(low_limit)->isa_long() != __null && _igvn.type(upper_limit)->isa_long() != __null"
") failed", "arguments should be long values"); ::breakpoint
(); } } while (0)
2444 _igvn.type(upper_limit)->isa_long() != NULL, "arguments should be long values")do { if (!(_igvn.type(offset)->isa_long() != __null &&
_igvn.type(low_limit)->isa_long() != __null && _igvn
.type(upper_limit)->isa_long() != __null)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2444, "assert(" "_igvn.type(offset)->isa_long() != __null && _igvn.type(low_limit)->isa_long() != __null && _igvn.type(upper_limit)->isa_long() != __null"
") failed", "arguments should be long values"); ::breakpoint
(); } } while (0)
;
2445
2446 // For a positive stride, we need to reduce the main-loop limit and
2447 // increase the pre-loop limit. This is reversed for a negative stride.
2448 bool is_positive_stride = (stride_con > 0);
2449
2450 // If the absolute scale value is greater one, division in 'adjust_limit' may require
2451 // rounding. Make sure the ABS method correctly handles min_jint.
2452 // Only do this for the pre-loop, one less iteration of the main loop doesn't hurt.
2453 bool round = ABS(scale_con) > 1;
2454
2455 Node* scale = _igvn.longcon(scale_con);
2456 set_ctrl(scale, C->root());
2457
2458 if ((stride_con^scale_con) >= 0) { // Use XOR to avoid overflow
2459 // Positive stride*scale: the affine function is increasing,
2460 // the pre-loop checks for underflow and the post-loop for overflow.
2461
2462 // The overflow limit: scale*I+offset < upper_limit
2463 // For the main-loop limit compute:
2464 // ( if (scale > 0) /* and stride > 0 */
2465 // I < (upper_limit-offset)/scale
2466 // else /* scale < 0 and stride < 0 */
2467 // I > (upper_limit-offset)/scale
2468 // )
2469 *main_limit = adjust_limit(is_positive_stride, scale, offset, upper_limit, *main_limit, pre_ctrl, false);
2470
2471 // The underflow limit: low_limit <= scale*I+offset
2472 // For the pre-loop limit compute:
2473 // NOT(scale*I+offset >= low_limit)
2474 // scale*I+offset < low_limit
2475 // ( if (scale > 0) /* and stride > 0 */
2476 // I < (low_limit-offset)/scale
2477 // else /* scale < 0 and stride < 0 */
2478 // I > (low_limit-offset)/scale
2479 // )
2480 *pre_limit = adjust_limit(!is_positive_stride, scale, offset, low_limit, *pre_limit, pre_ctrl, round);
2481 } else {
2482 // Negative stride*scale: the affine function is decreasing,
2483 // the pre-loop checks for overflow and the post-loop for underflow.
2484
2485 // The overflow limit: scale*I+offset < upper_limit
2486 // For the pre-loop limit compute:
2487 // NOT(scale*I+offset < upper_limit)
2488 // scale*I+offset >= upper_limit
2489 // scale*I+offset+1 > upper_limit
2490 // ( if (scale < 0) /* and stride > 0 */
2491 // I < (upper_limit-(offset+1))/scale
2492 // else /* scale > 0 and stride < 0 */
2493 // I > (upper_limit-(offset+1))/scale
2494 // )
2495 Node* one = _igvn.longcon(1);
2496 set_ctrl(one, C->root());
2497 Node* plus_one = new AddLNode(offset, one);
2498 register_new_node(plus_one, pre_ctrl);
2499 *pre_limit = adjust_limit(!is_positive_stride, scale, plus_one, upper_limit, *pre_limit, pre_ctrl, round);
2500
2501 // The underflow limit: low_limit <= scale*I+offset
2502 // For the main-loop limit compute:
2503 // scale*I+offset+1 > low_limit
2504 // ( if (scale < 0) /* and stride > 0 */
2505 // I < (low_limit-(offset+1))/scale
2506 // else /* scale > 0 and stride < 0 */
2507 // I > (low_limit-(offset+1))/scale
2508 // )
2509 *main_limit = adjust_limit(is_positive_stride, scale, plus_one, low_limit, *main_limit, pre_ctrl, false);
2510 }
2511}
2512
2513bool PhaseIdealLoop::is_iv(Node* exp, Node* iv, BasicType bt) {
2514 if (exp == iv) {
2515 return true;
2516 }
2517
2518 if (bt == T_LONG && iv->bottom_type()->isa_int() && exp->Opcode() == Op_ConvI2L && exp->in(1) == iv) {
2519 return true;
2520 }
2521 return false;
2522}
2523
2524//------------------------------is_scaled_iv---------------------------------
2525// Return true if exp is a constant times an induction var
2526bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, jlong* p_scale, BasicType bt, bool* converted) {
2527 exp = exp->uncast();
2528 assert(bt == T_INT || bt == T_LONG, "unexpected int type")do { if (!(bt == T_INT || bt == T_LONG)) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2528, "assert(" "bt == T_INT || bt == T_LONG" ") failed", "unexpected int type"
); ::breakpoint(); } } while (0)
;
2529 if (is_iv(exp, iv, bt)) {
2530 if (p_scale != NULL__null) {
2531 *p_scale = 1;
2532 }
2533 return true;
2534 }
2535 if (bt == T_LONG && iv->bottom_type()->isa_int() && exp->Opcode() == Op_ConvI2L) {
2536 exp = exp->in(1);
2537 bt = T_INT;
2538 if (converted != NULL__null) {
2539 *converted = true;
2540 }
2541 }
2542 int opc = exp->Opcode();
2543 // Can't use is_Mul() here as it's true for AndI and AndL
2544 if (opc == Op_Mul(bt)) {
2545 if (is_iv(exp->in(1)->uncast(), iv, bt) && exp->in(2)->is_Con()) {
2546 if (p_scale != NULL__null) {
2547 *p_scale = exp->in(2)->get_integer_as_long(bt);
2548 }
2549 return true;
2550 }
2551 if (is_iv(exp->in(2)->uncast(), iv, bt) && exp->in(1)->is_Con()) {
2552 if (p_scale != NULL__null) {
2553 *p_scale = exp->in(1)->get_integer_as_long(bt);
2554 }
2555 return true;
2556 }
2557 } else if (opc == Op_LShift(bt)) {
2558 if (is_iv(exp->in(1)->uncast(), iv, bt) && exp->in(2)->is_Con()) {
2559 if (p_scale != NULL__null) {
2560 jint shift_amount = exp->in(2)->get_int();
2561 if (bt == T_INT) {
2562 *p_scale = java_shift_left((jint)1, (juint)shift_amount);
2563 } else if (bt == T_LONG) {
2564 *p_scale = java_shift_left((jlong)1, (julong)shift_amount);
2565 }
2566 }
2567 return true;
2568 }
2569 }
2570 return false;
2571}
2572
2573//-----------------------------is_scaled_iv_plus_offset------------------------------
2574// Return true if exp is a simple induction variable expression: k1*iv + (invar + k2)
2575bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scale, Node** p_offset, BasicType bt, bool* converted, int depth) {
2576 assert(bt == T_INT || bt == T_LONG, "unexpected int type")do { if (!(bt == T_INT || bt == T_LONG)) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2576, "assert(" "bt == T_INT || bt == T_LONG" ") failed", "unexpected int type"
); ::breakpoint(); } } while (0)
;
2577 if (is_scaled_iv(exp, iv, p_scale, bt, converted)) {
2578 if (p_offset != NULL__null) {
2579 Node *zero = _igvn.integercon(0, bt);
2580 set_ctrl(zero, C->root());
2581 *p_offset = zero;
2582 }
2583 return true;
2584 }
2585 exp = exp->uncast();
2586 int opc = exp->Opcode();
2587 if (opc == Op_Add(bt)) {
2588 if (is_scaled_iv(exp->in(1), iv, p_scale, bt, converted)) {
2589 if (p_offset != NULL__null) {
2590 *p_offset = exp->in(2);
2591 }
2592 return true;
2593 }
2594 if (is_scaled_iv(exp->in(2), iv, p_scale, bt, converted)) {
2595 if (p_offset != NULL__null) {
2596 *p_offset = exp->in(1);
2597 }
2598 return true;
2599 }
2600 if (exp->in(2)->is_Con()) {
2601 Node* offset2 = NULL__null;
2602 if (depth < 2 &&
2603 is_scaled_iv_plus_offset(exp->in(1), iv, p_scale,
2604 p_offset != NULL__null ? &offset2 : NULL__null, bt, converted, depth+1)) {
2605 if (p_offset != NULL__null) {
2606 Node *ctrl_off2 = get_ctrl(offset2);
2607 Node* offset = AddNode::make(offset2, exp->in(2), bt);
2608 register_new_node(offset, ctrl_off2);
2609 *p_offset = offset;
2610 }
2611 return true;
2612 }
2613 }
2614 } else if (opc == Op_Sub(bt)) {
2615 if (is_scaled_iv(exp->in(1), iv, p_scale, bt, converted)) {
2616 if (p_offset != NULL__null) {
2617 Node *zero = _igvn.integercon(0, bt);
2618 set_ctrl(zero, C->root());
2619 Node *ctrl_off = get_ctrl(exp->in(2));
2620 Node* offset = SubNode::make(zero, exp->in(2), bt);
2621 register_new_node(offset, ctrl_off);
2622 *p_offset = offset;
2623 }
2624 return true;
2625 }
2626 if (is_scaled_iv(exp->in(2), iv, p_scale, bt, converted)) {
2627 if (p_offset != NULL__null) {
2628 // We can't handle a scale of min_jint (or min_jlong) here as -1 * min_jint = min_jint
2629 if (*p_scale == min_signed_integer(bt)) {
2630 return false;
2631 }
2632 *p_scale *= -1;
2633 *p_offset = exp->in(1);
2634 }
2635 return true;
2636 }
2637 }
2638 return false;
2639}
2640
2641// Same as PhaseIdealLoop::duplicate_predicates() but for range checks
2642// eliminated by iteration splitting.
2643Node* PhaseIdealLoop::add_range_check_predicate(IdealLoopTree* loop, CountedLoopNode* cl,
2644 Node* predicate_proj, int scale_con, Node* offset,
2645 Node* limit, jint stride_con, Node* value) {
2646 bool overflow = false;
2647 BoolNode* bol = rc_predicate(loop, predicate_proj, scale_con, offset, value, NULL__null, stride_con, limit, (stride_con > 0) != (scale_con > 0), overflow, false);
2648 Node* opaque_bol = new Opaque4Node(C, bol, _igvn.intcon(1));
2649 register_new_node(opaque_bol, predicate_proj);
2650 IfNode* new_iff = NULL__null;
2651 if (overflow) {
2652 new_iff = new IfNode(predicate_proj, opaque_bol, PROB_MAX(1.0f-(1e-6f)), COUNT_UNKNOWN(-1.0f));
2653 } else {
2654 new_iff = new RangeCheckNode(predicate_proj, opaque_bol, PROB_MAX(1.0f-(1e-6f)), COUNT_UNKNOWN(-1.0f));
2655 }
2656 register_control(new_iff, loop->_parent, predicate_proj);
2657 Node* iffalse = new IfFalseNode(new_iff);
2658 register_control(iffalse, _ltree_root, new_iff);
2659 ProjNode* iftrue = new IfTrueNode(new_iff);
2660 register_control(iftrue, loop->_parent, new_iff);
2661 Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
2662 register_new_node(frame, C->start());
2663 Node* halt = new HaltNode(iffalse, frame, "range check predicate failed which is impossible");
2664 register_control(halt, _ltree_root, iffalse);
2665 C->root()->add_req(halt);
2666 return iftrue;
2667}
2668
2669//------------------------------do_range_check---------------------------------
2670// Eliminate range-checks and other trip-counter vs loop-invariant tests.
2671int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
2672#ifndef PRODUCT
2673 if (PrintOpto && VerifyLoopOptimizations) {
2674 tty->print("Range Check Elimination ");
2675 loop->dump_head();
2676 } else if (TraceLoopOpts) {
2677 tty->print("RangeCheck ");
2678 loop->dump_head();
2679 }
2680#endif
2681
2682 assert(RangeCheckElimination, "")do { if (!(RangeCheckElimination)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2682, "assert(" "RangeCheckElimination" ") failed", ""); ::
breakpoint(); } } while (0)
;
2683 CountedLoopNode *cl = loop->_head->as_CountedLoop();
2684 // If we fail before trying to eliminate range checks, set multiversion state
2685 int closed_range_checks = 1;
2686
2687 // protect against stride not being a constant
2688 if (!cl->stride_is_con()) {
2689 return closed_range_checks;
2690 }
2691 // Find the trip counter; we are iteration splitting based on it
2692 Node *trip_counter = cl->phi();
2693 // Find the main loop limit; we will trim it's iterations
2694 // to not ever trip end tests
2695 Node *main_limit = cl->limit();
2696
2697 // Check graph shape. Cannot optimize a loop if zero-trip
2698 // Opaque1 node is optimized away and then another round
2699 // of loop opts attempted.
2700 if (cl->is_canonical_loop_entry() == NULL__null) {
2701 return closed_range_checks;
2702 }
2703
2704 // Need to find the main-loop zero-trip guard
2705 Node *ctrl = cl->skip_predicates();
2706 Node *iffm = ctrl->in(0);
2707 Node *opqzm = iffm->in(1)->in(1)->in(2);
2708 assert(opqzm->in(1) == main_limit, "do not understand situation")do { if (!(opqzm->in(1) == main_limit)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2708, "assert(" "opqzm->in(1) == main_limit" ") failed",
"do not understand situation"); ::breakpoint(); } } while (0
)
;
2709
2710 // Find the pre-loop limit; we will expand its iterations to
2711 // not ever trip low tests.
2712 Node *p_f = iffm->in(0);
2713 // pre loop may have been optimized out
2714 if (p_f->Opcode() != Op_IfFalse) {
2715 return closed_range_checks;
2716 }
2717 CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
2718 assert(pre_end->loopnode()->is_pre_loop(), "")do { if (!(pre_end->loopnode()->is_pre_loop())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2718, "assert(" "pre_end->loopnode()->is_pre_loop()" ") failed"
, ""); ::breakpoint(); } } while (0)
;
2719 Node *pre_opaq1 = pre_end->limit();
2720 // Occasionally it's possible for a pre-loop Opaque1 node to be
2721 // optimized away and then another round of loop opts attempted.
2722 // We can not optimize this particular loop in that case.
2723 if (pre_opaq1->Opcode() != Op_Opaque1) {
2724 return closed_range_checks;
2725 }
2726 Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
2727 Node *pre_limit = pre_opaq->in(1);
2728
2729 // Where do we put new limit calculations
2730 Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
2731
2732 // Ensure the original loop limit is available from the
2733 // pre-loop Opaque1 node.
2734 Node *orig_limit = pre_opaq->original_loop_limit();
2735 if (orig_limit == NULL__null || _igvn.type(orig_limit) == Type::TOP) {
2736 return closed_range_checks;
2737 }
2738 // Must know if its a count-up or count-down loop
2739
2740 int stride_con = cl->stride_con();
2741 Node* zero = _igvn.longcon(0);
2742 Node* one = _igvn.longcon(1);
2743 // Use symmetrical int range [-max_jint,max_jint]
2744 Node* mini = _igvn.longcon(-max_jint);
2745 set_ctrl(zero, C->root());
2746 set_ctrl(one, C->root());
2747 set_ctrl(mini, C->root());
2748
2749 // Count number of range checks and reduce by load range limits, if zero,
2750 // the loop is in canonical form to multiversion.
2751 closed_range_checks = 0;
2752
2753 Node* predicate_proj = cl->skip_strip_mined()->in(LoopNode::EntryControl);
2754 assert(predicate_proj->is_Proj() && predicate_proj->in(0)->is_If(), "if projection only")do { if (!(predicate_proj->is_Proj() && predicate_proj
->in(0)->is_If())) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2754, "assert(" "predicate_proj->is_Proj() && predicate_proj->in(0)->is_If()"
") failed", "if projection only"); ::breakpoint(); } } while
(0)
;
2755
2756 // Check loop body for tests of trip-counter plus loop-invariant vs loop-variant.
2757 for (uint i = 0; i < loop->_body.size(); i++) {
2758 Node *iff = loop->_body[i];
2759 if (iff->Opcode() == Op_If ||
2760 iff->Opcode() == Op_RangeCheck) { // Test?
2761 // Test is an IfNode, has 2 projections. If BOTH are in the loop
2762 // we need loop unswitching instead of iteration splitting.
2763 closed_range_checks++;
2764 Node *exit = loop->is_loop_exit(iff);
2765 if (!exit) continue;
2766 int flip = (exit->Opcode() == Op_IfTrue) ? 1 : 0;
2767
2768 // Get boolean condition to test
2769 Node *i1 = iff->in(1);
2770 if (!i1->is_Bool()) continue;
2771 BoolNode *bol = i1->as_Bool();
2772 BoolTest b_test = bol->_test;
2773 // Flip sense of test if exit condition is flipped
2774 if (flip) {
2775 b_test = b_test.negate();
2776 }
2777 // Get compare
2778 Node *cmp = bol->in(1);
2779
2780 // Look for trip_counter + offset vs limit
2781 Node *rc_exp = cmp->in(1);
2782 Node *limit = cmp->in(2);
2783 int scale_con= 1; // Assume trip counter not scaled
2784
2785 Node *limit_c = get_ctrl(limit);
2786 if (loop->is_member(get_loop(limit_c))) {
2787 // Compare might have operands swapped; commute them
2788 b_test = b_test.commute();
2789 rc_exp = cmp->in(2);
2790 limit = cmp->in(1);
2791 limit_c = get_ctrl(limit);
2792 if (loop->is_member(get_loop(limit_c))) {
2793 continue; // Both inputs are loop varying; cannot RCE
2794 }
2795 }
2796 // Here we know 'limit' is loop invariant
2797
2798 // 'limit' maybe pinned below the zero trip test (probably from a
2799 // previous round of rce), in which case, it can't be used in the
2800 // zero trip test expression which must occur before the zero test's if.
2801 if (is_dominator(ctrl, limit_c)) {
2802 continue; // Don't rce this check but continue looking for other candidates.
2803 }
2804
2805 // Check for scaled induction variable plus an offset
2806 Node *offset = NULL__null;
2807
2808 if (!is_scaled_iv_plus_offset(rc_exp, trip_counter, &scale_con, &offset)) {
2809 continue;
2810 }
2811
2812 Node *offset_c = get_ctrl(offset);
2813 if (loop->is_member(get_loop(offset_c))) {
2814 continue; // Offset is not really loop invariant
2815 }
2816 // Here we know 'offset' is loop invariant.
2817
2818 // As above for the 'limit', the 'offset' maybe pinned below the
2819 // zero trip test.
2820 if (is_dominator(ctrl, offset_c)) {
2821 continue; // Don't rce this check but continue looking for other candidates.
2822 }
2823#ifdef ASSERT1
2824 if (TraceRangeLimitCheck) {
2825 tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
2826 bol->dump(2);
2827 }
2828#endif
2829 // At this point we have the expression as:
2830 // scale_con * trip_counter + offset :: limit
2831 // where scale_con, offset and limit are loop invariant. Trip_counter
2832 // monotonically increases by stride_con, a constant. Both (or either)
2833 // stride_con and scale_con can be negative which will flip about the
2834 // sense of the test.
2835
2836 // Perform the limit computations in jlong to avoid overflow
2837 jlong lscale_con = scale_con;
2838 Node* int_offset = offset;
2839 offset = new ConvI2LNode(offset);
2840 register_new_node(offset, pre_ctrl);
2841 Node* int_limit = limit;
2842 limit = new ConvI2LNode(limit);
2843 register_new_node(limit, pre_ctrl);
2844
2845 // Adjust pre and main loop limits to guard the correct iteration set
2846 if (cmp->Opcode() == Op_CmpU) { // Unsigned compare is really 2 tests
2847 if (b_test._test == BoolTest::lt) { // Range checks always use lt
2848 // The underflow and overflow limits: 0 <= scale*I+offset < limit
2849 add_constraint(stride_con, lscale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit);
2850 Node* init = cl->init_trip();
2851 Node* opaque_init = new OpaqueLoopInitNode(C, init);
2852 register_new_node(opaque_init, predicate_proj);
2853
2854 // predicate on first value of first iteration
2855 predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, int_offset, int_limit, stride_con, init);
2856 assert(!skeleton_predicate_has_opaque(predicate_proj->in(0)->as_If()), "unexpected")do { if (!(!skeleton_predicate_has_opaque(predicate_proj->
in(0)->as_If()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2856, "assert(" "!skeleton_predicate_has_opaque(predicate_proj->in(0)->as_If())"
") failed", "unexpected"); ::breakpoint(); } } while (0)
;
2857
2858 // template predicate so it can be updated on next unrolling
2859 predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, int_offset, int_limit, stride_con, opaque_init);
2860 assert(skeleton_predicate_has_opaque(predicate_proj->in(0)->as_If()), "unexpected")do { if (!(skeleton_predicate_has_opaque(predicate_proj->in
(0)->as_If()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2860, "assert(" "skeleton_predicate_has_opaque(predicate_proj->in(0)->as_If())"
") failed", "unexpected"); ::breakpoint(); } } while (0)
;
2861
2862 Node* opaque_stride = new OpaqueLoopStrideNode(C, cl->stride());
2863 register_new_node(opaque_stride, predicate_proj);
2864 Node* max_value = new SubINode(opaque_stride, cl->stride());
2865 register_new_node(max_value, predicate_proj);
2866 max_value = new AddINode(opaque_init, max_value);
2867 register_new_node(max_value, predicate_proj);
2868 predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, int_offset, int_limit, stride_con, max_value);
2869 assert(skeleton_predicate_has_opaque(predicate_proj->in(0)->as_If()), "unexpected")do { if (!(skeleton_predicate_has_opaque(predicate_proj->in
(0)->as_If()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2869, "assert(" "skeleton_predicate_has_opaque(predicate_proj->in(0)->as_If())"
") failed", "unexpected"); ::breakpoint(); } } while (0)
;
2870
2871 } else {
2872 if (PrintOpto) {
2873 tty->print_cr("missed RCE opportunity");
2874 }
2875 continue; // In release mode, ignore it
2876 }
2877 } else { // Otherwise work on normal compares
2878 switch(b_test._test) {
2879 case BoolTest::gt:
2880 // Fall into GE case
2881 case BoolTest::ge:
2882 // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
2883 lscale_con = -lscale_con;
2884 offset = new SubLNode(zero, offset);
2885 register_new_node(offset, pre_ctrl);
2886 limit = new SubLNode(zero, limit);
2887 register_new_node(limit, pre_ctrl);
2888 // Fall into LE case
2889 case BoolTest::le:
2890 if (b_test._test != BoolTest::gt) {
2891 // Convert X <= Y to X < Y+1
2892 limit = new AddLNode(limit, one);
2893 register_new_node(limit, pre_ctrl);
2894 }
2895 // Fall into LT case
2896 case BoolTest::lt:
2897 // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
2898 // Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
2899 // to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
2900 add_constraint(stride_con, lscale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit);
2901 break;
2902 default:
2903 if (PrintOpto) {
2904 tty->print_cr("missed RCE opportunity");
2905 }
2906 continue; // Unhandled case
2907 }
2908 }
2909
2910 // Kill the eliminated test
2911 C->set_major_progress();
2912 Node *kill_con = _igvn.intcon(1-flip);
2913 set_ctrl(kill_con, C->root());
2914 _igvn.replace_input_of(iff, 1, kill_con);
2915 // Find surviving projection
2916 assert(iff->is_If(), "")do { if (!(iff->is_If())) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2916, "assert(" "iff->is_If()" ") failed", ""); ::breakpoint
(); } } while (0)
;
2917 ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
2918 // Find loads off the surviving projection; remove their control edge
2919 for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
2920 Node* cd = dp->fast_out(i); // Control-dependent node
2921 if (cd->is_Load() && cd->depends_only_on_test()) { // Loads can now float around in the loop
2922 // Allow the load to float around in the loop, or before it
2923 // but NOT before the pre-loop.
2924 _igvn.replace_input_of(cd, 0, ctrl); // ctrl, not NULL
2925 --i;
2926 --imax;
2927 }
2928 }
2929 if (int_limit->Opcode() == Op_LoadRange) {
2930 closed_range_checks--;
2931 }
2932 } // End of is IF
2933 }
2934 if (predicate_proj != cl->skip_strip_mined()->in(LoopNode::EntryControl)) {
2935 _igvn.replace_input_of(cl->skip_strip_mined(), LoopNode::EntryControl, predicate_proj);
2936 set_idom(cl->skip_strip_mined(), predicate_proj, dom_depth(cl->skip_strip_mined()));
2937 }
2938
2939 // Update loop limits
2940 if (pre_limit != orig_limit) {
2941 // Computed pre-loop limit can be outside of loop iterations range.
2942 pre_limit = (stride_con > 0) ? (Node*)new MinINode(pre_limit, orig_limit)
2943 : (Node*)new MaxINode(pre_limit, orig_limit);
2944 register_new_node(pre_limit, pre_ctrl);
2945 }
2946 _igvn.replace_input_of(pre_opaq, 1, pre_limit);
2947
2948 // Note:: we are making the main loop limit no longer precise;
2949 // need to round up based on stride.
2950 cl->set_nonexact_trip_count();
2951 Node *main_cle = cl->loopexit();
2952 Node *main_bol = main_cle->in(1);
2953 // Hacking loop bounds; need private copies of exit test
2954 if (main_bol->outcnt() > 1) { // BoolNode shared?
2955 main_bol = main_bol->clone(); // Clone a private BoolNode
2956 register_new_node(main_bol, main_cle->in(0));
2957 _igvn.replace_input_of(main_cle, 1, main_bol);
2958 }
2959 Node *main_cmp = main_bol->in(1);
2960 if (main_cmp->outcnt() > 1) { // CmpNode shared?
2961 main_cmp = main_cmp->clone(); // Clone a private CmpNode
2962 register_new_node(main_cmp, main_cle->in(0));
2963 _igvn.replace_input_of(main_bol, 1, main_cmp);
2964 }
2965 assert(main_limit == cl->limit() || get_ctrl(main_limit) == pre_ctrl, "wrong control for added limit")do { if (!(main_limit == cl->limit() || get_ctrl(main_limit
) == pre_ctrl)) { (*g_assert_poison) = 'X';; report_vm_error(
"/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2965, "assert(" "main_limit == cl->limit() || get_ctrl(main_limit) == pre_ctrl"
") failed", "wrong control for added limit"); ::breakpoint()
; } } while (0)
;
2966 const TypeInt* orig_limit_t = _igvn.type(orig_limit)->is_int();
2967 bool upward = cl->stride_con() > 0;
2968 // The new loop limit is <= (for an upward loop) >= (for a downward loop) than the orig limit.
2969 // The expression that computes the new limit may be too complicated and the computed type of the new limit
2970 // may be too pessimistic. A CastII here guarantees it's not lost.
2971 main_limit = new CastIINode(main_limit, TypeInt::make(upward ? min_jint : orig_limit_t->_lo,
2972 upward ? orig_limit_t->_hi : max_jint, Type::WidenMax));
2973 main_limit->init_req(0, pre_ctrl);
2974 register_new_node(main_limit, pre_ctrl);
2975 // Hack the now-private loop bounds
2976 _igvn.replace_input_of(main_cmp, 2, main_limit);
2977 // The OpaqueNode is unshared by design
2978 assert(opqzm->outcnt() == 1, "cannot hack shared node")do { if (!(opqzm->outcnt() == 1)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2978, "assert(" "opqzm->outcnt() == 1" ") failed", "cannot hack shared node"
); ::breakpoint(); } } while (0)
;
2979 _igvn.replace_input_of(opqzm, 1, main_limit);
2980
2981 return closed_range_checks;
2982}
2983
2984//------------------------------has_range_checks-------------------------------
2985// Check to see if RCE cleaned the current loop of range-checks.
2986void PhaseIdealLoop::has_range_checks(IdealLoopTree *loop) {
2987 assert(RangeCheckElimination, "")do { if (!(RangeCheckElimination)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 2987, "assert(" "RangeCheckElimination" ") failed", ""); ::
breakpoint(); } } while (0)
;
2988
2989 // skip if not a counted loop
2990 if (!loop->is_counted()) return;
2991
2992 CountedLoopNode *cl = loop->_head->as_CountedLoop();
2993
2994 // skip this loop if it is already checked
2995 if (cl->has_been_range_checked()) return;
2996
2997 // Now check for existence of range checks
2998 for (uint i = 0; i < loop->_body.size(); i++) {
2999 Node *iff = loop->_body[i];
3000 int iff_opc = iff->Opcode();
3001 if (iff_opc == Op_If || iff_opc == Op_RangeCheck) {
3002 cl->mark_has_range_checks();
3003 break;
3004 }
3005 }
3006 cl->set_has_been_range_checked();
3007}
3008
3009//-------------------------multi_version_post_loops----------------------------
3010// Check the range checks that remain, if simple, use the bounds to guard
3011// which version to a post loop we execute, one with range checks or one without
3012bool PhaseIdealLoop::multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop) {
3013 bool multi_version_succeeded = false;
3014 assert(RangeCheckElimination, "")do { if (!(RangeCheckElimination)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 3014, "assert(" "RangeCheckElimination" ") failed", ""); ::
breakpoint(); } } while (0)
;
3015 CountedLoopNode *legacy_cl = legacy_loop->_head->as_CountedLoop();
3016 assert(legacy_cl->is_post_loop(), "")do { if (!(legacy_cl->is_post_loop())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 3016, "assert(" "legacy_cl->is_post_loop()" ") failed", ""
); ::breakpoint(); } } while (0)
;
3017
3018 // Check for existence of range checks using the unique instance to make a guard with
3019 Unique_Node_List worklist;
3020 for (uint i = 0; i < legacy_loop->_body.size(); i++) {
3021 Node *iff = legacy_loop->_body[i];
3022 int iff_opc = iff->Opcode();
3023 if (iff_opc == Op_If || iff_opc == Op_RangeCheck) {
3024 worklist.push(iff);
3025 }
3026 }
3027
3028 // Find RCE'd post loop so that we can stage its guard.
3029 if (legacy_cl->is_canonical_loop_entry() == NULL__null) {
3030 return multi_version_succeeded;
3031 }
3032 Node* ctrl = legacy_cl->in(LoopNode::EntryControl);
3033 Node* iffm = ctrl->in(0);
3034
3035 // Now we test that both the post loops are connected
3036 Node* post_loop_region = iffm->in(0);
3037 if (post_loop_region == NULL__null) return multi_version_succeeded;
3038 if (!post_loop_region->is_Region()) return multi_version_succeeded;
3039 Node* covering_region = post_loop_region->in(RegionNode::Control+1);
3040 if (covering_region == NULL__null) return multi_version_succeeded;
3041 if (!covering_region->is_Region()) return multi_version_succeeded;
3042 Node* p_f = covering_region->in(RegionNode::Control);
3043 if (p_f == NULL__null) return multi_version_succeeded;
3044 if (!p_f->is_IfFalse()) return multi_version_succeeded;
3045 if (!p_f->in(0)->is_CountedLoopEnd()) return multi_version_succeeded;
3046 CountedLoopEndNode* rce_loop_end = p_f->in(0)->as_CountedLoopEnd();
3047 if (rce_loop_end == NULL__null) return multi_version_succeeded;
3048 CountedLoopNode* rce_cl = rce_loop_end->loopnode();
3049 if (rce_cl == NULL__null || !rce_cl->is_post_loop()) return multi_version_succeeded;
3050 CountedLoopNode *known_rce_cl = rce_loop->_head->as_CountedLoop();
3051 if (rce_cl != known_rce_cl) return multi_version_succeeded;
3052
3053 // Then we fetch the cover entry test
3054 ctrl = rce_cl->in(LoopNode::EntryControl);
3055 if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return multi_version_succeeded;
3056
3057#ifndef PRODUCT
3058 if (TraceLoopOpts) {
3059 tty->print("PostMultiVersion\n");
3060 rce_loop->dump_head();
3061 legacy_loop->dump_head();
3062 }
3063#endif
3064
3065 // Now fetch the limit we want to compare against
3066 Node *limit = rce_cl->limit();
3067 bool first_time = true;
3068
3069 // If we got this far, we identified the post loop which has been RCE'd and
3070 // we have a work list. Now we will try to transform the if guard to cause
3071 // the loop pair to be multi version executed with the determination left to runtime
3072 // or the optimizer if full information is known about the given arrays at compile time.
3073 Node *last_min = NULL__null;
3074 multi_version_succeeded = true;
3075 while (worklist.size()) {
3076 Node* rc_iffm = worklist.pop();
3077 if (rc_iffm->is_If()) {
3078 Node *rc_bolzm = rc_iffm->in(1);
3079 if (rc_bolzm->is_Bool()) {
3080 Node *rc_cmpzm = rc_bolzm->in(1);
3081 if (rc_cmpzm->is_Cmp()) {
3082 Node *rc_left = rc_cmpzm->in(2);
3083 if (rc_left->Opcode() != Op_LoadRange) {
3084 multi_version_succeeded = false;
3085 break;
3086 }
3087 if (first_time) {
3088 last_min = rc_left;
3089 first_time = false;
3090 } else {
3091 Node *cur_min = new MinINode(last_min, rc_left);
3092 last_min = cur_min;
3093 _igvn.register_new_node_with_optimizer(last_min);
3094 }
3095 }
3096 }
3097 }
3098 }
3099
3100 // All we have to do is update the limit of the rce loop
3101 // with the min of our expression and the current limit.
3102 // We will use this expression to replace the current limit.
3103 if (last_min && multi_version_succeeded) {
3104 Node *cur_min = new MinINode(last_min, limit);
3105 _igvn.register_new_node_with_optimizer(cur_min);
3106 Node *cmp_node = rce_loop_end->cmp_node();
3107 _igvn.replace_input_of(cmp_node, 2, cur_min);
3108 set_ctrl(cur_min, ctrl);
3109 set_loop(cur_min, rce_loop->_parent);
3110
3111 legacy_cl->mark_is_multiversioned();
3112 rce_cl->mark_is_multiversioned();
3113 multi_version_succeeded = true;
3114
3115 C->set_major_progress();
3116 }
3117
3118 return multi_version_succeeded;
3119}
3120
3121//-------------------------poison_rce_post_loop--------------------------------
3122// Causes the rce'd post loop to be optimized away if multiversioning fails
3123void PhaseIdealLoop::poison_rce_post_loop(IdealLoopTree *rce_loop) {
3124 CountedLoopNode *rce_cl = rce_loop->_head->as_CountedLoop();
3125 Node* ctrl = rce_cl->in(LoopNode::EntryControl);
3126 if (ctrl->is_IfTrue() || ctrl->is_IfFalse()) {
3127 Node* iffm = ctrl->in(0);
3128 if (iffm->is_If()) {
3129 Node* cur_bool = iffm->in(1);
3130 if (cur_bool->is_Bool()) {
3131 Node* cur_cmp = cur_bool->in(1);
3132 if (cur_cmp->is_Cmp()) {
3133 BoolTest::mask new_test = BoolTest::gt;
3134 BoolNode *new_bool = new BoolNode(cur_cmp, new_test);
3135 _igvn.replace_node(cur_bool, new_bool);
3136 _igvn._worklist.push(new_bool);
3137 Node* left_op = cur_cmp->in(1);
3138 _igvn.replace_input_of(cur_cmp, 2, left_op);
3139 C->set_major_progress();
3140 }
3141 }
3142 }
3143 }
3144}
3145
3146//------------------------------DCE_loop_body----------------------------------
3147// Remove simplistic dead code from loop body
3148void IdealLoopTree::DCE_loop_body() {
3149 for (uint i = 0; i < _body.size(); i++) {
3150 if (_body.at(i)->outcnt() == 0) {
3151 _body.map(i, _body.pop());
3152 i--; // Ensure we revisit the updated index.
3153 }
3154 }
3155}
3156
3157
3158//------------------------------adjust_loop_exit_prob--------------------------
3159// Look for loop-exit tests with the 50/50 (or worse) guesses from the parsing stage.
3160// Replace with a 1-in-10 exit guess.
3161void IdealLoopTree::adjust_loop_exit_prob(PhaseIdealLoop *phase) {
3162 Node *test = tail();
3163 while (test != _head) {
3164 uint top = test->Opcode();
3165 if (top == Op_IfTrue || top == Op_IfFalse) {
3166 int test_con = ((ProjNode*)test)->_con;
3167 assert(top == (uint)(test_con? Op_IfTrue: Op_IfFalse), "sanity")do { if (!(top == (uint)(test_con? Op_IfTrue: Op_IfFalse))) {
(*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 3167, "assert(" "top == (uint)(test_con? Op_IfTrue: Op_IfFalse)"
") failed", "sanity"); ::breakpoint(); } } while (0)
;
3168 IfNode *iff = test->in(0)->as_If();
3169 if (iff->outcnt() == 2) { // Ignore dead tests
3170 Node *bol = iff->in(1);
3171 if (bol && bol->req() > 1 && bol->in(1) &&
3172 ((bol->in(1)->Opcode() == Op_StorePConditional) ||
3173 (bol->in(1)->Opcode() == Op_StoreIConditional) ||
3174 (bol->in(1)->Opcode() == Op_StoreLConditional) ||
3175 (bol->in(1)->Opcode() == Op_CompareAndExchangeB) ||
3176 (bol->in(1)->Opcode() == Op_CompareAndExchangeS) ||
3177 (bol->in(1)->Opcode() == Op_CompareAndExchangeI) ||
3178 (bol->in(1)->Opcode() == Op_CompareAndExchangeL) ||
3179 (bol->in(1)->Opcode() == Op_CompareAndExchangeP) ||
3180 (bol->in(1)->Opcode() == Op_CompareAndExchangeN) ||
3181 (bol->in(1)->Opcode() == Op_WeakCompareAndSwapB) ||
3182 (bol->in(1)->Opcode() == Op_WeakCompareAndSwapS) ||
3183 (bol->in(1)->Opcode() == Op_WeakCompareAndSwapI) ||
3184 (bol->in(1)->Opcode() == Op_WeakCompareAndSwapL) ||
3185 (bol->in(1)->Opcode() == Op_WeakCompareAndSwapP) ||
3186 (bol->in(1)->Opcode() == Op_WeakCompareAndSwapN) ||
3187 (bol->in(1)->Opcode() == Op_CompareAndSwapB) ||
3188 (bol->in(1)->Opcode() == Op_CompareAndSwapS) ||
3189 (bol->in(1)->Opcode() == Op_CompareAndSwapI) ||
3190 (bol->in(1)->Opcode() == Op_CompareAndSwapL) ||
3191 (bol->in(1)->Opcode() == Op_CompareAndSwapP) ||
3192 (bol->in(1)->Opcode() == Op_CompareAndSwapN) ||
3193 (bol->in(1)->Opcode() == Op_ShenandoahCompareAndExchangeP) ||
3194 (bol->in(1)->Opcode() == Op_ShenandoahCompareAndExchangeN) ||
3195 (bol->in(1)->Opcode() == Op_ShenandoahWeakCompareAndSwapP) ||
3196 (bol->in(1)->Opcode() == Op_ShenandoahWeakCompareAndSwapN) ||
3197 (bol->in(1)->Opcode() == Op_ShenandoahCompareAndSwapP) ||
3198 (bol->in(1)->Opcode() == Op_ShenandoahCompareAndSwapN)))
3199 return; // Allocation loops RARELY take backedge
3200 // Find the OTHER exit path from the IF
3201 Node* ex = iff->proj_out(1-test_con);
3202 float p = iff->_prob;
3203 if (!phase->is_member(this, ex) && iff->_fcnt == COUNT_UNKNOWN(-1.0f)) {
3204 if (top == Op_IfTrue) {
3205 if (p < (PROB_FAIR(0.5f) + PROB_UNLIKELY_MAG(3)(1e-3f))) {
3206 iff->_prob = PROB_STATIC_FREQUENT(1.0f-(1e-1f));
3207 }
3208 } else {
3209 if (p > (PROB_FAIR(0.5f) - PROB_UNLIKELY_MAG(3)(1e-3f))) {
3210 iff->_prob = PROB_STATIC_INFREQUENT(1e-1f);
3211 }
3212 }
3213 }
3214 }
3215 }
3216 test = phase->idom(test);
3217 }
3218}
3219
3220#ifdef ASSERT1
3221static CountedLoopNode* locate_pre_from_main(CountedLoopNode* main_loop) {
3222 assert(!main_loop->is_main_no_pre_loop(), "Does not have a pre loop")do { if (!(!main_loop->is_main_no_pre_loop())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 3222, "assert(" "!main_loop->is_main_no_pre_loop()" ") failed"
, "Does not have a pre loop"); ::breakpoint(); } } while (0)
;
3223 Node* ctrl = main_loop->skip_predicates();
3224 assert(ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "")do { if (!(ctrl->Opcode() == Op_IfTrue || ctrl->Opcode(
) == Op_IfFalse)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 3224, "assert(" "ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse"
") failed", ""); ::breakpoint(); } } while (0)
;
3225 Node* iffm = ctrl->in(0);
3226 assert(iffm->Opcode() == Op_If, "")do { if (!(iffm->Opcode() == Op_If)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 3226, "assert(" "iffm->Opcode() == Op_If" ") failed", ""
); ::breakpoint(); } } while (0)
;
3227 Node* p_f = iffm->in(0);
3228 assert(p_f->Opcode() == Op_IfFalse, "")do { if (!(p_f->Opcode() == Op_IfFalse)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 3228, "assert(" "p_f->Opcode() == Op_IfFalse" ") failed"
, ""); ::breakpoint(); } } while (0)
;
3229 CountedLoopNode* pre_loop = p_f->in(0)->as_CountedLoopEnd()->loopnode();
3230 assert(pre_loop->is_pre_loop(), "No pre loop found")do { if (!(pre_loop->is_pre_loop())) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 3230, "assert(" "pre_loop->is_pre_loop()" ") failed", "No pre loop found"
); ::breakpoint(); } } while (0)
;
3231 return pre_loop;
3232}
3233#endif
3234
3235// Remove the main and post loops and make the pre loop execute all
3236// iterations. Useful when the pre loop is found empty.
3237void IdealLoopTree::remove_main_post_loops(CountedLoopNode *cl, PhaseIdealLoop *phase) {
3238 CountedLoopEndNode* pre_end = cl->loopexit();
3239 Node* pre_cmp = pre_end->cmp_node();
3240 if (pre_cmp->in(2)->Opcode() != Op_Opaque1) {
3241 // Only safe to remove the main loop if the compiler optimized it
3242 // out based on an unknown number of iterations
3243 return;
3244 }
3245
3246 // Can we find the main loop?
3247 if (_next == NULL__null) {
3248 return;
3249 }
3250
3251 Node* next_head = _next->_head;
3252 if (!next_head->is_CountedLoop()) {
3253 return;
3254 }
3255
3256 CountedLoopNode* main_head = next_head->as_CountedLoop();
3257 if (!main_head->is_main_loop() || main_head->is_main_no_pre_loop()) {
3258 return;
3259 }
3260
3261 assert(locate_pre_from_main(main_head) == cl, "bad main loop")do { if (!(locate_pre_from_main(main_head) == cl)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 3261, "assert(" "locate_pre_from_main(main_head) == cl" ") failed"
, "bad main loop"); ::breakpoint(); } } while (0)
;
3262 Node* main_iff = main_head->skip_predicates()->in(0);
3263
3264 // Remove the Opaque1Node of the pre loop and make it execute all iterations
3265 phase->_igvn.replace_input_of(pre_cmp, 2, pre_cmp->in(2)->in(2));
3266 // Remove the Opaque1Node of the main loop so it can be optimized out
3267 Node* main_cmp = main_iff->in(1)->in(1);
3268 assert(main_cmp->in(2)->Opcode() == Op_Opaque1, "main loop has no opaque node?")do { if (!(main_cmp->in(2)->Opcode() == Op_Opaque1)) { (
*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 3268, "assert(" "main_cmp->in(2)->Opcode() == Op_Opaque1"
") failed", "main loop has no opaque node?"); ::breakpoint()
; } } while (0)
;
3269 phase->_igvn.replace_input_of(main_cmp, 2, main_cmp->in(2)->in(1));
3270}
3271
3272//------------------------------do_remove_empty_loop---------------------------
3273// We always attempt remove empty loops. The approach is to replace the trip
3274// counter with the value it will have on the last iteration. This will break
3275// the loop.
3276bool IdealLoopTree::do_remove_empty_loop(PhaseIdealLoop *phase) {
3277 // Minimum size must be empty loop
3278 if (_body.size() > EMPTY_LOOP_SIZE) {
3279 return false;
3280 }
3281 if (!_head->is_CountedLoop()) {
3282 return false; // Dead loop
3283 }
3284 CountedLoopNode *cl = _head->as_CountedLoop();
3285 if (!cl->is_valid_counted_loop(T_INT)) {
3286 return false; // Malformed loop
3287 }
3288 if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue)))) {
3289 return false; // Infinite loop
3290 }
3291 if (cl->is_pre_loop()) {
3292 // If the loop we are removing is a pre-loop then the main and post loop
3293 // can be removed as well.
3294 remove_main_post_loops(cl, phase);
3295 }
3296
3297#ifdef ASSERT1
3298 // Ensure only one phi which is the iv.
3299 Node* iv = NULL__null;
3300 for (DUIterator_Fast imax, i = cl->fast_outs(imax); i < imax; i++) {
3301 Node* n = cl->fast_out(i);
3302 if (n->Opcode() == Op_Phi) {
3303 assert(iv == NULL, "Too many phis")do { if (!(iv == __null)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 3303, "assert(" "iv == __null" ") failed", "Too many phis")
; ::breakpoint(); } } while (0)
;
3304 iv = n;
3305 }
3306 }
3307 assert(iv == cl->phi(), "Wrong phi")do { if (!(iv == cl->phi())) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 3307, "assert(" "iv == cl->phi()" ") failed", "Wrong phi"
); ::breakpoint(); } } while (0)
;
3308#endif
3309
3310 // main and post loops have explicitly created zero trip guard
3311 bool needs_guard = !cl->is_main_loop() && !cl->is_post_loop();
3312 if (needs_guard) {
3313 // Skip guard if values not overlap.
3314 const TypeInt* init_t = phase->_igvn.type(cl->init_trip())->is_int();
3315 const TypeInt* limit_t = phase->_igvn.type(cl->limit())->is_int();
3316 int stride_con = cl->stride_con();
3317 if (stride_con > 0) {
3318 needs_guard = (init_t->_hi >= limit_t->_lo);
3319 } else {
3320 needs_guard = (init_t->_lo <= limit_t->_hi);
3321 }
3322 }
3323 if (needs_guard) {
3324 // Check for an obvious zero trip guard.
3325 Node* inctrl = PhaseIdealLoop::skip_all_loop_predicates(cl->skip_predicates());
3326 if (inctrl->Opcode() == Op_IfTrue || inctrl->Opcode() == Op_IfFalse) {
3327 bool maybe_swapped = (inctrl->Opcode() == Op_IfFalse);
3328 // The test should look like just the backedge of a CountedLoop
3329 Node* iff = inctrl->in(0);
3330 if (iff->is_If()) {
3331 Node* bol = iff->in(1);
3332 if (bol->is_Bool()) {
3333 BoolTest test = bol->as_Bool()->_test;
3334 if (maybe_swapped) {
3335 test._test = test.commute();
3336 test._test = test.negate();
3337 }
3338 if (test._test == cl->loopexit()->test_trip()) {
3339 Node* cmp = bol->in(1);
3340 int init_idx = maybe_swapped ? 2 : 1;
3341 int limit_idx = maybe_swapped ? 1 : 2;
3342 if (cmp->is_Cmp() && cmp->in(init_idx) == cl->init_trip() && cmp->in(limit_idx) == cl->limit()) {
3343 needs_guard = false;
3344 }
3345 }
3346 }
3347 }
3348 }
3349 }
3350
3351#ifndef PRODUCT
3352 if (PrintOpto) {
3353 tty->print("Removing empty loop with%s zero trip guard", needs_guard ? "out" : "");
3354 this->dump_head();
3355 } else if (TraceLoopOpts) {
3356 tty->print("Empty with%s zero trip guard ", needs_guard ? "out" : "");
3357 this->dump_head();
3358 }
3359#endif
3360
3361 if (needs_guard) {
3362 // Peel the loop to ensure there's a zero trip guard
3363 Node_List old_new;
3364 phase->do_peeling(this, old_new);
3365 }
3366
3367 // Replace the phi at loop head with the final value of the last
3368 // iteration. Then the CountedLoopEnd will collapse (backedge never
3369 // taken) and all loop-invariant uses of the exit values will be correct.
3370 Node *phi = cl->phi();
3371 Node *exact_limit = phase->exact_limit(this);
3372 if (exact_limit != cl->limit()) {
3373 // We also need to replace the original limit to collapse loop exit.
3374 Node* cmp = cl->loopexit()->cmp_node();
3375 assert(cl->limit() == cmp->in(2), "sanity")do { if (!(cl->limit() == cmp->in(2))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 3375, "assert(" "cl->limit() == cmp->in(2)" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
3376 // Duplicate cmp node if it has other users
3377 if (cmp->outcnt() > 1) {
3378 cmp = cmp->clone();
3379 cmp = phase->_igvn.register_new_node_with_optimizer(cmp);
3380 BoolNode *bol = cl->loopexit()->in(CountedLoopEndNode::TestValue)->as_Bool();
3381 phase->_igvn.replace_input_of(bol, 1, cmp); // put bol on worklist
3382 }
3383 phase->_igvn._worklist.push(cmp->in(2)); // put limit on worklist
3384 phase->_igvn.replace_input_of(cmp, 2, exact_limit); // put cmp on worklist
3385 }
3386 // Note: the final value after increment should not overflow since
3387 // counted loop has limit check predicate.
3388 Node *final = new SubINode(exact_limit, cl->stride());
3389 phase->register_new_node(final,cl->in(LoopNode::EntryControl));
3390 phase->_igvn.replace_node(phi,final);
3391 phase->C->set_major_progress();
3392 return true;
3393}
3394
3395//------------------------------do_one_iteration_loop--------------------------
3396// Convert one iteration loop into normal code.
3397bool IdealLoopTree::do_one_iteration_loop(PhaseIdealLoop *phase) {
3398 if (!_head->as_Loop()->is_valid_counted_loop(T_INT)) {
3399 return false; // Only for counted loop
3400 }
3401 CountedLoopNode *cl = _head->as_CountedLoop();
3402 if (!cl->has_exact_trip_count() || cl->trip_count() != 1) {
3403 return false;
3404 }
3405
3406#ifndef PRODUCT
3407 if (TraceLoopOpts) {
3408 tty->print("OneIteration ");
3409 this->dump_head();
3410 }
3411#endif
3412
3413 Node *init_n = cl->init_trip();
3414 // Loop boundaries should be constant since trip count is exact.
3415 assert((cl->stride_con() > 0 && init_n->get_int() + cl->stride_con() >= cl->limit()->get_int()) ||do { if (!((cl->stride_con() > 0 && init_n->
get_int() + cl->stride_con() >= cl->limit()->get_int
()) || (cl->stride_con() < 0 && init_n->get_int
() + cl->stride_con() <= cl->limit()->get_int()))
) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 3416, "assert(" "(cl->stride_con() > 0 && init_n->get_int() + cl->stride_con() >= cl->limit()->get_int()) || (cl->stride_con() < 0 && init_n->get_int() + cl->stride_con() <= cl->limit()->get_int())"
") failed", "should be one iteration"); ::breakpoint(); } } while
(0)
3416 (cl->stride_con() < 0 && init_n->get_int() + cl->stride_con() <= cl->limit()->get_int()), "should be one iteration")do { if (!((cl->stride_con() > 0 && init_n->
get_int() + cl->stride_con() >= cl->limit()->get_int
()) || (cl->stride_con() < 0 && init_n->get_int
() + cl->stride_con() <= cl->limit()->get_int()))
) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 3416, "assert(" "(cl->stride_con() > 0 && init_n->get_int() + cl->stride_con() >= cl->limit()->get_int()) || (cl->stride_con() < 0 && init_n->get_int() + cl->stride_con() <= cl->limit()->get_int())"
") failed", "should be one iteration"); ::breakpoint(); } } while
(0)
;
3417 // Replace the phi at loop head with the value of the init_trip.
3418 // Then the CountedLoopEnd will collapse (backedge will not be taken)
3419 // and all loop-invariant uses of the exit values will be correct.
3420 phase->_igvn.replace_node(cl->phi(), cl->init_trip());
3421 phase->C->set_major_progress();
3422 return true;
3423}
3424
3425//=============================================================================
3426//------------------------------iteration_split_impl---------------------------
3427bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_new) {
3428 // Compute loop trip count if possible.
3429 compute_trip_count(phase);
3430
3431 // Convert one iteration loop into normal code.
3432 if (do_one_iteration_loop(phase)) {
3433 return true;
3434 }
3435 // Check and remove empty loops (spam micro-benchmarks)
3436 if (do_remove_empty_loop(phase)) {
3437 return true; // Here we removed an empty loop
3438 }
3439
3440 AutoNodeBudget node_budget(phase);
3441
3442 // Non-counted loops may be peeled; exactly 1 iteration is peeled.
3443 // This removes loop-invariant tests (usually null checks).
3444 if (!_head->is_CountedLoop()) { // Non-counted loop
3445 if (PartialPeelLoop && phase->partial_peel(this, old_new)) {
3446 // Partial peel succeeded so terminate this round of loop opts
3447 return false;
3448 }
3449 if (policy_peeling(phase)) { // Should we peel?
3450 if (PrintOpto) { tty->print_cr("should_peel"); }
3451 phase->do_peeling(this, old_new);
3452 } else if (policy_unswitching(phase)) {
3453 phase->do_unswitching(this, old_new);
3454 return false; // need to recalculate idom data
3455 } else if (_head->is_LongCountedLoop()) {
3456 phase->create_loop_nest(this, old_new);
3457 }
3458 return true;
3459 }
3460 CountedLoopNode *cl = _head->as_CountedLoop();
3461
3462 if (!cl->is_valid_counted_loop(T_INT)) return true; // Ignore various kinds of broken loops
3463
3464 // Do nothing special to pre- and post- loops
3465 if (cl->is_pre_loop() || cl->is_post_loop()) return true;
3466
3467 // Compute loop trip count from profile data
3468 compute_profile_trip_cnt(phase);
3469
3470 // Before attempting fancy unrolling, RCE or alignment, see if we want
3471 // to completely unroll this loop or do loop unswitching.
3472 if (cl->is_normal_loop()) {
3473 if (policy_unswitching(phase)) {
3474 phase->do_unswitching(this, old_new);
3475 return false; // need to recalculate idom data
3476 }
3477 if (policy_maximally_unroll(phase)) {
3478 // Here we did some unrolling and peeling. Eventually we will
3479 // completely unroll this loop and it will no longer be a loop.
3480 phase->do_maximally_unroll(this, old_new);
3481 return true;
3482 }
3483 }
3484
3485 uint est_peeling = estimate_peeling(phase);
3486 bool should_peel = 0 < est_peeling;
3487
3488 // Counted loops may be peeled, or may need some iterations run up
3489 // front for RCE. Thus we clone a full loop up front whose trip count is
3490 // at least 1 (if peeling), but may be several more.
3491
3492 // The main loop will start cache-line aligned with at least 1
3493 // iteration of the unrolled body (zero-trip test required) and
3494 // will have some range checks removed.
3495
3496 // A post-loop will finish any odd iterations (leftover after
3497 // unrolling), plus any needed for RCE purposes.
3498
3499 bool should_unroll = policy_unroll(phase);
3500 bool should_rce = policy_range_check(phase, false, T_INT);
3501 bool should_rce_long = policy_range_check(phase, false, T_LONG);
3502
3503 // If not RCE'ing (iteration splitting), then we do not need a pre-loop.
3504 // We may still need to peel an initial iteration but we will not
3505 // be needing an unknown number of pre-iterations.
3506 //
3507 // Basically, if peel_only reports TRUE first time through, we will not
3508 // be able to later do RCE on this loop.
3509 bool peel_only = policy_peel_only(phase) && !should_rce;
3510
3511 // If we have any of these conditions (RCE, unrolling) met, then
3512 // we switch to the pre-/main-/post-loop model. This model also covers
3513 // peeling.
3514 if (should_rce || should_unroll) {
3515 if (cl->is_normal_loop()) { // Convert to 'pre/main/post' loops
3516 if (should_rce_long && phase->create_loop_nest(this, old_new)) {
3517 return true;
3518 }
3519 uint estimate = est_loop_clone_sz(3);
3520 if (!phase->may_require_nodes(estimate)) {
3521 return false;
3522 }
3523 phase->insert_pre_post_loops(this, old_new, peel_only);
3524 }
3525 // Adjust the pre- and main-loop limits to let the pre and post loops run
3526 // with full checks, but the main-loop with no checks. Remove said checks
3527 // from the main body.
3528 if (should_rce) {
3529 if (phase->do_range_check(this, old_new) != 0) {
3530 cl->mark_has_range_checks();
3531 }
3532 } else if (PostLoopMultiversioning) {
3533 phase->has_range_checks(this);
3534 }
3535
3536 if (should_unroll && !should_peel && PostLoopMultiversioning) {
3537 // Try to setup multiversioning on main loops before they are unrolled
3538 if (cl->is_main_loop() && (cl->unrolled_count() == 1)) {
3539 phase->insert_scalar_rced_post_loop(this, old_new);
3540 }
3541 }
3542
3543 // Double loop body for unrolling. Adjust the minimum-trip test (will do
3544 // twice as many iterations as before) and the main body limit (only do
3545 // an even number of trips). If we are peeling, we might enable some RCE
3546 // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
3547 // peeling.
3548 if (should_unroll && !should_peel) {
3549 if (SuperWordLoopUnrollAnalysis) {
3550 phase->insert_vector_post_loop(this, old_new);
3551 }
3552 phase->do_unroll(this, old_new, true);
3553 }
3554 } else { // Else we have an unchanged counted loop
3555 if (should_peel) { // Might want to peel but do nothing else
3556 if (phase->may_require_nodes(est_peeling)) {
3557 phase->do_peeling(this, old_new);
3558 }
3559 }
3560 if (should_rce_long) {
3561 phase->create_loop_nest(this, old_new);
3562 }
3563 }
3564 return true;
3565}
3566
3567
3568//=============================================================================
3569//------------------------------iteration_split--------------------------------
3570bool IdealLoopTree::iteration_split(PhaseIdealLoop* phase, Node_List &old_new) {
3571 // Recursively iteration split nested loops
3572 if (_child && !_child->iteration_split(phase, old_new)) {
3573 return false;
3574 }
3575
3576 // Clean out prior deadwood
3577 DCE_loop_body();
3578
3579 // Look for loop-exit tests with my 50/50 guesses from the Parsing stage.
3580 // Replace with a 1-in-10 exit guess.
3581 if (!is_root() && is_loop()) {
3582 adjust_loop_exit_prob(phase);
3583 }
3584
3585 // Unrolling, RCE and peeling efforts, iff innermost loop.
3586 if (_allow_optimizations && is_innermost()) {
3587 if (!_has_call) {
3588 if (!iteration_split_impl(phase, old_new)) {
3589 return false;
3590 }
3591 } else {
3592 AutoNodeBudget node_budget(phase);
3593 if (policy_unswitching(phase)) {
3594 phase->do_unswitching(this, old_new);
3595 return false; // need to recalculate idom data
3596 }
3597 }
3598 }
3599
3600 // Minor offset re-organization to remove loop-fallout uses of
3601 // trip counter when there was no major reshaping.
3602 phase->reorg_offsets(this);
3603
3604 if (_next && !_next->iteration_split(phase, old_new)) {
3605 return false;
3606 }
3607 return true;
3608}
3609
3610
3611//=============================================================================
3612// Process all the loops in the loop tree and replace any fill
3613// patterns with an intrinsic version.
3614bool PhaseIdealLoop::do_intrinsify_fill() {
3615 bool changed = false;
3616 for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
3617 IdealLoopTree* lpt = iter.current();
3618 changed |= intrinsify_fill(lpt);
3619 }
3620 return changed;
3621}
3622
3623
3624// Examine an inner loop looking for a a single store of an invariant
3625// value in a unit stride loop,
3626bool PhaseIdealLoop::match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
3627 Node*& shift, Node*& con) {
3628 const char* msg = NULL__null;
3629 Node* msg_node = NULL__null;
3630
3631 store_value = NULL__null;
3632 con = NULL__null;
3633 shift = NULL__null;
3634
3635 // Process the loop looking for stores. If there are multiple
3636 // stores or extra control flow give at this point.
3637 CountedLoopNode* head = lpt->_head->as_CountedLoop();
3638 for (uint i = 0; msg == NULL__null && i < lpt->_body.size(); i++) {
3639 Node* n = lpt->_body.at(i);
3640 if (n->outcnt() == 0) continue; // Ignore dead
3641 if (n->is_Store()) {
3642 if (store != NULL__null) {
3643 msg = "multiple stores";
3644 break;
3645 }
3646 int opc = n->Opcode();
3647 if (opc == Op_StoreP || opc == Op_StoreN || opc == Op_StoreNKlass || opc == Op_StoreCM) {
3648 msg = "oop fills not handled";
3649 break;
3650 }
3651 Node* value = n->in(MemNode::ValueIn);
3652 if (!lpt->is_invariant(value)) {
3653 msg = "variant store value";
3654 } else if (!_igvn.type(n->in(MemNode::Address))->isa_aryptr()) {
3655 msg = "not array address";
3656 }
3657 store = n;
3658 store_value = value;
3659 } else if (n->is_If() && n != head->loopexit_or_null()) {
3660 msg = "extra control flow";
3661 msg_node = n;
3662 }
3663 }
3664
3665 if (store == NULL__null) {
3666 // No store in loop
3667 return false;
3668 }
3669
3670 if (msg == NULL__null && head->stride_con() != 1) {
3671 // could handle negative strides too
3672 if (head->stride_con() < 0) {
3673 msg = "negative stride";
3674 } else {
3675 msg = "non-unit stride";
3676 }
3677 }
3678
3679 if (msg == NULL__null && !store->in(MemNode::Address)->is_AddP()) {
3680 msg = "can't handle store address";
3681 msg_node = store->in(MemNode::Address);
3682 }
3683
3684 if (msg == NULL__null &&
3685 (!store->in(MemNode::Memory)->is_Phi() ||
3686 store->in(MemNode::Memory)->in(LoopNode::LoopBackControl) != store)) {
3687 msg = "store memory isn't proper phi";
3688 msg_node = store->in(MemNode::Memory);
3689 }
3690
3691 // Make sure there is an appropriate fill routine
3692 BasicType t = store->as_Mem()->memory_type();
3693 const char* fill_name;
3694 if (msg == NULL__null &&
3695 StubRoutines::select_fill_function(t, false, fill_name) == NULL__null) {
3696 msg = "unsupported store";
3697 msg_node = store;
3698 }
3699
3700 if (msg != NULL__null) {
3701#ifndef PRODUCT
3702 if (TraceOptimizeFill) {
3703 tty->print_cr("not fill intrinsic candidate: %s", msg);
3704 if (msg_node != NULL__null) msg_node->dump();
3705 }
3706#endif
3707 return false;
3708 }
3709
3710 // Make sure the address expression can be handled. It should be
3711 // head->phi * elsize + con. head->phi might have a ConvI2L(CastII()).
3712 Node* elements[4];
3713 Node* cast = NULL__null;
3714 Node* conv = NULL__null;
3715 bool found_index = false;
3716 int count = store->in(MemNode::Address)->as_AddP()->unpack_offsets(elements, ARRAY_SIZE(elements)sizeof(array_size_impl(elements)));
3717 for (int e = 0; e < count; e++) {
3718 Node* n = elements[e];
3719 if (n->is_Con() && con == NULL__null) {
3720 con = n;
3721 } else if (n->Opcode() == Op_LShiftXOp_LShiftL && shift == NULL__null) {
3722 Node* value = n->in(1);
3723#ifdef _LP641
3724 if (value->Opcode() == Op_ConvI2L) {
3725 conv = value;
3726 value = value->in(1);
3727 }
3728 if (value->Opcode() == Op_CastII &&
3729 value->as_CastII()->has_range_check()) {
3730 // Skip range check dependent CastII nodes
3731 cast = value;
3732 value = value->in(1);
3733 }
3734#endif
3735 if (value != head->phi()) {
3736 msg = "unhandled shift in address";
3737 } else {
3738 if (type2aelembytes(store->as_Mem()->memory_type(), true) != (1 << n->in(2)->get_int())) {
3739 msg = "scale doesn't match";
3740 } else {
3741 found_index = true;
3742 shift = n;
3743 }
3744 }
3745 } else if (n->Opcode() == Op_ConvI2L && conv == NULL__null) {
3746 conv = n;
3747 n = n->in(1);
3748 if (n->Opcode() == Op_CastII &&
3749 n->as_CastII()->has_range_check()) {
3750 // Skip range check dependent CastII nodes
3751 cast = n;
3752 n = n->in(1);
3753 }
3754 if (n == head->phi()) {
3755 found_index = true;
3756 } else {
3757 msg = "unhandled input to ConvI2L";
3758 }
3759 } else if (n == head->phi()) {
3760 // no shift, check below for allowed cases
3761 found_index = true;
3762 } else {
3763 msg = "unhandled node in address";
3764 msg_node = n;
3765 }
3766 }
3767
3768 if (count == -1) {
3769 msg = "malformed address expression";
3770 msg_node = store;
3771 }
3772
3773 if (!found_index) {
3774 msg = "missing use of index";
3775 }
3776
3777 // byte sized items won't have a shift
3778 if (msg == NULL__null && shift == NULL__null && t != T_BYTE && t != T_BOOLEAN) {
3779 msg = "can't find shift";
3780 msg_node = store;
3781 }
3782
3783 if (msg != NULL__null) {
3784#ifndef PRODUCT
3785 if (TraceOptimizeFill) {
3786 tty->print_cr("not fill intrinsic: %s", msg);
3787 if (msg_node != NULL__null) msg_node->dump();
3788 }
3789#endif
3790 return false;
3791 }
3792
3793 // No make sure all the other nodes in the loop can be handled
3794 VectorSet ok;
3795
3796 // store related values are ok
3797 ok.set(store->_idx);
3798 ok.set(store->in(MemNode::Memory)->_idx);
3799
3800 CountedLoopEndNode* loop_exit = head->loopexit();
3801
3802 // Loop structure is ok
3803 ok.set(head->_idx);
3804 ok.set(loop_exit->_idx);
3805 ok.set(head->phi()->_idx);
3806 ok.set(head->incr()->_idx);
3807 ok.set(loop_exit->cmp_node()->_idx);
3808 ok.set(loop_exit->in(1)->_idx);
3809
3810 // Address elements are ok
3811 if (con) ok.set(con->_idx);
3812 if (shift) ok.set(shift->_idx);
3813 if (cast) ok.set(cast->_idx);
3814 if (conv) ok.set(conv->_idx);
3815
3816 for (uint i = 0; msg == NULL__null && i < lpt->_body.size(); i++) {
3817 Node* n = lpt->_body.at(i);
3818 if (n->outcnt() == 0) continue; // Ignore dead
3819 if (ok.test(n->_idx)) continue;
3820 // Backedge projection is ok
3821 if (n->is_IfTrue() && n->in(0) == loop_exit) continue;
3822 if (!n->is_AddP()) {
3823 msg = "unhandled node";
3824 msg_node = n;
3825 break;
3826 }
3827 }
3828
3829 // Make sure no unexpected values are used outside the loop
3830 for (uint i = 0; msg == NULL__null && i < lpt->_body.size(); i++) {
3831 Node* n = lpt->_body.at(i);
3832 // These values can be replaced with other nodes if they are used
3833 // outside the loop.
3834 if (n == store || n == loop_exit || n == head->incr() || n == store->in(MemNode::Memory)) continue;
3835 for (SimpleDUIterator iter(n); iter.has_next(); iter.next()) {
3836 Node* use = iter.get();
3837 if (!lpt->_body.contains(use)) {
3838 msg = "node is used outside loop";
3839 msg_node = n;
3840 break;
3841 }
3842 }
3843 }
3844
3845#ifdef ASSERT1
3846 if (TraceOptimizeFill) {
3847 if (msg != NULL__null) {
3848 tty->print_cr("no fill intrinsic: %s", msg);
3849 if (msg_node != NULL__null) msg_node->dump();
3850 } else {
3851 tty->print_cr("fill intrinsic for:");
3852 }
3853 store->dump();
3854 if (Verbose) {
3855 lpt->_body.dump();
3856 }
3857 }
3858#endif
3859
3860 return msg == NULL__null;
3861}
3862
3863
3864
3865bool PhaseIdealLoop::intrinsify_fill(IdealLoopTree* lpt) {
3866 // Only for counted inner loops
3867 if (!lpt->is_counted() || !lpt->is_innermost()) {
3868 return false;
3869 }
3870
3871 // Must have constant stride
3872 CountedLoopNode* head = lpt->_head->as_CountedLoop();
3873 if (!head->is_valid_counted_loop(T_INT) || !head->is_normal_loop()) {
3874 return false;
3875 }
3876
3877 head->verify_strip_mined(1);
3878
3879 // Check that the body only contains a store of a loop invariant
3880 // value that is indexed by the loop phi.
3881 Node* store = NULL__null;
3882 Node* store_value = NULL__null;
3883 Node* shift = NULL__null;
3884 Node* offset = NULL__null;
3885 if (!match_fill_loop(lpt, store, store_value, shift, offset)) {
3886 return false;
3887 }
3888
3889 Node* exit = head->loopexit()->proj_out_or_null(0);
3890 if (exit == NULL__null) {
3891 return false;
3892 }
3893
3894#ifndef PRODUCT
3895 if (TraceLoopOpts) {
3896 tty->print("ArrayFill ");
3897 lpt->dump_head();
3898 }
3899#endif
3900
3901 // Now replace the whole loop body by a call to a fill routine that
3902 // covers the same region as the loop.
3903 Node* base = store->in(MemNode::Address)->as_AddP()->in(AddPNode::Base);
3904
3905 // Build an expression for the beginning of the copy region
3906 Node* index = head->init_trip();
3907#ifdef _LP641
3908 index = new ConvI2LNode(index);
3909 _igvn.register_new_node_with_optimizer(index);
3910#endif
3911 if (shift != NULL__null) {
3912 // byte arrays don't require a shift but others do.
3913 index = new LShiftXNodeLShiftLNode(index, shift->in(2));
3914 _igvn.register_new_node_with_optimizer(index);
3915 }
3916 index = new AddPNode(base, base, index);
3917 _igvn.register_new_node_with_optimizer(index);
3918 Node* from = new AddPNode(base, index, offset);
3919 _igvn.register_new_node_with_optimizer(from);
3920 // Compute the number of elements to copy
3921 Node* len = new SubINode(head->limit(), head->init_trip());
3922 _igvn.register_new_node_with_optimizer(len);
3923
3924 BasicType t = store->as_Mem()->memory_type();
3925 bool aligned = false;
3926 if (offset != NULL__null && head->init_trip()->is_Con()) {
3927 int element_size = type2aelembytes(t);
3928 aligned = (offset->find_intptr_t_typefind_long_type()->get_con() + head->init_trip()->get_int() * element_size) % HeapWordSize == 0;
3929 }
3930
3931 // Build a call to the fill routine
3932 const char* fill_name;
3933 address fill = StubRoutines::select_fill_function(t, aligned, fill_name);
3934 assert(fill != NULL, "what?")do { if (!(fill != __null)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/loopTransform.cpp"
, 3934, "assert(" "fill != __null" ") failed", "what?"); ::breakpoint
(); } } while (0)
;
3935
3936 // Convert float/double to int/long for fill routines
3937 if (t == T_FLOAT) {
3938 store_value = new MoveF2INode(store_value);
3939 _igvn.register_new_node_with_optimizer(store_value);
3940 } else if (t == T_DOUBLE) {
3941 store_value = new MoveD2LNode(store_value);
3942 _igvn.register_new_node_with_optimizer(store_value);
3943 }
3944
3945 Node* mem_phi = store->in(MemNode::Memory);
3946 Node* result_ctrl;
3947 Node* result_mem;
3948 const TypeFunc* call_type = OptoRuntime::array_fill_Type();
3949 CallLeafNode *call = new CallLeafNoFPNode(call_type, fill,
3950 fill_name, TypeAryPtr::get_array_body_type(t));
3951 uint cnt = 0;
3952 call->init_req(TypeFunc::Parms + cnt++, from);
3953 call->init_req(TypeFunc::Parms + cnt++, store_value);
3954#ifdef _LP641
3955 len = new ConvI2LNode(len);
3956 _igvn.register_new_node_with_optimizer(len);
3957#endif
3958 call->init_req(TypeFunc::Parms + cnt++, len);
3959#ifdef _LP641
3960 call->init_req(TypeFunc::Parms + cnt++, C->top());
3961#endif
3962 call->init_req(TypeFunc::Control, head->init_control());
3963 call->init_req(TypeFunc::I_O, C->top()); // Does no I/O.
3964 call->init_req(TypeFunc::Memory, mem_phi->in(LoopNode::EntryControl));
3965 call->init_req(TypeFunc::ReturnAdr, C->start()->proj_out_or_null(TypeFunc::ReturnAdr));
3966 call->init_req(TypeFunc::FramePtr, C->start()->proj_out_or_null(TypeFunc::FramePtr));
3967 _igvn.register_new_node_with_optimizer(call);
3968 result_ctrl = new ProjNode(call,TypeFunc::Control);
3969 _igvn.register_new_node_with_optimizer(result_ctrl);
3970 result_mem = new ProjNode(call,TypeFunc::Memory);
3971 _igvn.register_new_node_with_optimizer(result_mem);
3972
3973/* Disable following optimization until proper fix (add missing checks).
3974
3975 // If this fill is tightly coupled to an allocation and overwrites
3976 // the whole body, allow it to take over the zeroing.
3977 AllocateNode* alloc = AllocateNode::Ideal_allocation(base, this);
3978 if (alloc != NULL && alloc->is_AllocateArray()) {
3979 Node* length = alloc->as_AllocateArray()->Ideal_length();
3980 if (head->limit() == length &&
3981 head->init_trip() == _igvn.intcon(0)) {
3982 if (TraceOptimizeFill) {
3983 tty->print_cr("Eliminated zeroing in allocation");
3984 }
3985 alloc->maybe_set_complete(&_igvn);
3986 } else {
3987#ifdef ASSERT
3988 if (TraceOptimizeFill) {
3989 tty->print_cr("filling array but bounds don't match");
3990 alloc->dump();
3991 head->init_trip()->dump();
3992 head->limit()->dump();
3993 length->dump();
3994 }
3995#endif
3996 }
3997 }
3998*/
3999
4000 if (head->is_strip_mined()) {
4001 // Inner strip mined loop goes away so get rid of outer strip
4002 // mined loop
4003 Node* outer_sfpt = head->outer_safepoint();
4004 Node* in = outer_sfpt->in(0);
4005 Node* outer_out = head->outer_loop_exit();
4006 lazy_replace(outer_out, in);
4007 _igvn.replace_input_of(outer_sfpt, 0, C->top());
4008 }
4009
4010 // Redirect the old control and memory edges that are outside the loop.
4011 // Sometimes the memory phi of the head is used as the outgoing
4012 // state of the loop. It's safe in this case to replace it with the
4013 // result_mem.
4014 _igvn.replace_node(store->in(MemNode::Memory), result_mem);
4015 lazy_replace(exit, result_ctrl);
4016 _igvn.replace_node(store, result_mem);
4017 // Any uses the increment outside of the loop become the loop limit.
4018 _igvn.replace_node(head->incr(), head->limit());
4019
4020 // Disconnect the head from the loop.
4021 for (uint i = 0; i < lpt->_body.size(); i++) {
4022 Node* n = lpt->_body.at(i);
4023 _igvn.replace_node(n, C->top());
4024 }
4025
4026 return true;
4027}