File: | jdk/src/hotspot/share/opto/superword.cpp |
Warning: | line 4602, column 9 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* | |||
2 | * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. | |||
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |||
4 | * | |||
5 | * This code is free software; you can redistribute it and/or modify it | |||
6 | * under the terms of the GNU General Public License version 2 only, as | |||
7 | * published by the Free Software Foundation. | |||
8 | * | |||
9 | * This code is distributed in the hope that it will be useful, but WITHOUT | |||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |||
12 | * version 2 for more details (a copy is included in the LICENSE file that | |||
13 | * accompanied this code). | |||
14 | * | |||
15 | * You should have received a copy of the GNU General Public License version | |||
16 | * 2 along with this work; if not, write to the Free Software Foundation, | |||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |||
18 | * | |||
19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | |||
20 | * or visit www.oracle.com if you need additional information or have any | |||
21 | * questions. | |||
22 | */ | |||
23 | ||||
24 | #include "precompiled.hpp" | |||
25 | #include "compiler/compileLog.hpp" | |||
26 | #include "libadt/vectset.hpp" | |||
27 | #include "memory/allocation.inline.hpp" | |||
28 | #include "memory/resourceArea.hpp" | |||
29 | #include "opto/addnode.hpp" | |||
30 | #include "opto/callnode.hpp" | |||
31 | #include "opto/castnode.hpp" | |||
32 | #include "opto/convertnode.hpp" | |||
33 | #include "opto/divnode.hpp" | |||
34 | #include "opto/matcher.hpp" | |||
35 | #include "opto/memnode.hpp" | |||
36 | #include "opto/mulnode.hpp" | |||
37 | #include "opto/opcodes.hpp" | |||
38 | #include "opto/opaquenode.hpp" | |||
39 | #include "opto/superword.hpp" | |||
40 | #include "opto/vectornode.hpp" | |||
41 | #include "opto/movenode.hpp" | |||
42 | #include "utilities/powerOfTwo.hpp" | |||
43 | ||||
44 | // | |||
45 | // S U P E R W O R D T R A N S F O R M | |||
46 | //============================================================================= | |||
47 | ||||
48 | //------------------------------SuperWord--------------------------- | |||
49 | SuperWord::SuperWord(PhaseIdealLoop* phase) : | |||
50 | _phase(phase), | |||
51 | _arena(phase->C->comp_arena()), | |||
52 | _igvn(phase->_igvn), | |||
53 | _packset(arena(), 8, 0, NULL__null), // packs for the current block | |||
54 | _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb | |||
55 | _block(arena(), 8, 0, NULL__null), // nodes in current block | |||
56 | _post_block(arena(), 8, 0, NULL__null), // nodes common to current block which are marked as post loop vectorizable | |||
57 | _data_entry(arena(), 8, 0, NULL__null), // nodes with all inputs from outside | |||
58 | _mem_slice_head(arena(), 8, 0, NULL__null), // memory slice heads | |||
59 | _mem_slice_tail(arena(), 8, 0, NULL__null), // memory slice tails | |||
60 | _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node | |||
61 | _clone_map(phase->C->clone_map()), // map of nodes created in cloning | |||
62 | _cmovev_kit(_arena, this), // map to facilitate CMoveV creation | |||
63 | _align_to_ref(NULL__null), // memory reference to align vectors to | |||
64 | _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs | |||
65 | _dg(_arena), // dependence graph | |||
66 | _visited(arena()), // visited node set | |||
67 | _post_visited(arena()), // post visited node set | |||
68 | _n_idx_list(arena(), 8), // scratch list of (node,index) pairs | |||
69 | _nlist(arena(), 8, 0, NULL__null), // scratch list of nodes | |||
70 | _stk(arena(), 8, 0, NULL__null), // scratch stack of nodes | |||
71 | _lpt(NULL__null), // loop tree node | |||
72 | _lp(NULL__null), // CountedLoopNode | |||
73 | _pre_loop_end(NULL__null), // Pre loop CountedLoopEndNode | |||
74 | _bb(NULL__null), // basic block | |||
75 | _iv(NULL__null), // induction var | |||
76 | _race_possible(false), // cases where SDMU is true | |||
77 | _early_return(true), // analysis evaluations routine | |||
78 | _do_vector_loop(phase->C->do_vector_loop()), // whether to do vectorization/simd style | |||
79 | _do_reserve_copy(DoReserveCopyInSuperWord), | |||
80 | _num_work_vecs(0), // amount of vector work we have | |||
81 | _num_reductions(0), // amount of reduction work we have | |||
82 | _ii_first(-1), // first loop generation index - only if do_vector_loop() | |||
83 | _ii_last(-1), // last loop generation index - only if do_vector_loop() | |||
84 | _ii_order(arena(), 8, 0, 0) | |||
85 | { | |||
86 | #ifndef PRODUCT | |||
87 | _vector_loop_debug = 0; | |||
88 | if (_phase->C->method() != NULL__null) { | |||
89 | _vector_loop_debug = phase->C->directive()->VectorizeDebugOption; | |||
90 | } | |||
91 | ||||
92 | #endif | |||
93 | } | |||
94 | ||||
// Experimental vectorization which uses data from loop unrolling.
// File-local compile-time switch; currently set to false.
static const bool _do_vector_loop_experimental = false;
96 | ||||
97 | //------------------------------transform_loop--------------------------- | |||
98 | void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { | |||
99 | assert(UseSuperWord, "should be")do { if (!(UseSuperWord)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 99, "assert(" "UseSuperWord" ") failed", "should be"); ::breakpoint (); } } while (0); | |||
100 | // SuperWord only works with power of two vector sizes. | |||
101 | int vector_width = Matcher::vector_width_in_bytes(T_BYTE); | |||
102 | if (vector_width < 2 || !is_power_of_2(vector_width)) { | |||
103 | return; | |||
104 | } | |||
105 | ||||
106 | assert(lpt->_head->is_CountedLoop(), "must be")do { if (!(lpt->_head->is_CountedLoop())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 106, "assert(" "lpt->_head->is_CountedLoop()" ") failed" , "must be"); ::breakpoint(); } } while (0); | |||
107 | CountedLoopNode *cl = lpt->_head->as_CountedLoop(); | |||
108 | ||||
109 | if (!cl->is_valid_counted_loop(T_INT)) return; // skip malformed counted loop | |||
110 | ||||
111 | bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop()); | |||
112 | if (post_loop_allowed) { | |||
113 | if (cl->is_reduction_loop()) return; // no predication mapping | |||
114 | Node *limit = cl->limit(); | |||
115 | if (limit->is_Con()) return; // non constant limits only | |||
116 | // Now check the limit for expressions we do not handle | |||
117 | if (limit->is_Add()) { | |||
118 | Node *in2 = limit->in(2); | |||
119 | if (in2->is_Con()) { | |||
120 | int val = in2->get_int(); | |||
121 | // should not try to program these cases | |||
122 | if (val < 0) return; | |||
123 | } | |||
124 | } | |||
125 | } | |||
126 | ||||
127 | // skip any loop that has not been assigned max unroll by analysis | |||
128 | if (do_optimization) { | |||
129 | if (SuperWordLoopUnrollAnalysis && cl->slp_max_unroll() == 0) return; | |||
130 | } | |||
131 | ||||
132 | // Check for no control flow in body (other than exit) | |||
133 | Node *cl_exit = cl->loopexit(); | |||
134 | if (cl->is_main_loop() && (cl_exit->in(0) != lpt->_head)) { | |||
135 | #ifndef PRODUCT | |||
136 | if (TraceSuperWord) { | |||
137 | tty->print_cr("SuperWord::transform_loop: loop too complicated, cl_exit->in(0) != lpt->_head"); | |||
138 | tty->print("cl_exit %d", cl_exit->_idx); cl_exit->dump(); | |||
139 | tty->print("cl_exit->in(0) %d", cl_exit->in(0)->_idx); cl_exit->in(0)->dump(); | |||
140 | tty->print("lpt->_head %d", lpt->_head->_idx); lpt->_head->dump(); | |||
141 | lpt->dump_head(); | |||
142 | } | |||
143 | #endif | |||
144 | return; | |||
145 | } | |||
146 | ||||
147 | // Make sure the are no extra control users of the loop backedge | |||
148 | if (cl->back_control()->outcnt() != 1) { | |||
149 | return; | |||
150 | } | |||
151 | ||||
152 | // Skip any loops already optimized by slp | |||
153 | if (cl->is_vectorized_loop()) return; | |||
154 | ||||
155 | if (cl->is_unroll_only()) return; | |||
156 | ||||
157 | if (cl->is_main_loop()) { | |||
158 | // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit)))) | |||
159 | CountedLoopEndNode* pre_end = find_pre_loop_end(cl); | |||
160 | if (pre_end == NULL__null) { | |||
161 | return; | |||
162 | } | |||
163 | Node* pre_opaq1 = pre_end->limit(); | |||
164 | if (pre_opaq1->Opcode() != Op_Opaque1) { | |||
165 | return; | |||
166 | } | |||
167 | set_pre_loop_end(pre_end); | |||
168 | } | |||
169 | ||||
170 | init(); // initialize data structures | |||
171 | ||||
172 | set_lpt(lpt); | |||
173 | set_lp(cl); | |||
174 | ||||
175 | // For now, define one block which is the entire loop body | |||
176 | set_bb(cl); | |||
177 | ||||
178 | if (do_optimization) { | |||
179 | assert(_packset.length() == 0, "packset must be empty")do { if (!(_packset.length() == 0)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 179, "assert(" "_packset.length() == 0" ") failed", "packset must be empty" ); ::breakpoint(); } } while (0); | |||
180 | SLP_extract(); | |||
181 | if (PostLoopMultiversioning && Matcher::has_predicated_vectors()) { | |||
182 | if (cl->is_vectorized_loop() && cl->is_main_loop() && !cl->is_reduction_loop()) { | |||
183 | IdealLoopTree *lpt_next = lpt->_next; | |||
184 | CountedLoopNode *cl_next = lpt_next->_head->as_CountedLoop(); | |||
185 | _phase->has_range_checks(lpt_next); | |||
186 | if (cl_next->is_post_loop() && !cl_next->range_checks_present()) { | |||
187 | if (!cl_next->is_vectorized_loop()) { | |||
188 | int slp_max_unroll_factor = cl->slp_max_unroll(); | |||
189 | cl_next->set_slp_max_unroll(slp_max_unroll_factor); | |||
190 | } | |||
191 | } | |||
192 | } | |||
193 | } | |||
194 | } | |||
195 | } | |||
196 | ||||
197 | //------------------------------early unrolling analysis------------------------------ | |||
198 | void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) { | |||
199 | bool is_slp = true; | |||
200 | ResourceMark rm; | |||
201 | size_t ignored_size = lpt()->_body.size(); | |||
202 | int *ignored_loop_nodes = NEW_RESOURCE_ARRAY(int, ignored_size)(int*) resource_allocate_bytes((ignored_size) * sizeof(int)); | |||
203 | Node_Stack nstack((int)ignored_size); | |||
204 | CountedLoopNode *cl = lpt()->_head->as_CountedLoop(); | |||
205 | Node *cl_exit = cl->loopexit_or_null(); | |||
206 | int rpo_idx = _post_block.length(); | |||
207 | ||||
208 | assert(rpo_idx == 0, "post loop block is empty")do { if (!(rpo_idx == 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 208, "assert(" "rpo_idx == 0" ") failed", "post loop block is empty" ); ::breakpoint(); } } while (0); | |||
209 | ||||
210 | // First clear the entries | |||
211 | for (uint i = 0; i < lpt()->_body.size(); i++) { | |||
212 | ignored_loop_nodes[i] = -1; | |||
213 | } | |||
214 | ||||
215 | int max_vector = Matcher::max_vector_size(T_BYTE); | |||
216 | bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop()); | |||
217 | ||||
218 | // Process the loop, some/all of the stack entries will not be in order, ergo | |||
219 | // need to preprocess the ignored initial state before we process the loop | |||
220 | for (uint i = 0; i < lpt()->_body.size(); i++) { | |||
221 | Node* n = lpt()->_body.at(i); | |||
222 | if (n == cl->incr() || | |||
223 | n->is_reduction() || | |||
224 | n->is_AddP() || | |||
225 | n->is_Cmp() || | |||
226 | n->is_IfTrue() || | |||
227 | n->is_CountedLoop() || | |||
228 | (n == cl_exit)) { | |||
229 | ignored_loop_nodes[i] = n->_idx; | |||
230 | continue; | |||
231 | } | |||
232 | ||||
233 | if (n->is_If()) { | |||
234 | IfNode *iff = n->as_If(); | |||
235 | if (iff->_fcnt != COUNT_UNKNOWN(-1.0f) && iff->_prob != PROB_UNKNOWN(-1.0f)) { | |||
236 | if (lpt()->is_loop_exit(iff)) { | |||
237 | ignored_loop_nodes[i] = n->_idx; | |||
238 | continue; | |||
239 | } | |||
240 | } | |||
241 | } | |||
242 | ||||
243 | if (n->is_Phi() && (n->bottom_type() == Type::MEMORY)) { | |||
244 | Node* n_tail = n->in(LoopNode::LoopBackControl); | |||
245 | if (n_tail != n->in(LoopNode::EntryControl)) { | |||
246 | if (!n_tail->is_Mem()) { | |||
247 | is_slp = false; | |||
248 | break; | |||
249 | } | |||
250 | } | |||
251 | } | |||
252 | ||||
253 | // This must happen after check of phi/if | |||
254 | if (n->is_Phi() || n->is_If()) { | |||
255 | ignored_loop_nodes[i] = n->_idx; | |||
256 | continue; | |||
257 | } | |||
258 | ||||
259 | if (n->is_LoadStore() || n->is_MergeMem() || | |||
260 | (n->is_Proj() && !n->as_Proj()->is_CFG())) { | |||
261 | is_slp = false; | |||
262 | break; | |||
263 | } | |||
264 | ||||
265 | // Ignore nodes with non-primitive type. | |||
266 | BasicType bt; | |||
267 | if (n->is_Mem()) { | |||
268 | bt = n->as_Mem()->memory_type(); | |||
269 | } else { | |||
270 | bt = n->bottom_type()->basic_type(); | |||
271 | } | |||
272 | if (is_java_primitive(bt) == false) { | |||
273 | ignored_loop_nodes[i] = n->_idx; | |||
274 | continue; | |||
275 | } | |||
276 | ||||
277 | if (n->is_Mem()) { | |||
278 | MemNode* current = n->as_Mem(); | |||
279 | Node* adr = n->in(MemNode::Address); | |||
280 | Node* n_ctrl = _phase->get_ctrl(adr); | |||
281 | ||||
282 | // save a queue of post process nodes | |||
283 | if (n_ctrl != NULL__null && lpt()->is_member(_phase->get_loop(n_ctrl))) { | |||
284 | // Process the memory expression | |||
285 | int stack_idx = 0; | |||
286 | bool have_side_effects = true; | |||
287 | if (adr->is_AddP() == false) { | |||
288 | nstack.push(adr, stack_idx++); | |||
289 | } else { | |||
290 | // Mark the components of the memory operation in nstack | |||
291 | SWPointer p1(current, this, &nstack, true); | |||
292 | have_side_effects = p1.node_stack()->is_nonempty(); | |||
293 | } | |||
294 | ||||
295 | // Process the pointer stack | |||
296 | while (have_side_effects) { | |||
297 | Node* pointer_node = nstack.node(); | |||
298 | for (uint j = 0; j < lpt()->_body.size(); j++) { | |||
299 | Node* cur_node = lpt()->_body.at(j); | |||
300 | if (cur_node == pointer_node) { | |||
301 | ignored_loop_nodes[j] = cur_node->_idx; | |||
302 | break; | |||
303 | } | |||
304 | } | |||
305 | nstack.pop(); | |||
306 | have_side_effects = nstack.is_nonempty(); | |||
307 | } | |||
308 | } | |||
309 | } | |||
310 | } | |||
311 | ||||
312 | if (is_slp) { | |||
313 | // Now we try to find the maximum supported consistent vector which the machine | |||
314 | // description can use | |||
315 | bool small_basic_type = false; | |||
316 | bool flag_small_bt = false; | |||
317 | for (uint i = 0; i < lpt()->_body.size(); i++) { | |||
318 | if (ignored_loop_nodes[i] != -1) continue; | |||
319 | ||||
320 | BasicType bt; | |||
321 | Node* n = lpt()->_body.at(i); | |||
322 | if (n->is_Mem()) { | |||
323 | bt = n->as_Mem()->memory_type(); | |||
324 | } else { | |||
325 | bt = n->bottom_type()->basic_type(); | |||
326 | } | |||
327 | ||||
328 | if (post_loop_allowed) { | |||
329 | if (!small_basic_type) { | |||
330 | switch (bt) { | |||
331 | case T_CHAR: | |||
332 | case T_BYTE: | |||
333 | case T_SHORT: | |||
334 | small_basic_type = true; | |||
335 | break; | |||
336 | ||||
337 | case T_LONG: | |||
338 | // TODO: Remove when support completed for mask context with LONG. | |||
339 | // Support needs to be augmented for logical qword operations, currently we map to dword | |||
340 | // buckets for vectors on logicals as these were legacy. | |||
341 | small_basic_type = true; | |||
342 | break; | |||
343 | ||||
344 | default: | |||
345 | break; | |||
346 | } | |||
347 | } | |||
348 | } | |||
349 | ||||
350 | if (is_java_primitive(bt) == false) continue; | |||
351 | ||||
352 | int cur_max_vector = Matcher::max_vector_size(bt); | |||
353 | ||||
354 | // If a max vector exists which is not larger than _local_loop_unroll_factor | |||
355 | // stop looking, we already have the max vector to map to. | |||
356 | if (cur_max_vector < local_loop_unroll_factor) { | |||
357 | is_slp = false; | |||
358 | if (TraceSuperWordLoopUnrollAnalysis) { | |||
359 | tty->print_cr("slp analysis fails: unroll limit greater than max vector\n"); | |||
360 | } | |||
361 | break; | |||
362 | } | |||
363 | ||||
364 | // Map the maximal common vector | |||
365 | if (VectorNode::implemented(n->Opcode(), cur_max_vector, bt)) { | |||
366 | if (cur_max_vector < max_vector && !flag_small_bt) { | |||
367 | max_vector = cur_max_vector; | |||
368 | } else if (cur_max_vector > max_vector && UseSubwordForMaxVector) { | |||
369 | // Analyse subword in the loop to set maximum vector size to take advantage of full vector width for subword types. | |||
370 | // Here we analyze if narrowing is likely to happen and if it is we set vector size more aggressively. | |||
371 | // We check for possibility of narrowing by looking through chain operations using subword types. | |||
372 | if (is_subword_type(bt)) { | |||
373 | uint start, end; | |||
374 | VectorNode::vector_operands(n, &start, &end); | |||
375 | ||||
376 | for (uint j = start; j < end; j++) { | |||
377 | Node* in = n->in(j); | |||
378 | // Don't propagate through a memory | |||
379 | if (!in->is_Mem() && in_bb(in) && in->bottom_type()->basic_type() == T_INT) { | |||
380 | bool same_type = true; | |||
381 | for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) { | |||
382 | Node *use = in->fast_out(k); | |||
383 | if (!in_bb(use) && use->bottom_type()->basic_type() != bt) { | |||
384 | same_type = false; | |||
385 | break; | |||
386 | } | |||
387 | } | |||
388 | if (same_type) { | |||
389 | max_vector = cur_max_vector; | |||
390 | flag_small_bt = true; | |||
391 | cl->mark_subword_loop(); | |||
392 | } | |||
393 | } | |||
394 | } | |||
395 | } | |||
396 | } | |||
397 | // We only process post loops on predicated targets where we want to | |||
398 | // mask map the loop to a single iteration | |||
399 | if (post_loop_allowed) { | |||
400 | _post_block.at_put_grow(rpo_idx++, n); | |||
401 | } | |||
402 | } | |||
403 | } | |||
404 | if (is_slp) { | |||
405 | local_loop_unroll_factor = max_vector; | |||
406 | cl->mark_passed_slp(); | |||
407 | } | |||
408 | cl->mark_was_slp(); | |||
409 | if (cl->is_main_loop()) { | |||
410 | cl->set_slp_max_unroll(local_loop_unroll_factor); | |||
411 | } else if (post_loop_allowed) { | |||
412 | if (!small_basic_type) { | |||
413 | // avoid replication context for small basic types in programmable masked loops | |||
414 | cl->set_slp_max_unroll(local_loop_unroll_factor); | |||
415 | } | |||
416 | } | |||
417 | } | |||
418 | } | |||
419 | ||||
420 | //------------------------------SLP_extract--------------------------- | |||
421 | // Extract the superword level parallelism | |||
422 | // | |||
423 | // 1) A reverse post-order of nodes in the block is constructed. By scanning | |||
424 | // this list from first to last, all definitions are visited before their uses. | |||
425 | // | |||
426 | // 2) A point-to-point dependence graph is constructed between memory references. | |||
427 | // This simplifies the upcoming "independence" checker. | |||
428 | // | |||
429 | // 3) The maximum depth in the node graph from the beginning of the block | |||
430 | // to each node is computed. This is used to prune the graph search | |||
431 | // in the independence checker. | |||
432 | // | |||
433 | // 4) For integer types, the necessary bit width is propagated backwards | |||
434 | // from stores to allow packed operations on byte, char, and short | |||
435 | // integers. This reverses the promotion to type "int" that javac | |||
436 | // did for operations like: char c1,c2,c3; c1 = c2 + c3. | |||
437 | // | |||
438 | // 5) One of the memory references is picked to be an aligned vector reference. | |||
439 | // The pre-loop trip count is adjusted to align this reference in the | |||
440 | // unrolled body. | |||
441 | // | |||
442 | // 6) The initial set of pack pairs is seeded with memory references. | |||
443 | // | |||
444 | // 7) The set of pack pairs is extended by following use->def and def->use links. | |||
445 | // | |||
446 | // 8) The pairs are combined into vector sized packs. | |||
447 | // | |||
448 | // 9) Reorder the memory slices to co-locate members of the memory packs. | |||
449 | // | |||
450 | // 10) Generate ideal vector nodes for the final set of packs and where necessary, | |||
451 | // inserting scalar promotion, vector creation from multiple scalars, and | |||
452 | // extraction of scalar values from vectors. | |||
453 | // | |||
454 | void SuperWord::SLP_extract() { | |||
455 | ||||
456 | #ifndef PRODUCT | |||
457 | if (_do_vector_loop && TraceSuperWord) { | |||
458 | tty->print("SuperWord::SLP_extract\n"); | |||
459 | tty->print("input loop\n"); | |||
460 | _lpt->dump_head(); | |||
461 | _lpt->dump(); | |||
462 | for (uint i = 0; i < _lpt->_body.size(); i++) { | |||
463 | _lpt->_body.at(i)->dump(); | |||
464 | } | |||
465 | } | |||
466 | #endif | |||
467 | // Ready the block | |||
468 | if (!construct_bb()) { | |||
469 | return; // Exit if no interesting nodes or complex graph. | |||
470 | } | |||
471 | ||||
472 | // build _dg, _disjoint_ptrs | |||
473 | dependence_graph(); | |||
474 | ||||
475 | // compute function depth(Node*) | |||
476 | compute_max_depth(); | |||
477 | ||||
478 | CountedLoopNode *cl = lpt()->_head->as_CountedLoop(); | |||
479 | bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop()); | |||
480 | if (cl->is_main_loop()) { | |||
481 | if (_do_vector_loop_experimental) { | |||
482 | if (mark_generations() != -1) { | |||
483 | hoist_loads_in_graph(); // this only rebuild the graph; all basic structs need rebuild explicitly | |||
484 | ||||
485 | if (!construct_bb()) { | |||
486 | return; // Exit if no interesting nodes or complex graph. | |||
487 | } | |||
488 | dependence_graph(); | |||
489 | compute_max_depth(); | |||
490 | } | |||
491 | ||||
492 | #ifndef PRODUCT | |||
493 | if (TraceSuperWord) { | |||
494 | tty->print_cr("\nSuperWord::_do_vector_loop: graph after hoist_loads_in_graph"); | |||
495 | _lpt->dump_head(); | |||
496 | for (int j = 0; j < _block.length(); j++) { | |||
497 | Node* n = _block.at(j); | |||
498 | int d = depth(n); | |||
499 | for (int i = 0; i < d; i++) tty->print("%s", " "); | |||
500 | tty->print("%d :", d); | |||
501 | n->dump(); | |||
502 | } | |||
503 | } | |||
504 | #endif | |||
505 | } | |||
506 | ||||
507 | compute_vector_element_type(); | |||
508 | ||||
509 | // Attempt vectorization | |||
510 | ||||
511 | find_adjacent_refs(); | |||
512 | ||||
513 | if (align_to_ref() == NULL__null) { | |||
514 | return; // Did not find memory reference to align vectors | |||
515 | } | |||
516 | ||||
517 | extend_packlist(); | |||
518 | ||||
519 | if (_do_vector_loop_experimental) { | |||
520 | if (_packset.length() == 0) { | |||
521 | #ifndef PRODUCT | |||
522 | if (TraceSuperWord) { | |||
523 | tty->print_cr("\nSuperWord::_do_vector_loop DFA could not build packset, now trying to build anyway"); | |||
524 | } | |||
525 | #endif | |||
526 | pack_parallel(); | |||
527 | } | |||
528 | } | |||
529 | ||||
530 | combine_packs(); | |||
531 | ||||
532 | construct_my_pack_map(); | |||
533 | if (UseVectorCmov) { | |||
534 | merge_packs_to_cmovd(); | |||
535 | } | |||
536 | ||||
537 | filter_packs(); | |||
538 | ||||
539 | schedule(); | |||
540 | } else if (post_loop_allowed) { | |||
541 | int saved_mapped_unroll_factor = cl->slp_max_unroll(); | |||
542 | if (saved_mapped_unroll_factor) { | |||
543 | int vector_mapped_unroll_factor = saved_mapped_unroll_factor; | |||
544 | ||||
545 | // now reset the slp_unroll_factor so that we can check the analysis mapped | |||
546 | // what the vector loop was mapped to | |||
547 | cl->set_slp_max_unroll(0); | |||
548 | ||||
549 | // do the analysis on the post loop | |||
550 | unrolling_analysis(vector_mapped_unroll_factor); | |||
551 | ||||
552 | // if our analyzed loop is a canonical fit, start processing it | |||
553 | if (vector_mapped_unroll_factor == saved_mapped_unroll_factor) { | |||
554 | // now add the vector nodes to packsets | |||
555 | for (int i = 0; i < _post_block.length(); i++) { | |||
556 | Node* n = _post_block.at(i); | |||
557 | Node_List* singleton = new Node_List(); | |||
558 | singleton->push(n); | |||
559 | _packset.append(singleton); | |||
560 | set_my_pack(n, singleton); | |||
561 | } | |||
562 | ||||
563 | // map base types for vector usage | |||
564 | compute_vector_element_type(); | |||
565 | } else { | |||
566 | return; | |||
567 | } | |||
568 | } else { | |||
569 | // for some reason we could not map the slp analysis state of the vectorized loop | |||
570 | return; | |||
571 | } | |||
572 | } | |||
573 | ||||
574 | output(); | |||
575 | } | |||
576 | ||||
577 | //------------------------------find_adjacent_refs--------------------------- | |||
578 | // Find the adjacent memory references and create pack pairs for them. | |||
579 | // This is the initial set of packs that will then be extended by | |||
580 | // following use->def and def->use links. The align positions are | |||
581 | // assigned relative to the reference "align_to_ref" | |||
582 | void SuperWord::find_adjacent_refs() { | |||
583 | // Get list of memory operations | |||
584 | Node_List memops; | |||
585 | for (int i = 0; i < _block.length(); i++) { | |||
586 | Node* n = _block.at(i); | |||
587 | if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) && | |||
588 | is_java_primitive(n->as_Mem()->memory_type())) { | |||
589 | int align = memory_alignment(n->as_Mem(), 0); | |||
590 | if (align != bottom_align) { | |||
591 | memops.push(n); | |||
592 | } | |||
593 | } | |||
594 | } | |||
595 | if (TraceSuperWord) { | |||
596 | tty->print_cr("\nfind_adjacent_refs found %d memops", memops.size()); | |||
597 | } | |||
598 | ||||
599 | Node_List align_to_refs; | |||
600 | int max_idx; | |||
601 | int best_iv_adjustment = 0; | |||
602 | MemNode* best_align_to_mem_ref = NULL__null; | |||
603 | ||||
604 | while (memops.size() != 0) { | |||
605 | // Find a memory reference to align to. | |||
606 | MemNode* mem_ref = find_align_to_ref(memops, max_idx); | |||
607 | if (mem_ref == NULL__null) break; | |||
608 | align_to_refs.push(mem_ref); | |||
609 | int iv_adjustment = get_iv_adjustment(mem_ref); | |||
610 | ||||
611 | if (best_align_to_mem_ref == NULL__null) { | |||
612 | // Set memory reference which is the best from all memory operations | |||
613 | // to be used for alignment. The pre-loop trip count is modified to align | |||
614 | // this reference to a vector-aligned address. | |||
615 | best_align_to_mem_ref = mem_ref; | |||
616 | best_iv_adjustment = iv_adjustment; | |||
617 | NOT_PRODUCT(find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment);)find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment ); | |||
618 | } | |||
619 | ||||
620 | SWPointer align_to_ref_p(mem_ref, this, NULL__null, false); | |||
621 | // Set alignment relative to "align_to_ref" for all related memory operations. | |||
622 | for (int i = memops.size() - 1; i >= 0; i--) { | |||
623 | MemNode* s = memops.at(i)->as_Mem(); | |||
624 | if (isomorphic(s, mem_ref) && | |||
625 | (!_do_vector_loop || same_origin_idx(s, mem_ref))) { | |||
626 | SWPointer p2(s, this, NULL__null, false); | |||
627 | if (p2.comparable(align_to_ref_p)) { | |||
628 | int align = memory_alignment(s, iv_adjustment); | |||
629 | set_alignment(s, align); | |||
630 | } | |||
631 | } | |||
632 | } | |||
633 | ||||
634 | // Create initial pack pairs of memory operations for which | |||
635 | // alignment is set and vectors will be aligned. | |||
636 | bool create_pack = true; | |||
637 | if (memory_alignment(mem_ref, best_iv_adjustment) == 0 || _do_vector_loop) { | |||
638 | if (vectors_should_be_aligned()) { | |||
639 | int vw = vector_width(mem_ref); | |||
640 | int vw_best = vector_width(best_align_to_mem_ref); | |||
641 | if (vw > vw_best) { | |||
642 | // Do not vectorize a memory access with more elements per vector | |||
643 | // if unaligned memory access is not allowed because number of | |||
644 | // iterations in pre-loop will be not enough to align it. | |||
645 | create_pack = false; | |||
646 | } else { | |||
647 | SWPointer p2(best_align_to_mem_ref, this, NULL__null, false); | |||
648 | if (!align_to_ref_p.invar_equals(p2)) { | |||
649 | // Do not vectorize memory accesses with different invariants | |||
650 | // if unaligned memory accesses are not allowed. | |||
651 | create_pack = false; | |||
652 | } | |||
653 | } | |||
654 | } | |||
655 | } else { | |||
656 | if (same_velt_type(mem_ref, best_align_to_mem_ref)) { | |||
657 | // Can't allow vectorization of unaligned memory accesses with the | |||
658 | // same type since it could be overlapped accesses to the same array. | |||
659 | create_pack = false; | |||
660 | } else { | |||
661 | // Allow independent (different type) unaligned memory operations | |||
662 | // if HW supports them. | |||
663 | if (vectors_should_be_aligned()) { | |||
664 | create_pack = false; | |||
665 | } else { | |||
666 | // Check if packs of the same memory type but | |||
667 | // with a different alignment were created before. | |||
668 | for (uint i = 0; i < align_to_refs.size(); i++) { | |||
669 | MemNode* mr = align_to_refs.at(i)->as_Mem(); | |||
670 | if (mr == mem_ref) { | |||
671 | // Skip when we are looking at same memory operation. | |||
672 | continue; | |||
673 | } | |||
674 | if (same_velt_type(mr, mem_ref) && | |||
675 | memory_alignment(mr, iv_adjustment) != 0) | |||
676 | create_pack = false; | |||
677 | } | |||
678 | } | |||
679 | } | |||
680 | } | |||
681 | if (create_pack) { | |||
682 | for (uint i = 0; i < memops.size(); i++) { | |||
683 | Node* s1 = memops.at(i); | |||
684 | int align = alignment(s1); | |||
685 | if (align == top_align) continue; | |||
686 | for (uint j = 0; j < memops.size(); j++) { | |||
687 | Node* s2 = memops.at(j); | |||
688 | if (alignment(s2) == top_align) continue; | |||
689 | if (s1 != s2 && are_adjacent_refs(s1, s2)) { | |||
690 | if (stmts_can_pack(s1, s2, align)) { | |||
691 | Node_List* pair = new Node_List(); | |||
692 | pair->push(s1); | |||
693 | pair->push(s2); | |||
694 | if (!_do_vector_loop || same_origin_idx(s1, s2)) { | |||
695 | _packset.append(pair); | |||
696 | } | |||
697 | } | |||
698 | } | |||
699 | } | |||
700 | } | |||
701 | } else { // Don't create unaligned pack | |||
702 | // First, remove remaining memory ops of the same type from the list. | |||
703 | for (int i = memops.size() - 1; i >= 0; i--) { | |||
704 | MemNode* s = memops.at(i)->as_Mem(); | |||
705 | if (same_velt_type(s, mem_ref)) { | |||
706 | memops.remove(i); | |||
707 | } | |||
708 | } | |||
709 | ||||
710 | // Second, remove already constructed packs of the same type. | |||
711 | for (int i = _packset.length() - 1; i >= 0; i--) { | |||
712 | Node_List* p = _packset.at(i); | |||
713 | MemNode* s = p->at(0)->as_Mem(); | |||
714 | if (same_velt_type(s, mem_ref)) { | |||
715 | remove_pack_at(i); | |||
716 | } | |||
717 | } | |||
718 | ||||
719 | // If needed find the best memory reference for loop alignment again. | |||
720 | if (same_velt_type(mem_ref, best_align_to_mem_ref)) { | |||
721 | // Put memory ops from remaining packs back on memops list for | |||
722 | // the best alignment search. | |||
723 | uint orig_msize = memops.size(); | |||
724 | for (int i = 0; i < _packset.length(); i++) { | |||
725 | Node_List* p = _packset.at(i); | |||
726 | MemNode* s = p->at(0)->as_Mem(); | |||
727 | assert(!same_velt_type(s, mem_ref), "sanity")do { if (!(!same_velt_type(s, mem_ref))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 727, "assert(" "!same_velt_type(s, mem_ref)" ") failed", "sanity" ); ::breakpoint(); } } while (0); | |||
728 | memops.push(s); | |||
729 | } | |||
730 | best_align_to_mem_ref = find_align_to_ref(memops, max_idx); | |||
731 | if (best_align_to_mem_ref == NULL__null) { | |||
732 | if (TraceSuperWord) { | |||
733 | tty->print_cr("SuperWord::find_adjacent_refs(): best_align_to_mem_ref == NULL"); | |||
734 | } | |||
735 | // best_align_to_mem_ref will be used for adjusting the pre-loop limit in | |||
736 | // SuperWord::align_initial_loop_index. Find one with the biggest vector size, | |||
737 | // smallest data size and smallest iv offset from memory ops from remaining packs. | |||
738 | if (_packset.length() > 0) { | |||
739 | if (orig_msize == 0) { | |||
740 | best_align_to_mem_ref = memops.at(max_idx)->as_Mem(); | |||
741 | } else { | |||
742 | for (uint i = 0; i < orig_msize; i++) { | |||
743 | memops.remove(0); | |||
744 | } | |||
745 | best_align_to_mem_ref = find_align_to_ref(memops, max_idx); | |||
746 | assert(best_align_to_mem_ref == NULL, "sanity")do { if (!(best_align_to_mem_ref == __null)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 746, "assert(" "best_align_to_mem_ref == __null" ") failed" , "sanity"); ::breakpoint(); } } while (0); | |||
747 | best_align_to_mem_ref = memops.at(max_idx)->as_Mem(); | |||
748 | } | |||
749 | assert(best_align_to_mem_ref != NULL, "sanity")do { if (!(best_align_to_mem_ref != __null)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 749, "assert(" "best_align_to_mem_ref != __null" ") failed" , "sanity"); ::breakpoint(); } } while (0); | |||
750 | } | |||
751 | break; | |||
752 | } | |||
753 | best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref); | |||
754 | NOT_PRODUCT(find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment);)find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment ); | |||
755 | // Restore list. | |||
756 | while (memops.size() > orig_msize) | |||
757 | (void)memops.pop(); | |||
758 | } | |||
759 | } // unaligned memory accesses | |||
760 | ||||
761 | // Remove used mem nodes. | |||
762 | for (int i = memops.size() - 1; i >= 0; i--) { | |||
763 | MemNode* m = memops.at(i)->as_Mem(); | |||
764 | if (alignment(m) != top_align) { | |||
765 | memops.remove(i); | |||
766 | } | |||
767 | } | |||
768 | ||||
769 | } // while (memops.size() != 0 | |||
770 | set_align_to_ref(best_align_to_mem_ref); | |||
771 | ||||
772 | if (TraceSuperWord) { | |||
773 | tty->print_cr("\nAfter find_adjacent_refs"); | |||
774 | print_packset(); | |||
775 | } | |||
776 | } | |||
777 | ||||
778 | #ifndef PRODUCT | |||
779 | void SuperWord::find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment) { | |||
780 | if (is_trace_adjacent()) { | |||
781 | tty->print("SuperWord::find_adjacent_refs best_align_to_mem_ref = %d, best_iv_adjustment = %d", | |||
782 | best_align_to_mem_ref->_idx, best_iv_adjustment); | |||
783 | best_align_to_mem_ref->dump(); | |||
784 | } | |||
785 | } | |||
786 | #endif | |||
787 | ||||
788 | //------------------------------find_align_to_ref--------------------------- | |||
789 | // Find a memory reference to align the loop induction variable to. | |||
790 | // Looks first at stores then at loads, looking for a memory reference | |||
791 | // with the largest number of references similar to it. | |||
792 | MemNode* SuperWord::find_align_to_ref(Node_List &memops, int &idx) { | |||
793 | GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0); | |||
794 | ||||
795 | // Count number of comparable memory ops | |||
796 | for (uint i = 0; i < memops.size(); i++) { | |||
797 | MemNode* s1 = memops.at(i)->as_Mem(); | |||
798 | SWPointer p1(s1, this, NULL__null, false); | |||
799 | // Only discard unalignable memory references if vector memory references | |||
800 | // should be aligned on this platform. | |||
801 | if (vectors_should_be_aligned() && !ref_is_alignable(p1)) { | |||
802 | *cmp_ct.adr_at(i) = 0; | |||
803 | continue; | |||
804 | } | |||
805 | for (uint j = i+1; j < memops.size(); j++) { | |||
806 | MemNode* s2 = memops.at(j)->as_Mem(); | |||
807 | if (isomorphic(s1, s2)) { | |||
808 | SWPointer p2(s2, this, NULL__null, false); | |||
809 | if (p1.comparable(p2)) { | |||
810 | (*cmp_ct.adr_at(i))++; | |||
811 | (*cmp_ct.adr_at(j))++; | |||
812 | } | |||
813 | } | |||
814 | } | |||
815 | } | |||
816 | ||||
817 | // Find Store (or Load) with the greatest number of "comparable" references, | |||
818 | // biggest vector size, smallest data size and smallest iv offset. | |||
819 | int max_ct = 0; | |||
820 | int max_vw = 0; | |||
821 | int max_idx = -1; | |||
822 | int min_size = max_jint; | |||
823 | int min_iv_offset = max_jint; | |||
824 | for (uint j = 0; j < memops.size(); j++) { | |||
825 | MemNode* s = memops.at(j)->as_Mem(); | |||
826 | if (s->is_Store()) { | |||
827 | int vw = vector_width_in_bytes(s); | |||
828 | assert(vw > 1, "sanity")do { if (!(vw > 1)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 828, "assert(" "vw > 1" ") failed", "sanity"); ::breakpoint (); } } while (0); | |||
829 | SWPointer p(s, this, NULL__null, false); | |||
830 | if ( cmp_ct.at(j) > max_ct || | |||
831 | (cmp_ct.at(j) == max_ct && | |||
832 | ( vw > max_vw || | |||
833 | (vw == max_vw && | |||
834 | ( data_size(s) < min_size || | |||
835 | (data_size(s) == min_size && | |||
836 | p.offset_in_bytes() < min_iv_offset)))))) { | |||
837 | max_ct = cmp_ct.at(j); | |||
838 | max_vw = vw; | |||
839 | max_idx = j; | |||
840 | min_size = data_size(s); | |||
841 | min_iv_offset = p.offset_in_bytes(); | |||
842 | } | |||
843 | } | |||
844 | } | |||
845 | // If no stores, look at loads | |||
846 | if (max_ct == 0) { | |||
847 | for (uint j = 0; j < memops.size(); j++) { | |||
848 | MemNode* s = memops.at(j)->as_Mem(); | |||
849 | if (s->is_Load()) { | |||
850 | int vw = vector_width_in_bytes(s); | |||
851 | assert(vw > 1, "sanity")do { if (!(vw > 1)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 851, "assert(" "vw > 1" ") failed", "sanity"); ::breakpoint (); } } while (0); | |||
852 | SWPointer p(s, this, NULL__null, false); | |||
853 | if ( cmp_ct.at(j) > max_ct || | |||
854 | (cmp_ct.at(j) == max_ct && | |||
855 | ( vw > max_vw || | |||
856 | (vw == max_vw && | |||
857 | ( data_size(s) < min_size || | |||
858 | (data_size(s) == min_size && | |||
859 | p.offset_in_bytes() < min_iv_offset)))))) { | |||
860 | max_ct = cmp_ct.at(j); | |||
861 | max_vw = vw; | |||
862 | max_idx = j; | |||
863 | min_size = data_size(s); | |||
864 | min_iv_offset = p.offset_in_bytes(); | |||
865 | } | |||
866 | } | |||
867 | } | |||
868 | } | |||
869 | ||||
870 | #ifdef ASSERT1 | |||
871 | if (TraceSuperWord && Verbose) { | |||
872 | tty->print_cr("\nVector memops after find_align_to_ref"); | |||
873 | for (uint i = 0; i < memops.size(); i++) { | |||
874 | MemNode* s = memops.at(i)->as_Mem(); | |||
875 | s->dump(); | |||
876 | } | |||
877 | } | |||
878 | #endif | |||
879 | ||||
880 | idx = max_idx; | |||
881 | if (max_ct > 0) { | |||
882 | #ifdef ASSERT1 | |||
883 | if (TraceSuperWord) { | |||
884 | tty->print("\nVector align to node: "); | |||
885 | memops.at(max_idx)->as_Mem()->dump(); | |||
886 | } | |||
887 | #endif | |||
888 | return memops.at(max_idx)->as_Mem(); | |||
889 | } | |||
890 | return NULL__null; | |||
891 | } | |||
892 | ||||
893 | //------------------span_works_for_memory_size----------------------------- | |||
894 | static bool span_works_for_memory_size(MemNode* mem, int span, int mem_size, int offset) { | |||
895 | bool span_matches_memory = false; | |||
896 | if ((mem_size == type2aelembytes(T_BYTE) || mem_size == type2aelembytes(T_SHORT)) | |||
897 | && ABS(span) == type2aelembytes(T_INT)) { | |||
898 | // There is a mismatch on span size compared to memory. | |||
899 | for (DUIterator_Fast jmax, j = mem->fast_outs(jmax); j < jmax; j++) { | |||
900 | Node* use = mem->fast_out(j); | |||
901 | if (!VectorNode::is_type_transition_to_int(use)) { | |||
902 | return false; | |||
903 | } | |||
904 | } | |||
905 | // If all uses transition to integer, it means that we can successfully align even on mismatch. | |||
906 | return true; | |||
907 | } | |||
908 | else { | |||
909 | span_matches_memory = ABS(span) == mem_size; | |||
910 | } | |||
911 | return span_matches_memory && (ABS(offset) % mem_size) == 0; | |||
912 | } | |||
913 | ||||
914 | //------------------------------ref_is_alignable--------------------------- | |||
915 | // Can the preloop align the reference to position zero in the vector? | |||
916 | bool SuperWord::ref_is_alignable(SWPointer& p) { | |||
917 | if (!p.has_iv()) { | |||
918 | return true; // no induction variable | |||
919 | } | |||
920 | CountedLoopEndNode* pre_end = pre_loop_end(); | |||
921 | assert(pre_end->stride_is_con(), "pre loop stride is constant")do { if (!(pre_end->stride_is_con())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 921, "assert(" "pre_end->stride_is_con()" ") failed", "pre loop stride is constant" ); ::breakpoint(); } } while (0); | |||
922 | int preloop_stride = pre_end->stride_con(); | |||
923 | ||||
924 | int span = preloop_stride * p.scale_in_bytes(); | |||
925 | int mem_size = p.memory_size(); | |||
926 | int offset = p.offset_in_bytes(); | |||
927 | // Stride one accesses are alignable if offset is aligned to memory operation size. | |||
928 | // Offset can be unaligned when UseUnalignedAccesses is used. | |||
929 | if (span_works_for_memory_size(p.mem(), span, mem_size, offset)) { | |||
930 | return true; | |||
931 | } | |||
932 | // If the initial offset from start of the object is computable, | |||
933 | // check if the pre-loop can align the final offset accordingly. | |||
934 | // | |||
935 | // In other words: Can we find an i such that the offset | |||
936 | // after i pre-loop iterations is aligned to vw? | |||
937 | // (init_offset + pre_loop) % vw == 0 (1) | |||
938 | // where | |||
939 | // pre_loop = i * span | |||
940 | // is the number of bytes added to the offset by i pre-loop iterations. | |||
941 | // | |||
942 | // For this to hold we need pre_loop to increase init_offset by | |||
943 | // pre_loop = vw - (init_offset % vw) | |||
944 | // | |||
945 | // This is only possible if pre_loop is divisible by span because each | |||
946 | // pre-loop iteration increases the initial offset by 'span' bytes: | |||
947 | // (vw - (init_offset % vw)) % span == 0 | |||
948 | // | |||
949 | int vw = vector_width_in_bytes(p.mem()); | |||
950 | assert(vw > 1, "sanity")do { if (!(vw > 1)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 950, "assert(" "vw > 1" ") failed", "sanity"); ::breakpoint (); } } while (0); | |||
951 | Node* init_nd = pre_end->init_trip(); | |||
952 | if (init_nd->is_Con() && p.invar() == NULL__null) { | |||
953 | int init = init_nd->bottom_type()->is_int()->get_con(); | |||
954 | int init_offset = init * p.scale_in_bytes() + offset; | |||
955 | if (init_offset < 0) { // negative offset from object start? | |||
956 | return false; // may happen in dead loop | |||
957 | } | |||
958 | if (vw % span == 0) { | |||
959 | // If vm is a multiple of span, we use formula (1). | |||
960 | if (span > 0) { | |||
961 | return (vw - (init_offset % vw)) % span == 0; | |||
962 | } else { | |||
963 | assert(span < 0, "nonzero stride * scale")do { if (!(span < 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 963, "assert(" "span < 0" ") failed", "nonzero stride * scale" ); ::breakpoint(); } } while (0); | |||
964 | return (init_offset % vw) % -span == 0; | |||
965 | } | |||
966 | } else if (span % vw == 0) { | |||
967 | // If span is a multiple of vw, we can simplify formula (1) to: | |||
968 | // (init_offset + i * span) % vw == 0 | |||
969 | // => | |||
970 | // (init_offset % vw) + ((i * span) % vw) == 0 | |||
971 | // => | |||
972 | // init_offset % vw == 0 | |||
973 | // | |||
974 | // Because we add a multiple of vw to the initial offset, the final | |||
975 | // offset is a multiple of vw if and only if init_offset is a multiple. | |||
976 | // | |||
977 | return (init_offset % vw) == 0; | |||
978 | } | |||
979 | } | |||
980 | return false; | |||
981 | } | |||
982 | //---------------------------get_vw_bytes_special------------------------ | |||
983 | int SuperWord::get_vw_bytes_special(MemNode* s) { | |||
984 | // Get the vector width in bytes. | |||
985 | int vw = vector_width_in_bytes(s); | |||
986 | ||||
987 | // Check for special case where there is an MulAddS2I usage where short vectors are going to need combined. | |||
988 | BasicType btype = velt_basic_type(s); | |||
989 | if (type2aelembytes(btype) == 2) { | |||
990 | bool should_combine_adjacent = true; | |||
991 | for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) { | |||
992 | Node* user = s->fast_out(i); | |||
993 | if (!VectorNode::is_muladds2i(user)) { | |||
994 | should_combine_adjacent = false; | |||
995 | } | |||
996 | } | |||
997 | if (should_combine_adjacent) { | |||
998 | vw = MIN2(Matcher::max_vector_size(btype)*type2aelembytes(btype), vw * 2); | |||
999 | } | |||
1000 | } | |||
1001 | ||||
1002 | return vw; | |||
1003 | } | |||
1004 | ||||
1005 | //---------------------------get_iv_adjustment--------------------------- | |||
1006 | // Calculate loop's iv adjustment for this memory ops. | |||
1007 | int SuperWord::get_iv_adjustment(MemNode* mem_ref) { | |||
1008 | SWPointer align_to_ref_p(mem_ref, this, NULL__null, false); | |||
1009 | int offset = align_to_ref_p.offset_in_bytes(); | |||
1010 | int scale = align_to_ref_p.scale_in_bytes(); | |||
1011 | int elt_size = align_to_ref_p.memory_size(); | |||
1012 | int vw = get_vw_bytes_special(mem_ref); | |||
1013 | assert(vw > 1, "sanity")do { if (!(vw > 1)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1013, "assert(" "vw > 1" ") failed", "sanity"); ::breakpoint (); } } while (0); | |||
1014 | int iv_adjustment; | |||
1015 | if (scale != 0) { | |||
1016 | int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1; | |||
1017 | // At least one iteration is executed in pre-loop by default. As result | |||
1018 | // several iterations are needed to align memory operations in main-loop even | |||
1019 | // if offset is 0. | |||
1020 | int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw)); | |||
1021 | // iv_adjustment_in_bytes must be a multiple of elt_size if vector memory | |||
1022 | // references should be aligned on this platform. | |||
1023 | assert((ABS(iv_adjustment_in_bytes) % elt_size) == 0 || !vectors_should_be_aligned(),do { if (!((ABS(iv_adjustment_in_bytes) % elt_size) == 0 || ! vectors_should_be_aligned())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1024, "assert(" "(ABS(iv_adjustment_in_bytes) % elt_size) == 0 || !vectors_should_be_aligned()" ") failed", "(%d) should be divisible by (%d)", iv_adjustment_in_bytes , elt_size); ::breakpoint(); } } while (0) | |||
1024 | "(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size)do { if (!((ABS(iv_adjustment_in_bytes) % elt_size) == 0 || ! vectors_should_be_aligned())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1024, "assert(" "(ABS(iv_adjustment_in_bytes) % elt_size) == 0 || !vectors_should_be_aligned()" ") failed", "(%d) should be divisible by (%d)", iv_adjustment_in_bytes , elt_size); ::breakpoint(); } } while (0); | |||
1025 | iv_adjustment = iv_adjustment_in_bytes/elt_size; | |||
1026 | } else { | |||
1027 | // This memory op is not dependent on iv (scale == 0) | |||
1028 | iv_adjustment = 0; | |||
1029 | } | |||
1030 | ||||
1031 | #ifndef PRODUCT | |||
1032 | if (TraceSuperWord) { | |||
1033 | tty->print("SuperWord::get_iv_adjustment: n = %d, noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d: ", | |||
1034 | mem_ref->_idx, offset, iv_adjustment, elt_size, scale, iv_stride(), vw); | |||
1035 | mem_ref->dump(); | |||
1036 | } | |||
1037 | #endif | |||
1038 | return iv_adjustment; | |||
1039 | } | |||
1040 | ||||
1041 | //---------------------------dependence_graph--------------------------- | |||
1042 | // Construct dependency graph. | |||
1043 | // Add dependence edges to load/store nodes for memory dependence | |||
1044 | // A.out()->DependNode.in(1) and DependNode.out()->B.prec(x) | |||
1045 | void SuperWord::dependence_graph() { | |||
1046 | CountedLoopNode *cl = lpt()->_head->as_CountedLoop(); | |||
1047 | // First, assign a dependence node to each memory node | |||
1048 | for (int i = 0; i < _block.length(); i++ ) { | |||
1049 | Node *n = _block.at(i); | |||
1050 | if (n->is_Mem() || (n->is_Phi() && n->bottom_type() == Type::MEMORY)) { | |||
1051 | _dg.make_node(n); | |||
1052 | } | |||
1053 | } | |||
1054 | ||||
1055 | // For each memory slice, create the dependences | |||
1056 | for (int i = 0; i < _mem_slice_head.length(); i++) { | |||
1057 | Node* n = _mem_slice_head.at(i); | |||
1058 | Node* n_tail = _mem_slice_tail.at(i); | |||
1059 | ||||
1060 | // Get slice in predecessor order (last is first) | |||
1061 | if (cl->is_main_loop()) { | |||
1062 | mem_slice_preds(n_tail, n, _nlist); | |||
1063 | } | |||
1064 | ||||
1065 | #ifndef PRODUCT | |||
1066 | if(TraceSuperWord && Verbose) { | |||
1067 | tty->print_cr("SuperWord::dependence_graph: built a new mem slice"); | |||
1068 | for (int j = _nlist.length() - 1; j >= 0 ; j--) { | |||
1069 | _nlist.at(j)->dump(); | |||
1070 | } | |||
1071 | } | |||
1072 | #endif | |||
1073 | // Make the slice dependent on the root | |||
1074 | DepMem* slice = _dg.dep(n); | |||
1075 | _dg.make_edge(_dg.root(), slice); | |||
1076 | ||||
1077 | // Create a sink for the slice | |||
1078 | DepMem* slice_sink = _dg.make_node(NULL__null); | |||
1079 | _dg.make_edge(slice_sink, _dg.tail()); | |||
1080 | ||||
1081 | // Now visit each pair of memory ops, creating the edges | |||
1082 | for (int j = _nlist.length() - 1; j >= 0 ; j--) { | |||
1083 | Node* s1 = _nlist.at(j); | |||
1084 | ||||
1085 | // If no dependency yet, use slice | |||
1086 | if (_dg.dep(s1)->in_cnt() == 0) { | |||
1087 | _dg.make_edge(slice, s1); | |||
1088 | } | |||
1089 | SWPointer p1(s1->as_Mem(), this, NULL__null, false); | |||
1090 | bool sink_dependent = true; | |||
1091 | for (int k = j - 1; k >= 0; k--) { | |||
1092 | Node* s2 = _nlist.at(k); | |||
1093 | if (s1->is_Load() && s2->is_Load()) | |||
1094 | continue; | |||
1095 | SWPointer p2(s2->as_Mem(), this, NULL__null, false); | |||
1096 | ||||
1097 | int cmp = p1.cmp(p2); | |||
1098 | if (SuperWordRTDepCheck && | |||
1099 | p1.base() != p2.base() && p1.valid() && p2.valid()) { | |||
1100 | // Create a runtime check to disambiguate | |||
1101 | OrderedPair pp(p1.base(), p2.base()); | |||
1102 | _disjoint_ptrs.append_if_missing(pp); | |||
1103 | } else if (!SWPointer::not_equal(cmp)) { | |||
1104 | // Possibly same address | |||
1105 | _dg.make_edge(s1, s2); | |||
1106 | sink_dependent = false; | |||
1107 | } | |||
1108 | } | |||
1109 | if (sink_dependent) { | |||
1110 | _dg.make_edge(s1, slice_sink); | |||
1111 | } | |||
1112 | } | |||
1113 | ||||
1114 | if (TraceSuperWord) { | |||
1115 | tty->print_cr("\nDependence graph for slice: %d", n->_idx); | |||
1116 | for (int q = 0; q < _nlist.length(); q++) { | |||
1117 | _dg.print(_nlist.at(q)); | |||
1118 | } | |||
1119 | tty->cr(); | |||
1120 | } | |||
1121 | ||||
1122 | _nlist.clear(); | |||
1123 | } | |||
1124 | ||||
1125 | if (TraceSuperWord) { | |||
1126 | tty->print_cr("\ndisjoint_ptrs: %s", _disjoint_ptrs.length() > 0 ? "" : "NONE"); | |||
1127 | for (int r = 0; r < _disjoint_ptrs.length(); r++) { | |||
1128 | _disjoint_ptrs.at(r).print(); | |||
1129 | tty->cr(); | |||
1130 | } | |||
1131 | tty->cr(); | |||
1132 | } | |||
1133 | ||||
1134 | } | |||
1135 | ||||
1136 | //---------------------------mem_slice_preds--------------------------- | |||
1137 | // Return a memory slice (node list) in predecessor order starting at "start" | |||
1138 | void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds) { | |||
1139 | assert(preds.length() == 0, "start empty")do { if (!(preds.length() == 0)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1139, "assert(" "preds.length() == 0" ") failed", "start empty" ); ::breakpoint(); } } while (0); | |||
1140 | Node* n = start; | |||
1141 | Node* prev = NULL__null; | |||
1142 | while (true) { | |||
1143 | NOT_PRODUCT( if(is_trace_mem_slice()) tty->print_cr("SuperWord::mem_slice_preds: n %d", n->_idx);)if(is_trace_mem_slice()) tty->print_cr("SuperWord::mem_slice_preds: n %d" , n->_idx); | |||
1144 | assert(in_bb(n), "must be in block")do { if (!(in_bb(n))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1144, "assert(" "in_bb(n)" ") failed", "must be in block"); ::breakpoint(); } } while (0); | |||
1145 | for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { | |||
1146 | Node* out = n->fast_out(i); | |||
1147 | if (out->is_Load()) { | |||
1148 | if (in_bb(out)) { | |||
1149 | preds.push(out); | |||
1150 | if (TraceSuperWord && Verbose) { | |||
1151 | tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", out->_idx); | |||
1152 | } | |||
1153 | } | |||
1154 | } else { | |||
1155 | // FIXME | |||
1156 | if (out->is_MergeMem() && !in_bb(out)) { | |||
1157 | // Either unrolling is causing a memory edge not to disappear, | |||
1158 | // or need to run igvn.optimize() again before SLP | |||
1159 | } else if (out->is_Phi() && out->bottom_type() == Type::MEMORY && !in_bb(out)) { | |||
1160 | // Ditto. Not sure what else to check further. | |||
1161 | } else if (out->Opcode() == Op_StoreCM && out->in(MemNode::OopStore) == n) { | |||
1162 | // StoreCM has an input edge used as a precedence edge. | |||
1163 | // Maybe an issue when oop stores are vectorized. | |||
1164 | } else { | |||
1165 | assert(out == prev || prev == NULL, "no branches off of store slice")do { if (!(out == prev || prev == __null)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1165, "assert(" "out == prev || prev == __null" ") failed", "no branches off of store slice"); ::breakpoint(); } } while (0); | |||
1166 | } | |||
1167 | }//else | |||
1168 | }//for | |||
1169 | if (n == stop) break; | |||
1170 | preds.push(n); | |||
1171 | if (TraceSuperWord && Verbose) { | |||
1172 | tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", n->_idx); | |||
1173 | } | |||
1174 | prev = n; | |||
1175 | assert(n->is_Mem(), "unexpected node %s", n->Name())do { if (!(n->is_Mem())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1175, "assert(" "n->is_Mem()" ") failed", "unexpected node %s" , n->Name()); ::breakpoint(); } } while (0); | |||
1176 | n = n->in(MemNode::Memory); | |||
1177 | } | |||
1178 | } | |||
1179 | ||||
1180 | //------------------------------stmts_can_pack--------------------------- | |||
1181 | // Can s1 and s2 be in a pack with s1 immediately preceding s2 and | |||
1182 | // s1 aligned at "align" | |||
1183 | bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) { | |||
1184 | ||||
1185 | // Do not use superword for non-primitives | |||
1186 | BasicType bt1 = velt_basic_type(s1); | |||
1187 | BasicType bt2 = velt_basic_type(s2); | |||
1188 | if(!is_java_primitive(bt1) || !is_java_primitive(bt2)) | |||
1189 | return false; | |||
1190 | if (Matcher::max_vector_size(bt1) < 2) { | |||
1191 | return false; // No vectors for this type | |||
1192 | } | |||
1193 | ||||
1194 | if (isomorphic(s1, s2)) { | |||
1195 | if ((independent(s1, s2) && have_similar_inputs(s1, s2)) || reduction(s1, s2)) { | |||
1196 | if (!exists_at(s1, 0) && !exists_at(s2, 1)) { | |||
1197 | if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) { | |||
1198 | int s1_align = alignment(s1); | |||
1199 | int s2_align = alignment(s2); | |||
1200 | if (s1_align == top_align || s1_align == align) { | |||
1201 | if (s2_align == top_align || s2_align == align + data_size(s1)) { | |||
1202 | return true; | |||
1203 | } | |||
1204 | } | |||
1205 | } | |||
1206 | } | |||
1207 | } | |||
1208 | } | |||
1209 | return false; | |||
1210 | } | |||
1211 | ||||
1212 | //------------------------------exists_at--------------------------- | |||
1213 | // Does s exist in a pack at position pos? | |||
1214 | bool SuperWord::exists_at(Node* s, uint pos) { | |||
1215 | for (int i = 0; i < _packset.length(); i++) { | |||
1216 | Node_List* p = _packset.at(i); | |||
1217 | if (p->at(pos) == s) { | |||
1218 | return true; | |||
1219 | } | |||
1220 | } | |||
1221 | return false; | |||
1222 | } | |||
1223 | ||||
1224 | //------------------------------are_adjacent_refs--------------------------- | |||
1225 | // Is s1 immediately before s2 in memory? | |||
1226 | bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) { | |||
1227 | if (!s1->is_Mem() || !s2->is_Mem()) return false; | |||
1228 | if (!in_bb(s1) || !in_bb(s2)) return false; | |||
1229 | ||||
1230 | // Do not use superword for non-primitives | |||
1231 | if (!is_java_primitive(s1->as_Mem()->memory_type()) || | |||
1232 | !is_java_primitive(s2->as_Mem()->memory_type())) { | |||
1233 | return false; | |||
1234 | } | |||
1235 | ||||
1236 | // FIXME - co_locate_pack fails on Stores in different mem-slices, so | |||
1237 | // only pack memops that are in the same alias set until that's fixed. | |||
1238 | if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) != | |||
1239 | _phase->C->get_alias_index(s2->as_Mem()->adr_type())) | |||
1240 | return false; | |||
1241 | SWPointer p1(s1->as_Mem(), this, NULL__null, false); | |||
1242 | SWPointer p2(s2->as_Mem(), this, NULL__null, false); | |||
1243 | if (p1.base() != p2.base() || !p1.comparable(p2)) return false; | |||
1244 | int diff = p2.offset_in_bytes() - p1.offset_in_bytes(); | |||
1245 | return diff == data_size(s1); | |||
1246 | } | |||
1247 | ||||
1248 | //------------------------------isomorphic--------------------------- | |||
1249 | // Are s1 and s2 similar? | |||
1250 | bool SuperWord::isomorphic(Node* s1, Node* s2) { | |||
1251 | if (s1->Opcode() != s2->Opcode()) return false; | |||
1252 | if (s1->req() != s2->req()) return false; | |||
1253 | if (!same_velt_type(s1, s2)) return false; | |||
1254 | Node* s1_ctrl = s1->in(0); | |||
1255 | Node* s2_ctrl = s2->in(0); | |||
1256 | // If the control nodes are equivalent, no further checks are required to test for isomorphism. | |||
1257 | if (s1_ctrl == s2_ctrl) { | |||
1258 | return true; | |||
1259 | } else { | |||
1260 | bool s1_ctrl_inv = ((s1_ctrl == NULL__null) ? true : lpt()->is_invariant(s1_ctrl)); | |||
1261 | bool s2_ctrl_inv = ((s2_ctrl == NULL__null) ? true : lpt()->is_invariant(s2_ctrl)); | |||
1262 | // If the control nodes are not invariant for the loop, fail isomorphism test. | |||
1263 | if (!s1_ctrl_inv || !s2_ctrl_inv) { | |||
1264 | return false; | |||
1265 | } | |||
1266 | if(s1_ctrl != NULL__null && s2_ctrl != NULL__null) { | |||
1267 | if (s1_ctrl->is_Proj()) { | |||
1268 | s1_ctrl = s1_ctrl->in(0); | |||
1269 | assert(lpt()->is_invariant(s1_ctrl), "must be invariant")do { if (!(lpt()->is_invariant(s1_ctrl))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1269, "assert(" "lpt()->is_invariant(s1_ctrl)" ") failed" , "must be invariant"); ::breakpoint(); } } while (0); | |||
1270 | } | |||
1271 | if (s2_ctrl->is_Proj()) { | |||
1272 | s2_ctrl = s2_ctrl->in(0); | |||
1273 | assert(lpt()->is_invariant(s2_ctrl), "must be invariant")do { if (!(lpt()->is_invariant(s2_ctrl))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1273, "assert(" "lpt()->is_invariant(s2_ctrl)" ") failed" , "must be invariant"); ::breakpoint(); } } while (0); | |||
1274 | } | |||
1275 | if (!s1_ctrl->is_RangeCheck() || !s2_ctrl->is_RangeCheck()) { | |||
1276 | return false; | |||
1277 | } | |||
1278 | } | |||
1279 | // Control nodes are invariant. However, we have no way of checking whether they resolve | |||
1280 | // in an equivalent manner. But, we know that invariant range checks are guaranteed to | |||
1281 | // throw before the loop (if they would have thrown). Thus, the loop would not have been reached. | |||
1282 | // Therefore, if the control nodes for both are range checks, we accept them to be isomorphic. | |||
1283 | for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { | |||
1284 | Node* t1 = s1->fast_out(i); | |||
1285 | for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) { | |||
1286 | Node* t2 = s2->fast_out(j); | |||
1287 | if (VectorNode::is_muladds2i(t1) && VectorNode::is_muladds2i(t2)) { | |||
1288 | return true; | |||
1289 | } | |||
1290 | } | |||
1291 | } | |||
1292 | } | |||
1293 | return false; | |||
1294 | } | |||
1295 | ||||
1296 | //------------------------------independent--------------------------- | |||
1297 | // Is there no data path from s1 to s2 or s2 to s1? | |||
1298 | bool SuperWord::independent(Node* s1, Node* s2) { | |||
1299 | // assert(s1->Opcode() == s2->Opcode(), "check isomorphic first"); | |||
1300 | int d1 = depth(s1); | |||
1301 | int d2 = depth(s2); | |||
1302 | if (d1 == d2) return s1 != s2; | |||
1303 | Node* deep = d1 > d2 ? s1 : s2; | |||
1304 | Node* shallow = d1 > d2 ? s2 : s1; | |||
1305 | ||||
1306 | visited_clear(); | |||
1307 | ||||
1308 | return independent_path(shallow, deep); | |||
1309 | } | |||
1310 | ||||
1311 | //--------------------------have_similar_inputs----------------------- | |||
1312 | // For a node pair (s1, s2) which is isomorphic and independent, | |||
1313 | // do s1 and s2 have similar input edges? | |||
1314 | bool SuperWord::have_similar_inputs(Node* s1, Node* s2) { | |||
1315 | // assert(isomorphic(s1, s2) == true, "check isomorphic"); | |||
1316 | // assert(independent(s1, s2) == true, "check independent"); | |||
1317 | if (s1->req() > 1 && !s1->is_Store() && !s1->is_Load()) { | |||
1318 | for (uint i = 1; i < s1->req(); i++) { | |||
1319 | if (s1->in(i)->Opcode() != s2->in(i)->Opcode()) return false; | |||
1320 | } | |||
1321 | } | |||
1322 | return true; | |||
1323 | } | |||
1324 | ||||
1325 | //------------------------------reduction--------------------------- | |||
1326 | // Is there a data path between s1 and s2 and the nodes reductions? | |||
1327 | bool SuperWord::reduction(Node* s1, Node* s2) { | |||
1328 | bool retValue = false; | |||
1329 | int d1 = depth(s1); | |||
1330 | int d2 = depth(s2); | |||
1331 | if (d2 > d1) { | |||
1332 | if (s1->is_reduction() && s2->is_reduction()) { | |||
1333 | // This is an ordered set, so s1 should define s2 | |||
1334 | for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { | |||
1335 | Node* t1 = s1->fast_out(i); | |||
1336 | if (t1 == s2) { | |||
1337 | // both nodes are reductions and connected | |||
1338 | retValue = true; | |||
1339 | } | |||
1340 | } | |||
1341 | } | |||
1342 | } | |||
1343 | ||||
1344 | return retValue; | |||
1345 | } | |||
1346 | ||||
1347 | //------------------------------independent_path------------------------------ | |||
1348 | // Helper for independent | |||
1349 | bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) { | |||
1350 | if (dp >= 1000) return false; // stop deep recursion | |||
1351 | visited_set(deep); | |||
1352 | int shal_depth = depth(shallow); | |||
1353 | assert(shal_depth <= depth(deep), "must be")do { if (!(shal_depth <= depth(deep))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1353, "assert(" "shal_depth <= depth(deep)" ") failed", "must be" ); ::breakpoint(); } } while (0); | |||
1354 | for (DepPreds preds(deep, _dg); !preds.done(); preds.next()) { | |||
1355 | Node* pred = preds.current(); | |||
1356 | if (in_bb(pred) && !visited_test(pred)) { | |||
1357 | if (shallow == pred) { | |||
1358 | return false; | |||
1359 | } | |||
1360 | if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) { | |||
1361 | return false; | |||
1362 | } | |||
1363 | } | |||
1364 | } | |||
1365 | return true; | |||
1366 | } | |||
1367 | ||||
1368 | //------------------------------set_alignment--------------------------- | |||
1369 | void SuperWord::set_alignment(Node* s1, Node* s2, int align) { | |||
1370 | set_alignment(s1, align); | |||
1371 | if (align == top_align || align == bottom_align) { | |||
1372 | set_alignment(s2, align); | |||
1373 | } else { | |||
1374 | set_alignment(s2, align + data_size(s1)); | |||
1375 | } | |||
1376 | } | |||
1377 | ||||
1378 | //------------------------------data_size--------------------------- | |||
1379 | int SuperWord::data_size(Node* s) { | |||
1380 | Node* use = NULL__null; //test if the node is a candidate for CMoveV optimization, then return the size of CMov | |||
1381 | if (UseVectorCmov) { | |||
1382 | use = _cmovev_kit.is_Bool_candidate(s); | |||
1383 | if (use != NULL__null) { | |||
1384 | return data_size(use); | |||
1385 | } | |||
1386 | use = _cmovev_kit.is_CmpD_candidate(s); | |||
1387 | if (use != NULL__null) { | |||
1388 | return data_size(use); | |||
1389 | } | |||
1390 | } | |||
1391 | ||||
1392 | int bsize = type2aelembytes(velt_basic_type(s)); | |||
1393 | assert(bsize != 0, "valid size")do { if (!(bsize != 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1393, "assert(" "bsize != 0" ") failed", "valid size"); ::breakpoint (); } } while (0); | |||
1394 | return bsize; | |||
1395 | } | |||
1396 | ||||
1397 | //------------------------------extend_packlist--------------------------- | |||
1398 | // Extend packset by following use->def and def->use links from pack members. | |||
1399 | void SuperWord::extend_packlist() { | |||
1400 | bool changed; | |||
1401 | do { | |||
1402 | packset_sort(_packset.length()); | |||
1403 | changed = false; | |||
1404 | for (int i = 0; i < _packset.length(); i++) { | |||
1405 | Node_List* p = _packset.at(i); | |||
1406 | changed |= follow_use_defs(p); | |||
1407 | changed |= follow_def_uses(p); | |||
1408 | } | |||
1409 | } while (changed); | |||
1410 | ||||
1411 | if (_race_possible) { | |||
1412 | for (int i = 0; i < _packset.length(); i++) { | |||
1413 | Node_List* p = _packset.at(i); | |||
1414 | order_def_uses(p); | |||
1415 | } | |||
1416 | } | |||
1417 | ||||
1418 | if (TraceSuperWord) { | |||
1419 | tty->print_cr("\nAfter extend_packlist"); | |||
1420 | print_packset(); | |||
1421 | } | |||
1422 | } | |||
1423 | ||||
1424 | //------------------------------follow_use_defs--------------------------- | |||
1425 | // Extend the packset by visiting operand definitions of nodes in pack p | |||
1426 | bool SuperWord::follow_use_defs(Node_List* p) { | |||
1427 | assert(p->size() == 2, "just checking")do { if (!(p->size() == 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1427, "assert(" "p->size() == 2" ") failed", "just checking" ); ::breakpoint(); } } while (0); | |||
1428 | Node* s1 = p->at(0); | |||
1429 | Node* s2 = p->at(1); | |||
1430 | assert(s1->req() == s2->req(), "just checking")do { if (!(s1->req() == s2->req())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1430, "assert(" "s1->req() == s2->req()" ") failed", "just checking" ); ::breakpoint(); } } while (0); | |||
1431 | assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking")do { if (!(alignment(s1) + data_size(s1) == alignment(s2))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1431, "assert(" "alignment(s1) + data_size(s1) == alignment(s2)" ") failed", "just checking"); ::breakpoint(); } } while (0); | |||
1432 | ||||
1433 | if (s1->is_Load()) return false; | |||
1434 | ||||
1435 | int align = alignment(s1); | |||
1436 | NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_use_defs: s1 %d, align %d", s1->_idx, align);)if(is_trace_alignment()) tty->print_cr("SuperWord::follow_use_defs: s1 %d, align %d" , s1->_idx, align); | |||
1437 | bool changed = false; | |||
1438 | int start = s1->is_Store() ? MemNode::ValueIn : 1; | |||
1439 | int end = s1->is_Store() ? MemNode::ValueIn+1 : s1->req(); | |||
1440 | for (int j = start; j < end; j++) { | |||
1441 | Node* t1 = s1->in(j); | |||
1442 | Node* t2 = s2->in(j); | |||
1443 | if (!in_bb(t1) || !in_bb(t2)) | |||
1444 | continue; | |||
1445 | if (stmts_can_pack(t1, t2, align)) { | |||
1446 | if (est_savings(t1, t2) >= 0) { | |||
1447 | Node_List* pair = new Node_List(); | |||
1448 | pair->push(t1); | |||
1449 | pair->push(t2); | |||
1450 | _packset.append(pair); | |||
1451 | NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_use_defs: set_alignment(%d, %d, %d)", t1->_idx, t2->_idx, align);)if(is_trace_alignment()) tty->print_cr("SuperWord::follow_use_defs: set_alignment(%d, %d, %d)" , t1->_idx, t2->_idx, align); | |||
1452 | set_alignment(t1, t2, align); | |||
1453 | changed = true; | |||
1454 | } | |||
1455 | } | |||
1456 | } | |||
1457 | return changed; | |||
1458 | } | |||
1459 | ||||
1460 | //------------------------------follow_def_uses--------------------------- | |||
1461 | // Extend the packset by visiting uses of nodes in pack p | |||
1462 | bool SuperWord::follow_def_uses(Node_List* p) { | |||
1463 | bool changed = false; | |||
1464 | Node* s1 = p->at(0); | |||
1465 | Node* s2 = p->at(1); | |||
1466 | assert(p->size() == 2, "just checking")do { if (!(p->size() == 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1466, "assert(" "p->size() == 2" ") failed", "just checking" ); ::breakpoint(); } } while (0); | |||
1467 | assert(s1->req() == s2->req(), "just checking")do { if (!(s1->req() == s2->req())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1467, "assert(" "s1->req() == s2->req()" ") failed", "just checking" ); ::breakpoint(); } } while (0); | |||
1468 | assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking")do { if (!(alignment(s1) + data_size(s1) == alignment(s2))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1468, "assert(" "alignment(s1) + data_size(s1) == alignment(s2)" ") failed", "just checking"); ::breakpoint(); } } while (0); | |||
1469 | ||||
1470 | if (s1->is_Store()) return false; | |||
1471 | ||||
1472 | int align = alignment(s1); | |||
1473 | NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_def_uses: s1 %d, align %d", s1->_idx, align);)if(is_trace_alignment()) tty->print_cr("SuperWord::follow_def_uses: s1 %d, align %d" , s1->_idx, align); | |||
1474 | int savings = -1; | |||
1475 | int num_s1_uses = 0; | |||
1476 | Node* u1 = NULL__null; | |||
1477 | Node* u2 = NULL__null; | |||
1478 | for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { | |||
1479 | Node* t1 = s1->fast_out(i); | |||
1480 | num_s1_uses++; | |||
1481 | if (!in_bb(t1)) continue; | |||
1482 | for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) { | |||
1483 | Node* t2 = s2->fast_out(j); | |||
1484 | if (!in_bb(t2)) continue; | |||
1485 | if (t2->Opcode() == Op_AddI && t2 == _lp->as_CountedLoop()->incr()) continue; // don't mess with the iv | |||
1486 | if (!opnd_positions_match(s1, t1, s2, t2)) | |||
1487 | continue; | |||
1488 | if (stmts_can_pack(t1, t2, align)) { | |||
1489 | int my_savings = est_savings(t1, t2); | |||
1490 | if (my_savings > savings) { | |||
1491 | savings = my_savings; | |||
1492 | u1 = t1; | |||
1493 | u2 = t2; | |||
1494 | } | |||
1495 | } | |||
1496 | } | |||
1497 | } | |||
1498 | if (num_s1_uses > 1) { | |||
1499 | _race_possible = true; | |||
1500 | } | |||
1501 | if (savings >= 0) { | |||
1502 | Node_List* pair = new Node_List(); | |||
1503 | pair->push(u1); | |||
1504 | pair->push(u2); | |||
1505 | _packset.append(pair); | |||
1506 | NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_def_uses: set_alignment(%d, %d, %d)", u1->_idx, u2->_idx, align);)if(is_trace_alignment()) tty->print_cr("SuperWord::follow_def_uses: set_alignment(%d, %d, %d)" , u1->_idx, u2->_idx, align); | |||
1507 | set_alignment(u1, u2, align); | |||
1508 | changed = true; | |||
1509 | } | |||
1510 | return changed; | |||
1511 | } | |||
1512 | ||||
1513 | //------------------------------order_def_uses--------------------------- | |||
1514 | // For extended packsets, ordinally arrange uses packset by major component | |||
1515 | void SuperWord::order_def_uses(Node_List* p) { | |||
1516 | Node* s1 = p->at(0); | |||
1517 | ||||
1518 | if (s1->is_Store()) return; | |||
1519 | ||||
1520 | // reductions are always managed beforehand | |||
1521 | if (s1->is_reduction()) return; | |||
1522 | ||||
1523 | for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { | |||
1524 | Node* t1 = s1->fast_out(i); | |||
1525 | ||||
1526 | // Only allow operand swap on commuting operations | |||
1527 | if (!t1->is_Add() && !t1->is_Mul() && !VectorNode::is_muladds2i(t1)) { | |||
1528 | break; | |||
1529 | } | |||
1530 | ||||
1531 | // Now find t1's packset | |||
1532 | Node_List* p2 = NULL__null; | |||
1533 | for (int j = 0; j < _packset.length(); j++) { | |||
1534 | p2 = _packset.at(j); | |||
1535 | Node* first = p2->at(0); | |||
1536 | if (t1 == first) { | |||
1537 | break; | |||
1538 | } | |||
1539 | p2 = NULL__null; | |||
1540 | } | |||
1541 | // Arrange all sub components by the major component | |||
1542 | if (p2 != NULL__null) { | |||
1543 | for (uint j = 1; j < p->size(); j++) { | |||
1544 | Node* d1 = p->at(j); | |||
1545 | Node* u1 = p2->at(j); | |||
1546 | opnd_positions_match(s1, t1, d1, u1); | |||
1547 | } | |||
1548 | } | |||
1549 | } | |||
1550 | } | |||
1551 | ||||
1552 | //---------------------------opnd_positions_match------------------------- | |||
1553 | // Is the use of d1 in u1 at the same operand position as d2 in u2? | |||
1554 | bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) { | |||
1555 | // check reductions to see if they are marshalled to represent the reduction | |||
1556 | // operator in a specified opnd | |||
1557 | if (u1->is_reduction() && u2->is_reduction()) { | |||
1558 | // ensure reductions have phis and reduction definitions feeding the 1st operand | |||
1559 | Node* first = u1->in(2); | |||
1560 | if (first->is_Phi() || first->is_reduction()) { | |||
1561 | u1->swap_edges(1, 2); | |||
1562 | } | |||
1563 | // ensure reductions have phis and reduction definitions feeding the 1st operand | |||
1564 | first = u2->in(2); | |||
1565 | if (first->is_Phi() || first->is_reduction()) { | |||
1566 | u2->swap_edges(1, 2); | |||
1567 | } | |||
1568 | return true; | |||
1569 | } | |||
1570 | ||||
1571 | uint ct = u1->req(); | |||
1572 | if (ct != u2->req()) return false; | |||
1573 | uint i1 = 0; | |||
1574 | uint i2 = 0; | |||
1575 | do { | |||
1576 | for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break; | |||
1577 | for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break; | |||
1578 | if (i1 != i2) { | |||
1579 | if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) { | |||
1580 | // Further analysis relies on operands position matching. | |||
1581 | u2->swap_edges(i1, i2); | |||
1582 | } else if (VectorNode::is_muladds2i(u2) && u1 != u2) { | |||
1583 | if (i1 == 5 - i2) { // ((i1 == 3 && i2 == 2) || (i1 == 2 && i2 == 3) || (i1 == 1 && i2 == 4) || (i1 == 4 && i2 == 1)) | |||
1584 | u2->swap_edges(1, 2); | |||
1585 | u2->swap_edges(3, 4); | |||
1586 | } | |||
1587 | if (i1 == 3 - i2 || i1 == 7 - i2) { // ((i1 == 1 && i2 == 2) || (i1 == 2 && i2 == 1) || (i1 == 3 && i2 == 4) || (i1 == 4 && i2 == 3)) | |||
1588 | u2->swap_edges(2, 3); | |||
1589 | u2->swap_edges(1, 4); | |||
1590 | } | |||
1591 | return false; // Just swap the edges, the muladds2i nodes get packed in follow_use_defs | |||
1592 | } else { | |||
1593 | return false; | |||
1594 | } | |||
1595 | } else if (i1 == i2 && VectorNode::is_muladds2i(u2) && u1 != u2) { | |||
1596 | u2->swap_edges(1, 3); | |||
1597 | u2->swap_edges(2, 4); | |||
1598 | return false; // Just swap the edges, the muladds2i nodes get packed in follow_use_defs | |||
1599 | } | |||
1600 | } while (i1 < ct); | |||
1601 | return true; | |||
1602 | } | |||
1603 | ||||
1604 | //------------------------------est_savings--------------------------- | |||
1605 | // Estimate the savings from executing s1 and s2 as a pack | |||
1606 | int SuperWord::est_savings(Node* s1, Node* s2) { | |||
1607 | int save_in = 2 - 1; // 2 operations per instruction in packed form | |||
1608 | ||||
1609 | // inputs | |||
1610 | for (uint i = 1; i < s1->req(); i++) { | |||
1611 | Node* x1 = s1->in(i); | |||
1612 | Node* x2 = s2->in(i); | |||
1613 | if (x1 != x2) { | |||
1614 | if (are_adjacent_refs(x1, x2)) { | |||
1615 | save_in += adjacent_profit(x1, x2); | |||
1616 | } else if (!in_packset(x1, x2)) { | |||
1617 | save_in -= pack_cost(2); | |||
1618 | } else { | |||
1619 | save_in += unpack_cost(2); | |||
1620 | } | |||
1621 | } | |||
1622 | } | |||
1623 | ||||
1624 | // uses of result | |||
1625 | uint ct = 0; | |||
1626 | int save_use = 0; | |||
1627 | for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { | |||
1628 | Node* s1_use = s1->fast_out(i); | |||
1629 | for (int j = 0; j < _packset.length(); j++) { | |||
1630 | Node_List* p = _packset.at(j); | |||
1631 | if (p->at(0) == s1_use) { | |||
1632 | for (DUIterator_Fast kmax, k = s2->fast_outs(kmax); k < kmax; k++) { | |||
1633 | Node* s2_use = s2->fast_out(k); | |||
1634 | if (p->at(p->size()-1) == s2_use) { | |||
1635 | ct++; | |||
1636 | if (are_adjacent_refs(s1_use, s2_use)) { | |||
1637 | save_use += adjacent_profit(s1_use, s2_use); | |||
1638 | } | |||
1639 | } | |||
1640 | } | |||
1641 | } | |||
1642 | } | |||
1643 | } | |||
1644 | ||||
1645 | if (ct < s1->outcnt()) save_use += unpack_cost(1); | |||
1646 | if (ct < s2->outcnt()) save_use += unpack_cost(1); | |||
1647 | ||||
1648 | return MAX2(save_in, save_use); | |||
1649 | } | |||
1650 | ||||
1651 | //------------------------------costs--------------------------- | |||
1652 | int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; } | |||
1653 | int SuperWord::pack_cost(int ct) { return ct; } | |||
1654 | int SuperWord::unpack_cost(int ct) { return ct; } | |||
1655 | ||||
1656 | //------------------------------combine_packs--------------------------- | |||
1657 | // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last | |||
1658 | void SuperWord::combine_packs() { | |||
1659 | bool changed = true; | |||
1660 | // Combine packs regardless max vector size. | |||
1661 | while (changed) { | |||
1662 | changed = false; | |||
1663 | for (int i = 0; i < _packset.length(); i++) { | |||
1664 | Node_List* p1 = _packset.at(i); | |||
1665 | if (p1 == NULL__null) continue; | |||
1666 | // Because of sorting we can start at i + 1 | |||
1667 | for (int j = i + 1; j < _packset.length(); j++) { | |||
1668 | Node_List* p2 = _packset.at(j); | |||
1669 | if (p2 == NULL__null) continue; | |||
1670 | if (i == j) continue; | |||
1671 | if (p1->at(p1->size()-1) == p2->at(0)) { | |||
1672 | for (uint k = 1; k < p2->size(); k++) { | |||
1673 | p1->push(p2->at(k)); | |||
1674 | } | |||
1675 | _packset.at_put(j, NULL__null); | |||
1676 | changed = true; | |||
1677 | } | |||
1678 | } | |||
1679 | } | |||
1680 | } | |||
1681 | ||||
1682 | // Split packs which have size greater then max vector size. | |||
1683 | for (int i = 0; i < _packset.length(); i++) { | |||
1684 | Node_List* p1 = _packset.at(i); | |||
1685 | if (p1 != NULL__null) { | |||
1686 | BasicType bt = velt_basic_type(p1->at(0)); | |||
1687 | uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector | |||
1688 | assert(is_power_of_2(max_vlen), "sanity")do { if (!(is_power_of_2(max_vlen))) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1688, "assert(" "is_power_of_2(max_vlen)" ") failed", "sanity" ); ::breakpoint(); } } while (0); | |||
1689 | uint psize = p1->size(); | |||
1690 | if (!is_power_of_2(psize)) { | |||
1691 | // Skip pack which can't be vector. | |||
1692 | // case1: for(...) { a[i] = i; } elements values are different (i+x) | |||
1693 | // case2: for(...) { a[i] = b[i+1]; } can't align both, load and store | |||
1694 | _packset.at_put(i, NULL__null); | |||
1695 | continue; | |||
1696 | } | |||
1697 | if (psize > max_vlen) { | |||
1698 | Node_List* pack = new Node_List(); | |||
1699 | for (uint j = 0; j < psize; j++) { | |||
1700 | pack->push(p1->at(j)); | |||
1701 | if (pack->size() >= max_vlen) { | |||
1702 | assert(is_power_of_2(pack->size()), "sanity")do { if (!(is_power_of_2(pack->size()))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1702, "assert(" "is_power_of_2(pack->size())" ") failed" , "sanity"); ::breakpoint(); } } while (0); | |||
1703 | _packset.append(pack); | |||
1704 | pack = new Node_List(); | |||
1705 | } | |||
1706 | } | |||
1707 | _packset.at_put(i, NULL__null); | |||
1708 | } | |||
1709 | } | |||
1710 | } | |||
1711 | ||||
1712 | // Compress list. | |||
1713 | for (int i = _packset.length() - 1; i >= 0; i--) { | |||
1714 | Node_List* p1 = _packset.at(i); | |||
1715 | if (p1 == NULL__null) { | |||
1716 | _packset.remove_at(i); | |||
1717 | } | |||
1718 | } | |||
1719 | ||||
1720 | if (TraceSuperWord) { | |||
1721 | tty->print_cr("\nAfter combine_packs"); | |||
1722 | print_packset(); | |||
1723 | } | |||
1724 | } | |||
1725 | ||||
1726 | //-----------------------------construct_my_pack_map-------------------------- | |||
1727 | // Construct the map from nodes to packs. Only valid after the | |||
1728 | // point where a node is only in one pack (after combine_packs). | |||
1729 | void SuperWord::construct_my_pack_map() { | |||
1730 | Node_List* rslt = NULL__null; | |||
1731 | for (int i = 0; i < _packset.length(); i++) { | |||
1732 | Node_List* p = _packset.at(i); | |||
1733 | for (uint j = 0; j < p->size(); j++) { | |||
1734 | Node* s = p->at(j); | |||
1735 | #ifdef ASSERT1 | |||
1736 | if (my_pack(s) != NULL__null) { | |||
1737 | s->dump(1); | |||
1738 | tty->print_cr("packs[%d]:", i); | |||
1739 | print_pack(p); | |||
1740 | assert(false, "only in one pack")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1740, "assert(" "false" ") failed", "only in one pack"); :: breakpoint(); } } while (0); | |||
1741 | } | |||
1742 | #endif | |||
1743 | set_my_pack(s, p); | |||
1744 | } | |||
1745 | } | |||
1746 | } | |||
1747 | ||||
1748 | //------------------------------filter_packs--------------------------- | |||
1749 | // Remove packs that are not implemented or not profitable. | |||
1750 | void SuperWord::filter_packs() { | |||
1751 | // Remove packs that are not implemented | |||
1752 | for (int i = _packset.length() - 1; i >= 0; i--) { | |||
1753 | Node_List* pk = _packset.at(i); | |||
1754 | bool impl = implemented(pk); | |||
1755 | if (!impl) { | |||
1756 | #ifndef PRODUCT | |||
1757 | if ((TraceSuperWord && Verbose) || _vector_loop_debug) { | |||
1758 | tty->print_cr("Unimplemented"); | |||
1759 | pk->at(0)->dump(); | |||
1760 | } | |||
1761 | #endif | |||
1762 | remove_pack_at(i); | |||
1763 | } | |||
1764 | Node *n = pk->at(0); | |||
1765 | if (n->is_reduction()) { | |||
1766 | _num_reductions++; | |||
1767 | } else { | |||
1768 | _num_work_vecs++; | |||
1769 | } | |||
1770 | } | |||
1771 | ||||
1772 | // Remove packs that are not profitable | |||
1773 | bool changed; | |||
1774 | do { | |||
1775 | changed = false; | |||
1776 | for (int i = _packset.length() - 1; i >= 0; i--) { | |||
1777 | Node_List* pk = _packset.at(i); | |||
1778 | bool prof = profitable(pk); | |||
1779 | if (!prof) { | |||
1780 | #ifndef PRODUCT | |||
1781 | if ((TraceSuperWord && Verbose) || _vector_loop_debug) { | |||
1782 | tty->print_cr("Unprofitable"); | |||
1783 | pk->at(0)->dump(); | |||
1784 | } | |||
1785 | #endif | |||
1786 | remove_pack_at(i); | |||
1787 | changed = true; | |||
1788 | } | |||
1789 | } | |||
1790 | } while (changed); | |||
1791 | ||||
1792 | #ifndef PRODUCT | |||
1793 | if (TraceSuperWord) { | |||
1794 | tty->print_cr("\nAfter filter_packs"); | |||
1795 | print_packset(); | |||
1796 | tty->cr(); | |||
1797 | } | |||
1798 | #endif | |||
1799 | } | |||
1800 | ||||
1801 | //------------------------------merge_packs_to_cmovd--------------------------- | |||
1802 | // Merge CMoveD into new vector-nodes | |||
1803 | // We want to catch this pattern and subsume CmpD and Bool into CMoveD | |||
1804 | // | |||
1805 | // SubD ConD | |||
1806 | // / | / | |||
1807 | // / | / / | |||
1808 | // / | / / | |||
1809 | // / | / / | |||
1810 | // / / / | |||
1811 | // / / | / | |||
1812 | // v / | / | |||
1813 | // CmpD | / | |||
1814 | // | | / | |||
1815 | // v | / | |||
1816 | // Bool | / | |||
1817 | // \ | / | |||
1818 | // \ | / | |||
1819 | // \ | / | |||
1820 | // \ | / | |||
1821 | // \ v / | |||
1822 | // CMoveD | |||
1823 | // | |||
1824 | ||||
1825 | void SuperWord::merge_packs_to_cmovd() { | |||
1826 | for (int i = _packset.length() - 1; i >= 0; i--) { | |||
1827 | _cmovev_kit.make_cmovevd_pack(_packset.at(i)); | |||
1828 | } | |||
1829 | #ifndef PRODUCT | |||
1830 | if (TraceSuperWord) { | |||
1831 | tty->print_cr("\nSuperWord::merge_packs_to_cmovd(): After merge"); | |||
1832 | print_packset(); | |||
1833 | tty->cr(); | |||
1834 | } | |||
1835 | #endif | |||
1836 | } | |||
1837 | ||||
1838 | Node* CMoveKit::is_Bool_candidate(Node* def) const { | |||
1839 | Node* use = NULL__null; | |||
1840 | if (!def->is_Bool() || def->in(0) != NULL__null || def->outcnt() != 1) { | |||
1841 | return NULL__null; | |||
1842 | } | |||
1843 | for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { | |||
1844 | use = def->fast_out(j); | |||
1845 | if (!_sw->same_generation(def, use) || !use->is_CMove()) { | |||
1846 | return NULL__null; | |||
1847 | } | |||
1848 | } | |||
1849 | return use; | |||
1850 | } | |||
1851 | ||||
1852 | Node* CMoveKit::is_CmpD_candidate(Node* def) const { | |||
1853 | Node* use = NULL__null; | |||
1854 | if (!def->is_Cmp() || def->in(0) != NULL__null || def->outcnt() != 1) { | |||
1855 | return NULL__null; | |||
1856 | } | |||
1857 | for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { | |||
1858 | use = def->fast_out(j); | |||
1859 | if (!_sw->same_generation(def, use) || (use = is_Bool_candidate(use)) == NULL__null || !_sw->same_generation(def, use)) { | |||
1860 | return NULL__null; | |||
1861 | } | |||
1862 | } | |||
1863 | return use; | |||
1864 | } | |||
1865 | ||||
1866 | Node_List* CMoveKit::make_cmovevd_pack(Node_List* cmovd_pk) { | |||
1867 | Node *cmovd = cmovd_pk->at(0); | |||
1868 | if (!cmovd->is_CMove()) { | |||
1869 | return NULL__null; | |||
1870 | } | |||
1871 | if (cmovd->Opcode() != Op_CMoveF && cmovd->Opcode() != Op_CMoveD) { | |||
1872 | return NULL__null; | |||
1873 | } | |||
1874 | if (pack(cmovd) != NULL__null) { // already in the cmov pack | |||
1875 | return NULL__null; | |||
1876 | } | |||
1877 | if (cmovd->in(0) != NULL__null) { | |||
1878 | NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: CMoveD %d has control flow, escaping...", cmovd->_idx); cmovd->dump();})if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: CMoveD %d has control flow, escaping..." , cmovd->_idx); cmovd->dump();} | |||
1879 | return NULL__null; | |||
1880 | } | |||
1881 | ||||
1882 | Node* bol = cmovd->as_CMove()->in(CMoveNode::Condition); | |||
1883 | if (!bol->is_Bool() | |||
1884 | || bol->outcnt() != 1 | |||
1885 | || !_sw->same_generation(bol, cmovd) | |||
1886 | || bol->in(0) != NULL__null // BoolNode has control flow!! | |||
1887 | || _sw->my_pack(bol) == NULL__null) { | |||
1888 | NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: Bool %d does not fit CMoveD %d for building vector, escaping...", bol->_idx, cmovd->_idx); bol->dump();})if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: Bool %d does not fit CMoveD %d for building vector, escaping..." , bol->_idx, cmovd->_idx); bol->dump();} | |||
1889 | return NULL__null; | |||
1890 | } | |||
1891 | Node_List* bool_pk = _sw->my_pack(bol); | |||
1892 | if (bool_pk->size() != cmovd_pk->size() ) { | |||
1893 | return NULL__null; | |||
1894 | } | |||
1895 | ||||
1896 | Node* cmpd = bol->in(1); | |||
1897 | if (!cmpd->is_Cmp() | |||
1898 | || cmpd->outcnt() != 1 | |||
1899 | || !_sw->same_generation(cmpd, cmovd) | |||
1900 | || cmpd->in(0) != NULL__null // CmpDNode has control flow!! | |||
1901 | || _sw->my_pack(cmpd) == NULL__null) { | |||
1902 | NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: CmpD %d does not fit CMoveD %d for building vector, escaping...", cmpd->_idx, cmovd->_idx); cmpd->dump();})if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: CmpD %d does not fit CMoveD %d for building vector, escaping..." , cmpd->_idx, cmovd->_idx); cmpd->dump();} | |||
1903 | return NULL__null; | |||
1904 | } | |||
1905 | Node_List* cmpd_pk = _sw->my_pack(cmpd); | |||
1906 | if (cmpd_pk->size() != cmovd_pk->size() ) { | |||
1907 | return NULL__null; | |||
1908 | } | |||
1909 | ||||
1910 | if (!test_cmpd_pack(cmpd_pk, cmovd_pk)) { | |||
1911 | NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: cmpd pack for CmpD %d failed vectorization test", cmpd->_idx); cmpd->dump();})if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: cmpd pack for CmpD %d failed vectorization test" , cmpd->_idx); cmpd->dump();} | |||
1912 | return NULL__null; | |||
1913 | } | |||
1914 | ||||
1915 | Node_List* new_cmpd_pk = new Node_List(); | |||
1916 | uint sz = cmovd_pk->size() - 1; | |||
1917 | for (uint i = 0; i <= sz; ++i) { | |||
1918 | Node* cmov = cmovd_pk->at(i); | |||
1919 | Node* bol = bool_pk->at(i); | |||
1920 | Node* cmp = cmpd_pk->at(i); | |||
1921 | ||||
1922 | new_cmpd_pk->insert(i, cmov); | |||
1923 | ||||
1924 | map(cmov, new_cmpd_pk); | |||
1925 | map(bol, new_cmpd_pk); | |||
1926 | map(cmp, new_cmpd_pk); | |||
1927 | ||||
1928 | _sw->set_my_pack(cmov, new_cmpd_pk); // and keep old packs for cmp and bool | |||
1929 | } | |||
1930 | _sw->_packset.remove(cmovd_pk); | |||
1931 | _sw->_packset.remove(bool_pk); | |||
1932 | _sw->_packset.remove(cmpd_pk); | |||
1933 | _sw->_packset.append(new_cmpd_pk); | |||
1934 | NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print_cr("CMoveKit::make_cmovevd_pack: added syntactic CMoveD pack"); _sw->print_pack(new_cmpd_pk);})if(_sw->is_trace_cmov()) {tty->print_cr("CMoveKit::make_cmovevd_pack: added syntactic CMoveD pack" ); _sw->print_pack(new_cmpd_pk);} | |||
1935 | return new_cmpd_pk; | |||
1936 | } | |||
1937 | ||||
1938 | bool CMoveKit::test_cmpd_pack(Node_List* cmpd_pk, Node_List* cmovd_pk) { | |||
1939 | Node* cmpd0 = cmpd_pk->at(0); | |||
1940 | assert(cmpd0->is_Cmp(), "CMoveKit::test_cmpd_pack: should be CmpDNode")do { if (!(cmpd0->is_Cmp())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1940, "assert(" "cmpd0->is_Cmp()" ") failed", "CMoveKit::test_cmpd_pack: should be CmpDNode" ); ::breakpoint(); } } while (0); | |||
1941 | assert(cmovd_pk->at(0)->is_CMove(), "CMoveKit::test_cmpd_pack: should be CMoveD")do { if (!(cmovd_pk->at(0)->is_CMove())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1941, "assert(" "cmovd_pk->at(0)->is_CMove()" ") failed" , "CMoveKit::test_cmpd_pack: should be CMoveD"); ::breakpoint (); } } while (0); | |||
1942 | assert(cmpd_pk->size() == cmovd_pk->size(), "CMoveKit::test_cmpd_pack: should be same size")do { if (!(cmpd_pk->size() == cmovd_pk->size())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1942, "assert(" "cmpd_pk->size() == cmovd_pk->size()" ") failed", "CMoveKit::test_cmpd_pack: should be same size") ; ::breakpoint(); } } while (0); | |||
1943 | Node* in1 = cmpd0->in(1); | |||
1944 | Node* in2 = cmpd0->in(2); | |||
1945 | Node_List* in1_pk = _sw->my_pack(in1); | |||
1946 | Node_List* in2_pk = _sw->my_pack(in2); | |||
1947 | ||||
1948 | if ( (in1_pk != NULL__null && in1_pk->size() != cmpd_pk->size()) | |||
1949 | || (in2_pk != NULL__null && in2_pk->size() != cmpd_pk->size()) ) { | |||
1950 | return false; | |||
1951 | } | |||
1952 | ||||
1953 | // test if "all" in1 are in the same pack or the same node | |||
1954 | if (in1_pk == NULL__null) { | |||
1955 | for (uint j = 1; j < cmpd_pk->size(); j++) { | |||
1956 | if (cmpd_pk->at(j)->in(1) != in1) { | |||
1957 | return false; | |||
1958 | } | |||
1959 | }//for: in1_pk is not pack but all CmpD nodes in the pack have the same in(1) | |||
1960 | } | |||
1961 | // test if "all" in2 are in the same pack or the same node | |||
1962 | if (in2_pk == NULL__null) { | |||
1963 | for (uint j = 1; j < cmpd_pk->size(); j++) { | |||
1964 | if (cmpd_pk->at(j)->in(2) != in2) { | |||
1965 | return false; | |||
1966 | } | |||
1967 | }//for: in2_pk is not pack but all CmpD nodes in the pack have the same in(2) | |||
1968 | } | |||
1969 | //now check if cmpd_pk may be subsumed in vector built for cmovd_pk | |||
1970 | int cmovd_ind1, cmovd_ind2; | |||
1971 | if (cmpd_pk->at(0)->in(1) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfFalse) | |||
1972 | && cmpd_pk->at(0)->in(2) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfTrue)) { | |||
1973 | cmovd_ind1 = CMoveNode::IfFalse; | |||
1974 | cmovd_ind2 = CMoveNode::IfTrue; | |||
1975 | } else if (cmpd_pk->at(0)->in(2) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfFalse) | |||
1976 | && cmpd_pk->at(0)->in(1) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfTrue)) { | |||
1977 | cmovd_ind2 = CMoveNode::IfFalse; | |||
1978 | cmovd_ind1 = CMoveNode::IfTrue; | |||
1979 | } | |||
1980 | else { | |||
1981 | return false; | |||
1982 | } | |||
1983 | ||||
1984 | for (uint j = 1; j < cmpd_pk->size(); j++) { | |||
1985 | if (cmpd_pk->at(j)->in(1) != cmovd_pk->at(j)->as_CMove()->in(cmovd_ind1) | |||
1986 | || cmpd_pk->at(j)->in(2) != cmovd_pk->at(j)->as_CMove()->in(cmovd_ind2)) { | |||
1987 | return false; | |||
1988 | }//if | |||
1989 | } | |||
1990 | NOT_PRODUCT(if(_sw->is_trace_cmov()) { tty->print("CMoveKit::test_cmpd_pack: cmpd pack for 1st CmpD %d is OK for vectorization: ", cmpd0->_idx); cmpd0->dump(); })if(_sw->is_trace_cmov()) { tty->print("CMoveKit::test_cmpd_pack: cmpd pack for 1st CmpD %d is OK for vectorization: " , cmpd0->_idx); cmpd0->dump(); } | |||
1991 | return true; | |||
1992 | } | |||
1993 | ||||
1994 | //------------------------------implemented--------------------------- | |||
1995 | // Can code be generated for pack p? | |||
1996 | bool SuperWord::implemented(Node_List* p) { | |||
1997 | bool retValue = false; | |||
1998 | Node* p0 = p->at(0); | |||
1999 | if (p0 != NULL__null) { | |||
2000 | int opc = p0->Opcode(); | |||
2001 | uint size = p->size(); | |||
2002 | if (p0->is_reduction()) { | |||
2003 | const Type *arith_type = p0->bottom_type(); | |||
2004 | // Length 2 reductions of INT/LONG do not offer performance benefits | |||
2005 | if (((arith_type->basic_type() == T_INT) || (arith_type->basic_type() == T_LONG)) && (size == 2)) { | |||
2006 | retValue = false; | |||
2007 | } else { | |||
2008 | retValue = ReductionNode::implemented(opc, size, arith_type->basic_type()); | |||
2009 | } | |||
2010 | } else { | |||
2011 | retValue = VectorNode::implemented(opc, size, velt_basic_type(p0)); | |||
2012 | } | |||
2013 | if (!retValue) { | |||
2014 | if (is_cmov_pack(p)) { | |||
2015 | NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::implemented: found cmpd pack"); print_pack(p);})if(is_trace_cmov()) {tty->print_cr("SWPointer::implemented: found cmpd pack" ); print_pack(p);} | |||
2016 | return true; | |||
2017 | } | |||
2018 | } | |||
2019 | } | |||
2020 | return retValue; | |||
2021 | } | |||
2022 | ||||
2023 | bool SuperWord::is_cmov_pack(Node_List* p) { | |||
2024 | return _cmovev_kit.pack(p->at(0)) != NULL__null; | |||
2025 | } | |||
2026 | //------------------------------same_inputs-------------------------- | |||
2027 | // For pack p, are all idx operands the same? | |||
2028 | bool SuperWord::same_inputs(Node_List* p, int idx) { | |||
2029 | Node* p0 = p->at(0); | |||
2030 | uint vlen = p->size(); | |||
2031 | Node* p0_def = p0->in(idx); | |||
2032 | for (uint i = 1; i < vlen; i++) { | |||
2033 | Node* pi = p->at(i); | |||
2034 | Node* pi_def = pi->in(idx); | |||
2035 | if (p0_def != pi_def) { | |||
2036 | return false; | |||
2037 | } | |||
2038 | } | |||
2039 | return true; | |||
2040 | } | |||
2041 | ||||
2042 | //------------------------------profitable--------------------------- | |||
2043 | // For pack p, are all operands and all uses (with in the block) vector? | |||
2044 | bool SuperWord::profitable(Node_List* p) { | |||
2045 | Node* p0 = p->at(0); | |||
2046 | uint start, end; | |||
2047 | VectorNode::vector_operands(p0, &start, &end); | |||
2048 | ||||
2049 | // Return false if some inputs are not vectors or vectors with different | |||
2050 | // size or alignment. | |||
2051 | // Also, for now, return false if not scalar promotion case when inputs are | |||
2052 | // the same. Later, implement PackNode and allow differing, non-vector inputs | |||
2053 | // (maybe just the ones from outside the block.) | |||
2054 | for (uint i = start; i < end; i++) { | |||
2055 | if (!is_vector_use(p0, i)) { | |||
2056 | return false; | |||
2057 | } | |||
2058 | } | |||
2059 | // Check if reductions are connected | |||
2060 | if (p0->is_reduction()) { | |||
2061 | Node* second_in = p0->in(2); | |||
2062 | Node_List* second_pk = my_pack(second_in); | |||
2063 | if ((second_pk == NULL__null) || (_num_work_vecs == _num_reductions)) { | |||
2064 | // Remove reduction flag if no parent pack or if not enough work | |||
2065 | // to cover reduction expansion overhead | |||
2066 | p0->remove_flag(Node::Flag_is_reduction); | |||
2067 | return false; | |||
2068 | } else if (second_pk->size() != p->size()) { | |||
2069 | return false; | |||
2070 | } | |||
2071 | } | |||
2072 | if (VectorNode::is_shift(p0)) { | |||
2073 | // For now, return false if shift count is vector or not scalar promotion | |||
2074 | // case (different shift counts) because it is not supported yet. | |||
2075 | Node* cnt = p0->in(2); | |||
2076 | Node_List* cnt_pk = my_pack(cnt); | |||
2077 | if (cnt_pk != NULL__null) | |||
2078 | return false; | |||
2079 | if (!same_inputs(p, 2)) | |||
2080 | return false; | |||
2081 | } | |||
2082 | if (!p0->is_Store()) { | |||
2083 | // For now, return false if not all uses are vector. | |||
2084 | // Later, implement ExtractNode and allow non-vector uses (maybe | |||
2085 | // just the ones outside the block.) | |||
2086 | for (uint i = 0; i < p->size(); i++) { | |||
2087 | Node* def = p->at(i); | |||
2088 | if (is_cmov_pack_internal_node(p, def)) { | |||
2089 | continue; | |||
2090 | } | |||
2091 | for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { | |||
2092 | Node* use = def->fast_out(j); | |||
2093 | for (uint k = 0; k < use->req(); k++) { | |||
2094 | Node* n = use->in(k); | |||
2095 | if (def == n) { | |||
2096 | // Reductions should only have a Phi use at the loop head or a non-phi use | |||
2097 | // outside of the loop if it is the last element of the pack (e.g. SafePoint). | |||
2098 | if (def->is_reduction() && | |||
2099 | ((use->is_Phi() && use->in(0) == _lpt->_head) || | |||
2100 | (!_lpt->is_member(_phase->get_loop(_phase->ctrl_or_self(use))) && i == p->size()-1))) { | |||
2101 | continue; | |||
2102 | } | |||
2103 | if (!is_vector_use(use, k)) { | |||
2104 | return false; | |||
2105 | } | |||
2106 | } | |||
2107 | } | |||
2108 | } | |||
2109 | } | |||
2110 | } | |||
2111 | return true; | |||
2112 | } | |||
2113 | ||||
2114 | //------------------------------schedule--------------------------- | |||
2115 | // Adjust the memory graph for the packed operations | |||
2116 | void SuperWord::schedule() { | |||
2117 | ||||
2118 | // Co-locate in the memory graph the members of each memory pack | |||
2119 | for (int i = 0; i < _packset.length(); i++) { | |||
2120 | co_locate_pack(_packset.at(i)); | |||
2121 | } | |||
2122 | } | |||
2123 | ||||
2124 | //-------------------------------remove_and_insert------------------- | |||
2125 | // Remove "current" from its current position in the memory graph and insert | |||
2126 | // it after the appropriate insertion point (lip or uip). | |||
2127 | void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip, | |||
2128 | Node *uip, Unique_Node_List &sched_before) { | |||
2129 | Node* my_mem = current->in(MemNode::Memory); | |||
2130 | bool sched_up = sched_before.member(current); | |||
2131 | ||||
2132 | // remove current_store from its current position in the memmory graph | |||
2133 | for (DUIterator i = current->outs(); current->has_out(i); i++) { | |||
2134 | Node* use = current->out(i); | |||
2135 | if (use->is_Mem()) { | |||
2136 | assert(use->in(MemNode::Memory) == current, "must be")do { if (!(use->in(MemNode::Memory) == current)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2136, "assert(" "use->in(MemNode::Memory) == current" ") failed" , "must be"); ::breakpoint(); } } while (0); | |||
2137 | if (use == prev) { // connect prev to my_mem | |||
2138 | _igvn.replace_input_of(use, MemNode::Memory, my_mem); | |||
2139 | --i; //deleted this edge; rescan position | |||
2140 | } else if (sched_before.member(use)) { | |||
2141 | if (!sched_up) { // Will be moved together with current | |||
2142 | _igvn.replace_input_of(use, MemNode::Memory, uip); | |||
2143 | --i; //deleted this edge; rescan position | |||
2144 | } | |||
2145 | } else { | |||
2146 | if (sched_up) { // Will be moved together with current | |||
2147 | _igvn.replace_input_of(use, MemNode::Memory, lip); | |||
2148 | --i; //deleted this edge; rescan position | |||
2149 | } | |||
2150 | } | |||
2151 | } | |||
2152 | } | |||
2153 | ||||
2154 | Node *insert_pt = sched_up ? uip : lip; | |||
2155 | ||||
2156 | // all uses of insert_pt's memory state should use current's instead | |||
2157 | for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) { | |||
2158 | Node* use = insert_pt->out(i); | |||
2159 | if (use->is_Mem()) { | |||
2160 | assert(use->in(MemNode::Memory) == insert_pt, "must be")do { if (!(use->in(MemNode::Memory) == insert_pt)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2160, "assert(" "use->in(MemNode::Memory) == insert_pt" ") failed" , "must be"); ::breakpoint(); } } while (0); | |||
2161 | _igvn.replace_input_of(use, MemNode::Memory, current); | |||
2162 | --i; //deleted this edge; rescan position | |||
2163 | } else if (!sched_up && use->is_Phi() && use->bottom_type() == Type::MEMORY) { | |||
2164 | uint pos; //lip (lower insert point) must be the last one in the memory slice | |||
2165 | for (pos=1; pos < use->req(); pos++) { | |||
2166 | if (use->in(pos) == insert_pt) break; | |||
2167 | } | |||
2168 | _igvn.replace_input_of(use, pos, current); | |||
2169 | --i; | |||
2170 | } | |||
2171 | } | |||
2172 | ||||
2173 | //connect current to insert_pt | |||
2174 | _igvn.replace_input_of(current, MemNode::Memory, insert_pt); | |||
2175 | } | |||
2176 | ||||
2177 | //------------------------------co_locate_pack---------------------------------- | |||
2178 | // To schedule a store pack, we need to move any sandwiched memory ops either before | |||
2179 | // or after the pack, based upon dependence information: | |||
2180 | // (1) If any store in the pack depends on the sandwiched memory op, the | |||
2181 | // sandwiched memory op must be scheduled BEFORE the pack; | |||
2182 | // (2) If a sandwiched memory op depends on any store in the pack, the | |||
2183 | // sandwiched memory op must be scheduled AFTER the pack; | |||
2184 | // (3) If a sandwiched memory op (say, memA) depends on another sandwiched | |||
2185 | // memory op (say memB), memB must be scheduled before memA. So, if memA is | |||
2186 | // scheduled before the pack, memB must also be scheduled before the pack; | |||
2187 | // (4) If there is no dependence restriction for a sandwiched memory op, we simply | |||
2188 | // schedule this store AFTER the pack | |||
2189 | // (5) We know there is no dependence cycle, so there in no other case; | |||
2190 | // (6) Finally, all memory ops in another single pack should be moved in the same direction. | |||
2191 | // | |||
2192 | // To schedule a load pack, we use the memory state of either the first or the last load in | |||
2193 | // the pack, based on the dependence constraint. | |||
2194 | void SuperWord::co_locate_pack(Node_List* pk) { | |||
2195 | if (pk->at(0)->is_Store()) { | |||
2196 | MemNode* first = executed_first(pk)->as_Mem(); | |||
2197 | MemNode* last = executed_last(pk)->as_Mem(); | |||
2198 | Unique_Node_List schedule_before_pack; | |||
2199 | Unique_Node_List memops; | |||
2200 | ||||
2201 | MemNode* current = last->in(MemNode::Memory)->as_Mem(); | |||
2202 | MemNode* previous = last; | |||
2203 | while (true) { | |||
2204 | assert(in_bb(current), "stay in block")do { if (!(in_bb(current))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2204, "assert(" "in_bb(current)" ") failed", "stay in block" ); ::breakpoint(); } } while (0); | |||
2205 | memops.push(previous); | |||
2206 | for (DUIterator i = current->outs(); current->has_out(i); i++) { | |||
2207 | Node* use = current->out(i); | |||
2208 | if (use->is_Mem() && use != previous) | |||
2209 | memops.push(use); | |||
2210 | } | |||
2211 | if (current == first) break; | |||
2212 | previous = current; | |||
2213 | current = current->in(MemNode::Memory)->as_Mem(); | |||
2214 | } | |||
2215 | ||||
2216 | // determine which memory operations should be scheduled before the pack | |||
2217 | for (uint i = 1; i < memops.size(); i++) { | |||
2218 | Node *s1 = memops.at(i); | |||
2219 | if (!in_pack(s1, pk) && !schedule_before_pack.member(s1)) { | |||
2220 | for (uint j = 0; j< i; j++) { | |||
2221 | Node *s2 = memops.at(j); | |||
2222 | if (!independent(s1, s2)) { | |||
2223 | if (in_pack(s2, pk) || schedule_before_pack.member(s2)) { | |||
2224 | schedule_before_pack.push(s1); // s1 must be scheduled before | |||
2225 | Node_List* mem_pk = my_pack(s1); | |||
2226 | if (mem_pk != NULL__null) { | |||
2227 | for (uint ii = 0; ii < mem_pk->size(); ii++) { | |||
2228 | Node* s = mem_pk->at(ii); // follow partner | |||
2229 | if (memops.member(s) && !schedule_before_pack.member(s)) | |||
2230 | schedule_before_pack.push(s); | |||
2231 | } | |||
2232 | } | |||
2233 | break; | |||
2234 | } | |||
2235 | } | |||
2236 | } | |||
2237 | } | |||
2238 | } | |||
2239 | ||||
2240 | Node* upper_insert_pt = first->in(MemNode::Memory); | |||
2241 | // Following code moves loads connected to upper_insert_pt below aliased stores. | |||
2242 | // Collect such loads here and reconnect them back to upper_insert_pt later. | |||
2243 | memops.clear(); | |||
2244 | for (DUIterator i = upper_insert_pt->outs(); upper_insert_pt->has_out(i); i++) { | |||
2245 | Node* use = upper_insert_pt->out(i); | |||
2246 | if (use->is_Mem() && !use->is_Store()) { | |||
2247 | memops.push(use); | |||
2248 | } | |||
2249 | } | |||
2250 | ||||
2251 | MemNode* lower_insert_pt = last; | |||
2252 | previous = last; //previous store in pk | |||
2253 | current = last->in(MemNode::Memory)->as_Mem(); | |||
2254 | ||||
2255 | // start scheduling from "last" to "first" | |||
2256 | while (true) { | |||
2257 | assert(in_bb(current), "stay in block")do { if (!(in_bb(current))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2257, "assert(" "in_bb(current)" ") failed", "stay in block" ); ::breakpoint(); } } while (0); | |||
2258 | assert(in_pack(previous, pk), "previous stays in pack")do { if (!(in_pack(previous, pk))) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2258, "assert(" "in_pack(previous, pk)" ") failed", "previous stays in pack" ); ::breakpoint(); } } while (0); | |||
2259 | Node* my_mem = current->in(MemNode::Memory); | |||
2260 | ||||
2261 | if (in_pack(current, pk)) { | |||
2262 | // Forward users of my memory state (except "previous) to my input memory state | |||
2263 | for (DUIterator i = current->outs(); current->has_out(i); i++) { | |||
2264 | Node* use = current->out(i); | |||
2265 | if (use->is_Mem() && use != previous) { | |||
2266 | assert(use->in(MemNode::Memory) == current, "must be")do { if (!(use->in(MemNode::Memory) == current)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2266, "assert(" "use->in(MemNode::Memory) == current" ") failed" , "must be"); ::breakpoint(); } } while (0); | |||
2267 | if (schedule_before_pack.member(use)) { | |||
2268 | _igvn.replace_input_of(use, MemNode::Memory, upper_insert_pt); | |||
2269 | } else { | |||
2270 | _igvn.replace_input_of(use, MemNode::Memory, lower_insert_pt); | |||
2271 | } | |||
2272 | --i; // deleted this edge; rescan position | |||
2273 | } | |||
2274 | } | |||
2275 | previous = current; | |||
2276 | } else { // !in_pack(current, pk) ==> a sandwiched store | |||
2277 | remove_and_insert(current, previous, lower_insert_pt, upper_insert_pt, schedule_before_pack); | |||
2278 | } | |||
2279 | ||||
2280 | if (current == first) break; | |||
2281 | current = my_mem->as_Mem(); | |||
2282 | } // end while | |||
2283 | ||||
2284 | // Reconnect loads back to upper_insert_pt. | |||
2285 | for (uint i = 0; i < memops.size(); i++) { | |||
2286 | Node *ld = memops.at(i); | |||
2287 | if (ld->in(MemNode::Memory) != upper_insert_pt) { | |||
2288 | _igvn.replace_input_of(ld, MemNode::Memory, upper_insert_pt); | |||
2289 | } | |||
2290 | } | |||
2291 | } else if (pk->at(0)->is_Load()) { // Load pack | |||
2292 | // All loads in the pack should have the same memory state. By default, | |||
2293 | // we use the memory state of the last load. However, if any load could | |||
2294 | // not be moved down due to the dependence constraint, we use the memory | |||
2295 | // state of the first load. | |||
2296 | Node* mem_input = pick_mem_state(pk); | |||
2297 | _igvn.hash_delete(mem_input); | |||
2298 | // Give each load the same memory state | |||
2299 | for (uint i = 0; i < pk->size(); i++) { | |||
2300 | LoadNode* ld = pk->at(i)->as_Load(); | |||
2301 | _igvn.replace_input_of(ld, MemNode::Memory, mem_input); | |||
2302 | } | |||
2303 | } | |||
2304 | } | |||
2305 | ||||
2306 | // Finds the first and last memory state and then picks either of them by checking dependence constraints. | |||
2307 | // If a store is dependent on an earlier load then we need to pick the memory state of the first load and cannot | |||
2308 | // pick the memory state of the last load. | |||
2309 | Node* SuperWord::pick_mem_state(Node_List* pk) { | |||
2310 | Node* first_mem = find_first_mem_state(pk); | |||
2311 | Node* last_mem = find_last_mem_state(pk, first_mem); | |||
2312 | ||||
2313 | for (uint i = 0; i < pk->size(); i++) { | |||
2314 | Node* ld = pk->at(i); | |||
2315 | for (Node* current = last_mem; current != ld->in(MemNode::Memory); current = current->in(MemNode::Memory)) { | |||
2316 | assert(current->is_Mem() && in_bb(current), "unexpected memory")do { if (!(current->is_Mem() && in_bb(current))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2316, "assert(" "current->is_Mem() && in_bb(current)" ") failed", "unexpected memory"); ::breakpoint(); } } while ( 0); | |||
2317 | assert(current != first_mem, "corrupted memory graph")do { if (!(current != first_mem)) { (*g_assert_poison) = 'X'; ; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2317, "assert(" "current != first_mem" ") failed", "corrupted memory graph" ); ::breakpoint(); } } while (0); | |||
2318 | if (!independent(current, ld)) { | |||
2319 | // A later store depends on this load, pick the memory state of the first load. This can happen, for example, | |||
2320 | // if a load pack has interleaving stores that are part of a store pack which, however, is removed at the pack | |||
2321 | // filtering stage. This leaves us with only a load pack for which we cannot take the memory state of the | |||
2322 | // last load as the remaining unvectorized stores could interfere since they have a dependency to the loads. | |||
2323 | // Some stores could be executed before the load vector resulting in a wrong result. We need to take the | |||
2324 | // memory state of the first load to prevent this. | |||
2325 | return first_mem; | |||
2326 | } | |||
2327 | } | |||
2328 | } | |||
2329 | return last_mem; | |||
2330 | } | |||
2331 | ||||
2332 | // Walk the memory graph from the current first load until the | |||
2333 | // start of the loop and check if nodes on the way are memory | |||
2334 | // edges of loads in the pack. The last one we encounter is the | |||
2335 | // first load. | |||
2336 | Node* SuperWord::find_first_mem_state(Node_List* pk) { | |||
2337 | Node* first_mem = pk->at(0)->in(MemNode::Memory); | |||
2338 | for (Node* current = first_mem; in_bb(current); current = current->is_Phi() ? current->in(LoopNode::EntryControl) : current->in(MemNode::Memory)) { | |||
2339 | assert(current->is_Mem() || (current->is_Phi() && current->in(0) == bb()), "unexpected memory")do { if (!(current->is_Mem() || (current->is_Phi() && current->in(0) == bb()))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2339, "assert(" "current->is_Mem() || (current->is_Phi() && current->in(0) == bb())" ") failed", "unexpected memory"); ::breakpoint(); } } while ( 0); | |||
2340 | for (uint i = 1; i < pk->size(); i++) { | |||
2341 | Node* ld = pk->at(i); | |||
2342 | if (ld->in(MemNode::Memory) == current) { | |||
2343 | first_mem = current; | |||
2344 | break; | |||
2345 | } | |||
2346 | } | |||
2347 | } | |||
2348 | return first_mem; | |||
2349 | } | |||
2350 | ||||
2351 | // Find the last load by going over the pack again and walking | |||
2352 | // the memory graph from the loads of the pack to the memory of | |||
2353 | // the first load. If we encounter the memory of the current last | |||
2354 | // load, then we started from further down in the memory graph and | |||
2355 | // the load we started from is the last load. | |||
2356 | Node* SuperWord::find_last_mem_state(Node_List* pk, Node* first_mem) { | |||
2357 | Node* last_mem = pk->at(0)->in(MemNode::Memory); | |||
2358 | for (uint i = 0; i < pk->size(); i++) { | |||
2359 | Node* ld = pk->at(i); | |||
2360 | for (Node* current = ld->in(MemNode::Memory); current != first_mem; current = current->in(MemNode::Memory)) { | |||
2361 | assert(current->is_Mem() && in_bb(current), "unexpected memory")do { if (!(current->is_Mem() && in_bb(current))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2361, "assert(" "current->is_Mem() && in_bb(current)" ") failed", "unexpected memory"); ::breakpoint(); } } while ( 0); | |||
2362 | if (current->in(MemNode::Memory) == last_mem) { | |||
2363 | last_mem = ld->in(MemNode::Memory); | |||
2364 | } | |||
2365 | } | |||
2366 | } | |||
2367 | return last_mem; | |||
2368 | } | |||
2369 | ||||
2370 | #ifndef PRODUCT | |||
2371 | void SuperWord::print_loop(bool whole) { | |||
2372 | Node_Stack stack(_arena, _phase->C->unique() >> 2); | |||
2373 | Node_List rpo_list; | |||
2374 | VectorSet visited(_arena); | |||
2375 | visited.set(lpt()->_head->_idx); | |||
2376 | _phase->rpo(lpt()->_head, stack, visited, rpo_list); | |||
2377 | _phase->dump(lpt(), rpo_list.size(), rpo_list ); | |||
2378 | if(whole) { | |||
2379 | tty->print_cr("\n Whole loop tree"); | |||
2380 | _phase->dump(); | |||
2381 | tty->print_cr(" End of whole loop tree\n"); | |||
2382 | } | |||
2383 | } | |||
2384 | #endif | |||
2385 | ||||
2386 | //------------------------------output--------------------------- | |||
2387 | // Convert packs into vector node operations | |||
2388 | void SuperWord::output() { | |||
2389 | CountedLoopNode *cl = lpt()->_head->as_CountedLoop(); | |||
2390 | Compile* C = _phase->C; | |||
2391 | if (_packset.length() == 0) { | |||
2392 | if (cl->is_main_loop()) { | |||
2393 | // Instigate more unrolling for optimization when vectorization fails. | |||
2394 | C->set_major_progress(); | |||
2395 | cl->set_notpassed_slp(); | |||
2396 | cl->mark_do_unroll_only(); | |||
2397 | } | |||
2398 | return; | |||
2399 | } | |||
2400 | ||||
2401 | #ifndef PRODUCT | |||
2402 | if (TraceLoopOpts) { | |||
2403 | tty->print("SuperWord::output "); | |||
2404 | lpt()->dump_head(); | |||
2405 | } | |||
2406 | #endif | |||
2407 | ||||
2408 | if (cl->is_main_loop()) { | |||
2409 | // MUST ENSURE main loop's initial value is properly aligned: | |||
2410 | // (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0 | |||
2411 | ||||
2412 | align_initial_loop_index(align_to_ref()); | |||
2413 | ||||
2414 | // Insert extract (unpack) operations for scalar uses | |||
2415 | for (int i = 0; i < _packset.length(); i++) { | |||
2416 | insert_extracts(_packset.at(i)); | |||
2417 | } | |||
2418 | } | |||
2419 | ||||
2420 | uint max_vlen_in_bytes = 0; | |||
2421 | uint max_vlen = 0; | |||
2422 | bool can_process_post_loop = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop()); | |||
2423 | ||||
2424 | NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop before create_reserve_version_of_loop"); print_loop(true);})if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop before create_reserve_version_of_loop" ); print_loop(true);} | |||
2425 | ||||
2426 | CountedLoopReserveKit make_reversable(_phase, _lpt, do_reserve_copy()); | |||
2427 | ||||
2428 | NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop after create_reserve_version_of_loop"); print_loop(true);})if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop after create_reserve_version_of_loop" ); print_loop(true);} | |||
2429 | ||||
2430 | if (do_reserve_copy() && !make_reversable.has_reserved()) { | |||
2431 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: loop was not reserved correctly, exiting SuperWord");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("SWPointer::output: loop was not reserved correctly, exiting SuperWord" );} | |||
2432 | return; | |||
2433 | } | |||
2434 | ||||
2435 | for (int i = 0; i < _block.length(); i++) { | |||
2436 | Node* n = _block.at(i); | |||
2437 | Node_List* p = my_pack(n); | |||
2438 | if (p && n == executed_last(p)) { | |||
2439 | uint vlen = p->size(); | |||
2440 | uint vlen_in_bytes = 0; | |||
2441 | Node* vn = NULL__null; | |||
2442 | Node* low_adr = p->at(0); | |||
2443 | Node* first = executed_first(p); | |||
2444 | if (can_process_post_loop) { | |||
2445 | // override vlen with the main loops vector length | |||
2446 | vlen = cl->slp_max_unroll(); | |||
2447 | } | |||
2448 | NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d executed first, %d executed last in pack", first->_idx, n->_idx); print_pack(p);})if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d executed first, %d executed last in pack" , first->_idx, n->_idx); print_pack(p);} | |||
2449 | int opc = n->Opcode(); | |||
2450 | if (n->is_Load()) { | |||
2451 | Node* ctl = n->in(MemNode::Control); | |||
2452 | Node* mem = first->in(MemNode::Memory); | |||
2453 | SWPointer p1(n->as_Mem(), this, NULL__null, false); | |||
2454 | // Identify the memory dependency for the new loadVector node by | |||
2455 | // walking up through memory chain. | |||
2456 | // This is done to give flexibility to the new loadVector node so that | |||
2457 | // it can move above independent storeVector nodes. | |||
2458 | while (mem->is_StoreVector()) { | |||
2459 | SWPointer p2(mem->as_Mem(), this, NULL__null, false); | |||
2460 | int cmp = p1.cmp(p2); | |||
2461 | if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) { | |||
2462 | mem = mem->in(MemNode::Memory); | |||
2463 | } else { | |||
2464 | break; // dependent memory | |||
2465 | } | |||
2466 | } | |||
2467 | Node* adr = low_adr->in(MemNode::Address); | |||
2468 | const TypePtr* atyp = n->adr_type(); | |||
2469 | vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n), control_dependency(p)); | |||
2470 | vlen_in_bytes = vn->as_LoadVector()->memory_size(); | |||
2471 | } else if (n->is_Store()) { | |||
2472 | // Promote value to be stored to vector | |||
2473 | Node* val = vector_opd(p, MemNode::ValueIn); | |||
2474 | if (val == NULL__null) { | |||
2475 | if (do_reserve_copy()) { | |||
2476 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: val should not be NULL, exiting SuperWord");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("SWPointer::output: val should not be NULL, exiting SuperWord" );} | |||
2477 | return; //and reverse to backup IG | |||
2478 | } | |||
2479 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2479); ::breakpoint(); } while (0); | |||
2480 | } | |||
2481 | ||||
2482 | Node* ctl = n->in(MemNode::Control); | |||
2483 | Node* mem = first->in(MemNode::Memory); | |||
2484 | Node* adr = low_adr->in(MemNode::Address); | |||
2485 | const TypePtr* atyp = n->adr_type(); | |||
2486 | vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen); | |||
2487 | vlen_in_bytes = vn->as_StoreVector()->memory_size(); | |||
2488 | } else if (VectorNode::is_scalar_rotate(n)) { | |||
2489 | Node* in1 = low_adr->in(1); | |||
2490 | Node* in2 = p->at(0)->in(2); | |||
2491 | // If rotation count is non-constant or greater than 8bit value create a vector. | |||
2492 | if (!in2->is_Con() || !Matcher::supports_vector_constant_rotates(in2->get_int())) { | |||
2493 | in2 = vector_opd(p, 2); | |||
2494 | } | |||
2495 | vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n)); | |||
2496 | vlen_in_bytes = vn->as_Vector()->length_in_bytes(); | |||
2497 | } else if (VectorNode::is_roundopD(n)) { | |||
2498 | Node* in1 = vector_opd(p, 1); | |||
2499 | Node* in2 = low_adr->in(2); | |||
2500 | assert(in2->is_Con(), "Constant rounding mode expected.")do { if (!(in2->is_Con())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2500, "assert(" "in2->is_Con()" ") failed", "Constant rounding mode expected." ); ::breakpoint(); } } while (0); | |||
2501 | vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n)); | |||
2502 | vlen_in_bytes = vn->as_Vector()->length_in_bytes(); | |||
2503 | } else if (VectorNode::is_muladds2i(n)) { | |||
2504 | assert(n->req() == 5u, "MulAddS2I should have 4 operands.")do { if (!(n->req() == 5u)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2504, "assert(" "n->req() == 5u" ") failed", "MulAddS2I should have 4 operands." ); ::breakpoint(); } } while (0); | |||
2505 | Node* in1 = vector_opd(p, 1); | |||
2506 | Node* in2 = vector_opd(p, 2); | |||
2507 | vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n)); | |||
2508 | vlen_in_bytes = vn->as_Vector()->length_in_bytes(); | |||
2509 | } else if (n->req() == 3 && !is_cmov_pack(p)) { | |||
2510 | // Promote operands to vector | |||
2511 | Node* in1 = NULL__null; | |||
2512 | bool node_isa_reduction = n->is_reduction(); | |||
2513 | if (node_isa_reduction) { | |||
2514 | // the input to the first reduction operation is retained | |||
2515 | in1 = low_adr->in(1); | |||
2516 | } else { | |||
2517 | in1 = vector_opd(p, 1); | |||
2518 | if (in1 == NULL__null) { | |||
2519 | if (do_reserve_copy()) { | |||
2520 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: in1 should not be NULL, exiting SuperWord");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("SWPointer::output: in1 should not be NULL, exiting SuperWord" );} | |||
2521 | return; //and reverse to backup IG | |||
2522 | } | |||
2523 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2523); ::breakpoint(); } while (0); | |||
2524 | } | |||
2525 | } | |||
2526 | Node* in2 = vector_opd(p, 2); | |||
2527 | if (in2 == NULL__null) { | |||
2528 | if (do_reserve_copy()) { | |||
2529 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: in2 should not be NULL, exiting SuperWord");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("SWPointer::output: in2 should not be NULL, exiting SuperWord" );} | |||
2530 | return; //and reverse to backup IG | |||
2531 | } | |||
2532 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2532); ::breakpoint(); } while (0); | |||
2533 | } | |||
2534 | if (VectorNode::is_invariant_vector(in1) && (node_isa_reduction == false) && (n->is_Add() || n->is_Mul())) { | |||
2535 | // Move invariant vector input into second position to avoid register spilling. | |||
2536 | Node* tmp = in1; | |||
2537 | in1 = in2; | |||
2538 | in2 = tmp; | |||
2539 | } | |||
2540 | if (node_isa_reduction) { | |||
2541 | const Type *arith_type = n->bottom_type(); | |||
2542 | vn = ReductionNode::make(opc, NULL__null, in1, in2, arith_type->basic_type()); | |||
2543 | if (in2->is_Load()) { | |||
2544 | vlen_in_bytes = in2->as_LoadVector()->memory_size(); | |||
2545 | } else { | |||
2546 | vlen_in_bytes = in2->as_Vector()->length_in_bytes(); | |||
2547 | } | |||
2548 | } else { | |||
2549 | vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n)); | |||
2550 | vlen_in_bytes = vn->as_Vector()->length_in_bytes(); | |||
2551 | } | |||
2552 | } else if (opc == Op_SqrtF || opc == Op_SqrtD || | |||
2553 | opc == Op_AbsF || opc == Op_AbsD || | |||
2554 | opc == Op_AbsI || opc == Op_AbsL || | |||
2555 | opc == Op_NegF || opc == Op_NegD || | |||
2556 | opc == Op_PopCountI) { | |||
2557 | assert(n->req() == 2, "only one input expected")do { if (!(n->req() == 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2557, "assert(" "n->req() == 2" ") failed", "only one input expected" ); ::breakpoint(); } } while (0); | |||
2558 | Node* in = vector_opd(p, 1); | |||
2559 | vn = VectorNode::make(opc, in, NULL__null, vlen, velt_basic_type(n)); | |||
2560 | vlen_in_bytes = vn->as_Vector()->length_in_bytes(); | |||
2561 | } else if (opc == Op_ConvI2F || opc == Op_ConvL2D || | |||
2562 | opc == Op_ConvF2I || opc == Op_ConvD2L) { | |||
2563 | assert(n->req() == 2, "only one input expected")do { if (!(n->req() == 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2563, "assert(" "n->req() == 2" ") failed", "only one input expected" ); ::breakpoint(); } } while (0); | |||
2564 | BasicType bt = velt_basic_type(n); | |||
2565 | int vopc = VectorNode::opcode(opc, bt); | |||
2566 | Node* in = vector_opd(p, 1); | |||
2567 | vn = VectorCastNode::make(vopc, in, bt, vlen); | |||
2568 | vlen_in_bytes = vn->as_Vector()->length_in_bytes(); | |||
2569 | } else if (is_cmov_pack(p)) { | |||
2570 | if (can_process_post_loop) { | |||
2571 | // do not refactor of flow in post loop context | |||
2572 | return; | |||
2573 | } | |||
2574 | if (!n->is_CMove()) { | |||
2575 | continue; | |||
2576 | } | |||
2577 | // place here CMoveVDNode | |||
2578 | NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: print before CMove vectorization"); print_loop(false);})if(is_trace_cmov()) {tty->print_cr("SWPointer::output: print before CMove vectorization" ); print_loop(false);} | |||
2579 | Node* bol = n->in(CMoveNode::Condition); | |||
2580 | if (!bol->is_Bool() && bol->Opcode() == Op_ExtractI && bol->req() > 1 ) { | |||
2581 | NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d is not Bool node, trying its in(1) node %d", bol->_idx, bol->in(1)->_idx); bol->dump(); bol->in(1)->dump();})if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d is not Bool node, trying its in(1) node %d" , bol->_idx, bol->in(1)->_idx); bol->dump(); bol-> in(1)->dump();} | |||
2582 | bol = bol->in(1); //may be ExtractNode | |||
2583 | } | |||
2584 | ||||
2585 | assert(bol->is_Bool(), "should be BoolNode - too late to bail out!")do { if (!(bol->is_Bool())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2585, "assert(" "bol->is_Bool()" ") failed", "should be BoolNode - too late to bail out!" ); ::breakpoint(); } } while (0); | |||
2586 | if (!bol->is_Bool()) { | |||
2587 | if (do_reserve_copy()) { | |||
2588 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: expected %d bool node, exiting SuperWord", bol->_idx); bol->dump();})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("SWPointer::output: expected %d bool node, exiting SuperWord" , bol->_idx); bol->dump();} | |||
2589 | return; //and reverse to backup IG | |||
2590 | } | |||
2591 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2591); ::breakpoint(); } while (0); | |||
2592 | } | |||
2593 | ||||
2594 | int cond = (int)bol->as_Bool()->_test._test; | |||
2595 | Node* in_cc = _igvn.intcon(cond); | |||
2596 | NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created intcon in_cc node %d", in_cc->_idx); in_cc->dump();})if(is_trace_cmov()) {tty->print("SWPointer::output: created intcon in_cc node %d" , in_cc->_idx); in_cc->dump();} | |||
2597 | Node* cc = bol->clone(); | |||
2598 | cc->set_req(1, in_cc); | |||
2599 | NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created bool cc node %d", cc->_idx); cc->dump();})if(is_trace_cmov()) {tty->print("SWPointer::output: created bool cc node %d" , cc->_idx); cc->dump();} | |||
2600 | ||||
2601 | Node* src1 = vector_opd(p, 2); //2=CMoveNode::IfFalse | |||
2602 | if (src1 == NULL__null) { | |||
2603 | if (do_reserve_copy()) { | |||
2604 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src1 should not be NULL, exiting SuperWord");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("SWPointer::output: src1 should not be NULL, exiting SuperWord" );} | |||
2605 | return; //and reverse to backup IG | |||
2606 | } | |||
2607 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2607); ::breakpoint(); } while (0); | |||
2608 | } | |||
2609 | Node* src2 = vector_opd(p, 3); //3=CMoveNode::IfTrue | |||
2610 | if (src2 == NULL__null) { | |||
2611 | if (do_reserve_copy()) { | |||
2612 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src2 should not be NULL, exiting SuperWord");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("SWPointer::output: src2 should not be NULL, exiting SuperWord" );} | |||
2613 | return; //and reverse to backup IG | |||
2614 | } | |||
2615 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2615); ::breakpoint(); } while (0); | |||
2616 | } | |||
2617 | BasicType bt = velt_basic_type(n); | |||
2618 | const TypeVect* vt = TypeVect::make(bt, vlen); | |||
2619 | assert(bt == T_FLOAT || bt == T_DOUBLE, "Only vectorization for FP cmovs is supported")do { if (!(bt == T_FLOAT || bt == T_DOUBLE)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2619, "assert(" "bt == T_FLOAT || bt == T_DOUBLE" ") failed" , "Only vectorization for FP cmovs is supported"); ::breakpoint (); } } while (0); | |||
2620 | if (bt == T_FLOAT) { | |||
2621 | vn = new CMoveVFNode(cc, src1, src2, vt); | |||
2622 | } else { | |||
2623 | assert(bt == T_DOUBLE, "Expected double")do { if (!(bt == T_DOUBLE)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2623, "assert(" "bt == T_DOUBLE" ") failed", "Expected double" ); ::breakpoint(); } } while (0); | |||
2624 | vn = new CMoveVDNode(cc, src1, src2, vt); | |||
2625 | } | |||
2626 | NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created new CMove node %d: ", vn->_idx); vn->dump();})if(is_trace_cmov()) {tty->print("SWPointer::output: created new CMove node %d: " , vn->_idx); vn->dump();} | |||
2627 | } else if (opc == Op_FmaD || opc == Op_FmaF) { | |||
2628 | // Promote operands to vector | |||
2629 | Node* in1 = vector_opd(p, 1); | |||
2630 | Node* in2 = vector_opd(p, 2); | |||
2631 | Node* in3 = vector_opd(p, 3); | |||
2632 | vn = VectorNode::make(opc, in1, in2, in3, vlen, velt_basic_type(n)); | |||
2633 | vlen_in_bytes = vn->as_Vector()->length_in_bytes(); | |||
2634 | } else { | |||
2635 | if (do_reserve_copy()) { | |||
2636 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: ShouldNotReachHere, exiting SuperWord");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("SWPointer::output: ShouldNotReachHere, exiting SuperWord"); } | |||
2637 | return; //and reverse to backup IG | |||
2638 | } | |||
2639 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2639); ::breakpoint(); } while (0); | |||
2640 | } | |||
2641 | ||||
2642 | assert(vn != NULL, "sanity")do { if (!(vn != __null)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2642, "assert(" "vn != __null" ") failed", "sanity"); ::breakpoint (); } } while (0); | |||
2643 | if (vn == NULL__null) { | |||
2644 | if (do_reserve_copy()){ | |||
2645 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: got NULL node, cannot proceed, exiting SuperWord");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("SWPointer::output: got NULL node, cannot proceed, exiting SuperWord" );} | |||
2646 | return; //and reverse to backup IG | |||
2647 | } | |||
2648 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2648); ::breakpoint(); } while (0); | |||
2649 | } | |||
2650 | ||||
2651 | _block.at_put(i, vn); | |||
2652 | _igvn.register_new_node_with_optimizer(vn); | |||
2653 | _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0))); | |||
2654 | for (uint j = 0; j < p->size(); j++) { | |||
2655 | Node* pm = p->at(j); | |||
2656 | _igvn.replace_node(pm, vn); | |||
2657 | } | |||
2658 | _igvn._worklist.push(vn); | |||
2659 | ||||
2660 | if (can_process_post_loop) { | |||
2661 | // First check if the vector size is the maximum vector size which we can use on the machine; | |||
2662 | // other vector sizes have reduced values for predicated data mapping. | |||
2663 | if (vlen_in_bytes != (uint)MaxVectorSize) { | |||
2664 | return; | |||
2665 | } | |||
2666 | } | |||
2667 | ||||
2668 | if (vlen > max_vlen) { | |||
2669 | max_vlen = vlen; | |||
2670 | } | |||
2671 | if (vlen_in_bytes > max_vlen_in_bytes) { | |||
2672 | max_vlen_in_bytes = vlen_in_bytes; | |||
2673 | } | |||
2674 | #ifdef ASSERT1 | |||
2675 | if (TraceNewVectors) { | |||
2676 | tty->print("new Vector node: "); | |||
2677 | vn->dump(); | |||
2678 | } | |||
2679 | #endif | |||
2680 | } | |||
2681 | }//for (int i = 0; i < _block.length(); i++) | |||
2682 | ||||
2683 | if (max_vlen_in_bytes > C->max_vector_size()) { | |||
2684 | C->set_max_vector_size(max_vlen_in_bytes); | |||
2685 | } | |||
2686 | if (max_vlen_in_bytes > 0) { | |||
2687 | cl->mark_loop_vectorized(); | |||
2688 | } | |||
2689 | ||||
2690 | if (SuperWordLoopUnrollAnalysis) { | |||
2691 | if (cl->has_passed_slp()) { | |||
2692 | uint slp_max_unroll_factor = cl->slp_max_unroll(); | |||
2693 | if (slp_max_unroll_factor == max_vlen) { | |||
2694 | if (TraceSuperWordLoopUnrollAnalysis) { | |||
2695 | tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte); | |||
2696 | } | |||
2697 | ||||
2698 | // For atomic unrolled loops which are vector mapped, instigate more unrolling | |||
2699 | cl->set_notpassed_slp(); | |||
2700 | if (cl->is_main_loop()) { | |||
2701 | // if vector resources are limited, do not allow additional unrolling, also | |||
2702 | // do not unroll more on pure vector loops which were not reduced so that we can | |||
2703 | // program the post loop to single iteration execution. | |||
2704 | if (Matcher::float_pressure_limit() > 8) { | |||
2705 | C->set_major_progress(); | |||
2706 | cl->mark_do_unroll_only(); | |||
2707 | } | |||
2708 | } | |||
2709 | ||||
2710 | if (do_reserve_copy()) { | |||
2711 | if (can_process_post_loop) { | |||
2712 | // Now create the difference of trip and limit and use it as our mask index. | |||
2713 | // Note: We limited the unroll of the vectorized loop so that | |||
2714 | // only vlen-1 size iterations can remain to be mask programmed. | |||
2715 | Node *incr = cl->incr(); | |||
2716 | SubINode *index = new SubINode(cl->limit(), cl->init_trip()); | |||
2717 | _igvn.register_new_node_with_optimizer(index); | |||
2718 | SetVectMaskINode *mask = new SetVectMaskINode(_phase->get_ctrl(cl->init_trip()), index); | |||
2719 | _igvn.register_new_node_with_optimizer(mask); | |||
2720 | // make this a single iteration loop | |||
2721 | AddINode *new_incr = new AddINode(incr->in(1), mask); | |||
2722 | _igvn.register_new_node_with_optimizer(new_incr); | |||
2723 | _phase->set_ctrl(new_incr, _phase->get_ctrl(incr)); | |||
2724 | _igvn.replace_node(incr, new_incr); | |||
2725 | cl->mark_is_multiversioned(); | |||
2726 | cl->loopexit()->add_flag(Node::Flag_has_vector_mask_set); | |||
2727 | } | |||
2728 | } | |||
2729 | } | |||
2730 | } | |||
2731 | } | |||
2732 | ||||
2733 | if (do_reserve_copy()) { | |||
2734 | make_reversable.use_new(); | |||
2735 | } | |||
2736 | NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("\n Final loop after SuperWord"); print_loop(true);})if(is_trace_loop_reverse()) {tty->print_cr("\n Final loop after SuperWord" ); print_loop(true);} | |||
2737 | return; | |||
2738 | } | |||
2739 | ||||
2740 | //------------------------------vector_opd--------------------------- | |||
2741 | // Create a vector operand for the nodes in pack p for operand: in(opd_idx) | |||
2742 | Node* SuperWord::vector_opd(Node_List* p, int opd_idx) { | |||
2743 | Node* p0 = p->at(0); | |||
2744 | uint vlen = p->size(); | |||
2745 | Node* opd = p0->in(opd_idx); | |||
2746 | CountedLoopNode *cl = lpt()->_head->as_CountedLoop(); | |||
2747 | ||||
2748 | if (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop()) { | |||
2749 | // override vlen with the main loops vector length | |||
2750 | vlen = cl->slp_max_unroll(); | |||
2751 | } | |||
2752 | ||||
2753 | if (same_inputs(p, opd_idx)) { | |||
2754 | if (opd->is_Vector() || opd->is_LoadVector()) { | |||
2755 | assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector")do { if (!(((opd_idx != 2) || !VectorNode::is_shift(p0)))) { ( *g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2755, "assert(" "((opd_idx != 2) || !VectorNode::is_shift(p0))" ") failed", "shift's count can't be vector"); ::breakpoint() ; } } while (0); | |||
2756 | if (opd_idx == 2 && VectorNode::is_shift(p0)) { | |||
2757 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("shift's count can't be vector");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("shift's count can't be vector");} | |||
2758 | return NULL__null; | |||
2759 | } | |||
2760 | return opd; // input is matching vector | |||
2761 | } | |||
2762 | if ((opd_idx == 2) && VectorNode::is_shift(p0)) { | |||
2763 | Compile* C = _phase->C; | |||
2764 | Node* cnt = opd; | |||
2765 | // Vector instructions do not mask shift count, do it here. | |||
2766 | juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1); | |||
2767 | const TypeInt* t = opd->find_int_type(); | |||
2768 | if (t != NULL__null && t->is_con()) { | |||
2769 | juint shift = t->get_con(); | |||
2770 | if (shift > mask) { // Unsigned cmp | |||
2771 | cnt = ConNode::make(TypeInt::make(shift & mask)); | |||
2772 | } | |||
2773 | } else { | |||
2774 | if (t == NULL__null || t->_lo < 0 || t->_hi > (int)mask) { | |||
2775 | cnt = ConNode::make(TypeInt::make(mask)); | |||
2776 | _igvn.register_new_node_with_optimizer(cnt); | |||
2777 | cnt = new AndINode(opd, cnt); | |||
2778 | _igvn.register_new_node_with_optimizer(cnt); | |||
2779 | _phase->set_ctrl(cnt, _phase->get_ctrl(opd)); | |||
2780 | } | |||
2781 | assert(opd->bottom_type()->isa_int(), "int type only")do { if (!(opd->bottom_type()->isa_int())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2781, "assert(" "opd->bottom_type()->isa_int()" ") failed" , "int type only"); ::breakpoint(); } } while (0); | |||
2782 | if (!opd->bottom_type()->isa_int()) { | |||
2783 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should be int type only");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("Should be int type only");} | |||
2784 | return NULL__null; | |||
2785 | } | |||
2786 | } | |||
2787 | // Move shift count into vector register. | |||
2788 | cnt = VectorNode::shift_count(p0->Opcode(), cnt, vlen, velt_basic_type(p0)); | |||
2789 | _igvn.register_new_node_with_optimizer(cnt); | |||
2790 | _phase->set_ctrl(cnt, _phase->get_ctrl(opd)); | |||
2791 | return cnt; | |||
2792 | } | |||
2793 | assert(!opd->is_StoreVector(), "such vector is not expected here")do { if (!(!opd->is_StoreVector())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2793, "assert(" "!opd->is_StoreVector()" ") failed", "such vector is not expected here" ); ::breakpoint(); } } while (0); | |||
2794 | if (opd->is_StoreVector()) { | |||
2795 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("StoreVector is not expected here");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("StoreVector is not expected here");} | |||
2796 | return NULL__null; | |||
2797 | } | |||
2798 | // Convert scalar input to vector with the same number of elements as | |||
2799 | // p0's vector. Use p0's type because size of operand's container in | |||
2800 | // vector should match p0's size regardless operand's size. | |||
2801 | const Type* p0_t = NULL__null; | |||
2802 | VectorNode* vn = NULL__null; | |||
2803 | if (opd_idx == 2 && VectorNode::is_scalar_rotate(p0)) { | |||
2804 | Node* conv = opd; | |||
2805 | p0_t = TypeInt::INT; | |||
2806 | if (p0->bottom_type()->isa_long()) { | |||
2807 | p0_t = TypeLong::LONG; | |||
2808 | conv = new ConvI2LNode(opd); | |||
2809 | _igvn.register_new_node_with_optimizer(conv); | |||
2810 | _phase->set_ctrl(conv, _phase->get_ctrl(opd)); | |||
2811 | } | |||
2812 | vn = VectorNode::scalar2vector(conv, vlen, p0_t); | |||
2813 | } else { | |||
2814 | p0_t = velt_type(p0); | |||
2815 | vn = VectorNode::scalar2vector(opd, vlen, p0_t); | |||
2816 | } | |||
2817 | ||||
2818 | _igvn.register_new_node_with_optimizer(vn); | |||
2819 | _phase->set_ctrl(vn, _phase->get_ctrl(opd)); | |||
2820 | #ifdef ASSERT1 | |||
2821 | if (TraceNewVectors) { | |||
2822 | tty->print("new Vector node: "); | |||
2823 | vn->dump(); | |||
2824 | } | |||
2825 | #endif | |||
2826 | return vn; | |||
2827 | } | |||
2828 | ||||
2829 | // Insert pack operation | |||
2830 | BasicType bt = velt_basic_type(p0); | |||
2831 | PackNode* pk = PackNode::make(opd, vlen, bt); | |||
2832 | DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); )const BasicType opd_bt = opd->bottom_type()->basic_type (); | |||
2833 | ||||
2834 | for (uint i = 1; i < vlen; i++) { | |||
2835 | Node* pi = p->at(i); | |||
2836 | Node* in = pi->in(opd_idx); | |||
2837 | assert(my_pack(in) == NULL, "Should already have been unpacked")do { if (!(my_pack(in) == __null)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2837, "assert(" "my_pack(in) == __null" ") failed", "Should already have been unpacked" ); ::breakpoint(); } } while (0); | |||
2838 | if (my_pack(in) != NULL__null) { | |||
2839 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should already have been unpacked");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("Should already have been unpacked");} | |||
2840 | return NULL__null; | |||
2841 | } | |||
2842 | assert(opd_bt == in->bottom_type()->basic_type(), "all same type")do { if (!(opd_bt == in->bottom_type()->basic_type())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2842, "assert(" "opd_bt == in->bottom_type()->basic_type()" ") failed", "all same type"); ::breakpoint(); } } while (0); | |||
2843 | pk->add_opd(in); | |||
2844 | if (VectorNode::is_muladds2i(pi)) { | |||
2845 | Node* in2 = pi->in(opd_idx + 2); | |||
2846 | assert(my_pack(in2) == NULL, "Should already have been unpacked")do { if (!(my_pack(in2) == __null)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2846, "assert(" "my_pack(in2) == __null" ") failed", "Should already have been unpacked" ); ::breakpoint(); } } while (0); | |||
2847 | if (my_pack(in2) != NULL__null) { | |||
2848 | NOT_PRODUCT(if (is_trace_loop_reverse() || TraceLoopOpts) { tty->print_cr("Should already have been unpacked"); })if (is_trace_loop_reverse() || TraceLoopOpts) { tty->print_cr ("Should already have been unpacked"); } | |||
2849 | return NULL__null; | |||
2850 | } | |||
2851 | assert(opd_bt == in2->bottom_type()->basic_type(), "all same type")do { if (!(opd_bt == in2->bottom_type()->basic_type())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2851, "assert(" "opd_bt == in2->bottom_type()->basic_type()" ") failed", "all same type"); ::breakpoint(); } } while (0); | |||
2852 | pk->add_opd(in2); | |||
2853 | } | |||
2854 | } | |||
2855 | _igvn.register_new_node_with_optimizer(pk); | |||
2856 | _phase->set_ctrl(pk, _phase->get_ctrl(opd)); | |||
2857 | #ifdef ASSERT1 | |||
2858 | if (TraceNewVectors) { | |||
2859 | tty->print("new Vector node: "); | |||
2860 | pk->dump(); | |||
2861 | } | |||
2862 | #endif | |||
2863 | return pk; | |||
2864 | } | |||
2865 | ||||
2866 | //------------------------------insert_extracts--------------------------- | |||
2867 | // If a use of pack p is not a vector use, then replace the | |||
2868 | // use with an extract operation. | |||
2869 | void SuperWord::insert_extracts(Node_List* p) { | |||
2870 | if (p->at(0)->is_Store()) return; | |||
2871 | assert(_n_idx_list.is_empty(), "empty (node,index) list")do { if (!(_n_idx_list.is_empty())) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2871, "assert(" "_n_idx_list.is_empty()" ") failed", "empty (node,index) list" ); ::breakpoint(); } } while (0); | |||
2872 | ||||
2873 | // Inspect each use of each pack member. For each use that is | |||
2874 | // not a vector use, replace the use with an extract operation. | |||
2875 | ||||
2876 | for (uint i = 0; i < p->size(); i++) { | |||
2877 | Node* def = p->at(i); | |||
2878 | for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { | |||
2879 | Node* use = def->fast_out(j); | |||
2880 | for (uint k = 0; k < use->req(); k++) { | |||
2881 | Node* n = use->in(k); | |||
2882 | if (def == n) { | |||
2883 | Node_List* u_pk = my_pack(use); | |||
2884 | if ((u_pk == NULL__null || !is_cmov_pack(u_pk) || use->is_CMove()) && !is_vector_use(use, k)) { | |||
2885 | _n_idx_list.push(use, k); | |||
2886 | } | |||
2887 | } | |||
2888 | } | |||
2889 | } | |||
2890 | } | |||
2891 | ||||
2892 | while (_n_idx_list.is_nonempty()) { | |||
2893 | Node* use = _n_idx_list.node(); | |||
2894 | int idx = _n_idx_list.index(); | |||
2895 | _n_idx_list.pop(); | |||
2896 | Node* def = use->in(idx); | |||
2897 | ||||
2898 | if (def->is_reduction()) continue; | |||
2899 | ||||
2900 | // Insert extract operation | |||
2901 | _igvn.hash_delete(def); | |||
2902 | int def_pos = alignment(def) / data_size(def); | |||
2903 | ||||
2904 | Node* ex = ExtractNode::make(def, def_pos, velt_basic_type(def)); | |||
2905 | _igvn.register_new_node_with_optimizer(ex); | |||
2906 | _phase->set_ctrl(ex, _phase->get_ctrl(def)); | |||
2907 | _igvn.replace_input_of(use, idx, ex); | |||
2908 | _igvn._worklist.push(def); | |||
2909 | ||||
2910 | bb_insert_after(ex, bb_idx(def)); | |||
2911 | set_velt_type(ex, velt_type(def)); | |||
2912 | } | |||
2913 | } | |||
2914 | ||||
2915 | //------------------------------is_vector_use--------------------------- | |||
2916 | // Is use->in(u_idx) a vector use? | |||
2917 | bool SuperWord::is_vector_use(Node* use, int u_idx) { | |||
2918 | Node_List* u_pk = my_pack(use); | |||
2919 | if (u_pk == NULL__null) return false; | |||
2920 | if (use->is_reduction()) return true; | |||
2921 | Node* def = use->in(u_idx); | |||
2922 | Node_List* d_pk = my_pack(def); | |||
2923 | if (d_pk == NULL__null) { | |||
2924 | // check for scalar promotion | |||
2925 | Node* n = u_pk->at(0)->in(u_idx); | |||
2926 | for (uint i = 1; i < u_pk->size(); i++) { | |||
2927 | if (u_pk->at(i)->in(u_idx) != n) return false; | |||
2928 | } | |||
2929 | return true; | |||
2930 | } | |||
2931 | if (VectorNode::is_muladds2i(use)) { | |||
2932 | // MulAddS2I takes shorts and produces ints - hence the special checks | |||
2933 | // on alignment and size. | |||
2934 | if (u_pk->size() * 2 != d_pk->size()) { | |||
2935 | return false; | |||
2936 | } | |||
2937 | for (uint i = 0; i < MIN2(d_pk->size(), u_pk->size()); i++) { | |||
2938 | Node* ui = u_pk->at(i); | |||
2939 | Node* di = d_pk->at(i); | |||
2940 | if (alignment(ui) != alignment(di) * 2) { | |||
2941 | return false; | |||
2942 | } | |||
2943 | } | |||
2944 | return true; | |||
2945 | } | |||
2946 | if (u_pk->size() != d_pk->size()) | |||
2947 | return false; | |||
2948 | for (uint i = 0; i < u_pk->size(); i++) { | |||
2949 | Node* ui = u_pk->at(i); | |||
2950 | Node* di = d_pk->at(i); | |||
2951 | if (ui->in(u_idx) != di || alignment(ui) != alignment(di)) | |||
2952 | return false; | |||
2953 | } | |||
2954 | return true; | |||
2955 | } | |||
2956 | ||||
2957 | //------------------------------construct_bb--------------------------- | |||
2958 | // Construct reverse postorder list of block members | |||
2959 | bool SuperWord::construct_bb() { | |||
2960 | Node* entry = bb(); | |||
2961 | ||||
2962 | assert(_stk.length() == 0, "stk is empty")do { if (!(_stk.length() == 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2962, "assert(" "_stk.length() == 0" ") failed", "stk is empty" ); ::breakpoint(); } } while (0); | |||
2963 | assert(_block.length() == 0, "block is empty")do { if (!(_block.length() == 0)) { (*g_assert_poison) = 'X'; ; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2963, "assert(" "_block.length() == 0" ") failed", "block is empty" ); ::breakpoint(); } } while (0); | |||
2964 | assert(_data_entry.length() == 0, "data_entry is empty")do { if (!(_data_entry.length() == 0)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2964, "assert(" "_data_entry.length() == 0" ") failed", "data_entry is empty" ); ::breakpoint(); } } while (0); | |||
2965 | assert(_mem_slice_head.length() == 0, "mem_slice_head is empty")do { if (!(_mem_slice_head.length() == 0)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2965, "assert(" "_mem_slice_head.length() == 0" ") failed", "mem_slice_head is empty"); ::breakpoint(); } } while (0); | |||
2966 | assert(_mem_slice_tail.length() == 0, "mem_slice_tail is empty")do { if (!(_mem_slice_tail.length() == 0)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2966, "assert(" "_mem_slice_tail.length() == 0" ") failed", "mem_slice_tail is empty"); ::breakpoint(); } } while (0); | |||
2967 | ||||
2968 | // Find non-control nodes with no inputs from within block, | |||
2969 | // create a temporary map from node _idx to bb_idx for use | |||
2970 | // by the visited and post_visited sets, | |||
2971 | // and count number of nodes in block. | |||
2972 | int bb_ct = 0; | |||
2973 | for (uint i = 0; i < lpt()->_body.size(); i++) { | |||
2974 | Node *n = lpt()->_body.at(i); | |||
2975 | set_bb_idx(n, i); // Create a temporary map | |||
2976 | if (in_bb(n)) { | |||
2977 | if (n->is_LoadStore() || n->is_MergeMem() || | |||
2978 | (n->is_Proj() && !n->as_Proj()->is_CFG())) { | |||
2979 | // Bailout if the loop has LoadStore, MergeMem or data Proj | |||
2980 | // nodes. Superword optimization does not work with them. | |||
2981 | return false; | |||
2982 | } | |||
2983 | bb_ct++; | |||
2984 | if (!n->is_CFG()) { | |||
2985 | bool found = false; | |||
2986 | for (uint j = 0; j < n->req(); j++) { | |||
2987 | Node* def = n->in(j); | |||
2988 | if (def && in_bb(def)) { | |||
2989 | found = true; | |||
2990 | break; | |||
2991 | } | |||
2992 | } | |||
2993 | if (!found) { | |||
2994 | assert(n != entry, "can't be entry")do { if (!(n != entry)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2994, "assert(" "n != entry" ") failed", "can't be entry"); ::breakpoint(); } } while (0); | |||
2995 | _data_entry.push(n); | |||
2996 | } | |||
2997 | } | |||
2998 | } | |||
2999 | } | |||
3000 | ||||
3001 | // Find memory slices (head and tail) | |||
3002 | for (DUIterator_Fast imax, i = lp()->fast_outs(imax); i < imax; i++) { | |||
3003 | Node *n = lp()->fast_out(i); | |||
3004 | if (in_bb(n) && (n->is_Phi() && n->bottom_type() == Type::MEMORY)) { | |||
3005 | Node* n_tail = n->in(LoopNode::LoopBackControl); | |||
3006 | if (n_tail != n->in(LoopNode::EntryControl)) { | |||
3007 | if (!n_tail->is_Mem()) { | |||
3008 | assert(n_tail->is_Mem(), "unexpected node for memory slice: %s", n_tail->Name())do { if (!(n_tail->is_Mem())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3008, "assert(" "n_tail->is_Mem()" ") failed", "unexpected node for memory slice: %s" , n_tail->Name()); ::breakpoint(); } } while (0); | |||
3009 | return false; // Bailout | |||
3010 | } | |||
3011 | _mem_slice_head.push(n); | |||
3012 | _mem_slice_tail.push(n_tail); | |||
3013 | } | |||
3014 | } | |||
3015 | } | |||
3016 | ||||
3017 | // Create an RPO list of nodes in block | |||
3018 | ||||
3019 | visited_clear(); | |||
3020 | post_visited_clear(); | |||
3021 | ||||
3022 | // Push all non-control nodes with no inputs from within block, then control entry | |||
3023 | for (int j = 0; j < _data_entry.length(); j++) { | |||
3024 | Node* n = _data_entry.at(j); | |||
3025 | visited_set(n); | |||
3026 | _stk.push(n); | |||
3027 | } | |||
3028 | visited_set(entry); | |||
3029 | _stk.push(entry); | |||
3030 | ||||
3031 | // Do a depth first walk over out edges | |||
3032 | int rpo_idx = bb_ct - 1; | |||
3033 | int size; | |||
3034 | int reduction_uses = 0; | |||
3035 | while ((size = _stk.length()) > 0) { | |||
3036 | Node* n = _stk.top(); // Leave node on stack | |||
3037 | if (!visited_test_set(n)) { | |||
3038 | // forward arc in graph | |||
3039 | } else if (!post_visited_test(n)) { | |||
3040 | // cross or back arc | |||
3041 | for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { | |||
3042 | Node *use = n->fast_out(i); | |||
3043 | if (in_bb(use) && !visited_test(use) && | |||
3044 | // Don't go around backedge | |||
3045 | (!use->is_Phi() || n == entry)) { | |||
3046 | if (use->is_reduction()) { | |||
3047 | // First see if we can map the reduction on the given system we are on, then | |||
3048 | // make a data entry operation for each reduction we see. | |||
3049 | BasicType bt = use->bottom_type()->basic_type(); | |||
3050 | if (ReductionNode::implemented(use->Opcode(), Matcher::min_vector_size(bt), bt)) { | |||
3051 | reduction_uses++; | |||
3052 | } | |||
3053 | } | |||
3054 | _stk.push(use); | |||
3055 | } | |||
3056 | } | |||
3057 | if (_stk.length() == size) { | |||
3058 | // There were no additional uses, post visit node now | |||
3059 | _stk.pop(); // Remove node from stack | |||
3060 | assert(rpo_idx >= 0, "")do { if (!(rpo_idx >= 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3060, "assert(" "rpo_idx >= 0" ") failed", ""); ::breakpoint (); } } while (0); | |||
3061 | _block.at_put_grow(rpo_idx, n); | |||
3062 | rpo_idx--; | |||
3063 | post_visited_set(n); | |||
3064 | assert(rpo_idx >= 0 || _stk.is_empty(), "")do { if (!(rpo_idx >= 0 || _stk.is_empty())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3064, "assert(" "rpo_idx >= 0 || _stk.is_empty()" ") failed" , ""); ::breakpoint(); } } while (0); | |||
3065 | } | |||
3066 | } else { | |||
3067 | _stk.pop(); // Remove post-visited node from stack | |||
3068 | } | |||
3069 | }//while | |||
3070 | ||||
3071 | int ii_current = -1; | |||
3072 | unsigned int load_idx = (unsigned int)-1; | |||
3073 | // Build iterations order if needed | |||
3074 | bool build_ii_order = _do_vector_loop_experimental && _ii_order.is_empty(); | |||
3075 | // Create real map of block indices for nodes | |||
3076 | for (int j = 0; j < _block.length(); j++) { | |||
3077 | Node* n = _block.at(j); | |||
3078 | set_bb_idx(n, j); | |||
3079 | if (build_ii_order && n->is_Load()) { | |||
3080 | if (ii_current == -1) { | |||
3081 | ii_current = _clone_map.gen(n->_idx); | |||
3082 | _ii_order.push(ii_current); | |||
3083 | load_idx = _clone_map.idx(n->_idx); | |||
3084 | } else if (_clone_map.idx(n->_idx) == load_idx && _clone_map.gen(n->_idx) != ii_current) { | |||
3085 | ii_current = _clone_map.gen(n->_idx); | |||
3086 | _ii_order.push(ii_current); | |||
3087 | } | |||
3088 | } | |||
3089 | }//for | |||
3090 | ||||
3091 | // Ensure extra info is allocated. | |||
3092 | initialize_bb(); | |||
3093 | ||||
3094 | #ifndef PRODUCT | |||
3095 | if (_vector_loop_debug && _ii_order.length() > 0) { | |||
3096 | tty->print("SuperWord::construct_bb: List of generations: "); | |||
3097 | for (int jj = 0; jj < _ii_order.length(); ++jj) { | |||
3098 | tty->print(" %d:%d", jj, _ii_order.at(jj)); | |||
3099 | } | |||
3100 | tty->print_cr(" "); | |||
3101 | } | |||
3102 | if (TraceSuperWord) { | |||
3103 | print_bb(); | |||
3104 | tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE"); | |||
3105 | for (int m = 0; m < _data_entry.length(); m++) { | |||
3106 | tty->print("%3d ", m); | |||
3107 | _data_entry.at(m)->dump(); | |||
3108 | } | |||
3109 | tty->print_cr("\nmemory slices: %s", _mem_slice_head.length() > 0 ? "" : "NONE"); | |||
3110 | for (int m = 0; m < _mem_slice_head.length(); m++) { | |||
3111 | tty->print("%3d ", m); _mem_slice_head.at(m)->dump(); | |||
3112 | tty->print(" "); _mem_slice_tail.at(m)->dump(); | |||
3113 | } | |||
3114 | } | |||
3115 | #endif | |||
3116 | assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found")do { if (!(rpo_idx == -1 && bb_ct == _block.length()) ) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3116, "assert(" "rpo_idx == -1 && bb_ct == _block.length()" ") failed", "all block members found"); ::breakpoint(); } } while (0); | |||
3117 | return (_mem_slice_head.length() > 0) || (reduction_uses > 0) || (_data_entry.length() > 0); | |||
3118 | } | |||
3119 | ||||
3120 | //------------------------------initialize_bb--------------------------- | |||
3121 | // Initialize per node info | |||
3122 | void SuperWord::initialize_bb() { | |||
3123 | Node* last = _block.at(_block.length() - 1); | |||
3124 | grow_node_info(bb_idx(last)); | |||
3125 | } | |||
3126 | ||||
3127 | //------------------------------bb_insert_after--------------------------- | |||
3128 | // Insert n into block after pos | |||
3129 | void SuperWord::bb_insert_after(Node* n, int pos) { | |||
3130 | int n_pos = pos + 1; | |||
3131 | // Make room | |||
3132 | for (int i = _block.length() - 1; i >= n_pos; i--) { | |||
3133 | _block.at_put_grow(i+1, _block.at(i)); | |||
3134 | } | |||
3135 | for (int j = _node_info.length() - 1; j >= n_pos; j--) { | |||
3136 | _node_info.at_put_grow(j+1, _node_info.at(j)); | |||
3137 | } | |||
3138 | // Set value | |||
3139 | _block.at_put_grow(n_pos, n); | |||
3140 | _node_info.at_put_grow(n_pos, SWNodeInfo::initial); | |||
3141 | // Adjust map from node->_idx to _block index | |||
3142 | for (int i = n_pos; i < _block.length(); i++) { | |||
3143 | set_bb_idx(_block.at(i), i); | |||
3144 | } | |||
3145 | } | |||
3146 | ||||
3147 | //------------------------------compute_max_depth--------------------------- | |||
3148 | // Compute max depth for expressions from beginning of block | |||
3149 | // Use to prune search paths during test for independence. | |||
3150 | void SuperWord::compute_max_depth() { | |||
3151 | int ct = 0; | |||
3152 | bool again; | |||
3153 | do { | |||
3154 | again = false; | |||
3155 | for (int i = 0; i < _block.length(); i++) { | |||
3156 | Node* n = _block.at(i); | |||
3157 | if (!n->is_Phi()) { | |||
3158 | int d_orig = depth(n); | |||
3159 | int d_in = 0; | |||
3160 | for (DepPreds preds(n, _dg); !preds.done(); preds.next()) { | |||
3161 | Node* pred = preds.current(); | |||
3162 | if (in_bb(pred)) { | |||
3163 | d_in = MAX2(d_in, depth(pred)); | |||
3164 | } | |||
3165 | } | |||
3166 | if (d_in + 1 != d_orig) { | |||
3167 | set_depth(n, d_in + 1); | |||
3168 | again = true; | |||
3169 | } | |||
3170 | } | |||
3171 | } | |||
3172 | ct++; | |||
3173 | } while (again); | |||
3174 | ||||
3175 | if (TraceSuperWord && Verbose) { | |||
3176 | tty->print_cr("compute_max_depth iterated: %d times", ct); | |||
3177 | } | |||
3178 | } | |||
3179 | ||||
3180 | //-------------------------compute_vector_element_type----------------------- | |||
3181 | // Compute necessary vector element type for expressions | |||
3182 | // This propagates backwards a narrower integer type when the | |||
3183 | // upper bits of the value are not needed. | |||
3184 | // Example: char a,b,c; a = b + c; | |||
3185 | // Normally the type of the add is integer, but for packed character | |||
3186 | // operations the type of the add needs to be char. | |||
3187 | void SuperWord::compute_vector_element_type() { | |||
3188 | if (TraceSuperWord && Verbose) { | |||
3189 | tty->print_cr("\ncompute_velt_type:"); | |||
3190 | } | |||
3191 | ||||
3192 | // Initial type | |||
3193 | for (int i = 0; i < _block.length(); i++) { | |||
3194 | Node* n = _block.at(i); | |||
3195 | set_velt_type(n, container_type(n)); | |||
3196 | } | |||
3197 | ||||
3198 | // Propagate integer narrowed type backwards through operations | |||
3199 | // that don't depend on higher order bits | |||
3200 | for (int i = _block.length() - 1; i >= 0; i--) { | |||
3201 | Node* n = _block.at(i); | |||
3202 | // Only integer types need be examined | |||
3203 | const Type* vtn = velt_type(n); | |||
3204 | if (vtn->basic_type() == T_INT) { | |||
3205 | uint start, end; | |||
3206 | VectorNode::vector_operands(n, &start, &end); | |||
3207 | ||||
3208 | for (uint j = start; j < end; j++) { | |||
3209 | Node* in = n->in(j); | |||
3210 | // Don't propagate through a memory | |||
3211 | if (!in->is_Mem() && in_bb(in) && velt_type(in)->basic_type() == T_INT && | |||
3212 | data_size(n) < data_size(in)) { | |||
3213 | bool same_type = true; | |||
3214 | for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) { | |||
3215 | Node *use = in->fast_out(k); | |||
3216 | if (!in_bb(use) || !same_velt_type(use, n)) { | |||
3217 | same_type = false; | |||
3218 | break; | |||
3219 | } | |||
3220 | } | |||
3221 | if (same_type) { | |||
3222 | // In any Java arithmetic operation, operands of small integer types | |||
3223 | // (boolean, byte, char & short) should be promoted to int first. As | |||
3224 | // vector elements of small types don't have upper bits of int, for | |||
3225 | // RShiftI or AbsI operations, the compiler has to know the precise | |||
3226 | // signedness info of the 1st operand. These operations shouldn't be | |||
3227 | // vectorized if the signedness info is imprecise. | |||
3228 | const Type* vt = vtn; | |||
3229 | int op = in->Opcode(); | |||
3230 | if (VectorNode::is_shift_opcode(op) || op == Op_AbsI) { | |||
3231 | Node* load = in->in(1); | |||
3232 | if (load->is_Load() && in_bb(load) && (velt_type(load)->basic_type() == T_INT)) { | |||
3233 | // Only Load nodes distinguish signed (LoadS/LoadB) and unsigned | |||
3234 | // (LoadUS/LoadUB) values. Store nodes only have one version. | |||
3235 | vt = velt_type(load); | |||
3236 | } else if (op != Op_LShiftI) { | |||
3237 | // Widen type to int to avoid the creation of vector nodes. Note | |||
3238 | // that left shifts work regardless of the signedness. | |||
3239 | vt = TypeInt::INT; | |||
3240 | } | |||
3241 | } | |||
3242 | set_velt_type(in, vt); | |||
3243 | } | |||
3244 | } | |||
3245 | } | |||
3246 | } | |||
3247 | } | |||
3248 | #ifndef PRODUCT | |||
3249 | if (TraceSuperWord && Verbose) { | |||
3250 | for (int i = 0; i < _block.length(); i++) { | |||
3251 | Node* n = _block.at(i); | |||
3252 | velt_type(n)->dump(); | |||
3253 | tty->print("\t"); | |||
3254 | n->dump(); | |||
3255 | } | |||
3256 | } | |||
3257 | #endif | |||
3258 | } | |||
3259 | ||||
3260 | //------------------------------memory_alignment--------------------------- | |||
3261 | // Alignment within a vector memory reference | |||
3262 | int SuperWord::memory_alignment(MemNode* s, int iv_adjust) { | |||
3263 | #ifndef PRODUCT | |||
3264 | if ((TraceSuperWord && Verbose) || is_trace_alignment()) { | |||
3265 | tty->print("SuperWord::memory_alignment within a vector memory reference for %d: ", s->_idx); s->dump(); | |||
3266 | } | |||
3267 | #endif | |||
3268 | NOT_PRODUCT(SWPointer::Tracer::Depth ddd(0);)SWPointer::Tracer::Depth ddd(0); | |||
3269 | SWPointer p(s, this, NULL__null, false); | |||
3270 | if (!p.valid()) { | |||
3271 | NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: SWPointer p invalid, return bottom_align");)if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: SWPointer p invalid, return bottom_align" ); | |||
3272 | return bottom_align; | |||
3273 | } | |||
3274 | int vw = get_vw_bytes_special(s); | |||
3275 | if (vw < 2) { | |||
3276 | NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: vector_width_in_bytes < 2, return bottom_align");)if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: vector_width_in_bytes < 2, return bottom_align" ); | |||
3277 | return bottom_align; // No vectors for this type | |||
3278 | } | |||
3279 | int offset = p.offset_in_bytes(); | |||
3280 | offset += iv_adjust*p.memory_size(); | |||
3281 | int off_rem = offset % vw; | |||
3282 | int off_mod = off_rem >= 0 ? off_rem : off_rem + vw; | |||
3283 | #ifndef PRODUCT | |||
3284 | if ((TraceSuperWord && Verbose) || is_trace_alignment()) { | |||
3285 | tty->print_cr("SWPointer::memory_alignment: off_rem = %d, off_mod = %d", off_rem, off_mod); | |||
3286 | } | |||
3287 | #endif | |||
3288 | return off_mod; | |||
3289 | } | |||
3290 | ||||
3291 | //---------------------------container_type--------------------------- | |||
3292 | // Smallest type containing range of values | |||
3293 | const Type* SuperWord::container_type(Node* n) { | |||
3294 | if (n->is_Mem()) { | |||
3295 | BasicType bt = n->as_Mem()->memory_type(); | |||
3296 | if (n->is_Store() && (bt == T_CHAR)) { | |||
3297 | // Use T_SHORT type instead of T_CHAR for stored values because any | |||
3298 | // preceding arithmetic operation extends values to signed Int. | |||
3299 | bt = T_SHORT; | |||
3300 | } | |||
3301 | if (n->Opcode() == Op_LoadUB) { | |||
3302 | // Adjust type for unsigned byte loads, it is important for right shifts. | |||
3303 | // T_BOOLEAN is used because there is no basic type representing type | |||
3304 | // TypeInt::UBYTE. Use of T_BOOLEAN for vectors is fine because only | |||
3305 | // size (one byte) and sign is important. | |||
3306 | bt = T_BOOLEAN; | |||
3307 | } | |||
3308 | return Type::get_const_basic_type(bt); | |||
3309 | } | |||
3310 | const Type* t = _igvn.type(n); | |||
3311 | if (t->basic_type() == T_INT) { | |||
3312 | // A narrow type of arithmetic operations will be determined by | |||
3313 | // propagating the type of memory operations. | |||
3314 | return TypeInt::INT; | |||
3315 | } | |||
3316 | return t; | |||
3317 | } | |||
3318 | ||||
3319 | bool SuperWord::same_velt_type(Node* n1, Node* n2) { | |||
3320 | const Type* vt1 = velt_type(n1); | |||
3321 | const Type* vt2 = velt_type(n2); | |||
3322 | if (vt1->basic_type() == T_INT && vt2->basic_type() == T_INT) { | |||
3323 | // Compare vectors element sizes for integer types. | |||
3324 | return data_size(n1) == data_size(n2); | |||
3325 | } | |||
3326 | return vt1 == vt2; | |||
3327 | } | |||
3328 | ||||
3329 | //------------------------------in_packset--------------------------- | |||
3330 | // Are s1 and s2 in a pack pair and ordered as s1,s2? | |||
3331 | bool SuperWord::in_packset(Node* s1, Node* s2) { | |||
3332 | for (int i = 0; i < _packset.length(); i++) { | |||
3333 | Node_List* p = _packset.at(i); | |||
3334 | assert(p->size() == 2, "must be")do { if (!(p->size() == 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3334, "assert(" "p->size() == 2" ") failed", "must be"); ::breakpoint(); } } while (0); | |||
3335 | if (p->at(0) == s1 && p->at(p->size()-1) == s2) { | |||
3336 | return true; | |||
3337 | } | |||
3338 | } | |||
3339 | return false; | |||
3340 | } | |||
3341 | ||||
3342 | //------------------------------in_pack--------------------------- | |||
3343 | // Is s in pack p? | |||
3344 | Node_List* SuperWord::in_pack(Node* s, Node_List* p) { | |||
3345 | for (uint i = 0; i < p->size(); i++) { | |||
3346 | if (p->at(i) == s) { | |||
3347 | return p; | |||
3348 | } | |||
3349 | } | |||
3350 | return NULL__null; | |||
3351 | } | |||
3352 | ||||
3353 | //------------------------------remove_pack_at--------------------------- | |||
3354 | // Remove the pack at position pos in the packset | |||
3355 | void SuperWord::remove_pack_at(int pos) { | |||
3356 | Node_List* p = _packset.at(pos); | |||
3357 | for (uint i = 0; i < p->size(); i++) { | |||
3358 | Node* s = p->at(i); | |||
3359 | set_my_pack(s, NULL__null); | |||
3360 | } | |||
3361 | _packset.remove_at(pos); | |||
3362 | } | |||
3363 | ||||
3364 | void SuperWord::packset_sort(int n) { | |||
3365 | // simple bubble sort so that we capitalize with O(n) when its already sorted | |||
3366 | while (n != 0) { | |||
3367 | bool swapped = false; | |||
3368 | for (int i = 1; i < n; i++) { | |||
3369 | Node_List* q_low = _packset.at(i-1); | |||
3370 | Node_List* q_i = _packset.at(i); | |||
3371 | ||||
3372 | // only swap when we find something to swap | |||
3373 | if (alignment(q_low->at(0)) > alignment(q_i->at(0))) { | |||
3374 | Node_List* t = q_i; | |||
3375 | *(_packset.adr_at(i)) = q_low; | |||
3376 | *(_packset.adr_at(i-1)) = q_i; | |||
3377 | swapped = true; | |||
3378 | } | |||
3379 | } | |||
3380 | if (swapped == false) break; | |||
3381 | n--; | |||
3382 | } | |||
3383 | } | |||
3384 | ||||
3385 | //------------------------------executed_first--------------------------- | |||
3386 | // Return the node executed first in pack p. Uses the RPO block list | |||
3387 | // to determine order. | |||
3388 | Node* SuperWord::executed_first(Node_List* p) { | |||
3389 | Node* n = p->at(0); | |||
3390 | int n_rpo = bb_idx(n); | |||
3391 | for (uint i = 1; i < p->size(); i++) { | |||
3392 | Node* s = p->at(i); | |||
3393 | int s_rpo = bb_idx(s); | |||
3394 | if (s_rpo < n_rpo) { | |||
3395 | n = s; | |||
3396 | n_rpo = s_rpo; | |||
3397 | } | |||
3398 | } | |||
3399 | return n; | |||
3400 | } | |||
3401 | ||||
3402 | //------------------------------executed_last--------------------------- | |||
3403 | // Return the node executed last in pack p. | |||
3404 | Node* SuperWord::executed_last(Node_List* p) { | |||
3405 | Node* n = p->at(0); | |||
3406 | int n_rpo = bb_idx(n); | |||
3407 | for (uint i = 1; i < p->size(); i++) { | |||
3408 | Node* s = p->at(i); | |||
3409 | int s_rpo = bb_idx(s); | |||
3410 | if (s_rpo > n_rpo) { | |||
3411 | n = s; | |||
3412 | n_rpo = s_rpo; | |||
3413 | } | |||
3414 | } | |||
3415 | return n; | |||
3416 | } | |||
3417 | ||||
3418 | LoadNode::ControlDependency SuperWord::control_dependency(Node_List* p) { | |||
3419 | LoadNode::ControlDependency dep = LoadNode::DependsOnlyOnTest; | |||
3420 | for (uint i = 0; i < p->size(); i++) { | |||
3421 | Node* n = p->at(i); | |||
3422 | assert(n->is_Load(), "only meaningful for loads")do { if (!(n->is_Load())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3422, "assert(" "n->is_Load()" ") failed", "only meaningful for loads" ); ::breakpoint(); } } while (0); | |||
3423 | if (!n->depends_only_on_test()) { | |||
3424 | if (n->as_Load()->has_unknown_control_dependency() && | |||
3425 | dep != LoadNode::Pinned) { | |||
3426 | // Upgrade to unknown control... | |||
3427 | dep = LoadNode::UnknownControl; | |||
3428 | } else { | |||
3429 | // Otherwise, we must pin it. | |||
3430 | dep = LoadNode::Pinned; | |||
3431 | } | |||
3432 | } | |||
3433 | } | |||
3434 | return dep; | |||
3435 | } | |||
3436 | ||||
3437 | ||||
3438 | //----------------------------align_initial_loop_index--------------------------- | |||
3439 | // Adjust pre-loop limit so that in main loop, a load/store reference | |||
3440 | // to align_to_ref will be at position zero in the vector. | |||
3441 | // (iv + k) mod vector_align == 0 | |||
//
// align_to_ref: the memory reference the main-loop vectors are aligned to.
//               An SWPointer built from it must be valid (asserted below).
//
// Effect: builds integer nodes, all given the pre-loop entry control, that
// compute N = (expr) & (v_align - 1) and the adjusted limit lim0 +/- N. That
// limit is combined with the original loop limit via Min (stride > 0) or Max
// (otherwise), and the result replaces input 1 of the pre-loop's Opaque1
// limit node. Must only be called while lp() is a main loop (asserted).
3442 | void SuperWord::align_initial_loop_index(MemNode* align_to_ref) { | |||
3443 | assert(lp()->is_main_loop(), "")do { if (!(lp()->is_main_loop())) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3443, "assert(" "lp()->is_main_loop()" ") failed", ""); :: breakpoint(); } } while (0); | |||
3444 | CountedLoopEndNode* pre_end = pre_loop_end(); | |||
3445 | Node* pre_opaq1 = pre_end->limit(); | |||
3446 | assert(pre_opaq1->Opcode() == Op_Opaque1, "")do { if (!(pre_opaq1->Opcode() == Op_Opaque1)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3446, "assert(" "pre_opaq1->Opcode() == Op_Opaque1" ") failed" , ""); ::breakpoint(); } } while (0); | |||
3447 | Opaque1Node* pre_opaq = (Opaque1Node*)pre_opaq1; | |||
3448 | Node* lim0 = pre_opaq->in(1); | |||
3449 | ||||
3450 | // Where we put new limit calculations | |||
3451 | Node* pre_ctrl = pre_loop_head()->in(LoopNode::EntryControl); | |||
3452 | ||||
3453 | // Ensure the original loop limit is available from the | |||
3454 | // pre-loop Opaque1 node. | |||
3455 | Node* orig_limit = pre_opaq->original_loop_limit(); | |||
3456 | assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "")do { if (!(orig_limit != __null && _igvn.type(orig_limit ) != Type::TOP)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3456, "assert(" "orig_limit != __null && _igvn.type(orig_limit) != Type::TOP" ") failed", ""); ::breakpoint(); } } while (0); | |||
3457 | ||||
3458 | SWPointer align_to_ref_p(align_to_ref, this, NULL__null, false); | |||
3459 | assert(align_to_ref_p.valid(), "sanity")do { if (!(align_to_ref_p.valid())) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3459, "assert(" "align_to_ref_p.valid()" ") failed", "sanity" ); ::breakpoint(); } } while (0); | |||
3460 | ||||
3461 | // Given: | |||
3462 | // lim0 == original pre loop limit | |||
3463 | // V == v_align (power of 2) | |||
3464 | // invar == extra invariant piece of the address expression | |||
3465 | // e == offset [ +/- invar ] | |||
3466 | // | |||
3467 | // When reassociating expressions involving '%' the basic rules are: | |||
3468 | // (a - b) % k == 0 => a % k == b % k | |||
3469 | // and: | |||
3470 | // (a + b) % k == 0 => a % k == (k - b) % k | |||
3471 | // | |||
3472 | // For stride > 0 && scale > 0, | |||
3473 | // Derive the new pre-loop limit "lim" such that the two constraints: | |||
3474 | // (1) lim = lim0 + N (where N is some positive integer < V) | |||
3475 | // (2) (e + lim) % V == 0 | |||
3476 | // are true. | |||
3477 | // | |||
3478 | // Substituting (1) into (2), | |||
3479 | // (e + lim0 + N) % V == 0 | |||
3480 | // solve for N: | |||
3481 | // N = (V - (e + lim0)) % V | |||
3482 | // substitute back into (1), so that new limit | |||
3483 | // lim = lim0 + (V - (e + lim0)) % V | |||
3484 | // | |||
3485 | // For stride > 0 && scale < 0 | |||
3486 | // Constraints: | |||
3487 | // lim = lim0 + N | |||
3488 | // (e - lim) % V == 0 | |||
3489 | // Solving for lim: | |||
3490 | // (e - lim0 - N) % V == 0 | |||
3491 | // N = (e - lim0) % V | |||
3492 | // lim = lim0 + (e - lim0) % V | |||
3493 | // | |||
3494 | // For stride < 0 && scale > 0 | |||
3495 | // Constraints: | |||
3496 | // lim = lim0 - N | |||
3497 | // (e + lim) % V == 0 | |||
3498 | // Solving for lim: | |||
3499 | // (e + lim0 - N) % V == 0 | |||
3500 | // N = (e + lim0) % V | |||
3501 | // lim = lim0 - (e + lim0) % V | |||
3502 | // | |||
3503 | // For stride < 0 && scale < 0 | |||
3504 | // Constraints: | |||
3505 | // lim = lim0 - N | |||
3506 | // (e - lim) % V == 0 | |||
3507 | // Solving for lim: | |||
3508 | // (e - lim0 + N) % V == 0 | |||
3509 | // N = (V - (e - lim0)) % V | |||
3510 | // lim = lim0 - (V - (e - lim0)) % V | |||
3511 | ||||
3512 | int vw = vector_width_in_bytes(align_to_ref); | |||
3513 | int stride = iv_stride(); | |||
3514 | int scale = align_to_ref_p.scale_in_bytes(); | |||
3515 | int elt_size = align_to_ref_p.memory_size(); | |||
  // From here on the arithmetic is in units of elements, not bytes:
  // v_align is the vector width in elements, offset the constant offset in elements.
3516 | int v_align = vw / elt_size; | |||
3517 | assert(v_align > 1, "sanity")do { if (!(v_align > 1)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3517, "assert(" "v_align > 1" ") failed", "sanity"); ::breakpoint (); } } while (0); | |||
3518 | int offset = align_to_ref_p.offset_in_bytes() / elt_size; | |||
3519 | Node *offsn = _igvn.intcon(offset); | |||
3520 | ||||
3521 | Node *e = offsn; | |||
3522 | if (align_to_ref_p.invar() != NULL__null) { | |||
3523 | // incorporate any extra invariant piece producing (offset +/- invar) >>> log2(elt) | |||
3524 | Node* log2_elt = _igvn.intcon(exact_log2(elt_size)); | |||
3525 | Node* invar = align_to_ref_p.invar(); | |||
3526 | if (_igvn.type(invar)->isa_long()) { | |||
3527 | // Computations are done % (vector width/element size) so it's | |||
3528 | // safe to simply convert invar to an int and lose the upper 32 | |||
3529 | // bit half. | |||
3530 | invar = new ConvL2INode(invar); | |||
3531 | _igvn.register_new_node_with_optimizer(invar); | |||
3532 | } | |||
3533 | Node* invar_scale = align_to_ref_p.invar_scale(); | |||
3534 | if (invar_scale != NULL__null) { | |||
3535 | invar = new LShiftINode(invar, invar_scale); | |||
3536 | _igvn.register_new_node_with_optimizer(invar); | |||
3537 | } | |||
3538 | Node* aref = new URShiftINode(invar, log2_elt); | |||
3539 | _igvn.register_new_node_with_optimizer(aref); | |||
3540 | _phase->set_ctrl(aref, pre_ctrl); | |||
3541 | if (align_to_ref_p.negate_invar()) { | |||
3542 | e = new SubINode(e, aref); | |||
3543 | } else { | |||
3544 | e = new AddINode(e, aref); | |||
3545 | } | |||
3546 | _igvn.register_new_node_with_optimizer(e); | |||
3547 | _phase->set_ctrl(e, pre_ctrl); | |||
3548 | } | |||
3549 | if (vw > ObjectAlignmentInBytes || align_to_ref_p.base()->is_top()) { | |||
3550 | // incorporate base e +/- base && Mask >>> log2(elt) | |||
3551 | Node* xbase = new CastP2XNode(NULL__null, align_to_ref_p.adr()); | |||
3552 | _igvn.register_new_node_with_optimizer(xbase); | |||
  // NOTE(review): "_LP641" below looks like the report's rendering of the
  // macro _LP64 followed by its expansion "1" - confirm against the real source.
3553 | #ifdef _LP641 | |||
3554 | xbase = new ConvL2INode(xbase); | |||
3555 | _igvn.register_new_node_with_optimizer(xbase); | |||
3556 | #endif | |||
3557 | Node* mask = _igvn.intcon(vw-1); | |||
3558 | Node* masked_xbase = new AndINode(xbase, mask); | |||
3559 | _igvn.register_new_node_with_optimizer(masked_xbase); | |||
3560 | Node* log2_elt = _igvn.intcon(exact_log2(elt_size)); | |||
3561 | Node* bref = new URShiftINode(masked_xbase, log2_elt); | |||
3562 | _igvn.register_new_node_with_optimizer(bref); | |||
3563 | _phase->set_ctrl(bref, pre_ctrl); | |||
3564 | e = new AddINode(e, bref); | |||
3565 | _igvn.register_new_node_with_optimizer(e); | |||
3566 | _phase->set_ctrl(e, pre_ctrl); | |||
3567 | } | |||
3568 | ||||
3569 | // compute e +/- lim0 | |||
3570 | if (scale < 0) { | |||
3571 | e = new SubINode(e, lim0); | |||
3572 | } else { | |||
3573 | e = new AddINode(e, lim0); | |||
3574 | } | |||
3575 | _igvn.register_new_node_with_optimizer(e); | |||
3576 | _phase->set_ctrl(e, pre_ctrl); | |||
3577 | ||||
  // Stride and scale having the same sign corresponds to the two cases in the
  // derivation above whose N has the form (V - (...)) % V.
3578 | if (stride * scale > 0) { | |||
3579 | // compute V - (e +/- lim0) | |||
3580 | Node* va = _igvn.intcon(v_align); | |||
3581 | e = new SubINode(va, e); | |||
3582 | _igvn.register_new_node_with_optimizer(e); | |||
3583 | _phase->set_ctrl(e, pre_ctrl); | |||
3584 | } | |||
3585 | // compute N = (exp) % V | |||
  // V is a power of 2 (see "Given" above), so the modulo is an AND with V - 1.
3586 | Node* va_msk = _igvn.intcon(v_align - 1); | |||
3587 | Node* N = new AndINode(e, va_msk); | |||
3588 | _igvn.register_new_node_with_optimizer(N); | |||
3589 | _phase->set_ctrl(N, pre_ctrl); | |||
3590 | ||||
3591 | // substitute back into (1), so that new limit | |||
3592 | // lim = lim0 + N | |||
3593 | Node* lim; | |||
3594 | if (stride < 0) { | |||
3595 | lim = new SubINode(lim0, N); | |||
3596 | } else { | |||
3597 | lim = new AddINode(lim0, N); | |||
3598 | } | |||
3599 | _igvn.register_new_node_with_optimizer(lim); | |||
3600 | _phase->set_ctrl(lim, pre_ctrl); | |||
  // Combine the adjusted limit with the original loop limit: Min for a
  // positive stride, Max otherwise.
3601 | Node* constrained = | |||
3602 | (stride > 0) ? (Node*) new MinINode(lim, orig_limit) | |||
3603 | : (Node*) new MaxINode(lim, orig_limit); | |||
3604 | _igvn.register_new_node_with_optimizer(constrained); | |||
3605 | _phase->set_ctrl(constrained, pre_ctrl); | |||
  // Install the new limit as input 1 of the pre-loop Opaque1 node.
3606 | _igvn.replace_input_of(pre_opaq, 1, constrained); | |||
3607 | } | |||
3608 | ||||
3609 | //----------------------------get_pre_loop_end--------------------------- | |||
3610 | // Find pre loop end from main loop. Returns null if none. | |||
3611 | CountedLoopEndNode* SuperWord::find_pre_loop_end(CountedLoopNode* cl) const { | |||
3612 | // The loop cannot be optimized if the graph shape at | |||
3613 | // the loop entry is inappropriate. | |||
3614 | if (cl->is_canonical_loop_entry() == NULL__null) { | |||
3615 | return NULL__null; | |||
3616 | } | |||
3617 | ||||
3618 | Node* p_f = cl->skip_predicates()->in(0)->in(0); | |||
3619 | if (!p_f->is_IfFalse()) return NULL__null; | |||
3620 | if (!p_f->in(0)->is_CountedLoopEnd()) return NULL__null; | |||
3621 | CountedLoopEndNode* pre_end = p_f->in(0)->as_CountedLoopEnd(); | |||
3622 | CountedLoopNode* loop_node = pre_end->loopnode(); | |||
3623 | if (loop_node == NULL__null || !loop_node->is_pre_loop()) return NULL__null; | |||
3624 | return pre_end; | |||
3625 | } | |||
3626 | ||||
3627 | //------------------------------init--------------------------- | |||
3628 | void SuperWord::init() { | |||
3629 | _dg.init(); | |||
3630 | _packset.clear(); | |||
3631 | _disjoint_ptrs.clear(); | |||
3632 | _block.clear(); | |||
3633 | _post_block.clear(); | |||
3634 | _data_entry.clear(); | |||
3635 | _mem_slice_head.clear(); | |||
3636 | _mem_slice_tail.clear(); | |||
3637 | _iteration_first.clear(); | |||
3638 | _iteration_last.clear(); | |||
3639 | _node_info.clear(); | |||
3640 | _align_to_ref = NULL__null; | |||
3641 | _lpt = NULL__null; | |||
3642 | _lp = NULL__null; | |||
3643 | _bb = NULL__null; | |||
3644 | _iv = NULL__null; | |||
3645 | _race_possible = 0; | |||
3646 | _early_return = false; | |||
3647 | _num_work_vecs = 0; | |||
3648 | _num_reductions = 0; | |||
3649 | } | |||
3650 | ||||
3651 | //------------------------------restart--------------------------- | |||
3652 | void SuperWord::restart() { | |||
3653 | _dg.init(); | |||
3654 | _packset.clear(); | |||
3655 | _disjoint_ptrs.clear(); | |||
3656 | _block.clear(); | |||
3657 | _post_block.clear(); | |||
3658 | _data_entry.clear(); | |||
3659 | _mem_slice_head.clear(); | |||
3660 | _mem_slice_tail.clear(); | |||
3661 | _node_info.clear(); | |||
3662 | } | |||
3663 | ||||
3664 | //------------------------------print_packset--------------------------- | |||
3665 | void SuperWord::print_packset() { | |||
3666 | #ifndef PRODUCT | |||
3667 | tty->print_cr("packset"); | |||
3668 | for (int i = 0; i < _packset.length(); i++) { | |||
3669 | tty->print_cr("Pack: %d", i); | |||
3670 | Node_List* p = _packset.at(i); | |||
3671 | print_pack(p); | |||
3672 | } | |||
3673 | #endif | |||
3674 | } | |||
3675 | ||||
3676 | //------------------------------print_pack--------------------------- | |||
3677 | void SuperWord::print_pack(Node_List* p) { | |||
3678 | for (uint i = 0; i < p->size(); i++) { | |||
3679 | print_stmt(p->at(i)); | |||
3680 | } | |||
3681 | } | |||
3682 | ||||
3683 | //------------------------------print_bb--------------------------- | |||
3684 | void SuperWord::print_bb() { | |||
3685 | #ifndef PRODUCT | |||
3686 | tty->print_cr("\nBlock"); | |||
3687 | for (int i = 0; i < _block.length(); i++) { | |||
3688 | Node* n = _block.at(i); | |||
3689 | tty->print("%d ", i); | |||
3690 | if (n) { | |||
3691 | n->dump(); | |||
3692 | } | |||
3693 | } | |||
3694 | #endif | |||
3695 | } | |||
3696 | ||||
3697 | //------------------------------print_stmt--------------------------- | |||
3698 | void SuperWord::print_stmt(Node* s) { | |||
3699 | #ifndef PRODUCT | |||
3700 | tty->print(" align: %d \t", alignment(s)); | |||
3701 | s->dump(); | |||
3702 | #endif | |||
3703 | } | |||
3704 | ||||
3705 | //------------------------------blank--------------------------- | |||
3706 | char* SuperWord::blank(uint depth) { | |||
3707 | static char blanks[101]; | |||
3708 | assert(depth < 101, "too deep")do { if (!(depth < 101)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3708, "assert(" "depth < 101" ") failed", "too deep"); :: breakpoint(); } } while (0); | |||
3709 | for (uint i = 0; i < depth; i++) blanks[i] = ' '; | |||
3710 | blanks[depth] = '\0'; | |||
3711 | return blanks; | |||
3712 | } | |||
3713 | ||||
3714 | ||||
3715 | //==============================SWPointer=========================== | |||
// Definition of the static member SWPointer::Tracer::_depth, initialized to 0.
// It exists only in non-PRODUCT builds.
// NOTE(review): presumably the current nesting depth used to indent trace
// output - confirm against the Tracer declaration.
3716 | #ifndef PRODUCT | |||
3717 | int SWPointer::Tracer::_depth = 0; | |||
3718 | #endif | |||
3719 | //----------------------------SWPointer------------------------ | |||
3720 | SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool analyze_only) : | |||
3721 | _mem(mem), _slp(slp), _base(NULL__null), _adr(NULL__null), | |||
3722 | _scale(0), _offset(0), _invar(NULL__null), _negate_invar(false), | |||
3723 | _invar_scale(NULL__null), | |||
3724 | _nstack(nstack), _analyze_only(analyze_only), | |||
3725 | _stack_idx(0) | |||
3726 | #ifndef PRODUCT | |||
3727 | , _tracer(slp) | |||
3728 | #endif | |||
3729 | { | |||
3730 | NOT_PRODUCT(_tracer.ctor_1(mem);)_tracer.ctor_1(mem); | |||
3731 | ||||
3732 | Node* adr = mem->in(MemNode::Address); | |||
3733 | if (!adr->is_AddP()) { | |||
3734 | assert(!valid(), "too complex")do { if (!(!valid())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3734, "assert(" "!valid()" ") failed", "too complex"); ::breakpoint (); } } while (0); | |||
3735 | return; | |||
3736 | } | |||
3737 | // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant) | |||
3738 | Node* base = adr->in(AddPNode::Base); | |||
3739 | // The base address should be loop invariant | |||
3740 | if (is_main_loop_member(base)) { | |||
3741 | assert(!valid(), "base address is loop variant")do { if (!(!valid())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3741, "assert(" "!valid()" ") failed", "base address is loop variant" ); ::breakpoint(); } } while (0); | |||
3742 | return; | |||
3743 | } | |||
3744 | // unsafe references require misaligned vector access support | |||
3745 | if (base->is_top() && !Matcher::misaligned_vectors_ok()) { | |||
3746 | assert(!valid(), "unsafe access")do { if (!(!valid())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3746, "assert(" "!valid()" ") failed", "unsafe access"); :: breakpoint(); } } while (0); | |||
3747 | return; | |||
3748 | } | |||
3749 | ||||
3750 | NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.store_depth();)if(_slp->is_trace_alignment()) _tracer.store_depth(); | |||
3751 | NOT_PRODUCT(_tracer.ctor_2(adr);)_tracer.ctor_2(adr); | |||
3752 | ||||
3753 | int i; | |||
3754 | for (i = 0; i < 3; i++) { | |||
3755 | NOT_PRODUCT(_tracer.ctor_3(adr, i);)_tracer.ctor_3(adr, i); | |||
3756 | ||||
3757 | if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) { | |||
3758 | assert(!valid(), "too complex")do { if (!(!valid())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3758, "assert(" "!valid()" ") failed", "too complex"); ::breakpoint (); } } while (0); | |||
3759 | return; | |||
3760 | } | |||
3761 | adr = adr->in(AddPNode::Address); | |||
3762 | NOT_PRODUCT(_tracer.ctor_4(adr, i);)_tracer.ctor_4(adr, i); | |||
3763 | ||||
3764 | if (base == adr || !adr->is_AddP()) { | |||
3765 | NOT_PRODUCT(_tracer.ctor_5(adr, base, i);)_tracer.ctor_5(adr, base, i); | |||
3766 | break; // stop looking at addp's | |||
3767 | } | |||
3768 | } | |||
3769 | if (is_main_loop_member(adr)) { | |||
3770 | assert(!valid(), "adr is loop variant")do { if (!(!valid())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3770, "assert(" "!valid()" ") failed", "adr is loop variant" ); ::breakpoint(); } } while (0); | |||
3771 | return; | |||
3772 | } | |||
3773 | ||||
3774 | if (!base->is_top() && adr != base) { | |||
3775 | assert(!valid(), "adr and base differ")do { if (!(!valid())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3775, "assert(" "!valid()" ") failed", "adr and base differ" ); ::breakpoint(); } } while (0); | |||
3776 | return; | |||
3777 | } | |||
3778 | ||||
3779 | NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.restore_depth();)if(_slp->is_trace_alignment()) _tracer.restore_depth(); | |||
3780 | NOT_PRODUCT(_tracer.ctor_6(mem);)_tracer.ctor_6(mem); | |||
3781 | ||||
3782 | _base = base; | |||
3783 | _adr = adr; | |||
3784 | assert(valid(), "Usable")do { if (!(valid())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3784, "assert(" "valid()" ") failed", "Usable"); ::breakpoint (); } } while (0); | |||
3785 | } | |||
3786 | ||||
3787 | // Following is used to create a temporary object during | |||
3788 | // the pattern match of an address expression. | |||
3789 | SWPointer::SWPointer(SWPointer* p) : | |||
3790 | _mem(p->_mem), _slp(p->_slp), _base(NULL__null), _adr(NULL__null), | |||
3791 | _scale(0), _offset(0), _invar(NULL__null), _negate_invar(false), | |||
3792 | _invar_scale(NULL__null), | |||
3793 | _nstack(p->_nstack), _analyze_only(p->_analyze_only), | |||
3794 | _stack_idx(p->_stack_idx) | |||
3795 | #ifndef PRODUCT | |||
3796 | , _tracer(p->_slp) | |||
3797 | #endif | |||
3798 | {} | |||
3799 | ||||
3800 | bool SWPointer::is_main_loop_member(Node* n) const { | |||
3801 | Node* n_c = phase()->get_ctrl(n); | |||
3802 | return lpt()->is_member(phase()->get_loop(n_c)); | |||
3803 | } | |||
3804 | ||||
3805 | bool SWPointer::invariant(Node* n) const { | |||
3806 | NOT_PRODUCT(Tracer::Depth dd;)Tracer::Depth dd; | |||
3807 | Node* n_c = phase()->get_ctrl(n); | |||
3808 | NOT_PRODUCT(_tracer.invariant_1(n, n_c);)_tracer.invariant_1(n, n_c); | |||
3809 | bool is_not_member = !is_main_loop_member(n); | |||
3810 | if (is_not_member && _slp->lp()->is_main_loop()) { | |||
3811 | // Check that n_c dominates the pre loop head node. If it does not, then we cannot use n as invariant for the pre loop | |||
3812 | // CountedLoopEndNode check because n_c is either part of the pre loop or between the pre and the main loop (illegal | |||
3813 | // invariant: Happens, for example, when n_c is a CastII node that prevents data nodes to flow above the main loop). | |||
3814 | return phase()->is_dominator(n_c, _slp->pre_loop_head()); | |||
3815 | } | |||
3816 | return is_not_member; | |||
3817 | } | |||
3818 | ||||
3819 | //------------------------scaled_iv_plus_offset-------------------- | |||
3820 | // Match: k*iv + offset | |||
3821 | // where: k is a constant that maybe zero, and | |||
3822 | // offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional | |||
3823 | bool SWPointer::scaled_iv_plus_offset(Node* n) { | |||
3824 | NOT_PRODUCT(Tracer::Depth ddd;)Tracer::Depth ddd; | |||
3825 | NOT_PRODUCT(_tracer.scaled_iv_plus_offset_1(n);)_tracer.scaled_iv_plus_offset_1(n); | |||
3826 | ||||
3827 | if (scaled_iv(n)) { | |||
3828 | NOT_PRODUCT(_tracer.scaled_iv_plus_offset_2(n);)_tracer.scaled_iv_plus_offset_2(n); | |||
3829 | return true; | |||
3830 | } | |||
3831 | ||||
3832 | if (offset_plus_k(n)) { | |||
3833 | NOT_PRODUCT(_tracer.scaled_iv_plus_offset_3(n);)_tracer.scaled_iv_plus_offset_3(n); | |||
3834 | return true; | |||
3835 | } | |||
3836 | ||||
3837 | int opc = n->Opcode(); | |||
3838 | if (opc == Op_AddI) { | |||
3839 | if (offset_plus_k(n->in(2)) && scaled_iv_plus_offset(n->in(1))) { | |||
3840 | NOT_PRODUCT(_tracer.scaled_iv_plus_offset_4(n);)_tracer.scaled_iv_plus_offset_4(n); | |||
3841 | return true; | |||
3842 | } | |||
3843 | if (offset_plus_k(n->in(1)) && scaled_iv_plus_offset(n->in(2))) { | |||
3844 | NOT_PRODUCT(_tracer.scaled_iv_plus_offset_5(n);)_tracer.scaled_iv_plus_offset_5(n); | |||
3845 | return true; | |||
3846 | } | |||
3847 | } else if (opc == Op_SubI) { | |||
3848 | if (offset_plus_k(n->in(2), true) && scaled_iv_plus_offset(n->in(1))) { | |||
3849 | NOT_PRODUCT(_tracer.scaled_iv_plus_offset_6(n);)_tracer.scaled_iv_plus_offset_6(n); | |||
3850 | return true; | |||
3851 | } | |||
3852 | if (offset_plus_k(n->in(1)) && scaled_iv_plus_offset(n->in(2))) { | |||
3853 | _scale *= -1; | |||
3854 | NOT_PRODUCT(_tracer.scaled_iv_plus_offset_7(n);)_tracer.scaled_iv_plus_offset_7(n); | |||
3855 | return true; | |||
3856 | } | |||
3857 | } | |||
3858 | ||||
3859 | NOT_PRODUCT(_tracer.scaled_iv_plus_offset_8(n);)_tracer.scaled_iv_plus_offset_8(n); | |||
3860 | return false; | |||
3861 | } | |||
3862 | ||||
3863 | //----------------------------scaled_iv------------------------ | |||
3864 | // Match: k*iv where k is a constant that's not zero | |||
3865 | bool SWPointer::scaled_iv(Node* n) { | |||
3866 | NOT_PRODUCT(Tracer::Depth ddd;)Tracer::Depth ddd; | |||
3867 | NOT_PRODUCT(_tracer.scaled_iv_1(n);)_tracer.scaled_iv_1(n); | |||
3868 | ||||
3869 | if (_scale != 0) { // already found a scale | |||
3870 | NOT_PRODUCT(_tracer.scaled_iv_2(n, _scale);)_tracer.scaled_iv_2(n, _scale); | |||
3871 | return false; | |||
3872 | } | |||
3873 | ||||
3874 | if (n == iv()) { | |||
3875 | _scale = 1; | |||
3876 | NOT_PRODUCT(_tracer.scaled_iv_3(n, _scale);)_tracer.scaled_iv_3(n, _scale); | |||
3877 | return true; | |||
3878 | } | |||
3879 | if (_analyze_only && (is_main_loop_member(n))) { | |||
3880 | _nstack->push(n, _stack_idx++); | |||
3881 | } | |||
3882 | ||||
3883 | int opc = n->Opcode(); | |||
3884 | if (opc == Op_MulI) { | |||
3885 | if (n->in(1) == iv() && n->in(2)->is_Con()) { | |||
3886 | _scale = n->in(2)->get_int(); | |||
3887 | NOT_PRODUCT(_tracer.scaled_iv_4(n, _scale);)_tracer.scaled_iv_4(n, _scale); | |||
3888 | return true; | |||
3889 | } else if (n->in(2) == iv() && n->in(1)->is_Con()) { | |||
3890 | _scale = n->in(1)->get_int(); | |||
3891 | NOT_PRODUCT(_tracer.scaled_iv_5(n, _scale);)_tracer.scaled_iv_5(n, _scale); | |||
3892 | return true; | |||
3893 | } | |||
3894 | } else if (opc == Op_LShiftI) { | |||
3895 | if (n->in(1) == iv() && n->in(2)->is_Con()) { | |||
3896 | _scale = 1 << n->in(2)->get_int(); | |||
3897 | NOT_PRODUCT(_tracer.scaled_iv_6(n, _scale);)_tracer.scaled_iv_6(n, _scale); | |||
3898 | return true; | |||
3899 | } | |||
3900 | } else if (opc == Op_ConvI2L || opc == Op_CastII) { | |||
3901 | if (scaled_iv_plus_offset(n->in(1))) { | |||
3902 | NOT_PRODUCT(_tracer.scaled_iv_7(n);)_tracer.scaled_iv_7(n); | |||
3903 | return true; | |||
3904 | } | |||
3905 | } else if (opc == Op_LShiftL && n->in(2)->is_Con()) { | |||
3906 | if (!has_iv() && _invar == NULL__null) { | |||
3907 | // Need to preserve the current _offset value, so | |||
3908 | // create a temporary object for this expression subtree. | |||
3909 | // Hacky, so should re-engineer the address pattern match. | |||
3910 | NOT_PRODUCT(Tracer::Depth dddd;)Tracer::Depth dddd; | |||
3911 | SWPointer tmp(this); | |||
3912 | NOT_PRODUCT(_tracer.scaled_iv_8(n, &tmp);)_tracer.scaled_iv_8(n, &tmp); | |||
3913 | ||||
3914 | if (tmp.scaled_iv_plus_offset(n->in(1))) { | |||
3915 | int scale = n->in(2)->get_int(); | |||
3916 | _scale = tmp._scale << scale; | |||
3917 | _offset += tmp._offset << scale; | |||
3918 | _invar = tmp._invar; | |||
3919 | if (_invar != NULL__null) { | |||
3920 | _negate_invar = tmp._negate_invar; | |||
3921 | _invar_scale = n->in(2); | |||
3922 | } | |||
3923 | NOT_PRODUCT(_tracer.scaled_iv_9(n, _scale, _offset, _invar, _negate_invar);)_tracer.scaled_iv_9(n, _scale, _offset, _invar, _negate_invar ); | |||
3924 | return true; | |||
3925 | } | |||
3926 | } | |||
3927 | } | |||
3928 | NOT_PRODUCT(_tracer.scaled_iv_10(n);)_tracer.scaled_iv_10(n); | |||
3929 | return false; | |||
3930 | } | |||
3931 | ||||
3932 | //----------------------------offset_plus_k------------------------ | |||
3933 | // Match: offset is (k [+/- invariant]) | |||
3934 | // where k maybe zero and invariant is optional, but not both. | |||
3935 | bool SWPointer::offset_plus_k(Node* n, bool negate) { | |||
3936 | NOT_PRODUCT(Tracer::Depth ddd;)Tracer::Depth ddd; | |||
3937 | NOT_PRODUCT(_tracer.offset_plus_k_1(n);)_tracer.offset_plus_k_1(n); | |||
3938 | ||||
3939 | int opc = n->Opcode(); | |||
3940 | if (opc == Op_ConI) { | |||
3941 | _offset += negate ? -(n->get_int()) : n->get_int(); | |||
3942 | NOT_PRODUCT(_tracer.offset_plus_k_2(n, _offset);)_tracer.offset_plus_k_2(n, _offset); | |||
3943 | return true; | |||
3944 | } else if (opc == Op_ConL) { | |||
3945 | // Okay if value fits into an int | |||
3946 | const TypeLong* t = n->find_long_type(); | |||
3947 | if (t->higher_equal(TypeLong::INT)) { | |||
3948 | jlong loff = n->get_long(); | |||
3949 | jint off = (jint)loff; | |||
3950 | _offset += negate ? -off : loff; | |||
3951 | NOT_PRODUCT(_tracer.offset_plus_k_3(n, _offset);)_tracer.offset_plus_k_3(n, _offset); | |||
3952 | return true; | |||
3953 | } | |||
3954 | NOT_PRODUCT(_tracer.offset_plus_k_4(n);)_tracer.offset_plus_k_4(n); | |||
3955 | return false; | |||
3956 | } | |||
3957 | if (_invar != NULL__null) { // already has an invariant | |||
3958 | NOT_PRODUCT(_tracer.offset_plus_k_5(n, _invar);)_tracer.offset_plus_k_5(n, _invar); | |||
3959 | return false; | |||
3960 | } | |||
3961 | ||||
3962 | if (_analyze_only && is_main_loop_member(n)) { | |||
3963 | _nstack->push(n, _stack_idx++); | |||
3964 | } | |||
3965 | if (opc == Op_AddI) { | |||
3966 | if (n->in(2)->is_Con() && invariant(n->in(1))) { | |||
3967 | _negate_invar = negate; | |||
3968 | _invar = n->in(1); | |||
3969 | _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); | |||
3970 | NOT_PRODUCT(_tracer.offset_plus_k_6(n, _invar, _negate_invar, _offset);)_tracer.offset_plus_k_6(n, _invar, _negate_invar, _offset); | |||
3971 | return true; | |||
3972 | } else if (n->in(1)->is_Con() && invariant(n->in(2))) { | |||
3973 | _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); | |||
3974 | _negate_invar = negate; | |||
3975 | _invar = n->in(2); | |||
3976 | NOT_PRODUCT(_tracer.offset_plus_k_7(n, _invar, _negate_invar, _offset);)_tracer.offset_plus_k_7(n, _invar, _negate_invar, _offset); | |||
3977 | return true; | |||
3978 | } | |||
3979 | } | |||
3980 | if (opc == Op_SubI) { | |||
3981 | if (n->in(2)->is_Con() && invariant(n->in(1))) { | |||
3982 | _negate_invar = negate; | |||
3983 | _invar = n->in(1); | |||
3984 | _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); | |||
3985 | NOT_PRODUCT(_tracer.offset_plus_k_8(n, _invar, _negate_invar, _offset);)_tracer.offset_plus_k_8(n, _invar, _negate_invar, _offset); | |||
3986 | return true; | |||
3987 | } else if (n->in(1)->is_Con() && invariant(n->in(2))) { | |||
3988 | _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); | |||
3989 | _negate_invar = !negate; | |||
3990 | _invar = n->in(2); | |||
3991 | NOT_PRODUCT(_tracer.offset_plus_k_9(n, _invar, _negate_invar, _offset);)_tracer.offset_plus_k_9(n, _invar, _negate_invar, _offset); | |||
3992 | return true; | |||
3993 | } | |||
3994 | } | |||
3995 | ||||
3996 | if (!is_main_loop_member(n)) { | |||
3997 | // 'n' is loop invariant. Skip ConvI2L and CastII nodes before checking if 'n' is dominating the pre loop. | |||
3998 | if (opc == Op_ConvI2L) { | |||
3999 | n = n->in(1); | |||
4000 | } | |||
4001 | if (n->Opcode() == Op_CastII) { | |||
4002 | // Skip CastII nodes | |||
4003 | assert(!is_main_loop_member(n), "sanity")do { if (!(!is_main_loop_member(n))) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 4003, "assert(" "!is_main_loop_member(n)" ") failed", "sanity" ); ::breakpoint(); } } while (0); | |||
4004 | n = n->in(1); | |||
4005 | } | |||
4006 | // Check if 'n' can really be used as invariant (not in main loop and dominating the pre loop). | |||
4007 | if (invariant(n)) { | |||
4008 | _negate_invar = negate; | |||
4009 | _invar = n; | |||
4010 | NOT_PRODUCT(_tracer.offset_plus_k_10(n, _invar, _negate_invar, _offset);)_tracer.offset_plus_k_10(n, _invar, _negate_invar, _offset); | |||
4011 | return true; | |||
4012 | } | |||
4013 | } | |||
4014 | ||||
4015 | NOT_PRODUCT(_tracer.offset_plus_k_11(n);)_tracer.offset_plus_k_11(n); | |||
4016 | return false; | |||
4017 | } | |||
4018 | ||||
4019 | //----------------------------print------------------------ | |||
4020 | void SWPointer::print() { | |||
4021 | #ifndef PRODUCT | |||
4022 | tty->print("base: [%d] adr: [%d] scale: %d offset: %d", | |||
4023 | _base != NULL__null ? _base->_idx : 0, | |||
4024 | _adr != NULL__null ? _adr->_idx : 0, | |||
4025 | _scale, _offset); | |||
4026 | if (_invar != NULL__null) { | |||
4027 | tty->print(" invar: %c[%d] << [%d]", _negate_invar?'-':'+', _invar->_idx, _invar_scale->_idx); | |||
4028 | } | |||
4029 | tty->cr(); | |||
4030 | #endif | |||
4031 | } | |||
4032 | ||||
4033 | //----------------------------tracing------------------------ | |||
4034 | #ifndef PRODUCT | |||
4035 | void SWPointer::Tracer::print_depth() const { | |||
4036 | for (int ii = 0; ii < _depth; ++ii) { | |||
4037 | tty->print(" "); | |||
4038 | } | |||
4039 | } | |||
4040 | ||||
4041 | void SWPointer::Tracer::ctor_1 (Node* mem) { | |||
4042 | if(_slp->is_trace_alignment()) { | |||
4043 | print_depth(); tty->print(" %d SWPointer::SWPointer: start alignment analysis", mem->_idx); mem->dump(); | |||
4044 | } | |||
4045 | } | |||
4046 | ||||
4047 | void SWPointer::Tracer::ctor_2(Node* adr) { | |||
4048 | if(_slp->is_trace_alignment()) { | |||
4049 | //store_depth(); | |||
4050 | inc_depth(); | |||
4051 | print_depth(); tty->print(" %d (adr) SWPointer::SWPointer: ", adr->_idx); adr->dump(); | |||
4052 | inc_depth(); | |||
4053 | print_depth(); tty->print(" %d (base) SWPointer::SWPointer: ", adr->in(AddPNode::Base)->_idx); adr->in(AddPNode::Base)->dump(); | |||
4054 | } | |||
4055 | } | |||
4056 | ||||
4057 | void SWPointer::Tracer::ctor_3(Node* adr, int i) { | |||
4058 | if(_slp->is_trace_alignment()) { | |||
4059 | inc_depth(); | |||
4060 | Node* offset = adr->in(AddPNode::Offset); | |||
4061 | print_depth(); tty->print(" %d (offset) SWPointer::SWPointer: i = %d: ", offset->_idx, i); offset->dump(); | |||
4062 | } | |||
4063 | } | |||
4064 | ||||
4065 | void SWPointer::Tracer::ctor_4(Node* adr, int i) { | |||
4066 | if(_slp->is_trace_alignment()) { | |||
4067 | inc_depth(); | |||
4068 | print_depth(); tty->print(" %d (adr) SWPointer::SWPointer: i = %d: ", adr->_idx, i); adr->dump(); | |||
4069 | } | |||
4070 | } | |||
4071 | ||||
4072 | void SWPointer::Tracer::ctor_5(Node* adr, Node* base, int i) { | |||
4073 | if(_slp->is_trace_alignment()) { | |||
4074 | inc_depth(); | |||
4075 | if (base == adr) { | |||
4076 | print_depth(); tty->print_cr(" \\ %d (adr) == %d (base) SWPointer::SWPointer: breaking analysis at i = %d", adr->_idx, base->_idx, i); | |||
4077 | } else if (!adr->is_AddP()) { | |||
4078 | print_depth(); tty->print_cr(" \\ %d (adr) is NOT Addp SWPointer::SWPointer: breaking analysis at i = %d", adr->_idx, i); | |||
4079 | } | |||
4080 | } | |||
4081 | } | |||
4082 | ||||
4083 | void SWPointer::Tracer::ctor_6(Node* mem) { | |||
4084 | if(_slp->is_trace_alignment()) { | |||
4085 | //restore_depth(); | |||
4086 | print_depth(); tty->print_cr(" %d (adr) SWPointer::SWPointer: stop analysis", mem->_idx); | |||
4087 | } | |||
4088 | } | |||
4089 | ||||
4090 | void SWPointer::Tracer::invariant_1(Node *n, Node *n_c) const { | |||
4091 | if (_slp->do_vector_loop() && _slp->is_debug() && _slp->_lpt->is_member(_slp->_phase->get_loop(n_c)) != (int)_slp->in_bb(n)) { | |||
4092 | int is_member = _slp->_lpt->is_member(_slp->_phase->get_loop(n_c)); | |||
4093 | int in_bb = _slp->in_bb(n); | |||
4094 | print_depth(); tty->print(" \\ "); tty->print_cr(" %d SWPointer::invariant conditions differ: n_c %d", n->_idx, n_c->_idx); | |||
4095 | print_depth(); tty->print(" \\ "); tty->print_cr("is_member %d, in_bb %d", is_member, in_bb); | |||
4096 | print_depth(); tty->print(" \\ "); n->dump(); | |||
4097 | print_depth(); tty->print(" \\ "); n_c->dump(); | |||
4098 | } | |||
4099 | } | |||
4100 | ||||
4101 | void SWPointer::Tracer::scaled_iv_plus_offset_1(Node* n) { | |||
4102 | if(_slp->is_trace_alignment()) { | |||
4103 | print_depth(); tty->print(" %d SWPointer::scaled_iv_plus_offset testing node: ", n->_idx); | |||
4104 | n->dump(); | |||
4105 | } | |||
4106 | } | |||
4107 | ||||
4108 | void SWPointer::Tracer::scaled_iv_plus_offset_2(Node* n) { | |||
4109 | if(_slp->is_trace_alignment()) { | |||
4110 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: PASSED", n->_idx); | |||
4111 | } | |||
4112 | } | |||
4113 | ||||
4114 | void SWPointer::Tracer::scaled_iv_plus_offset_3(Node* n) { | |||
4115 | if(_slp->is_trace_alignment()) { | |||
4116 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: PASSED", n->_idx); | |||
4117 | } | |||
4118 | } | |||
4119 | ||||
4120 | void SWPointer::Tracer::scaled_iv_plus_offset_4(Node* n) { | |||
4121 | if(_slp->is_trace_alignment()) { | |||
4122 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_AddI PASSED", n->_idx); | |||
4123 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is scaled_iv: ", n->in(1)->_idx); n->in(1)->dump(); | |||
4124 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is offset_plus_k: ", n->in(2)->_idx); n->in(2)->dump(); | |||
4125 | } | |||
4126 | } | |||
4127 | ||||
4128 | void SWPointer::Tracer::scaled_iv_plus_offset_5(Node* n) { | |||
4129 | if(_slp->is_trace_alignment()) { | |||
4130 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_AddI PASSED", n->_idx); | |||
4131 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is scaled_iv: ", n->in(2)->_idx); n->in(2)->dump(); | |||
4132 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is offset_plus_k: ", n->in(1)->_idx); n->in(1)->dump(); | |||
4133 | } | |||
4134 | } | |||
4135 | ||||
4136 | void SWPointer::Tracer::scaled_iv_plus_offset_6(Node* n) { | |||
4137 | if(_slp->is_trace_alignment()) { | |||
4138 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_SubI PASSED", n->_idx); | |||
4139 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is scaled_iv: ", n->in(1)->_idx); n->in(1)->dump(); | |||
4140 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is offset_plus_k: ", n->in(2)->_idx); n->in(2)->dump(); | |||
4141 | } | |||
4142 | } | |||
4143 | ||||
4144 | void SWPointer::Tracer::scaled_iv_plus_offset_7(Node* n) { | |||
4145 | if(_slp->is_trace_alignment()) { | |||
4146 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_SubI PASSED", n->_idx); | |||
4147 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is scaled_iv: ", n->in(2)->_idx); n->in(2)->dump(); | |||
4148 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is offset_plus_k: ", n->in(1)->_idx); n->in(1)->dump(); | |||
4149 | } | |||
4150 | } | |||
4151 | ||||
4152 | void SWPointer::Tracer::scaled_iv_plus_offset_8(Node* n) { | |||
4153 | if(_slp->is_trace_alignment()) { | |||
4154 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: FAILED", n->_idx); | |||
4155 | } | |||
4156 | } | |||
4157 | ||||
4158 | void SWPointer::Tracer::scaled_iv_1(Node* n) { | |||
4159 | if(_slp->is_trace_alignment()) { | |||
4160 | print_depth(); tty->print(" %d SWPointer::scaled_iv: testing node: ", n->_idx); n->dump(); | |||
4161 | } | |||
4162 | } | |||
4163 | ||||
4164 | void SWPointer::Tracer::scaled_iv_2(Node* n, int scale) { | |||
4165 | if(_slp->is_trace_alignment()) { | |||
4166 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: FAILED since another _scale has been detected before", n->_idx); | |||
4167 | print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: _scale (%d) != 0", scale); | |||
4168 | } | |||
4169 | } | |||
4170 | ||||
4171 | void SWPointer::Tracer::scaled_iv_3(Node* n, int scale) { | |||
4172 | if(_slp->is_trace_alignment()) { | |||
4173 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: is iv, setting _scale = %d", n->_idx, scale); | |||
4174 | } | |||
4175 | } | |||
4176 | ||||
4177 | void SWPointer::Tracer::scaled_iv_4(Node* n, int scale) { | |||
4178 | if(_slp->is_trace_alignment()) { | |||
4179 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_MulI PASSED, setting _scale = %d", n->_idx, scale); | |||
4180 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(1) is iv: ", n->in(1)->_idx); n->in(1)->dump(); | |||
4181 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); | |||
4182 | } | |||
4183 | } | |||
4184 | ||||
4185 | void SWPointer::Tracer::scaled_iv_5(Node* n, int scale) { | |||
4186 | if(_slp->is_trace_alignment()) { | |||
4187 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_MulI PASSED, setting _scale = %d", n->_idx, scale); | |||
4188 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(2) is iv: ", n->in(2)->_idx); n->in(2)->dump(); | |||
4189 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump(); | |||
4190 | } | |||
4191 | } | |||
4192 | ||||
4193 | void SWPointer::Tracer::scaled_iv_6(Node* n, int scale) { | |||
4194 | if(_slp->is_trace_alignment()) { | |||
4195 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_LShiftI PASSED, setting _scale = %d", n->_idx, scale); | |||
4196 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(1) is iv: ", n->in(1)->_idx); n->in(1)->dump(); | |||
4197 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); | |||
4198 | } | |||
4199 | } | |||
4200 | ||||
4201 | void SWPointer::Tracer::scaled_iv_7(Node* n) { | |||
4202 | if(_slp->is_trace_alignment()) { | |||
4203 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_ConvI2L PASSED", n->_idx); | |||
4204 | print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: in(1) %d is scaled_iv_plus_offset: ", n->in(1)->_idx); | |||
4205 | inc_depth(); inc_depth(); | |||
4206 | print_depth(); n->in(1)->dump(); | |||
4207 | dec_depth(); dec_depth(); | |||
4208 | } | |||
4209 | } | |||
4210 | ||||
4211 | void SWPointer::Tracer::scaled_iv_8(Node* n, SWPointer* tmp) { | |||
4212 | if(_slp->is_trace_alignment()) { | |||
4213 | print_depth(); tty->print(" %d SWPointer::scaled_iv: Op_LShiftL, creating tmp SWPointer: ", n->_idx); tmp->print(); | |||
4214 | } | |||
4215 | } | |||
4216 | ||||
4217 | void SWPointer::Tracer::scaled_iv_9(Node* n, int scale, int offset, Node* invar, bool negate_invar) { | |||
4218 | if(_slp->is_trace_alignment()) { | |||
4219 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_LShiftL PASSED, setting _scale = %d, _offset = %d", n->_idx, scale, offset); | |||
4220 | print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: in(1) [%d] is scaled_iv_plus_offset, in(2) [%d] used to scale: _scale = %d, _offset = %d", | |||
4221 | n->in(1)->_idx, n->in(2)->_idx, scale, offset); | |||
4222 | if (invar != NULL__null) { | |||
4223 | print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: scaled invariant: %c[%d]", (negate_invar?'-':'+'), invar->_idx); | |||
4224 | } | |||
4225 | inc_depth(); inc_depth(); | |||
4226 | print_depth(); n->in(1)->dump(); | |||
4227 | print_depth(); n->in(2)->dump(); | |||
4228 | if (invar != NULL__null) { | |||
4229 | print_depth(); invar->dump(); | |||
4230 | } | |||
4231 | dec_depth(); dec_depth(); | |||
4232 | } | |||
4233 | } | |||
4234 | ||||
4235 | void SWPointer::Tracer::scaled_iv_10(Node* n) { | |||
4236 | if(_slp->is_trace_alignment()) { | |||
4237 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: FAILED", n->_idx); | |||
4238 | } | |||
4239 | } | |||
4240 | ||||
4241 | void SWPointer::Tracer::offset_plus_k_1(Node* n) { | |||
4242 | if(_slp->is_trace_alignment()) { | |||
4243 | print_depth(); tty->print(" %d SWPointer::offset_plus_k: testing node: ", n->_idx); n->dump(); | |||
4244 | } | |||
4245 | } | |||
4246 | ||||
4247 | void SWPointer::Tracer::offset_plus_k_2(Node* n, int _offset) { | |||
4248 | if(_slp->is_trace_alignment()) { | |||
4249 | print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_ConI PASSED, setting _offset = %d", n->_idx, _offset); | |||
4250 | } | |||
4251 | } | |||
4252 | ||||
4253 | void SWPointer::Tracer::offset_plus_k_3(Node* n, int _offset) { | |||
4254 | if(_slp->is_trace_alignment()) { | |||
4255 | print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_ConL PASSED, setting _offset = %d", n->_idx, _offset); | |||
4256 | } | |||
4257 | } | |||
4258 | ||||
4259 | void SWPointer::Tracer::offset_plus_k_4(Node* n) { | |||
4260 | if(_slp->is_trace_alignment()) { | |||
4261 | print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED", n->_idx); | |||
4262 | print_depth(); tty->print_cr(" \\ " JLONG_FORMAT"%" "l" "d" " SWPointer::offset_plus_k: Op_ConL FAILED, k is too big", n->get_long()); | |||
4263 | } | |||
4264 | } | |||
4265 | ||||
4266 | void SWPointer::Tracer::offset_plus_k_5(Node* n, Node* _invar) { | |||
4267 | if(_slp->is_trace_alignment()) { | |||
4268 | print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED since another invariant has been detected before", n->_idx); | |||
4269 | print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: _invar != NULL: ", _invar->_idx); _invar->dump(); | |||
4270 | } | |||
4271 | } | |||
4272 | ||||
4273 | void SWPointer::Tracer::offset_plus_k_6(Node* n, Node* _invar, bool _negate_invar, int _offset) { | |||
4274 | if(_slp->is_trace_alignment()) { | |||
4275 | print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_AddI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", | |||
4276 | n->_idx, _negate_invar, _invar->_idx, _offset); | |||
4277 | print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); | |||
4278 | print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is invariant: ", _invar->_idx); _invar->dump(); | |||
4279 | } | |||
4280 | } | |||
4281 | ||||
4282 | void SWPointer::Tracer::offset_plus_k_7(Node* n, Node* _invar, bool _negate_invar, int _offset) { | |||
4283 | if(_slp->is_trace_alignment()) { | |||
4284 | print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_AddI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", | |||
4285 | n->_idx, _negate_invar, _invar->_idx, _offset); | |||
4286 | print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump(); | |||
4287 | print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is invariant: ", _invar->_idx); _invar->dump(); | |||
4288 | } | |||
4289 | } | |||
4290 | ||||
4291 | void SWPointer::Tracer::offset_plus_k_8(Node* n, Node* _invar, bool _negate_invar, int _offset) { | |||
4292 | if(_slp->is_trace_alignment()) { | |||
4293 | print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_SubI is PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", | |||
4294 | n->_idx, _negate_invar, _invar->_idx, _offset); | |||
4295 | print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); | |||
4296 | print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is invariant: ", _invar->_idx); _invar->dump(); | |||
4297 | } | |||
4298 | } | |||
4299 | ||||
4300 | void SWPointer::Tracer::offset_plus_k_9(Node* n, Node* _invar, bool _negate_invar, int _offset) { | |||
4301 | if(_slp->is_trace_alignment()) { | |||
4302 | print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_SubI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset); | |||
4303 | print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump(); | |||
4304 | print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is invariant: ", _invar->_idx); _invar->dump(); | |||
4305 | } | |||
4306 | } | |||
4307 | ||||
4308 | void SWPointer::Tracer::offset_plus_k_10(Node* n, Node* _invar, bool _negate_invar, int _offset) { | |||
4309 | if(_slp->is_trace_alignment()) { | |||
4310 | print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset); | |||
4311 | print_depth(); tty->print_cr(" \\ %d SWPointer::offset_plus_k: is invariant", n->_idx); | |||
4312 | } | |||
4313 | } | |||
4314 | ||||
4315 | void SWPointer::Tracer::offset_plus_k_11(Node* n) { | |||
4316 | if(_slp->is_trace_alignment()) { | |||
4317 | print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED", n->_idx); | |||
4318 | } | |||
4319 | } | |||
4320 | ||||
4321 | #endif | |||
4322 | // ========================= OrderedPair ===================== | |||
4323 | ||||
// Out-of-class definition of the static member OrderedPair::initial; the
// object is default-constructed.
4324 | const OrderedPair OrderedPair::initial; | |||
4325 | ||||
4326 | // ========================= SWNodeInfo ===================== | |||
4327 | ||||
// Out-of-class definition of the static member SWNodeInfo::initial; the
// object is default-constructed.
4328 | const SWNodeInfo SWNodeInfo::initial; | |||
4329 | ||||
4330 | ||||
4331 | // ============================ DepGraph =========================== | |||
4332 | ||||
4333 | //------------------------------make_node--------------------------- | |||
4334 | // Make a new dependence graph node for an ideal node. | |||
4335 | DepMem* DepGraph::make_node(Node* node) { | |||
4336 | DepMem* m = new (_arena) DepMem(node); | |||
4337 | if (node != NULL__null) { | |||
4338 | assert(_map.at_grow(node->_idx) == NULL, "one init only")do { if (!(_map.at_grow(node->_idx) == __null)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 4338, "assert(" "_map.at_grow(node->_idx) == __null" ") failed" , "one init only"); ::breakpoint(); } } while (0); | |||
4339 | _map.at_put_grow(node->_idx, m); | |||
4340 | } | |||
4341 | return m; | |||
4342 | } | |||
4343 | ||||
4344 | //------------------------------make_edge--------------------------- | |||
4345 | // Make a new dependence graph edge from dpred -> dsucc | |||
4346 | DepEdge* DepGraph::make_edge(DepMem* dpred, DepMem* dsucc) { | |||
4347 | DepEdge* e = new (_arena) DepEdge(dpred, dsucc, dsucc->in_head(), dpred->out_head()); | |||
4348 | dpred->set_out_head(e); | |||
4349 | dsucc->set_in_head(e); | |||
4350 | return e; | |||
4351 | } | |||
4352 | ||||
4353 | // ========================== DepMem ======================== | |||
4354 | ||||
4355 | //------------------------------in_cnt--------------------------- | |||
4356 | int DepMem::in_cnt() { | |||
4357 | int ct = 0; | |||
4358 | for (DepEdge* e = _in_head; e != NULL__null; e = e->next_in()) ct++; | |||
4359 | return ct; | |||
4360 | } | |||
4361 | ||||
4362 | //------------------------------out_cnt--------------------------- | |||
4363 | int DepMem::out_cnt() { | |||
4364 | int ct = 0; | |||
4365 | for (DepEdge* e = _out_head; e != NULL__null; e = e->next_out()) ct++; | |||
4366 | return ct; | |||
4367 | } | |||
4368 | ||||
4369 | //------------------------------print----------------------------- | |||
4370 | void DepMem::print() { | |||
4371 | #ifndef PRODUCT | |||
4372 | tty->print(" DepNode %d (", _node->_idx); | |||
4373 | for (DepEdge* p = _in_head; p != NULL__null; p = p->next_in()) { | |||
4374 | Node* pred = p->pred()->node(); | |||
4375 | tty->print(" %d", pred != NULL__null ? pred->_idx : 0); | |||
4376 | } | |||
4377 | tty->print(") ["); | |||
4378 | for (DepEdge* s = _out_head; s != NULL__null; s = s->next_out()) { | |||
4379 | Node* succ = s->succ()->node(); | |||
4380 | tty->print(" %d", succ != NULL__null ? succ->_idx : 0); | |||
4381 | } | |||
4382 | tty->print_cr(" ]"); | |||
4383 | #endif | |||
4384 | } | |||
4385 | ||||
4386 | // =========================== DepEdge ========================= | |||
4387 | ||||
4388 | //------------------------------DepPreds--------------------------- | |||
4389 | void DepEdge::print() { | |||
4390 | #ifndef PRODUCT | |||
4391 | tty->print_cr("DepEdge: %d [ %d ]", _pred->node()->_idx, _succ->node()->_idx); | |||
4392 | #endif | |||
4393 | } | |||
4394 | ||||
4395 | // =========================== DepPreds ========================= | |||
4396 | // Iterator over predecessor edges in the dependence graph. | |||
4397 | ||||
4398 | //------------------------------DepPreds--------------------------- | |||
4399 | DepPreds::DepPreds(Node* n, DepGraph& dg) { | |||
4400 | _n = n; | |||
4401 | _done = false; | |||
4402 | if (_n->is_Store() || _n->is_Load()) { | |||
4403 | _next_idx = MemNode::Address; | |||
4404 | _end_idx = n->req(); | |||
4405 | _dep_next = dg.dep(_n)->in_head(); | |||
4406 | } else if (_n->is_Mem()) { | |||
4407 | _next_idx = 0; | |||
4408 | _end_idx = 0; | |||
4409 | _dep_next = dg.dep(_n)->in_head(); | |||
4410 | } else { | |||
4411 | _next_idx = 1; | |||
4412 | _end_idx = _n->req(); | |||
4413 | _dep_next = NULL__null; | |||
4414 | } | |||
4415 | next(); | |||
4416 | } | |||
4417 | ||||
4418 | //------------------------------next--------------------------- | |||
4419 | void DepPreds::next() { | |||
4420 | if (_dep_next != NULL__null) { | |||
4421 | _current = _dep_next->pred()->node(); | |||
4422 | _dep_next = _dep_next->next_in(); | |||
4423 | } else if (_next_idx < _end_idx) { | |||
4424 | _current = _n->in(_next_idx++); | |||
4425 | } else { | |||
4426 | _done = true; | |||
4427 | } | |||
4428 | } | |||
4429 | ||||
4430 | // =========================== DepSuccs ========================= | |||
4431 | // Iterator over successor edges in the dependence graph. | |||
4432 | ||||
4433 | //------------------------------DepSuccs--------------------------- | |||
4434 | DepSuccs::DepSuccs(Node* n, DepGraph& dg) { | |||
4435 | _n = n; | |||
4436 | _done = false; | |||
4437 | if (_n->is_Load()) { | |||
4438 | _next_idx = 0; | |||
4439 | _end_idx = _n->outcnt(); | |||
4440 | _dep_next = dg.dep(_n)->out_head(); | |||
4441 | } else if (_n->is_Mem() || (_n->is_Phi() && _n->bottom_type() == Type::MEMORY)) { | |||
4442 | _next_idx = 0; | |||
4443 | _end_idx = 0; | |||
4444 | _dep_next = dg.dep(_n)->out_head(); | |||
4445 | } else { | |||
4446 | _next_idx = 0; | |||
4447 | _end_idx = _n->outcnt(); | |||
4448 | _dep_next = NULL__null; | |||
4449 | } | |||
4450 | next(); | |||
4451 | } | |||
4452 | ||||
4453 | //-------------------------------next--------------------------- | |||
4454 | void DepSuccs::next() { | |||
4455 | if (_dep_next != NULL__null) { | |||
4456 | _current = _dep_next->succ()->node(); | |||
4457 | _dep_next = _dep_next->next_out(); | |||
4458 | } else if (_next_idx < _end_idx) { | |||
4459 | _current = _n->raw_out(_next_idx++); | |||
4460 | } else { | |||
4461 | _done = true; | |||
4462 | } | |||
4463 | } | |||
4464 | ||||
4465 | // | |||
4466 | // --------------------------------- vectorization/simd ----------------------------------- | |||
4467 | // | |||
4468 | bool SuperWord::same_origin_idx(Node* a, Node* b) const { | |||
4469 | return a != NULL__null && b != NULL__null && _clone_map.same_idx(a->_idx, b->_idx); | |||
4470 | } | |||
4471 | bool SuperWord::same_generation(Node* a, Node* b) const { | |||
4472 | return a != NULL__null && b != NULL__null && _clone_map.same_gen(a->_idx, b->_idx); | |||
4473 | } | |||
4474 | ||||
4475 | Node* SuperWord::find_phi_for_mem_dep(LoadNode* ld) { | |||
4476 | assert(in_bb(ld), "must be in block")do { if (!(in_bb(ld))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 4476, "assert(" "in_bb(ld)" ") failed", "must be in block") ; ::breakpoint(); } } while (0); | |||
4477 | if (_clone_map.gen(ld->_idx) == _ii_first) { | |||
4478 | #ifndef PRODUCT | |||
4479 | if (_vector_loop_debug) { | |||
4480 | tty->print_cr("SuperWord::find_phi_for_mem_dep _clone_map.gen(ld->_idx)=%d", | |||
4481 | _clone_map.gen(ld->_idx)); | |||
4482 | } | |||
4483 | #endif | |||
4484 | return NULL__null; //we think that any ld in the first gen being vectorizable | |||
4485 | } | |||
4486 | ||||
4487 | Node* mem = ld->in(MemNode::Memory); | |||
4488 | if (mem->outcnt() <= 1) { | |||
4489 | // we don't want to remove the only edge from mem node to load | |||
4490 | #ifndef PRODUCT | |||
4491 | if (_vector_loop_debug) { | |||
4492 | tty->print_cr("SuperWord::find_phi_for_mem_dep input node %d to load %d has no other outputs and edge mem->load cannot be removed", | |||
4493 | mem->_idx, ld->_idx); | |||
4494 | ld->dump(); | |||
4495 | mem->dump(); | |||
4496 | } | |||
4497 | #endif | |||
4498 | return NULL__null; | |||
4499 | } | |||
4500 | if (!in_bb(mem) || same_generation(mem, ld)) { | |||
4501 | #ifndef PRODUCT | |||
4502 | if (_vector_loop_debug) { | |||
4503 | tty->print_cr("SuperWord::find_phi_for_mem_dep _clone_map.gen(mem->_idx)=%d", | |||
4504 | _clone_map.gen(mem->_idx)); | |||
4505 | } | |||
4506 | #endif | |||
4507 | return NULL__null; // does not depend on loop volatile node or depends on the same generation | |||
4508 | } | |||
4509 | ||||
4510 | //otherwise first node should depend on mem-phi | |||
4511 | Node* first = first_node(ld); | |||
4512 | assert(first->is_Load(), "must be Load")do { if (!(first->is_Load())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 4512, "assert(" "first->is_Load()" ") failed", "must be Load" ); ::breakpoint(); } } while (0); | |||
4513 | Node* phi = first->as_Load()->in(MemNode::Memory); | |||
4514 | if (!phi->is_Phi() || phi->bottom_type() != Type::MEMORY) { | |||
4515 | #ifndef PRODUCT | |||
4516 | if (_vector_loop_debug) { | |||
4517 | tty->print_cr("SuperWord::find_phi_for_mem_dep load is not vectorizable node, since it's `first` does not take input from mem phi"); | |||
4518 | ld->dump(); | |||
4519 | first->dump(); | |||
4520 | } | |||
4521 | #endif | |||
4522 | return NULL__null; | |||
4523 | } | |||
4524 | ||||
4525 | Node* tail = 0; | |||
4526 | for (int m = 0; m < _mem_slice_head.length(); m++) { | |||
4527 | if (_mem_slice_head.at(m) == phi) { | |||
4528 | tail = _mem_slice_tail.at(m); | |||
4529 | } | |||
4530 | } | |||
4531 | if (tail == 0) { //test that found phi is in the list _mem_slice_head | |||
4532 | #ifndef PRODUCT | |||
4533 | if (_vector_loop_debug) { | |||
4534 | tty->print_cr("SuperWord::find_phi_for_mem_dep load %d is not vectorizable node, its phi %d is not _mem_slice_head", | |||
4535 | ld->_idx, phi->_idx); | |||
4536 | ld->dump(); | |||
4537 | phi->dump(); | |||
4538 | } | |||
4539 | #endif | |||
4540 | return NULL__null; | |||
4541 | } | |||
4542 | ||||
4543 | // now all conditions are met | |||
4544 | return phi; | |||
4545 | } | |||
4546 | ||||
4547 | Node* SuperWord::first_node(Node* nd) { | |||
4548 | for (int ii = 0; ii < _iteration_first.length(); ii++) { | |||
4549 | Node* nnn = _iteration_first.at(ii); | |||
4550 | if (same_origin_idx(nnn, nd)) { | |||
4551 | #ifndef PRODUCT | |||
4552 | if (_vector_loop_debug) { | |||
4553 | tty->print_cr("SuperWord::first_node: %d is the first iteration node for %d (_clone_map.idx(nnn->_idx) = %d)", | |||
4554 | nnn->_idx, nd->_idx, _clone_map.idx(nnn->_idx)); | |||
4555 | } | |||
4556 | #endif | |||
4557 | return nnn; | |||
4558 | } | |||
4559 | } | |||
4560 | ||||
4561 | #ifndef PRODUCT | |||
4562 | if (_vector_loop_debug) { | |||
4563 | tty->print_cr("SuperWord::first_node: did not find first iteration node for %d (_clone_map.idx(nd->_idx)=%d)", | |||
4564 | nd->_idx, _clone_map.idx(nd->_idx)); | |||
4565 | } | |||
4566 | #endif | |||
4567 | return 0; | |||
4568 | } | |||
4569 | ||||
4570 | Node* SuperWord::last_node(Node* nd) { | |||
4571 | for (int ii = 0; ii < _iteration_last.length(); ii++) { | |||
4572 | Node* nnn = _iteration_last.at(ii); | |||
4573 | if (same_origin_idx(nnn, nd)) { | |||
4574 | #ifndef PRODUCT | |||
4575 | if (_vector_loop_debug) { | |||
4576 | tty->print_cr("SuperWord::last_node _clone_map.idx(nnn->_idx)=%d, _clone_map.idx(nd->_idx)=%d", | |||
4577 | _clone_map.idx(nnn->_idx), _clone_map.idx(nd->_idx)); | |||
4578 | } | |||
4579 | #endif | |||
4580 | return nnn; | |||
4581 | } | |||
4582 | } | |||
4583 | return 0; | |||
4584 | } | |||
4585 | ||||
4586 | int SuperWord::mark_generations() { | |||
4587 | Node *ii_err = NULL__null, *tail_err = NULL__null; | |||
| ||||
4588 | for (int i = 0; i < _mem_slice_head.length(); i++) { | |||
4589 | Node* phi = _mem_slice_head.at(i); | |||
4590 | assert(phi->is_Phi(), "must be phi")do { if (!(phi->is_Phi())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 4590, "assert(" "phi->is_Phi()" ") failed", "must be phi" ); ::breakpoint(); } } while (0); | |||
4591 | ||||
4592 | Node* tail = _mem_slice_tail.at(i); | |||
4593 | if (_ii_last == -1) { | |||
4594 | tail_err = tail; | |||
4595 | _ii_last = _clone_map.gen(tail->_idx); | |||
4596 | } | |||
4597 | else if (_ii_last != _clone_map.gen(tail->_idx)) { | |||
4598 | #ifndef PRODUCT | |||
4599 | if (TraceSuperWord && Verbose) { | |||
4600 | tty->print_cr("SuperWord::mark_generations _ii_last error - found different generations in two tail nodes "); | |||
4601 | tail->dump(); | |||
4602 | tail_err->dump(); | |||
| ||||
4603 | } | |||
4604 | #endif | |||
4605 | return -1; | |||
4606 | } | |||
4607 | ||||
4608 | // find first iteration in the loop | |||
4609 | for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) { | |||
4610 | Node* ii = phi->fast_out(i); | |||
4611 | if (in_bb(ii) && ii->is_Store()) { // we speculate that normally Stores of one and one only generation have deps from mem phi | |||
4612 | if (_ii_first == -1) { | |||
4613 | ii_err = ii; | |||
4614 | _ii_first = _clone_map.gen(ii->_idx); | |||
4615 | } else if (_ii_first != _clone_map.gen(ii->_idx)) { | |||
4616 | #ifndef PRODUCT | |||
4617 | if (TraceSuperWord && Verbose) { | |||
4618 | tty->print_cr("SuperWord::mark_generations: _ii_first was found before and not equal to one in this node (%d)", _ii_first); | |||
4619 | ii->dump(); | |||
4620 | if (ii_err!= 0) { | |||
4621 | ii_err->dump(); | |||
4622 | } | |||
4623 | } | |||
4624 | #endif | |||
4625 | return -1; // this phi has Stores from different generations of unroll and cannot be simd/vectorized | |||
4626 | } | |||
4627 | } | |||
4628 | }//for (DUIterator_Fast imax, | |||
4629 | }//for (int i... | |||
4630 | ||||
4631 | if (_ii_first == -1 || _ii_last == -1) { | |||
4632 | if (TraceSuperWord && Verbose) { | |||
4633 | tty->print_cr("SuperWord::mark_generations unknown error, something vent wrong"); | |||
4634 | } | |||
4635 | return -1; // something vent wrong | |||
4636 | } | |||
4637 | // collect nodes in the first and last generations | |||
4638 | assert(_iteration_first.length() == 0, "_iteration_first must be empty")do { if (!(_iteration_first.length() == 0)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 4638, "assert(" "_iteration_first.length() == 0" ") failed" , "_iteration_first must be empty"); ::breakpoint(); } } while (0); | |||
4639 | assert(_iteration_last.length() == 0, "_iteration_last must be empty")do { if (!(_iteration_last.length() == 0)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 4639, "assert(" "_iteration_last.length() == 0" ") failed", "_iteration_last must be empty"); ::breakpoint(); } } while ( 0); | |||
4640 | for (int j = 0; j < _block.length(); j++) { | |||
4641 | Node* n = _block.at(j); | |||
4642 | node_idx_t gen = _clone_map.gen(n->_idx); | |||
4643 | if ((signed)gen == _ii_first) { | |||
4644 | _iteration_first.push(n); | |||
4645 | } else if ((signed)gen == _ii_last) { | |||
4646 | _iteration_last.push(n); | |||
4647 | } | |||
4648 | } | |||
4649 | ||||
4650 | // building order of iterations | |||
4651 | if (_ii_order.length() == 0 && ii_err != 0) { | |||
4652 | assert(in_bb(ii_err) && ii_err->is_Store(), "should be Store in bb")do { if (!(in_bb(ii_err) && ii_err->is_Store())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 4652, "assert(" "in_bb(ii_err) && ii_err->is_Store()" ") failed", "should be Store in bb"); ::breakpoint(); } } while (0); | |||
4653 | Node* nd = ii_err; | |||
4654 | while(_clone_map.gen(nd->_idx) != _ii_last) { | |||
4655 | _ii_order.push(_clone_map.gen(nd->_idx)); | |||
4656 | bool found = false; | |||
4657 | for (DUIterator_Fast imax, i = nd->fast_outs(imax); i < imax; i++) { | |||
4658 | Node* use = nd->fast_out(i); | |||
4659 | if (same_origin_idx(use, nd) && use->as_Store()->in(MemNode::Memory) == nd) { | |||
4660 | found = true; | |||
4661 | nd = use; | |||
4662 | break; | |||
4663 | } | |||
4664 | }//for | |||
4665 | ||||
4666 | if (found == false) { | |||
4667 | if (TraceSuperWord && Verbose) { | |||
4668 | tty->print_cr("SuperWord::mark_generations: Cannot build order of iterations - no dependent Store for %d", nd->_idx); | |||
4669 | } | |||
4670 | _ii_order.clear(); | |||
4671 | return -1; | |||
4672 | } | |||
4673 | } //while | |||
4674 | _ii_order.push(_clone_map.gen(nd->_idx)); | |||
4675 | } | |||
4676 | ||||
4677 | #ifndef PRODUCT | |||
4678 | if (_vector_loop_debug) { | |||
4679 | tty->print_cr("SuperWord::mark_generations"); | |||
4680 | tty->print_cr("First generation (%d) nodes:", _ii_first); | |||
4681 | for (int ii = 0; ii < _iteration_first.length(); ii++) _iteration_first.at(ii)->dump(); | |||
4682 | tty->print_cr("Last generation (%d) nodes:", _ii_last); | |||
4683 | for (int ii = 0; ii < _iteration_last.length(); ii++) _iteration_last.at(ii)->dump(); | |||
4684 | tty->print_cr(" "); | |||
4685 | ||||
4686 | tty->print("SuperWord::List of generations: "); | |||
4687 | for (int jj = 0; jj < _ii_order.length(); ++jj) { | |||
4688 | tty->print("%d:%d ", jj, _ii_order.at(jj)); | |||
4689 | } | |||
4690 | tty->print_cr(" "); | |||
4691 | } | |||
4692 | #endif | |||
4693 | ||||
4694 | return _ii_first; | |||
4695 | } | |||
4696 | ||||
4697 | bool SuperWord::fix_commutative_inputs(Node* gold, Node* fix) { | |||
4698 | assert(gold->is_Add() && fix->is_Add() || gold->is_Mul() && fix->is_Mul(), "should be only Add or Mul nodes")do { if (!(gold->is_Add() && fix->is_Add() || gold ->is_Mul() && fix->is_Mul())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 4698, "assert(" "gold->is_Add() && fix->is_Add() || gold->is_Mul() && fix->is_Mul()" ") failed", "should be only Add or Mul nodes"); ::breakpoint (); } } while (0); | |||
4699 | assert(same_origin_idx(gold, fix), "should be clones of the same node")do { if (!(same_origin_idx(gold, fix))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 4699, "assert(" "same_origin_idx(gold, fix)" ") failed", "should be clones of the same node" ); ::breakpoint(); } } while (0); | |||
4700 | Node* gin1 = gold->in(1); | |||
4701 | Node* gin2 = gold->in(2); | |||
4702 | Node* fin1 = fix->in(1); | |||
4703 | Node* fin2 = fix->in(2); | |||
4704 | bool swapped = false; | |||
4705 | ||||
4706 | if (in_bb(gin1) && in_bb(gin2) && in_bb(fin1) && in_bb(fin2)) { | |||
4707 | if (same_origin_idx(gin1, fin1) && | |||
4708 | same_origin_idx(gin2, fin2)) { | |||
4709 | return true; // nothing to fix | |||
4710 | } | |||
4711 | if (same_origin_idx(gin1, fin2) && | |||
4712 | same_origin_idx(gin2, fin1)) { | |||
4713 | fix->swap_edges(1, 2); | |||
4714 | swapped = true; | |||
4715 | } | |||
4716 | } | |||
4717 | // at least one input comes from outside of bb | |||
4718 | if (gin1->_idx == fin1->_idx) { | |||
4719 | return true; // nothing to fix | |||
4720 | } | |||
4721 | if (!swapped && (gin1->_idx == fin2->_idx || gin2->_idx == fin1->_idx)) { //swapping is expensive, check condition first | |||
4722 | fix->swap_edges(1, 2); | |||
4723 | swapped = true; | |||
4724 | } | |||
4725 | ||||
4726 | if (swapped) { | |||
4727 | #ifndef PRODUCT | |||
4728 | if (_vector_loop_debug) { | |||
4729 | tty->print_cr("SuperWord::fix_commutative_inputs: fixed node %d", fix->_idx); | |||
4730 | } | |||
4731 | #endif | |||
4732 | return true; | |||
4733 | } | |||
4734 | ||||
4735 | if (TraceSuperWord && Verbose) { | |||
4736 | tty->print_cr("SuperWord::fix_commutative_inputs: cannot fix node %d", fix->_idx); | |||
4737 | } | |||
4738 | ||||
4739 | return false; | |||
4740 | } | |||
4741 | ||||
4742 | bool SuperWord::pack_parallel() { | |||
4743 | #ifndef PRODUCT | |||
4744 | if (_vector_loop_debug) { | |||
4745 | tty->print_cr("SuperWord::pack_parallel: START"); | |||
4746 | } | |||
4747 | #endif | |||
4748 | ||||
4749 | _packset.clear(); | |||
4750 | ||||
4751 | if (_ii_order.is_empty()) { | |||
4752 | #ifndef PRODUCT | |||
4753 | if (_vector_loop_debug) { | |||
4754 | tty->print_cr("SuperWord::pack_parallel: EMPTY"); | |||
4755 | } | |||
4756 | #endif | |||
4757 | return false; | |||
4758 | } | |||
4759 | ||||
4760 | for (int ii = 0; ii < _iteration_first.length(); ii++) { | |||
4761 | Node* nd = _iteration_first.at(ii); | |||
4762 | if (in_bb(nd) && (nd->is_Load() || nd->is_Store() || nd->is_Add() || nd->is_Mul())) { | |||
4763 | Node_List* pk = new Node_List(); | |||
4764 | pk->push(nd); | |||
4765 | for (int gen = 1; gen < _ii_order.length(); ++gen) { | |||
4766 | for (int kk = 0; kk < _block.length(); kk++) { | |||
4767 | Node* clone = _block.at(kk); | |||
4768 | if (same_origin_idx(clone, nd) && | |||
4769 | _clone_map.gen(clone->_idx) == _ii_order.at(gen)) { | |||
4770 | if (nd->is_Add() || nd->is_Mul()) { | |||
4771 | fix_commutative_inputs(nd, clone); | |||
4772 | } | |||
4773 | pk->push(clone); | |||
4774 | if (pk->size() == 4) { | |||
4775 | _packset.append(pk); | |||
4776 | #ifndef PRODUCT | |||
4777 | if (_vector_loop_debug) { | |||
4778 | tty->print_cr("SuperWord::pack_parallel: added pack "); | |||
4779 | pk->dump(); | |||
4780 | } | |||
4781 | #endif | |||
4782 | if (_clone_map.gen(clone->_idx) != _ii_last) { | |||
4783 | pk = new Node_List(); | |||
4784 | } | |||
4785 | } | |||
4786 | break; | |||
4787 | } | |||
4788 | } | |||
4789 | }//for | |||
4790 | }//if | |||
4791 | }//for | |||
4792 | ||||
4793 | #ifndef PRODUCT | |||
4794 | if (_vector_loop_debug) { | |||
4795 | tty->print_cr("SuperWord::pack_parallel: END"); | |||
4796 | } | |||
4797 | #endif | |||
4798 | ||||
4799 | return true; | |||
4800 | } | |||
4801 | ||||
4802 | bool SuperWord::hoist_loads_in_graph() { | |||
4803 | GrowableArray<Node*> loads; | |||
4804 | ||||
4805 | #ifndef PRODUCT | |||
4806 | if (_vector_loop_debug) { | |||
4807 | tty->print_cr("SuperWord::hoist_loads_in_graph: total number _mem_slice_head.length() = %d", _mem_slice_head.length()); | |||
4808 | } | |||
4809 | #endif | |||
4810 | ||||
4811 | for (int i = 0; i < _mem_slice_head.length(); i++) { | |||
4812 | Node* n = _mem_slice_head.at(i); | |||
4813 | if ( !in_bb(n) || !n->is_Phi() || n->bottom_type() != Type::MEMORY) { | |||
4814 | if (TraceSuperWord && Verbose) { | |||
4815 | tty->print_cr("SuperWord::hoist_loads_in_graph: skipping unexpected node n=%d", n->_idx); | |||
4816 | } | |||
4817 | continue; | |||
4818 | } | |||
4819 | ||||
4820 | #ifndef PRODUCT | |||
4821 | if (_vector_loop_debug) { | |||
4822 | tty->print_cr("SuperWord::hoist_loads_in_graph: processing phi %d = _mem_slice_head.at(%d);", n->_idx, i); | |||
4823 | } | |||
4824 | #endif | |||
4825 | ||||
4826 | for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { | |||
4827 | Node* ld = n->fast_out(i); | |||
4828 | if (ld->is_Load() && ld->as_Load()->in(MemNode::Memory) == n && in_bb(ld)) { | |||
4829 | for (int i = 0; i < _block.length(); i++) { | |||
4830 | Node* ld2 = _block.at(i); | |||
4831 | if (ld2->is_Load() && same_origin_idx(ld, ld2) && | |||
4832 | !same_generation(ld, ld2)) { // <= do not collect the first generation ld | |||
4833 | #ifndef PRODUCT | |||
4834 | if (_vector_loop_debug) { | |||
4835 | tty->print_cr("SuperWord::hoist_loads_in_graph: will try to hoist load ld2->_idx=%d, cloned from %d (ld->_idx=%d)", | |||
4836 | ld2->_idx, _clone_map.idx(ld->_idx), ld->_idx); | |||
4837 | } | |||
4838 | #endif | |||
4839 | // could not do on-the-fly, since iterator is immutable | |||
4840 | loads.push(ld2); | |||
4841 | } | |||
4842 | }// for | |||
4843 | }//if | |||
4844 | }//for (DUIterator_Fast imax, | |||
4845 | }//for (int i = 0; i | |||
4846 | ||||
4847 | for (int i = 0; i < loads.length(); i++) { | |||
4848 | LoadNode* ld = loads.at(i)->as_Load(); | |||
4849 | Node* phi = find_phi_for_mem_dep(ld); | |||
4850 | if (phi != NULL__null) { | |||
4851 | #ifndef PRODUCT | |||
4852 | if (_vector_loop_debug) { | |||
4853 | tty->print_cr("SuperWord::hoist_loads_in_graph replacing MemNode::Memory(%d) edge in %d with one from %d", | |||
4854 | MemNode::Memory, ld->_idx, phi->_idx); | |||
4855 | } | |||
4856 | #endif | |||
4857 | _igvn.replace_input_of(ld, MemNode::Memory, phi); | |||
4858 | } | |||
4859 | }//for | |||
4860 | ||||
4861 | restart(); // invalidate all basic structures, since we rebuilt the graph | |||
4862 | ||||
4863 | if (TraceSuperWord && Verbose) { | |||
4864 | tty->print_cr("\nSuperWord::hoist_loads_in_graph() the graph was rebuilt, all structures invalidated and need rebuild"); | |||
4865 | } | |||
4866 | ||||
4867 | return true; | |||
4868 | } |
1 | /* |
2 | * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. |
8 | * |
9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
12 | * version 2 for more details (a copy is included in the LICENSE file that |
13 | * accompanied this code). |
14 | * |
15 | * You should have received a copy of the GNU General Public License version |
16 | * 2 along with this work; if not, write to the Free Software Foundation, |
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | * |
19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
20 | * or visit www.oracle.com if you need additional information or have any |
21 | * questions. |
22 | * |
23 | */ |
24 | |
25 | #ifndef SHARE_OPTO_COMPILE_HPP |
26 | #define SHARE_OPTO_COMPILE_HPP |
27 | |
28 | #include "asm/codeBuffer.hpp" |
29 | #include "ci/compilerInterface.hpp" |
30 | #include "code/debugInfoRec.hpp" |
31 | #include "compiler/compiler_globals.hpp" |
32 | #include "compiler/compilerOracle.hpp" |
33 | #include "compiler/compileBroker.hpp" |
34 | #include "compiler/compilerEvent.hpp" |
35 | #include "libadt/dict.hpp" |
36 | #include "libadt/vectset.hpp" |
37 | #include "memory/resourceArea.hpp" |
38 | #include "oops/methodData.hpp" |
39 | #include "opto/idealGraphPrinter.hpp" |
40 | #include "opto/phasetype.hpp" |
41 | #include "opto/phase.hpp" |
42 | #include "opto/regmask.hpp" |
43 | #include "runtime/deoptimization.hpp" |
44 | #include "runtime/sharedRuntime.hpp" |
45 | #include "runtime/timerTrace.hpp" |
46 | #include "runtime/vmThread.hpp" |
47 | #include "utilities/ticks.hpp" |
48 | |
49 | class AbstractLockNode; |
50 | class AddPNode; |
51 | class Block; |
52 | class Bundle; |
53 | class CallGenerator; |
54 | class CloneMap; |
55 | class ConnectionGraph; |
56 | class IdealGraphPrinter; |
57 | class InlineTree; |
58 | class Int_Array; |
59 | class Matcher; |
60 | class MachConstantNode; |
61 | class MachConstantBaseNode; |
62 | class MachNode; |
63 | class MachOper; |
64 | class MachSafePointNode; |
65 | class Node; |
66 | class Node_Array; |
67 | class Node_List; |
68 | class Node_Notes; |
69 | class NodeCloneInfo; |
70 | class OptoReg; |
71 | class PhaseCFG; |
72 | class PhaseGVN; |
73 | class PhaseIterGVN; |
74 | class PhaseRegAlloc; |
75 | class PhaseCCP; |
76 | class PhaseOutput; |
77 | class RootNode; |
78 | class relocInfo; |
79 | class Scope; |
80 | class StartNode; |
81 | class SafePointNode; |
82 | class JVMState; |
83 | class Type; |
84 | class TypeData; |
85 | class TypeInt; |
86 | class TypeInteger; |
87 | class TypePtr; |
88 | class TypeOopPtr; |
89 | class TypeFunc; |
90 | class TypeVect; |
91 | class Unique_Node_List; |
92 | class nmethod; |
93 | class Node_Stack; |
94 | struct Final_Reshape_Counts; |
95 | |
// Loop-optimization mode selector.
// NOTE(review): the meaning of each mode is defined by the code that consumes
// this enum, which is not visible here. Enumerator order (and therefore the
// implicit values 0..6) is kept unchanged.
enum LoopOptsMode {
  LoopOptsDefault,
  LoopOptsNone,
  LoopOptsMaxUnroll,
  LoopOptsShenandoahExpand,
  LoopOptsShenandoahPostExpand,
  LoopOptsSkipSplitIf,
  LoopOptsVerify
};
105 | |
typedef unsigned int node_idx_t;

// Packs two values into one 64-bit word:
//   low  32 bits - a node index (node_idx_t)
//   high 32 bits - a generation number (int)
class NodeCloneInfo {
 private:
  uint64_t _idx_clone_orig;

 public:
  // Replace the low 32 bits (node index); the generation bits are preserved.
  void set_idx(node_idx_t idx) {
    const uint64_t gen_bits = _idx_clone_orig & ~(uint64_t)0xFFFFFFFF;
    _idx_clone_orig = gen_bits | idx;
  }
  node_idx_t idx() const {
    return (node_idx_t)(_idx_clone_orig & 0xFFFFFFFF);
  }

  // Replace the high 32 bits (generation); the index bits are preserved.
  void set_gen(int generation) {
    const uint64_t idx_bits = _idx_clone_orig & 0xFFFFFFFF;
    _idx_clone_orig = idx_bits | ((uint64_t)generation << 32);
  }
  int gen() const {
    return (int)(_idx_clone_orig >> 32);
  }

  // Whole-word and component-wise accessors.
  void set(uint64_t x)            { _idx_clone_orig = x; }
  void set(node_idx_t x, int g)   { set_idx(x); set_gen(g); }
  uint64_t get() const            { return _idx_clone_orig; }

  // Construct from an already packed word, or from (index, generation).
  NodeCloneInfo(uint64_t idx_clone_orig) : _idx_clone_orig(idx_clone_orig) {}
  NodeCloneInfo(node_idx_t x, int g) : _idx_clone_orig(0) { set(x, g); }

  void dump() const;
};
132 | |
133 | class CloneMap { |
134 | friend class Compile; |
135 | private: |
136 | bool _debug; |
137 | Dict* _dict; |
138 | int _clone_idx; // current cloning iteration/generation in loop unroll |
139 | public: |
140 | void* _2p(node_idx_t key) const { return (void*)(intptr_t)key; } // 2 conversion functions to make gcc happy |
141 | node_idx_t _2_node_idx_t(const void* k) const { return (node_idx_t)(intptr_t)k; } |
142 | Dict* dict() const { return _dict; } |
143 | void insert(node_idx_t key, uint64_t val) { assert(_dict->operator[](_2p(key)) == NULL, "key existed")do { if (!(_dict->operator[](_2p(key)) == __null)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp" , 143, "assert(" "_dict->operator[](_2p(key)) == __null" ") failed" , "key existed"); ::breakpoint(); } } while (0); _dict->Insert(_2p(key), (void*)val); } |
144 | void insert(node_idx_t key, NodeCloneInfo& ci) { insert(key, ci.get()); } |
145 | void remove(node_idx_t key) { _dict->Delete(_2p(key)); } |
146 | uint64_t value(node_idx_t key) const { return (uint64_t)_dict->operator[](_2p(key)); } |
147 | node_idx_t idx(node_idx_t key) const { return NodeCloneInfo(value(key)).idx(); } |
148 | int gen(node_idx_t key) const { return NodeCloneInfo(value(key)).gen(); } |
149 | int gen(const void* k) const { return gen(_2_node_idx_t(k)); } |
150 | int max_gen() const; |
151 | void clone(Node* old, Node* nnn, int gen); |
152 | void verify_insert_and_clone(Node* old, Node* nnn, int gen); |
153 | void dump(node_idx_t key) const; |
154 | |
155 | int clone_idx() const { return _clone_idx; } |
156 | void set_clone_idx(int x) { _clone_idx = x; } |
157 | bool is_debug() const { return _debug; } |
158 | void set_debug(bool debug) { _debug = debug; } |
159 | static const char* debug_option_name; |
160 | |
161 | bool same_idx(node_idx_t k1, node_idx_t k2) const { return idx(k1) == idx(k2); } |
162 | bool same_gen(node_idx_t k1, node_idx_t k2) const { return gen(k1) == gen(k2); } |
163 | }; |
164 | |
// Immutable bundle of boolean compilation switches. The fields are private
// and const; only the friend classes Compile and VMStructs can read them.
class Options {
  friend class Compile;
  friend class VMStructs;
 private:
  const bool _subsume_loads;                // Load can be matched as part of a larger op.
  const bool _do_escape_analysis;           // Do escape analysis.
  const bool _do_iterative_escape_analysis; // Do iterative escape analysis.
  const bool _eliminate_boxing;             // Do boxing elimination.
  const bool _do_locks_coarsening;          // Do locks coarsening
  const bool _install_code;                 // Install the code that was compiled
 public:
  // Each argument initializes the field of the same name.
  Options(bool subsume_loads,
          bool do_escape_analysis,
          bool do_iterative_escape_analysis,
          bool eliminate_boxing,
          bool do_locks_coarsening,
          bool install_code)
    : _subsume_loads(subsume_loads),
      _do_escape_analysis(do_escape_analysis),
      _do_iterative_escape_analysis(do_iterative_escape_analysis),
      _eliminate_boxing(eliminate_boxing),
      _do_locks_coarsening(do_locks_coarsening),
      _install_code(install_code) {
  }

  // Option set for runtime stubs: only load subsumption and code
  // installation are enabled.
  static Options for_runtime_stub() {
    const bool subsume_loads                = true;
    const bool do_escape_analysis           = false;
    const bool do_iterative_escape_analysis = false;
    const bool eliminate_boxing             = false;
    const bool do_lock_coarsening           = false;
    const bool install_code                 = true;
    return Options(subsume_loads,
                   do_escape_analysis,
                   do_iterative_escape_analysis,
                   eliminate_boxing,
                   do_lock_coarsening,
                   install_code);
  }
};
199 | |
200 | //------------------------------Compile---------------------------------------- |
201 | // This class defines a top-level Compiler invocation. |
202 | |
203 | class Compile : public Phase { |
204 | friend class VMStructs; |
205 | |
206 | public: |
207 | // Fixed alias indexes. (See also MergeMemNode.) |
208 | enum { |
209 | AliasIdxTop = 1, // pseudo-index, aliases to nothing (used as sentinel value) |
210 | AliasIdxBot = 2, // pseudo-index, aliases to everything |
211 | AliasIdxRaw = 3 // hard-wired index for TypeRawPtr::BOTTOM |
212 | }; |
213 | |
214 | // Variant of TraceTime(NULL, &_t_accumulator, CITime); |
215 | // Integrated with logging. If logging is turned on, and CITimeVerbose is true, |
216 | // then brackets are put into the log, with time stamps and node counts. |
217 | // (The time collection itself is always conditionalized on CITime.) |
218 | class TracePhase : public TraceTime { |
219 | private: |
220 | Compile* C; |
221 | CompileLog* _log; |
222 | const char* _phase_name; |
223 | bool _dolog; |
224 | public: |
225 | TracePhase(const char* name, elapsedTimer* accumulator); |
226 | ~TracePhase(); |
227 | }; |
228 | |
229 | // Information per category of alias (memory slice) |
230 | class AliasType { |
231 | private: |
232 | friend class Compile; |
233 | |
234 | int _index; // unique index, used with MergeMemNode |
235 | const TypePtr* _adr_type; // normalized address type |
236 | ciField* _field; // relevant instance field, or null if none |
237 | const Type* _element; // relevant array element type, or null if none |
238 | bool _is_rewritable; // false if the memory is write-once only |
239 | int _general_index; // if this is type is an instance, the general |
240 | // type that this is an instance of |
241 | |
242 | void Init(int i, const TypePtr* at); |
243 | |
244 | public: |
245 | int index() const { return _index; } |
246 | const TypePtr* adr_type() const { return _adr_type; } |
247 | ciField* field() const { return _field; } |
248 | const Type* element() const { return _element; } |
249 | bool is_rewritable() const { return _is_rewritable; } |
250 | bool is_volatile() const { return (_field ? _field->is_volatile() : false); } |
251 | int general_index() const { return (_general_index != 0) ? _general_index : _index; } |
252 | |
253 | void set_rewritable(bool z) { _is_rewritable = z; } |
254 | void set_field(ciField* f) { |
255 | assert(!_field,"")do { if (!(!_field)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp" , 255, "assert(" "!_field" ") failed", ""); ::breakpoint(); } } while (0); |
256 | _field = f; |
257 | if (f->is_final() || f->is_stable()) { |
258 | // In the case of @Stable, multiple writes are possible but may be assumed to be no-ops. |
259 | _is_rewritable = false; |
260 | } |
261 | } |
262 | void set_element(const Type* e) { |
263 | assert(_element == NULL, "")do { if (!(_element == __null)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp" , 263, "assert(" "_element == __null" ") failed", ""); ::breakpoint (); } } while (0); |
264 | _element = e; |
265 | } |
266 | |
267 | BasicType basic_type() const; |
268 | |
269 | void print_on(outputStream* st) PRODUCT_RETURN; |
270 | }; |
271 | |
272 | enum { |
273 | logAliasCacheSize = 6, |
274 | AliasCacheSize = (1<<logAliasCacheSize) |
275 | }; |
276 | struct AliasCacheEntry { const TypePtr* _adr_type; int _index; }; // simple duple type |
277 | enum { |
278 | trapHistLength = MethodData::_trap_hist_limit |
279 | }; |
280 | |
281 | private: |
282 | // Fixed parameters to this compilation. |
283 | const int _compile_id; |
284 | const Options _options; // Compilation options |
285 | ciMethod* _method; // The method being compiled. |
286 | int _entry_bci; // entry bci for osr methods. |
287 | const TypeFunc* _tf; // My kind of signature |
288 | InlineTree* _ilt; // Ditto (temporary). |
289 | address _stub_function; // VM entry for stub being compiled, or NULL |
290 | const char* _stub_name; // Name of stub or adapter being compiled, or NULL |
291 | address _stub_entry_point; // Compile code entry for generated stub, or NULL |
292 | |
293 | // Control of this compilation. |
294 | int _max_inline_size; // Max inline size for this compilation |
295 | int _freq_inline_size; // Max hot method inline size for this compilation |
296 | int _fixed_slots; // count of frame slots not allocated by the register |
297 | // allocator i.e. locks, original deopt pc, etc. |
298 | uintx _max_node_limit; // Max unique node count during a single compilation. |
299 | |
300 | bool _post_loop_opts_phase; // Loop opts are finished. |
301 | |
302 | int _major_progress; // Count of something big happening |
303 | bool _inlining_progress; // progress doing incremental inlining? |
304 | bool _inlining_incrementally;// Are we doing incremental inlining (post parse) |
305 | bool _do_cleanup; // Cleanup is needed before proceeding with incremental inlining |
306 | bool _has_loops; // True if the method _may_ have some loops |
307 | bool _has_split_ifs; // True if the method _may_ have some split-if |
308 | bool _has_unsafe_access; // True if the method _may_ produce faults in unsafe loads or stores. |
309 | bool _has_stringbuilder; // True StringBuffers or StringBuilders are allocated |
310 | bool _has_boxed_value; // True if a boxed object is allocated |
311 | bool _has_reserved_stack_access; // True if the method or an inlined method is annotated with ReservedStackAccess |
312 | uint _max_vector_size; // Maximum size of generated vectors |
313 | bool _clear_upper_avx; // Clear upper bits of ymm registers using vzeroupper |
314 | uint _trap_hist[trapHistLength]; // Cumulative traps |
315 | bool _trap_can_recompile; // Have we emitted a recompiling trap? |
316 | uint _decompile_count; // Cumulative decompilation counts. |
317 | bool _do_inlining; // True if we intend to do inlining |
318 | bool _do_scheduling; // True if we intend to do scheduling |
319 | bool _do_freq_based_layout; // True if we intend to do frequency based block layout |
320 | bool _do_vector_loop; // True if allowed to execute loop in parallel iterations |
321 | bool _use_cmove; // True if CMove should be used without profitability analysis |
322 | bool _age_code; // True if we need to profile code age (decrement the aging counter) |
323 | int _AliasLevel; // Locally-adjusted version of AliasLevel flag. |
324 | bool _print_assembly; // True if we should dump assembly code for this compilation |
325 | bool _print_inlining; // True if we should print inlining for this compilation |
326 | bool _print_intrinsics; // True if we should print intrinsics for this compilation |
327 | #ifndef PRODUCT |
328 | uint _igv_idx; // Counter for IGV node identifiers |
329 | bool _trace_opto_output; |
330 | bool _print_ideal; |
331 | bool _parsed_irreducible_loop; // True if ciTypeFlow detected irreducible loops during parsing |
332 | #endif |
333 | bool _has_irreducible_loop; // Found irreducible loops |
334 | // JSR 292 |
335 | bool _has_method_handle_invokes; // True if this method has MethodHandle invokes. |
336 | RTMState _rtm_state; // State of Restricted Transactional Memory usage |
337 | int _loop_opts_cnt; // loop opts round |
338 | bool _clinit_barrier_on_entry; // True if clinit barrier is needed on nmethod entry |
339 | uint _stress_seed; // Seed for stress testing |
340 | |
341 | // Compilation environment. |
342 | Arena _comp_arena; // Arena with lifetime equivalent to Compile |
343 | void* _barrier_set_state; // Potential GC barrier state for Compile |
344 | ciEnv* _env; // CI interface |
345 | DirectiveSet* _directive; // Compiler directive |
346 | CompileLog* _log; // from CompilerThread |
347 | const char* _failure_reason; // for record_failure/failing pattern |
348 | GrowableArray<CallGenerator*> _intrinsics; // List of intrinsics. |
349 | GrowableArray<Node*> _macro_nodes; // List of nodes which need to be expanded before matching. |
350 | GrowableArray<Node*> _predicate_opaqs; // List of Opaque1 nodes for the loop predicates. |
351 | GrowableArray<Node*> _skeleton_predicate_opaqs; // List of Opaque4 nodes for the loop skeleton predicates. |
352 | GrowableArray<Node*> _expensive_nodes; // List of nodes that are expensive to compute and that we'd better not let the GVN freely common |
353 | GrowableArray<Node*> _for_post_loop_igvn; // List of nodes for IGVN after loop opts are over |
354 | GrowableArray<Node_List*> _coarsened_locks; // List of coarsened Lock and Unlock nodes |
355 | ConnectionGraph* _congraph; |
356 | #ifndef PRODUCT |
357 | IdealGraphPrinter* _printer; |
358 | static IdealGraphPrinter* _debug_file_printer; |
359 | static IdealGraphPrinter* _debug_network_printer; |
360 | #endif |
361 | |
362 | |
363 | // Node management |
364 | uint _unique; // Counter for unique Node indices |
365 | VectorSet _dead_node_list; // Set of dead nodes |
366 | uint _dead_node_count; // Number of dead nodes; VectorSet::Size() is O(N). |
367 | // So use this to keep count and make the call O(1). |
368 | DEBUG_ONLY(Unique_Node_List* _modified_nodes;)Unique_Node_List* _modified_nodes; // List of nodes which inputs were modified |
369 | DEBUG_ONLY(bool _phase_optimize_finished;)bool _phase_optimize_finished; // Used for live node verification while creating new nodes |
370 | |
371 | debug_only(static int _debug_idx;)static int _debug_idx; // Monotonic counter (not reset), use -XX:BreakAtNode=<idx> |
372 | Arena _node_arena; // Arena for new-space Nodes |
373 | Arena _old_arena; // Arena for old-space Nodes, lifetime during xform |
374 | RootNode* _root; // Unique root of compilation, or NULL after bail-out. |
375 | Node* _top; // Unique top node. (Reset by various phases.) |
376 | |
377 | Node* _immutable_memory; // Initial memory state |
378 | |
379 | Node* _recent_alloc_obj; |
380 | Node* _recent_alloc_ctl; |
381 | |
382 | // Constant table |
383 | MachConstantBaseNode* _mach_constant_base_node; // Constant table base node singleton. |
384 | |
385 | |
386 | // Blocked array of debugging and profiling information, |
387 | // tracked per node. |
388 | enum { _log2_node_notes_block_size = 8, |
389 | _node_notes_block_size = (1<<_log2_node_notes_block_size) |
390 | }; |
391 | GrowableArray<Node_Notes*>* _node_note_array; |
392 | Node_Notes* _default_node_notes; // default notes for new nodes |
393 | |
394 | // After parsing and every bulk phase we hang onto the Root instruction. |
395 | // The RootNode instruction is where the whole program begins. It produces |
396 | // the initial Control and BOTTOM for everybody else. |
397 | |
398 | // Type management |
399 | Arena _Compile_types; // Arena for all types |
400 | Arena* _type_arena; // Alias for _Compile_types except in Initialize_shared() |
401 | Dict* _type_dict; // Intern table |
402 | CloneMap _clone_map; // used for recording history of cloned nodes |
403 | size_t _type_last_size; // Last allocation size (see Type::operator new/delete) |
404 | ciMethod* _last_tf_m; // Cache for |
405 | const TypeFunc* _last_tf; // TypeFunc::make |
406 | AliasType** _alias_types; // List of alias types seen so far. |
407 | int _num_alias_types; // Logical length of _alias_types |
408 | int _max_alias_types; // Physical length of _alias_types |
409 | AliasCacheEntry _alias_cache[AliasCacheSize]; // Gets aliases w/o data structure walking |
410 | |
411 | // Parsing, optimization |
412 | PhaseGVN* _initial_gvn; // Results of parse-time PhaseGVN |
413 | Unique_Node_List* _for_igvn; // Initial work-list for next round of Iterative GVN |
414 | |
415 | GrowableArray<CallGenerator*> _late_inlines; // List of CallGenerators to be revisited after main parsing has finished. |
416 | GrowableArray<CallGenerator*> _string_late_inlines; // same but for string operations |
417 | GrowableArray<CallGenerator*> _boxing_late_inlines; // same but for boxing operations |
418 | |
419 | GrowableArray<CallGenerator*> _vector_reboxing_late_inlines; // same but for vector reboxing operations |
420 | |
421 | int _late_inlines_pos; // Where in the queue should the next late inlining candidate go (emulate depth first inlining) |
422 | uint _number_of_mh_late_inlines; // number of method handle late inlining still pending |
423 | |
424 | GrowableArray<RuntimeStub*> _native_invokers; |
425 | |
426 | // Inlining may not happen in parse order which would make |
427 | // PrintInlining output confusing. Keep track of PrintInlining |
428 | // pieces in order. |
429 | class PrintInliningBuffer : public CHeapObj<mtCompiler> { |
430 | private: |
431 | CallGenerator* _cg; |
432 | stringStream _ss; |
433 | static const size_t default_stream_buffer_size = 128; |
434 | |
435 | public: |
436 | PrintInliningBuffer() |
437 | : _cg(NULL__null), _ss(default_stream_buffer_size) {} |
438 | |
439 | stringStream* ss() { return &_ss; } |
440 | CallGenerator* cg() { return _cg; } |
441 | void set_cg(CallGenerator* cg) { _cg = cg; } |
442 | }; |
443 | |
444 | stringStream* _print_inlining_stream; |
445 | GrowableArray<PrintInliningBuffer*>* _print_inlining_list; |
446 | int _print_inlining_idx; |
447 | char* _print_inlining_output; |
448 | |
449 | // Only keep nodes in the expensive node list that need to be optimized |
450 | void cleanup_expensive_nodes(PhaseIterGVN &igvn); |
451 | // Use for sorting expensive nodes to bring similar nodes together |
452 | static int cmp_expensive_nodes(Node** n1, Node** n2); |
453 | // Expensive nodes list already sorted? |
454 | bool expensive_nodes_sorted() const; |
455 | // Remove the speculative part of types and clean up the graph |
456 | void remove_speculative_types(PhaseIterGVN &igvn); |
457 | |
458 | void* _replay_inline_data; // Pointer to data loaded from file |
459 | |
460 | void print_inlining_stream_free(); |
461 | void print_inlining_init(); |
462 | void print_inlining_reinit(); |
463 | void print_inlining_commit(); |
464 | void print_inlining_push(); |
465 | PrintInliningBuffer* print_inlining_current(); |
466 | |
467 | void log_late_inline_failure(CallGenerator* cg, const char* msg); |
468 | DEBUG_ONLY(bool _exception_backedge;)bool _exception_backedge; |
469 | |
470 | public: |
471 | |
472 | void* barrier_set_state() const { return _barrier_set_state; } |
473 | |
474 | outputStream* print_inlining_stream() const { |
475 | assert(print_inlining() || print_intrinsics(), "PrintInlining off?")do { if (!(print_inlining() || print_intrinsics())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp" , 475, "assert(" "print_inlining() || print_intrinsics()" ") failed" , "PrintInlining off?"); ::breakpoint(); } } while (0); |
476 | return _print_inlining_stream; |
477 | } |
478 | |
479 | void print_inlining_update(CallGenerator* cg); |
480 | void print_inlining_update_delayed(CallGenerator* cg); |
481 | void print_inlining_move_to(CallGenerator* cg); |
482 | void print_inlining_assert_ready(); |
483 | void print_inlining_reset(); |
484 | |
485 | void print_inlining(ciMethod* method, int inline_level, int bci, const char* msg = NULL__null) { |
486 | stringStream ss; |
487 | CompileTask::print_inlining_inner(&ss, method, inline_level, bci, msg); |
488 | print_inlining_stream()->print("%s", ss.as_string()); |
489 | } |
490 | |
491 | #ifndef PRODUCT |
492 | IdealGraphPrinter* printer() { return _printer; } |
493 | #endif |
494 | |
495 | void log_late_inline(CallGenerator* cg); |
496 | void log_inline_id(CallGenerator* cg); |
497 | void log_inline_failure(const char* msg); |
498 | |
499 | void* replay_inline_data() const { return _replay_inline_data; } |
500 | |
501 | // Dump inlining replay data to the stream. |
502 | void dump_inline_data(outputStream* out); |
503 | |
504 | private: |
505 | // Matching, CFG layout, allocation, code generation |
506 | PhaseCFG* _cfg; // Results of CFG finding |
507 | int _java_calls; // Number of java calls in the method |
508 | int _inner_loops; // Number of inner loops in the method |
509 | Matcher* _matcher; // Engine to map ideal to machine instructions |
510 | PhaseRegAlloc* _regalloc; // Results of register allocation. |
511 | RegMask _FIRST_STACK_mask; // All stack slots usable for spills (depends on frame layout) |
512 | Arena* _indexSet_arena; // control IndexSet allocation within PhaseChaitin |
513 | void* _indexSet_free_block_list; // free list of IndexSet bit blocks |
514 | int _interpreter_frame_size; |
515 | |
516 | PhaseOutput* _output; |
517 | |
518 | public: |
519 | // Accessors |
520 | |
521 | // The Compile instance currently active in this (compiler) thread. |
522 | static Compile* current() { |
523 | return (Compile*) ciEnv::current()->compiler_data(); |
524 | } |
525 | |
526 | int interpreter_frame_size() const { return _interpreter_frame_size; } |
527 | |
528 | PhaseOutput* output() const { return _output; } |
529 | void set_output(PhaseOutput* o) { _output = o; } |
530 | |
531 | // ID for this compilation. Useful for setting breakpoints in the debugger. |
532 | int compile_id() const { return _compile_id; } |
533 | DirectiveSet* directive() const { return _directive; } |
534 | |
535 | // Does this compilation allow instructions to subsume loads? User |
536 | // instructions that subsume a load may result in an unschedulable |
537 | // instruction sequence. |
538 | bool subsume_loads() const { return _options._subsume_loads; } |
539 | /** Do escape analysis. */ |
540 | bool do_escape_analysis() const { return _options._do_escape_analysis; } |
541 | bool do_iterative_escape_analysis() const { return _options._do_iterative_escape_analysis; } |
542 | /** Do boxing elimination. */ |
543 | bool eliminate_boxing() const { return _options._eliminate_boxing; } |
544 | /** Do aggressive boxing elimination. */ |
545 | bool aggressive_unboxing() const { return _options._eliminate_boxing && AggressiveUnboxing; } |
546 | bool should_install_code() const { return _options._install_code; } |
547 | /** Do locks coarsening. */ |
548 | bool do_locks_coarsening() const { return _options._do_locks_coarsening; } |
549 | |
550 | // Other fixed compilation parameters. |
551 | ciMethod* method() const { return _method; } |
552 | int entry_bci() const { return _entry_bci; } |
553 | bool is_osr_compilation() const { return _entry_bci != InvocationEntryBci; } |
554 | bool is_method_compilation() const { return (_method != NULL__null && !_method->flags().is_native()); } |
555 | const TypeFunc* tf() const { assert(_tf!=NULL, "")do { if (!(_tf!=__null)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp" , 555, "assert(" "_tf!=__null" ") failed", ""); ::breakpoint( ); } } while (0); return _tf; } |
556 | void init_tf(const TypeFunc* tf) { assert(_tf==NULL, "")do { if (!(_tf==__null)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp" , 556, "assert(" "_tf==__null" ") failed", ""); ::breakpoint( ); } } while (0); _tf = tf; } |
557 | InlineTree* ilt() const { return _ilt; } |
558 | address stub_function() const { return _stub_function; } |
559 | const char* stub_name() const { return _stub_name; } |
560 | address stub_entry_point() const { return _stub_entry_point; } |
561 | void set_stub_entry_point(address z) { _stub_entry_point = z; } |
562 | |
563 | // Control of this compilation. |
564 | int fixed_slots() const { assert(_fixed_slots >= 0, "")do { if (!(_fixed_slots >= 0)) { (*g_assert_poison) = 'X'; ; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp" , 564, "assert(" "_fixed_slots >= 0" ") failed", ""); ::breakpoint (); } } while (0); return _fixed_slots; } |
565 | void set_fixed_slots(int n) { _fixed_slots = n; } |
566 | int major_progress() const { return _major_progress; } |
567 | void set_inlining_progress(bool z) { _inlining_progress = z; } |
568 | int inlining_progress() const { return _inlining_progress; } |
569 | void set_inlining_incrementally(bool z) { _inlining_incrementally = z; } |
570 | int inlining_incrementally() const { return _inlining_incrementally; } |
571 | void set_do_cleanup(bool z) { _do_cleanup = z; } |
572 | int do_cleanup() const { return _do_cleanup; } |
573 | void set_major_progress() { _major_progress++; } |
574 | void restore_major_progress(int progress) { _major_progress += progress; } |
575 | void clear_major_progress() { _major_progress = 0; } |
576 | int max_inline_size() const { return _max_inline_size; } |
577 | void set_freq_inline_size(int n) { _freq_inline_size = n; } |
578 | int freq_inline_size() const { return _freq_inline_size; } |
579 | void set_max_inline_size(int n) { _max_inline_size = n; } |
580 | bool has_loops() const { return _has_loops; } |
581 | void set_has_loops(bool z) { _has_loops = z; } |
582 | bool has_split_ifs() const { return _has_split_ifs; } |
583 | void set_has_split_ifs(bool z) { _has_split_ifs = z; } |
584 | bool has_unsafe_access() const { return _has_unsafe_access; } |
585 | void set_has_unsafe_access(bool z) { _has_unsafe_access = z; } |
586 | bool has_stringbuilder() const { return _has_stringbuilder; } |
587 | void set_has_stringbuilder(bool z) { _has_stringbuilder = z; } |
588 | bool has_boxed_value() const { return _has_boxed_value; } |
589 | void set_has_boxed_value(bool z) { _has_boxed_value = z; } |
590 | bool has_reserved_stack_access() const { return _has_reserved_stack_access; } |
591 | void set_has_reserved_stack_access(bool z) { _has_reserved_stack_access = z; } |
592 | uint max_vector_size() const { return _max_vector_size; } |
593 | void set_max_vector_size(uint s) { _max_vector_size = s; } |
594 | bool clear_upper_avx() const { return _clear_upper_avx; } |
595 | void set_clear_upper_avx(bool s) { _clear_upper_avx = s; } |
596 | void set_trap_count(uint r, uint c) { assert(r < trapHistLength, "oob")do { if (!(r < trapHistLength)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp" , 596, "assert(" "r < trapHistLength" ") failed", "oob"); :: breakpoint(); } } while (0); _trap_hist[r] = c; } |
597 | uint trap_count(uint r) const { assert(r < trapHistLength, "oob")do { if (!(r < trapHistLength)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp" , 597, "assert(" "r < trapHistLength" ") failed", "oob"); :: breakpoint(); } } while (0); return _trap_hist[r]; } |
598 | bool trap_can_recompile() const { return _trap_can_recompile; } |
599 | void set_trap_can_recompile(bool z) { _trap_can_recompile = z; } |
600 | uint decompile_count() const { return _decompile_count; } |
601 | void set_decompile_count(uint c) { _decompile_count = c; } |
602 | bool allow_range_check_smearing() const; |
603 | bool do_inlining() const { return _do_inlining; } |
604 | void set_do_inlining(bool z) { _do_inlining = z; } |
605 | bool do_scheduling() const { return _do_scheduling; } |
606 | void set_do_scheduling(bool z) { _do_scheduling = z; } |
607 | bool do_freq_based_layout() const{ return _do_freq_based_layout; } |
608 | void set_do_freq_based_layout(bool z){ _do_freq_based_layout = z; } |
609 | bool do_vector_loop() const { return _do_vector_loop; } |
610 | void set_do_vector_loop(bool z) { _do_vector_loop = z; } |
611 | bool use_cmove() const { return _use_cmove; } |
612 | void set_use_cmove(bool z) { _use_cmove = z; } |
613 | bool age_code() const { return _age_code; } |
614 | void set_age_code(bool z) { _age_code = z; } |
615 | int AliasLevel() const { return _AliasLevel; } |
616 | bool print_assembly() const { return _print_assembly; } |
617 | void set_print_assembly(bool z) { _print_assembly = z; } |
618 | bool print_inlining() const { return _print_inlining; } |
619 | void set_print_inlining(bool z) { _print_inlining = z; } |
620 | bool print_intrinsics() const { return _print_intrinsics; } |
621 | void set_print_intrinsics(bool z) { _print_intrinsics = z; } |
622 | RTMState rtm_state() const { return _rtm_state; } |
623 | void set_rtm_state(RTMState s) { _rtm_state = s; } |
624 | bool use_rtm() const { return (_rtm_state & NoRTM) == 0; } |
625 | bool profile_rtm() const { return _rtm_state == ProfileRTM; } |
626 | uint max_node_limit() const { return (uint)_max_node_limit; } |
627 | void set_max_node_limit(uint n) { _max_node_limit = n; } |
628 | bool clinit_barrier_on_entry() { return _clinit_barrier_on_entry; } |
629 | void set_clinit_barrier_on_entry(bool z) { _clinit_barrier_on_entry = z; } |
630 | |
631 | // check the CompilerOracle for special behaviours for this compile |
632 | bool method_has_option(enum CompileCommand option) { |
633 | return method() != NULL__null && method()->has_option(option); |
634 | } |
635 | |
636 | #ifndef PRODUCT |
637 | uint next_igv_idx() { return _igv_idx++; } |
638 | bool trace_opto_output() const { return _trace_opto_output; } |
639 | bool print_ideal() const { return _print_ideal; } |
640 | bool parsed_irreducible_loop() const { return _parsed_irreducible_loop; } |
641 | void set_parsed_irreducible_loop(bool z) { _parsed_irreducible_loop = z; } |
642 | int _in_dump_cnt; // Required for dumping ir nodes. |
643 | #endif |
644 | bool has_irreducible_loop() const { return _has_irreducible_loop; } |
645 | void set_has_irreducible_loop(bool z) { _has_irreducible_loop = z; } |
646 | |
647 | // JSR 292 |
648 | bool has_method_handle_invokes() const { return _has_method_handle_invokes; } |
649 | void set_has_method_handle_invokes(bool z) { _has_method_handle_invokes = z; } |
650 | |
651 | Ticks _latest_stage_start_counter; |
652 | |
653 | void begin_method(int level = 1) { |
654 | #ifndef PRODUCT |
655 | if (_method != NULL__null && should_print(level)) { |
656 | _printer->begin_method(); |
657 | } |
658 | #endif |
659 | C->_latest_stage_start_counter.stamp(); |
660 | } |
661 | |
662 | bool should_print(int level = 1) { |
663 | #ifndef PRODUCT |
664 | if (PrintIdealGraphLevel < 0) { // disabled by the user |
665 | return false; |
666 | } |
667 | |
668 | bool need = directive()->IGVPrintLevelOption >= level; |
669 | if (need && !_printer) { |
670 | _printer = IdealGraphPrinter::printer(); |
671 | assert(_printer != NULL, "_printer is NULL when we need it!")do { if (!(_printer != __null)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp" , 671, "assert(" "_printer != __null" ") failed", "_printer is NULL when we need it!" ); ::breakpoint(); } } while (0); |
672 | _printer->set_compile(this); |
673 | } |
674 | return need; |
675 | #else |
676 | return false; |
677 | #endif |
678 | } |
679 | |
680 | void print_method(CompilerPhaseType cpt, const char *name, int level = 1); |
681 | void print_method(CompilerPhaseType cpt, int level = 1, int idx = 0); |
682 | void print_method(CompilerPhaseType cpt, Node* n, int level = 3); |
683 | |
684 | #ifndef PRODUCT |
685 | void igv_print_method_to_file(const char* phase_name = "Debug", bool append = false); |
686 | void igv_print_method_to_network(const char* phase_name = "Debug"); |
687 | static IdealGraphPrinter* debug_file_printer() { return _debug_file_printer; } |
688 | static IdealGraphPrinter* debug_network_printer() { return _debug_network_printer; } |
689 | #endif |
690 | |
691 | void end_method(int level = 1); |
692 | |
693 | int macro_count() const { return _macro_nodes.length(); } |
694 | int predicate_count() const { return _predicate_opaqs.length(); } |
695 | int skeleton_predicate_count() const { return _skeleton_predicate_opaqs.length(); } |
696 | int expensive_count() const { return _expensive_nodes.length(); } |
697 | int coarsened_count() const { return _coarsened_locks.length(); } |
698 | |
699 | Node* macro_node(int idx) const { return _macro_nodes.at(idx); } |
700 | Node* predicate_opaque1_node(int idx) const { return _predicate_opaqs.at(idx); } |
701 | Node* skeleton_predicate_opaque4_node(int idx) const { return _skeleton_predicate_opaqs.at(idx); } |
702 | Node* expensive_node(int idx) const { return _expensive_nodes.at(idx); } |
703 | |
704 | ConnectionGraph* congraph() { return _congraph;} |
705 | void set_congraph(ConnectionGraph* congraph) { _congraph = congraph;} |
706 | void add_macro_node(Node * n) { |
707 | //assert(n->is_macro(), "must be a macro node"); |
708 | assert(!_macro_nodes.contains(n), "duplicate entry in expand list")do { if (!(!_macro_nodes.contains(n))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp" , 708, "assert(" "!_macro_nodes.contains(n)" ") failed", "duplicate entry in expand list" ); ::breakpoint(); } } while (0); |
709 | _macro_nodes.append(n); |
710 | } |
711 | void remove_macro_node(Node* n) { |
712 | // this function may be called twice for a node so we can only remove it |
713 | // if it's still existing. |
714 | _macro_nodes.remove_if_existing(n); |
715 | // remove from _predicate_opaqs list also if it is there |
716 | if (predicate_count() > 0) { |
717 | _predicate_opaqs.remove_if_existing(n); |
718 | } |
719 | // Remove from coarsened locks list if present |
720 | if (coarsened_count() > 0) { |
721 | remove_coarsened_lock(n); |
722 | } |
723 | } |
724 | void add_expensive_node(Node* n); |
725 | void remove_expensive_node(Node* n) { |
726 | _expensive_nodes.remove_if_existing(n); |
727 | } |
728 | void add_predicate_opaq(Node* n) { |
729 | assert(!_predicate_opaqs.contains(n), "duplicate entry in predicate opaque1")do { if (!(!_predicate_opaqs.contains(n))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp" , 729, "assert(" "!_predicate_opaqs.contains(n)" ") failed", "duplicate entry in predicate opaque1" ); ::breakpoint(); } } while (0); |
730 | assert(_macro_nodes.contains(n), "should have already been in macro list")do { if (!(_macro_nodes.contains(n))) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp" , 730, "assert(" "_macro_nodes.contains(n)" ") failed", "should have already been in macro list" ); ::breakpoint(); } } while (0); |
731 | _predicate_opaqs.append(n); |
732 | } |
733 | void add_skeleton_predicate_opaq(Node* n) { |
734 | assert(!_skeleton_predicate_opaqs.contains(n), "duplicate entry in skeleton predicate opaque4 list")do { if (!(!_skeleton_predicate_opaqs.contains(n))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp" , 734, "assert(" "!_skeleton_predicate_opaqs.contains(n)" ") failed" , "duplicate entry in skeleton predicate opaque4 list"); ::breakpoint (); } } while (0); |
735 | _skeleton_predicate_opaqs.append(n); |
736 | } |
737 | void remove_skeleton_predicate_opaq(Node* n) { |
738 | if (skeleton_predicate_count() > 0) { |
739 | _skeleton_predicate_opaqs.remove_if_existing(n); |
740 | } |
741 | } |
742 | void add_coarsened_locks(GrowableArray<AbstractLockNode*>& locks); |
743 | void remove_coarsened_lock(Node* n); |
744 | bool coarsened_locks_consistent(); |
745 | |
746 | bool post_loop_opts_phase() { return _post_loop_opts_phase; } |
747 | void set_post_loop_opts_phase() { _post_loop_opts_phase = true; } |
748 | void reset_post_loop_opts_phase() { _post_loop_opts_phase = false; } |
749 | |
750 | void record_for_post_loop_opts_igvn(Node* n); |
751 | void remove_from_post_loop_opts_igvn(Node* n); |
752 | void process_for_post_loop_opts_igvn(PhaseIterGVN& igvn); |
753 | |
754 | void sort_macro_nodes(); |
755 | |
756 | // remove the opaque nodes that protect the predicates so that the unused checks and |
757 | // uncommon traps will be eliminated from the graph. |
758 | void cleanup_loop_predicates(PhaseIterGVN &igvn); |
759 | bool is_predicate_opaq(Node* n) { |
760 | return _predicate_opaqs.contains(n); |
761 | } |
762 | |
763 | // Are there candidate expensive nodes for optimization? |
764 | bool should_optimize_expensive_nodes(PhaseIterGVN &igvn); |
765 | // Check whether n1 and n2 are similar |
766 | static int cmp_expensive_nodes(Node* n1, Node* n2); |
767 | // Sort expensive nodes to locate similar expensive nodes |
768 | void sort_expensive_nodes(); |
769 | |
770 | // Compilation environment. |
771 | Arena* comp_arena() { return &_comp_arena; } |
772 | ciEnv* env() const { return _env; } |
773 | CompileLog* log() const { return _log; } |
774 | bool failing() const { return _env->failing() || _failure_reason != NULL__null; } |
775 | const char* failure_reason() const { return (_env->failing()) ? _env->failure_reason() : _failure_reason; } |
776 | |
777 | bool failure_reason_is(const char* r) const { |
778 | return (r == _failure_reason) || (r != NULL__null && _failure_reason != NULL__null && strcmp(r, _failure_reason) == 0); |
779 | } |
780 | |
781 | void record_failure(const char* reason); |
782 | void record_method_not_compilable(const char* reason) { |
783 | env()->record_method_not_compilable(reason); |
784 | // The reason was forwarded to the ciEnv above; now also record it here via record_failure(). |
785 | record_failure(reason); |
786 | } |
787 | bool check_node_count(uint margin, const char* reason) { |
788 | // Calls record_method_not_compilable(reason) and returns true when live_nodes() + margin exceeds max_node_limit(); otherwise returns false. |
789 | const bool over_limit = live_nodes() + margin > max_node_limit(); |
790 | if (over_limit) { |
791 | record_method_not_compilable(reason); |
792 | } |
793 | return over_limit; |
794 | } |
795 | |
796 | // Node management |
797 | uint unique() const { return _unique; } |
798 | uint next_unique() { return _unique++; } |
799 | void set_unique(uint i) { _unique = i; } |
800 | static int debug_idx() { return debug_only(_debug_idx)_debug_idx+0; } |
801 | static void set_debug_idx(int i) { debug_only(_debug_idx = i)_debug_idx = i; } |
802 | Arena* node_arena() { return &_node_arena; } |
803 | Arena* old_arena() { return &_old_arena; } |
804 | RootNode* root() const { return _root; } |
805 | void set_root(RootNode* r) { _root = r; } |
806 | StartNode* start() const; // (Derived from root.) |
807 | void init_start(StartNode* s); |
808 | Node* immutable_memory(); |
809 | |
810 | Node* recent_alloc_ctl() const { return _recent_alloc_ctl; } |
811 | Node* recent_alloc_obj() const { return _recent_alloc_obj; } |
812 | void set_recent_alloc(Node* ctl, Node* obj) { |
813 | _recent_alloc_ctl = ctl; |
814 | _recent_alloc_obj = obj; |
815 | } |
816 | void record_dead_node(uint idx) { // Bump the dead-node count unless test_set(idx) reports true (presumably: idx was already recorded). |
817 | if (!_dead_node_list.test_set(idx)) { _dead_node_count++; } |
818 | } |
819 | void reset_dead_node_list() { _dead_node_list.reset(); |
820 | _dead_node_count = 0; |
821 | } |
822 | uint live_nodes() const { // Returns _unique minus _dead_node_count. |
823 | int val = _unique - _dead_node_count; |
824 | assert (val >= 0, "number of tracked dead nodes %d more than created nodes %d", _dead_node_count, _unique)do { if (!(val >= 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp" , 824, "assert(" "val >= 0" ") failed", "number of tracked dead nodes %d more than created nodes %d" , _dead_node_count, _unique); ::breakpoint(); } } while (0); // Arguments follow the message order: dead-node count first, created-node count second. |
825 | return (uint) val; |
826 | } |
827 | #ifdef ASSERT1 |
828 | void set_phase_optimize_finished() { _phase_optimize_finished = true; } |
829 | bool phase_optimize_finished() const { return _phase_optimize_finished; } |
830 | uint count_live_nodes_by_graph_walk(); |
831 | void print_missing_nodes(); |
832 | #endif |
833 | |
834 | // Record modified nodes to check that they are put on IGVN worklist |
835 | void record_modified_node(Node* n) NOT_DEBUG_RETURN; |
836 | void remove_modified_node(Node* n) NOT_DEBUG_RETURN; |
837 | DEBUG_ONLY( Unique_Node_List* modified_nodes() const { return _modified_nodes; } )Unique_Node_List* modified_nodes() const { return _modified_nodes ; } |
838 | |
839 | MachConstantBaseNode* mach_constant_base_node(); |
840 | bool has_mach_constant_base_node() const { return _mach_constant_base_node != NULL__null; } |
841 | // Generated by adlc, true if CallNode requires MachConstantBase. |
842 | bool needs_deep_clone_jvms(); |
843 | |
844 | // Handy undefined Node |
845 | Node* top() const { return _top; } |
846 | |
847 | // These are used by code that needs to know about the creation and transformation of top: |
848 | Node* cached_top_node() { return _top; } |
849 | void set_cached_top_node(Node* tn); |
850 | |
851 | GrowableArray<Node_Notes*>* node_note_array() const { return _node_note_array; } |
852 | void set_node_note_array(GrowableArray<Node_Notes*>* arr) { _node_note_array = arr; } |
853 | Node_Notes* default_node_notes() const { return _default_node_notes; } |
854 | void set_default_node_notes(Node_Notes* n) { _default_node_notes = n; } |
855 | |
856 | Node_Notes* node_notes_at(int idx) { |
857 | return locate_node_notes(_node_note_array, idx, false); |
858 | } |
859 | inline bool set_node_notes_at(int idx, Node_Notes* value); |
860 | |
861 | // Copy notes from source to dest, if they exist. |
862 | // Overwrite dest only if source provides something. |
863 | // Return true if information was moved. |
864 | bool copy_node_notes_to(Node* dest, Node* source); |
865 | |
866 | // Workhorse function to sort out the blocked Node_Notes array: |
867 | inline Node_Notes* locate_node_notes(GrowableArray<Node_Notes*>* arr, |
868 | int idx, bool can_grow = false); |
869 | |
870 | void grow_node_notes(GrowableArray<Node_Notes*>* arr, int grow_by); |
871 | |
872 | // Type management |
873 | Arena* type_arena() { return _type_arena; } |
874 | Dict* type_dict() { return _type_dict; } |
875 | size_t type_last_size() { return _type_last_size; } |
876 | int num_alias_types() { return _num_alias_types; } |
877 | |
878 | void init_type_arena() { _type_arena = &_Compile_types; } |
879 | void set_type_arena(Arena* a) { _type_arena = a; } |
880 | void set_type_dict(Dict* d) { _type_dict = d; } |
881 | void set_type_last_size(size_t sz) { _type_last_size = sz; } |
882 | |
883 | const TypeFunc* last_tf(ciMethod* m) { |
884 | return (m == _last_tf_m) ? _last_tf : NULL__null; |
885 | } |
886 | void set_last_tf(ciMethod* m, const TypeFunc* tf) { |
887 | assert(m != NULL || tf == NULL, "")do { if (!(m != __null || tf == __null)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp" , 887, "assert(" "m != __null || tf == __null" ") failed", "" ); ::breakpoint(); } } while (0); |
888 | _last_tf_m = m; |
889 | _last_tf = tf; |
890 | } |
891 | |
892 | AliasType* alias_type(int idx) { assert(idx < num_alias_types(), "oob")do { if (!(idx < num_alias_types())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp" , 892, "assert(" "idx < num_alias_types()" ") failed", "oob" ); ::breakpoint(); } } while (0); return _alias_types[idx]; } |
893 | AliasType* alias_type(const TypePtr* adr_type, ciField* field = NULL__null) { return find_alias_type(adr_type, false, field); } |
894 | bool have_alias_type(const TypePtr* adr_type); |
895 | AliasType* alias_type(ciField* field); |
896 | |
897 | int get_alias_index(const TypePtr* at) { return alias_type(at)->index(); } |
898 | const TypePtr* get_adr_type(uint aidx) { return alias_type(aidx)->adr_type(); } |
899 | int get_general_index(uint aidx) { return alias_type(aidx)->general_index(); } |
900 | |
901 | // Building nodes |
902 | void rethrow_exceptions(JVMState* jvms); |
903 | void return_values(JVMState* jvms); |
904 | JVMState* build_start_state(StartNode* start, const TypeFunc* tf); |
905 | |
906 | // Decide how to build a call. |
907 | // The profile factor is a discount to apply to this site's interp. profile. |
908 | CallGenerator* call_generator(ciMethod* call_method, int vtable_index, bool call_does_dispatch, |
909 | JVMState* jvms, bool allow_inline, float profile_factor, ciKlass* speculative_receiver_type = NULL__null, |
910 | bool allow_intrinsics = true); |
911 | bool should_delay_inlining(ciMethod* call_method, JVMState* jvms) { // True as soon as one of the string, boxing or vector checks (tried in that order) asks for a delay. |
912 | if (should_delay_string_inlining(call_method, jvms)) return true; |
913 | if (should_delay_boxing_inlining(call_method, jvms)) return true; |
914 | return should_delay_vector_inlining(call_method, jvms); |
915 | } |
916 | bool should_delay_string_inlining(ciMethod* call_method, JVMState* jvms); |
917 | bool should_delay_boxing_inlining(ciMethod* call_method, JVMState* jvms); |
918 | bool should_delay_vector_inlining(ciMethod* call_method, JVMState* jvms); |
919 | bool should_delay_vector_reboxing_inlining(ciMethod* call_method, JVMState* jvms); |
920 | |
921 | // Helper functions to identify inlining potential at call-site |
922 | ciMethod* optimize_virtual_call(ciMethod* caller, ciInstanceKlass* klass, |
923 | ciKlass* holder, ciMethod* callee, |
924 | const TypeOopPtr* receiver_type, bool is_virtual, |
925 | bool &call_does_dispatch, int &vtable_index, |
926 | bool check_access = true); |
927 | ciMethod* optimize_inlining(ciMethod* caller, ciInstanceKlass* klass, ciKlass* holder, |
928 | ciMethod* callee, const TypeOopPtr* receiver_type, |
929 | bool check_access = true); |
930 | |
931 | // Report if there were too many traps at a current method and bci. |
932 | // Report if a trap was recorded, and/or PerMethodTrapLimit was exceeded. |
933 | // If there is no MDO at all, report no trap unless told to assume it. |
934 | bool too_many_traps(ciMethod* method, int bci, Deoptimization::DeoptReason reason); |
935 | // This version, unspecific to a particular bci, asks if |
936 | // PerMethodTrapLimit was exceeded for all inlined methods seen so far. |
937 | bool too_many_traps(Deoptimization::DeoptReason reason, |
938 | // Privately used parameter for logging: |
939 | ciMethodData* logmd = NULL__null); |
940 | // Report if there were too many recompiles at a method and bci. |
941 | bool too_many_recompiles(ciMethod* method, int bci, Deoptimization::DeoptReason reason); |
942 | // Report whether too_many_traps() or, failing that, too_many_recompiles() holds for this method, bci and reason. |
943 | bool too_many_traps_or_recompiles(ciMethod* method, int bci, Deoptimization::DeoptReason reason) { |
944 | if (too_many_traps(method, bci, reason)) return true; |
945 | return too_many_recompiles(method, bci, reason); |
946 | } |
947 | // Return a bitset with the reasons where deoptimization is allowed, |
948 | // i.e., where there were not too many uncommon traps. |
949 | int _allowed_reasons; |
950 | int allowed_deopt_reasons() { return _allowed_reasons; } |
951 | void set_allowed_deopt_reasons(); |
952 | |
953 | // Parsing, optimization |
954 | PhaseGVN* initial_gvn() { return _initial_gvn; } |
955 | Unique_Node_List* for_igvn() { return _for_igvn; } |
956 | inline void record_for_igvn(Node* n); // Body is after class Unique_Node_List. |
957 | void set_initial_gvn(PhaseGVN *gvn) { _initial_gvn = gvn; } |
958 | void set_for_igvn(Unique_Node_List *for_igvn) { _for_igvn = for_igvn; } |
959 | |
960 | // Replace n by nn using initial_gvn, calling hash_delete and |
961 | // record_for_igvn as needed. |
962 | void gvn_replace_by(Node* n, Node* nn); |
963 | |
964 | |
965 | void identify_useful_nodes(Unique_Node_List &useful); |
966 | void update_dead_node_list(Unique_Node_List &useful); |
967 | void remove_useless_nodes (Unique_Node_List &useful); |
968 | |
969 | void remove_useless_node(Node* dead); |
970 | |
971 | // Record this CallGenerator for inlining at the end of parsing: cg is inserted before index _late_inlines_pos, which is then advanced by one. |
972 | void add_late_inline(CallGenerator* cg) { |
973 | _late_inlines.insert_before(_late_inlines_pos, cg); |
974 | _late_inlines_pos++; |
975 | } |
976 | |
977 | void prepend_late_inline(CallGenerator* cg) { |
978 | _late_inlines.insert_before(0, cg); |
979 | } |
980 | |
981 | void add_string_late_inline(CallGenerator* cg) { |
982 | _string_late_inlines.push(cg); |
983 | } |
984 | |
985 | void add_boxing_late_inline(CallGenerator* cg) { |
986 | _boxing_late_inlines.push(cg); |
987 | } |
988 | |
989 | void add_vector_reboxing_late_inline(CallGenerator* cg) { |
990 | _vector_reboxing_late_inlines.push(cg); |
991 | } |
992 | |
993 | void add_native_invoker(RuntimeStub* stub); |
994 | |
995 | const GrowableArray<RuntimeStub*> native_invokers() const { return _native_invokers; } |
996 | |
997 | void remove_useless_nodes (GrowableArray<Node*>& node_list, Unique_Node_List &useful); |
998 | |
999 | void remove_useless_late_inlines(GrowableArray<CallGenerator*>* inlines, Unique_Node_List &useful); |
1000 | void remove_useless_late_inlines(GrowableArray<CallGenerator*>* inlines, Node* dead); |
1001 | |
1002 | void remove_useless_coarsened_locks(Unique_Node_List& useful); |
1003 | |
1004 | void process_print_inlining(); |
1005 | void dump_print_inlining(); |
1006 | |
1007 | bool over_inlining_cutoff() const { |
1008 | if (inlining_incrementally()) { |
1009 | // Incremental inlining gets 10% headroom over LiveNodeCountInliningCutoff so that it |
1010 | // can "breathe" and does not thrash when the live node count is close to the limit. |
1011 | // Note that live_nodes() isn't accurate during inlining until the dead node |
1012 | // elimination step happens (see Compile::inline_incrementally). |
1013 | return live_nodes() > (uint)LiveNodeCountInliningCutoff * 11 / 10; |
1014 | } |
1015 | // Not inlining incrementally: compare unique() against NodeCountInliningCutoff. |
1016 | return unique() > (uint)NodeCountInliningCutoff; |
1017 | } |
1018 | |
1019 | void inc_number_of_mh_late_inlines() { _number_of_mh_late_inlines++; } |
1020 | void dec_number_of_mh_late_inlines() { assert(_number_of_mh_late_inlines > 0, "_number_of_mh_late_inlines < 0 !")do { if (!(_number_of_mh_late_inlines > 0)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/compile.hpp" , 1020, "assert(" "_number_of_mh_late_inlines > 0" ") failed" , "_number_of_mh_late_inlines < 0 !"); ::breakpoint(); } } while (0); _number_of_mh_late_inlines--; } |
1021 | bool has_mh_late_inlines() const { return _number_of_mh_late_inlines > 0; } |
1022 | |
1023 | bool inline_incrementally_one(); |
1024 | void inline_incrementally_cleanup(PhaseIterGVN& igvn); |
1025 | void inline_incrementally(PhaseIterGVN& igvn); |
1026 | void inline_string_calls(bool parse_time); |
1027 | void inline_boxing_calls(PhaseIterGVN& igvn); |
1028 | bool optimize_loops(PhaseIterGVN& igvn, LoopOptsMode mode); |
1029 | void remove_root_to_sfpts_edges(PhaseIterGVN& igvn); |
1030 | |
1031 | void inline_vector_reboxing_calls(); |
1032 | bool has_vbox_nodes(); |
1033 | |
1034 | void process_late_inline_calls_no_inline(PhaseIterGVN& igvn); |
1035 | |
1036 | // Matching, CFG layout, allocation, code generation |
1037 | PhaseCFG* cfg() { return _cfg; } |
1038 | bool has_java_calls() const { return _java_calls > 0; } |
1039 | int java_calls() const { return _java_calls; } |
1040 | int inner_loops() const { return _inner_loops; } |
1041 | Matcher* matcher() { return _matcher; } |
1042 | PhaseRegAlloc* regalloc() { return _regalloc; } |
1043 | RegMask& FIRST_STACK_mask() { return _FIRST_STACK_mask; } |
1044 | Arena* indexSet_arena() { return _indexSet_arena; } |
1045 | void* indexSet_free_block_list() { return _indexSet_free_block_list; } |
1046 | DebugInformationRecorder* debug_info() { return env()->debug_info(); } |
1047 | |
1048 | void update_interpreter_frame_size(int size) { |
1049 | // The recorded size only ever grows: ignore sizes that do not exceed it. |
1050 | if (size <= _interpreter_frame_size) return; |
1051 | _interpreter_frame_size = size; |
1052 | } |
1053 | |
1054 | void set_matcher(Matcher* m) { _matcher = m; } |
1055 | //void set_regalloc(PhaseRegAlloc* ra) { _regalloc = ra; } |
1056 | void set_indexSet_arena(Arena* a) { _indexSet_arena = a; } |
1057 | void set_indexSet_free_block_list(void* p) { _indexSet_free_block_list = p; } |
1058 | |
1059 | void set_java_calls(int z) { _java_calls = z; } |
1060 | void set_inner_loops(int z) { _inner_loops = z; } |
1061 | |
1062 | Dependencies* dependencies() { return env()->dependencies(); } |
1063 | |
1064 | // Major entry point. Compile the given target method. NOTE(review): this comment used to say "Given a Scope", but no such parameter is visible in this signature -- confirm. |
1065 | // For normal compilations, entry_bci is InvocationEntryBci. For on stack |
1066 | // replacement, entry_bci indicates the bytecode for which to compile a |
1067 | // continuation. |
1068 | Compile(ciEnv* ci_env, ciMethod* target, |
1069 | int entry_bci, Options options, DirectiveSet* directive); |
1070 | |
1071 | // Second major entry point. From the TypeFunc signature, generate code |
1072 | // to pass arguments from the Java calling convention to the C calling |
1073 | // convention. |
1074 | Compile(ciEnv* ci_env, const TypeFunc *(*gen)(), |
1075 | address stub_function, const char *stub_name, |
1076 | int is_fancy_jump, bool pass_tls, |
1077 | bool return_pc, DirectiveSet* directive); |
1078 | |
1079 | // Are we compiling a method? |
1080 | bool has_method() { return method() != NULL__null; } |
1081 | |
1082 | // Maybe print some information about this compile. |
1083 | void print_compile_messages(); |
1084 | |
1085 | // Final graph reshaping, a post-pass after the regular optimizer is done. |
1086 | bool final_graph_reshaping(); |
1087 | |
1088 | // returns true if adr is completely contained in the given alias category |
1089 | bool must_alias(const TypePtr* adr, int alias_idx); |
1090 | |
1091 | // returns true if adr overlaps with the given alias category |
1092 | bool can_alias(const TypePtr* adr, int alias_idx); |
1093 | |
1094 | // Stack slots that may be unused by the calling convention but must |
1095 | // otherwise be preserved. On Intel this includes the return address. |
1096 | // On PowerPC it includes the 4 words holding the old TOC & LR glue. |
1097 | uint in_preserve_stack_slots() { |
1098 | return SharedRuntime::in_preserve_stack_slots(); |
1099 | } |
1100 | |
1101 | // "Top of Stack" slots that may be unused by the calling convention but must |
1102 | // otherwise be preserved. |
1103 | // On Intel these are not necessary and the value can be zero. |
1104 | static uint out_preserve_stack_slots() { |
1105 | return SharedRuntime::out_preserve_stack_slots(); |
1106 | } |
1107 | |
1108 | // Number of outgoing stack slots killed above the out_preserve_stack_slots |
1109 | // for calls to C. Supports the var-args backing area for register parms. |
1110 | uint varargs_C_out_slots_killed() const; |
1111 | |
1112 | // Number of Stack Slots consumed by a synchronization entry |
1113 | int sync_stack_slots() const; |
1114 | |
1115 | // Compute the name of old_SP. See <arch>.ad for frame layout. |
1116 | OptoReg::Name compute_old_SP(); |
1117 | |
1118 | private: |
1119 | // Phase control: |
1120 | void Init(int aliaslevel); // Prepare for a single compilation |
1121 | int Inline_Warm(); // Find more inlining work. |
1122 | void Finish_Warm(); // Give up on further inlines. |
1123 | void Optimize(); // Given a graph, optimize it |
1124 | void Code_Gen(); // Generate code from a graph |
1125 | |
1126 | // Management of the AliasType table. |
1127 | void grow_alias_types(); |
1128 | AliasCacheEntry* probe_alias_cache(const TypePtr* adr_type); |
1129 | const TypePtr *flatten_alias_type(const TypePtr* adr_type) const; |
1130 | AliasType* find_alias_type(const TypePtr* adr_type, bool no_create, ciField* field); |
1131 | |
1132 | void verify_top(Node*) const PRODUCT_RETURN; |
1133 | |
1134 | // Intrinsic setup. |
1135 | CallGenerator* make_vm_intrinsic(ciMethod* m, bool is_virtual); // constructor |
1136 | int intrinsic_insertion_index(ciMethod* m, bool is_virtual, bool& found); // helper |
1137 | CallGenerator* find_intrinsic(ciMethod* m, bool is_virtual); // query fn |
1138 | void register_intrinsic(CallGenerator* cg); // update fn |
1139 | |
1140 | #ifndef PRODUCT |
1141 | static juint _intrinsic_hist_count[]; |
1142 | static jubyte _intrinsic_hist_flags[]; |
1143 | #endif |
1144 | // Function calls made by the public function final_graph_reshaping. |
1145 | // No need to be made public as they are not called elsewhere. |
1146 | void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc); |
1147 | void final_graph_reshaping_main_switch(Node* n, Final_Reshape_Counts& frc, uint nop); |
1148 | void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ); |
1149 | void eliminate_redundant_card_marks(Node* n); |
1150 | |
1151 | // Logic cone optimization. |
1152 | void optimize_logic_cones(PhaseIterGVN &igvn); |
1153 | void collect_logic_cone_roots(Unique_Node_List& list); |
1154 | void process_logic_cone_root(PhaseIterGVN &igvn, Node* n, VectorSet& visited); |
1155 | bool compute_logic_cone(Node* n, Unique_Node_List& partition, Unique_Node_List& inputs); |
1156 | uint compute_truth_table(Unique_Node_List& partition, Unique_Node_List& inputs); |
1157 | uint eval_macro_logic_op(uint func, uint op1, uint op2, uint op3); |
1158 | Node* xform_to_MacroLogicV(PhaseIterGVN &igvn, const TypeVect* vt, Unique_Node_List& partitions, Unique_Node_List& inputs); |
1159 | void check_no_dead_use() const NOT_DEBUG_RETURN; |
1160 | |
1161 | public: |
1162 | |
1163 | // Note: Histogram array size is about 1 Kb. |
1164 | enum { // flag bits; each value is a distinct power of two: |
1165 | _intrinsic_worked = 1, // succeeded at least once |
1166 | _intrinsic_failed = 2, // tried it but it failed |
1167 | _intrinsic_disabled = 4, // was requested but disabled (e.g., -XX:-InlineUnsafeOps) |
1168 | _intrinsic_virtual = 8, // was seen in the virtual form (rare) |
1169 | _intrinsic_both = 16 // was seen in the non-virtual form (usual) -- NOTE(review): the name says "both"; confirm which meaning is intended |
1170 | }; |
1171 | // Update histogram. Return boolean if this is a first-time occurrence. |
1172 | static bool gather_intrinsic_statistics(vmIntrinsics::ID id, |
1173 | bool is_virtual, int flags) PRODUCT_RETURN0; |
1174 | static void print_intrinsic_statistics() PRODUCT_RETURN; |
1175 | |
1176 | // Graph verification code |
1177 | // Walk the node list, verifying that there is a one-to-one |
1178 | // correspondence between Use-Def edges and Def-Use edges |
1179 | // The option no_dead_code enables stronger checks that the |
1180 | // graph is strongly connected from root in both directions. |
1181 | void verify_graph_edges(bool no_dead_code = false) PRODUCT_RETURN; |
1182 | |
1183 | // End-of-run dumps. |
1184 | static void print_statistics() PRODUCT_RETURN; |
1185 | |
1186 | // Verify ADLC assumptions during startup |
1187 | static void adlc_verification() PRODUCT_RETURN; |
1188 | |
1189 | // Definitions of pd methods |
1190 | static void pd_compiler2_init(); |
1191 | |
1192 | // Static parse-time type checking logic for gen_subtype_check: |
1193 | enum { SSC_always_false, SSC_always_true, SSC_easy_test, SSC_full_test }; |
1194 | int static_subtype_check(ciKlass* superk, ciKlass* subk); |
1195 | |
1196 | static Node* conv_I2X_index(PhaseGVN* phase, Node* offset, const TypeInt* sizetype, |
1197 | // Optional control dependency (for example, on range check) |
1198 | Node* ctrl = NULL__null); |
1199 | |
1200 | // Convert integer value to a narrowed long type dependent on ctrl (for example, a range check) |
1201 | static Node* constrained_convI2L(PhaseGVN* phase, Node* value, const TypeInt* itype, Node* ctrl, bool carry_dependency = false); |
1202 | |
1203 | // Auxiliary methods for randomized fuzzing/stressing |
1204 | int random(); |
1205 | bool randomized_select(int count); |
1206 | |
1207 | // supporting clone_map |
1208 | CloneMap& clone_map(); |
1209 | void set_clone_map(Dict* d); |
1210 | |
1211 | bool needs_clinit_barrier(ciField* ik, ciMethod* accessing_method); |
1212 | bool needs_clinit_barrier(ciMethod* ik, ciMethod* accessing_method); |
1213 | bool needs_clinit_barrier(ciInstanceKlass* ik, ciMethod* accessing_method); |
1214 | |
1215 | #ifdef IA32 |
1216 | private: |
1217 | bool _select_24_bit_instr; // We selected an instruction with a 24-bit result |
1218 | bool _in_24_bit_fp_mode; // We are emitting instructions with 24-bit results |
1219 | |
1220 | // Remember if this compilation changes hardware mode to 24-bit precision. |
1221 | void set_24_bit_selection_and_mode(bool selection, bool mode) { |
1222 | _select_24_bit_instr = selection; |
1223 | _in_24_bit_fp_mode = mode; |
1224 | } |
1225 | |
1226 | public: |
1227 | bool select_24_bit_instr() const { return _select_24_bit_instr; } |
1228 | bool in_24_bit_fp_mode() const { return _in_24_bit_fp_mode; } |
1229 | #endif // IA32 |
1230 | #ifdef ASSERT1 |
1231 | bool _type_verify_symmetry; |
1232 | void set_exception_backedge() { _exception_backedge = true; } |
1233 | bool has_exception_backedge() const { return _exception_backedge; } |
1234 | #endif |
1235 | |
1236 | static bool push_thru_add(PhaseGVN* phase, Node* z, const TypeInteger* tz, const TypeInteger*& rx, const TypeInteger*& ry, |
1237 | BasicType bt); |
1238 | |
1239 | static Node* narrow_value(BasicType bt, Node* value, const Type* type, PhaseGVN* phase, bool transform_res); |
1240 | }; |
1241 | |
1242 | #endif // SHARE_OPTO_COMPILE_HPP |