File: | jdk/src/hotspot/share/opto/superword.cpp |
Warning: | line 229, column 31 Access to field '_idx' results in a dereference of a null pointer (loaded from variable 'n') |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* | |||
2 | * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. | |||
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |||
4 | * | |||
5 | * This code is free software; you can redistribute it and/or modify it | |||
6 | * under the terms of the GNU General Public License version 2 only, as | |||
7 | * published by the Free Software Foundation. | |||
8 | * | |||
9 | * This code is distributed in the hope that it will be useful, but WITHOUT | |||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |||
12 | * version 2 for more details (a copy is included in the LICENSE file that | |||
13 | * accompanied this code). | |||
14 | * | |||
15 | * You should have received a copy of the GNU General Public License version | |||
16 | * 2 along with this work; if not, write to the Free Software Foundation, | |||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |||
18 | * | |||
19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | |||
20 | * or visit www.oracle.com if you need additional information or have any | |||
21 | * questions. | |||
22 | */ | |||
23 | ||||
24 | #include "precompiled.hpp" | |||
25 | #include "compiler/compileLog.hpp" | |||
26 | #include "libadt/vectset.hpp" | |||
27 | #include "memory/allocation.inline.hpp" | |||
28 | #include "memory/resourceArea.hpp" | |||
29 | #include "opto/addnode.hpp" | |||
30 | #include "opto/callnode.hpp" | |||
31 | #include "opto/castnode.hpp" | |||
32 | #include "opto/convertnode.hpp" | |||
33 | #include "opto/divnode.hpp" | |||
34 | #include "opto/matcher.hpp" | |||
35 | #include "opto/memnode.hpp" | |||
36 | #include "opto/mulnode.hpp" | |||
37 | #include "opto/opcodes.hpp" | |||
38 | #include "opto/opaquenode.hpp" | |||
39 | #include "opto/superword.hpp" | |||
40 | #include "opto/vectornode.hpp" | |||
41 | #include "opto/movenode.hpp" | |||
42 | #include "utilities/powerOfTwo.hpp" | |||
43 | ||||
44 | // | |||
45 | // S U P E R W O R D T R A N S F O R M | |||
46 | //============================================================================= | |||
47 | ||||
48 | //------------------------------SuperWord--------------------------- | |||
// Constructs the SuperWord transform state for one PhaseIdealLoop.
//   phase - the loop-optimization phase; its compilation object (phase->C) supplies the
//           arena, the clone map, the do_vector_loop() setting and (non-product builds)
//           the compiler directive.
// All per-loop pointers (_align_to_ref, _lpt, _lp, _pre_loop_end, _bb, _iv) start out null
// and the work counters start at 0. The initializers are listed one per line; their
// order is left untouched.
49 | SuperWord::SuperWord(PhaseIdealLoop* phase) : | |||
50 | _phase(phase), | |||
51 | _arena(phase->C->comp_arena()), | |||
52 | _igvn(phase->_igvn), | |||
53 | _packset(arena(), 8, 0, NULL__null), // packs for the current block | |||
// _bb_idx is sized from the current node count (phase->C->unique()) plus 10% headroom.
54 | _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb | |||
55 | _block(arena(), 8, 0, NULL__null), // nodes in current block | |||
56 | _post_block(arena(), 8, 0, NULL__null), // nodes common to current block which are marked as post loop vectorizable | |||
57 | _data_entry(arena(), 8, 0, NULL__null), // nodes with all inputs from outside | |||
58 | _mem_slice_head(arena(), 8, 0, NULL__null), // memory slice heads | |||
59 | _mem_slice_tail(arena(), 8, 0, NULL__null), // memory slice tails | |||
60 | _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node | |||
61 | _clone_map(phase->C->clone_map()), // map of nodes created in cloning | |||
62 | _cmovev_kit(_arena, this), // map to facilitate CMoveV creation | |||
63 | _align_to_ref(NULL__null), // memory reference to align vectors to | |||
64 | _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs | |||
65 | _dg(_arena), // dependence graph | |||
66 | _visited(arena()), // visited node set | |||
67 | _post_visited(arena()), // post visited node set | |||
68 | _n_idx_list(arena(), 8), // scratch list of (node,index) pairs | |||
69 | _nlist(arena(), 8, 0, NULL__null), // scratch list of nodes | |||
70 | _stk(arena(), 8, 0, NULL__null), // scratch stack of nodes | |||
71 | _lpt(NULL__null), // loop tree node | |||
72 | _lp(NULL__null), // CountedLoopNode | |||
73 | _pre_loop_end(NULL__null), // Pre loop CountedLoopEndNode | |||
74 | _bb(NULL__null), // basic block | |||
75 | _iv(NULL__null), // induction var | |||
76 | _race_possible(false), // cases where SDMU is true | |||
77 | _early_return(true), // analysis evaluations routine | |||
78 | _do_vector_loop(phase->C->do_vector_loop()), // whether to do vectorization/simd style | |||
79 | _do_reserve_copy(DoReserveCopyInSuperWord), | |||
80 | _num_work_vecs(0), // amount of vector work we have | |||
81 | _num_reductions(0), // amount of reduction work we have | |||
82 | _ii_first(-1), // first loop generation index - only if do_vector_loop() | |||
83 | _ii_last(-1), // last loop generation index - only if do_vector_loop() | |||
84 | _ii_order(arena(), 8, 0, 0) | |||
85 | { | |||
// Non-product builds only: the debug option defaults to 0 and is taken from the
// compiler directive (VectorizeDebugOption) when the compilation has a method.
86 | #ifndef PRODUCT | |||
87 | _vector_loop_debug = 0; | |||
88 | if (_phase->C->method() != NULL__null) { | |||
89 | _vector_loop_debug = phase->C->directive()->VectorizeDebugOption; | |||
90 | } | |||
91 | ||||
92 | #endif | |||
93 | } | |||
94 | ||||
// File-local switch, fixed to false: the code in SLP_extract() that tests it
// (mark_generations()/hoist_loads_in_graph() and the pack_parallel() fallback) never executes.
95 | static const bool _do_vector_loop_experimental = false; // Experimental vectorization which uses data from loop unrolling. | |||
96 | ||||
97 | //------------------------------transform_loop--------------------------- | |||
98 | void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { | |||
99 | assert(UseSuperWord, "should be")do { if (!(UseSuperWord)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 99, "assert(" "UseSuperWord" ") failed", "should be"); ::breakpoint (); } } while (0); | |||
100 | // SuperWord only works with power of two vector sizes. | |||
101 | int vector_width = Matcher::vector_width_in_bytes(T_BYTE); | |||
102 | if (vector_width < 2 || !is_power_of_2(vector_width)) { | |||
103 | return; | |||
104 | } | |||
105 | ||||
106 | assert(lpt->_head->is_CountedLoop(), "must be")do { if (!(lpt->_head->is_CountedLoop())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 106, "assert(" "lpt->_head->is_CountedLoop()" ") failed" , "must be"); ::breakpoint(); } } while (0); | |||
107 | CountedLoopNode *cl = lpt->_head->as_CountedLoop(); | |||
108 | ||||
109 | if (!cl->is_valid_counted_loop(T_INT)) return; // skip malformed counted loop | |||
110 | ||||
111 | bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop()); | |||
112 | if (post_loop_allowed) { | |||
113 | if (cl->is_reduction_loop()) return; // no predication mapping | |||
114 | Node *limit = cl->limit(); | |||
115 | if (limit->is_Con()) return; // non constant limits only | |||
116 | // Now check the limit for expressions we do not handle | |||
117 | if (limit->is_Add()) { | |||
118 | Node *in2 = limit->in(2); | |||
119 | if (in2->is_Con()) { | |||
120 | int val = in2->get_int(); | |||
121 | // should not try to program these cases | |||
122 | if (val < 0) return; | |||
123 | } | |||
124 | } | |||
125 | } | |||
126 | ||||
127 | // skip any loop that has not been assigned max unroll by analysis | |||
128 | if (do_optimization) { | |||
129 | if (SuperWordLoopUnrollAnalysis && cl->slp_max_unroll() == 0) return; | |||
130 | } | |||
131 | ||||
132 | // Check for no control flow in body (other than exit) | |||
133 | Node *cl_exit = cl->loopexit(); | |||
134 | if (cl->is_main_loop() && (cl_exit->in(0) != lpt->_head)) { | |||
135 | #ifndef PRODUCT | |||
136 | if (TraceSuperWord) { | |||
137 | tty->print_cr("SuperWord::transform_loop: loop too complicated, cl_exit->in(0) != lpt->_head"); | |||
138 | tty->print("cl_exit %d", cl_exit->_idx); cl_exit->dump(); | |||
139 | tty->print("cl_exit->in(0) %d", cl_exit->in(0)->_idx); cl_exit->in(0)->dump(); | |||
140 | tty->print("lpt->_head %d", lpt->_head->_idx); lpt->_head->dump(); | |||
141 | lpt->dump_head(); | |||
142 | } | |||
143 | #endif | |||
144 | return; | |||
145 | } | |||
146 | ||||
147 | // Make sure the are no extra control users of the loop backedge | |||
148 | if (cl->back_control()->outcnt() != 1) { | |||
149 | return; | |||
150 | } | |||
151 | ||||
152 | // Skip any loops already optimized by slp | |||
153 | if (cl->is_vectorized_loop()) return; | |||
154 | ||||
155 | if (cl->is_unroll_only()) return; | |||
156 | ||||
157 | if (cl->is_main_loop()) { | |||
158 | // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit)))) | |||
159 | CountedLoopEndNode* pre_end = find_pre_loop_end(cl); | |||
160 | if (pre_end == NULL__null) { | |||
161 | return; | |||
162 | } | |||
163 | Node* pre_opaq1 = pre_end->limit(); | |||
164 | if (pre_opaq1->Opcode() != Op_Opaque1) { | |||
165 | return; | |||
166 | } | |||
167 | set_pre_loop_end(pre_end); | |||
168 | } | |||
169 | ||||
170 | init(); // initialize data structures | |||
171 | ||||
172 | set_lpt(lpt); | |||
173 | set_lp(cl); | |||
174 | ||||
175 | // For now, define one block which is the entire loop body | |||
176 | set_bb(cl); | |||
177 | ||||
178 | if (do_optimization) { | |||
179 | assert(_packset.length() == 0, "packset must be empty")do { if (!(_packset.length() == 0)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 179, "assert(" "_packset.length() == 0" ") failed", "packset must be empty" ); ::breakpoint(); } } while (0); | |||
180 | SLP_extract(); | |||
181 | if (PostLoopMultiversioning && Matcher::has_predicated_vectors()) { | |||
182 | if (cl->is_vectorized_loop() && cl->is_main_loop() && !cl->is_reduction_loop()) { | |||
183 | IdealLoopTree *lpt_next = lpt->_next; | |||
184 | CountedLoopNode *cl_next = lpt_next->_head->as_CountedLoop(); | |||
185 | _phase->has_range_checks(lpt_next); | |||
186 | if (cl_next->is_post_loop() && !cl_next->range_checks_present()) { | |||
187 | if (!cl_next->is_vectorized_loop()) { | |||
188 | int slp_max_unroll_factor = cl->slp_max_unroll(); | |||
189 | cl_next->set_slp_max_unroll(slp_max_unroll_factor); | |||
190 | } | |||
191 | } | |||
192 | } | |||
193 | } | |||
194 | } | |||
195 | } | |||
196 | ||||
197 | //------------------------------early unrolling analysis------------------------------ | |||
198 | void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) { | |||
199 | bool is_slp = true; | |||
200 | ResourceMark rm; | |||
201 | size_t ignored_size = lpt()->_body.size(); | |||
202 | int *ignored_loop_nodes = NEW_RESOURCE_ARRAY(int, ignored_size)(int*) resource_allocate_bytes((ignored_size) * sizeof(int)); | |||
203 | Node_Stack nstack((int)ignored_size); | |||
204 | CountedLoopNode *cl = lpt()->_head->as_CountedLoop(); | |||
205 | Node *cl_exit = cl->loopexit_or_null(); | |||
206 | int rpo_idx = _post_block.length(); | |||
207 | ||||
208 | assert(rpo_idx == 0, "post loop block is empty")do { if (!(rpo_idx == 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 208, "assert(" "rpo_idx == 0" ") failed", "post loop block is empty" ); ::breakpoint(); } } while (0); | |||
| ||||
209 | ||||
210 | // First clear the entries | |||
211 | for (uint i = 0; i < lpt()->_body.size(); i++) { | |||
212 | ignored_loop_nodes[i] = -1; | |||
213 | } | |||
214 | ||||
215 | int max_vector = Matcher::max_vector_size(T_BYTE); | |||
216 | bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop()); | |||
217 | ||||
218 | // Process the loop, some/all of the stack entries will not be in order, ergo | |||
219 | // need to preprocess the ignored initial state before we process the loop | |||
220 | for (uint i = 0; i < lpt()->_body.size(); i++) { | |||
221 | Node* n = lpt()->_body.at(i); | |||
222 | if (n == cl->incr() || | |||
223 | n->is_reduction() || | |||
224 | n->is_AddP() || | |||
225 | n->is_Cmp() || | |||
226 | n->is_IfTrue() || | |||
227 | n->is_CountedLoop() || | |||
228 | (n == cl_exit)) { | |||
229 | ignored_loop_nodes[i] = n->_idx; | |||
| ||||
230 | continue; | |||
231 | } | |||
232 | ||||
233 | if (n->is_If()) { | |||
234 | IfNode *iff = n->as_If(); | |||
235 | if (iff->_fcnt != COUNT_UNKNOWN(-1.0f) && iff->_prob != PROB_UNKNOWN(-1.0f)) { | |||
236 | if (lpt()->is_loop_exit(iff)) { | |||
237 | ignored_loop_nodes[i] = n->_idx; | |||
238 | continue; | |||
239 | } | |||
240 | } | |||
241 | } | |||
242 | ||||
243 | if (n->is_Phi() && (n->bottom_type() == Type::MEMORY)) { | |||
244 | Node* n_tail = n->in(LoopNode::LoopBackControl); | |||
245 | if (n_tail != n->in(LoopNode::EntryControl)) { | |||
246 | if (!n_tail->is_Mem()) { | |||
247 | is_slp = false; | |||
248 | break; | |||
249 | } | |||
250 | } | |||
251 | } | |||
252 | ||||
253 | // This must happen after check of phi/if | |||
254 | if (n->is_Phi() || n->is_If()) { | |||
255 | ignored_loop_nodes[i] = n->_idx; | |||
256 | continue; | |||
257 | } | |||
258 | ||||
259 | if (n->is_LoadStore() || n->is_MergeMem() || | |||
260 | (n->is_Proj() && !n->as_Proj()->is_CFG())) { | |||
261 | is_slp = false; | |||
262 | break; | |||
263 | } | |||
264 | ||||
265 | // Ignore nodes with non-primitive type. | |||
266 | BasicType bt; | |||
267 | if (n->is_Mem()) { | |||
268 | bt = n->as_Mem()->memory_type(); | |||
269 | } else { | |||
270 | bt = n->bottom_type()->basic_type(); | |||
271 | } | |||
272 | if (is_java_primitive(bt) == false) { | |||
273 | ignored_loop_nodes[i] = n->_idx; | |||
274 | continue; | |||
275 | } | |||
276 | ||||
277 | if (n->is_Mem()) { | |||
278 | MemNode* current = n->as_Mem(); | |||
279 | Node* adr = n->in(MemNode::Address); | |||
280 | Node* n_ctrl = _phase->get_ctrl(adr); | |||
281 | ||||
282 | // save a queue of post process nodes | |||
283 | if (n_ctrl != NULL__null && lpt()->is_member(_phase->get_loop(n_ctrl))) { | |||
284 | // Process the memory expression | |||
285 | int stack_idx = 0; | |||
286 | bool have_side_effects = true; | |||
287 | if (adr->is_AddP() == false) { | |||
288 | nstack.push(adr, stack_idx++); | |||
289 | } else { | |||
290 | // Mark the components of the memory operation in nstack | |||
291 | SWPointer p1(current, this, &nstack, true); | |||
292 | have_side_effects = p1.node_stack()->is_nonempty(); | |||
293 | } | |||
294 | ||||
295 | // Process the pointer stack | |||
296 | while (have_side_effects) { | |||
297 | Node* pointer_node = nstack.node(); | |||
298 | for (uint j = 0; j < lpt()->_body.size(); j++) { | |||
299 | Node* cur_node = lpt()->_body.at(j); | |||
300 | if (cur_node == pointer_node) { | |||
301 | ignored_loop_nodes[j] = cur_node->_idx; | |||
302 | break; | |||
303 | } | |||
304 | } | |||
305 | nstack.pop(); | |||
306 | have_side_effects = nstack.is_nonempty(); | |||
307 | } | |||
308 | } | |||
309 | } | |||
310 | } | |||
311 | ||||
312 | if (is_slp) { | |||
313 | // Now we try to find the maximum supported consistent vector which the machine | |||
314 | // description can use | |||
315 | bool small_basic_type = false; | |||
316 | bool flag_small_bt = false; | |||
317 | for (uint i = 0; i < lpt()->_body.size(); i++) { | |||
318 | if (ignored_loop_nodes[i] != -1) continue; | |||
319 | ||||
320 | BasicType bt; | |||
321 | Node* n = lpt()->_body.at(i); | |||
322 | if (n->is_Mem()) { | |||
323 | bt = n->as_Mem()->memory_type(); | |||
324 | } else { | |||
325 | bt = n->bottom_type()->basic_type(); | |||
326 | } | |||
327 | ||||
328 | if (post_loop_allowed) { | |||
329 | if (!small_basic_type) { | |||
330 | switch (bt) { | |||
331 | case T_CHAR: | |||
332 | case T_BYTE: | |||
333 | case T_SHORT: | |||
334 | small_basic_type = true; | |||
335 | break; | |||
336 | ||||
337 | case T_LONG: | |||
338 | // TODO: Remove when support completed for mask context with LONG. | |||
339 | // Support needs to be augmented for logical qword operations, currently we map to dword | |||
340 | // buckets for vectors on logicals as these were legacy. | |||
341 | small_basic_type = true; | |||
342 | break; | |||
343 | ||||
344 | default: | |||
345 | break; | |||
346 | } | |||
347 | } | |||
348 | } | |||
349 | ||||
350 | if (is_java_primitive(bt) == false) continue; | |||
351 | ||||
352 | int cur_max_vector = Matcher::max_vector_size(bt); | |||
353 | ||||
354 | // If a max vector exists which is not larger than _local_loop_unroll_factor | |||
355 | // stop looking, we already have the max vector to map to. | |||
356 | if (cur_max_vector < local_loop_unroll_factor) { | |||
357 | is_slp = false; | |||
358 | if (TraceSuperWordLoopUnrollAnalysis) { | |||
359 | tty->print_cr("slp analysis fails: unroll limit greater than max vector\n"); | |||
360 | } | |||
361 | break; | |||
362 | } | |||
363 | ||||
364 | // Map the maximal common vector | |||
365 | if (VectorNode::implemented(n->Opcode(), cur_max_vector, bt)) { | |||
366 | if (cur_max_vector < max_vector && !flag_small_bt) { | |||
367 | max_vector = cur_max_vector; | |||
368 | } else if (cur_max_vector > max_vector && UseSubwordForMaxVector) { | |||
369 | // Analyse subword in the loop to set maximum vector size to take advantage of full vector width for subword types. | |||
370 | // Here we analyze if narrowing is likely to happen and if it is we set vector size more aggressively. | |||
371 | // We check for possibility of narrowing by looking through chain operations using subword types. | |||
372 | if (is_subword_type(bt)) { | |||
373 | uint start, end; | |||
374 | VectorNode::vector_operands(n, &start, &end); | |||
375 | ||||
376 | for (uint j = start; j < end; j++) { | |||
377 | Node* in = n->in(j); | |||
378 | // Don't propagate through a memory | |||
379 | if (!in->is_Mem() && in_bb(in) && in->bottom_type()->basic_type() == T_INT) { | |||
380 | bool same_type = true; | |||
381 | for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) { | |||
382 | Node *use = in->fast_out(k); | |||
383 | if (!in_bb(use) && use->bottom_type()->basic_type() != bt) { | |||
384 | same_type = false; | |||
385 | break; | |||
386 | } | |||
387 | } | |||
388 | if (same_type) { | |||
389 | max_vector = cur_max_vector; | |||
390 | flag_small_bt = true; | |||
391 | cl->mark_subword_loop(); | |||
392 | } | |||
393 | } | |||
394 | } | |||
395 | } | |||
396 | } | |||
397 | // We only process post loops on predicated targets where we want to | |||
398 | // mask map the loop to a single iteration | |||
399 | if (post_loop_allowed) { | |||
400 | _post_block.at_put_grow(rpo_idx++, n); | |||
401 | } | |||
402 | } | |||
403 | } | |||
404 | if (is_slp) { | |||
405 | local_loop_unroll_factor = max_vector; | |||
406 | cl->mark_passed_slp(); | |||
407 | } | |||
408 | cl->mark_was_slp(); | |||
409 | if (cl->is_main_loop()) { | |||
410 | cl->set_slp_max_unroll(local_loop_unroll_factor); | |||
411 | } else if (post_loop_allowed) { | |||
412 | if (!small_basic_type) { | |||
413 | // avoid replication context for small basic types in programmable masked loops | |||
414 | cl->set_slp_max_unroll(local_loop_unroll_factor); | |||
415 | } | |||
416 | } | |||
417 | } | |||
418 | } | |||
419 | ||||
420 | //------------------------------SLP_extract--------------------------- | |||
421 | // Extract the superword level parallelism | |||
422 | // | |||
423 | // 1) A reverse post-order of nodes in the block is constructed. By scanning | |||
424 | // this list from first to last, all definitions are visited before their uses. | |||
425 | // | |||
426 | // 2) A point-to-point dependence graph is constructed between memory references. | |||
427 | // This simplifies the upcoming "independence" checker. | |||
428 | // | |||
429 | // 3) The maximum depth in the node graph from the beginning of the block | |||
430 | // to each node is computed. This is used to prune the graph search | |||
431 | // in the independence checker. | |||
432 | // | |||
433 | // 4) For integer types, the necessary bit width is propagated backwards | |||
434 | // from stores to allow packed operations on byte, char, and short | |||
435 | // integers. This reverses the promotion to type "int" that javac | |||
436 | // did for operations like: char c1,c2,c3; c1 = c2 + c3. | |||
437 | // | |||
438 | // 5) One of the memory references is picked to be an aligned vector reference. | |||
439 | // The pre-loop trip count is adjusted to align this reference in the | |||
440 | // unrolled body. | |||
441 | // | |||
442 | // 6) The initial set of pack pairs is seeded with memory references. | |||
443 | // | |||
444 | // 7) The set of pack pairs is extended by following use->def and def->use links. | |||
445 | // | |||
446 | // 8) The pairs are combined into vector sized packs. | |||
447 | // | |||
448 | // 9) Reorder the memory slices to co-locate members of the memory packs. | |||
449 | // | |||
450 | // 10) Generate ideal vector nodes for the final set of packs and where necessary, | |||
451 | // inserting scalar promotion, vector creation from multiple scalars, and | |||
452 | // extraction of scalar values from vectors. | |||
453 | // | |||
// Control flow summary: after the block, dependence graph and depths are built, a main
// loop runs the full pack-building sequence, a post loop on a predicated target builds
// singleton packs from _post_block, and any other loop goes straight to output().
// Every "return" below abandons the transform before output() is reached.
454 | void SuperWord::SLP_extract() { | |||
455 | ||||
456 | #ifndef PRODUCT | |||
457 | if (_do_vector_loop && TraceSuperWord) { | |||
458 | tty->print("SuperWord::SLP_extract\n"); | |||
459 | tty->print("input loop\n"); | |||
460 | _lpt->dump_head(); | |||
461 | _lpt->dump(); | |||
462 | for (uint i = 0; i < _lpt->_body.size(); i++) { | |||
463 | _lpt->_body.at(i)->dump(); | |||
464 | } | |||
465 | } | |||
466 | #endif | |||
467 | // Ready the block | |||
468 | if (!construct_bb()) { | |||
469 | return; // Exit if no interesting nodes or complex graph. | |||
470 | } | |||
471 | ||||
472 | // build _dg, _disjoint_ptrs | |||
473 | dependence_graph(); | |||
474 | ||||
475 | // compute function depth(Node*) | |||
476 | compute_max_depth(); | |||
477 | ||||
478 | CountedLoopNode *cl = lpt()->_head->as_CountedLoop(); | |||
479 | bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop()); | |||
480 | if (cl->is_main_loop()) { | |||
// _do_vector_loop_experimental is a file-level constant false, so this branch does not run.
481 | if (_do_vector_loop_experimental) { | |||
482 | if (mark_generations() != -1) { | |||
483 | hoist_loads_in_graph(); // this only rebuild the graph; all basic structs need rebuild explicitly | |||
484 | ||||
485 | if (!construct_bb()) { | |||
486 | return; // Exit if no interesting nodes or complex graph. | |||
487 | } | |||
488 | dependence_graph(); | |||
489 | compute_max_depth(); | |||
490 | } | |||
491 | ||||
492 | #ifndef PRODUCT | |||
493 | if (TraceSuperWord) { | |||
494 | tty->print_cr("\nSuperWord::_do_vector_loop: graph after hoist_loads_in_graph"); | |||
495 | _lpt->dump_head(); | |||
496 | for (int j = 0; j < _block.length(); j++) { | |||
497 | Node* n = _block.at(j); | |||
498 | int d = depth(n); | |||
499 | for (int i = 0; i < d; i++) tty->print("%s", " "); | |||
500 | tty->print("%d :", d); | |||
501 | n->dump(); | |||
502 | } | |||
503 | } | |||
504 | #endif | |||
505 | } | |||
506 | ||||
507 | compute_vector_element_type(); | |||
508 | ||||
509 | // Attempt vectorization | |||
510 | ||||
511 | find_adjacent_refs(); | |||
512 | ||||
513 | if (align_to_ref() == NULL__null) { | |||
514 | return; // Did not find memory reference to align vectors | |||
515 | } | |||
516 | ||||
517 | extend_packlist(); | |||
518 | ||||
// Same constant-false switch as above: the pack_parallel() fallback does not run.
519 | if (_do_vector_loop_experimental) { | |||
520 | if (_packset.length() == 0) { | |||
521 | #ifndef PRODUCT | |||
522 | if (TraceSuperWord) { | |||
523 | tty->print_cr("\nSuperWord::_do_vector_loop DFA could not build packset, now trying to build anyway"); | |||
524 | } | |||
525 | #endif | |||
526 | pack_parallel(); | |||
527 | } | |||
528 | } | |||
529 | ||||
530 | combine_packs(); | |||
531 | ||||
532 | construct_my_pack_map(); | |||
533 | if (UseVectorCmov) { | |||
534 | merge_packs_to_cmovd(); | |||
535 | } | |||
536 | ||||
537 | filter_packs(); | |||
538 | ||||
539 | schedule(); | |||
540 | } else if (post_loop_allowed) { | |||
// Post loop path: remember the unroll factor currently recorded on the loop, reset it,
// re-run unrolling_analysis(), and continue only if the analysis reproduces that factor.
541 | int saved_mapped_unroll_factor = cl->slp_max_unroll(); | |||
542 | if (saved_mapped_unroll_factor) { | |||
543 | int vector_mapped_unroll_factor = saved_mapped_unroll_factor; | |||
544 | ||||
545 | // now reset the slp_unroll_factor so that we can check the analysis mapped | |||
546 | // what the vector loop was mapped to | |||
547 | cl->set_slp_max_unroll(0); | |||
548 | ||||
549 | // do the analysis on the post loop | |||
550 | unrolling_analysis(vector_mapped_unroll_factor); | |||
551 | ||||
552 | // if our analyzed loop is a canonical fit, start processing it | |||
553 | if (vector_mapped_unroll_factor == saved_mapped_unroll_factor) { | |||
554 | // now add the vector nodes to packsets | |||
// Each node collected in _post_block becomes its own one-element pack.
555 | for (int i = 0; i < _post_block.length(); i++) { | |||
556 | Node* n = _post_block.at(i); | |||
557 | Node_List* singleton = new Node_List(); | |||
558 | singleton->push(n); | |||
559 | _packset.append(singleton); | |||
560 | set_my_pack(n, singleton); | |||
561 | } | |||
562 | ||||
563 | // map base types for vector usage | |||
564 | compute_vector_element_type(); | |||
565 | } else { | |||
566 | return; | |||
567 | } | |||
568 | } else { | |||
569 | // for some reason we could not map the slp analysis state of the vectorized loop | |||
570 | return; | |||
571 | } | |||
572 | } | |||
573 | ||||
// Reached by the main-loop path, by the successful post-loop path, and by loops that are neither.
574 | output(); | |||
575 | } | |||
576 | ||||
577 | //------------------------------find_adjacent_refs--------------------------- | |||
578 | // Find the adjacent memory references and create pack pairs for them. | |||
579 | // This is the initial set of packs that will then be extended by | |||
580 | // following use->def and def->use links. The align positions are | |||
581 | // assigned relative to the reference "align_to_ref" | |||
582 | void SuperWord::find_adjacent_refs() { | |||
583 | // Get list of memory operations | |||
584 | Node_List memops; | |||
585 | for (int i = 0; i < _block.length(); i++) { | |||
586 | Node* n = _block.at(i); | |||
587 | if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) && | |||
588 | is_java_primitive(n->as_Mem()->memory_type())) { | |||
589 | int align = memory_alignment(n->as_Mem(), 0); | |||
590 | if (align != bottom_align) { | |||
591 | memops.push(n); | |||
592 | } | |||
593 | } | |||
594 | } | |||
595 | if (TraceSuperWord) { | |||
596 | tty->print_cr("\nfind_adjacent_refs found %d memops", memops.size()); | |||
597 | } | |||
598 | ||||
599 | Node_List align_to_refs; | |||
600 | int max_idx; | |||
601 | int best_iv_adjustment = 0; | |||
602 | MemNode* best_align_to_mem_ref = NULL__null; | |||
603 | ||||
604 | while (memops.size() != 0) { | |||
605 | // Find a memory reference to align to. | |||
606 | MemNode* mem_ref = find_align_to_ref(memops, max_idx); | |||
607 | if (mem_ref == NULL__null) break; | |||
608 | align_to_refs.push(mem_ref); | |||
609 | int iv_adjustment = get_iv_adjustment(mem_ref); | |||
610 | ||||
611 | if (best_align_to_mem_ref == NULL__null) { | |||
612 | // Set memory reference which is the best from all memory operations | |||
613 | // to be used for alignment. The pre-loop trip count is modified to align | |||
614 | // this reference to a vector-aligned address. | |||
615 | best_align_to_mem_ref = mem_ref; | |||
616 | best_iv_adjustment = iv_adjustment; | |||
617 | NOT_PRODUCT(find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment);)find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment ); | |||
618 | } | |||
619 | ||||
620 | SWPointer align_to_ref_p(mem_ref, this, NULL__null, false); | |||
621 | // Set alignment relative to "align_to_ref" for all related memory operations. | |||
622 | for (int i = memops.size() - 1; i >= 0; i--) { | |||
623 | MemNode* s = memops.at(i)->as_Mem(); | |||
624 | if (isomorphic(s, mem_ref) && | |||
625 | (!_do_vector_loop || same_origin_idx(s, mem_ref))) { | |||
626 | SWPointer p2(s, this, NULL__null, false); | |||
627 | if (p2.comparable(align_to_ref_p)) { | |||
628 | int align = memory_alignment(s, iv_adjustment); | |||
629 | set_alignment(s, align); | |||
630 | } | |||
631 | } | |||
632 | } | |||
633 | ||||
634 | // Create initial pack pairs of memory operations for which | |||
635 | // alignment is set and vectors will be aligned. | |||
636 | bool create_pack = true; | |||
637 | if (memory_alignment(mem_ref, best_iv_adjustment) == 0 || _do_vector_loop) { | |||
638 | if (vectors_should_be_aligned()) { | |||
639 | int vw = vector_width(mem_ref); | |||
640 | int vw_best = vector_width(best_align_to_mem_ref); | |||
641 | if (vw > vw_best) { | |||
642 | // Do not vectorize a memory access with more elements per vector | |||
643 | // if unaligned memory access is not allowed because number of | |||
644 | // iterations in pre-loop will be not enough to align it. | |||
645 | create_pack = false; | |||
646 | } else { | |||
647 | SWPointer p2(best_align_to_mem_ref, this, NULL__null, false); | |||
648 | if (!align_to_ref_p.invar_equals(p2)) { | |||
649 | // Do not vectorize memory accesses with different invariants | |||
650 | // if unaligned memory accesses are not allowed. | |||
651 | create_pack = false; | |||
652 | } | |||
653 | } | |||
654 | } | |||
655 | } else { | |||
656 | if (same_velt_type(mem_ref, best_align_to_mem_ref)) { | |||
657 | // Can't allow vectorization of unaligned memory accesses with the | |||
658 | // same type since it could be overlapped accesses to the same array. | |||
659 | create_pack = false; | |||
660 | } else { | |||
661 | // Allow independent (different type) unaligned memory operations | |||
662 | // if HW supports them. | |||
663 | if (vectors_should_be_aligned()) { | |||
664 | create_pack = false; | |||
665 | } else { | |||
666 | // Check if packs of the same memory type but | |||
667 | // with a different alignment were created before. | |||
668 | for (uint i = 0; i < align_to_refs.size(); i++) { | |||
669 | MemNode* mr = align_to_refs.at(i)->as_Mem(); | |||
670 | if (mr == mem_ref) { | |||
671 | // Skip when we are looking at same memory operation. | |||
672 | continue; | |||
673 | } | |||
674 | if (same_velt_type(mr, mem_ref) && | |||
675 | memory_alignment(mr, iv_adjustment) != 0) | |||
676 | create_pack = false; | |||
677 | } | |||
678 | } | |||
679 | } | |||
680 | } | |||
681 | if (create_pack) { | |||
682 | for (uint i = 0; i < memops.size(); i++) { | |||
683 | Node* s1 = memops.at(i); | |||
684 | int align = alignment(s1); | |||
685 | if (align == top_align) continue; | |||
686 | for (uint j = 0; j < memops.size(); j++) { | |||
687 | Node* s2 = memops.at(j); | |||
688 | if (alignment(s2) == top_align) continue; | |||
689 | if (s1 != s2 && are_adjacent_refs(s1, s2)) { | |||
690 | if (stmts_can_pack(s1, s2, align)) { | |||
691 | Node_List* pair = new Node_List(); | |||
692 | pair->push(s1); | |||
693 | pair->push(s2); | |||
694 | if (!_do_vector_loop || same_origin_idx(s1, s2)) { | |||
695 | _packset.append(pair); | |||
696 | } | |||
697 | } | |||
698 | } | |||
699 | } | |||
700 | } | |||
701 | } else { // Don't create unaligned pack | |||
702 | // First, remove remaining memory ops of the same type from the list. | |||
703 | for (int i = memops.size() - 1; i >= 0; i--) { | |||
704 | MemNode* s = memops.at(i)->as_Mem(); | |||
705 | if (same_velt_type(s, mem_ref)) { | |||
706 | memops.remove(i); | |||
707 | } | |||
708 | } | |||
709 | ||||
710 | // Second, remove already constructed packs of the same type. | |||
711 | for (int i = _packset.length() - 1; i >= 0; i--) { | |||
712 | Node_List* p = _packset.at(i); | |||
713 | MemNode* s = p->at(0)->as_Mem(); | |||
714 | if (same_velt_type(s, mem_ref)) { | |||
715 | remove_pack_at(i); | |||
716 | } | |||
717 | } | |||
718 | ||||
719 | // If needed find the best memory reference for loop alignment again. | |||
720 | if (same_velt_type(mem_ref, best_align_to_mem_ref)) { | |||
721 | // Put memory ops from remaining packs back on memops list for | |||
722 | // the best alignment search. | |||
723 | uint orig_msize = memops.size(); | |||
724 | for (int i = 0; i < _packset.length(); i++) { | |||
725 | Node_List* p = _packset.at(i); | |||
726 | MemNode* s = p->at(0)->as_Mem(); | |||
727 | assert(!same_velt_type(s, mem_ref), "sanity")do { if (!(!same_velt_type(s, mem_ref))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 727, "assert(" "!same_velt_type(s, mem_ref)" ") failed", "sanity" ); ::breakpoint(); } } while (0); | |||
728 | memops.push(s); | |||
729 | } | |||
730 | best_align_to_mem_ref = find_align_to_ref(memops, max_idx); | |||
731 | if (best_align_to_mem_ref == NULL__null) { | |||
732 | if (TraceSuperWord) { | |||
733 | tty->print_cr("SuperWord::find_adjacent_refs(): best_align_to_mem_ref == NULL"); | |||
734 | } | |||
735 | // best_align_to_mem_ref will be used for adjusting the pre-loop limit in | |||
736 | // SuperWord::align_initial_loop_index. Find one with the biggest vector size, | |||
737 | // smallest data size and smallest iv offset from memory ops from remaining packs. | |||
738 | if (_packset.length() > 0) { | |||
739 | if (orig_msize == 0) { | |||
740 | best_align_to_mem_ref = memops.at(max_idx)->as_Mem(); | |||
741 | } else { | |||
742 | for (uint i = 0; i < orig_msize; i++) { | |||
743 | memops.remove(0); | |||
744 | } | |||
745 | best_align_to_mem_ref = find_align_to_ref(memops, max_idx); | |||
746 | assert(best_align_to_mem_ref == NULL, "sanity")do { if (!(best_align_to_mem_ref == __null)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 746, "assert(" "best_align_to_mem_ref == __null" ") failed" , "sanity"); ::breakpoint(); } } while (0); | |||
747 | best_align_to_mem_ref = memops.at(max_idx)->as_Mem(); | |||
748 | } | |||
749 | assert(best_align_to_mem_ref != NULL, "sanity")do { if (!(best_align_to_mem_ref != __null)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 749, "assert(" "best_align_to_mem_ref != __null" ") failed" , "sanity"); ::breakpoint(); } } while (0); | |||
750 | } | |||
751 | break; | |||
752 | } | |||
753 | best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref); | |||
754 | NOT_PRODUCT(find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment);)find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment ); | |||
755 | // Restore list. | |||
756 | while (memops.size() > orig_msize) | |||
757 | (void)memops.pop(); | |||
758 | } | |||
759 | } // unaligned memory accesses | |||
760 | ||||
761 | // Remove used mem nodes. | |||
762 | for (int i = memops.size() - 1; i >= 0; i--) { | |||
763 | MemNode* m = memops.at(i)->as_Mem(); | |||
764 | if (alignment(m) != top_align) { | |||
765 | memops.remove(i); | |||
766 | } | |||
767 | } | |||
768 | ||||
769 | } // while (memops.size() != 0 | |||
770 | set_align_to_ref(best_align_to_mem_ref); | |||
771 | ||||
772 | if (TraceSuperWord) { | |||
773 | tty->print_cr("\nAfter find_adjacent_refs"); | |||
774 | print_packset(); | |||
775 | } | |||
776 | } | |||
777 | ||||
778 | #ifndef PRODUCT | |||
779 | void SuperWord::find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment) { | |||
780 | if (is_trace_adjacent()) { | |||
781 | tty->print("SuperWord::find_adjacent_refs best_align_to_mem_ref = %d, best_iv_adjustment = %d", | |||
782 | best_align_to_mem_ref->_idx, best_iv_adjustment); | |||
783 | best_align_to_mem_ref->dump(); | |||
784 | } | |||
785 | } | |||
786 | #endif | |||
787 | ||||
788 | //------------------------------find_align_to_ref--------------------------- | |||
789 | // Find a memory reference to align the loop induction variable to. | |||
790 | // Looks first at stores then at loads, looking for a memory reference | |||
791 | // with the largest number of references similar to it. | |||
792 | MemNode* SuperWord::find_align_to_ref(Node_List &memops, int &idx) { | |||
793 | GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0); | |||
794 | ||||
795 | // Count number of comparable memory ops | |||
796 | for (uint i = 0; i < memops.size(); i++) { | |||
797 | MemNode* s1 = memops.at(i)->as_Mem(); | |||
798 | SWPointer p1(s1, this, NULL__null, false); | |||
799 | // Only discard unalignable memory references if vector memory references | |||
800 | // should be aligned on this platform. | |||
801 | if (vectors_should_be_aligned() && !ref_is_alignable(p1)) { | |||
802 | *cmp_ct.adr_at(i) = 0; | |||
803 | continue; | |||
804 | } | |||
805 | for (uint j = i+1; j < memops.size(); j++) { | |||
806 | MemNode* s2 = memops.at(j)->as_Mem(); | |||
807 | if (isomorphic(s1, s2)) { | |||
808 | SWPointer p2(s2, this, NULL__null, false); | |||
809 | if (p1.comparable(p2)) { | |||
810 | (*cmp_ct.adr_at(i))++; | |||
811 | (*cmp_ct.adr_at(j))++; | |||
812 | } | |||
813 | } | |||
814 | } | |||
815 | } | |||
816 | ||||
817 | // Find Store (or Load) with the greatest number of "comparable" references, | |||
818 | // biggest vector size, smallest data size and smallest iv offset. | |||
819 | int max_ct = 0; | |||
820 | int max_vw = 0; | |||
821 | int max_idx = -1; | |||
822 | int min_size = max_jint; | |||
823 | int min_iv_offset = max_jint; | |||
824 | for (uint j = 0; j < memops.size(); j++) { | |||
825 | MemNode* s = memops.at(j)->as_Mem(); | |||
826 | if (s->is_Store()) { | |||
827 | int vw = vector_width_in_bytes(s); | |||
828 | assert(vw > 1, "sanity")do { if (!(vw > 1)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 828, "assert(" "vw > 1" ") failed", "sanity"); ::breakpoint (); } } while (0); | |||
829 | SWPointer p(s, this, NULL__null, false); | |||
830 | if ( cmp_ct.at(j) > max_ct || | |||
831 | (cmp_ct.at(j) == max_ct && | |||
832 | ( vw > max_vw || | |||
833 | (vw == max_vw && | |||
834 | ( data_size(s) < min_size || | |||
835 | (data_size(s) == min_size && | |||
836 | p.offset_in_bytes() < min_iv_offset)))))) { | |||
837 | max_ct = cmp_ct.at(j); | |||
838 | max_vw = vw; | |||
839 | max_idx = j; | |||
840 | min_size = data_size(s); | |||
841 | min_iv_offset = p.offset_in_bytes(); | |||
842 | } | |||
843 | } | |||
844 | } | |||
845 | // If no stores, look at loads | |||
846 | if (max_ct == 0) { | |||
847 | for (uint j = 0; j < memops.size(); j++) { | |||
848 | MemNode* s = memops.at(j)->as_Mem(); | |||
849 | if (s->is_Load()) { | |||
850 | int vw = vector_width_in_bytes(s); | |||
851 | assert(vw > 1, "sanity")do { if (!(vw > 1)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 851, "assert(" "vw > 1" ") failed", "sanity"); ::breakpoint (); } } while (0); | |||
852 | SWPointer p(s, this, NULL__null, false); | |||
853 | if ( cmp_ct.at(j) > max_ct || | |||
854 | (cmp_ct.at(j) == max_ct && | |||
855 | ( vw > max_vw || | |||
856 | (vw == max_vw && | |||
857 | ( data_size(s) < min_size || | |||
858 | (data_size(s) == min_size && | |||
859 | p.offset_in_bytes() < min_iv_offset)))))) { | |||
860 | max_ct = cmp_ct.at(j); | |||
861 | max_vw = vw; | |||
862 | max_idx = j; | |||
863 | min_size = data_size(s); | |||
864 | min_iv_offset = p.offset_in_bytes(); | |||
865 | } | |||
866 | } | |||
867 | } | |||
868 | } | |||
869 | ||||
870 | #ifdef ASSERT1 | |||
871 | if (TraceSuperWord && Verbose) { | |||
872 | tty->print_cr("\nVector memops after find_align_to_ref"); | |||
873 | for (uint i = 0; i < memops.size(); i++) { | |||
874 | MemNode* s = memops.at(i)->as_Mem(); | |||
875 | s->dump(); | |||
876 | } | |||
877 | } | |||
878 | #endif | |||
879 | ||||
880 | idx = max_idx; | |||
881 | if (max_ct > 0) { | |||
882 | #ifdef ASSERT1 | |||
883 | if (TraceSuperWord) { | |||
884 | tty->print("\nVector align to node: "); | |||
885 | memops.at(max_idx)->as_Mem()->dump(); | |||
886 | } | |||
887 | #endif | |||
888 | return memops.at(max_idx)->as_Mem(); | |||
889 | } | |||
890 | return NULL__null; | |||
891 | } | |||
892 | ||||
893 | //------------------span_works_for_memory_size----------------------------- | |||
894 | static bool span_works_for_memory_size(MemNode* mem, int span, int mem_size, int offset) { | |||
895 | bool span_matches_memory = false; | |||
896 | if ((mem_size == type2aelembytes(T_BYTE) || mem_size == type2aelembytes(T_SHORT)) | |||
897 | && ABS(span) == type2aelembytes(T_INT)) { | |||
898 | // There is a mismatch on span size compared to memory. | |||
899 | for (DUIterator_Fast jmax, j = mem->fast_outs(jmax); j < jmax; j++) { | |||
900 | Node* use = mem->fast_out(j); | |||
901 | if (!VectorNode::is_type_transition_to_int(use)) { | |||
902 | return false; | |||
903 | } | |||
904 | } | |||
905 | // If all uses transition to integer, it means that we can successfully align even on mismatch. | |||
906 | return true; | |||
907 | } | |||
908 | else { | |||
909 | span_matches_memory = ABS(span) == mem_size; | |||
910 | } | |||
911 | return span_matches_memory && (ABS(offset) % mem_size) == 0; | |||
912 | } | |||
913 | ||||
914 | //------------------------------ref_is_alignable--------------------------- | |||
915 | // Can the preloop align the reference to position zero in the vector? | |||
916 | bool SuperWord::ref_is_alignable(SWPointer& p) { | |||
917 | if (!p.has_iv()) { | |||
918 | return true; // no induction variable | |||
919 | } | |||
920 | CountedLoopEndNode* pre_end = pre_loop_end(); | |||
921 | assert(pre_end->stride_is_con(), "pre loop stride is constant")do { if (!(pre_end->stride_is_con())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 921, "assert(" "pre_end->stride_is_con()" ") failed", "pre loop stride is constant" ); ::breakpoint(); } } while (0); | |||
922 | int preloop_stride = pre_end->stride_con(); | |||
923 | ||||
924 | int span = preloop_stride * p.scale_in_bytes(); | |||
925 | int mem_size = p.memory_size(); | |||
926 | int offset = p.offset_in_bytes(); | |||
927 | // Stride one accesses are alignable if offset is aligned to memory operation size. | |||
928 | // Offset can be unaligned when UseUnalignedAccesses is used. | |||
929 | if (span_works_for_memory_size(p.mem(), span, mem_size, offset)) { | |||
930 | return true; | |||
931 | } | |||
932 | // If the initial offset from start of the object is computable, | |||
933 | // check if the pre-loop can align the final offset accordingly. | |||
934 | // | |||
935 | // In other words: Can we find an i such that the offset | |||
936 | // after i pre-loop iterations is aligned to vw? | |||
937 | // (init_offset + pre_loop) % vw == 0 (1) | |||
938 | // where | |||
939 | // pre_loop = i * span | |||
940 | // is the number of bytes added to the offset by i pre-loop iterations. | |||
941 | // | |||
942 | // For this to hold we need pre_loop to increase init_offset by | |||
943 | // pre_loop = vw - (init_offset % vw) | |||
944 | // | |||
945 | // This is only possible if pre_loop is divisible by span because each | |||
946 | // pre-loop iteration increases the initial offset by 'span' bytes: | |||
947 | // (vw - (init_offset % vw)) % span == 0 | |||
948 | // | |||
949 | int vw = vector_width_in_bytes(p.mem()); | |||
950 | assert(vw > 1, "sanity")do { if (!(vw > 1)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 950, "assert(" "vw > 1" ") failed", "sanity"); ::breakpoint (); } } while (0); | |||
951 | Node* init_nd = pre_end->init_trip(); | |||
952 | if (init_nd->is_Con() && p.invar() == NULL__null) { | |||
953 | int init = init_nd->bottom_type()->is_int()->get_con(); | |||
954 | int init_offset = init * p.scale_in_bytes() + offset; | |||
955 | if (init_offset < 0) { // negative offset from object start? | |||
956 | return false; // may happen in dead loop | |||
957 | } | |||
958 | if (vw % span == 0) { | |||
959 | // If vm is a multiple of span, we use formula (1). | |||
960 | if (span > 0) { | |||
961 | return (vw - (init_offset % vw)) % span == 0; | |||
962 | } else { | |||
963 | assert(span < 0, "nonzero stride * scale")do { if (!(span < 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 963, "assert(" "span < 0" ") failed", "nonzero stride * scale" ); ::breakpoint(); } } while (0); | |||
964 | return (init_offset % vw) % -span == 0; | |||
965 | } | |||
966 | } else if (span % vw == 0) { | |||
967 | // If span is a multiple of vw, we can simplify formula (1) to: | |||
968 | // (init_offset + i * span) % vw == 0 | |||
969 | // => | |||
970 | // (init_offset % vw) + ((i * span) % vw) == 0 | |||
971 | // => | |||
972 | // init_offset % vw == 0 | |||
973 | // | |||
974 | // Because we add a multiple of vw to the initial offset, the final | |||
975 | // offset is a multiple of vw if and only if init_offset is a multiple. | |||
976 | // | |||
977 | return (init_offset % vw) == 0; | |||
978 | } | |||
979 | } | |||
980 | return false; | |||
981 | } | |||
982 | //---------------------------get_vw_bytes_special------------------------ | |||
983 | int SuperWord::get_vw_bytes_special(MemNode* s) { | |||
984 | // Get the vector width in bytes. | |||
985 | int vw = vector_width_in_bytes(s); | |||
986 | ||||
987 | // Check for special case where there is an MulAddS2I usage where short vectors are going to need combined. | |||
988 | BasicType btype = velt_basic_type(s); | |||
989 | if (type2aelembytes(btype) == 2) { | |||
990 | bool should_combine_adjacent = true; | |||
991 | for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) { | |||
992 | Node* user = s->fast_out(i); | |||
993 | if (!VectorNode::is_muladds2i(user)) { | |||
994 | should_combine_adjacent = false; | |||
995 | } | |||
996 | } | |||
997 | if (should_combine_adjacent) { | |||
998 | vw = MIN2(Matcher::max_vector_size(btype)*type2aelembytes(btype), vw * 2); | |||
999 | } | |||
1000 | } | |||
1001 | ||||
1002 | return vw; | |||
1003 | } | |||
1004 | ||||
1005 | //---------------------------get_iv_adjustment--------------------------- | |||
1006 | // Calculate loop's iv adjustment for this memory ops. | |||
1007 | int SuperWord::get_iv_adjustment(MemNode* mem_ref) { | |||
1008 | SWPointer align_to_ref_p(mem_ref, this, NULL__null, false); | |||
1009 | int offset = align_to_ref_p.offset_in_bytes(); | |||
1010 | int scale = align_to_ref_p.scale_in_bytes(); | |||
1011 | int elt_size = align_to_ref_p.memory_size(); | |||
1012 | int vw = get_vw_bytes_special(mem_ref); | |||
1013 | assert(vw > 1, "sanity")do { if (!(vw > 1)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1013, "assert(" "vw > 1" ") failed", "sanity"); ::breakpoint (); } } while (0); | |||
1014 | int iv_adjustment; | |||
1015 | if (scale != 0) { | |||
1016 | int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1; | |||
1017 | // At least one iteration is executed in pre-loop by default. As result | |||
1018 | // several iterations are needed to align memory operations in main-loop even | |||
1019 | // if offset is 0. | |||
1020 | int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw)); | |||
1021 | // iv_adjustment_in_bytes must be a multiple of elt_size if vector memory | |||
1022 | // references should be aligned on this platform. | |||
1023 | assert((ABS(iv_adjustment_in_bytes) % elt_size) == 0 || !vectors_should_be_aligned(),do { if (!((ABS(iv_adjustment_in_bytes) % elt_size) == 0 || ! vectors_should_be_aligned())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1024, "assert(" "(ABS(iv_adjustment_in_bytes) % elt_size) == 0 || !vectors_should_be_aligned()" ") failed", "(%d) should be divisible by (%d)", iv_adjustment_in_bytes , elt_size); ::breakpoint(); } } while (0) | |||
1024 | "(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size)do { if (!((ABS(iv_adjustment_in_bytes) % elt_size) == 0 || ! vectors_should_be_aligned())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1024, "assert(" "(ABS(iv_adjustment_in_bytes) % elt_size) == 0 || !vectors_should_be_aligned()" ") failed", "(%d) should be divisible by (%d)", iv_adjustment_in_bytes , elt_size); ::breakpoint(); } } while (0); | |||
1025 | iv_adjustment = iv_adjustment_in_bytes/elt_size; | |||
1026 | } else { | |||
1027 | // This memory op is not dependent on iv (scale == 0) | |||
1028 | iv_adjustment = 0; | |||
1029 | } | |||
1030 | ||||
1031 | #ifndef PRODUCT | |||
1032 | if (TraceSuperWord) { | |||
1033 | tty->print("SuperWord::get_iv_adjustment: n = %d, noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d: ", | |||
1034 | mem_ref->_idx, offset, iv_adjustment, elt_size, scale, iv_stride(), vw); | |||
1035 | mem_ref->dump(); | |||
1036 | } | |||
1037 | #endif | |||
1038 | return iv_adjustment; | |||
1039 | } | |||
1040 | ||||
1041 | //---------------------------dependence_graph--------------------------- | |||
1042 | // Construct dependency graph. | |||
1043 | // Add dependence edges to load/store nodes for memory dependence | |||
1044 | // A.out()->DependNode.in(1) and DependNode.out()->B.prec(x) | |||
1045 | void SuperWord::dependence_graph() { | |||
1046 | CountedLoopNode *cl = lpt()->_head->as_CountedLoop(); | |||
1047 | // First, assign a dependence node to each memory node | |||
1048 | for (int i = 0; i < _block.length(); i++ ) { | |||
1049 | Node *n = _block.at(i); | |||
1050 | if (n->is_Mem() || (n->is_Phi() && n->bottom_type() == Type::MEMORY)) { | |||
1051 | _dg.make_node(n); | |||
1052 | } | |||
1053 | } | |||
1054 | ||||
1055 | // For each memory slice, create the dependences | |||
1056 | for (int i = 0; i < _mem_slice_head.length(); i++) { | |||
1057 | Node* n = _mem_slice_head.at(i); | |||
1058 | Node* n_tail = _mem_slice_tail.at(i); | |||
1059 | ||||
1060 | // Get slice in predecessor order (last is first) | |||
1061 | if (cl->is_main_loop()) { | |||
1062 | mem_slice_preds(n_tail, n, _nlist); | |||
1063 | } | |||
1064 | ||||
1065 | #ifndef PRODUCT | |||
1066 | if(TraceSuperWord && Verbose) { | |||
1067 | tty->print_cr("SuperWord::dependence_graph: built a new mem slice"); | |||
1068 | for (int j = _nlist.length() - 1; j >= 0 ; j--) { | |||
1069 | _nlist.at(j)->dump(); | |||
1070 | } | |||
1071 | } | |||
1072 | #endif | |||
1073 | // Make the slice dependent on the root | |||
1074 | DepMem* slice = _dg.dep(n); | |||
1075 | _dg.make_edge(_dg.root(), slice); | |||
1076 | ||||
1077 | // Create a sink for the slice | |||
1078 | DepMem* slice_sink = _dg.make_node(NULL__null); | |||
1079 | _dg.make_edge(slice_sink, _dg.tail()); | |||
1080 | ||||
1081 | // Now visit each pair of memory ops, creating the edges | |||
1082 | for (int j = _nlist.length() - 1; j >= 0 ; j--) { | |||
1083 | Node* s1 = _nlist.at(j); | |||
1084 | ||||
1085 | // If no dependency yet, use slice | |||
1086 | if (_dg.dep(s1)->in_cnt() == 0) { | |||
1087 | _dg.make_edge(slice, s1); | |||
1088 | } | |||
1089 | SWPointer p1(s1->as_Mem(), this, NULL__null, false); | |||
1090 | bool sink_dependent = true; | |||
1091 | for (int k = j - 1; k >= 0; k--) { | |||
1092 | Node* s2 = _nlist.at(k); | |||
1093 | if (s1->is_Load() && s2->is_Load()) | |||
1094 | continue; | |||
1095 | SWPointer p2(s2->as_Mem(), this, NULL__null, false); | |||
1096 | ||||
1097 | int cmp = p1.cmp(p2); | |||
1098 | if (SuperWordRTDepCheck && | |||
1099 | p1.base() != p2.base() && p1.valid() && p2.valid()) { | |||
1100 | // Create a runtime check to disambiguate | |||
1101 | OrderedPair pp(p1.base(), p2.base()); | |||
1102 | _disjoint_ptrs.append_if_missing(pp); | |||
1103 | } else if (!SWPointer::not_equal(cmp)) { | |||
1104 | // Possibly same address | |||
1105 | _dg.make_edge(s1, s2); | |||
1106 | sink_dependent = false; | |||
1107 | } | |||
1108 | } | |||
1109 | if (sink_dependent) { | |||
1110 | _dg.make_edge(s1, slice_sink); | |||
1111 | } | |||
1112 | } | |||
1113 | ||||
1114 | if (TraceSuperWord) { | |||
1115 | tty->print_cr("\nDependence graph for slice: %d", n->_idx); | |||
1116 | for (int q = 0; q < _nlist.length(); q++) { | |||
1117 | _dg.print(_nlist.at(q)); | |||
1118 | } | |||
1119 | tty->cr(); | |||
1120 | } | |||
1121 | ||||
1122 | _nlist.clear(); | |||
1123 | } | |||
1124 | ||||
1125 | if (TraceSuperWord) { | |||
1126 | tty->print_cr("\ndisjoint_ptrs: %s", _disjoint_ptrs.length() > 0 ? "" : "NONE"); | |||
1127 | for (int r = 0; r < _disjoint_ptrs.length(); r++) { | |||
1128 | _disjoint_ptrs.at(r).print(); | |||
1129 | tty->cr(); | |||
1130 | } | |||
1131 | tty->cr(); | |||
1132 | } | |||
1133 | ||||
1134 | } | |||
1135 | ||||
1136 | //---------------------------mem_slice_preds--------------------------- | |||
1137 | // Return a memory slice (node list) in predecessor order starting at "start" | |||
1138 | void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds) { | |||
1139 | assert(preds.length() == 0, "start empty")do { if (!(preds.length() == 0)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1139, "assert(" "preds.length() == 0" ") failed", "start empty" ); ::breakpoint(); } } while (0); | |||
1140 | Node* n = start; | |||
1141 | Node* prev = NULL__null; | |||
1142 | while (true) { | |||
1143 | NOT_PRODUCT( if(is_trace_mem_slice()) tty->print_cr("SuperWord::mem_slice_preds: n %d", n->_idx);)if(is_trace_mem_slice()) tty->print_cr("SuperWord::mem_slice_preds: n %d" , n->_idx); | |||
1144 | assert(in_bb(n), "must be in block")do { if (!(in_bb(n))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1144, "assert(" "in_bb(n)" ") failed", "must be in block"); ::breakpoint(); } } while (0); | |||
1145 | for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { | |||
1146 | Node* out = n->fast_out(i); | |||
1147 | if (out->is_Load()) { | |||
1148 | if (in_bb(out)) { | |||
1149 | preds.push(out); | |||
1150 | if (TraceSuperWord && Verbose) { | |||
1151 | tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", out->_idx); | |||
1152 | } | |||
1153 | } | |||
1154 | } else { | |||
1155 | // FIXME | |||
1156 | if (out->is_MergeMem() && !in_bb(out)) { | |||
1157 | // Either unrolling is causing a memory edge not to disappear, | |||
1158 | // or need to run igvn.optimize() again before SLP | |||
1159 | } else if (out->is_Phi() && out->bottom_type() == Type::MEMORY && !in_bb(out)) { | |||
1160 | // Ditto. Not sure what else to check further. | |||
1161 | } else if (out->Opcode() == Op_StoreCM && out->in(MemNode::OopStore) == n) { | |||
1162 | // StoreCM has an input edge used as a precedence edge. | |||
1163 | // Maybe an issue when oop stores are vectorized. | |||
1164 | } else { | |||
1165 | assert(out == prev || prev == NULL, "no branches off of store slice")do { if (!(out == prev || prev == __null)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1165, "assert(" "out == prev || prev == __null" ") failed", "no branches off of store slice"); ::breakpoint(); } } while (0); | |||
1166 | } | |||
1167 | }//else | |||
1168 | }//for | |||
1169 | if (n == stop) break; | |||
1170 | preds.push(n); | |||
1171 | if (TraceSuperWord && Verbose) { | |||
1172 | tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", n->_idx); | |||
1173 | } | |||
1174 | prev = n; | |||
1175 | assert(n->is_Mem(), "unexpected node %s", n->Name())do { if (!(n->is_Mem())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1175, "assert(" "n->is_Mem()" ") failed", "unexpected node %s" , n->Name()); ::breakpoint(); } } while (0); | |||
1176 | n = n->in(MemNode::Memory); | |||
1177 | } | |||
1178 | } | |||
1179 | ||||
1180 | //------------------------------stmts_can_pack--------------------------- | |||
1181 | // Can s1 and s2 be in a pack with s1 immediately preceding s2 and | |||
1182 | // s1 aligned at "align" | |||
1183 | bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) { | |||
1184 | ||||
1185 | // Do not use superword for non-primitives | |||
1186 | BasicType bt1 = velt_basic_type(s1); | |||
1187 | BasicType bt2 = velt_basic_type(s2); | |||
1188 | if(!is_java_primitive(bt1) || !is_java_primitive(bt2)) | |||
1189 | return false; | |||
1190 | if (Matcher::max_vector_size(bt1) < 2) { | |||
1191 | return false; // No vectors for this type | |||
1192 | } | |||
1193 | ||||
1194 | if (isomorphic(s1, s2)) { | |||
1195 | if ((independent(s1, s2) && have_similar_inputs(s1, s2)) || reduction(s1, s2)) { | |||
1196 | if (!exists_at(s1, 0) && !exists_at(s2, 1)) { | |||
1197 | if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) { | |||
1198 | int s1_align = alignment(s1); | |||
1199 | int s2_align = alignment(s2); | |||
1200 | if (s1_align == top_align || s1_align == align) { | |||
1201 | if (s2_align == top_align || s2_align == align + data_size(s1)) { | |||
1202 | return true; | |||
1203 | } | |||
1204 | } | |||
1205 | } | |||
1206 | } | |||
1207 | } | |||
1208 | } | |||
1209 | return false; | |||
1210 | } | |||
1211 | ||||
1212 | //------------------------------exists_at--------------------------- | |||
1213 | // Does s exist in a pack at position pos? | |||
1214 | bool SuperWord::exists_at(Node* s, uint pos) { | |||
1215 | for (int i = 0; i < _packset.length(); i++) { | |||
1216 | Node_List* p = _packset.at(i); | |||
1217 | if (p->at(pos) == s) { | |||
1218 | return true; | |||
1219 | } | |||
1220 | } | |||
1221 | return false; | |||
1222 | } | |||
1223 | ||||
1224 | //------------------------------are_adjacent_refs--------------------------- | |||
1225 | // Is s1 immediately before s2 in memory? | |||
1226 | bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) { | |||
1227 | if (!s1->is_Mem() || !s2->is_Mem()) return false; | |||
1228 | if (!in_bb(s1) || !in_bb(s2)) return false; | |||
1229 | ||||
1230 | // Do not use superword for non-primitives | |||
1231 | if (!is_java_primitive(s1->as_Mem()->memory_type()) || | |||
1232 | !is_java_primitive(s2->as_Mem()->memory_type())) { | |||
1233 | return false; | |||
1234 | } | |||
1235 | ||||
1236 | // FIXME - co_locate_pack fails on Stores in different mem-slices, so | |||
1237 | // only pack memops that are in the same alias set until that's fixed. | |||
1238 | if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) != | |||
1239 | _phase->C->get_alias_index(s2->as_Mem()->adr_type())) | |||
1240 | return false; | |||
1241 | SWPointer p1(s1->as_Mem(), this, NULL__null, false); | |||
1242 | SWPointer p2(s2->as_Mem(), this, NULL__null, false); | |||
1243 | if (p1.base() != p2.base() || !p1.comparable(p2)) return false; | |||
1244 | int diff = p2.offset_in_bytes() - p1.offset_in_bytes(); | |||
1245 | return diff == data_size(s1); | |||
1246 | } | |||
1247 | ||||
1248 | //------------------------------isomorphic--------------------------- | |||
1249 | // Are s1 and s2 similar? | |||
1250 | bool SuperWord::isomorphic(Node* s1, Node* s2) { | |||
1251 | if (s1->Opcode() != s2->Opcode()) return false; | |||
1252 | if (s1->req() != s2->req()) return false; | |||
1253 | if (!same_velt_type(s1, s2)) return false; | |||
1254 | Node* s1_ctrl = s1->in(0); | |||
1255 | Node* s2_ctrl = s2->in(0); | |||
1256 | // If the control nodes are equivalent, no further checks are required to test for isomorphism. | |||
1257 | if (s1_ctrl == s2_ctrl) { | |||
1258 | return true; | |||
1259 | } else { | |||
1260 | bool s1_ctrl_inv = ((s1_ctrl == NULL__null) ? true : lpt()->is_invariant(s1_ctrl)); | |||
1261 | bool s2_ctrl_inv = ((s2_ctrl == NULL__null) ? true : lpt()->is_invariant(s2_ctrl)); | |||
1262 | // If the control nodes are not invariant for the loop, fail isomorphism test. | |||
1263 | if (!s1_ctrl_inv || !s2_ctrl_inv) { | |||
1264 | return false; | |||
1265 | } | |||
1266 | if(s1_ctrl != NULL__null && s2_ctrl != NULL__null) { | |||
1267 | if (s1_ctrl->is_Proj()) { | |||
1268 | s1_ctrl = s1_ctrl->in(0); | |||
1269 | assert(lpt()->is_invariant(s1_ctrl), "must be invariant")do { if (!(lpt()->is_invariant(s1_ctrl))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1269, "assert(" "lpt()->is_invariant(s1_ctrl)" ") failed" , "must be invariant"); ::breakpoint(); } } while (0); | |||
1270 | } | |||
1271 | if (s2_ctrl->is_Proj()) { | |||
1272 | s2_ctrl = s2_ctrl->in(0); | |||
1273 | assert(lpt()->is_invariant(s2_ctrl), "must be invariant")do { if (!(lpt()->is_invariant(s2_ctrl))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1273, "assert(" "lpt()->is_invariant(s2_ctrl)" ") failed" , "must be invariant"); ::breakpoint(); } } while (0); | |||
1274 | } | |||
1275 | if (!s1_ctrl->is_RangeCheck() || !s2_ctrl->is_RangeCheck()) { | |||
1276 | return false; | |||
1277 | } | |||
1278 | } | |||
1279 | // Control nodes are invariant. However, we have no way of checking whether they resolve | |||
1280 | // in an equivalent manner. But, we know that invariant range checks are guaranteed to | |||
1281 | // throw before the loop (if they would have thrown). Thus, the loop would not have been reached. | |||
1282 | // Therefore, if the control nodes for both are range checks, we accept them to be isomorphic. | |||
1283 | for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { | |||
1284 | Node* t1 = s1->fast_out(i); | |||
1285 | for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) { | |||
1286 | Node* t2 = s2->fast_out(j); | |||
1287 | if (VectorNode::is_muladds2i(t1) && VectorNode::is_muladds2i(t2)) { | |||
1288 | return true; | |||
1289 | } | |||
1290 | } | |||
1291 | } | |||
1292 | } | |||
1293 | return false; | |||
1294 | } | |||
1295 | ||||
1296 | //------------------------------independent--------------------------- | |||
1297 | // Is there no data path from s1 to s2 or s2 to s1? | |||
1298 | bool SuperWord::independent(Node* s1, Node* s2) { | |||
1299 | // assert(s1->Opcode() == s2->Opcode(), "check isomorphic first"); | |||
1300 | int d1 = depth(s1); | |||
1301 | int d2 = depth(s2); | |||
1302 | if (d1 == d2) return s1 != s2; | |||
1303 | Node* deep = d1 > d2 ? s1 : s2; | |||
1304 | Node* shallow = d1 > d2 ? s2 : s1; | |||
1305 | ||||
1306 | visited_clear(); | |||
1307 | ||||
1308 | return independent_path(shallow, deep); | |||
1309 | } | |||
1310 | ||||
1311 | //--------------------------have_similar_inputs----------------------- | |||
1312 | // For a node pair (s1, s2) which is isomorphic and independent, | |||
1313 | // do s1 and s2 have similar input edges? | |||
1314 | bool SuperWord::have_similar_inputs(Node* s1, Node* s2) { | |||
1315 | // assert(isomorphic(s1, s2) == true, "check isomorphic"); | |||
1316 | // assert(independent(s1, s2) == true, "check independent"); | |||
1317 | if (s1->req() > 1 && !s1->is_Store() && !s1->is_Load()) { | |||
1318 | for (uint i = 1; i < s1->req(); i++) { | |||
1319 | if (s1->in(i)->Opcode() != s2->in(i)->Opcode()) return false; | |||
1320 | } | |||
1321 | } | |||
1322 | return true; | |||
1323 | } | |||
1324 | ||||
1325 | //------------------------------reduction--------------------------- | |||
1326 | // Is there a data path between s1 and s2 and the nodes reductions? | |||
1327 | bool SuperWord::reduction(Node* s1, Node* s2) { | |||
1328 | bool retValue = false; | |||
1329 | int d1 = depth(s1); | |||
1330 | int d2 = depth(s2); | |||
1331 | if (d2 > d1) { | |||
1332 | if (s1->is_reduction() && s2->is_reduction()) { | |||
1333 | // This is an ordered set, so s1 should define s2 | |||
1334 | for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { | |||
1335 | Node* t1 = s1->fast_out(i); | |||
1336 | if (t1 == s2) { | |||
1337 | // both nodes are reductions and connected | |||
1338 | retValue = true; | |||
1339 | } | |||
1340 | } | |||
1341 | } | |||
1342 | } | |||
1343 | ||||
1344 | return retValue; | |||
1345 | } | |||
1346 | ||||
1347 | //------------------------------independent_path------------------------------ | |||
1348 | // Helper for independent | |||
1349 | bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) { | |||
1350 | if (dp >= 1000) return false; // stop deep recursion | |||
1351 | visited_set(deep); | |||
1352 | int shal_depth = depth(shallow); | |||
1353 | assert(shal_depth <= depth(deep), "must be")do { if (!(shal_depth <= depth(deep))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1353, "assert(" "shal_depth <= depth(deep)" ") failed", "must be" ); ::breakpoint(); } } while (0); | |||
1354 | for (DepPreds preds(deep, _dg); !preds.done(); preds.next()) { | |||
1355 | Node* pred = preds.current(); | |||
1356 | if (in_bb(pred) && !visited_test(pred)) { | |||
1357 | if (shallow == pred) { | |||
1358 | return false; | |||
1359 | } | |||
1360 | if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) { | |||
1361 | return false; | |||
1362 | } | |||
1363 | } | |||
1364 | } | |||
1365 | return true; | |||
1366 | } | |||
1367 | ||||
1368 | //------------------------------set_alignment--------------------------- | |||
1369 | void SuperWord::set_alignment(Node* s1, Node* s2, int align) { | |||
1370 | set_alignment(s1, align); | |||
1371 | if (align == top_align || align == bottom_align) { | |||
1372 | set_alignment(s2, align); | |||
1373 | } else { | |||
1374 | set_alignment(s2, align + data_size(s1)); | |||
1375 | } | |||
1376 | } | |||
1377 | ||||
1378 | //------------------------------data_size--------------------------- | |||
1379 | int SuperWord::data_size(Node* s) { | |||
1380 | Node* use = NULL__null; //test if the node is a candidate for CMoveV optimization, then return the size of CMov | |||
1381 | if (UseVectorCmov) { | |||
1382 | use = _cmovev_kit.is_Bool_candidate(s); | |||
1383 | if (use != NULL__null) { | |||
1384 | return data_size(use); | |||
1385 | } | |||
1386 | use = _cmovev_kit.is_CmpD_candidate(s); | |||
1387 | if (use != NULL__null) { | |||
1388 | return data_size(use); | |||
1389 | } | |||
1390 | } | |||
1391 | ||||
1392 | int bsize = type2aelembytes(velt_basic_type(s)); | |||
1393 | assert(bsize != 0, "valid size")do { if (!(bsize != 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1393, "assert(" "bsize != 0" ") failed", "valid size"); ::breakpoint (); } } while (0); | |||
1394 | return bsize; | |||
1395 | } | |||
1396 | ||||
1397 | //------------------------------extend_packlist--------------------------- | |||
1398 | // Extend packset by following use->def and def->use links from pack members. | |||
1399 | void SuperWord::extend_packlist() { | |||
1400 | bool changed; | |||
1401 | do { | |||
1402 | packset_sort(_packset.length()); | |||
1403 | changed = false; | |||
1404 | for (int i = 0; i < _packset.length(); i++) { | |||
1405 | Node_List* p = _packset.at(i); | |||
1406 | changed |= follow_use_defs(p); | |||
1407 | changed |= follow_def_uses(p); | |||
1408 | } | |||
1409 | } while (changed); | |||
1410 | ||||
1411 | if (_race_possible) { | |||
1412 | for (int i = 0; i < _packset.length(); i++) { | |||
1413 | Node_List* p = _packset.at(i); | |||
1414 | order_def_uses(p); | |||
1415 | } | |||
1416 | } | |||
1417 | ||||
1418 | if (TraceSuperWord) { | |||
1419 | tty->print_cr("\nAfter extend_packlist"); | |||
1420 | print_packset(); | |||
1421 | } | |||
1422 | } | |||
1423 | ||||
1424 | //------------------------------follow_use_defs--------------------------- | |||
1425 | // Extend the packset by visiting operand definitions of nodes in pack p | |||
1426 | bool SuperWord::follow_use_defs(Node_List* p) { | |||
1427 | assert(p->size() == 2, "just checking")do { if (!(p->size() == 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1427, "assert(" "p->size() == 2" ") failed", "just checking" ); ::breakpoint(); } } while (0); | |||
1428 | Node* s1 = p->at(0); | |||
1429 | Node* s2 = p->at(1); | |||
1430 | assert(s1->req() == s2->req(), "just checking")do { if (!(s1->req() == s2->req())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1430, "assert(" "s1->req() == s2->req()" ") failed", "just checking" ); ::breakpoint(); } } while (0); | |||
1431 | assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking")do { if (!(alignment(s1) + data_size(s1) == alignment(s2))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1431, "assert(" "alignment(s1) + data_size(s1) == alignment(s2)" ") failed", "just checking"); ::breakpoint(); } } while (0); | |||
1432 | ||||
1433 | if (s1->is_Load()) return false; | |||
1434 | ||||
1435 | int align = alignment(s1); | |||
1436 | NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_use_defs: s1 %d, align %d", s1->_idx, align);)if(is_trace_alignment()) tty->print_cr("SuperWord::follow_use_defs: s1 %d, align %d" , s1->_idx, align); | |||
1437 | bool changed = false; | |||
1438 | int start = s1->is_Store() ? MemNode::ValueIn : 1; | |||
1439 | int end = s1->is_Store() ? MemNode::ValueIn+1 : s1->req(); | |||
1440 | for (int j = start; j < end; j++) { | |||
1441 | Node* t1 = s1->in(j); | |||
1442 | Node* t2 = s2->in(j); | |||
1443 | if (!in_bb(t1) || !in_bb(t2)) | |||
1444 | continue; | |||
1445 | if (stmts_can_pack(t1, t2, align)) { | |||
1446 | if (est_savings(t1, t2) >= 0) { | |||
1447 | Node_List* pair = new Node_List(); | |||
1448 | pair->push(t1); | |||
1449 | pair->push(t2); | |||
1450 | _packset.append(pair); | |||
1451 | NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_use_defs: set_alignment(%d, %d, %d)", t1->_idx, t2->_idx, align);)if(is_trace_alignment()) tty->print_cr("SuperWord::follow_use_defs: set_alignment(%d, %d, %d)" , t1->_idx, t2->_idx, align); | |||
1452 | set_alignment(t1, t2, align); | |||
1453 | changed = true; | |||
1454 | } | |||
1455 | } | |||
1456 | } | |||
1457 | return changed; | |||
1458 | } | |||
1459 | ||||
1460 | //------------------------------follow_def_uses--------------------------- | |||
1461 | // Extend the packset by visiting uses of nodes in pack p | |||
1462 | bool SuperWord::follow_def_uses(Node_List* p) { | |||
1463 | bool changed = false; | |||
1464 | Node* s1 = p->at(0); | |||
1465 | Node* s2 = p->at(1); | |||
1466 | assert(p->size() == 2, "just checking")do { if (!(p->size() == 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1466, "assert(" "p->size() == 2" ") failed", "just checking" ); ::breakpoint(); } } while (0); | |||
1467 | assert(s1->req() == s2->req(), "just checking")do { if (!(s1->req() == s2->req())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1467, "assert(" "s1->req() == s2->req()" ") failed", "just checking" ); ::breakpoint(); } } while (0); | |||
1468 | assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking")do { if (!(alignment(s1) + data_size(s1) == alignment(s2))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1468, "assert(" "alignment(s1) + data_size(s1) == alignment(s2)" ") failed", "just checking"); ::breakpoint(); } } while (0); | |||
1469 | ||||
1470 | if (s1->is_Store()) return false; | |||
1471 | ||||
1472 | int align = alignment(s1); | |||
1473 | NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_def_uses: s1 %d, align %d", s1->_idx, align);)if(is_trace_alignment()) tty->print_cr("SuperWord::follow_def_uses: s1 %d, align %d" , s1->_idx, align); | |||
1474 | int savings = -1; | |||
1475 | int num_s1_uses = 0; | |||
1476 | Node* u1 = NULL__null; | |||
1477 | Node* u2 = NULL__null; | |||
1478 | for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { | |||
1479 | Node* t1 = s1->fast_out(i); | |||
1480 | num_s1_uses++; | |||
1481 | if (!in_bb(t1)) continue; | |||
1482 | for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) { | |||
1483 | Node* t2 = s2->fast_out(j); | |||
1484 | if (!in_bb(t2)) continue; | |||
1485 | if (t2->Opcode() == Op_AddI && t2 == _lp->as_CountedLoop()->incr()) continue; // don't mess with the iv | |||
1486 | if (!opnd_positions_match(s1, t1, s2, t2)) | |||
1487 | continue; | |||
1488 | if (stmts_can_pack(t1, t2, align)) { | |||
1489 | int my_savings = est_savings(t1, t2); | |||
1490 | if (my_savings > savings) { | |||
1491 | savings = my_savings; | |||
1492 | u1 = t1; | |||
1493 | u2 = t2; | |||
1494 | } | |||
1495 | } | |||
1496 | } | |||
1497 | } | |||
1498 | if (num_s1_uses > 1) { | |||
1499 | _race_possible = true; | |||
1500 | } | |||
1501 | if (savings >= 0) { | |||
1502 | Node_List* pair = new Node_List(); | |||
1503 | pair->push(u1); | |||
1504 | pair->push(u2); | |||
1505 | _packset.append(pair); | |||
1506 | NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_def_uses: set_alignment(%d, %d, %d)", u1->_idx, u2->_idx, align);)if(is_trace_alignment()) tty->print_cr("SuperWord::follow_def_uses: set_alignment(%d, %d, %d)" , u1->_idx, u2->_idx, align); | |||
1507 | set_alignment(u1, u2, align); | |||
1508 | changed = true; | |||
1509 | } | |||
1510 | return changed; | |||
1511 | } | |||
1512 | ||||
1513 | //------------------------------order_def_uses--------------------------- | |||
1514 | // For extended packsets, ordinally arrange uses packset by major component | |||
1515 | void SuperWord::order_def_uses(Node_List* p) { | |||
1516 | Node* s1 = p->at(0); | |||
1517 | ||||
1518 | if (s1->is_Store()) return; | |||
1519 | ||||
1520 | // reductions are always managed beforehand | |||
1521 | if (s1->is_reduction()) return; | |||
1522 | ||||
1523 | for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { | |||
1524 | Node* t1 = s1->fast_out(i); | |||
1525 | ||||
1526 | // Only allow operand swap on commuting operations | |||
1527 | if (!t1->is_Add() && !t1->is_Mul() && !VectorNode::is_muladds2i(t1)) { | |||
1528 | break; | |||
1529 | } | |||
1530 | ||||
1531 | // Now find t1's packset | |||
1532 | Node_List* p2 = NULL__null; | |||
1533 | for (int j = 0; j < _packset.length(); j++) { | |||
1534 | p2 = _packset.at(j); | |||
1535 | Node* first = p2->at(0); | |||
1536 | if (t1 == first) { | |||
1537 | break; | |||
1538 | } | |||
1539 | p2 = NULL__null; | |||
1540 | } | |||
1541 | // Arrange all sub components by the major component | |||
1542 | if (p2 != NULL__null) { | |||
1543 | for (uint j = 1; j < p->size(); j++) { | |||
1544 | Node* d1 = p->at(j); | |||
1545 | Node* u1 = p2->at(j); | |||
1546 | opnd_positions_match(s1, t1, d1, u1); | |||
1547 | } | |||
1548 | } | |||
1549 | } | |||
1550 | } | |||
1551 | ||||
1552 | //---------------------------opnd_positions_match------------------------- | |||
1553 | // Is the use of d1 in u1 at the same operand position as d2 in u2? | |||
1554 | bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) { | |||
1555 | // check reductions to see if they are marshalled to represent the reduction | |||
1556 | // operator in a specified opnd | |||
1557 | if (u1->is_reduction() && u2->is_reduction()) { | |||
1558 | // ensure reductions have phis and reduction definitions feeding the 1st operand | |||
1559 | Node* first = u1->in(2); | |||
1560 | if (first->is_Phi() || first->is_reduction()) { | |||
1561 | u1->swap_edges(1, 2); | |||
1562 | } | |||
1563 | // ensure reductions have phis and reduction definitions feeding the 1st operand | |||
1564 | first = u2->in(2); | |||
1565 | if (first->is_Phi() || first->is_reduction()) { | |||
1566 | u2->swap_edges(1, 2); | |||
1567 | } | |||
1568 | return true; | |||
1569 | } | |||
1570 | ||||
1571 | uint ct = u1->req(); | |||
1572 | if (ct != u2->req()) return false; | |||
1573 | uint i1 = 0; | |||
1574 | uint i2 = 0; | |||
1575 | do { | |||
1576 | for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break; | |||
1577 | for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break; | |||
1578 | if (i1 != i2) { | |||
1579 | if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) { | |||
1580 | // Further analysis relies on operands position matching. | |||
1581 | u2->swap_edges(i1, i2); | |||
1582 | } else if (VectorNode::is_muladds2i(u2) && u1 != u2) { | |||
1583 | if (i1 == 5 - i2) { // ((i1 == 3 && i2 == 2) || (i1 == 2 && i2 == 3) || (i1 == 1 && i2 == 4) || (i1 == 4 && i2 == 1)) | |||
1584 | u2->swap_edges(1, 2); | |||
1585 | u2->swap_edges(3, 4); | |||
1586 | } | |||
1587 | if (i1 == 3 - i2 || i1 == 7 - i2) { // ((i1 == 1 && i2 == 2) || (i1 == 2 && i2 == 1) || (i1 == 3 && i2 == 4) || (i1 == 4 && i2 == 3)) | |||
1588 | u2->swap_edges(2, 3); | |||
1589 | u2->swap_edges(1, 4); | |||
1590 | } | |||
1591 | return false; // Just swap the edges, the muladds2i nodes get packed in follow_use_defs | |||
1592 | } else { | |||
1593 | return false; | |||
1594 | } | |||
1595 | } else if (i1 == i2 && VectorNode::is_muladds2i(u2) && u1 != u2) { | |||
1596 | u2->swap_edges(1, 3); | |||
1597 | u2->swap_edges(2, 4); | |||
1598 | return false; // Just swap the edges, the muladds2i nodes get packed in follow_use_defs | |||
1599 | } | |||
1600 | } while (i1 < ct); | |||
1601 | return true; | |||
1602 | } | |||
1603 | ||||
1604 | //------------------------------est_savings--------------------------- | |||
1605 | // Estimate the savings from executing s1 and s2 as a pack | |||
1606 | int SuperWord::est_savings(Node* s1, Node* s2) { | |||
1607 | int save_in = 2 - 1; // 2 operations per instruction in packed form | |||
1608 | ||||
1609 | // inputs | |||
1610 | for (uint i = 1; i < s1->req(); i++) { | |||
1611 | Node* x1 = s1->in(i); | |||
1612 | Node* x2 = s2->in(i); | |||
1613 | if (x1 != x2) { | |||
1614 | if (are_adjacent_refs(x1, x2)) { | |||
1615 | save_in += adjacent_profit(x1, x2); | |||
1616 | } else if (!in_packset(x1, x2)) { | |||
1617 | save_in -= pack_cost(2); | |||
1618 | } else { | |||
1619 | save_in += unpack_cost(2); | |||
1620 | } | |||
1621 | } | |||
1622 | } | |||
1623 | ||||
1624 | // uses of result | |||
1625 | uint ct = 0; | |||
1626 | int save_use = 0; | |||
1627 | for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { | |||
1628 | Node* s1_use = s1->fast_out(i); | |||
1629 | for (int j = 0; j < _packset.length(); j++) { | |||
1630 | Node_List* p = _packset.at(j); | |||
1631 | if (p->at(0) == s1_use) { | |||
1632 | for (DUIterator_Fast kmax, k = s2->fast_outs(kmax); k < kmax; k++) { | |||
1633 | Node* s2_use = s2->fast_out(k); | |||
1634 | if (p->at(p->size()-1) == s2_use) { | |||
1635 | ct++; | |||
1636 | if (are_adjacent_refs(s1_use, s2_use)) { | |||
1637 | save_use += adjacent_profit(s1_use, s2_use); | |||
1638 | } | |||
1639 | } | |||
1640 | } | |||
1641 | } | |||
1642 | } | |||
1643 | } | |||
1644 | ||||
1645 | if (ct < s1->outcnt()) save_use += unpack_cost(1); | |||
1646 | if (ct < s2->outcnt()) save_use += unpack_cost(1); | |||
1647 | ||||
1648 | return MAX2(save_in, save_use); | |||
1649 | } | |||
1650 | ||||
1651 | //------------------------------costs--------------------------- | |||
1652 | int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; } | |||
1653 | int SuperWord::pack_cost(int ct) { return ct; } | |||
1654 | int SuperWord::unpack_cost(int ct) { return ct; } | |||
1655 | ||||
1656 | //------------------------------combine_packs--------------------------- | |||
1657 | // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last | |||
1658 | void SuperWord::combine_packs() { | |||
1659 | bool changed = true; | |||
1660 | // Combine packs regardless max vector size. | |||
1661 | while (changed) { | |||
1662 | changed = false; | |||
1663 | for (int i = 0; i < _packset.length(); i++) { | |||
1664 | Node_List* p1 = _packset.at(i); | |||
1665 | if (p1 == NULL__null) continue; | |||
1666 | // Because of sorting we can start at i + 1 | |||
1667 | for (int j = i + 1; j < _packset.length(); j++) { | |||
1668 | Node_List* p2 = _packset.at(j); | |||
1669 | if (p2 == NULL__null) continue; | |||
1670 | if (i == j) continue; | |||
1671 | if (p1->at(p1->size()-1) == p2->at(0)) { | |||
1672 | for (uint k = 1; k < p2->size(); k++) { | |||
1673 | p1->push(p2->at(k)); | |||
1674 | } | |||
1675 | _packset.at_put(j, NULL__null); | |||
1676 | changed = true; | |||
1677 | } | |||
1678 | } | |||
1679 | } | |||
1680 | } | |||
1681 | ||||
1682 | // Split packs which have size greater then max vector size. | |||
1683 | for (int i = 0; i < _packset.length(); i++) { | |||
1684 | Node_List* p1 = _packset.at(i); | |||
1685 | if (p1 != NULL__null) { | |||
1686 | BasicType bt = velt_basic_type(p1->at(0)); | |||
1687 | uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector | |||
1688 | assert(is_power_of_2(max_vlen), "sanity")do { if (!(is_power_of_2(max_vlen))) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1688, "assert(" "is_power_of_2(max_vlen)" ") failed", "sanity" ); ::breakpoint(); } } while (0); | |||
1689 | uint psize = p1->size(); | |||
1690 | if (!is_power_of_2(psize)) { | |||
1691 | // Skip pack which can't be vector. | |||
1692 | // case1: for(...) { a[i] = i; } elements values are different (i+x) | |||
1693 | // case2: for(...) { a[i] = b[i+1]; } can't align both, load and store | |||
1694 | _packset.at_put(i, NULL__null); | |||
1695 | continue; | |||
1696 | } | |||
1697 | if (psize > max_vlen) { | |||
1698 | Node_List* pack = new Node_List(); | |||
1699 | for (uint j = 0; j < psize; j++) { | |||
1700 | pack->push(p1->at(j)); | |||
1701 | if (pack->size() >= max_vlen) { | |||
1702 | assert(is_power_of_2(pack->size()), "sanity")do { if (!(is_power_of_2(pack->size()))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1702, "assert(" "is_power_of_2(pack->size())" ") failed" , "sanity"); ::breakpoint(); } } while (0); | |||
1703 | _packset.append(pack); | |||
1704 | pack = new Node_List(); | |||
1705 | } | |||
1706 | } | |||
1707 | _packset.at_put(i, NULL__null); | |||
1708 | } | |||
1709 | } | |||
1710 | } | |||
1711 | ||||
1712 | // Compress list. | |||
1713 | for (int i = _packset.length() - 1; i >= 0; i--) { | |||
1714 | Node_List* p1 = _packset.at(i); | |||
1715 | if (p1 == NULL__null) { | |||
1716 | _packset.remove_at(i); | |||
1717 | } | |||
1718 | } | |||
1719 | ||||
1720 | if (TraceSuperWord) { | |||
1721 | tty->print_cr("\nAfter combine_packs"); | |||
1722 | print_packset(); | |||
1723 | } | |||
1724 | } | |||
1725 | ||||
1726 | //-----------------------------construct_my_pack_map-------------------------- | |||
1727 | // Construct the map from nodes to packs. Only valid after the | |||
1728 | // point where a node is only in one pack (after combine_packs). | |||
1729 | void SuperWord::construct_my_pack_map() { | |||
1730 | Node_List* rslt = NULL__null; | |||
1731 | for (int i = 0; i < _packset.length(); i++) { | |||
1732 | Node_List* p = _packset.at(i); | |||
1733 | for (uint j = 0; j < p->size(); j++) { | |||
1734 | Node* s = p->at(j); | |||
1735 | #ifdef ASSERT1 | |||
1736 | if (my_pack(s) != NULL__null) { | |||
1737 | s->dump(1); | |||
1738 | tty->print_cr("packs[%d]:", i); | |||
1739 | print_pack(p); | |||
1740 | assert(false, "only in one pack")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1740, "assert(" "false" ") failed", "only in one pack"); :: breakpoint(); } } while (0); | |||
1741 | } | |||
1742 | #endif | |||
1743 | set_my_pack(s, p); | |||
1744 | } | |||
1745 | } | |||
1746 | } | |||
1747 | ||||
1748 | //------------------------------filter_packs--------------------------- | |||
1749 | // Remove packs that are not implemented or not profitable. | |||
1750 | void SuperWord::filter_packs() { | |||
1751 | // Remove packs that are not implemented | |||
1752 | for (int i = _packset.length() - 1; i >= 0; i--) { | |||
1753 | Node_List* pk = _packset.at(i); | |||
1754 | bool impl = implemented(pk); | |||
1755 | if (!impl) { | |||
1756 | #ifndef PRODUCT | |||
1757 | if ((TraceSuperWord && Verbose) || _vector_loop_debug) { | |||
1758 | tty->print_cr("Unimplemented"); | |||
1759 | pk->at(0)->dump(); | |||
1760 | } | |||
1761 | #endif | |||
1762 | remove_pack_at(i); | |||
1763 | } | |||
1764 | Node *n = pk->at(0); | |||
1765 | if (n->is_reduction()) { | |||
1766 | _num_reductions++; | |||
1767 | } else { | |||
1768 | _num_work_vecs++; | |||
1769 | } | |||
1770 | } | |||
1771 | ||||
1772 | // Remove packs that are not profitable | |||
1773 | bool changed; | |||
1774 | do { | |||
1775 | changed = false; | |||
1776 | for (int i = _packset.length() - 1; i >= 0; i--) { | |||
1777 | Node_List* pk = _packset.at(i); | |||
1778 | bool prof = profitable(pk); | |||
1779 | if (!prof) { | |||
1780 | #ifndef PRODUCT | |||
1781 | if ((TraceSuperWord && Verbose) || _vector_loop_debug) { | |||
1782 | tty->print_cr("Unprofitable"); | |||
1783 | pk->at(0)->dump(); | |||
1784 | } | |||
1785 | #endif | |||
1786 | remove_pack_at(i); | |||
1787 | changed = true; | |||
1788 | } | |||
1789 | } | |||
1790 | } while (changed); | |||
1791 | ||||
1792 | #ifndef PRODUCT | |||
1793 | if (TraceSuperWord) { | |||
1794 | tty->print_cr("\nAfter filter_packs"); | |||
1795 | print_packset(); | |||
1796 | tty->cr(); | |||
1797 | } | |||
1798 | #endif | |||
1799 | } | |||
1800 | ||||
1801 | //------------------------------merge_packs_to_cmovd--------------------------- | |||
1802 | // Merge CMoveD into new vector-nodes | |||
1803 | // We want to catch this pattern and subsume CmpD and Bool into CMoveD | |||
1804 | // | |||
1805 | // SubD ConD | |||
1806 | // / | / | |||
1807 | // / | / / | |||
1808 | // / | / / | |||
1809 | // / | / / | |||
1810 | // / / / | |||
1811 | // / / | / | |||
1812 | // v / | / | |||
1813 | // CmpD | / | |||
1814 | // | | / | |||
1815 | // v | / | |||
1816 | // Bool | / | |||
1817 | // \ | / | |||
1818 | // \ | / | |||
1819 | // \ | / | |||
1820 | // \ | / | |||
1821 | // \ v / | |||
1822 | // CMoveD | |||
1823 | // | |||
1824 | ||||
1825 | void SuperWord::merge_packs_to_cmovd() { | |||
1826 | for (int i = _packset.length() - 1; i >= 0; i--) { | |||
1827 | _cmovev_kit.make_cmovevd_pack(_packset.at(i)); | |||
1828 | } | |||
1829 | #ifndef PRODUCT | |||
1830 | if (TraceSuperWord) { | |||
1831 | tty->print_cr("\nSuperWord::merge_packs_to_cmovd(): After merge"); | |||
1832 | print_packset(); | |||
1833 | tty->cr(); | |||
1834 | } | |||
1835 | #endif | |||
1836 | } | |||
1837 | ||||
1838 | Node* CMoveKit::is_Bool_candidate(Node* def) const { | |||
1839 | Node* use = NULL__null; | |||
1840 | if (!def->is_Bool() || def->in(0) != NULL__null || def->outcnt() != 1) { | |||
1841 | return NULL__null; | |||
1842 | } | |||
1843 | for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { | |||
1844 | use = def->fast_out(j); | |||
1845 | if (!_sw->same_generation(def, use) || !use->is_CMove()) { | |||
1846 | return NULL__null; | |||
1847 | } | |||
1848 | } | |||
1849 | return use; | |||
1850 | } | |||
1851 | ||||
1852 | Node* CMoveKit::is_CmpD_candidate(Node* def) const { | |||
1853 | Node* use = NULL__null; | |||
1854 | if (!def->is_Cmp() || def->in(0) != NULL__null || def->outcnt() != 1) { | |||
1855 | return NULL__null; | |||
1856 | } | |||
1857 | for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { | |||
1858 | use = def->fast_out(j); | |||
1859 | if (!_sw->same_generation(def, use) || (use = is_Bool_candidate(use)) == NULL__null || !_sw->same_generation(def, use)) { | |||
1860 | return NULL__null; | |||
1861 | } | |||
1862 | } | |||
1863 | return use; | |||
1864 | } | |||
1865 | ||||
1866 | Node_List* CMoveKit::make_cmovevd_pack(Node_List* cmovd_pk) { | |||
1867 | Node *cmovd = cmovd_pk->at(0); | |||
1868 | if (!cmovd->is_CMove()) { | |||
1869 | return NULL__null; | |||
1870 | } | |||
1871 | if (cmovd->Opcode() != Op_CMoveF && cmovd->Opcode() != Op_CMoveD) { | |||
1872 | return NULL__null; | |||
1873 | } | |||
1874 | if (pack(cmovd) != NULL__null) { // already in the cmov pack | |||
1875 | return NULL__null; | |||
1876 | } | |||
1877 | if (cmovd->in(0) != NULL__null) { | |||
1878 | NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: CMoveD %d has control flow, escaping...", cmovd->_idx); cmovd->dump();})if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: CMoveD %d has control flow, escaping..." , cmovd->_idx); cmovd->dump();} | |||
1879 | return NULL__null; | |||
1880 | } | |||
1881 | ||||
1882 | Node* bol = cmovd->as_CMove()->in(CMoveNode::Condition); | |||
1883 | if (!bol->is_Bool() | |||
1884 | || bol->outcnt() != 1 | |||
1885 | || !_sw->same_generation(bol, cmovd) | |||
1886 | || bol->in(0) != NULL__null // BoolNode has control flow!! | |||
1887 | || _sw->my_pack(bol) == NULL__null) { | |||
1888 | NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: Bool %d does not fit CMoveD %d for building vector, escaping...", bol->_idx, cmovd->_idx); bol->dump();})if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: Bool %d does not fit CMoveD %d for building vector, escaping..." , bol->_idx, cmovd->_idx); bol->dump();} | |||
1889 | return NULL__null; | |||
1890 | } | |||
1891 | Node_List* bool_pk = _sw->my_pack(bol); | |||
1892 | if (bool_pk->size() != cmovd_pk->size() ) { | |||
1893 | return NULL__null; | |||
1894 | } | |||
1895 | ||||
1896 | Node* cmpd = bol->in(1); | |||
1897 | if (!cmpd->is_Cmp() | |||
1898 | || cmpd->outcnt() != 1 | |||
1899 | || !_sw->same_generation(cmpd, cmovd) | |||
1900 | || cmpd->in(0) != NULL__null // CmpDNode has control flow!! | |||
1901 | || _sw->my_pack(cmpd) == NULL__null) { | |||
1902 | NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: CmpD %d does not fit CMoveD %d for building vector, escaping...", cmpd->_idx, cmovd->_idx); cmpd->dump();})if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: CmpD %d does not fit CMoveD %d for building vector, escaping..." , cmpd->_idx, cmovd->_idx); cmpd->dump();} | |||
1903 | return NULL__null; | |||
1904 | } | |||
1905 | Node_List* cmpd_pk = _sw->my_pack(cmpd); | |||
1906 | if (cmpd_pk->size() != cmovd_pk->size() ) { | |||
1907 | return NULL__null; | |||
1908 | } | |||
1909 | ||||
1910 | if (!test_cmpd_pack(cmpd_pk, cmovd_pk)) { | |||
1911 | NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: cmpd pack for CmpD %d failed vectorization test", cmpd->_idx); cmpd->dump();})if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: cmpd pack for CmpD %d failed vectorization test" , cmpd->_idx); cmpd->dump();} | |||
1912 | return NULL__null; | |||
1913 | } | |||
1914 | ||||
1915 | Node_List* new_cmpd_pk = new Node_List(); | |||
1916 | uint sz = cmovd_pk->size() - 1; | |||
1917 | for (uint i = 0; i <= sz; ++i) { | |||
1918 | Node* cmov = cmovd_pk->at(i); | |||
1919 | Node* bol = bool_pk->at(i); | |||
1920 | Node* cmp = cmpd_pk->at(i); | |||
1921 | ||||
1922 | new_cmpd_pk->insert(i, cmov); | |||
1923 | ||||
1924 | map(cmov, new_cmpd_pk); | |||
1925 | map(bol, new_cmpd_pk); | |||
1926 | map(cmp, new_cmpd_pk); | |||
1927 | ||||
1928 | _sw->set_my_pack(cmov, new_cmpd_pk); // and keep old packs for cmp and bool | |||
1929 | } | |||
1930 | _sw->_packset.remove(cmovd_pk); | |||
1931 | _sw->_packset.remove(bool_pk); | |||
1932 | _sw->_packset.remove(cmpd_pk); | |||
1933 | _sw->_packset.append(new_cmpd_pk); | |||
1934 | NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print_cr("CMoveKit::make_cmovevd_pack: added syntactic CMoveD pack"); _sw->print_pack(new_cmpd_pk);})if(_sw->is_trace_cmov()) {tty->print_cr("CMoveKit::make_cmovevd_pack: added syntactic CMoveD pack" ); _sw->print_pack(new_cmpd_pk);} | |||
1935 | return new_cmpd_pk; | |||
1936 | } | |||
1937 | ||||
1938 | bool CMoveKit::test_cmpd_pack(Node_List* cmpd_pk, Node_List* cmovd_pk) { | |||
1939 | Node* cmpd0 = cmpd_pk->at(0); | |||
1940 | assert(cmpd0->is_Cmp(), "CMoveKit::test_cmpd_pack: should be CmpDNode")do { if (!(cmpd0->is_Cmp())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1940, "assert(" "cmpd0->is_Cmp()" ") failed", "CMoveKit::test_cmpd_pack: should be CmpDNode" ); ::breakpoint(); } } while (0); | |||
1941 | assert(cmovd_pk->at(0)->is_CMove(), "CMoveKit::test_cmpd_pack: should be CMoveD")do { if (!(cmovd_pk->at(0)->is_CMove())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1941, "assert(" "cmovd_pk->at(0)->is_CMove()" ") failed" , "CMoveKit::test_cmpd_pack: should be CMoveD"); ::breakpoint (); } } while (0); | |||
1942 | assert(cmpd_pk->size() == cmovd_pk->size(), "CMoveKit::test_cmpd_pack: should be same size")do { if (!(cmpd_pk->size() == cmovd_pk->size())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 1942, "assert(" "cmpd_pk->size() == cmovd_pk->size()" ") failed", "CMoveKit::test_cmpd_pack: should be same size") ; ::breakpoint(); } } while (0); | |||
1943 | Node* in1 = cmpd0->in(1); | |||
1944 | Node* in2 = cmpd0->in(2); | |||
1945 | Node_List* in1_pk = _sw->my_pack(in1); | |||
1946 | Node_List* in2_pk = _sw->my_pack(in2); | |||
1947 | ||||
1948 | if ( (in1_pk != NULL__null && in1_pk->size() != cmpd_pk->size()) | |||
1949 | || (in2_pk != NULL__null && in2_pk->size() != cmpd_pk->size()) ) { | |||
1950 | return false; | |||
1951 | } | |||
1952 | ||||
1953 | // test if "all" in1 are in the same pack or the same node | |||
1954 | if (in1_pk == NULL__null) { | |||
1955 | for (uint j = 1; j < cmpd_pk->size(); j++) { | |||
1956 | if (cmpd_pk->at(j)->in(1) != in1) { | |||
1957 | return false; | |||
1958 | } | |||
1959 | }//for: in1_pk is not pack but all CmpD nodes in the pack have the same in(1) | |||
1960 | } | |||
1961 | // test if "all" in2 are in the same pack or the same node | |||
1962 | if (in2_pk == NULL__null) { | |||
1963 | for (uint j = 1; j < cmpd_pk->size(); j++) { | |||
1964 | if (cmpd_pk->at(j)->in(2) != in2) { | |||
1965 | return false; | |||
1966 | } | |||
1967 | }//for: in2_pk is not pack but all CmpD nodes in the pack have the same in(2) | |||
1968 | } | |||
1969 | //now check if cmpd_pk may be subsumed in vector built for cmovd_pk | |||
1970 | int cmovd_ind1, cmovd_ind2; | |||
1971 | if (cmpd_pk->at(0)->in(1) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfFalse) | |||
1972 | && cmpd_pk->at(0)->in(2) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfTrue)) { | |||
1973 | cmovd_ind1 = CMoveNode::IfFalse; | |||
1974 | cmovd_ind2 = CMoveNode::IfTrue; | |||
1975 | } else if (cmpd_pk->at(0)->in(2) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfFalse) | |||
1976 | && cmpd_pk->at(0)->in(1) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfTrue)) { | |||
1977 | cmovd_ind2 = CMoveNode::IfFalse; | |||
1978 | cmovd_ind1 = CMoveNode::IfTrue; | |||
1979 | } | |||
1980 | else { | |||
1981 | return false; | |||
1982 | } | |||
1983 | ||||
1984 | for (uint j = 1; j < cmpd_pk->size(); j++) { | |||
1985 | if (cmpd_pk->at(j)->in(1) != cmovd_pk->at(j)->as_CMove()->in(cmovd_ind1) | |||
1986 | || cmpd_pk->at(j)->in(2) != cmovd_pk->at(j)->as_CMove()->in(cmovd_ind2)) { | |||
1987 | return false; | |||
1988 | }//if | |||
1989 | } | |||
1990 | NOT_PRODUCT(if(_sw->is_trace_cmov()) { tty->print("CMoveKit::test_cmpd_pack: cmpd pack for 1st CmpD %d is OK for vectorization: ", cmpd0->_idx); cmpd0->dump(); })if(_sw->is_trace_cmov()) { tty->print("CMoveKit::test_cmpd_pack: cmpd pack for 1st CmpD %d is OK for vectorization: " , cmpd0->_idx); cmpd0->dump(); } | |||
1991 | return true; | |||
1992 | } | |||
1993 | ||||
1994 | //------------------------------implemented--------------------------- | |||
1995 | // Can code be generated for pack p? | |||
1996 | bool SuperWord::implemented(Node_List* p) { | |||
1997 | bool retValue = false; | |||
1998 | Node* p0 = p->at(0); | |||
1999 | if (p0 != NULL__null) { | |||
2000 | int opc = p0->Opcode(); | |||
2001 | uint size = p->size(); | |||
2002 | if (p0->is_reduction()) { | |||
2003 | const Type *arith_type = p0->bottom_type(); | |||
2004 | // Length 2 reductions of INT/LONG do not offer performance benefits | |||
2005 | if (((arith_type->basic_type() == T_INT) || (arith_type->basic_type() == T_LONG)) && (size == 2)) { | |||
2006 | retValue = false; | |||
2007 | } else { | |||
2008 | retValue = ReductionNode::implemented(opc, size, arith_type->basic_type()); | |||
2009 | } | |||
2010 | } else { | |||
2011 | retValue = VectorNode::implemented(opc, size, velt_basic_type(p0)); | |||
2012 | } | |||
2013 | if (!retValue) { | |||
2014 | if (is_cmov_pack(p)) { | |||
2015 | NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::implemented: found cmpd pack"); print_pack(p);})if(is_trace_cmov()) {tty->print_cr("SWPointer::implemented: found cmpd pack" ); print_pack(p);} | |||
2016 | return true; | |||
2017 | } | |||
2018 | } | |||
2019 | } | |||
2020 | return retValue; | |||
2021 | } | |||
2022 | ||||
2023 | bool SuperWord::is_cmov_pack(Node_List* p) { | |||
2024 | return _cmovev_kit.pack(p->at(0)) != NULL__null; | |||
2025 | } | |||
2026 | //------------------------------same_inputs-------------------------- | |||
2027 | // For pack p, are all idx operands the same? | |||
2028 | bool SuperWord::same_inputs(Node_List* p, int idx) { | |||
2029 | Node* p0 = p->at(0); | |||
2030 | uint vlen = p->size(); | |||
2031 | Node* p0_def = p0->in(idx); | |||
2032 | for (uint i = 1; i < vlen; i++) { | |||
2033 | Node* pi = p->at(i); | |||
2034 | Node* pi_def = pi->in(idx); | |||
2035 | if (p0_def != pi_def) { | |||
2036 | return false; | |||
2037 | } | |||
2038 | } | |||
2039 | return true; | |||
2040 | } | |||
2041 | ||||
2042 | //------------------------------profitable--------------------------- | |||
2043 | // For pack p, are all operands and all uses (with in the block) vector? | |||
2044 | bool SuperWord::profitable(Node_List* p) { | |||
2045 | Node* p0 = p->at(0); | |||
2046 | uint start, end; | |||
2047 | VectorNode::vector_operands(p0, &start, &end); | |||
2048 | ||||
2049 | // Return false if some inputs are not vectors or vectors with different | |||
2050 | // size or alignment. | |||
2051 | // Also, for now, return false if not scalar promotion case when inputs are | |||
2052 | // the same. Later, implement PackNode and allow differing, non-vector inputs | |||
2053 | // (maybe just the ones from outside the block.) | |||
2054 | for (uint i = start; i < end; i++) { | |||
2055 | if (!is_vector_use(p0, i)) { | |||
2056 | return false; | |||
2057 | } | |||
2058 | } | |||
2059 | // Check if reductions are connected | |||
2060 | if (p0->is_reduction()) { | |||
2061 | Node* second_in = p0->in(2); | |||
2062 | Node_List* second_pk = my_pack(second_in); | |||
2063 | if ((second_pk == NULL__null) || (_num_work_vecs == _num_reductions)) { | |||
2064 | // Remove reduction flag if no parent pack or if not enough work | |||
2065 | // to cover reduction expansion overhead | |||
2066 | p0->remove_flag(Node::Flag_is_reduction); | |||
2067 | return false; | |||
2068 | } else if (second_pk->size() != p->size()) { | |||
2069 | return false; | |||
2070 | } | |||
2071 | } | |||
2072 | if (VectorNode::is_shift(p0)) { | |||
2073 | // For now, return false if shift count is vector or not scalar promotion | |||
2074 | // case (different shift counts) because it is not supported yet. | |||
2075 | Node* cnt = p0->in(2); | |||
2076 | Node_List* cnt_pk = my_pack(cnt); | |||
2077 | if (cnt_pk != NULL__null) | |||
2078 | return false; | |||
2079 | if (!same_inputs(p, 2)) | |||
2080 | return false; | |||
2081 | } | |||
2082 | if (!p0->is_Store()) { | |||
2083 | // For now, return false if not all uses are vector. | |||
2084 | // Later, implement ExtractNode and allow non-vector uses (maybe | |||
2085 | // just the ones outside the block.) | |||
2086 | for (uint i = 0; i < p->size(); i++) { | |||
2087 | Node* def = p->at(i); | |||
2088 | if (is_cmov_pack_internal_node(p, def)) { | |||
2089 | continue; | |||
2090 | } | |||
2091 | for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { | |||
2092 | Node* use = def->fast_out(j); | |||
2093 | for (uint k = 0; k < use->req(); k++) { | |||
2094 | Node* n = use->in(k); | |||
2095 | if (def == n) { | |||
2096 | // Reductions should only have a Phi use at the loop head or a non-phi use | |||
2097 | // outside of the loop if it is the last element of the pack (e.g. SafePoint). | |||
2098 | if (def->is_reduction() && | |||
2099 | ((use->is_Phi() && use->in(0) == _lpt->_head) || | |||
2100 | (!_lpt->is_member(_phase->get_loop(_phase->ctrl_or_self(use))) && i == p->size()-1))) { | |||
2101 | continue; | |||
2102 | } | |||
2103 | if (!is_vector_use(use, k)) { | |||
2104 | return false; | |||
2105 | } | |||
2106 | } | |||
2107 | } | |||
2108 | } | |||
2109 | } | |||
2110 | } | |||
2111 | return true; | |||
2112 | } | |||
2113 | ||||
2114 | //------------------------------schedule--------------------------- | |||
2115 | // Adjust the memory graph for the packed operations | |||
2116 | void SuperWord::schedule() { | |||
2117 | ||||
2118 | // Co-locate in the memory graph the members of each memory pack | |||
2119 | for (int i = 0; i < _packset.length(); i++) { | |||
2120 | co_locate_pack(_packset.at(i)); | |||
2121 | } | |||
2122 | } | |||
2123 | ||||
2124 | //-------------------------------remove_and_insert------------------- | |||
2125 | // Remove "current" from its current position in the memory graph and insert | |||
2126 | // it after the appropriate insertion point (lip or uip). | |||
2127 | void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip, | |||
2128 | Node *uip, Unique_Node_List &sched_before) { | |||
2129 | Node* my_mem = current->in(MemNode::Memory); | |||
2130 | bool sched_up = sched_before.member(current); | |||
2131 | ||||
2132 | // remove current_store from its current position in the memmory graph | |||
2133 | for (DUIterator i = current->outs(); current->has_out(i); i++) { | |||
2134 | Node* use = current->out(i); | |||
2135 | if (use->is_Mem()) { | |||
2136 | assert(use->in(MemNode::Memory) == current, "must be")do { if (!(use->in(MemNode::Memory) == current)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2136, "assert(" "use->in(MemNode::Memory) == current" ") failed" , "must be"); ::breakpoint(); } } while (0); | |||
2137 | if (use == prev) { // connect prev to my_mem | |||
2138 | _igvn.replace_input_of(use, MemNode::Memory, my_mem); | |||
2139 | --i; //deleted this edge; rescan position | |||
2140 | } else if (sched_before.member(use)) { | |||
2141 | if (!sched_up) { // Will be moved together with current | |||
2142 | _igvn.replace_input_of(use, MemNode::Memory, uip); | |||
2143 | --i; //deleted this edge; rescan position | |||
2144 | } | |||
2145 | } else { | |||
2146 | if (sched_up) { // Will be moved together with current | |||
2147 | _igvn.replace_input_of(use, MemNode::Memory, lip); | |||
2148 | --i; //deleted this edge; rescan position | |||
2149 | } | |||
2150 | } | |||
2151 | } | |||
2152 | } | |||
2153 | ||||
2154 | Node *insert_pt = sched_up ? uip : lip; | |||
2155 | ||||
2156 | // all uses of insert_pt's memory state should use current's instead | |||
2157 | for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) { | |||
2158 | Node* use = insert_pt->out(i); | |||
2159 | if (use->is_Mem()) { | |||
2160 | assert(use->in(MemNode::Memory) == insert_pt, "must be")do { if (!(use->in(MemNode::Memory) == insert_pt)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2160, "assert(" "use->in(MemNode::Memory) == insert_pt" ") failed" , "must be"); ::breakpoint(); } } while (0); | |||
2161 | _igvn.replace_input_of(use, MemNode::Memory, current); | |||
2162 | --i; //deleted this edge; rescan position | |||
2163 | } else if (!sched_up && use->is_Phi() && use->bottom_type() == Type::MEMORY) { | |||
2164 | uint pos; //lip (lower insert point) must be the last one in the memory slice | |||
2165 | for (pos=1; pos < use->req(); pos++) { | |||
2166 | if (use->in(pos) == insert_pt) break; | |||
2167 | } | |||
2168 | _igvn.replace_input_of(use, pos, current); | |||
2169 | --i; | |||
2170 | } | |||
2171 | } | |||
2172 | ||||
2173 | //connect current to insert_pt | |||
2174 | _igvn.replace_input_of(current, MemNode::Memory, insert_pt); | |||
2175 | } | |||
2176 | ||||
2177 | //------------------------------co_locate_pack---------------------------------- | |||
2178 | // To schedule a store pack, we need to move any sandwiched memory ops either before | |||
2179 | // or after the pack, based upon dependence information: | |||
2180 | // (1) If any store in the pack depends on the sandwiched memory op, the | |||
2181 | // sandwiched memory op must be scheduled BEFORE the pack; | |||
2182 | // (2) If a sandwiched memory op depends on any store in the pack, the | |||
2183 | // sandwiched memory op must be scheduled AFTER the pack; | |||
2184 | // (3) If a sandwiched memory op (say, memA) depends on another sandwiched | |||
2185 | // memory op (say memB), memB must be scheduled before memA. So, if memA is | |||
2186 | // scheduled before the pack, memB must also be scheduled before the pack; | |||
2187 | // (4) If there is no dependence restriction for a sandwiched memory op, we simply | |||
2188 | // schedule this store AFTER the pack | |||
2189 | // (5) We know there is no dependence cycle, so there in no other case; | |||
2190 | // (6) Finally, all memory ops in another single pack should be moved in the same direction. | |||
2191 | // | |||
2192 | // To schedule a load pack, we use the memory state of either the first or the last load in | |||
2193 | // the pack, based on the dependence constraint. | |||
2194 | void SuperWord::co_locate_pack(Node_List* pk) { | |||
2195 | if (pk->at(0)->is_Store()) { | |||
2196 | MemNode* first = executed_first(pk)->as_Mem(); | |||
2197 | MemNode* last = executed_last(pk)->as_Mem(); | |||
2198 | Unique_Node_List schedule_before_pack; | |||
2199 | Unique_Node_List memops; | |||
2200 | ||||
2201 | MemNode* current = last->in(MemNode::Memory)->as_Mem(); | |||
2202 | MemNode* previous = last; | |||
2203 | while (true) { | |||
2204 | assert(in_bb(current), "stay in block")do { if (!(in_bb(current))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2204, "assert(" "in_bb(current)" ") failed", "stay in block" ); ::breakpoint(); } } while (0); | |||
2205 | memops.push(previous); | |||
2206 | for (DUIterator i = current->outs(); current->has_out(i); i++) { | |||
2207 | Node* use = current->out(i); | |||
2208 | if (use->is_Mem() && use != previous) | |||
2209 | memops.push(use); | |||
2210 | } | |||
2211 | if (current == first) break; | |||
2212 | previous = current; | |||
2213 | current = current->in(MemNode::Memory)->as_Mem(); | |||
2214 | } | |||
2215 | ||||
2216 | // determine which memory operations should be scheduled before the pack | |||
2217 | for (uint i = 1; i < memops.size(); i++) { | |||
2218 | Node *s1 = memops.at(i); | |||
2219 | if (!in_pack(s1, pk) && !schedule_before_pack.member(s1)) { | |||
2220 | for (uint j = 0; j< i; j++) { | |||
2221 | Node *s2 = memops.at(j); | |||
2222 | if (!independent(s1, s2)) { | |||
2223 | if (in_pack(s2, pk) || schedule_before_pack.member(s2)) { | |||
2224 | schedule_before_pack.push(s1); // s1 must be scheduled before | |||
2225 | Node_List* mem_pk = my_pack(s1); | |||
2226 | if (mem_pk != NULL__null) { | |||
2227 | for (uint ii = 0; ii < mem_pk->size(); ii++) { | |||
2228 | Node* s = mem_pk->at(ii); // follow partner | |||
2229 | if (memops.member(s) && !schedule_before_pack.member(s)) | |||
2230 | schedule_before_pack.push(s); | |||
2231 | } | |||
2232 | } | |||
2233 | break; | |||
2234 | } | |||
2235 | } | |||
2236 | } | |||
2237 | } | |||
2238 | } | |||
2239 | ||||
2240 | Node* upper_insert_pt = first->in(MemNode::Memory); | |||
2241 | // Following code moves loads connected to upper_insert_pt below aliased stores. | |||
2242 | // Collect such loads here and reconnect them back to upper_insert_pt later. | |||
2243 | memops.clear(); | |||
2244 | for (DUIterator i = upper_insert_pt->outs(); upper_insert_pt->has_out(i); i++) { | |||
2245 | Node* use = upper_insert_pt->out(i); | |||
2246 | if (use->is_Mem() && !use->is_Store()) { | |||
2247 | memops.push(use); | |||
2248 | } | |||
2249 | } | |||
2250 | ||||
2251 | MemNode* lower_insert_pt = last; | |||
2252 | previous = last; //previous store in pk | |||
2253 | current = last->in(MemNode::Memory)->as_Mem(); | |||
2254 | ||||
2255 | // start scheduling from "last" to "first" | |||
2256 | while (true) { | |||
2257 | assert(in_bb(current), "stay in block")do { if (!(in_bb(current))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2257, "assert(" "in_bb(current)" ") failed", "stay in block" ); ::breakpoint(); } } while (0); | |||
2258 | assert(in_pack(previous, pk), "previous stays in pack")do { if (!(in_pack(previous, pk))) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2258, "assert(" "in_pack(previous, pk)" ") failed", "previous stays in pack" ); ::breakpoint(); } } while (0); | |||
2259 | Node* my_mem = current->in(MemNode::Memory); | |||
2260 | ||||
2261 | if (in_pack(current, pk)) { | |||
2262 | // Forward users of my memory state (except "previous) to my input memory state | |||
2263 | for (DUIterator i = current->outs(); current->has_out(i); i++) { | |||
2264 | Node* use = current->out(i); | |||
2265 | if (use->is_Mem() && use != previous) { | |||
2266 | assert(use->in(MemNode::Memory) == current, "must be")do { if (!(use->in(MemNode::Memory) == current)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2266, "assert(" "use->in(MemNode::Memory) == current" ") failed" , "must be"); ::breakpoint(); } } while (0); | |||
2267 | if (schedule_before_pack.member(use)) { | |||
2268 | _igvn.replace_input_of(use, MemNode::Memory, upper_insert_pt); | |||
2269 | } else { | |||
2270 | _igvn.replace_input_of(use, MemNode::Memory, lower_insert_pt); | |||
2271 | } | |||
2272 | --i; // deleted this edge; rescan position | |||
2273 | } | |||
2274 | } | |||
2275 | previous = current; | |||
2276 | } else { // !in_pack(current, pk) ==> a sandwiched store | |||
2277 | remove_and_insert(current, previous, lower_insert_pt, upper_insert_pt, schedule_before_pack); | |||
2278 | } | |||
2279 | ||||
2280 | if (current == first) break; | |||
2281 | current = my_mem->as_Mem(); | |||
2282 | } // end while | |||
2283 | ||||
2284 | // Reconnect loads back to upper_insert_pt. | |||
2285 | for (uint i = 0; i < memops.size(); i++) { | |||
2286 | Node *ld = memops.at(i); | |||
2287 | if (ld->in(MemNode::Memory) != upper_insert_pt) { | |||
2288 | _igvn.replace_input_of(ld, MemNode::Memory, upper_insert_pt); | |||
2289 | } | |||
2290 | } | |||
2291 | } else if (pk->at(0)->is_Load()) { // Load pack | |||
2292 | // All loads in the pack should have the same memory state. By default, | |||
2293 | // we use the memory state of the last load. However, if any load could | |||
2294 | // not be moved down due to the dependence constraint, we use the memory | |||
2295 | // state of the first load. | |||
2296 | Node* mem_input = pick_mem_state(pk); | |||
2297 | _igvn.hash_delete(mem_input); | |||
2298 | // Give each load the same memory state | |||
2299 | for (uint i = 0; i < pk->size(); i++) { | |||
2300 | LoadNode* ld = pk->at(i)->as_Load(); | |||
2301 | _igvn.replace_input_of(ld, MemNode::Memory, mem_input); | |||
2302 | } | |||
2303 | } | |||
2304 | } | |||
2305 | ||||
2306 | // Finds the first and last memory state and then picks either of them by checking dependence constraints. | |||
2307 | // If a store is dependent on an earlier load then we need to pick the memory state of the first load and cannot | |||
2308 | // pick the memory state of the last load. | |||
2309 | Node* SuperWord::pick_mem_state(Node_List* pk) { | |||
2310 | Node* first_mem = find_first_mem_state(pk); | |||
2311 | Node* last_mem = find_last_mem_state(pk, first_mem); | |||
2312 | ||||
2313 | for (uint i = 0; i < pk->size(); i++) { | |||
2314 | Node* ld = pk->at(i); | |||
2315 | for (Node* current = last_mem; current != ld->in(MemNode::Memory); current = current->in(MemNode::Memory)) { | |||
2316 | assert(current->is_Mem() && in_bb(current), "unexpected memory")do { if (!(current->is_Mem() && in_bb(current))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2316, "assert(" "current->is_Mem() && in_bb(current)" ") failed", "unexpected memory"); ::breakpoint(); } } while ( 0); | |||
2317 | assert(current != first_mem, "corrupted memory graph")do { if (!(current != first_mem)) { (*g_assert_poison) = 'X'; ; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2317, "assert(" "current != first_mem" ") failed", "corrupted memory graph" ); ::breakpoint(); } } while (0); | |||
2318 | if (!independent(current, ld)) { | |||
2319 | // A later store depends on this load, pick the memory state of the first load. This can happen, for example, | |||
2320 | // if a load pack has interleaving stores that are part of a store pack which, however, is removed at the pack | |||
2321 | // filtering stage. This leaves us with only a load pack for which we cannot take the memory state of the | |||
2322 | // last load as the remaining unvectorized stores could interfere since they have a dependency to the loads. | |||
2323 | // Some stores could be executed before the load vector resulting in a wrong result. We need to take the | |||
2324 | // memory state of the first load to prevent this. | |||
2325 | return first_mem; | |||
2326 | } | |||
2327 | } | |||
2328 | } | |||
2329 | return last_mem; | |||
2330 | } | |||
2331 | ||||
2332 | // Walk the memory graph from the current first load until the | |||
2333 | // start of the loop and check if nodes on the way are memory | |||
2334 | // edges of loads in the pack. The last one we encounter is the | |||
2335 | // first load. | |||
2336 | Node* SuperWord::find_first_mem_state(Node_List* pk) { | |||
2337 | Node* first_mem = pk->at(0)->in(MemNode::Memory); | |||
2338 | for (Node* current = first_mem; in_bb(current); current = current->is_Phi() ? current->in(LoopNode::EntryControl) : current->in(MemNode::Memory)) { | |||
2339 | assert(current->is_Mem() || (current->is_Phi() && current->in(0) == bb()), "unexpected memory")do { if (!(current->is_Mem() || (current->is_Phi() && current->in(0) == bb()))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2339, "assert(" "current->is_Mem() || (current->is_Phi() && current->in(0) == bb())" ") failed", "unexpected memory"); ::breakpoint(); } } while ( 0); | |||
2340 | for (uint i = 1; i < pk->size(); i++) { | |||
2341 | Node* ld = pk->at(i); | |||
2342 | if (ld->in(MemNode::Memory) == current) { | |||
2343 | first_mem = current; | |||
2344 | break; | |||
2345 | } | |||
2346 | } | |||
2347 | } | |||
2348 | return first_mem; | |||
2349 | } | |||
2350 | ||||
2351 | // Find the last load by going over the pack again and walking | |||
2352 | // the memory graph from the loads of the pack to the memory of | |||
2353 | // the first load. If we encounter the memory of the current last | |||
2354 | // load, then we started from further down in the memory graph and | |||
2355 | // the load we started from is the last load. | |||
2356 | Node* SuperWord::find_last_mem_state(Node_List* pk, Node* first_mem) { | |||
2357 | Node* last_mem = pk->at(0)->in(MemNode::Memory); | |||
2358 | for (uint i = 0; i < pk->size(); i++) { | |||
2359 | Node* ld = pk->at(i); | |||
2360 | for (Node* current = ld->in(MemNode::Memory); current != first_mem; current = current->in(MemNode::Memory)) { | |||
2361 | assert(current->is_Mem() && in_bb(current), "unexpected memory")do { if (!(current->is_Mem() && in_bb(current))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2361, "assert(" "current->is_Mem() && in_bb(current)" ") failed", "unexpected memory"); ::breakpoint(); } } while ( 0); | |||
2362 | if (current->in(MemNode::Memory) == last_mem) { | |||
2363 | last_mem = ld->in(MemNode::Memory); | |||
2364 | } | |||
2365 | } | |||
2366 | } | |||
2367 | return last_mem; | |||
2368 | } | |||
2369 | ||||
2370 | #ifndef PRODUCT | |||
2371 | void SuperWord::print_loop(bool whole) { | |||
2372 | Node_Stack stack(_arena, _phase->C->unique() >> 2); | |||
2373 | Node_List rpo_list; | |||
2374 | VectorSet visited(_arena); | |||
2375 | visited.set(lpt()->_head->_idx); | |||
2376 | _phase->rpo(lpt()->_head, stack, visited, rpo_list); | |||
2377 | _phase->dump(lpt(), rpo_list.size(), rpo_list ); | |||
2378 | if(whole) { | |||
2379 | tty->print_cr("\n Whole loop tree"); | |||
2380 | _phase->dump(); | |||
2381 | tty->print_cr(" End of whole loop tree\n"); | |||
2382 | } | |||
2383 | } | |||
2384 | #endif | |||
2385 | ||||
2386 | //------------------------------output--------------------------- | |||
2387 | // Convert packs into vector node operations | |||
2388 | void SuperWord::output() { | |||
2389 | CountedLoopNode *cl = lpt()->_head->as_CountedLoop(); | |||
2390 | Compile* C = _phase->C; | |||
2391 | if (_packset.length() == 0) { | |||
2392 | if (cl->is_main_loop()) { | |||
2393 | // Instigate more unrolling for optimization when vectorization fails. | |||
2394 | C->set_major_progress(); | |||
2395 | cl->set_notpassed_slp(); | |||
2396 | cl->mark_do_unroll_only(); | |||
2397 | } | |||
2398 | return; | |||
2399 | } | |||
2400 | ||||
2401 | #ifndef PRODUCT | |||
2402 | if (TraceLoopOpts) { | |||
2403 | tty->print("SuperWord::output "); | |||
2404 | lpt()->dump_head(); | |||
2405 | } | |||
2406 | #endif | |||
2407 | ||||
2408 | if (cl->is_main_loop()) { | |||
2409 | // MUST ENSURE main loop's initial value is properly aligned: | |||
2410 | // (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0 | |||
2411 | ||||
2412 | align_initial_loop_index(align_to_ref()); | |||
2413 | ||||
2414 | // Insert extract (unpack) operations for scalar uses | |||
2415 | for (int i = 0; i < _packset.length(); i++) { | |||
2416 | insert_extracts(_packset.at(i)); | |||
2417 | } | |||
2418 | } | |||
2419 | ||||
2420 | uint max_vlen_in_bytes = 0; | |||
2421 | uint max_vlen = 0; | |||
2422 | bool can_process_post_loop = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop()); | |||
2423 | ||||
2424 | NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop before create_reserve_version_of_loop"); print_loop(true);})if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop before create_reserve_version_of_loop" ); print_loop(true);} | |||
2425 | ||||
2426 | CountedLoopReserveKit make_reversable(_phase, _lpt, do_reserve_copy()); | |||
2427 | ||||
2428 | NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop after create_reserve_version_of_loop"); print_loop(true);})if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop after create_reserve_version_of_loop" ); print_loop(true);} | |||
2429 | ||||
2430 | if (do_reserve_copy() && !make_reversable.has_reserved()) { | |||
2431 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: loop was not reserved correctly, exiting SuperWord");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("SWPointer::output: loop was not reserved correctly, exiting SuperWord" );} | |||
2432 | return; | |||
2433 | } | |||
2434 | ||||
2435 | for (int i = 0; i < _block.length(); i++) { | |||
2436 | Node* n = _block.at(i); | |||
2437 | Node_List* p = my_pack(n); | |||
2438 | if (p && n == executed_last(p)) { | |||
2439 | uint vlen = p->size(); | |||
2440 | uint vlen_in_bytes = 0; | |||
2441 | Node* vn = NULL__null; | |||
2442 | Node* low_adr = p->at(0); | |||
2443 | Node* first = executed_first(p); | |||
2444 | if (can_process_post_loop) { | |||
2445 | // override vlen with the main loops vector length | |||
2446 | vlen = cl->slp_max_unroll(); | |||
2447 | } | |||
2448 | NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d executed first, %d executed last in pack", first->_idx, n->_idx); print_pack(p);})if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d executed first, %d executed last in pack" , first->_idx, n->_idx); print_pack(p);} | |||
2449 | int opc = n->Opcode(); | |||
2450 | if (n->is_Load()) { | |||
2451 | Node* ctl = n->in(MemNode::Control); | |||
2452 | Node* mem = first->in(MemNode::Memory); | |||
2453 | SWPointer p1(n->as_Mem(), this, NULL__null, false); | |||
2454 | // Identify the memory dependency for the new loadVector node by | |||
2455 | // walking up through memory chain. | |||
2456 | // This is done to give flexibility to the new loadVector node so that | |||
2457 | // it can move above independent storeVector nodes. | |||
2458 | while (mem->is_StoreVector()) { | |||
2459 | SWPointer p2(mem->as_Mem(), this, NULL__null, false); | |||
2460 | int cmp = p1.cmp(p2); | |||
2461 | if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) { | |||
2462 | mem = mem->in(MemNode::Memory); | |||
2463 | } else { | |||
2464 | break; // dependent memory | |||
2465 | } | |||
2466 | } | |||
2467 | Node* adr = low_adr->in(MemNode::Address); | |||
2468 | const TypePtr* atyp = n->adr_type(); | |||
2469 | vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n), control_dependency(p)); | |||
2470 | vlen_in_bytes = vn->as_LoadVector()->memory_size(); | |||
2471 | } else if (n->is_Store()) { | |||
2472 | // Promote value to be stored to vector | |||
2473 | Node* val = vector_opd(p, MemNode::ValueIn); | |||
2474 | if (val == NULL__null) { | |||
2475 | if (do_reserve_copy()) { | |||
2476 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: val should not be NULL, exiting SuperWord");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("SWPointer::output: val should not be NULL, exiting SuperWord" );} | |||
2477 | return; //and reverse to backup IG | |||
2478 | } | |||
2479 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2479); ::breakpoint(); } while (0); | |||
2480 | } | |||
2481 | ||||
2482 | Node* ctl = n->in(MemNode::Control); | |||
2483 | Node* mem = first->in(MemNode::Memory); | |||
2484 | Node* adr = low_adr->in(MemNode::Address); | |||
2485 | const TypePtr* atyp = n->adr_type(); | |||
2486 | vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen); | |||
2487 | vlen_in_bytes = vn->as_StoreVector()->memory_size(); | |||
2488 | } else if (VectorNode::is_scalar_rotate(n)) { | |||
2489 | Node* in1 = low_adr->in(1); | |||
2490 | Node* in2 = p->at(0)->in(2); | |||
2491 | // If rotation count is non-constant or greater than 8bit value create a vector. | |||
2492 | if (!in2->is_Con() || !Matcher::supports_vector_constant_rotates(in2->get_int())) { | |||
2493 | in2 = vector_opd(p, 2); | |||
2494 | } | |||
2495 | vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n)); | |||
2496 | vlen_in_bytes = vn->as_Vector()->length_in_bytes(); | |||
2497 | } else if (VectorNode::is_roundopD(n)) { | |||
2498 | Node* in1 = vector_opd(p, 1); | |||
2499 | Node* in2 = low_adr->in(2); | |||
2500 | assert(in2->is_Con(), "Constant rounding mode expected.")do { if (!(in2->is_Con())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2500, "assert(" "in2->is_Con()" ") failed", "Constant rounding mode expected." ); ::breakpoint(); } } while (0); | |||
2501 | vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n)); | |||
2502 | vlen_in_bytes = vn->as_Vector()->length_in_bytes(); | |||
2503 | } else if (VectorNode::is_muladds2i(n)) { | |||
2504 | assert(n->req() == 5u, "MulAddS2I should have 4 operands.")do { if (!(n->req() == 5u)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2504, "assert(" "n->req() == 5u" ") failed", "MulAddS2I should have 4 operands." ); ::breakpoint(); } } while (0); | |||
2505 | Node* in1 = vector_opd(p, 1); | |||
2506 | Node* in2 = vector_opd(p, 2); | |||
2507 | vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n)); | |||
2508 | vlen_in_bytes = vn->as_Vector()->length_in_bytes(); | |||
2509 | } else if (n->req() == 3 && !is_cmov_pack(p)) { | |||
2510 | // Promote operands to vector | |||
2511 | Node* in1 = NULL__null; | |||
2512 | bool node_isa_reduction = n->is_reduction(); | |||
2513 | if (node_isa_reduction) { | |||
2514 | // the input to the first reduction operation is retained | |||
2515 | in1 = low_adr->in(1); | |||
2516 | } else { | |||
2517 | in1 = vector_opd(p, 1); | |||
2518 | if (in1 == NULL__null) { | |||
2519 | if (do_reserve_copy()) { | |||
2520 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: in1 should not be NULL, exiting SuperWord");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("SWPointer::output: in1 should not be NULL, exiting SuperWord" );} | |||
2521 | return; //and reverse to backup IG | |||
2522 | } | |||
2523 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2523); ::breakpoint(); } while (0); | |||
2524 | } | |||
2525 | } | |||
2526 | Node* in2 = vector_opd(p, 2); | |||
2527 | if (in2 == NULL__null) { | |||
2528 | if (do_reserve_copy()) { | |||
2529 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: in2 should not be NULL, exiting SuperWord");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("SWPointer::output: in2 should not be NULL, exiting SuperWord" );} | |||
2530 | return; //and reverse to backup IG | |||
2531 | } | |||
2532 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2532); ::breakpoint(); } while (0); | |||
2533 | } | |||
2534 | if (VectorNode::is_invariant_vector(in1) && (node_isa_reduction == false) && (n->is_Add() || n->is_Mul())) { | |||
2535 | // Move invariant vector input into second position to avoid register spilling. | |||
2536 | Node* tmp = in1; | |||
2537 | in1 = in2; | |||
2538 | in2 = tmp; | |||
2539 | } | |||
2540 | if (node_isa_reduction) { | |||
2541 | const Type *arith_type = n->bottom_type(); | |||
2542 | vn = ReductionNode::make(opc, NULL__null, in1, in2, arith_type->basic_type()); | |||
2543 | if (in2->is_Load()) { | |||
2544 | vlen_in_bytes = in2->as_LoadVector()->memory_size(); | |||
2545 | } else { | |||
2546 | vlen_in_bytes = in2->as_Vector()->length_in_bytes(); | |||
2547 | } | |||
2548 | } else { | |||
2549 | vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n)); | |||
2550 | vlen_in_bytes = vn->as_Vector()->length_in_bytes(); | |||
2551 | } | |||
2552 | } else if (opc == Op_SqrtF || opc == Op_SqrtD || | |||
2553 | opc == Op_AbsF || opc == Op_AbsD || | |||
2554 | opc == Op_AbsI || opc == Op_AbsL || | |||
2555 | opc == Op_NegF || opc == Op_NegD || | |||
2556 | opc == Op_PopCountI) { | |||
2557 | assert(n->req() == 2, "only one input expected")do { if (!(n->req() == 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2557, "assert(" "n->req() == 2" ") failed", "only one input expected" ); ::breakpoint(); } } while (0); | |||
2558 | Node* in = vector_opd(p, 1); | |||
2559 | vn = VectorNode::make(opc, in, NULL__null, vlen, velt_basic_type(n)); | |||
2560 | vlen_in_bytes = vn->as_Vector()->length_in_bytes(); | |||
2561 | } else if (opc == Op_ConvI2F || opc == Op_ConvL2D || | |||
2562 | opc == Op_ConvF2I || opc == Op_ConvD2L) { | |||
2563 | assert(n->req() == 2, "only one input expected")do { if (!(n->req() == 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2563, "assert(" "n->req() == 2" ") failed", "only one input expected" ); ::breakpoint(); } } while (0); | |||
2564 | BasicType bt = velt_basic_type(n); | |||
2565 | int vopc = VectorNode::opcode(opc, bt); | |||
2566 | Node* in = vector_opd(p, 1); | |||
2567 | vn = VectorCastNode::make(vopc, in, bt, vlen); | |||
2568 | vlen_in_bytes = vn->as_Vector()->length_in_bytes(); | |||
2569 | } else if (is_cmov_pack(p)) { | |||
2570 | if (can_process_post_loop) { | |||
2571 | // do not refactor of flow in post loop context | |||
2572 | return; | |||
2573 | } | |||
2574 | if (!n->is_CMove()) { | |||
2575 | continue; | |||
2576 | } | |||
2577 | // place here CMoveVDNode | |||
2578 | NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: print before CMove vectorization"); print_loop(false);})if(is_trace_cmov()) {tty->print_cr("SWPointer::output: print before CMove vectorization" ); print_loop(false);} | |||
2579 | Node* bol = n->in(CMoveNode::Condition); | |||
2580 | if (!bol->is_Bool() && bol->Opcode() == Op_ExtractI && bol->req() > 1 ) { | |||
2581 | NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d is not Bool node, trying its in(1) node %d", bol->_idx, bol->in(1)->_idx); bol->dump(); bol->in(1)->dump();})if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d is not Bool node, trying its in(1) node %d" , bol->_idx, bol->in(1)->_idx); bol->dump(); bol-> in(1)->dump();} | |||
2582 | bol = bol->in(1); //may be ExtractNode | |||
2583 | } | |||
2584 | ||||
2585 | assert(bol->is_Bool(), "should be BoolNode - too late to bail out!")do { if (!(bol->is_Bool())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2585, "assert(" "bol->is_Bool()" ") failed", "should be BoolNode - too late to bail out!" ); ::breakpoint(); } } while (0); | |||
2586 | if (!bol->is_Bool()) { | |||
2587 | if (do_reserve_copy()) { | |||
2588 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: expected %d bool node, exiting SuperWord", bol->_idx); bol->dump();})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("SWPointer::output: expected %d bool node, exiting SuperWord" , bol->_idx); bol->dump();} | |||
2589 | return; //and reverse to backup IG | |||
2590 | } | |||
2591 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2591); ::breakpoint(); } while (0); | |||
2592 | } | |||
2593 | ||||
2594 | int cond = (int)bol->as_Bool()->_test._test; | |||
2595 | Node* in_cc = _igvn.intcon(cond); | |||
2596 | NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created intcon in_cc node %d", in_cc->_idx); in_cc->dump();})if(is_trace_cmov()) {tty->print("SWPointer::output: created intcon in_cc node %d" , in_cc->_idx); in_cc->dump();} | |||
2597 | Node* cc = bol->clone(); | |||
2598 | cc->set_req(1, in_cc); | |||
2599 | NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created bool cc node %d", cc->_idx); cc->dump();})if(is_trace_cmov()) {tty->print("SWPointer::output: created bool cc node %d" , cc->_idx); cc->dump();} | |||
2600 | ||||
2601 | Node* src1 = vector_opd(p, 2); //2=CMoveNode::IfFalse | |||
2602 | if (src1 == NULL__null) { | |||
2603 | if (do_reserve_copy()) { | |||
2604 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src1 should not be NULL, exiting SuperWord");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("SWPointer::output: src1 should not be NULL, exiting SuperWord" );} | |||
2605 | return; //and reverse to backup IG | |||
2606 | } | |||
2607 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2607); ::breakpoint(); } while (0); | |||
2608 | } | |||
2609 | Node* src2 = vector_opd(p, 3); //3=CMoveNode::IfTrue | |||
2610 | if (src2 == NULL__null) { | |||
2611 | if (do_reserve_copy()) { | |||
2612 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src2 should not be NULL, exiting SuperWord");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("SWPointer::output: src2 should not be NULL, exiting SuperWord" );} | |||
2613 | return; //and reverse to backup IG | |||
2614 | } | |||
2615 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2615); ::breakpoint(); } while (0); | |||
2616 | } | |||
2617 | BasicType bt = velt_basic_type(n); | |||
2618 | const TypeVect* vt = TypeVect::make(bt, vlen); | |||
2619 | assert(bt == T_FLOAT || bt == T_DOUBLE, "Only vectorization for FP cmovs is supported")do { if (!(bt == T_FLOAT || bt == T_DOUBLE)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2619, "assert(" "bt == T_FLOAT || bt == T_DOUBLE" ") failed" , "Only vectorization for FP cmovs is supported"); ::breakpoint (); } } while (0); | |||
2620 | if (bt == T_FLOAT) { | |||
2621 | vn = new CMoveVFNode(cc, src1, src2, vt); | |||
2622 | } else { | |||
2623 | assert(bt == T_DOUBLE, "Expected double")do { if (!(bt == T_DOUBLE)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2623, "assert(" "bt == T_DOUBLE" ") failed", "Expected double" ); ::breakpoint(); } } while (0); | |||
2624 | vn = new CMoveVDNode(cc, src1, src2, vt); | |||
2625 | } | |||
2626 | NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created new CMove node %d: ", vn->_idx); vn->dump();})if(is_trace_cmov()) {tty->print("SWPointer::output: created new CMove node %d: " , vn->_idx); vn->dump();} | |||
2627 | } else if (opc == Op_FmaD || opc == Op_FmaF) { | |||
2628 | // Promote operands to vector | |||
2629 | Node* in1 = vector_opd(p, 1); | |||
2630 | Node* in2 = vector_opd(p, 2); | |||
2631 | Node* in3 = vector_opd(p, 3); | |||
2632 | vn = VectorNode::make(opc, in1, in2, in3, vlen, velt_basic_type(n)); | |||
2633 | vlen_in_bytes = vn->as_Vector()->length_in_bytes(); | |||
2634 | } else { | |||
2635 | if (do_reserve_copy()) { | |||
2636 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: ShouldNotReachHere, exiting SuperWord");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("SWPointer::output: ShouldNotReachHere, exiting SuperWord"); } | |||
2637 | return; //and reverse to backup IG | |||
2638 | } | |||
2639 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2639); ::breakpoint(); } while (0); | |||
2640 | } | |||
2641 | ||||
2642 | assert(vn != NULL, "sanity")do { if (!(vn != __null)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2642, "assert(" "vn != __null" ") failed", "sanity"); ::breakpoint (); } } while (0); | |||
2643 | if (vn == NULL__null) { | |||
2644 | if (do_reserve_copy()){ | |||
2645 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: got NULL node, cannot proceed, exiting SuperWord");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("SWPointer::output: got NULL node, cannot proceed, exiting SuperWord" );} | |||
2646 | return; //and reverse to backup IG | |||
2647 | } | |||
2648 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2648); ::breakpoint(); } while (0); | |||
2649 | } | |||
2650 | ||||
2651 | _block.at_put(i, vn); | |||
2652 | _igvn.register_new_node_with_optimizer(vn); | |||
2653 | _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0))); | |||
2654 | for (uint j = 0; j < p->size(); j++) { | |||
2655 | Node* pm = p->at(j); | |||
2656 | _igvn.replace_node(pm, vn); | |||
2657 | } | |||
2658 | _igvn._worklist.push(vn); | |||
2659 | ||||
2660 | if (can_process_post_loop) { | |||
2661 | // first check if the vector size is the maximum vector which we can use on the machine, | |||
2662 | // other vector size have reduced values for predicated data mapping. | |||
2663 | if (vlen_in_bytes != (uint)MaxVectorSize) { | |||
2664 | return; | |||
2665 | } | |||
2666 | } | |||
2667 | ||||
2668 | if (vlen > max_vlen) { | |||
2669 | max_vlen = vlen; | |||
2670 | } | |||
2671 | if (vlen_in_bytes > max_vlen_in_bytes) { | |||
2672 | max_vlen_in_bytes = vlen_in_bytes; | |||
2673 | } | |||
2674 | #ifdef ASSERT1 | |||
2675 | if (TraceNewVectors) { | |||
2676 | tty->print("new Vector node: "); | |||
2677 | vn->dump(); | |||
2678 | } | |||
2679 | #endif | |||
2680 | } | |||
2681 | }//for (int i = 0; i < _block.length(); i++) | |||
2682 | ||||
2683 | if (max_vlen_in_bytes > C->max_vector_size()) { | |||
2684 | C->set_max_vector_size(max_vlen_in_bytes); | |||
2685 | } | |||
2686 | if (max_vlen_in_bytes > 0) { | |||
2687 | cl->mark_loop_vectorized(); | |||
2688 | } | |||
2689 | ||||
2690 | if (SuperWordLoopUnrollAnalysis) { | |||
2691 | if (cl->has_passed_slp()) { | |||
2692 | uint slp_max_unroll_factor = cl->slp_max_unroll(); | |||
2693 | if (slp_max_unroll_factor == max_vlen) { | |||
2694 | if (TraceSuperWordLoopUnrollAnalysis) { | |||
2695 | tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte); | |||
2696 | } | |||
2697 | ||||
2698 | // For atomic unrolled loops which are vector mapped, instigate more unrolling | |||
2699 | cl->set_notpassed_slp(); | |||
2700 | if (cl->is_main_loop()) { | |||
2701 | // if vector resources are limited, do not allow additional unrolling, also | |||
2702 | // do not unroll more on pure vector loops which were not reduced so that we can | |||
2703 | // program the post loop to single iteration execution. | |||
2704 | if (Matcher::float_pressure_limit() > 8) { | |||
2705 | C->set_major_progress(); | |||
2706 | cl->mark_do_unroll_only(); | |||
2707 | } | |||
2708 | } | |||
2709 | ||||
2710 | if (do_reserve_copy()) { | |||
2711 | if (can_process_post_loop) { | |||
2712 | // Now create the difference of trip and limit and use it as our mask index. | |||
2713 | // Note: We limited the unroll of the vectorized loop so that | |||
2714 | // only vlen-1 size iterations can remain to be mask programmed. | |||
2715 | Node *incr = cl->incr(); | |||
2716 | SubINode *index = new SubINode(cl->limit(), cl->init_trip()); | |||
2717 | _igvn.register_new_node_with_optimizer(index); | |||
2718 | SetVectMaskINode *mask = new SetVectMaskINode(_phase->get_ctrl(cl->init_trip()), index); | |||
2719 | _igvn.register_new_node_with_optimizer(mask); | |||
2720 | // make this a single iteration loop | |||
2721 | AddINode *new_incr = new AddINode(incr->in(1), mask); | |||
2722 | _igvn.register_new_node_with_optimizer(new_incr); | |||
2723 | _phase->set_ctrl(new_incr, _phase->get_ctrl(incr)); | |||
2724 | _igvn.replace_node(incr, new_incr); | |||
2725 | cl->mark_is_multiversioned(); | |||
2726 | cl->loopexit()->add_flag(Node::Flag_has_vector_mask_set); | |||
2727 | } | |||
2728 | } | |||
2729 | } | |||
2730 | } | |||
2731 | } | |||
2732 | ||||
2733 | if (do_reserve_copy()) { | |||
2734 | make_reversable.use_new(); | |||
2735 | } | |||
2736 | NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("\n Final loop after SuperWord"); print_loop(true);})if(is_trace_loop_reverse()) {tty->print_cr("\n Final loop after SuperWord" ); print_loop(true);} | |||
2737 | return; | |||
2738 | } | |||
2739 | ||||
2740 | //------------------------------vector_opd--------------------------- | |||
2741 | // Create a vector operand for the nodes in pack p for operand: in(opd_idx) | |||
2742 | Node* SuperWord::vector_opd(Node_List* p, int opd_idx) { | |||
2743 | Node* p0 = p->at(0); | |||
2744 | uint vlen = p->size(); | |||
2745 | Node* opd = p0->in(opd_idx); | |||
2746 | CountedLoopNode *cl = lpt()->_head->as_CountedLoop(); | |||
2747 | ||||
2748 | if (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop()) { | |||
2749 | // override vlen with the main loops vector length | |||
2750 | vlen = cl->slp_max_unroll(); | |||
2751 | } | |||
2752 | ||||
2753 | if (same_inputs(p, opd_idx)) { | |||
2754 | if (opd->is_Vector() || opd->is_LoadVector()) { | |||
2755 | assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector")do { if (!(((opd_idx != 2) || !VectorNode::is_shift(p0)))) { ( *g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2755, "assert(" "((opd_idx != 2) || !VectorNode::is_shift(p0))" ") failed", "shift's count can't be vector"); ::breakpoint() ; } } while (0); | |||
2756 | if (opd_idx == 2 && VectorNode::is_shift(p0)) { | |||
2757 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("shift's count can't be vector");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("shift's count can't be vector");} | |||
2758 | return NULL__null; | |||
2759 | } | |||
2760 | return opd; // input is matching vector | |||
2761 | } | |||
2762 | if ((opd_idx == 2) && VectorNode::is_shift(p0)) { | |||
2763 | Compile* C = _phase->C; | |||
2764 | Node* cnt = opd; | |||
2765 | // Vector instructions do not mask shift count, do it here. | |||
2766 | juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1); | |||
2767 | const TypeInt* t = opd->find_int_type(); | |||
2768 | if (t != NULL__null && t->is_con()) { | |||
2769 | juint shift = t->get_con(); | |||
2770 | if (shift > mask) { // Unsigned cmp | |||
2771 | cnt = ConNode::make(TypeInt::make(shift & mask)); | |||
2772 | } | |||
2773 | } else { | |||
2774 | if (t == NULL__null || t->_lo < 0 || t->_hi > (int)mask) { | |||
2775 | cnt = ConNode::make(TypeInt::make(mask)); | |||
2776 | _igvn.register_new_node_with_optimizer(cnt); | |||
2777 | cnt = new AndINode(opd, cnt); | |||
2778 | _igvn.register_new_node_with_optimizer(cnt); | |||
2779 | _phase->set_ctrl(cnt, _phase->get_ctrl(opd)); | |||
2780 | } | |||
2781 | assert(opd->bottom_type()->isa_int(), "int type only")do { if (!(opd->bottom_type()->isa_int())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2781, "assert(" "opd->bottom_type()->isa_int()" ") failed" , "int type only"); ::breakpoint(); } } while (0); | |||
2782 | if (!opd->bottom_type()->isa_int()) { | |||
2783 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should be int type only");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("Should be int type only");} | |||
2784 | return NULL__null; | |||
2785 | } | |||
2786 | } | |||
2787 | // Move shift count into vector register. | |||
2788 | cnt = VectorNode::shift_count(p0->Opcode(), cnt, vlen, velt_basic_type(p0)); | |||
2789 | _igvn.register_new_node_with_optimizer(cnt); | |||
2790 | _phase->set_ctrl(cnt, _phase->get_ctrl(opd)); | |||
2791 | return cnt; | |||
2792 | } | |||
2793 | assert(!opd->is_StoreVector(), "such vector is not expected here")do { if (!(!opd->is_StoreVector())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2793, "assert(" "!opd->is_StoreVector()" ") failed", "such vector is not expected here" ); ::breakpoint(); } } while (0); | |||
2794 | if (opd->is_StoreVector()) { | |||
2795 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("StoreVector is not expected here");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("StoreVector is not expected here");} | |||
2796 | return NULL__null; | |||
2797 | } | |||
2798 | // Convert scalar input to vector with the same number of elements as | |||
2799 | // p0's vector. Use p0's type because size of operand's container in | |||
2800 | // vector should match p0's size regardless operand's size. | |||
2801 | const Type* p0_t = NULL__null; | |||
2802 | VectorNode* vn = NULL__null; | |||
2803 | if (opd_idx == 2 && VectorNode::is_scalar_rotate(p0)) { | |||
2804 | Node* conv = opd; | |||
2805 | p0_t = TypeInt::INT; | |||
2806 | if (p0->bottom_type()->isa_long()) { | |||
2807 | p0_t = TypeLong::LONG; | |||
2808 | conv = new ConvI2LNode(opd); | |||
2809 | _igvn.register_new_node_with_optimizer(conv); | |||
2810 | _phase->set_ctrl(conv, _phase->get_ctrl(opd)); | |||
2811 | } | |||
2812 | vn = VectorNode::scalar2vector(conv, vlen, p0_t); | |||
2813 | } else { | |||
2814 | p0_t = velt_type(p0); | |||
2815 | vn = VectorNode::scalar2vector(opd, vlen, p0_t); | |||
2816 | } | |||
2817 | ||||
2818 | _igvn.register_new_node_with_optimizer(vn); | |||
2819 | _phase->set_ctrl(vn, _phase->get_ctrl(opd)); | |||
2820 | #ifdef ASSERT1 | |||
2821 | if (TraceNewVectors) { | |||
2822 | tty->print("new Vector node: "); | |||
2823 | vn->dump(); | |||
2824 | } | |||
2825 | #endif | |||
2826 | return vn; | |||
2827 | } | |||
2828 | ||||
2829 | // Insert pack operation | |||
2830 | BasicType bt = velt_basic_type(p0); | |||
2831 | PackNode* pk = PackNode::make(opd, vlen, bt); | |||
2832 | DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); )const BasicType opd_bt = opd->bottom_type()->basic_type (); | |||
2833 | ||||
2834 | for (uint i = 1; i < vlen; i++) { | |||
2835 | Node* pi = p->at(i); | |||
2836 | Node* in = pi->in(opd_idx); | |||
2837 | assert(my_pack(in) == NULL, "Should already have been unpacked")do { if (!(my_pack(in) == __null)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2837, "assert(" "my_pack(in) == __null" ") failed", "Should already have been unpacked" ); ::breakpoint(); } } while (0); | |||
2838 | if (my_pack(in) != NULL__null) { | |||
2839 | NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should already have been unpacked");})if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr ("Should already have been unpacked");} | |||
2840 | return NULL__null; | |||
2841 | } | |||
2842 | assert(opd_bt == in->bottom_type()->basic_type(), "all same type")do { if (!(opd_bt == in->bottom_type()->basic_type())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2842, "assert(" "opd_bt == in->bottom_type()->basic_type()" ") failed", "all same type"); ::breakpoint(); } } while (0); | |||
2843 | pk->add_opd(in); | |||
2844 | if (VectorNode::is_muladds2i(pi)) { | |||
2845 | Node* in2 = pi->in(opd_idx + 2); | |||
2846 | assert(my_pack(in2) == NULL, "Should already have been unpacked")do { if (!(my_pack(in2) == __null)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2846, "assert(" "my_pack(in2) == __null" ") failed", "Should already have been unpacked" ); ::breakpoint(); } } while (0); | |||
2847 | if (my_pack(in2) != NULL__null) { | |||
2848 | NOT_PRODUCT(if (is_trace_loop_reverse() || TraceLoopOpts) { tty->print_cr("Should already have been unpacked"); })if (is_trace_loop_reverse() || TraceLoopOpts) { tty->print_cr ("Should already have been unpacked"); } | |||
2849 | return NULL__null; | |||
2850 | } | |||
2851 | assert(opd_bt == in2->bottom_type()->basic_type(), "all same type")do { if (!(opd_bt == in2->bottom_type()->basic_type())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2851, "assert(" "opd_bt == in2->bottom_type()->basic_type()" ") failed", "all same type"); ::breakpoint(); } } while (0); | |||
2852 | pk->add_opd(in2); | |||
2853 | } | |||
2854 | } | |||
2855 | _igvn.register_new_node_with_optimizer(pk); | |||
2856 | _phase->set_ctrl(pk, _phase->get_ctrl(opd)); | |||
2857 | #ifdef ASSERT1 | |||
2858 | if (TraceNewVectors) { | |||
2859 | tty->print("new Vector node: "); | |||
2860 | pk->dump(); | |||
2861 | } | |||
2862 | #endif | |||
2863 | return pk; | |||
2864 | } | |||
2865 | ||||
2866 | //------------------------------insert_extracts--------------------------- | |||
2867 | // If a use of pack p is not a vector use, then replace the | |||
2868 | // use with an extract operation. | |||
2869 | void SuperWord::insert_extracts(Node_List* p) { | |||
2870 | if (p->at(0)->is_Store()) return; | |||
2871 | assert(_n_idx_list.is_empty(), "empty (node,index) list")do { if (!(_n_idx_list.is_empty())) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2871, "assert(" "_n_idx_list.is_empty()" ") failed", "empty (node,index) list" ); ::breakpoint(); } } while (0); | |||
2872 | ||||
2873 | // Inspect each use of each pack member. For each use that is | |||
2874 | // not a vector use, replace the use with an extract operation. | |||
2875 | ||||
2876 | for (uint i = 0; i < p->size(); i++) { | |||
2877 | Node* def = p->at(i); | |||
2878 | for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { | |||
2879 | Node* use = def->fast_out(j); | |||
2880 | for (uint k = 0; k < use->req(); k++) { | |||
2881 | Node* n = use->in(k); | |||
2882 | if (def == n) { | |||
2883 | Node_List* u_pk = my_pack(use); | |||
2884 | if ((u_pk == NULL__null || !is_cmov_pack(u_pk) || use->is_CMove()) && !is_vector_use(use, k)) { | |||
2885 | _n_idx_list.push(use, k); | |||
2886 | } | |||
2887 | } | |||
2888 | } | |||
2889 | } | |||
2890 | } | |||
2891 | ||||
2892 | while (_n_idx_list.is_nonempty()) { | |||
2893 | Node* use = _n_idx_list.node(); | |||
2894 | int idx = _n_idx_list.index(); | |||
2895 | _n_idx_list.pop(); | |||
2896 | Node* def = use->in(idx); | |||
2897 | ||||
2898 | if (def->is_reduction()) continue; | |||
2899 | ||||
2900 | // Insert extract operation | |||
2901 | _igvn.hash_delete(def); | |||
2902 | int def_pos = alignment(def) / data_size(def); | |||
2903 | ||||
2904 | Node* ex = ExtractNode::make(def, def_pos, velt_basic_type(def)); | |||
2905 | _igvn.register_new_node_with_optimizer(ex); | |||
2906 | _phase->set_ctrl(ex, _phase->get_ctrl(def)); | |||
2907 | _igvn.replace_input_of(use, idx, ex); | |||
2908 | _igvn._worklist.push(def); | |||
2909 | ||||
2910 | bb_insert_after(ex, bb_idx(def)); | |||
2911 | set_velt_type(ex, velt_type(def)); | |||
2912 | } | |||
2913 | } | |||
2914 | ||||
2915 | //------------------------------is_vector_use--------------------------- | |||
2916 | // Is use->in(u_idx) a vector use? | |||
2917 | bool SuperWord::is_vector_use(Node* use, int u_idx) { | |||
2918 | Node_List* u_pk = my_pack(use); | |||
2919 | if (u_pk == NULL__null) return false; | |||
2920 | if (use->is_reduction()) return true; | |||
2921 | Node* def = use->in(u_idx); | |||
2922 | Node_List* d_pk = my_pack(def); | |||
2923 | if (d_pk == NULL__null) { | |||
2924 | // check for scalar promotion | |||
2925 | Node* n = u_pk->at(0)->in(u_idx); | |||
2926 | for (uint i = 1; i < u_pk->size(); i++) { | |||
2927 | if (u_pk->at(i)->in(u_idx) != n) return false; | |||
2928 | } | |||
2929 | return true; | |||
2930 | } | |||
2931 | if (VectorNode::is_muladds2i(use)) { | |||
2932 | // MulAddS2I takes shorts and produces ints - hence the special checks | |||
2933 | // on alignment and size. | |||
2934 | if (u_pk->size() * 2 != d_pk->size()) { | |||
2935 | return false; | |||
2936 | } | |||
2937 | for (uint i = 0; i < MIN2(d_pk->size(), u_pk->size()); i++) { | |||
2938 | Node* ui = u_pk->at(i); | |||
2939 | Node* di = d_pk->at(i); | |||
2940 | if (alignment(ui) != alignment(di) * 2) { | |||
2941 | return false; | |||
2942 | } | |||
2943 | } | |||
2944 | return true; | |||
2945 | } | |||
2946 | if (u_pk->size() != d_pk->size()) | |||
2947 | return false; | |||
2948 | for (uint i = 0; i < u_pk->size(); i++) { | |||
2949 | Node* ui = u_pk->at(i); | |||
2950 | Node* di = d_pk->at(i); | |||
2951 | if (ui->in(u_idx) != di || alignment(ui) != alignment(di)) | |||
2952 | return false; | |||
2953 | } | |||
2954 | return true; | |||
2955 | } | |||
2956 | ||||
2957 | //------------------------------construct_bb--------------------------- | |||
2958 | // Construct reverse postorder list of block members | |||
2959 | bool SuperWord::construct_bb() { | |||
2960 | Node* entry = bb(); | |||
2961 | ||||
2962 | assert(_stk.length() == 0, "stk is empty")do { if (!(_stk.length() == 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2962, "assert(" "_stk.length() == 0" ") failed", "stk is empty" ); ::breakpoint(); } } while (0); | |||
2963 | assert(_block.length() == 0, "block is empty")do { if (!(_block.length() == 0)) { (*g_assert_poison) = 'X'; ; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2963, "assert(" "_block.length() == 0" ") failed", "block is empty" ); ::breakpoint(); } } while (0); | |||
2964 | assert(_data_entry.length() == 0, "data_entry is empty")do { if (!(_data_entry.length() == 0)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2964, "assert(" "_data_entry.length() == 0" ") failed", "data_entry is empty" ); ::breakpoint(); } } while (0); | |||
2965 | assert(_mem_slice_head.length() == 0, "mem_slice_head is empty")do { if (!(_mem_slice_head.length() == 0)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2965, "assert(" "_mem_slice_head.length() == 0" ") failed", "mem_slice_head is empty"); ::breakpoint(); } } while (0); | |||
2966 | assert(_mem_slice_tail.length() == 0, "mem_slice_tail is empty")do { if (!(_mem_slice_tail.length() == 0)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2966, "assert(" "_mem_slice_tail.length() == 0" ") failed", "mem_slice_tail is empty"); ::breakpoint(); } } while (0); | |||
2967 | ||||
2968 | // Find non-control nodes with no inputs from within block, | |||
2969 | // create a temporary map from node _idx to bb_idx for use | |||
2970 | // by the visited and post_visited sets, | |||
2971 | // and count number of nodes in block. | |||
2972 | int bb_ct = 0; | |||
2973 | for (uint i = 0; i < lpt()->_body.size(); i++) { | |||
2974 | Node *n = lpt()->_body.at(i); | |||
2975 | set_bb_idx(n, i); // Create a temporary map | |||
2976 | if (in_bb(n)) { | |||
2977 | if (n->is_LoadStore() || n->is_MergeMem() || | |||
2978 | (n->is_Proj() && !n->as_Proj()->is_CFG())) { | |||
2979 | // Bailout if the loop has LoadStore, MergeMem or data Proj | |||
2980 | // nodes. Superword optimization does not work with them. | |||
2981 | return false; | |||
2982 | } | |||
2983 | bb_ct++; | |||
2984 | if (!n->is_CFG()) { | |||
2985 | bool found = false; | |||
2986 | for (uint j = 0; j < n->req(); j++) { | |||
2987 | Node* def = n->in(j); | |||
2988 | if (def && in_bb(def)) { | |||
2989 | found = true; | |||
2990 | break; | |||
2991 | } | |||
2992 | } | |||
2993 | if (!found) { | |||
2994 | assert(n != entry, "can't be entry")do { if (!(n != entry)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 2994, "assert(" "n != entry" ") failed", "can't be entry"); ::breakpoint(); } } while (0); | |||
2995 | _data_entry.push(n); | |||
2996 | } | |||
2997 | } | |||
2998 | } | |||
2999 | } | |||
3000 | ||||
3001 | // Find memory slices (head and tail) | |||
3002 | for (DUIterator_Fast imax, i = lp()->fast_outs(imax); i < imax; i++) { | |||
3003 | Node *n = lp()->fast_out(i); | |||
3004 | if (in_bb(n) && (n->is_Phi() && n->bottom_type() == Type::MEMORY)) { | |||
3005 | Node* n_tail = n->in(LoopNode::LoopBackControl); | |||
3006 | if (n_tail != n->in(LoopNode::EntryControl)) { | |||
3007 | if (!n_tail->is_Mem()) { | |||
3008 | assert(n_tail->is_Mem(), "unexpected node for memory slice: %s", n_tail->Name())do { if (!(n_tail->is_Mem())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3008, "assert(" "n_tail->is_Mem()" ") failed", "unexpected node for memory slice: %s" , n_tail->Name()); ::breakpoint(); } } while (0); | |||
3009 | return false; // Bailout | |||
3010 | } | |||
3011 | _mem_slice_head.push(n); | |||
3012 | _mem_slice_tail.push(n_tail); | |||
3013 | } | |||
3014 | } | |||
3015 | } | |||
3016 | ||||
3017 | // Create an RPO list of nodes in block | |||
3018 | ||||
3019 | visited_clear(); | |||
3020 | post_visited_clear(); | |||
3021 | ||||
3022 | // Push all non-control nodes with no inputs from within block, then control entry | |||
3023 | for (int j = 0; j < _data_entry.length(); j++) { | |||
3024 | Node* n = _data_entry.at(j); | |||
3025 | visited_set(n); | |||
3026 | _stk.push(n); | |||
3027 | } | |||
3028 | visited_set(entry); | |||
3029 | _stk.push(entry); | |||
3030 | ||||
3031 | // Do a depth first walk over out edges | |||
3032 | int rpo_idx = bb_ct - 1; | |||
3033 | int size; | |||
3034 | int reduction_uses = 0; | |||
3035 | while ((size = _stk.length()) > 0) { | |||
3036 | Node* n = _stk.top(); // Leave node on stack | |||
3037 | if (!visited_test_set(n)) { | |||
3038 | // forward arc in graph | |||
3039 | } else if (!post_visited_test(n)) { | |||
3040 | // cross or back arc | |||
3041 | for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { | |||
3042 | Node *use = n->fast_out(i); | |||
3043 | if (in_bb(use) && !visited_test(use) && | |||
3044 | // Don't go around backedge | |||
3045 | (!use->is_Phi() || n == entry)) { | |||
3046 | if (use->is_reduction()) { | |||
3047 | // First see if we can map the reduction on the given system we are on, then | |||
3048 | // make a data entry operation for each reduction we see. | |||
3049 | BasicType bt = use->bottom_type()->basic_type(); | |||
3050 | if (ReductionNode::implemented(use->Opcode(), Matcher::min_vector_size(bt), bt)) { | |||
3051 | reduction_uses++; | |||
3052 | } | |||
3053 | } | |||
3054 | _stk.push(use); | |||
3055 | } | |||
3056 | } | |||
3057 | if (_stk.length() == size) { | |||
3058 | // There were no additional uses, post visit node now | |||
3059 | _stk.pop(); // Remove node from stack | |||
3060 | assert(rpo_idx >= 0, "")do { if (!(rpo_idx >= 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3060, "assert(" "rpo_idx >= 0" ") failed", ""); ::breakpoint (); } } while (0); | |||
3061 | _block.at_put_grow(rpo_idx, n); | |||
3062 | rpo_idx--; | |||
3063 | post_visited_set(n); | |||
3064 | assert(rpo_idx >= 0 || _stk.is_empty(), "")do { if (!(rpo_idx >= 0 || _stk.is_empty())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3064, "assert(" "rpo_idx >= 0 || _stk.is_empty()" ") failed" , ""); ::breakpoint(); } } while (0); | |||
3065 | } | |||
3066 | } else { | |||
3067 | _stk.pop(); // Remove post-visited node from stack | |||
3068 | } | |||
3069 | }//while | |||
3070 | ||||
3071 | int ii_current = -1; | |||
3072 | unsigned int load_idx = (unsigned int)-1; | |||
3073 | // Build iterations order if needed | |||
3074 | bool build_ii_order = _do_vector_loop_experimental && _ii_order.is_empty(); | |||
3075 | // Create real map of block indices for nodes | |||
3076 | for (int j = 0; j < _block.length(); j++) { | |||
3077 | Node* n = _block.at(j); | |||
3078 | set_bb_idx(n, j); | |||
3079 | if (build_ii_order && n->is_Load()) { | |||
3080 | if (ii_current == -1) { | |||
3081 | ii_current = _clone_map.gen(n->_idx); | |||
3082 | _ii_order.push(ii_current); | |||
3083 | load_idx = _clone_map.idx(n->_idx); | |||
3084 | } else if (_clone_map.idx(n->_idx) == load_idx && _clone_map.gen(n->_idx) != ii_current) { | |||
3085 | ii_current = _clone_map.gen(n->_idx); | |||
3086 | _ii_order.push(ii_current); | |||
3087 | } | |||
3088 | } | |||
3089 | }//for | |||
3090 | ||||
3091 | // Ensure extra info is allocated. | |||
3092 | initialize_bb(); | |||
3093 | ||||
3094 | #ifndef PRODUCT | |||
3095 | if (_vector_loop_debug && _ii_order.length() > 0) { | |||
3096 | tty->print("SuperWord::construct_bb: List of generations: "); | |||
3097 | for (int jj = 0; jj < _ii_order.length(); ++jj) { | |||
3098 | tty->print(" %d:%d", jj, _ii_order.at(jj)); | |||
3099 | } | |||
3100 | tty->print_cr(" "); | |||
3101 | } | |||
3102 | if (TraceSuperWord) { | |||
3103 | print_bb(); | |||
3104 | tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE"); | |||
3105 | for (int m = 0; m < _data_entry.length(); m++) { | |||
3106 | tty->print("%3d ", m); | |||
3107 | _data_entry.at(m)->dump(); | |||
3108 | } | |||
3109 | tty->print_cr("\nmemory slices: %s", _mem_slice_head.length() > 0 ? "" : "NONE"); | |||
3110 | for (int m = 0; m < _mem_slice_head.length(); m++) { | |||
3111 | tty->print("%3d ", m); _mem_slice_head.at(m)->dump(); | |||
3112 | tty->print(" "); _mem_slice_tail.at(m)->dump(); | |||
3113 | } | |||
3114 | } | |||
3115 | #endif | |||
3116 | assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found")do { if (!(rpo_idx == -1 && bb_ct == _block.length()) ) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3116, "assert(" "rpo_idx == -1 && bb_ct == _block.length()" ") failed", "all block members found"); ::breakpoint(); } } while (0); | |||
3117 | return (_mem_slice_head.length() > 0) || (reduction_uses > 0) || (_data_entry.length() > 0); | |||
3118 | } | |||
3119 | ||||
3120 | //------------------------------initialize_bb--------------------------- | |||
3121 | // Initialize per node info | |||
3122 | void SuperWord::initialize_bb() { | |||
3123 | Node* last = _block.at(_block.length() - 1); | |||
3124 | grow_node_info(bb_idx(last)); | |||
3125 | } | |||
3126 | ||||
3127 | //------------------------------bb_insert_after--------------------------- | |||
3128 | // Insert n into block after pos | |||
3129 | void SuperWord::bb_insert_after(Node* n, int pos) { | |||
3130 | int n_pos = pos + 1; | |||
3131 | // Make room | |||
3132 | for (int i = _block.length() - 1; i >= n_pos; i--) { | |||
3133 | _block.at_put_grow(i+1, _block.at(i)); | |||
3134 | } | |||
3135 | for (int j = _node_info.length() - 1; j >= n_pos; j--) { | |||
3136 | _node_info.at_put_grow(j+1, _node_info.at(j)); | |||
3137 | } | |||
3138 | // Set value | |||
3139 | _block.at_put_grow(n_pos, n); | |||
3140 | _node_info.at_put_grow(n_pos, SWNodeInfo::initial); | |||
3141 | // Adjust map from node->_idx to _block index | |||
3142 | for (int i = n_pos; i < _block.length(); i++) { | |||
3143 | set_bb_idx(_block.at(i), i); | |||
3144 | } | |||
3145 | } | |||
3146 | ||||
3147 | //------------------------------compute_max_depth--------------------------- | |||
3148 | // Compute max depth for expressions from beginning of block | |||
3149 | // Use to prune search paths during test for independence. | |||
3150 | void SuperWord::compute_max_depth() { | |||
3151 | int ct = 0; | |||
3152 | bool again; | |||
3153 | do { | |||
3154 | again = false; | |||
3155 | for (int i = 0; i < _block.length(); i++) { | |||
3156 | Node* n = _block.at(i); | |||
3157 | if (!n->is_Phi()) { | |||
3158 | int d_orig = depth(n); | |||
3159 | int d_in = 0; | |||
3160 | for (DepPreds preds(n, _dg); !preds.done(); preds.next()) { | |||
3161 | Node* pred = preds.current(); | |||
3162 | if (in_bb(pred)) { | |||
3163 | d_in = MAX2(d_in, depth(pred)); | |||
3164 | } | |||
3165 | } | |||
3166 | if (d_in + 1 != d_orig) { | |||
3167 | set_depth(n, d_in + 1); | |||
3168 | again = true; | |||
3169 | } | |||
3170 | } | |||
3171 | } | |||
3172 | ct++; | |||
3173 | } while (again); | |||
3174 | ||||
3175 | if (TraceSuperWord && Verbose) { | |||
3176 | tty->print_cr("compute_max_depth iterated: %d times", ct); | |||
3177 | } | |||
3178 | } | |||
3179 | ||||
3180 | //-------------------------compute_vector_element_type----------------------- | |||
3181 | // Compute necessary vector element type for expressions | |||
3182 | // This propagates backwards a narrower integer type when the | |||
3183 | // upper bits of the value are not needed. | |||
3184 | // Example: char a,b,c; a = b + c; | |||
3185 | // Normally the type of the add is integer, but for packed character | |||
3186 | // operations the type of the add needs to be char. | |||
3187 | void SuperWord::compute_vector_element_type() { | |||
3188 | if (TraceSuperWord && Verbose) { | |||
3189 | tty->print_cr("\ncompute_velt_type:"); | |||
3190 | } | |||
3191 | ||||
3192 | // Initial type | |||
3193 | for (int i = 0; i < _block.length(); i++) { | |||
3194 | Node* n = _block.at(i); | |||
3195 | set_velt_type(n, container_type(n)); | |||
3196 | } | |||
3197 | ||||
3198 | // Propagate integer narrowed type backwards through operations | |||
3199 | // that don't depend on higher order bits | |||
3200 | for (int i = _block.length() - 1; i >= 0; i--) { | |||
3201 | Node* n = _block.at(i); | |||
3202 | // Only integer types need be examined | |||
3203 | const Type* vtn = velt_type(n); | |||
3204 | if (vtn->basic_type() == T_INT) { | |||
3205 | uint start, end; | |||
3206 | VectorNode::vector_operands(n, &start, &end); | |||
3207 | ||||
3208 | for (uint j = start; j < end; j++) { | |||
3209 | Node* in = n->in(j); | |||
3210 | // Don't propagate through a memory | |||
3211 | if (!in->is_Mem() && in_bb(in) && velt_type(in)->basic_type() == T_INT && | |||
3212 | data_size(n) < data_size(in)) { | |||
3213 | bool same_type = true; | |||
3214 | for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) { | |||
3215 | Node *use = in->fast_out(k); | |||
3216 | if (!in_bb(use) || !same_velt_type(use, n)) { | |||
3217 | same_type = false; | |||
3218 | break; | |||
3219 | } | |||
3220 | } | |||
3221 | if (same_type) { | |||
3222 | // In any Java arithmetic operation, operands of small integer types | |||
3223 | // (boolean, byte, char & short) should be promoted to int first. As | |||
3224 | // vector elements of small types don't have upper bits of int, for | |||
3225 | // RShiftI or AbsI operations, the compiler has to know the precise | |||
3226 | // signedness info of the 1st operand. These operations shouldn't be | |||
3227 | // vectorized if the signedness info is imprecise. | |||
3228 | const Type* vt = vtn; | |||
3229 | int op = in->Opcode(); | |||
3230 | if (VectorNode::is_shift_opcode(op) || op == Op_AbsI) { | |||
3231 | Node* load = in->in(1); | |||
3232 | if (load->is_Load() && in_bb(load) && (velt_type(load)->basic_type() == T_INT)) { | |||
3233 | // Only Load nodes distinguish signed (LoadS/LoadB) and unsigned | |||
3234 | // (LoadUS/LoadUB) values. Store nodes only have one version. | |||
3235 | vt = velt_type(load); | |||
3236 | } else if (op != Op_LShiftI) { | |||
3237 | // Widen type to int to avoid the creation of vector nodes. Note | |||
3238 | // that left shifts work regardless of the signedness. | |||
3239 | vt = TypeInt::INT; | |||
3240 | } | |||
3241 | } | |||
3242 | set_velt_type(in, vt); | |||
3243 | } | |||
3244 | } | |||
3245 | } | |||
3246 | } | |||
3247 | } | |||
3248 | #ifndef PRODUCT | |||
3249 | if (TraceSuperWord && Verbose) { | |||
3250 | for (int i = 0; i < _block.length(); i++) { | |||
3251 | Node* n = _block.at(i); | |||
3252 | velt_type(n)->dump(); | |||
3253 | tty->print("\t"); | |||
3254 | n->dump(); | |||
3255 | } | |||
3256 | } | |||
3257 | #endif | |||
3258 | } | |||
3259 | ||||
3260 | //------------------------------memory_alignment--------------------------- | |||
3261 | // Alignment within a vector memory reference | |||
3262 | int SuperWord::memory_alignment(MemNode* s, int iv_adjust) { | |||
3263 | #ifndef PRODUCT | |||
3264 | if ((TraceSuperWord && Verbose) || is_trace_alignment()) { | |||
3265 | tty->print("SuperWord::memory_alignment within a vector memory reference for %d: ", s->_idx); s->dump(); | |||
3266 | } | |||
3267 | #endif | |||
3268 | NOT_PRODUCT(SWPointer::Tracer::Depth ddd(0);)SWPointer::Tracer::Depth ddd(0); | |||
3269 | SWPointer p(s, this, NULL__null, false); | |||
3270 | if (!p.valid()) { | |||
3271 | NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: SWPointer p invalid, return bottom_align");)if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: SWPointer p invalid, return bottom_align" ); | |||
3272 | return bottom_align; | |||
3273 | } | |||
3274 | int vw = get_vw_bytes_special(s); | |||
3275 | if (vw < 2) { | |||
3276 | NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: vector_width_in_bytes < 2, return bottom_align");)if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: vector_width_in_bytes < 2, return bottom_align" ); | |||
3277 | return bottom_align; // No vectors for this type | |||
3278 | } | |||
3279 | int offset = p.offset_in_bytes(); | |||
3280 | offset += iv_adjust*p.memory_size(); | |||
3281 | int off_rem = offset % vw; | |||
3282 | int off_mod = off_rem >= 0 ? off_rem : off_rem + vw; | |||
3283 | #ifndef PRODUCT | |||
3284 | if ((TraceSuperWord && Verbose) || is_trace_alignment()) { | |||
3285 | tty->print_cr("SWPointer::memory_alignment: off_rem = %d, off_mod = %d", off_rem, off_mod); | |||
3286 | } | |||
3287 | #endif | |||
3288 | return off_mod; | |||
3289 | } | |||
3290 | ||||
3291 | //---------------------------container_type--------------------------- | |||
3292 | // Smallest type containing range of values | |||
3293 | const Type* SuperWord::container_type(Node* n) { | |||
3294 | if (n->is_Mem()) { | |||
3295 | BasicType bt = n->as_Mem()->memory_type(); | |||
3296 | if (n->is_Store() && (bt == T_CHAR)) { | |||
3297 | // Use T_SHORT type instead of T_CHAR for stored values because any | |||
3298 | // preceding arithmetic operation extends values to signed Int. | |||
3299 | bt = T_SHORT; | |||
3300 | } | |||
3301 | if (n->Opcode() == Op_LoadUB) { | |||
3302 | // Adjust type for unsigned byte loads, it is important for right shifts. | |||
3303 | // T_BOOLEAN is used because there is no basic type representing type | |||
3304 | // TypeInt::UBYTE. Use of T_BOOLEAN for vectors is fine because only | |||
3305 | // size (one byte) and sign is important. | |||
3306 | bt = T_BOOLEAN; | |||
3307 | } | |||
3308 | return Type::get_const_basic_type(bt); | |||
3309 | } | |||
3310 | const Type* t = _igvn.type(n); | |||
3311 | if (t->basic_type() == T_INT) { | |||
3312 | // A narrow type of arithmetic operations will be determined by | |||
3313 | // propagating the type of memory operations. | |||
3314 | return TypeInt::INT; | |||
3315 | } | |||
3316 | return t; | |||
3317 | } | |||
3318 | ||||
3319 | bool SuperWord::same_velt_type(Node* n1, Node* n2) { | |||
3320 | const Type* vt1 = velt_type(n1); | |||
3321 | const Type* vt2 = velt_type(n2); | |||
3322 | if (vt1->basic_type() == T_INT && vt2->basic_type() == T_INT) { | |||
3323 | // Compare vectors element sizes for integer types. | |||
3324 | return data_size(n1) == data_size(n2); | |||
3325 | } | |||
3326 | return vt1 == vt2; | |||
3327 | } | |||
3328 | ||||
3329 | //------------------------------in_packset--------------------------- | |||
3330 | // Are s1 and s2 in a pack pair and ordered as s1,s2? | |||
3331 | bool SuperWord::in_packset(Node* s1, Node* s2) { | |||
3332 | for (int i = 0; i < _packset.length(); i++) { | |||
3333 | Node_List* p = _packset.at(i); | |||
3334 | assert(p->size() == 2, "must be")do { if (!(p->size() == 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3334, "assert(" "p->size() == 2" ") failed", "must be"); ::breakpoint(); } } while (0); | |||
3335 | if (p->at(0) == s1 && p->at(p->size()-1) == s2) { | |||
3336 | return true; | |||
3337 | } | |||
3338 | } | |||
3339 | return false; | |||
3340 | } | |||
3341 | ||||
3342 | //------------------------------in_pack--------------------------- | |||
3343 | // Is s in pack p? | |||
3344 | Node_List* SuperWord::in_pack(Node* s, Node_List* p) { | |||
3345 | for (uint i = 0; i < p->size(); i++) { | |||
3346 | if (p->at(i) == s) { | |||
3347 | return p; | |||
3348 | } | |||
3349 | } | |||
3350 | return NULL__null; | |||
3351 | } | |||
3352 | ||||
3353 | //------------------------------remove_pack_at--------------------------- | |||
3354 | // Remove the pack at position pos in the packset | |||
3355 | void SuperWord::remove_pack_at(int pos) { | |||
3356 | Node_List* p = _packset.at(pos); | |||
3357 | for (uint i = 0; i < p->size(); i++) { | |||
3358 | Node* s = p->at(i); | |||
3359 | set_my_pack(s, NULL__null); | |||
3360 | } | |||
3361 | _packset.remove_at(pos); | |||
3362 | } | |||
3363 | ||||
3364 | void SuperWord::packset_sort(int n) { | |||
3365 | // simple bubble sort so that we capitalize with O(n) when its already sorted | |||
3366 | while (n != 0) { | |||
3367 | bool swapped = false; | |||
3368 | for (int i = 1; i < n; i++) { | |||
3369 | Node_List* q_low = _packset.at(i-1); | |||
3370 | Node_List* q_i = _packset.at(i); | |||
3371 | ||||
3372 | // only swap when we find something to swap | |||
3373 | if (alignment(q_low->at(0)) > alignment(q_i->at(0))) { | |||
3374 | Node_List* t = q_i; | |||
3375 | *(_packset.adr_at(i)) = q_low; | |||
3376 | *(_packset.adr_at(i-1)) = q_i; | |||
3377 | swapped = true; | |||
3378 | } | |||
3379 | } | |||
3380 | if (swapped == false) break; | |||
3381 | n--; | |||
3382 | } | |||
3383 | } | |||
3384 | ||||
3385 | //------------------------------executed_first--------------------------- | |||
3386 | // Return the node executed first in pack p. Uses the RPO block list | |||
3387 | // to determine order. | |||
3388 | Node* SuperWord::executed_first(Node_List* p) { | |||
3389 | Node* n = p->at(0); | |||
3390 | int n_rpo = bb_idx(n); | |||
3391 | for (uint i = 1; i < p->size(); i++) { | |||
3392 | Node* s = p->at(i); | |||
3393 | int s_rpo = bb_idx(s); | |||
3394 | if (s_rpo < n_rpo) { | |||
3395 | n = s; | |||
3396 | n_rpo = s_rpo; | |||
3397 | } | |||
3398 | } | |||
3399 | return n; | |||
3400 | } | |||
3401 | ||||
3402 | //------------------------------executed_last--------------------------- | |||
3403 | // Return the node executed last in pack p. | |||
3404 | Node* SuperWord::executed_last(Node_List* p) { | |||
3405 | Node* n = p->at(0); | |||
3406 | int n_rpo = bb_idx(n); | |||
3407 | for (uint i = 1; i < p->size(); i++) { | |||
3408 | Node* s = p->at(i); | |||
3409 | int s_rpo = bb_idx(s); | |||
3410 | if (s_rpo > n_rpo) { | |||
3411 | n = s; | |||
3412 | n_rpo = s_rpo; | |||
3413 | } | |||
3414 | } | |||
3415 | return n; | |||
3416 | } | |||
3417 | ||||
3418 | LoadNode::ControlDependency SuperWord::control_dependency(Node_List* p) { | |||
3419 | LoadNode::ControlDependency dep = LoadNode::DependsOnlyOnTest; | |||
3420 | for (uint i = 0; i < p->size(); i++) { | |||
3421 | Node* n = p->at(i); | |||
3422 | assert(n->is_Load(), "only meaningful for loads")do { if (!(n->is_Load())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3422, "assert(" "n->is_Load()" ") failed", "only meaningful for loads" ); ::breakpoint(); } } while (0); | |||
3423 | if (!n->depends_only_on_test()) { | |||
3424 | if (n->as_Load()->has_unknown_control_dependency() && | |||
3425 | dep != LoadNode::Pinned) { | |||
3426 | // Upgrade to unknown control... | |||
3427 | dep = LoadNode::UnknownControl; | |||
3428 | } else { | |||
3429 | // Otherwise, we must pin it. | |||
3430 | dep = LoadNode::Pinned; | |||
3431 | } | |||
3432 | } | |||
3433 | } | |||
3434 | return dep; | |||
3435 | } | |||
3436 | ||||
3437 | ||||
3438 | //----------------------------align_initial_loop_index--------------------------- | |||
3439 | // Adjust pre-loop limit so that in main loop, a load/store reference | |||
3440 | // to align_to_ref will be a position zero in the vector. | |||
3441 | // (iv + k) mod vector_align == 0 | |||
3442 | void SuperWord::align_initial_loop_index(MemNode* align_to_ref) { | |||
3443 | assert(lp()->is_main_loop(), "")do { if (!(lp()->is_main_loop())) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3443, "assert(" "lp()->is_main_loop()" ") failed", ""); :: breakpoint(); } } while (0); | |||
3444 | CountedLoopEndNode* pre_end = pre_loop_end(); | |||
3445 | Node* pre_opaq1 = pre_end->limit(); | |||
3446 | assert(pre_opaq1->Opcode() == Op_Opaque1, "")do { if (!(pre_opaq1->Opcode() == Op_Opaque1)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3446, "assert(" "pre_opaq1->Opcode() == Op_Opaque1" ") failed" , ""); ::breakpoint(); } } while (0); | |||
3447 | Opaque1Node* pre_opaq = (Opaque1Node*)pre_opaq1; | |||
3448 | Node* lim0 = pre_opaq->in(1); | |||
3449 | ||||
3450 | // Where we put new limit calculations | |||
3451 | Node* pre_ctrl = pre_loop_head()->in(LoopNode::EntryControl); | |||
3452 | ||||
3453 | // Ensure the original loop limit is available from the | |||
3454 | // pre-loop Opaque1 node. | |||
3455 | Node* orig_limit = pre_opaq->original_loop_limit(); | |||
3456 | assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "")do { if (!(orig_limit != __null && _igvn.type(orig_limit ) != Type::TOP)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3456, "assert(" "orig_limit != __null && _igvn.type(orig_limit) != Type::TOP" ") failed", ""); ::breakpoint(); } } while (0); | |||
3457 | ||||
3458 | SWPointer align_to_ref_p(align_to_ref, this, NULL__null, false); | |||
3459 | assert(align_to_ref_p.valid(), "sanity")do { if (!(align_to_ref_p.valid())) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3459, "assert(" "align_to_ref_p.valid()" ") failed", "sanity" ); ::breakpoint(); } } while (0); | |||
3460 | ||||
3461 | // Given: | |||
3462 | // lim0 == original pre loop limit | |||
3463 | // V == v_align (power of 2) | |||
3464 | // invar == extra invariant piece of the address expression | |||
3465 | // e == offset [ +/- invar ] | |||
3466 | // | |||
3467 | // When reassociating expressions involving '%' the basic rules are: | |||
3468 | // (a - b) % k == 0 => a % k == b % k | |||
3469 | // and: | |||
3470 | // (a + b) % k == 0 => a % k == (k - b) % k | |||
3471 | // | |||
3472 | // For stride > 0 && scale > 0, | |||
3473 | // Derive the new pre-loop limit "lim" such that the two constraints: | |||
3474 | // (1) lim = lim0 + N (where N is some positive integer < V) | |||
3475 | // (2) (e + lim) % V == 0 | |||
3476 | // are true. | |||
3477 | // | |||
3478 | // Substituting (1) into (2), | |||
3479 | // (e + lim0 + N) % V == 0 | |||
3480 | // solve for N: | |||
3481 | // N = (V - (e + lim0)) % V | |||
3482 | // substitute back into (1), so that new limit | |||
3483 | // lim = lim0 + (V - (e + lim0)) % V | |||
3484 | // | |||
3485 | // For stride > 0 && scale < 0 | |||
3486 | // Constraints: | |||
3487 | // lim = lim0 + N | |||
3488 | // (e - lim) % V == 0 | |||
3489 | // Solving for lim: | |||
3490 | // (e - lim0 - N) % V == 0 | |||
3491 | // N = (e - lim0) % V | |||
3492 | // lim = lim0 + (e - lim0) % V | |||
3493 | // | |||
3494 | // For stride < 0 && scale > 0 | |||
3495 | // Constraints: | |||
3496 | // lim = lim0 - N | |||
3497 | // (e + lim) % V == 0 | |||
3498 | // Solving for lim: | |||
3499 | // (e + lim0 - N) % V == 0 | |||
3500 | // N = (e + lim0) % V | |||
3501 | // lim = lim0 - (e + lim0) % V | |||
3502 | // | |||
3503 | // For stride < 0 && scale < 0 | |||
3504 | // Constraints: | |||
3505 | // lim = lim0 - N | |||
3506 | // (e - lim) % V == 0 | |||
3507 | // Solving for lim: | |||
3508 | // (e - lim0 + N) % V == 0 | |||
3509 | // N = (V - (e - lim0)) % V | |||
3510 | // lim = lim0 - (V - (e - lim0)) % V | |||
3511 | ||||
3512 | int vw = vector_width_in_bytes(align_to_ref); | |||
3513 | int stride = iv_stride(); | |||
3514 | int scale = align_to_ref_p.scale_in_bytes(); | |||
3515 | int elt_size = align_to_ref_p.memory_size(); | |||
3516 | int v_align = vw / elt_size; | |||
3517 | assert(v_align > 1, "sanity")do { if (!(v_align > 1)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3517, "assert(" "v_align > 1" ") failed", "sanity"); ::breakpoint (); } } while (0); | |||
3518 | int offset = align_to_ref_p.offset_in_bytes() / elt_size; | |||
3519 | Node *offsn = _igvn.intcon(offset); | |||
3520 | ||||
3521 | Node *e = offsn; | |||
3522 | if (align_to_ref_p.invar() != NULL__null) { | |||
3523 | // incorporate any extra invariant piece producing (offset +/- invar) >>> log2(elt) | |||
3524 | Node* log2_elt = _igvn.intcon(exact_log2(elt_size)); | |||
3525 | Node* invar = align_to_ref_p.invar(); | |||
3526 | if (_igvn.type(invar)->isa_long()) { | |||
3527 | // Computations are done % (vector width/element size) so it's | |||
3528 | // safe to simply convert invar to an int and loose the upper 32 | |||
3529 | // bit half. | |||
3530 | invar = new ConvL2INode(invar); | |||
3531 | _igvn.register_new_node_with_optimizer(invar); | |||
3532 | } | |||
3533 | Node* invar_scale = align_to_ref_p.invar_scale(); | |||
3534 | if (invar_scale != NULL__null) { | |||
3535 | invar = new LShiftINode(invar, invar_scale); | |||
3536 | _igvn.register_new_node_with_optimizer(invar); | |||
3537 | } | |||
3538 | Node* aref = new URShiftINode(invar, log2_elt); | |||
3539 | _igvn.register_new_node_with_optimizer(aref); | |||
3540 | _phase->set_ctrl(aref, pre_ctrl); | |||
3541 | if (align_to_ref_p.negate_invar()) { | |||
3542 | e = new SubINode(e, aref); | |||
3543 | } else { | |||
3544 | e = new AddINode(e, aref); | |||
3545 | } | |||
3546 | _igvn.register_new_node_with_optimizer(e); | |||
3547 | _phase->set_ctrl(e, pre_ctrl); | |||
3548 | } | |||
3549 | if (vw > ObjectAlignmentInBytes || align_to_ref_p.base()->is_top()) { | |||
3550 | // incorporate base e +/- base && Mask >>> log2(elt) | |||
3551 | Node* xbase = new CastP2XNode(NULL__null, align_to_ref_p.adr()); | |||
3552 | _igvn.register_new_node_with_optimizer(xbase); | |||
3553 | #ifdef _LP641 | |||
3554 | xbase = new ConvL2INode(xbase); | |||
3555 | _igvn.register_new_node_with_optimizer(xbase); | |||
3556 | #endif | |||
3557 | Node* mask = _igvn.intcon(vw-1); | |||
3558 | Node* masked_xbase = new AndINode(xbase, mask); | |||
3559 | _igvn.register_new_node_with_optimizer(masked_xbase); | |||
3560 | Node* log2_elt = _igvn.intcon(exact_log2(elt_size)); | |||
3561 | Node* bref = new URShiftINode(masked_xbase, log2_elt); | |||
3562 | _igvn.register_new_node_with_optimizer(bref); | |||
3563 | _phase->set_ctrl(bref, pre_ctrl); | |||
3564 | e = new AddINode(e, bref); | |||
3565 | _igvn.register_new_node_with_optimizer(e); | |||
3566 | _phase->set_ctrl(e, pre_ctrl); | |||
3567 | } | |||
3568 | ||||
3569 | // compute e +/- lim0 | |||
3570 | if (scale < 0) { | |||
3571 | e = new SubINode(e, lim0); | |||
3572 | } else { | |||
3573 | e = new AddINode(e, lim0); | |||
3574 | } | |||
3575 | _igvn.register_new_node_with_optimizer(e); | |||
3576 | _phase->set_ctrl(e, pre_ctrl); | |||
3577 | ||||
3578 | if (stride * scale > 0) { | |||
3579 | // compute V - (e +/- lim0) | |||
3580 | Node* va = _igvn.intcon(v_align); | |||
3581 | e = new SubINode(va, e); | |||
3582 | _igvn.register_new_node_with_optimizer(e); | |||
3583 | _phase->set_ctrl(e, pre_ctrl); | |||
3584 | } | |||
3585 | // compute N = (exp) % V | |||
3586 | Node* va_msk = _igvn.intcon(v_align - 1); | |||
3587 | Node* N = new AndINode(e, va_msk); | |||
3588 | _igvn.register_new_node_with_optimizer(N); | |||
3589 | _phase->set_ctrl(N, pre_ctrl); | |||
3590 | ||||
3591 | // substitute back into (1), so that new limit | |||
3592 | // lim = lim0 + N | |||
3593 | Node* lim; | |||
3594 | if (stride < 0) { | |||
3595 | lim = new SubINode(lim0, N); | |||
3596 | } else { | |||
3597 | lim = new AddINode(lim0, N); | |||
3598 | } | |||
3599 | _igvn.register_new_node_with_optimizer(lim); | |||
3600 | _phase->set_ctrl(lim, pre_ctrl); | |||
3601 | Node* constrained = | |||
3602 | (stride > 0) ? (Node*) new MinINode(lim, orig_limit) | |||
3603 | : (Node*) new MaxINode(lim, orig_limit); | |||
3604 | _igvn.register_new_node_with_optimizer(constrained); | |||
3605 | _phase->set_ctrl(constrained, pre_ctrl); | |||
3606 | _igvn.replace_input_of(pre_opaq, 1, constrained); | |||
3607 | } | |||
3608 | ||||
3609 | //----------------------------get_pre_loop_end--------------------------- | |||
3610 | // Find pre loop end from main loop. Returns null if none. | |||
3611 | CountedLoopEndNode* SuperWord::find_pre_loop_end(CountedLoopNode* cl) const { | |||
3612 | // The loop cannot be optimized if the graph shape at | |||
3613 | // the loop entry is inappropriate. | |||
3614 | if (cl->is_canonical_loop_entry() == NULL__null) { | |||
3615 | return NULL__null; | |||
3616 | } | |||
3617 | ||||
3618 | Node* p_f = cl->skip_predicates()->in(0)->in(0); | |||
3619 | if (!p_f->is_IfFalse()) return NULL__null; | |||
3620 | if (!p_f->in(0)->is_CountedLoopEnd()) return NULL__null; | |||
3621 | CountedLoopEndNode* pre_end = p_f->in(0)->as_CountedLoopEnd(); | |||
3622 | CountedLoopNode* loop_node = pre_end->loopnode(); | |||
3623 | if (loop_node == NULL__null || !loop_node->is_pre_loop()) return NULL__null; | |||
3624 | return pre_end; | |||
3625 | } | |||
3626 | ||||
3627 | //------------------------------init--------------------------- | |||
3628 | void SuperWord::init() { | |||
3629 | _dg.init(); | |||
3630 | _packset.clear(); | |||
3631 | _disjoint_ptrs.clear(); | |||
3632 | _block.clear(); | |||
3633 | _post_block.clear(); | |||
3634 | _data_entry.clear(); | |||
3635 | _mem_slice_head.clear(); | |||
3636 | _mem_slice_tail.clear(); | |||
3637 | _iteration_first.clear(); | |||
3638 | _iteration_last.clear(); | |||
3639 | _node_info.clear(); | |||
3640 | _align_to_ref = NULL__null; | |||
3641 | _lpt = NULL__null; | |||
3642 | _lp = NULL__null; | |||
3643 | _bb = NULL__null; | |||
3644 | _iv = NULL__null; | |||
3645 | _race_possible = 0; | |||
3646 | _early_return = false; | |||
3647 | _num_work_vecs = 0; | |||
3648 | _num_reductions = 0; | |||
3649 | } | |||
3650 | ||||
3651 | //------------------------------restart--------------------------- | |||
3652 | void SuperWord::restart() { | |||
3653 | _dg.init(); | |||
3654 | _packset.clear(); | |||
3655 | _disjoint_ptrs.clear(); | |||
3656 | _block.clear(); | |||
3657 | _post_block.clear(); | |||
3658 | _data_entry.clear(); | |||
3659 | _mem_slice_head.clear(); | |||
3660 | _mem_slice_tail.clear(); | |||
3661 | _node_info.clear(); | |||
3662 | } | |||
3663 | ||||
3664 | //------------------------------print_packset--------------------------- | |||
3665 | void SuperWord::print_packset() { | |||
3666 | #ifndef PRODUCT | |||
3667 | tty->print_cr("packset"); | |||
3668 | for (int i = 0; i < _packset.length(); i++) { | |||
3669 | tty->print_cr("Pack: %d", i); | |||
3670 | Node_List* p = _packset.at(i); | |||
3671 | print_pack(p); | |||
3672 | } | |||
3673 | #endif | |||
3674 | } | |||
3675 | ||||
3676 | //------------------------------print_pack--------------------------- | |||
3677 | void SuperWord::print_pack(Node_List* p) { | |||
3678 | for (uint i = 0; i < p->size(); i++) { | |||
3679 | print_stmt(p->at(i)); | |||
3680 | } | |||
3681 | } | |||
3682 | ||||
3683 | //------------------------------print_bb--------------------------- | |||
3684 | void SuperWord::print_bb() { | |||
3685 | #ifndef PRODUCT | |||
3686 | tty->print_cr("\nBlock"); | |||
3687 | for (int i = 0; i < _block.length(); i++) { | |||
3688 | Node* n = _block.at(i); | |||
3689 | tty->print("%d ", i); | |||
3690 | if (n) { | |||
3691 | n->dump(); | |||
3692 | } | |||
3693 | } | |||
3694 | #endif | |||
3695 | } | |||
3696 | ||||
3697 | //------------------------------print_stmt--------------------------- | |||
3698 | void SuperWord::print_stmt(Node* s) { | |||
3699 | #ifndef PRODUCT | |||
3700 | tty->print(" align: %d \t", alignment(s)); | |||
3701 | s->dump(); | |||
3702 | #endif | |||
3703 | } | |||
3704 | ||||
3705 | //------------------------------blank--------------------------- | |||
3706 | char* SuperWord::blank(uint depth) { | |||
3707 | static char blanks[101]; | |||
3708 | assert(depth < 101, "too deep")do { if (!(depth < 101)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3708, "assert(" "depth < 101" ") failed", "too deep"); :: breakpoint(); } } while (0); | |||
3709 | for (uint i = 0; i < depth; i++) blanks[i] = ' '; | |||
3710 | blanks[depth] = '\0'; | |||
3711 | return blanks; | |||
3712 | } | |||
3713 | ||||
3714 | ||||
3715 | //==============================SWPointer=========================== | |||
3716 | #ifndef PRODUCT | |||
3717 | int SWPointer::Tracer::_depth = 0; | |||
3718 | #endif | |||
3719 | //----------------------------SWPointer------------------------ | |||
3720 | SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool analyze_only) : | |||
3721 | _mem(mem), _slp(slp), _base(NULL__null), _adr(NULL__null), | |||
3722 | _scale(0), _offset(0), _invar(NULL__null), _negate_invar(false), | |||
3723 | _invar_scale(NULL__null), | |||
3724 | _nstack(nstack), _analyze_only(analyze_only), | |||
3725 | _stack_idx(0) | |||
3726 | #ifndef PRODUCT | |||
3727 | , _tracer(slp) | |||
3728 | #endif | |||
3729 | { | |||
3730 | NOT_PRODUCT(_tracer.ctor_1(mem);)_tracer.ctor_1(mem); | |||
3731 | ||||
3732 | Node* adr = mem->in(MemNode::Address); | |||
3733 | if (!adr->is_AddP()) { | |||
3734 | assert(!valid(), "too complex")do { if (!(!valid())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3734, "assert(" "!valid()" ") failed", "too complex"); ::breakpoint (); } } while (0); | |||
3735 | return; | |||
3736 | } | |||
3737 | // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant) | |||
3738 | Node* base = adr->in(AddPNode::Base); | |||
3739 | // The base address should be loop invariant | |||
3740 | if (is_main_loop_member(base)) { | |||
3741 | assert(!valid(), "base address is loop variant")do { if (!(!valid())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3741, "assert(" "!valid()" ") failed", "base address is loop variant" ); ::breakpoint(); } } while (0); | |||
3742 | return; | |||
3743 | } | |||
3744 | // unsafe references require misaligned vector access support | |||
3745 | if (base->is_top() && !Matcher::misaligned_vectors_ok()) { | |||
3746 | assert(!valid(), "unsafe access")do { if (!(!valid())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3746, "assert(" "!valid()" ") failed", "unsafe access"); :: breakpoint(); } } while (0); | |||
3747 | return; | |||
3748 | } | |||
3749 | ||||
3750 | NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.store_depth();)if(_slp->is_trace_alignment()) _tracer.store_depth(); | |||
3751 | NOT_PRODUCT(_tracer.ctor_2(adr);)_tracer.ctor_2(adr); | |||
3752 | ||||
3753 | int i; | |||
3754 | for (i = 0; i < 3; i++) { | |||
3755 | NOT_PRODUCT(_tracer.ctor_3(adr, i);)_tracer.ctor_3(adr, i); | |||
3756 | ||||
3757 | if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) { | |||
3758 | assert(!valid(), "too complex")do { if (!(!valid())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3758, "assert(" "!valid()" ") failed", "too complex"); ::breakpoint (); } } while (0); | |||
3759 | return; | |||
3760 | } | |||
3761 | adr = adr->in(AddPNode::Address); | |||
3762 | NOT_PRODUCT(_tracer.ctor_4(adr, i);)_tracer.ctor_4(adr, i); | |||
3763 | ||||
3764 | if (base == adr || !adr->is_AddP()) { | |||
3765 | NOT_PRODUCT(_tracer.ctor_5(adr, base, i);)_tracer.ctor_5(adr, base, i); | |||
3766 | break; // stop looking at addp's | |||
3767 | } | |||
3768 | } | |||
3769 | if (is_main_loop_member(adr)) { | |||
3770 | assert(!valid(), "adr is loop variant")do { if (!(!valid())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3770, "assert(" "!valid()" ") failed", "adr is loop variant" ); ::breakpoint(); } } while (0); | |||
3771 | return; | |||
3772 | } | |||
3773 | ||||
3774 | if (!base->is_top() && adr != base) { | |||
3775 | assert(!valid(), "adr and base differ")do { if (!(!valid())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3775, "assert(" "!valid()" ") failed", "adr and base differ" ); ::breakpoint(); } } while (0); | |||
3776 | return; | |||
3777 | } | |||
3778 | ||||
3779 | NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.restore_depth();)if(_slp->is_trace_alignment()) _tracer.restore_depth(); | |||
3780 | NOT_PRODUCT(_tracer.ctor_6(mem);)_tracer.ctor_6(mem); | |||
3781 | ||||
3782 | _base = base; | |||
3783 | _adr = adr; | |||
3784 | assert(valid(), "Usable")do { if (!(valid())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 3784, "assert(" "valid()" ") failed", "Usable"); ::breakpoint (); } } while (0); | |||
3785 | } | |||
3786 | ||||
3787 | // Following is used to create a temporary object during | |||
3788 | // the pattern match of an address expression. | |||
3789 | SWPointer::SWPointer(SWPointer* p) : | |||
3790 | _mem(p->_mem), _slp(p->_slp), _base(NULL__null), _adr(NULL__null), | |||
3791 | _scale(0), _offset(0), _invar(NULL__null), _negate_invar(false), | |||
3792 | _invar_scale(NULL__null), | |||
3793 | _nstack(p->_nstack), _analyze_only(p->_analyze_only), | |||
3794 | _stack_idx(p->_stack_idx) | |||
3795 | #ifndef PRODUCT | |||
3796 | , _tracer(p->_slp) | |||
3797 | #endif | |||
3798 | {} | |||
3799 | ||||
3800 | bool SWPointer::is_main_loop_member(Node* n) const { | |||
3801 | Node* n_c = phase()->get_ctrl(n); | |||
3802 | return lpt()->is_member(phase()->get_loop(n_c)); | |||
3803 | } | |||
3804 | ||||
3805 | bool SWPointer::invariant(Node* n) const { | |||
3806 | NOT_PRODUCT(Tracer::Depth dd;)Tracer::Depth dd; | |||
3807 | Node* n_c = phase()->get_ctrl(n); | |||
3808 | NOT_PRODUCT(_tracer.invariant_1(n, n_c);)_tracer.invariant_1(n, n_c); | |||
3809 | bool is_not_member = !is_main_loop_member(n); | |||
3810 | if (is_not_member && _slp->lp()->is_main_loop()) { | |||
3811 | // Check that n_c dominates the pre loop head node. If it does not, then we cannot use n as invariant for the pre loop | |||
3812 | // CountedLoopEndNode check because n_c is either part of the pre loop or between the pre and the main loop (illegal | |||
3813 | // invariant: Happens, for example, when n_c is a CastII node that prevents data nodes to flow above the main loop). | |||
3814 | return phase()->is_dominator(n_c, _slp->pre_loop_head()); | |||
3815 | } | |||
3816 | return is_not_member; | |||
3817 | } | |||
3818 | ||||
3819 | //------------------------scaled_iv_plus_offset-------------------- | |||
3820 | // Match: k*iv + offset | |||
3821 | // where: k is a constant that maybe zero, and | |||
3822 | // offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional | |||
3823 | bool SWPointer::scaled_iv_plus_offset(Node* n) { | |||
3824 | NOT_PRODUCT(Tracer::Depth ddd;)Tracer::Depth ddd; | |||
3825 | NOT_PRODUCT(_tracer.scaled_iv_plus_offset_1(n);)_tracer.scaled_iv_plus_offset_1(n); | |||
3826 | ||||
3827 | if (scaled_iv(n)) { | |||
3828 | NOT_PRODUCT(_tracer.scaled_iv_plus_offset_2(n);)_tracer.scaled_iv_plus_offset_2(n); | |||
3829 | return true; | |||
3830 | } | |||
3831 | ||||
3832 | if (offset_plus_k(n)) { | |||
3833 | NOT_PRODUCT(_tracer.scaled_iv_plus_offset_3(n);)_tracer.scaled_iv_plus_offset_3(n); | |||
3834 | return true; | |||
3835 | } | |||
3836 | ||||
3837 | int opc = n->Opcode(); | |||
3838 | if (opc == Op_AddI) { | |||
3839 | if (offset_plus_k(n->in(2)) && scaled_iv_plus_offset(n->in(1))) { | |||
3840 | NOT_PRODUCT(_tracer.scaled_iv_plus_offset_4(n);)_tracer.scaled_iv_plus_offset_4(n); | |||
3841 | return true; | |||
3842 | } | |||
3843 | if (offset_plus_k(n->in(1)) && scaled_iv_plus_offset(n->in(2))) { | |||
3844 | NOT_PRODUCT(_tracer.scaled_iv_plus_offset_5(n);)_tracer.scaled_iv_plus_offset_5(n); | |||
3845 | return true; | |||
3846 | } | |||
3847 | } else if (opc == Op_SubI) { | |||
3848 | if (offset_plus_k(n->in(2), true) && scaled_iv_plus_offset(n->in(1))) { | |||
3849 | NOT_PRODUCT(_tracer.scaled_iv_plus_offset_6(n);)_tracer.scaled_iv_plus_offset_6(n); | |||
3850 | return true; | |||
3851 | } | |||
3852 | if (offset_plus_k(n->in(1)) && scaled_iv_plus_offset(n->in(2))) { | |||
3853 | _scale *= -1; | |||
3854 | NOT_PRODUCT(_tracer.scaled_iv_plus_offset_7(n);)_tracer.scaled_iv_plus_offset_7(n); | |||
3855 | return true; | |||
3856 | } | |||
3857 | } | |||
3858 | ||||
3859 | NOT_PRODUCT(_tracer.scaled_iv_plus_offset_8(n);)_tracer.scaled_iv_plus_offset_8(n); | |||
3860 | return false; | |||
3861 | } | |||
3862 | ||||
3863 | //----------------------------scaled_iv------------------------ | |||
3864 | // Match: k*iv where k is a constant that's not zero | |||
3865 | bool SWPointer::scaled_iv(Node* n) { | |||
3866 | NOT_PRODUCT(Tracer::Depth ddd;)Tracer::Depth ddd; | |||
3867 | NOT_PRODUCT(_tracer.scaled_iv_1(n);)_tracer.scaled_iv_1(n); | |||
3868 | ||||
3869 | if (_scale != 0) { // already found a scale | |||
3870 | NOT_PRODUCT(_tracer.scaled_iv_2(n, _scale);)_tracer.scaled_iv_2(n, _scale); | |||
3871 | return false; | |||
3872 | } | |||
3873 | ||||
3874 | if (n == iv()) { | |||
3875 | _scale = 1; | |||
3876 | NOT_PRODUCT(_tracer.scaled_iv_3(n, _scale);)_tracer.scaled_iv_3(n, _scale); | |||
3877 | return true; | |||
3878 | } | |||
3879 | if (_analyze_only && (is_main_loop_member(n))) { | |||
3880 | _nstack->push(n, _stack_idx++); | |||
3881 | } | |||
3882 | ||||
3883 | int opc = n->Opcode(); | |||
3884 | if (opc == Op_MulI) { | |||
3885 | if (n->in(1) == iv() && n->in(2)->is_Con()) { | |||
3886 | _scale = n->in(2)->get_int(); | |||
3887 | NOT_PRODUCT(_tracer.scaled_iv_4(n, _scale);)_tracer.scaled_iv_4(n, _scale); | |||
3888 | return true; | |||
3889 | } else if (n->in(2) == iv() && n->in(1)->is_Con()) { | |||
3890 | _scale = n->in(1)->get_int(); | |||
3891 | NOT_PRODUCT(_tracer.scaled_iv_5(n, _scale);)_tracer.scaled_iv_5(n, _scale); | |||
3892 | return true; | |||
3893 | } | |||
3894 | } else if (opc == Op_LShiftI) { | |||
3895 | if (n->in(1) == iv() && n->in(2)->is_Con()) { | |||
3896 | _scale = 1 << n->in(2)->get_int(); | |||
3897 | NOT_PRODUCT(_tracer.scaled_iv_6(n, _scale);)_tracer.scaled_iv_6(n, _scale); | |||
3898 | return true; | |||
3899 | } | |||
3900 | } else if (opc == Op_ConvI2L || opc == Op_CastII) { | |||
3901 | if (scaled_iv_plus_offset(n->in(1))) { | |||
3902 | NOT_PRODUCT(_tracer.scaled_iv_7(n);)_tracer.scaled_iv_7(n); | |||
3903 | return true; | |||
3904 | } | |||
3905 | } else if (opc == Op_LShiftL && n->in(2)->is_Con()) { | |||
3906 | if (!has_iv() && _invar == NULL__null) { | |||
3907 | // Need to preserve the current _offset value, so | |||
3908 | // create a temporary object for this expression subtree. | |||
3909 | // Hacky, so should re-engineer the address pattern match. | |||
3910 | NOT_PRODUCT(Tracer::Depth dddd;)Tracer::Depth dddd; | |||
3911 | SWPointer tmp(this); | |||
3912 | NOT_PRODUCT(_tracer.scaled_iv_8(n, &tmp);)_tracer.scaled_iv_8(n, &tmp); | |||
3913 | ||||
3914 | if (tmp.scaled_iv_plus_offset(n->in(1))) { | |||
3915 | int scale = n->in(2)->get_int(); | |||
3916 | _scale = tmp._scale << scale; | |||
3917 | _offset += tmp._offset << scale; | |||
3918 | _invar = tmp._invar; | |||
3919 | if (_invar != NULL__null) { | |||
3920 | _negate_invar = tmp._negate_invar; | |||
3921 | _invar_scale = n->in(2); | |||
3922 | } | |||
3923 | NOT_PRODUCT(_tracer.scaled_iv_9(n, _scale, _offset, _invar, _negate_invar);)_tracer.scaled_iv_9(n, _scale, _offset, _invar, _negate_invar ); | |||
3924 | return true; | |||
3925 | } | |||
3926 | } | |||
3927 | } | |||
3928 | NOT_PRODUCT(_tracer.scaled_iv_10(n);)_tracer.scaled_iv_10(n); | |||
3929 | return false; | |||
3930 | } | |||
3931 | ||||
3932 | //----------------------------offset_plus_k------------------------ | |||
3933 | // Match: offset is (k [+/- invariant]) | |||
3934 | // where k maybe zero and invariant is optional, but not both. | |||
3935 | bool SWPointer::offset_plus_k(Node* n, bool negate) { | |||
3936 | NOT_PRODUCT(Tracer::Depth ddd;)Tracer::Depth ddd; | |||
3937 | NOT_PRODUCT(_tracer.offset_plus_k_1(n);)_tracer.offset_plus_k_1(n); | |||
3938 | ||||
3939 | int opc = n->Opcode(); | |||
3940 | if (opc == Op_ConI) { | |||
3941 | _offset += negate ? -(n->get_int()) : n->get_int(); | |||
3942 | NOT_PRODUCT(_tracer.offset_plus_k_2(n, _offset);)_tracer.offset_plus_k_2(n, _offset); | |||
3943 | return true; | |||
3944 | } else if (opc == Op_ConL) { | |||
3945 | // Okay if value fits into an int | |||
3946 | const TypeLong* t = n->find_long_type(); | |||
3947 | if (t->higher_equal(TypeLong::INT)) { | |||
3948 | jlong loff = n->get_long(); | |||
3949 | jint off = (jint)loff; | |||
3950 | _offset += negate ? -off : loff; | |||
3951 | NOT_PRODUCT(_tracer.offset_plus_k_3(n, _offset);)_tracer.offset_plus_k_3(n, _offset); | |||
3952 | return true; | |||
3953 | } | |||
3954 | NOT_PRODUCT(_tracer.offset_plus_k_4(n);)_tracer.offset_plus_k_4(n); | |||
3955 | return false; | |||
3956 | } | |||
3957 | if (_invar != NULL__null) { // already has an invariant | |||
3958 | NOT_PRODUCT(_tracer.offset_plus_k_5(n, _invar);)_tracer.offset_plus_k_5(n, _invar); | |||
3959 | return false; | |||
3960 | } | |||
3961 | ||||
3962 | if (_analyze_only && is_main_loop_member(n)) { | |||
3963 | _nstack->push(n, _stack_idx++); | |||
3964 | } | |||
3965 | if (opc == Op_AddI) { | |||
3966 | if (n->in(2)->is_Con() && invariant(n->in(1))) { | |||
3967 | _negate_invar = negate; | |||
3968 | _invar = n->in(1); | |||
3969 | _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); | |||
3970 | NOT_PRODUCT(_tracer.offset_plus_k_6(n, _invar, _negate_invar, _offset);)_tracer.offset_plus_k_6(n, _invar, _negate_invar, _offset); | |||
3971 | return true; | |||
3972 | } else if (n->in(1)->is_Con() && invariant(n->in(2))) { | |||
3973 | _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); | |||
3974 | _negate_invar = negate; | |||
3975 | _invar = n->in(2); | |||
3976 | NOT_PRODUCT(_tracer.offset_plus_k_7(n, _invar, _negate_invar, _offset);)_tracer.offset_plus_k_7(n, _invar, _negate_invar, _offset); | |||
3977 | return true; | |||
3978 | } | |||
3979 | } | |||
3980 | if (opc == Op_SubI) { | |||
3981 | if (n->in(2)->is_Con() && invariant(n->in(1))) { | |||
3982 | _negate_invar = negate; | |||
3983 | _invar = n->in(1); | |||
3984 | _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); | |||
3985 | NOT_PRODUCT(_tracer.offset_plus_k_8(n, _invar, _negate_invar, _offset);)_tracer.offset_plus_k_8(n, _invar, _negate_invar, _offset); | |||
3986 | return true; | |||
3987 | } else if (n->in(1)->is_Con() && invariant(n->in(2))) { | |||
3988 | _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); | |||
3989 | _negate_invar = !negate; | |||
3990 | _invar = n->in(2); | |||
3991 | NOT_PRODUCT(_tracer.offset_plus_k_9(n, _invar, _negate_invar, _offset);)_tracer.offset_plus_k_9(n, _invar, _negate_invar, _offset); | |||
3992 | return true; | |||
3993 | } | |||
3994 | } | |||
3995 | ||||
3996 | if (!is_main_loop_member(n)) { | |||
3997 | // 'n' is loop invariant. Skip ConvI2L and CastII nodes before checking if 'n' is dominating the pre loop. | |||
3998 | if (opc == Op_ConvI2L) { | |||
3999 | n = n->in(1); | |||
4000 | } | |||
4001 | if (n->Opcode() == Op_CastII) { | |||
4002 | // Skip CastII nodes | |||
4003 | assert(!is_main_loop_member(n), "sanity")do { if (!(!is_main_loop_member(n))) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 4003, "assert(" "!is_main_loop_member(n)" ") failed", "sanity" ); ::breakpoint(); } } while (0); | |||
4004 | n = n->in(1); | |||
4005 | } | |||
4006 | // Check if 'n' can really be used as invariant (not in main loop and dominating the pre loop). | |||
4007 | if (invariant(n)) { | |||
4008 | _negate_invar = negate; | |||
4009 | _invar = n; | |||
4010 | NOT_PRODUCT(_tracer.offset_plus_k_10(n, _invar, _negate_invar, _offset);)_tracer.offset_plus_k_10(n, _invar, _negate_invar, _offset); | |||
4011 | return true; | |||
4012 | } | |||
4013 | } | |||
4014 | ||||
4015 | NOT_PRODUCT(_tracer.offset_plus_k_11(n);)_tracer.offset_plus_k_11(n); | |||
4016 | return false; | |||
4017 | } | |||
4018 | ||||
4019 | //----------------------------print------------------------ | |||
4020 | void SWPointer::print() { | |||
4021 | #ifndef PRODUCT | |||
4022 | tty->print("base: [%d] adr: [%d] scale: %d offset: %d", | |||
4023 | _base != NULL__null ? _base->_idx : 0, | |||
4024 | _adr != NULL__null ? _adr->_idx : 0, | |||
4025 | _scale, _offset); | |||
4026 | if (_invar != NULL__null) { | |||
4027 | tty->print(" invar: %c[%d] << [%d]", _negate_invar?'-':'+', _invar->_idx, _invar_scale->_idx); | |||
4028 | } | |||
4029 | tty->cr(); | |||
4030 | #endif | |||
4031 | } | |||
4032 | ||||
4033 | //----------------------------tracing------------------------ | |||
4034 | #ifndef PRODUCT | |||
4035 | void SWPointer::Tracer::print_depth() const { | |||
4036 | for (int ii = 0; ii < _depth; ++ii) { | |||
4037 | tty->print(" "); | |||
4038 | } | |||
4039 | } | |||
4040 | ||||
4041 | void SWPointer::Tracer::ctor_1 (Node* mem) { | |||
4042 | if(_slp->is_trace_alignment()) { | |||
4043 | print_depth(); tty->print(" %d SWPointer::SWPointer: start alignment analysis", mem->_idx); mem->dump(); | |||
4044 | } | |||
4045 | } | |||
4046 | ||||
4047 | void SWPointer::Tracer::ctor_2(Node* adr) { | |||
4048 | if(_slp->is_trace_alignment()) { | |||
4049 | //store_depth(); | |||
4050 | inc_depth(); | |||
4051 | print_depth(); tty->print(" %d (adr) SWPointer::SWPointer: ", adr->_idx); adr->dump(); | |||
4052 | inc_depth(); | |||
4053 | print_depth(); tty->print(" %d (base) SWPointer::SWPointer: ", adr->in(AddPNode::Base)->_idx); adr->in(AddPNode::Base)->dump(); | |||
4054 | } | |||
4055 | } | |||
4056 | ||||
4057 | void SWPointer::Tracer::ctor_3(Node* adr, int i) { | |||
4058 | if(_slp->is_trace_alignment()) { | |||
4059 | inc_depth(); | |||
4060 | Node* offset = adr->in(AddPNode::Offset); | |||
4061 | print_depth(); tty->print(" %d (offset) SWPointer::SWPointer: i = %d: ", offset->_idx, i); offset->dump(); | |||
4062 | } | |||
4063 | } | |||
4064 | ||||
4065 | void SWPointer::Tracer::ctor_4(Node* adr, int i) { | |||
4066 | if(_slp->is_trace_alignment()) { | |||
4067 | inc_depth(); | |||
4068 | print_depth(); tty->print(" %d (adr) SWPointer::SWPointer: i = %d: ", adr->_idx, i); adr->dump(); | |||
4069 | } | |||
4070 | } | |||
4071 | ||||
4072 | void SWPointer::Tracer::ctor_5(Node* adr, Node* base, int i) { | |||
4073 | if(_slp->is_trace_alignment()) { | |||
4074 | inc_depth(); | |||
4075 | if (base == adr) { | |||
4076 | print_depth(); tty->print_cr(" \\ %d (adr) == %d (base) SWPointer::SWPointer: breaking analysis at i = %d", adr->_idx, base->_idx, i); | |||
4077 | } else if (!adr->is_AddP()) { | |||
4078 | print_depth(); tty->print_cr(" \\ %d (adr) is NOT Addp SWPointer::SWPointer: breaking analysis at i = %d", adr->_idx, i); | |||
4079 | } | |||
4080 | } | |||
4081 | } | |||
4082 | ||||
4083 | void SWPointer::Tracer::ctor_6(Node* mem) { | |||
4084 | if(_slp->is_trace_alignment()) { | |||
4085 | //restore_depth(); | |||
4086 | print_depth(); tty->print_cr(" %d (adr) SWPointer::SWPointer: stop analysis", mem->_idx); | |||
4087 | } | |||
4088 | } | |||
4089 | ||||
4090 | void SWPointer::Tracer::invariant_1(Node *n, Node *n_c) const { | |||
4091 | if (_slp->do_vector_loop() && _slp->is_debug() && _slp->_lpt->is_member(_slp->_phase->get_loop(n_c)) != (int)_slp->in_bb(n)) { | |||
4092 | int is_member = _slp->_lpt->is_member(_slp->_phase->get_loop(n_c)); | |||
4093 | int in_bb = _slp->in_bb(n); | |||
4094 | print_depth(); tty->print(" \\ "); tty->print_cr(" %d SWPointer::invariant conditions differ: n_c %d", n->_idx, n_c->_idx); | |||
4095 | print_depth(); tty->print(" \\ "); tty->print_cr("is_member %d, in_bb %d", is_member, in_bb); | |||
4096 | print_depth(); tty->print(" \\ "); n->dump(); | |||
4097 | print_depth(); tty->print(" \\ "); n_c->dump(); | |||
4098 | } | |||
4099 | } | |||
4100 | ||||
4101 | void SWPointer::Tracer::scaled_iv_plus_offset_1(Node* n) { | |||
4102 | if(_slp->is_trace_alignment()) { | |||
4103 | print_depth(); tty->print(" %d SWPointer::scaled_iv_plus_offset testing node: ", n->_idx); | |||
4104 | n->dump(); | |||
4105 | } | |||
4106 | } | |||
4107 | ||||
4108 | void SWPointer::Tracer::scaled_iv_plus_offset_2(Node* n) { | |||
4109 | if(_slp->is_trace_alignment()) { | |||
4110 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: PASSED", n->_idx); | |||
4111 | } | |||
4112 | } | |||
4113 | ||||
4114 | void SWPointer::Tracer::scaled_iv_plus_offset_3(Node* n) { | |||
4115 | if(_slp->is_trace_alignment()) { | |||
4116 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: PASSED", n->_idx); | |||
4117 | } | |||
4118 | } | |||
4119 | ||||
4120 | void SWPointer::Tracer::scaled_iv_plus_offset_4(Node* n) { | |||
4121 | if(_slp->is_trace_alignment()) { | |||
4122 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_AddI PASSED", n->_idx); | |||
4123 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is scaled_iv: ", n->in(1)->_idx); n->in(1)->dump(); | |||
4124 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is offset_plus_k: ", n->in(2)->_idx); n->in(2)->dump(); | |||
4125 | } | |||
4126 | } | |||
4127 | ||||
4128 | void SWPointer::Tracer::scaled_iv_plus_offset_5(Node* n) { | |||
4129 | if(_slp->is_trace_alignment()) { | |||
4130 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_AddI PASSED", n->_idx); | |||
4131 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is scaled_iv: ", n->in(2)->_idx); n->in(2)->dump(); | |||
4132 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is offset_plus_k: ", n->in(1)->_idx); n->in(1)->dump(); | |||
4133 | } | |||
4134 | } | |||
4135 | ||||
4136 | void SWPointer::Tracer::scaled_iv_plus_offset_6(Node* n) { | |||
4137 | if(_slp->is_trace_alignment()) { | |||
4138 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_SubI PASSED", n->_idx); | |||
4139 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is scaled_iv: ", n->in(1)->_idx); n->in(1)->dump(); | |||
4140 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is offset_plus_k: ", n->in(2)->_idx); n->in(2)->dump(); | |||
4141 | } | |||
4142 | } | |||
4143 | ||||
4144 | void SWPointer::Tracer::scaled_iv_plus_offset_7(Node* n) { | |||
4145 | if(_slp->is_trace_alignment()) { | |||
4146 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_SubI PASSED", n->_idx); | |||
4147 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is scaled_iv: ", n->in(2)->_idx); n->in(2)->dump(); | |||
4148 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is offset_plus_k: ", n->in(1)->_idx); n->in(1)->dump(); | |||
4149 | } | |||
4150 | } | |||
4151 | ||||
4152 | void SWPointer::Tracer::scaled_iv_plus_offset_8(Node* n) { | |||
4153 | if(_slp->is_trace_alignment()) { | |||
4154 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: FAILED", n->_idx); | |||
4155 | } | |||
4156 | } | |||
4157 | ||||
4158 | void SWPointer::Tracer::scaled_iv_1(Node* n) { | |||
4159 | if(_slp->is_trace_alignment()) { | |||
4160 | print_depth(); tty->print(" %d SWPointer::scaled_iv: testing node: ", n->_idx); n->dump(); | |||
4161 | } | |||
4162 | } | |||
4163 | ||||
4164 | void SWPointer::Tracer::scaled_iv_2(Node* n, int scale) { | |||
4165 | if(_slp->is_trace_alignment()) { | |||
4166 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: FAILED since another _scale has been detected before", n->_idx); | |||
4167 | print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: _scale (%d) != 0", scale); | |||
4168 | } | |||
4169 | } | |||
4170 | ||||
4171 | void SWPointer::Tracer::scaled_iv_3(Node* n, int scale) { | |||
4172 | if(_slp->is_trace_alignment()) { | |||
4173 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: is iv, setting _scale = %d", n->_idx, scale); | |||
4174 | } | |||
4175 | } | |||
4176 | ||||
4177 | void SWPointer::Tracer::scaled_iv_4(Node* n, int scale) { | |||
4178 | if(_slp->is_trace_alignment()) { | |||
4179 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_MulI PASSED, setting _scale = %d", n->_idx, scale); | |||
4180 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(1) is iv: ", n->in(1)->_idx); n->in(1)->dump(); | |||
4181 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); | |||
4182 | } | |||
4183 | } | |||
4184 | ||||
4185 | void SWPointer::Tracer::scaled_iv_5(Node* n, int scale) { | |||
4186 | if(_slp->is_trace_alignment()) { | |||
4187 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_MulI PASSED, setting _scale = %d", n->_idx, scale); | |||
4188 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(2) is iv: ", n->in(2)->_idx); n->in(2)->dump(); | |||
4189 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump(); | |||
4190 | } | |||
4191 | } | |||
4192 | ||||
4193 | void SWPointer::Tracer::scaled_iv_6(Node* n, int scale) { | |||
4194 | if(_slp->is_trace_alignment()) { | |||
4195 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_LShiftI PASSED, setting _scale = %d", n->_idx, scale); | |||
4196 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(1) is iv: ", n->in(1)->_idx); n->in(1)->dump(); | |||
4197 | print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); | |||
4198 | } | |||
4199 | } | |||
4200 | ||||
4201 | void SWPointer::Tracer::scaled_iv_7(Node* n) { | |||
4202 | if(_slp->is_trace_alignment()) { | |||
4203 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_ConvI2L PASSED", n->_idx); | |||
4204 | print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: in(1) %d is scaled_iv_plus_offset: ", n->in(1)->_idx); | |||
4205 | inc_depth(); inc_depth(); | |||
4206 | print_depth(); n->in(1)->dump(); | |||
4207 | dec_depth(); dec_depth(); | |||
4208 | } | |||
4209 | } | |||
4210 | ||||
4211 | void SWPointer::Tracer::scaled_iv_8(Node* n, SWPointer* tmp) { | |||
4212 | if(_slp->is_trace_alignment()) { | |||
4213 | print_depth(); tty->print(" %d SWPointer::scaled_iv: Op_LShiftL, creating tmp SWPointer: ", n->_idx); tmp->print(); | |||
4214 | } | |||
4215 | } | |||
4216 | ||||
4217 | void SWPointer::Tracer::scaled_iv_9(Node* n, int scale, int offset, Node* invar, bool negate_invar) { | |||
4218 | if(_slp->is_trace_alignment()) { | |||
4219 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_LShiftL PASSED, setting _scale = %d, _offset = %d", n->_idx, scale, offset); | |||
4220 | print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: in(1) [%d] is scaled_iv_plus_offset, in(2) [%d] used to scale: _scale = %d, _offset = %d", | |||
4221 | n->in(1)->_idx, n->in(2)->_idx, scale, offset); | |||
4222 | if (invar != NULL__null) { | |||
4223 | print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: scaled invariant: %c[%d]", (negate_invar?'-':'+'), invar->_idx); | |||
4224 | } | |||
4225 | inc_depth(); inc_depth(); | |||
4226 | print_depth(); n->in(1)->dump(); | |||
4227 | print_depth(); n->in(2)->dump(); | |||
4228 | if (invar != NULL__null) { | |||
4229 | print_depth(); invar->dump(); | |||
4230 | } | |||
4231 | dec_depth(); dec_depth(); | |||
4232 | } | |||
4233 | } | |||
4234 | ||||
4235 | void SWPointer::Tracer::scaled_iv_10(Node* n) { | |||
4236 | if(_slp->is_trace_alignment()) { | |||
4237 | print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: FAILED", n->_idx); | |||
4238 | } | |||
4239 | } | |||
4240 | ||||
4241 | void SWPointer::Tracer::offset_plus_k_1(Node* n) { | |||
4242 | if(_slp->is_trace_alignment()) { | |||
4243 | print_depth(); tty->print(" %d SWPointer::offset_plus_k: testing node: ", n->_idx); n->dump(); | |||
4244 | } | |||
4245 | } | |||
4246 | ||||
4247 | void SWPointer::Tracer::offset_plus_k_2(Node* n, int _offset) { | |||
4248 | if(_slp->is_trace_alignment()) { | |||
4249 | print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_ConI PASSED, setting _offset = %d", n->_idx, _offset); | |||
4250 | } | |||
4251 | } | |||
4252 | ||||
4253 | void SWPointer::Tracer::offset_plus_k_3(Node* n, int _offset) { | |||
4254 | if(_slp->is_trace_alignment()) { | |||
4255 | print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_ConL PASSED, setting _offset = %d", n->_idx, _offset); | |||
4256 | } | |||
4257 | } | |||
4258 | ||||
4259 | void SWPointer::Tracer::offset_plus_k_4(Node* n) { | |||
4260 | if(_slp->is_trace_alignment()) { | |||
4261 | print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED", n->_idx); | |||
4262 | print_depth(); tty->print_cr(" \\ " JLONG_FORMAT"%" "l" "d" " SWPointer::offset_plus_k: Op_ConL FAILED, k is too big", n->get_long()); | |||
4263 | } | |||
4264 | } | |||
4265 | ||||
4266 | void SWPointer::Tracer::offset_plus_k_5(Node* n, Node* _invar) { | |||
4267 | if(_slp->is_trace_alignment()) { | |||
4268 | print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED since another invariant has been detected before", n->_idx); | |||
4269 | print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: _invar != NULL: ", _invar->_idx); _invar->dump(); | |||
4270 | } | |||
4271 | } | |||
4272 | ||||
4273 | void SWPointer::Tracer::offset_plus_k_6(Node* n, Node* _invar, bool _negate_invar, int _offset) { | |||
4274 | if(_slp->is_trace_alignment()) { | |||
4275 | print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_AddI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", | |||
4276 | n->_idx, _negate_invar, _invar->_idx, _offset); | |||
4277 | print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); | |||
4278 | print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is invariant: ", _invar->_idx); _invar->dump(); | |||
4279 | } | |||
4280 | } | |||
4281 | ||||
4282 | void SWPointer::Tracer::offset_plus_k_7(Node* n, Node* _invar, bool _negate_invar, int _offset) { | |||
4283 | if(_slp->is_trace_alignment()) { | |||
4284 | print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_AddI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", | |||
4285 | n->_idx, _negate_invar, _invar->_idx, _offset); | |||
4286 | print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump(); | |||
4287 | print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is invariant: ", _invar->_idx); _invar->dump(); | |||
4288 | } | |||
4289 | } | |||
4290 | ||||
4291 | void SWPointer::Tracer::offset_plus_k_8(Node* n, Node* _invar, bool _negate_invar, int _offset) { | |||
4292 | if(_slp->is_trace_alignment()) { | |||
4293 | print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_SubI is PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", | |||
4294 | n->_idx, _negate_invar, _invar->_idx, _offset); | |||
4295 | print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); | |||
4296 | print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is invariant: ", _invar->_idx); _invar->dump(); | |||
4297 | } | |||
4298 | } | |||
4299 | ||||
4300 | void SWPointer::Tracer::offset_plus_k_9(Node* n, Node* _invar, bool _negate_invar, int _offset) { | |||
4301 | if(_slp->is_trace_alignment()) { | |||
4302 | print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_SubI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset); | |||
4303 | print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump(); | |||
4304 | print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is invariant: ", _invar->_idx); _invar->dump(); | |||
4305 | } | |||
4306 | } | |||
4307 | ||||
4308 | void SWPointer::Tracer::offset_plus_k_10(Node* n, Node* _invar, bool _negate_invar, int _offset) { | |||
4309 | if(_slp->is_trace_alignment()) { | |||
4310 | print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset); | |||
4311 | print_depth(); tty->print_cr(" \\ %d SWPointer::offset_plus_k: is invariant", n->_idx); | |||
4312 | } | |||
4313 | } | |||
4314 | ||||
4315 | void SWPointer::Tracer::offset_plus_k_11(Node* n) { | |||
4316 | if(_slp->is_trace_alignment()) { | |||
4317 | print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED", n->_idx); | |||
4318 | } | |||
4319 | } | |||
4320 | ||||
4321 | #endif | |||
4322 | // ========================= OrderedPair ===================== | |||
4323 | ||||
4324 | const OrderedPair OrderedPair::initial; | |||
4325 | ||||
4326 | // ========================= SWNodeInfo ===================== | |||
4327 | ||||
4328 | const SWNodeInfo SWNodeInfo::initial; | |||
4329 | ||||
4330 | ||||
4331 | // ============================ DepGraph =========================== | |||
4332 | ||||
4333 | //------------------------------make_node--------------------------- | |||
4334 | // Make a new dependence graph node for an ideal node. | |||
4335 | DepMem* DepGraph::make_node(Node* node) { | |||
4336 | DepMem* m = new (_arena) DepMem(node); | |||
4337 | if (node != NULL__null) { | |||
4338 | assert(_map.at_grow(node->_idx) == NULL, "one init only")do { if (!(_map.at_grow(node->_idx) == __null)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 4338, "assert(" "_map.at_grow(node->_idx) == __null" ") failed" , "one init only"); ::breakpoint(); } } while (0); | |||
4339 | _map.at_put_grow(node->_idx, m); | |||
4340 | } | |||
4341 | return m; | |||
4342 | } | |||
4343 | ||||
4344 | //------------------------------make_edge--------------------------- | |||
4345 | // Make a new dependence graph edge from dpred -> dsucc | |||
4346 | DepEdge* DepGraph::make_edge(DepMem* dpred, DepMem* dsucc) { | |||
4347 | DepEdge* e = new (_arena) DepEdge(dpred, dsucc, dsucc->in_head(), dpred->out_head()); | |||
4348 | dpred->set_out_head(e); | |||
4349 | dsucc->set_in_head(e); | |||
4350 | return e; | |||
4351 | } | |||
4352 | ||||
4353 | // ========================== DepMem ======================== | |||
4354 | ||||
4355 | //------------------------------in_cnt--------------------------- | |||
4356 | int DepMem::in_cnt() { | |||
4357 | int ct = 0; | |||
4358 | for (DepEdge* e = _in_head; e != NULL__null; e = e->next_in()) ct++; | |||
4359 | return ct; | |||
4360 | } | |||
4361 | ||||
4362 | //------------------------------out_cnt--------------------------- | |||
4363 | int DepMem::out_cnt() { | |||
4364 | int ct = 0; | |||
4365 | for (DepEdge* e = _out_head; e != NULL__null; e = e->next_out()) ct++; | |||
4366 | return ct; | |||
4367 | } | |||
4368 | ||||
4369 | //------------------------------print----------------------------- | |||
4370 | void DepMem::print() { | |||
4371 | #ifndef PRODUCT | |||
4372 | tty->print(" DepNode %d (", _node->_idx); | |||
4373 | for (DepEdge* p = _in_head; p != NULL__null; p = p->next_in()) { | |||
4374 | Node* pred = p->pred()->node(); | |||
4375 | tty->print(" %d", pred != NULL__null ? pred->_idx : 0); | |||
4376 | } | |||
4377 | tty->print(") ["); | |||
4378 | for (DepEdge* s = _out_head; s != NULL__null; s = s->next_out()) { | |||
4379 | Node* succ = s->succ()->node(); | |||
4380 | tty->print(" %d", succ != NULL__null ? succ->_idx : 0); | |||
4381 | } | |||
4382 | tty->print_cr(" ]"); | |||
4383 | #endif | |||
4384 | } | |||
4385 | ||||
4386 | // =========================== DepEdge ========================= | |||
4387 | ||||
4388 | //------------------------------DepPreds--------------------------- | |||
4389 | void DepEdge::print() { | |||
4390 | #ifndef PRODUCT | |||
4391 | tty->print_cr("DepEdge: %d [ %d ]", _pred->node()->_idx, _succ->node()->_idx); | |||
4392 | #endif | |||
4393 | } | |||
4394 | ||||
4395 | // =========================== DepPreds ========================= | |||
4396 | // Iterator over predecessor edges in the dependence graph. | |||
4397 | ||||
4398 | //------------------------------DepPreds--------------------------- | |||
4399 | DepPreds::DepPreds(Node* n, DepGraph& dg) { | |||
4400 | _n = n; | |||
4401 | _done = false; | |||
4402 | if (_n->is_Store() || _n->is_Load()) { | |||
4403 | _next_idx = MemNode::Address; | |||
4404 | _end_idx = n->req(); | |||
4405 | _dep_next = dg.dep(_n)->in_head(); | |||
4406 | } else if (_n->is_Mem()) { | |||
4407 | _next_idx = 0; | |||
4408 | _end_idx = 0; | |||
4409 | _dep_next = dg.dep(_n)->in_head(); | |||
4410 | } else { | |||
4411 | _next_idx = 1; | |||
4412 | _end_idx = _n->req(); | |||
4413 | _dep_next = NULL__null; | |||
4414 | } | |||
4415 | next(); | |||
4416 | } | |||
4417 | ||||
4418 | //------------------------------next--------------------------- | |||
4419 | void DepPreds::next() { | |||
4420 | if (_dep_next != NULL__null) { | |||
4421 | _current = _dep_next->pred()->node(); | |||
4422 | _dep_next = _dep_next->next_in(); | |||
4423 | } else if (_next_idx < _end_idx) { | |||
4424 | _current = _n->in(_next_idx++); | |||
4425 | } else { | |||
4426 | _done = true; | |||
4427 | } | |||
4428 | } | |||
4429 | ||||
4430 | // =========================== DepSuccs ========================= | |||
4431 | // Iterator over successor edges in the dependence graph. | |||
4432 | ||||
4433 | //------------------------------DepSuccs--------------------------- | |||
4434 | DepSuccs::DepSuccs(Node* n, DepGraph& dg) { | |||
4435 | _n = n; | |||
4436 | _done = false; | |||
4437 | if (_n->is_Load()) { | |||
4438 | _next_idx = 0; | |||
4439 | _end_idx = _n->outcnt(); | |||
4440 | _dep_next = dg.dep(_n)->out_head(); | |||
4441 | } else if (_n->is_Mem() || (_n->is_Phi() && _n->bottom_type() == Type::MEMORY)) { | |||
4442 | _next_idx = 0; | |||
4443 | _end_idx = 0; | |||
4444 | _dep_next = dg.dep(_n)->out_head(); | |||
4445 | } else { | |||
4446 | _next_idx = 0; | |||
4447 | _end_idx = _n->outcnt(); | |||
4448 | _dep_next = NULL__null; | |||
4449 | } | |||
4450 | next(); | |||
4451 | } | |||
4452 | ||||
4453 | //-------------------------------next--------------------------- | |||
4454 | void DepSuccs::next() { | |||
4455 | if (_dep_next != NULL__null) { | |||
4456 | _current = _dep_next->succ()->node(); | |||
4457 | _dep_next = _dep_next->next_out(); | |||
4458 | } else if (_next_idx < _end_idx) { | |||
4459 | _current = _n->raw_out(_next_idx++); | |||
4460 | } else { | |||
4461 | _done = true; | |||
4462 | } | |||
4463 | } | |||
4464 | ||||
4465 | // | |||
4466 | // --------------------------------- vectorization/simd ----------------------------------- | |||
4467 | // | |||
4468 | bool SuperWord::same_origin_idx(Node* a, Node* b) const { | |||
4469 | return a != NULL__null && b != NULL__null && _clone_map.same_idx(a->_idx, b->_idx); | |||
4470 | } | |||
4471 | bool SuperWord::same_generation(Node* a, Node* b) const { | |||
4472 | return a != NULL__null && b != NULL__null && _clone_map.same_gen(a->_idx, b->_idx); | |||
4473 | } | |||
4474 | ||||
4475 | Node* SuperWord::find_phi_for_mem_dep(LoadNode* ld) { | |||
4476 | assert(in_bb(ld), "must be in block")do { if (!(in_bb(ld))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 4476, "assert(" "in_bb(ld)" ") failed", "must be in block") ; ::breakpoint(); } } while (0); | |||
4477 | if (_clone_map.gen(ld->_idx) == _ii_first) { | |||
4478 | #ifndef PRODUCT | |||
4479 | if (_vector_loop_debug) { | |||
4480 | tty->print_cr("SuperWord::find_phi_for_mem_dep _clone_map.gen(ld->_idx)=%d", | |||
4481 | _clone_map.gen(ld->_idx)); | |||
4482 | } | |||
4483 | #endif | |||
4484 | return NULL__null; //we think that any ld in the first gen being vectorizable | |||
4485 | } | |||
4486 | ||||
4487 | Node* mem = ld->in(MemNode::Memory); | |||
4488 | if (mem->outcnt() <= 1) { | |||
4489 | // we don't want to remove the only edge from mem node to load | |||
4490 | #ifndef PRODUCT | |||
4491 | if (_vector_loop_debug) { | |||
4492 | tty->print_cr("SuperWord::find_phi_for_mem_dep input node %d to load %d has no other outputs and edge mem->load cannot be removed", | |||
4493 | mem->_idx, ld->_idx); | |||
4494 | ld->dump(); | |||
4495 | mem->dump(); | |||
4496 | } | |||
4497 | #endif | |||
4498 | return NULL__null; | |||
4499 | } | |||
4500 | if (!in_bb(mem) || same_generation(mem, ld)) { | |||
4501 | #ifndef PRODUCT | |||
4502 | if (_vector_loop_debug) { | |||
4503 | tty->print_cr("SuperWord::find_phi_for_mem_dep _clone_map.gen(mem->_idx)=%d", | |||
4504 | _clone_map.gen(mem->_idx)); | |||
4505 | } | |||
4506 | #endif | |||
4507 | return NULL__null; // does not depend on loop volatile node or depends on the same generation | |||
4508 | } | |||
4509 | ||||
4510 | //otherwise first node should depend on mem-phi | |||
4511 | Node* first = first_node(ld); | |||
4512 | assert(first->is_Load(), "must be Load")do { if (!(first->is_Load())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 4512, "assert(" "first->is_Load()" ") failed", "must be Load" ); ::breakpoint(); } } while (0); | |||
4513 | Node* phi = first->as_Load()->in(MemNode::Memory); | |||
4514 | if (!phi->is_Phi() || phi->bottom_type() != Type::MEMORY) { | |||
4515 | #ifndef PRODUCT | |||
4516 | if (_vector_loop_debug) { | |||
4517 | tty->print_cr("SuperWord::find_phi_for_mem_dep load is not vectorizable node, since it's `first` does not take input from mem phi"); | |||
4518 | ld->dump(); | |||
4519 | first->dump(); | |||
4520 | } | |||
4521 | #endif | |||
4522 | return NULL__null; | |||
4523 | } | |||
4524 | ||||
4525 | Node* tail = 0; | |||
4526 | for (int m = 0; m < _mem_slice_head.length(); m++) { | |||
4527 | if (_mem_slice_head.at(m) == phi) { | |||
4528 | tail = _mem_slice_tail.at(m); | |||
4529 | } | |||
4530 | } | |||
4531 | if (tail == 0) { //test that found phi is in the list _mem_slice_head | |||
4532 | #ifndef PRODUCT | |||
4533 | if (_vector_loop_debug) { | |||
4534 | tty->print_cr("SuperWord::find_phi_for_mem_dep load %d is not vectorizable node, its phi %d is not _mem_slice_head", | |||
4535 | ld->_idx, phi->_idx); | |||
4536 | ld->dump(); | |||
4537 | phi->dump(); | |||
4538 | } | |||
4539 | #endif | |||
4540 | return NULL__null; | |||
4541 | } | |||
4542 | ||||
4543 | // now all conditions are met | |||
4544 | return phi; | |||
4545 | } | |||
4546 | ||||
4547 | Node* SuperWord::first_node(Node* nd) { | |||
4548 | for (int ii = 0; ii < _iteration_first.length(); ii++) { | |||
4549 | Node* nnn = _iteration_first.at(ii); | |||
4550 | if (same_origin_idx(nnn, nd)) { | |||
4551 | #ifndef PRODUCT | |||
4552 | if (_vector_loop_debug) { | |||
4553 | tty->print_cr("SuperWord::first_node: %d is the first iteration node for %d (_clone_map.idx(nnn->_idx) = %d)", | |||
4554 | nnn->_idx, nd->_idx, _clone_map.idx(nnn->_idx)); | |||
4555 | } | |||
4556 | #endif | |||
4557 | return nnn; | |||
4558 | } | |||
4559 | } | |||
4560 | ||||
4561 | #ifndef PRODUCT | |||
4562 | if (_vector_loop_debug) { | |||
4563 | tty->print_cr("SuperWord::first_node: did not find first iteration node for %d (_clone_map.idx(nd->_idx)=%d)", | |||
4564 | nd->_idx, _clone_map.idx(nd->_idx)); | |||
4565 | } | |||
4566 | #endif | |||
4567 | return 0; | |||
4568 | } | |||
4569 | ||||
4570 | Node* SuperWord::last_node(Node* nd) { | |||
4571 | for (int ii = 0; ii < _iteration_last.length(); ii++) { | |||
4572 | Node* nnn = _iteration_last.at(ii); | |||
4573 | if (same_origin_idx(nnn, nd)) { | |||
4574 | #ifndef PRODUCT | |||
4575 | if (_vector_loop_debug) { | |||
4576 | tty->print_cr("SuperWord::last_node _clone_map.idx(nnn->_idx)=%d, _clone_map.idx(nd->_idx)=%d", | |||
4577 | _clone_map.idx(nnn->_idx), _clone_map.idx(nd->_idx)); | |||
4578 | } | |||
4579 | #endif | |||
4580 | return nnn; | |||
4581 | } | |||
4582 | } | |||
4583 | return 0; | |||
4584 | } | |||
4585 | ||||
4586 | int SuperWord::mark_generations() { | |||
4587 | Node *ii_err = NULL__null, *tail_err = NULL__null; | |||
4588 | for (int i = 0; i < _mem_slice_head.length(); i++) { | |||
4589 | Node* phi = _mem_slice_head.at(i); | |||
4590 | assert(phi->is_Phi(), "must be phi")do { if (!(phi->is_Phi())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 4590, "assert(" "phi->is_Phi()" ") failed", "must be phi" ); ::breakpoint(); } } while (0); | |||
4591 | ||||
4592 | Node* tail = _mem_slice_tail.at(i); | |||
4593 | if (_ii_last == -1) { | |||
4594 | tail_err = tail; | |||
4595 | _ii_last = _clone_map.gen(tail->_idx); | |||
4596 | } | |||
4597 | else if (_ii_last != _clone_map.gen(tail->_idx)) { | |||
4598 | #ifndef PRODUCT | |||
4599 | if (TraceSuperWord && Verbose) { | |||
4600 | tty->print_cr("SuperWord::mark_generations _ii_last error - found different generations in two tail nodes "); | |||
4601 | tail->dump(); | |||
4602 | tail_err->dump(); | |||
4603 | } | |||
4604 | #endif | |||
4605 | return -1; | |||
4606 | } | |||
4607 | ||||
4608 | // find first iteration in the loop | |||
4609 | for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) { | |||
4610 | Node* ii = phi->fast_out(i); | |||
4611 | if (in_bb(ii) && ii->is_Store()) { // we speculate that normally Stores of one and one only generation have deps from mem phi | |||
4612 | if (_ii_first == -1) { | |||
4613 | ii_err = ii; | |||
4614 | _ii_first = _clone_map.gen(ii->_idx); | |||
4615 | } else if (_ii_first != _clone_map.gen(ii->_idx)) { | |||
4616 | #ifndef PRODUCT | |||
4617 | if (TraceSuperWord && Verbose) { | |||
4618 | tty->print_cr("SuperWord::mark_generations: _ii_first was found before and not equal to one in this node (%d)", _ii_first); | |||
4619 | ii->dump(); | |||
4620 | if (ii_err!= 0) { | |||
4621 | ii_err->dump(); | |||
4622 | } | |||
4623 | } | |||
4624 | #endif | |||
4625 | return -1; // this phi has Stores from different generations of unroll and cannot be simd/vectorized | |||
4626 | } | |||
4627 | } | |||
4628 | }//for (DUIterator_Fast imax, | |||
4629 | }//for (int i... | |||
4630 | ||||
4631 | if (_ii_first == -1 || _ii_last == -1) { | |||
4632 | if (TraceSuperWord && Verbose) { | |||
4633 | tty->print_cr("SuperWord::mark_generations unknown error, something vent wrong"); | |||
4634 | } | |||
4635 | return -1; // something vent wrong | |||
4636 | } | |||
4637 | // collect nodes in the first and last generations | |||
4638 | assert(_iteration_first.length() == 0, "_iteration_first must be empty")do { if (!(_iteration_first.length() == 0)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 4638, "assert(" "_iteration_first.length() == 0" ") failed" , "_iteration_first must be empty"); ::breakpoint(); } } while (0); | |||
4639 | assert(_iteration_last.length() == 0, "_iteration_last must be empty")do { if (!(_iteration_last.length() == 0)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 4639, "assert(" "_iteration_last.length() == 0" ") failed", "_iteration_last must be empty"); ::breakpoint(); } } while ( 0); | |||
4640 | for (int j = 0; j < _block.length(); j++) { | |||
4641 | Node* n = _block.at(j); | |||
4642 | node_idx_t gen = _clone_map.gen(n->_idx); | |||
4643 | if ((signed)gen == _ii_first) { | |||
4644 | _iteration_first.push(n); | |||
4645 | } else if ((signed)gen == _ii_last) { | |||
4646 | _iteration_last.push(n); | |||
4647 | } | |||
4648 | } | |||
4649 | ||||
4650 | // building order of iterations | |||
4651 | if (_ii_order.length() == 0 && ii_err != 0) { | |||
4652 | assert(in_bb(ii_err) && ii_err->is_Store(), "should be Store in bb")do { if (!(in_bb(ii_err) && ii_err->is_Store())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 4652, "assert(" "in_bb(ii_err) && ii_err->is_Store()" ") failed", "should be Store in bb"); ::breakpoint(); } } while (0); | |||
4653 | Node* nd = ii_err; | |||
4654 | while(_clone_map.gen(nd->_idx) != _ii_last) { | |||
4655 | _ii_order.push(_clone_map.gen(nd->_idx)); | |||
4656 | bool found = false; | |||
4657 | for (DUIterator_Fast imax, i = nd->fast_outs(imax); i < imax; i++) { | |||
4658 | Node* use = nd->fast_out(i); | |||
4659 | if (same_origin_idx(use, nd) && use->as_Store()->in(MemNode::Memory) == nd) { | |||
4660 | found = true; | |||
4661 | nd = use; | |||
4662 | break; | |||
4663 | } | |||
4664 | }//for | |||
4665 | ||||
4666 | if (found == false) { | |||
4667 | if (TraceSuperWord && Verbose) { | |||
4668 | tty->print_cr("SuperWord::mark_generations: Cannot build order of iterations - no dependent Store for %d", nd->_idx); | |||
4669 | } | |||
4670 | _ii_order.clear(); | |||
4671 | return -1; | |||
4672 | } | |||
4673 | } //while | |||
4674 | _ii_order.push(_clone_map.gen(nd->_idx)); | |||
4675 | } | |||
4676 | ||||
4677 | #ifndef PRODUCT | |||
4678 | if (_vector_loop_debug) { | |||
4679 | tty->print_cr("SuperWord::mark_generations"); | |||
4680 | tty->print_cr("First generation (%d) nodes:", _ii_first); | |||
4681 | for (int ii = 0; ii < _iteration_first.length(); ii++) _iteration_first.at(ii)->dump(); | |||
4682 | tty->print_cr("Last generation (%d) nodes:", _ii_last); | |||
4683 | for (int ii = 0; ii < _iteration_last.length(); ii++) _iteration_last.at(ii)->dump(); | |||
4684 | tty->print_cr(" "); | |||
4685 | ||||
4686 | tty->print("SuperWord::List of generations: "); | |||
4687 | for (int jj = 0; jj < _ii_order.length(); ++jj) { | |||
4688 | tty->print("%d:%d ", jj, _ii_order.at(jj)); | |||
4689 | } | |||
4690 | tty->print_cr(" "); | |||
4691 | } | |||
4692 | #endif | |||
4693 | ||||
4694 | return _ii_first; | |||
4695 | } | |||
4696 | ||||
4697 | bool SuperWord::fix_commutative_inputs(Node* gold, Node* fix) { | |||
4698 | assert(gold->is_Add() && fix->is_Add() || gold->is_Mul() && fix->is_Mul(), "should be only Add or Mul nodes")do { if (!(gold->is_Add() && fix->is_Add() || gold ->is_Mul() && fix->is_Mul())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 4698, "assert(" "gold->is_Add() && fix->is_Add() || gold->is_Mul() && fix->is_Mul()" ") failed", "should be only Add or Mul nodes"); ::breakpoint (); } } while (0); | |||
4699 | assert(same_origin_idx(gold, fix), "should be clones of the same node")do { if (!(same_origin_idx(gold, fix))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/superword.cpp" , 4699, "assert(" "same_origin_idx(gold, fix)" ") failed", "should be clones of the same node" ); ::breakpoint(); } } while (0); | |||
4700 | Node* gin1 = gold->in(1); | |||
4701 | Node* gin2 = gold->in(2); | |||
4702 | Node* fin1 = fix->in(1); | |||
4703 | Node* fin2 = fix->in(2); | |||
4704 | bool swapped = false; | |||
4705 | ||||
4706 | if (in_bb(gin1) && in_bb(gin2) && in_bb(fin1) && in_bb(fin2)) { | |||
4707 | if (same_origin_idx(gin1, fin1) && | |||
4708 | same_origin_idx(gin2, fin2)) { | |||
4709 | return true; // nothing to fix | |||
4710 | } | |||
4711 | if (same_origin_idx(gin1, fin2) && | |||
4712 | same_origin_idx(gin2, fin1)) { | |||
4713 | fix->swap_edges(1, 2); | |||
4714 | swapped = true; | |||
4715 | } | |||
4716 | } | |||
4717 | // at least one input comes from outside of bb | |||
4718 | if (gin1->_idx == fin1->_idx) { | |||
4719 | return true; // nothing to fix | |||
4720 | } | |||
4721 | if (!swapped && (gin1->_idx == fin2->_idx || gin2->_idx == fin1->_idx)) { //swapping is expensive, check condition first | |||
4722 | fix->swap_edges(1, 2); | |||
4723 | swapped = true; | |||
4724 | } | |||
4725 | ||||
4726 | if (swapped) { | |||
4727 | #ifndef PRODUCT | |||
4728 | if (_vector_loop_debug) { | |||
4729 | tty->print_cr("SuperWord::fix_commutative_inputs: fixed node %d", fix->_idx); | |||
4730 | } | |||
4731 | #endif | |||
4732 | return true; | |||
4733 | } | |||
4734 | ||||
4735 | if (TraceSuperWord && Verbose) { | |||
4736 | tty->print_cr("SuperWord::fix_commutative_inputs: cannot fix node %d", fix->_idx); | |||
4737 | } | |||
4738 | ||||
4739 | return false; | |||
4740 | } | |||
4741 | ||||
4742 | bool SuperWord::pack_parallel() { | |||
4743 | #ifndef PRODUCT | |||
4744 | if (_vector_loop_debug) { | |||
4745 | tty->print_cr("SuperWord::pack_parallel: START"); | |||
4746 | } | |||
4747 | #endif | |||
4748 | ||||
4749 | _packset.clear(); | |||
4750 | ||||
4751 | if (_ii_order.is_empty()) { | |||
4752 | #ifndef PRODUCT | |||
4753 | if (_vector_loop_debug) { | |||
4754 | tty->print_cr("SuperWord::pack_parallel: EMPTY"); | |||
4755 | } | |||
4756 | #endif | |||
4757 | return false; | |||
4758 | } | |||
4759 | ||||
4760 | for (int ii = 0; ii < _iteration_first.length(); ii++) { | |||
4761 | Node* nd = _iteration_first.at(ii); | |||
4762 | if (in_bb(nd) && (nd->is_Load() || nd->is_Store() || nd->is_Add() || nd->is_Mul())) { | |||
4763 | Node_List* pk = new Node_List(); | |||
4764 | pk->push(nd); | |||
4765 | for (int gen = 1; gen < _ii_order.length(); ++gen) { | |||
4766 | for (int kk = 0; kk < _block.length(); kk++) { | |||
4767 | Node* clone = _block.at(kk); | |||
4768 | if (same_origin_idx(clone, nd) && | |||
4769 | _clone_map.gen(clone->_idx) == _ii_order.at(gen)) { | |||
4770 | if (nd->is_Add() || nd->is_Mul()) { | |||
4771 | fix_commutative_inputs(nd, clone); | |||
4772 | } | |||
4773 | pk->push(clone); | |||
4774 | if (pk->size() == 4) { | |||
4775 | _packset.append(pk); | |||
4776 | #ifndef PRODUCT | |||
4777 | if (_vector_loop_debug) { | |||
4778 | tty->print_cr("SuperWord::pack_parallel: added pack "); | |||
4779 | pk->dump(); | |||
4780 | } | |||
4781 | #endif | |||
4782 | if (_clone_map.gen(clone->_idx) != _ii_last) { | |||
4783 | pk = new Node_List(); | |||
4784 | } | |||
4785 | } | |||
4786 | break; | |||
4787 | } | |||
4788 | } | |||
4789 | }//for | |||
4790 | }//if | |||
4791 | }//for | |||
4792 | ||||
4793 | #ifndef PRODUCT | |||
4794 | if (_vector_loop_debug) { | |||
4795 | tty->print_cr("SuperWord::pack_parallel: END"); | |||
4796 | } | |||
4797 | #endif | |||
4798 | ||||
4799 | return true; | |||
4800 | } | |||
4801 | ||||
4802 | bool SuperWord::hoist_loads_in_graph() { | |||
4803 | GrowableArray<Node*> loads; | |||
4804 | ||||
4805 | #ifndef PRODUCT | |||
4806 | if (_vector_loop_debug) { | |||
4807 | tty->print_cr("SuperWord::hoist_loads_in_graph: total number _mem_slice_head.length() = %d", _mem_slice_head.length()); | |||
4808 | } | |||
4809 | #endif | |||
4810 | ||||
4811 | for (int i = 0; i < _mem_slice_head.length(); i++) { | |||
4812 | Node* n = _mem_slice_head.at(i); | |||
4813 | if ( !in_bb(n) || !n->is_Phi() || n->bottom_type() != Type::MEMORY) { | |||
4814 | if (TraceSuperWord && Verbose) { | |||
4815 | tty->print_cr("SuperWord::hoist_loads_in_graph: skipping unexpected node n=%d", n->_idx); | |||
4816 | } | |||
4817 | continue; | |||
4818 | } | |||
4819 | ||||
4820 | #ifndef PRODUCT | |||
4821 | if (_vector_loop_debug) { | |||
4822 | tty->print_cr("SuperWord::hoist_loads_in_graph: processing phi %d = _mem_slice_head.at(%d);", n->_idx, i); | |||
4823 | } | |||
4824 | #endif | |||
4825 | ||||
4826 | for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { | |||
4827 | Node* ld = n->fast_out(i); | |||
4828 | if (ld->is_Load() && ld->as_Load()->in(MemNode::Memory) == n && in_bb(ld)) { | |||
4829 | for (int i = 0; i < _block.length(); i++) { | |||
4830 | Node* ld2 = _block.at(i); | |||
4831 | if (ld2->is_Load() && same_origin_idx(ld, ld2) && | |||
4832 | !same_generation(ld, ld2)) { // <= do not collect the first generation ld | |||
4833 | #ifndef PRODUCT | |||
4834 | if (_vector_loop_debug) { | |||
4835 | tty->print_cr("SuperWord::hoist_loads_in_graph: will try to hoist load ld2->_idx=%d, cloned from %d (ld->_idx=%d)", | |||
4836 | ld2->_idx, _clone_map.idx(ld->_idx), ld->_idx); | |||
4837 | } | |||
4838 | #endif | |||
4839 | // could not do on-the-fly, since iterator is immutable | |||
4840 | loads.push(ld2); | |||
4841 | } | |||
4842 | }// for | |||
4843 | }//if | |||
4844 | }//for (DUIterator_Fast imax, | |||
4845 | }//for (int i = 0; i | |||
4846 | ||||
4847 | for (int i = 0; i < loads.length(); i++) { | |||
4848 | LoadNode* ld = loads.at(i)->as_Load(); | |||
4849 | Node* phi = find_phi_for_mem_dep(ld); | |||
4850 | if (phi != NULL__null) { | |||
4851 | #ifndef PRODUCT | |||
4852 | if (_vector_loop_debug) { | |||
4853 | tty->print_cr("SuperWord::hoist_loads_in_graph replacing MemNode::Memory(%d) edge in %d with one from %d", | |||
4854 | MemNode::Memory, ld->_idx, phi->_idx); | |||
4855 | } | |||
4856 | #endif | |||
4857 | _igvn.replace_input_of(ld, MemNode::Memory, phi); | |||
4858 | } | |||
4859 | }//for | |||
4860 | ||||
4861 | restart(); // invalidate all basic structures, since we rebuilt the graph | |||
4862 | ||||
4863 | if (TraceSuperWord && Verbose) { | |||
4864 | tty->print_cr("\nSuperWord::hoist_loads_in_graph() the graph was rebuilt, all structures invalidated and need rebuild"); | |||
4865 | } | |||
4866 | ||||
4867 | return true; | |||
4868 | } |
1 | /* |
2 | * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. |
8 | * |
9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
12 | * version 2 for more details (a copy is included in the LICENSE file that |
13 | * accompanied this code). |
14 | * |
15 | * You should have received a copy of the GNU General Public License version |
16 | * 2 along with this work; if not, write to the Free Software Foundation, |
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | * |
19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
20 | * or visit www.oracle.com if you need additional information or have any |
21 | * questions. |
22 | * |
23 | */ |
24 | |
25 | #ifndef SHARE_OPTO_NODE_HPP |
26 | #define SHARE_OPTO_NODE_HPP |
27 | |
28 | #include "libadt/vectset.hpp" |
29 | #include "opto/compile.hpp" |
30 | #include "opto/type.hpp" |
31 | #include "utilities/copy.hpp" |
32 | |
33 | // Portions of code courtesy of Clifford Click |
34 | |
35 | // Optimization - Graph Style |
36 | |
37 | |
38 | class AbstractLockNode; |
39 | class AddNode; |
40 | class AddPNode; |
41 | class AliasInfo; |
42 | class AllocateArrayNode; |
43 | class AllocateNode; |
44 | class ArrayCopyNode; |
45 | class BaseCountedLoopNode; |
46 | class BaseCountedLoopEndNode; |
47 | class BlackholeNode; |
48 | class Block; |
49 | class BoolNode; |
50 | class BoxLockNode; |
51 | class CMoveNode; |
52 | class CallDynamicJavaNode; |
53 | class CallJavaNode; |
54 | class CallLeafNode; |
55 | class CallLeafNoFPNode; |
56 | class CallNode; |
57 | class CallRuntimeNode; |
58 | class CallNativeNode; |
59 | class CallStaticJavaNode; |
60 | class CastFFNode; |
61 | class CastDDNode; |
62 | class CastVVNode; |
63 | class CastIINode; |
64 | class CastLLNode; |
65 | class CatchNode; |
66 | class CatchProjNode; |
67 | class CheckCastPPNode; |
68 | class ClearArrayNode; |
69 | class CmpNode; |
70 | class CodeBuffer; |
71 | class ConstraintCastNode; |
72 | class ConNode; |
73 | class CompareAndSwapNode; |
74 | class CompareAndExchangeNode; |
75 | class CountedLoopNode; |
76 | class CountedLoopEndNode; |
77 | class DecodeNarrowPtrNode; |
78 | class DecodeNNode; |
79 | class DecodeNKlassNode; |
80 | class EncodeNarrowPtrNode; |
81 | class EncodePNode; |
82 | class EncodePKlassNode; |
83 | class FastLockNode; |
84 | class FastUnlockNode; |
85 | class HaltNode; |
86 | class IfNode; |
87 | class IfProjNode; |
88 | class IfFalseNode; |
89 | class IfTrueNode; |
90 | class InitializeNode; |
91 | class JVMState; |
92 | class JumpNode; |
93 | class JumpProjNode; |
94 | class LoadNode; |
95 | class LoadStoreNode; |
96 | class LoadStoreConditionalNode; |
97 | class LockNode; |
98 | class LongCountedLoopNode; |
99 | class LongCountedLoopEndNode; |
100 | class LoopNode; |
101 | class LShiftNode; |
102 | class MachBranchNode; |
103 | class MachCallDynamicJavaNode; |
104 | class MachCallJavaNode; |
105 | class MachCallLeafNode; |
106 | class MachCallNode; |
107 | class MachCallNativeNode; |
108 | class MachCallRuntimeNode; |
109 | class MachCallStaticJavaNode; |
110 | class MachConstantBaseNode; |
111 | class MachConstantNode; |
112 | class MachGotoNode; |
113 | class MachIfNode; |
114 | class MachJumpNode; |
115 | class MachNode; |
116 | class MachNullCheckNode; |
117 | class MachProjNode; |
118 | class MachReturnNode; |
119 | class MachSafePointNode; |
120 | class MachSpillCopyNode; |
121 | class MachTempNode; |
122 | class MachMergeNode; |
123 | class MachMemBarNode; |
124 | class Matcher; |
125 | class MemBarNode; |
126 | class MemBarStoreStoreNode; |
127 | class MemNode; |
128 | class MergeMemNode; |
129 | class MoveNode; |
130 | class MulNode; |
131 | class MultiNode; |
132 | class MultiBranchNode; |
133 | class NeverBranchNode; |
134 | class Opaque1Node; |
135 | class OuterStripMinedLoopNode; |
136 | class OuterStripMinedLoopEndNode; |
137 | class Node; |
138 | class Node_Array; |
139 | class Node_List; |
140 | class Node_Stack; |
141 | class OopMap; |
142 | class ParmNode; |
143 | class PCTableNode; |
144 | class PhaseCCP; |
145 | class PhaseGVN; |
146 | class PhaseIterGVN; |
147 | class PhaseRegAlloc; |
148 | class PhaseTransform; |
149 | class PhaseValues; |
150 | class PhiNode; |
151 | class Pipeline; |
152 | class ProjNode; |
153 | class RangeCheckNode; |
154 | class RegMask; |
155 | class RegionNode; |
156 | class RootNode; |
157 | class SafePointNode; |
158 | class SafePointScalarObjectNode; |
159 | class StartNode; |
160 | class State; |
161 | class StoreNode; |
162 | class SubNode; |
163 | class SubTypeCheckNode; |
164 | class Type; |
165 | class TypeNode; |
166 | class UnlockNode; |
167 | class VectorNode; |
168 | class LoadVectorNode; |
169 | class LoadVectorMaskedNode; |
170 | class StoreVectorMaskedNode; |
171 | class LoadVectorGatherNode; |
172 | class StoreVectorNode; |
173 | class StoreVectorScatterNode; |
174 | class VectorMaskCmpNode; |
175 | class VectorUnboxNode; |
176 | class VectorSet; |
177 | class VectorReinterpretNode; |
178 | class ShiftVNode; |
179 | |
180 | // The type of all node counts and indexes. |
181 | // It must hold at least 16 bits, but must also be fast to load and store. |
182 | // This type, if less than 32 bits, could limit the number of possible nodes. |
183 | // (To make this type platform-specific, move to globalDefinitions_xxx.hpp.) |
184 | typedef unsigned int node_idx_t; |
185 | |
186 | |
// Selects how the def-use iterator names are declared.
// Unless already defined, OPTO_DU_ITERATOR_ASSERT1 defaults to 1 when ASSERT1
// is defined and to 0 otherwise. When it is non-zero the three iterator names
// are forward-declared classes; when it is zero they are plain typedefs:
// DUIterator is a uint, DUIterator_Fast and DUIterator_Last are Node**.
// NOTE(review): the trailing "1" on ASSERT1 / OPTO_DU_ITERATOR_ASSERT1 looks
// like macro-expansion residue from the report this text was captured from
// (the closing comment below says OPTO_DU_ITERATOR_ASSERT) - confirm against
// the real header before relying on these spellings.
187 | #ifndef OPTO_DU_ITERATOR_ASSERT1 |
188 | #ifdef ASSERT1 |
189 | #define OPTO_DU_ITERATOR_ASSERT1 1 |
190 | #else |
191 | #define OPTO_DU_ITERATOR_ASSERT1 0 |
192 | #endif |
193 | #endif //OPTO_DU_ITERATOR_ASSERT |
194 | |
195 | #if OPTO_DU_ITERATOR_ASSERT1 |
196 | class DUIterator; |
197 | class DUIterator_Fast; |
198 | class DUIterator_Last; |
199 | #else |
200 | typedef uint DUIterator; |
201 | typedef Node** DUIterator_Fast; |
202 | typedef Node** DUIterator_Last; |
203 | #endif |
204 | |
// Node Sentinel: the value -1 cast to Node*. As an object-like macro it needs
// the Node type to be declared wherever it is expanded.
// (The expansion text had been fused into the macro name, which turned this
// into an ill-formed function-like macro definition.)
#define NodeSentinel (Node*)-1

// Unknown count frequency: the float constant -1.0f.
#define COUNT_UNKNOWN (-1.0f)
210 | |
211 | //------------------------------Node------------------------------------------- |
212 | // Nodes define actions in the program. They create values, which have types. |
213 | // They are both vertices in a directed graph and program primitives. Nodes |
214 | // are labeled; the label is the "opcode", the primitive function in the lambda |
215 | // calculus sense that gives meaning to the Node. Node inputs are ordered (so |
216 | // that "a-b" is different from "b-a"). The inputs to a Node are the inputs to |
217 | // the Node's function. These inputs also define a Type equation for the Node. |
218 | // Solving these Type equations amounts to doing dataflow analysis. |
219 | // Control and data are uniformly represented in the graph. Finally, Nodes |
220 | // have a unique dense integer index which is used to index into side arrays |
221 | // whenever I have phase-specific information. |
222 | |
223 | class Node { |
224 | friend class VMStructs; |
225 | |
226 | // Lots of restrictions on cloning Nodes |
227 | NONCOPYABLE(Node)Node(Node const&) = delete; Node& operator=(Node const &) = delete; |
228 | |
229 | public: |
230 | friend class Compile; |
231 | #if OPTO_DU_ITERATOR_ASSERT1 |
232 | friend class DUIterator_Common; |
233 | friend class DUIterator; |
234 | friend class DUIterator_Fast; |
235 | friend class DUIterator_Last; |
236 | #endif |
237 | |
238 | // Because Nodes come and go, I define an Arena of Node structures to pull |
239 | // from. This should allow fast access to node creation & deletion. This |
240 | // field is a local cache of a value defined in some "program fragment" for |
241 | // which these Nodes are just a part of. |
242 | |
243 | inline void* operator new(size_t x) throw() { |
244 | Compile* C = Compile::current(); |
245 | Node* n = (Node*)C->node_arena()->AmallocWords(x); |
246 | return (void*)n; |
247 | } |
248 | |
249 | // Delete is a NOP |
250 | void operator delete( void *ptr ) {} |
251 | // Fancy destructor; eagerly attempt to reclaim Node numberings and storage |
252 | void destruct(PhaseValues* phase); |
253 | |
254 | // Create a new Node. Required is the number is of inputs required for |
255 | // semantic correctness. |
256 | Node( uint required ); |
257 | |
258 | // Create a new Node with given input edges. |
259 | // This version requires use of the "edge-count" new. |
260 | // E.g. new (C,3) FooNode( C, NULL, left, right ); |
261 | Node( Node *n0 ); |
262 | Node( Node *n0, Node *n1 ); |
263 | Node( Node *n0, Node *n1, Node *n2 ); |
264 | Node( Node *n0, Node *n1, Node *n2, Node *n3 ); |
265 | Node( Node *n0, Node *n1, Node *n2, Node *n3, Node *n4 ); |
266 | Node( Node *n0, Node *n1, Node *n2, Node *n3, Node *n4, Node *n5 ); |
267 | Node( Node *n0, Node *n1, Node *n2, Node *n3, |
268 | Node *n4, Node *n5, Node *n6 ); |
269 | |
270 | // Clone an inherited Node given only the base Node type. |
271 | Node* clone() const; |
272 | |
273 | // Clone a Node, immediately supplying one or two new edges. |
274 | // The first and second arguments, if non-null, replace in(1) and in(2), |
275 | // respectively. |
276 | Node* clone_with_data_edge(Node* in1, Node* in2 = NULL__null) const { |
277 | Node* nn = clone(); |
278 | if (in1 != NULL__null) nn->set_req(1, in1); |
279 | if (in2 != NULL__null) nn->set_req(2, in2); |
280 | return nn; |
281 | } |
282 | |
283 | private: |
284 | // Shared setup for the above constructors. |
285 | // Handles all interactions with Compile::current. |
286 | // Puts initial values in all Node fields except _idx. |
287 | // Returns the initial value for _idx, which cannot |
288 | // be initialized by assignment. |
289 | inline int Init(int req); |
290 | |
291 | //----------------- input edge handling |
292 | protected: |
293 | friend class PhaseCFG; // Access to address of _in array elements |
294 | Node **_in; // Array of use-def references to Nodes |
295 | Node **_out; // Array of def-use references to Nodes |
296 | |
297 | // Input edges are split into two categories. Required edges are required |
298 | // for semantic correctness; order is important and NULLs are allowed. |
299 | // Precedence edges are used to help determine execution order and are |
300 | // added, e.g., for scheduling purposes. They are unordered and not |
301 | // duplicated; they have no embedded NULLs. Edges from 0 to _cnt-1 |
302 | // are required, from _cnt to _max-1 are precedence edges. |
303 | node_idx_t _cnt; // Total number of required Node inputs. |
304 | |
305 | node_idx_t _max; // Actual length of input array. |
306 | |
307 | // Output edges are an unordered list of def-use edges which exactly |
308 | // correspond to required input edges which point from other nodes |
309 | // to this one. Thus the count of the output edges is the number of |
310 | // users of this node. |
311 | node_idx_t _outcnt; // Total number of Node outputs. |
312 | |
313 | node_idx_t _outmax; // Actual length of output array. |
314 | |
315 | // Grow the actual input array to the next larger power-of-2 bigger than len. |
316 | void grow( uint len ); |
317 | // Grow the output array to the next larger power-of-2 bigger than len. |
318 | void out_grow( uint len ); |
319 | |
320 | public: |
321 | // Each Node is assigned a unique small/dense number. This number is used |
322 | // to index into auxiliary arrays of data and bit vectors. |
323 | // The field _idx is declared constant to defend against inadvertent assignments, |
324 | // since it is used by clients as a naked field. However, the field's value can be |
325 | // changed using the set_idx() method. |
326 | // |
327 | // The PhaseRenumberLive phase renumbers nodes based on liveness information. |
328 | // Therefore, it updates the value of the _idx field. The parse-time _idx is |
329 | // preserved in _parse_idx. |
330 | const node_idx_t _idx; |
331 | DEBUG_ONLY(const node_idx_t _parse_idx;)const node_idx_t _parse_idx; |
332 | // IGV node identifier. Two nodes, possibly in different compilation phases, |
333 | // have the same IGV identifier if (and only if) they are the very same node |
334 | // (same memory address) or one is "derived" from the other (by e.g. |
335 | // renumbering or matching). This identifier makes it possible to follow the |
336 | // entire lifetime of a node in IGV even if its C2 identifier (_idx) changes. |
337 | NOT_PRODUCT(node_idx_t _igv_idx;)node_idx_t _igv_idx; |
338 | |
339 | // Get the (read-only) number of input edges |
340 | uint req() const { return _cnt; } |
341 | uint len() const { return _max; } |
342 | // Get the (read-only) number of output edges |
343 | uint outcnt() const { return _outcnt; } |
344 | |
345 | #if OPTO_DU_ITERATOR_ASSERT1 |
346 | // Iterate over the out-edges of this node. Deletions are illegal. |
347 | inline DUIterator outs() const; |
348 | // Use this when the out array might have changed to suppress asserts. |
349 | inline DUIterator& refresh_out_pos(DUIterator& i) const; |
350 | // Does the node have an out at this position? (Used for iteration.) |
351 | inline bool has_out(DUIterator& i) const; |
352 | inline Node* out(DUIterator& i) const; |
353 | // Iterate over the out-edges of this node. All changes are illegal. |
354 | inline DUIterator_Fast fast_outs(DUIterator_Fast& max) const; |
355 | inline Node* fast_out(DUIterator_Fast& i) const; |
356 | // Iterate over the out-edges of this node, deleting one at a time. |
357 | inline DUIterator_Last last_outs(DUIterator_Last& min) const; |
358 | inline Node* last_out(DUIterator_Last& i) const; |
359 | // The inline bodies of all these methods are after the iterator definitions. |
360 | #else |
361 | // Iterate over the out-edges of this node. Deletions are illegal. |
362 | // This iteration uses integral indexes, to decouple from array reallocations. |
363 | DUIterator outs() const { return 0; } |
364 | // Use this when the out array might have changed to suppress asserts. |
365 | DUIterator refresh_out_pos(DUIterator i) const { return i; } |
366 | |
367 | // Reference to the i'th output Node. Error if out of bounds. |
368 | Node* out(DUIterator i) const { assert(i < _outcnt, "oob")do { if (!(i < _outcnt)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 368, "assert(" "i < _outcnt" ") failed", "oob"); ::breakpoint (); } } while (0); return _out[i]; } |
369 | // Does the node have an out at this position? (Used for iteration.) |
370 | bool has_out(DUIterator i) const { return i < _outcnt; } |
371 | |
372 | // Iterate over the out-edges of this node. All changes are illegal. |
373 | // This iteration uses a pointer internal to the out array. |
374 | DUIterator_Fast fast_outs(DUIterator_Fast& max) const { |
375 | Node** out = _out; |
376 | // Assign a limit pointer to the reference argument: |
377 | max = out + (ptrdiff_t)_outcnt; |
378 | // Return the base pointer: |
379 | return out; |
380 | } |
381 | Node* fast_out(DUIterator_Fast i) const { return *i; } |
382 | // Iterate over the out-edges of this node, deleting one at a time. |
383 | // This iteration uses a pointer internal to the out array. |
384 | DUIterator_Last last_outs(DUIterator_Last& min) const { |
385 | Node** out = _out; |
386 | // Assign a limit pointer to the reference argument: |
387 | min = out; |
388 | // Return the pointer to the start of the iteration: |
389 | return out + (ptrdiff_t)_outcnt - 1; |
390 | } |
391 | Node* last_out(DUIterator_Last i) const { return *i; } |
392 | #endif |
393 | |
394 | // Reference to the i'th input Node. Error if out of bounds. |
395 | Node* in(uint i) const { assert(i < _max, "oob: i=%d, _max=%d", i, _max)do { if (!(i < _max)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 395, "assert(" "i < _max" ") failed", "oob: i=%d, _max=%d" , i, _max); ::breakpoint(); } } while (0); return _in[i]; } |
396 | // Reference to the i'th input Node. NULL if out of bounds. |
397 | Node* lookup(uint i) const { return ((i < _max) ? _in[i] : NULL__null); } |
398 | // Reference to the i'th output Node. Error if out of bounds. |
399 | // Use this accessor sparingly. We are going trying to use iterators instead. |
400 | Node* raw_out(uint i) const { assert(i < _outcnt,"oob")do { if (!(i < _outcnt)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 400, "assert(" "i < _outcnt" ") failed", "oob"); ::breakpoint (); } } while (0); return _out[i]; } |
401 | // Return the unique out edge. |
402 | Node* unique_out() const { assert(_outcnt==1,"not unique")do { if (!(_outcnt==1)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 402, "assert(" "_outcnt==1" ") failed", "not unique"); ::breakpoint (); } } while (0); return _out[0]; } |
403 | // Delete out edge at position 'i' by moving last out edge to position 'i' |
404 | void raw_del_out(uint i) { |
405 | assert(i < _outcnt,"oob")do { if (!(i < _outcnt)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 405, "assert(" "i < _outcnt" ") failed", "oob"); ::breakpoint (); } } while (0); |
406 | assert(_outcnt > 0,"oob")do { if (!(_outcnt > 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 406, "assert(" "_outcnt > 0" ") failed", "oob"); ::breakpoint (); } } while (0); |
407 | #if OPTO_DU_ITERATOR_ASSERT1 |
408 | // Record that a change happened here. |
409 | debug_only(_last_del = _out[i]; ++_del_tick)_last_del = _out[i]; ++_del_tick; |
410 | #endif |
411 | _out[i] = _out[--_outcnt]; |
412 | // Smash the old edge so it can't be used accidentally. |
413 | debug_only(_out[_outcnt] = (Node *)(uintptr_t)0xdeadbeef)_out[_outcnt] = (Node *)(uintptr_t)0xdeadbeef; |
414 | } |
415 | |
#ifdef ASSERT
// Debug-only liveness query, used by the is_not_dead() check below.
bool is_dead() const;
// True when n is NULL, when VerifyIterativeGVN is off, or when n is not dead.
#define is_not_dead(n) ((n) == NULL || !VerifyIterativeGVN || !((n)->is_dead()))
// Debug-only reachability query.
bool is_reachable_from_root() const;
#endif
421 | // Check whether node has become unreachable |
422 | bool is_unreachable(PhaseIterGVN &igvn) const; |
423 | |
424 | // Set a required input edge, also updates corresponding output edge |
425 | void add_req( Node *n ); // Append a NEW required input |
426 | void add_req( Node *n0, Node *n1 ) { |
427 | add_req(n0); add_req(n1); } |
428 | void add_req( Node *n0, Node *n1, Node *n2 ) { |
429 | add_req(n0); add_req(n1); add_req(n2); } |
430 | void add_req_batch( Node* n, uint m ); // Append m NEW required inputs (all n). |
431 | void del_req( uint idx ); // Delete required edge & compact |
432 | void del_req_ordered( uint idx ); // Delete required edge & compact with preserved order |
433 | void ins_req( uint i, Node *n ); // Insert a NEW required input |
434 | void set_req( uint i, Node *n ) { |
435 | assert( is_not_dead(n), "can not use dead node")do { if (!(((n) == __null || !VerifyIterativeGVN || !((n)-> is_dead())))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 435, "assert(" "((n) == __null || !VerifyIterativeGVN || !((n)->is_dead()))" ") failed", "can not use dead node"); ::breakpoint(); } } while (0); |
436 | assert( i < _cnt, "oob: i=%d, _cnt=%d", i, _cnt)do { if (!(i < _cnt)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 436, "assert(" "i < _cnt" ") failed", "oob: i=%d, _cnt=%d" , i, _cnt); ::breakpoint(); } } while (0); |
437 | assert( !VerifyHashTableKeys || _hash_lock == 0,do { if (!(!VerifyHashTableKeys || _hash_lock == 0)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 438, "assert(" "!VerifyHashTableKeys || _hash_lock == 0" ") failed" , "remove node from hash table before modifying it"); ::breakpoint (); } } while (0) |
438 | "remove node from hash table before modifying it")do { if (!(!VerifyHashTableKeys || _hash_lock == 0)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 438, "assert(" "!VerifyHashTableKeys || _hash_lock == 0" ") failed" , "remove node from hash table before modifying it"); ::breakpoint (); } } while (0); |
439 | Node** p = &_in[i]; // cache this._in, across the del_out call |
440 | if (*p != NULL__null) (*p)->del_out((Node *)this); |
441 | (*p) = n; |
442 | if (n != NULL__null) n->add_out((Node *)this); |
443 | Compile::current()->record_modified_node(this); |
444 | } |
445 | // Light version of set_req() to init inputs after node creation. |
446 | void init_req( uint i, Node *n ) { |
447 | assert( i == 0 && this == n ||do { if (!(i == 0 && this == n || ((n) == __null || ! VerifyIterativeGVN || !((n)->is_dead())))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 448, "assert(" "i == 0 && this == n || ((n) == __null || !VerifyIterativeGVN || !((n)->is_dead()))" ") failed", "can not use dead node"); ::breakpoint(); } } while (0) |
448 | is_not_dead(n), "can not use dead node")do { if (!(i == 0 && this == n || ((n) == __null || ! VerifyIterativeGVN || !((n)->is_dead())))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 448, "assert(" "i == 0 && this == n || ((n) == __null || !VerifyIterativeGVN || !((n)->is_dead()))" ") failed", "can not use dead node"); ::breakpoint(); } } while (0); |
449 | assert( i < _cnt, "oob")do { if (!(i < _cnt)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 449, "assert(" "i < _cnt" ") failed", "oob"); ::breakpoint (); } } while (0); |
450 | assert( !VerifyHashTableKeys || _hash_lock == 0,do { if (!(!VerifyHashTableKeys || _hash_lock == 0)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 451, "assert(" "!VerifyHashTableKeys || _hash_lock == 0" ") failed" , "remove node from hash table before modifying it"); ::breakpoint (); } } while (0) |
451 | "remove node from hash table before modifying it")do { if (!(!VerifyHashTableKeys || _hash_lock == 0)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 451, "assert(" "!VerifyHashTableKeys || _hash_lock == 0" ") failed" , "remove node from hash table before modifying it"); ::breakpoint (); } } while (0); |
452 | assert( _in[i] == NULL, "sanity")do { if (!(_in[i] == __null)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 452, "assert(" "_in[i] == __null" ") failed", "sanity"); :: breakpoint(); } } while (0); |
453 | _in[i] = n; |
454 | if (n != NULL__null) n->add_out((Node *)this); |
455 | Compile::current()->record_modified_node(this); |
456 | } |
457 | // Find first occurrence of n among my edges: |
458 | int find_edge(Node* n); |
459 | int find_prec_edge(Node* n) { |
460 | for (uint i = req(); i < len(); i++) { |
461 | if (_in[i] == n) return i; |
462 | if (_in[i] == NULL__null) { |
463 | DEBUG_ONLY( while ((++i) < len()) assert(_in[i] == NULL, "Gap in prec edges!"); )while ((++i) < len()) do { if (!(_in[i] == __null)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 463, "assert(" "_in[i] == __null" ") failed", "Gap in prec edges!" ); ::breakpoint(); } } while (0); |
464 | break; |
465 | } |
466 | } |
467 | return -1; |
468 | } |
469 | int replace_edge(Node* old, Node* neww, PhaseGVN* gvn = NULL__null); |
470 | int replace_edges_in_range(Node* old, Node* neww, int start, int end, PhaseGVN* gvn); |
471 | // NULL out all inputs to eliminate incoming Def-Use edges. |
472 | void disconnect_inputs(Compile* C); |
473 | |
474 | // Quickly, return true if and only if I am Compile::current()->top(). |
475 | bool is_top() const { |
476 | assert((this == (Node*) Compile::current()->top()) == (_out == NULL), "")do { if (!((this == (Node*) Compile::current()->top()) == ( _out == __null))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 476, "assert(" "(this == (Node*) Compile::current()->top()) == (_out == __null)" ") failed", ""); ::breakpoint(); } } while (0); |
477 | return (_out == NULL__null); |
478 | } |
479 | // Reaffirm invariants for is_top. (Only from Compile::set_cached_top_node.) |
480 | void setup_is_top(); |
481 | |
482 | // Strip away casting. (It is depth-limited.) |
483 | Node* uncast(bool keep_deps = false) const; |
484 | // Return whether two Nodes are equivalent, after stripping casting. |
485 | bool eqv_uncast(const Node* n, bool keep_deps = false) const { |
486 | return (this->uncast(keep_deps) == n->uncast(keep_deps)); |
487 | } |
488 | |
489 | // Find out of current node that matches opcode. |
490 | Node* find_out_with(int opcode); |
491 | // Return true if the current node has an out that matches opcode. |
492 | bool has_out_with(int opcode); |
493 | // Return true if the current node has an out that matches any of the opcodes. |
494 | bool has_out_with(int opcode1, int opcode2, int opcode3, int opcode4); |
495 | |
496 | private: |
497 | static Node* uncast_helper(const Node* n, bool keep_deps); |
498 | |
499 | // Add an output edge to the end of the list |
500 | void add_out( Node *n ) { |
501 | if (is_top()) return; |
502 | if( _outcnt == _outmax ) out_grow(_outcnt); |
503 | _out[_outcnt++] = n; |
504 | } |
505 | // Delete an output edge |
506 | void del_out( Node *n ) { |
507 | if (is_top()) return; |
508 | Node** outp = &_out[_outcnt]; |
509 | // Find and remove n |
510 | do { |
511 | assert(outp > _out, "Missing Def-Use edge")do { if (!(outp > _out)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 511, "assert(" "outp > _out" ") failed", "Missing Def-Use edge" ); ::breakpoint(); } } while (0); |
512 | } while (*--outp != n); |
513 | *outp = _out[--_outcnt]; |
514 | // Smash the old edge so it can't be used accidentally. |
515 | debug_only(_out[_outcnt] = (Node *)(uintptr_t)0xdeadbeef)_out[_outcnt] = (Node *)(uintptr_t)0xdeadbeef; |
516 | // Record that a change happened here. |
517 | #if OPTO_DU_ITERATOR_ASSERT1 |
518 | debug_only(_last_del = n; ++_del_tick)_last_del = n; ++_del_tick; |
519 | #endif |
520 | } |
521 | // Close gap after removing edge. |
522 | void close_prec_gap_at(uint gap) { |
523 | assert(_cnt <= gap && gap < _max, "no valid prec edge")do { if (!(_cnt <= gap && gap < _max)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 523, "assert(" "_cnt <= gap && gap < _max" ") failed" , "no valid prec edge"); ::breakpoint(); } } while (0); |
524 | uint i = gap; |
525 | Node *last = NULL__null; |
526 | for (; i < _max-1; ++i) { |
527 | Node *next = _in[i+1]; |
528 | if (next == NULL__null) break; |
529 | last = next; |
530 | } |
531 | _in[gap] = last; // Move last slot to empty one. |
532 | _in[i] = NULL__null; // NULL out last slot. |
533 | } |
534 | |
535 | public: |
536 | // Globally replace this node by a given new node, updating all uses. |
537 | void replace_by(Node* new_node); |
538 | // Globally replace this node by a given new node, updating all uses |
539 | // and cutting input edges of old node. |
540 | void subsume_by(Node* new_node, Compile* c) { |
541 | replace_by(new_node); |
542 | disconnect_inputs(c); |
543 | } |
544 | void set_req_X(uint i, Node *n, PhaseIterGVN *igvn); |
545 | void set_req_X(uint i, Node *n, PhaseGVN *gvn); |
546 | // Find the one non-null required input. RegionNode only |
547 | Node *nonnull_req() const; |
548 | // Add or remove precedence edges |
549 | void add_prec( Node *n ); |
550 | void rm_prec( uint i ); |
551 | |
552 | // Note: prec(i) will not necessarily point to n if edge already exists. |
553 | void set_prec( uint i, Node *n ) { |
554 | assert(i < _max, "oob: i=%d, _max=%d", i, _max)do { if (!(i < _max)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 554, "assert(" "i < _max" ") failed", "oob: i=%d, _max=%d" , i, _max); ::breakpoint(); } } while (0); |
555 | assert(is_not_dead(n), "can not use dead node")do { if (!(((n) == __null || !VerifyIterativeGVN || !((n)-> is_dead())))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 555, "assert(" "((n) == __null || !VerifyIterativeGVN || !((n)->is_dead()))" ") failed", "can not use dead node"); ::breakpoint(); } } while (0); |
556 | assert(i >= _cnt, "not a precedence edge")do { if (!(i >= _cnt)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 556, "assert(" "i >= _cnt" ") failed", "not a precedence edge" ); ::breakpoint(); } } while (0); |
557 | // Avoid spec violation: duplicated prec edge. |
558 | if (_in[i] == n) return; |
559 | if (n == NULL__null || find_prec_edge(n) != -1) { |
560 | rm_prec(i); |
561 | return; |
562 | } |
563 | if (_in[i] != NULL__null) _in[i]->del_out((Node *)this); |
564 | _in[i] = n; |
565 | n->add_out((Node *)this); |
566 | } |
567 | |
568 | // Set this node's index, used by cisc_version to replace current node |
569 | void set_idx(uint new_idx) { |
570 | const node_idx_t* ref = &_idx; |
571 | *(node_idx_t*)ref = new_idx; |
572 | } |
573 | // Swap input edge order. (Edge indexes i1 and i2 are usually 1 and 2.) |
574 | void swap_edges(uint i1, uint i2) { |
575 | debug_only(uint check_hash = (VerifyHashTableKeys && _hash_lock) ? hash() : NO_HASH)uint check_hash = (VerifyHashTableKeys && _hash_lock) ? hash() : NO_HASH; |
576 | // Def-Use info is unchanged |
577 | Node* n1 = in(i1); |
578 | Node* n2 = in(i2); |
579 | _in[i1] = n2; |
580 | _in[i2] = n1; |
581 | // If this node is in the hash table, make sure it doesn't need a rehash. |
582 | assert(check_hash == NO_HASH || check_hash == hash(), "edge swap must preserve hash code")do { if (!(check_hash == NO_HASH || check_hash == hash())) { ( *g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 582, "assert(" "check_hash == NO_HASH || check_hash == hash()" ") failed", "edge swap must preserve hash code"); ::breakpoint (); } } while (0); |
583 | } |
584 | |
585 | // Iterators over input Nodes for a Node X are written as: |
586 | // for( i = 0; i < X.req(); i++ ) ... X[i] ... |
587 | // NOTE: Required edges can contain embedded NULL pointers. |
588 | |
589 | //----------------- Other Node Properties |
590 | |
// Generate class IDs for (some) ideal nodes so that it is possible to determine
// the type of a node using a non-virtual method call (the method is_<Node>() below).
//
// A class ID of an ideal node is a set of bits. In a class ID, a single bit determines
// the type of the node the ID represents; another subset of an ID's bits are reserved
// for the superclasses of the node represented by the ID.
//
// By design, if A is a supertype of B, a node of class B answers true to is_A(),
// while a node of class A answers false to is_B(). A node of class A answers
// true to is_A().
//
// If two classes, A and B, have the same superclass, a different bit of A's class id
// is reserved for A's type than for B's type. That bit is specified by the third
// parameter in the macro DEFINE_CLASS_ID.
//
// By convention, classes with deeper hierarchy are declared first. Moreover,
// classes with the same hierarchy depth are sorted by usage frequency.
//
// The query method masks the bits to cut off bits of subclasses and then compares
// the result with the class id (see the macro DEFINE_CLASS_QUERY below).
//
// Worked examples (bit 12 on the left, bit 0 on the right):
//
//   Class_MachCall=30, ClassMask_MachCall=31
//12       8       4       0
// 0 0 0 0 0 0 0 0 1 1 1 1 0
//                 | | | |
//                 | | | Bit_Mach=2
//                 | | Bit_MachReturn=4
//                 | Bit_MachSafePoint=8
//                 Bit_MachCall=16
//
//   Class_CountedLoop=864, ClassMask_CountedLoop=1023
//12       8       4       0
// 0 0 0 1 1 0 1 1 0 0 0 0 0
//       | |   | |
//       | |   | Bit_Region=32
//       | |   Bit_Loop=64
//       | Bit_BaseCountedLoop=256
//       Bit_CountedLoop=512

// DEFINE_CLASS_ID(cl, supcl, subn) emits three enumerators for class 'cl':
//   Bit_cl       - the single bit owned by cl. For a direct child of Node it
//                  is 1 << subn; otherwise it is the superclass bit shifted
//                  left by (1 + subn).
//   Class_cl     - the superclass ID plus Bit_cl.
//   ClassMask_cl - all bits up to and including Bit_cl.
#define DEFINE_CLASS_ID(cl, supcl, subn) \
Bit_##cl = (Class_##supcl == 0) ? 1 << subn : (Bit_##supcl) << (1 + subn) , \
Class_##cl = Class_##supcl + Bit_##cl , \
ClassMask_##cl = ((Bit_##cl << 1) - 1) ,

// This enum is used only for C2 ideal and mach nodes with is_<node>() methods
// so that its values fit into 32 bits.
enum NodeClasses {
  Bit_Node   = 0x00000000,
  Class_Node = 0x00000000,
  ClassMask_Node = 0xFFFFFFFF,

  DEFINE_CLASS_ID(Multi, Node, 0)
    DEFINE_CLASS_ID(SafePoint, Multi, 0)
      DEFINE_CLASS_ID(Call,      SafePoint, 0)
        DEFINE_CLASS_ID(CallJava,         Call, 0)
          DEFINE_CLASS_ID(CallStaticJava,   CallJava, 0)
          DEFINE_CLASS_ID(CallDynamicJava,  CallJava, 1)
        DEFINE_CLASS_ID(CallRuntime,      Call, 1)
          DEFINE_CLASS_ID(CallLeaf,         CallRuntime, 0)
            DEFINE_CLASS_ID(CallLeafNoFP,     CallLeaf, 0)
        DEFINE_CLASS_ID(Allocate,         Call, 2)
          DEFINE_CLASS_ID(AllocateArray,    Allocate, 0)
        DEFINE_CLASS_ID(AbstractLock,     Call, 3)
          DEFINE_CLASS_ID(Lock,             AbstractLock, 0)
          DEFINE_CLASS_ID(Unlock,           AbstractLock, 1)
        DEFINE_CLASS_ID(ArrayCopy,        Call, 4)
        DEFINE_CLASS_ID(CallNative,       Call, 5)
    DEFINE_CLASS_ID(MultiBranch, Multi, 1)
      DEFINE_CLASS_ID(PCTable,     MultiBranch, 0)
        DEFINE_CLASS_ID(Catch,       PCTable, 0)
        DEFINE_CLASS_ID(Jump,        PCTable, 1)
      DEFINE_CLASS_ID(If,          MultiBranch, 1)
        DEFINE_CLASS_ID(BaseCountedLoopEnd,     If, 0)
          DEFINE_CLASS_ID(CountedLoopEnd,       BaseCountedLoopEnd, 0)
          DEFINE_CLASS_ID(LongCountedLoopEnd,   BaseCountedLoopEnd, 1)
        DEFINE_CLASS_ID(RangeCheck,             If, 1)
        DEFINE_CLASS_ID(OuterStripMinedLoopEnd, If, 2)
      DEFINE_CLASS_ID(NeverBranch, MultiBranch, 2)
    DEFINE_CLASS_ID(Start,       Multi, 2)
    DEFINE_CLASS_ID(MemBar,      Multi, 3)
      DEFINE_CLASS_ID(Initialize,       MemBar, 0)
      DEFINE_CLASS_ID(MemBarStoreStore, MemBar, 1)

  DEFINE_CLASS_ID(Mach,  Node, 1)
    DEFINE_CLASS_ID(MachReturn, Mach, 0)
      DEFINE_CLASS_ID(MachSafePoint, MachReturn, 0)
        DEFINE_CLASS_ID(MachCall, MachSafePoint, 0)
          DEFINE_CLASS_ID(MachCallJava,         MachCall, 0)
            DEFINE_CLASS_ID(MachCallStaticJava,   MachCallJava, 0)
            DEFINE_CLASS_ID(MachCallDynamicJava,  MachCallJava, 1)
          DEFINE_CLASS_ID(MachCallRuntime,      MachCall, 1)
            DEFINE_CLASS_ID(MachCallLeaf,         MachCallRuntime, 0)
          DEFINE_CLASS_ID(MachCallNative,       MachCall, 2)
    DEFINE_CLASS_ID(MachBranch, Mach, 1)
      DEFINE_CLASS_ID(MachIf,         MachBranch, 0)
      DEFINE_CLASS_ID(MachGoto,       MachBranch, 1)
      DEFINE_CLASS_ID(MachNullCheck,  MachBranch, 2)
    DEFINE_CLASS_ID(MachSpillCopy,    Mach, 2)
    DEFINE_CLASS_ID(MachTemp,         Mach, 3)
    DEFINE_CLASS_ID(MachConstantBase, Mach, 4)
    DEFINE_CLASS_ID(MachConstant,     Mach, 5)
      DEFINE_CLASS_ID(MachJump,       MachConstant, 0)
    DEFINE_CLASS_ID(MachMerge,        Mach, 6)
    DEFINE_CLASS_ID(MachMemBar,       Mach, 7)

  DEFINE_CLASS_ID(Type,  Node, 2)
    DEFINE_CLASS_ID(Phi,   Type, 0)
    DEFINE_CLASS_ID(ConstraintCast, Type, 1)
      DEFINE_CLASS_ID(CastII, ConstraintCast, 0)
      DEFINE_CLASS_ID(CheckCastPP, ConstraintCast, 1)
      DEFINE_CLASS_ID(CastLL, ConstraintCast, 2)
      DEFINE_CLASS_ID(CastFF, ConstraintCast, 3)
      DEFINE_CLASS_ID(CastDD, ConstraintCast, 4)
      DEFINE_CLASS_ID(CastVV, ConstraintCast, 5)
    DEFINE_CLASS_ID(CMove, Type, 3)
    DEFINE_CLASS_ID(SafePointScalarObject, Type, 4)
    DEFINE_CLASS_ID(DecodeNarrowPtr, Type, 5)
      DEFINE_CLASS_ID(DecodeN, DecodeNarrowPtr, 0)
      DEFINE_CLASS_ID(DecodeNKlass, DecodeNarrowPtr, 1)
    DEFINE_CLASS_ID(EncodeNarrowPtr, Type, 6)
      DEFINE_CLASS_ID(EncodeP, EncodeNarrowPtr, 0)
      DEFINE_CLASS_ID(EncodePKlass, EncodeNarrowPtr, 1)
    DEFINE_CLASS_ID(Vector, Type, 7)
      DEFINE_CLASS_ID(VectorMaskCmp, Vector, 0)
      DEFINE_CLASS_ID(VectorUnbox, Vector, 1)
      DEFINE_CLASS_ID(VectorReinterpret, Vector, 2)
      DEFINE_CLASS_ID(ShiftV, Vector, 3)

  DEFINE_CLASS_ID(Proj,  Node, 3)
    DEFINE_CLASS_ID(CatchProj, Proj, 0)
    DEFINE_CLASS_ID(JumpProj,  Proj, 1)
    DEFINE_CLASS_ID(IfProj,    Proj, 2)
      DEFINE_CLASS_ID(IfTrue,    IfProj, 0)
      DEFINE_CLASS_ID(IfFalse,   IfProj, 1)
    DEFINE_CLASS_ID(Parm,      Proj, 4)
    DEFINE_CLASS_ID(MachProj,  Proj, 5)

  DEFINE_CLASS_ID(Mem, Node, 4)
    DEFINE_CLASS_ID(Load, Mem, 0)
      DEFINE_CLASS_ID(LoadVector,  Load, 0)
        DEFINE_CLASS_ID(LoadVectorGather, LoadVector, 0)
        DEFINE_CLASS_ID(LoadVectorMasked, LoadVector, 1)
    DEFINE_CLASS_ID(Store, Mem, 1)
      DEFINE_CLASS_ID(StoreVector, Store, 0)
        DEFINE_CLASS_ID(StoreVectorScatter, StoreVector, 0)
        DEFINE_CLASS_ID(StoreVectorMasked, StoreVector, 1)
    DEFINE_CLASS_ID(LoadStore, Mem, 2)
      DEFINE_CLASS_ID(LoadStoreConditional, LoadStore, 0)
        DEFINE_CLASS_ID(CompareAndSwap, LoadStoreConditional, 0)
      DEFINE_CLASS_ID(CompareAndExchangeNode, LoadStore, 1)

  DEFINE_CLASS_ID(Region, Node, 5)
    DEFINE_CLASS_ID(Loop, Region, 0)
      DEFINE_CLASS_ID(Root,                Loop, 0)
      DEFINE_CLASS_ID(BaseCountedLoop,     Loop, 1)
        DEFINE_CLASS_ID(CountedLoop,       BaseCountedLoop, 0)
        DEFINE_CLASS_ID(LongCountedLoop,   BaseCountedLoop, 1)
      DEFINE_CLASS_ID(OuterStripMinedLoop, Loop, 2)

  DEFINE_CLASS_ID(Sub,   Node, 6)
    DEFINE_CLASS_ID(Cmp,   Sub, 0)
      DEFINE_CLASS_ID(FastLock,   Cmp, 0)
      DEFINE_CLASS_ID(FastUnlock, Cmp, 1)
      DEFINE_CLASS_ID(SubTypeCheck,Cmp, 2)

  DEFINE_CLASS_ID(MergeMem, Node, 7)
  DEFINE_CLASS_ID(Bool,     Node, 8)
  DEFINE_CLASS_ID(AddP,     Node, 9)
  DEFINE_CLASS_ID(BoxLock,  Node, 10)
  DEFINE_CLASS_ID(Add,      Node, 11)
  DEFINE_CLASS_ID(Mul,      Node, 12)
  DEFINE_CLASS_ID(ClearArray, Node, 14)
  DEFINE_CLASS_ID(Halt,     Node, 15)
  DEFINE_CLASS_ID(Opaque1,  Node, 16)
  DEFINE_CLASS_ID(Move,     Node, 17)
  DEFINE_CLASS_ID(LShift,   Node, 18)

  // Must name the class that owns the highest bit so that every Class_*
  // value above is covered; LShift (bit 18) is the last direct child of Node.
  _max_classes  = ClassMask_LShift
};
#undef DEFINE_CLASS_ID
769 | |
// Per-node flag bits. Each flag owns exactly one bit; the flags are sorted
// by usage frequency, and _last_flag names the highest bit in use.
enum NodeFlags {
  Flag_is_Copy                     = 1 <<  0,  // should be first bit to avoid shift
  Flag_rematerialize               = 1 <<  1,
  Flag_needs_anti_dependence_check = 1 <<  2,
  Flag_is_macro                    = 1 <<  3,
  Flag_is_Con                      = 1 <<  4,
  Flag_is_cisc_alternate           = 1 <<  5,
  Flag_is_dead_loop_safe           = 1 <<  6,
  Flag_may_be_short_branch         = 1 <<  7,
  Flag_avoid_back_to_back_before   = 1 <<  8,
  Flag_avoid_back_to_back_after    = 1 <<  9,
  Flag_has_call                    = 1 << 10,
  Flag_is_reduction                = 1 << 11,
  Flag_is_scheduled                = 1 << 12,
  Flag_has_vector_mask_set         = 1 << 13,
  Flag_is_expensive                = 1 << 14,
  Flag_is_predicated_vector        = 1 << 15,
  Flag_for_post_loop_opts_igvn     = 1 << 16,
  _last_flag                       = Flag_for_post_loop_opts_igvn
};
791 | |
792 | class PD; |
793 | |
794 | private: |
795 | juint _class_id; |
796 | juint _flags; |
797 | |
798 | static juint max_flags(); |
799 | |
800 | protected: |
801 | // These methods should be called from constructors only. |
802 | void init_class_id(juint c) { |
803 | _class_id = c; // cast out const |
804 | } |
805 | void init_flags(uint fl) { |
806 | assert(fl <= max_flags(), "invalid node flag")do { if (!(fl <= max_flags())) { (*g_assert_poison) = 'X'; ; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 806, "assert(" "fl <= max_flags()" ") failed", "invalid node flag" ); ::breakpoint(); } } while (0); |
807 | _flags |= fl; |
808 | } |
809 | void clear_flag(uint fl) { |
810 | assert(fl <= max_flags(), "invalid node flag")do { if (!(fl <= max_flags())) { (*g_assert_poison) = 'X'; ; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 810, "assert(" "fl <= max_flags()" ") failed", "invalid node flag" ); ::breakpoint(); } } while (0); |
811 | _flags &= ~fl; |
812 | } |
813 | |
814 | public: |
815 | const juint class_id() const { return _class_id; } |
816 | |
817 | const juint flags() const { return _flags; } |
818 | |
819 | void add_flag(juint fl) { init_flags(fl); } |
820 | |
821 | void remove_flag(juint fl) { clear_flag(fl); } |
822 | |
823 | // Return a dense integer opcode number |
824 | virtual int Opcode() const; |
825 | |
826 | // Virtual inherited Node size |
827 | virtual uint size_of() const; |
828 | |
829 | // Other interesting Node properties |
// Generates three const members for node class 'type':
//   is_<type>()   true when _class_id masked with ClassMask_<type> equals Class_<type>
//   as_<type>()   'this' cast to <type>Node*; asserts is_<type>()
//   isa_<type>()  as_<type>() when is_<type>() holds, NULL otherwise
#define DEFINE_CLASS_QUERY(type) \
  bool is_##type() const { \
    return ((_class_id & ClassMask_##type) == Class_##type); \
  } \
  type##Node* as_##type() const { \
    assert(is_##type(), "invalid node class: %s", Name()); \
    return (type##Node*)this; \
  } \
  type##Node* isa_##type() const { \
    if (!is_##type()) { \
      return NULL; \
    } \
    return as_##type(); \
  }
841 | |
842 | DEFINE_CLASS_QUERY(AbstractLock) |
843 | DEFINE_CLASS_QUERY(Add) |
844 | DEFINE_CLASS_QUERY(AddP) |
845 | DEFINE_CLASS_QUERY(Allocate) |
846 | DEFINE_CLASS_QUERY(AllocateArray) |
847 | DEFINE_CLASS_QUERY(ArrayCopy) |
848 | DEFINE_CLASS_QUERY(BaseCountedLoop) |
849 | DEFINE_CLASS_QUERY(BaseCountedLoopEnd) |
850 | DEFINE_CLASS_QUERY(Bool) |
851 | DEFINE_CLASS_QUERY(BoxLock) |
852 | DEFINE_CLASS_QUERY(Call) |
853 | DEFINE_CLASS_QUERY(CallNative) |
854 | DEFINE_CLASS_QUERY(CallDynamicJava) |
855 | DEFINE_CLASS_QUERY(CallJava) |
856 | DEFINE_CLASS_QUERY(CallLeaf) |
857 | DEFINE_CLASS_QUERY(CallLeafNoFP) |
858 | DEFINE_CLASS_QUERY(CallRuntime) |
859 | DEFINE_CLASS_QUERY(CallStaticJava) |
860 | DEFINE_CLASS_QUERY(Catch) |
861 | DEFINE_CLASS_QUERY(CatchProj) |
862 | DEFINE_CLASS_QUERY(CheckCastPP) |
863 | DEFINE_CLASS_QUERY(CastII) |
864 | DEFINE_CLASS_QUERY(CastLL) |
865 | DEFINE_CLASS_QUERY(ConstraintCast) |
866 | DEFINE_CLASS_QUERY(ClearArray) |
867 | DEFINE_CLASS_QUERY(CMove) |
868 | DEFINE_CLASS_QUERY(Cmp) |
869 | DEFINE_CLASS_QUERY(CountedLoop) |
870 | DEFINE_CLASS_QUERY(CountedLoopEnd) |
871 | DEFINE_CLASS_QUERY(DecodeNarrowPtr) |
872 | DEFINE_CLASS_QUERY(DecodeN) |
873 | DEFINE_CLASS_QUERY(DecodeNKlass) |
874 | DEFINE_CLASS_QUERY(EncodeNarrowPtr) |
875 | DEFINE_CLASS_QUERY(EncodeP) |
876 | DEFINE_CLASS_QUERY(EncodePKlass) |
877 | DEFINE_CLASS_QUERY(FastLock) |
878 | DEFINE_CLASS_QUERY(FastUnlock) |
879 | DEFINE_CLASS_QUERY(Halt) |
880 | DEFINE_CLASS_QUERY(If) |
881 | DEFINE_CLASS_QUERY(RangeCheck) |
882 | DEFINE_CLASS_QUERY(IfProj) |
883 | DEFINE_CLASS_QUERY(IfFalse) |
884 | DEFINE_CLASS_QUERY(IfTrue) |
885 | DEFINE_CLASS_QUERY(Initialize) |
886 | DEFINE_CLASS_QUERY(Jump) |
887 | DEFINE_CLASS_QUERY(JumpProj) |
888 | DEFINE_CLASS_QUERY(LongCountedLoop) |
889 | DEFINE_CLASS_QUERY(LongCountedLoopEnd) |
890 | DEFINE_CLASS_QUERY(Load) |
891 | DEFINE_CLASS_QUERY(LoadStore) |
892 | DEFINE_CLASS_QUERY(LoadStoreConditional) |
893 | DEFINE_CLASS_QUERY(Lock) |
894 | DEFINE_CLASS_QUERY(Loop) |
895 | DEFINE_CLASS_QUERY(LShift) |
896 | DEFINE_CLASS_QUERY(Mach) |
897 | DEFINE_CLASS_QUERY(MachBranch) |
898 | DEFINE_CLASS_QUERY(MachCall) |
899 | DEFINE_CLASS_QUERY(MachCallNative) |
900 | DEFINE_CLASS_QUERY(MachCallDynamicJava) |
901 | DEFINE_CLASS_QUERY(MachCallJava) |
902 | DEFINE_CLASS_QUERY(MachCallLeaf) |
903 | DEFINE_CLASS_QUERY(MachCallRuntime) |
904 | DEFINE_CLASS_QUERY(MachCallStaticJava) |
905 | DEFINE_CLASS_QUERY(MachConstantBase) |
906 | DEFINE_CLASS_QUERY(MachConstant) |
907 | DEFINE_CLASS_QUERY(MachGoto) |
908 | DEFINE_CLASS_QUERY(MachIf) |
909 | DEFINE_CLASS_QUERY(MachJump) |
910 | DEFINE_CLASS_QUERY(MachNullCheck) |
911 | DEFINE_CLASS_QUERY(MachProj) |
912 | DEFINE_CLASS_QUERY(MachReturn) |
913 | DEFINE_CLASS_QUERY(MachSafePoint) |
914 | DEFINE_CLASS_QUERY(MachSpillCopy) |
915 | DEFINE_CLASS_QUERY(MachTemp) |
916 | DEFINE_CLASS_QUERY(MachMemBar) |
917 | DEFINE_CLASS_QUERY(MachMerge) |
918 | DEFINE_CLASS_QUERY(Mem) |
919 | DEFINE_CLASS_QUERY(MemBar) |
920 | DEFINE_CLASS_QUERY(MemBarStoreStore) |
921 | DEFINE_CLASS_QUERY(MergeMem) |
922 | DEFINE_CLASS_QUERY(Move) |
923 | DEFINE_CLASS_QUERY(Mul) |
924 | DEFINE_CLASS_QUERY(Multi) |
925 | DEFINE_CLASS_QUERY(MultiBranch) |
926 | DEFINE_CLASS_QUERY(Opaque1) |
927 | DEFINE_CLASS_QUERY(OuterStripMinedLoop) |
928 | DEFINE_CLASS_QUERY(OuterStripMinedLoopEnd) |
929 | DEFINE_CLASS_QUERY(Parm) |
930 | DEFINE_CLASS_QUERY(PCTable) |
931 | DEFINE_CLASS_QUERY(Phi) |
932 | DEFINE_CLASS_QUERY(Proj) |
933 | DEFINE_CLASS_QUERY(Region) |
934 | DEFINE_CLASS_QUERY(Root) |
935 | DEFINE_CLASS_QUERY(SafePoint) |
936 | DEFINE_CLASS_QUERY(SafePointScalarObject) |
937 | DEFINE_CLASS_QUERY(Start) |
938 | DEFINE_CLASS_QUERY(Store) |
939 | DEFINE_CLASS_QUERY(Sub) |
940 | DEFINE_CLASS_QUERY(SubTypeCheck) |
941 | DEFINE_CLASS_QUERY(Type) |
942 | DEFINE_CLASS_QUERY(Vector) |
943 | DEFINE_CLASS_QUERY(VectorMaskCmp) |
944 | DEFINE_CLASS_QUERY(VectorUnbox) |
945 | DEFINE_CLASS_QUERY(VectorReinterpret); |
946 | DEFINE_CLASS_QUERY(LoadVector) |
947 | DEFINE_CLASS_QUERY(LoadVectorGather) |
948 | DEFINE_CLASS_QUERY(StoreVector) |
949 | DEFINE_CLASS_QUERY(StoreVectorScatter) |
950 | DEFINE_CLASS_QUERY(ShiftV) |
951 | DEFINE_CLASS_QUERY(Unlock) |
952 | |
953 | #undef DEFINE_CLASS_QUERY |
954 | |
955 | // duplicate of is_MachSpillCopy() |
956 | bool is_SpillCopy () const { |
957 | return ((_class_id & ClassMask_MachSpillCopy) == Class_MachSpillCopy); |
958 | } |
959 | |
960 | bool is_Con () const { return (_flags & Flag_is_Con) != 0; } |
961 | // The data node which is safe to leave in dead loop during IGVN optimization. |
962 | bool is_dead_loop_safe() const; |
963 | |
964 | // is_Copy() returns copied edge index (0 or 1) |
965 | uint is_Copy() const { return (_flags & Flag_is_Copy); } |
966 | |
967 | virtual bool is_CFG() const { return false; } |
968 | |
969 | // If this node is control-dependent on a test, can it be |
970 | // rerouted to a dominating equivalent test? This is usually |
971 | // true of non-CFG nodes, but can be false for operations which |
972 | // depend for their correct sequencing on more than one test. |
973 | // (In that case, hoisting to a dominating test may silently |
974 | // skip some other important test.) |
975 | virtual bool depends_only_on_test() const { assert(!is_CFG(), "")do { if (!(!is_CFG())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 975, "assert(" "!is_CFG()" ") failed", ""); ::breakpoint(); } } while (0); return true; }; |
976 | |
977 | // When building basic blocks, I need to have a notion of block beginning |
978 | // Nodes, next block selector Nodes (block enders), and next block |
979 | // projections. These calls need to work on their machine equivalents. The |
980 | // Ideal beginning Nodes are RootNode, RegionNode and StartNode. |
981 | bool is_block_start() const { |
982 | if ( is_Region() ) |
983 | return this == (const Node*)in(0); |
984 | else |
985 | return is_Start(); |
986 | } |
987 | |
988 | // The Ideal control projection Nodes are IfTrue/IfFalse, JumpProjNode, Root, |
989 | // Goto and Return. This call also returns the block ending Node. |
990 | virtual const Node *is_block_proj() const; |
991 | |
992 | // The node is a "macro" node which needs to be expanded before matching |
993 | bool is_macro() const { return (_flags & Flag_is_macro) != 0; } |
994 | // The node is expensive: the best control is set during loop opts |
995 | bool is_expensive() const { return (_flags & Flag_is_expensive) != 0 && in(0) != NULL__null; } |
996 | |
997 | // An arithmetic node which accumulates a data in a loop. |
998 | // It must have the loop's phi as input and provide a def to the phi. |
999 | bool is_reduction() const { return (_flags & Flag_is_reduction) != 0; } |
1000 | |
1001 | bool is_predicated_vector() const { return (_flags & Flag_is_predicated_vector) != 0; } |
1002 | |
1003 | // The node is a CountedLoopEnd with a mask annotation so as to emit a restore context |
1004 | bool has_vector_mask_set() const { return (_flags & Flag_has_vector_mask_set) != 0; } |
1005 | |
1006 | // Used in lcm to mark nodes that have scheduled |
1007 | bool is_scheduled() const { return (_flags & Flag_is_scheduled) != 0; } |
1008 | |
1009 | bool for_post_loop_opts_igvn() const { return (_flags & Flag_for_post_loop_opts_igvn) != 0; } |
1010 | |
1011 | //----------------- Optimization |
1012 | |
1013 | // Get the worst-case Type output for this Node. |
1014 | virtual const class Type *bottom_type() const; |
1015 | |
1016 | // If we find a better type for a node, try to record it permanently. |
1017 | // Return true if this node actually changed. |
1018 | // Be sure to do the hash_delete game in the "rehash" variant. |
1019 | void raise_bottom_type(const Type* new_type); |
1020 | |
1021 | // Get the address type with which this node uses and/or defs memory, |
1022 | // or NULL if none. The address type is conservatively wide. |
1023 | // Returns non-null for calls, membars, loads, stores, etc. |
1024 | // Returns TypePtr::BOTTOM if the node touches memory "broadly". |
1025 | virtual const class TypePtr *adr_type() const { return NULL__null; } |
1026 | |
1027 | // Return an existing node which computes the same function as this node. |
1028 | // The optimistic combined algorithm requires this to return a Node which |
1029 | // is a small number of steps away (e.g., one of my inputs). |
1030 | virtual Node* Identity(PhaseGVN* phase); |
1031 | |
1032 | // Return the set of values this Node can take on at runtime. |
1033 | virtual const Type* Value(PhaseGVN* phase) const; |
1034 | |
1035 | // Return a node which is more "ideal" than the current node. |
1036 | // The invariants on this call are subtle. If in doubt, read the |
1037 | // treatise in node.cpp above the default implemention AND TEST WITH |
1038 | // +VerifyIterativeGVN! |
1039 | virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); |
1040 | |
1041 | // Some nodes have specific Ideal subgraph transformations only if they are |
1042 | // unique users of specific nodes. Such nodes should be put on IGVN worklist |
1043 | // for the transformations to happen. |
1044 | bool has_special_unique_user() const; |
1045 | |
1046 | // Skip Proj and CatchProj nodes chains. Check for Null and Top. |
1047 | Node* find_exact_control(Node* ctrl); |
1048 | |
1049 | // Check if 'this' node dominates or equal to 'sub'. |
1050 | bool dominates(Node* sub, Node_List &nlist); |
1051 | |
1052 | protected: |
1053 | bool remove_dead_region(PhaseGVN *phase, bool can_reshape); |
1054 | public: |
1055 | |
1056 | // See if there is valid pipeline info |
1057 | static const Pipeline *pipeline_class(); |
1058 | virtual const Pipeline *pipeline() const; |
1059 | |
1060 | // Compute the latency from the def to this instruction of the ith input node |
1061 | uint latency(uint i); |
1062 | |
1063 | // Hash & compare functions, for pessimistic value numbering |
1064 | |
1065 | // If the hash function returns the special sentinel value NO_HASH, |
1066 | // the node is guaranteed never to compare equal to any other node. |
1067 | // If we accidentally generate a hash with value NO_HASH the node |
1068 | // won't go into the table and we'll lose a little optimization. |
1069 | static const uint NO_HASH = 0; |
1070 | virtual uint hash() const; |
1071 | virtual bool cmp( const Node &n ) const; |
1072 | |
1073 | // Operation appears to be iteratively computed (such as an induction variable) |
1074 | // It is possible for this operation to return false for a loop-varying |
1075 | // value, if it appears (by local graph inspection) to be computed by a simple conditional. |
1076 | bool is_iteratively_computed(); |
1077 | |
1078 | // Determine if a node is a counted loop induction variable. |
1079 | // NOTE: The method is defined in "loopnode.cpp". |
1080 | bool is_cloop_ind_var() const; |
1081 | |
1082 | // Return a node with opcode "opc" and same inputs as "this" if one can |
1083 | // be found; Otherwise return NULL; |
1084 | Node* find_similar(int opc); |
1085 | |
1086 | // Return the unique control out if only one. Null if none or more than one. |
1087 | Node* unique_ctrl_out() const; |
1088 | |
1089 | // Set control or add control as precedence edge |
1090 | void ensure_control_or_add_prec(Node* c); |
1091 | |
1092 | //----------------- Code Generation |
1093 | |
1094 | // Ideal register class for Matching. Zero means unmatched instruction |
1095 | // (these are cloned instead of converted to machine nodes). |
1096 | virtual uint ideal_reg() const; |
1097 | |
1098 | static const uint NotAMachineReg; // must be > max. machine register |
1099 | |
1100 | // Do we Match on this edge index or not? Generally false for Control |
1101 | // and true for everything else. Weird for calls & returns. |
1102 | virtual uint match_edge(uint idx) const; |
1103 | |
1104 | // Register class output is returned in |
1105 | virtual const RegMask &out_RegMask() const; |
1106 | // Register class input is expected in |
1107 | virtual const RegMask &in_RegMask(uint) const; |
1108 | // Should we clone rather than spill this instruction? |
1109 | bool rematerialize() const; |
1110 | |
1111 | // Return JVM State Object if this Node carries debug info, or NULL otherwise |
1112 | virtual JVMState* jvms() const; |
1113 | |
1114 | // Print as assembly |
1115 | virtual void format( PhaseRegAlloc *, outputStream* st = tty ) const; |
1116 | // Emit bytes starting at parameter 'ptr' |
1117 | // Bump 'ptr' by the number of output bytes |
1118 | virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const; |
1119 | // Size of instruction in bytes |
1120 | virtual uint size(PhaseRegAlloc *ra_) const; |
1121 | |
1122 | // Convenience function to extract an integer constant from a node. |
1123 | // If it is not an integer constant (either Con, CastII, or Mach), |
1124 | // return value_if_unknown. |
1125 | jint find_int_con(jint value_if_unknown) const { |
1126 | const TypeInt* t = find_int_type(); |
1127 | return (t != NULL__null && t->is_con()) ? t->get_con() : value_if_unknown; |
1128 | } |
1129 | // Return the constant, knowing it is an integer constant already |
1130 | jint get_int() const { |
1131 | const TypeInt* t = find_int_type(); |
1132 | guarantee(t != NULL, "must be con")do { if (!(t != __null)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1132, "guarantee(" "t != NULL" ") failed", "must be con"); :: breakpoint(); } } while (0); |
1133 | return t->get_con(); |
1134 | } |
1135 | // Here's where the work is done. Can produce non-constant int types too. |
1136 | const TypeInt* find_int_type() const; |
1137 | const TypeInteger* find_integer_type(BasicType bt) const; |
1138 | |
1139 | // Same thing for long (and intptr_t, via type.hpp): |
1140 | jlong get_long() const { |
1141 | const TypeLong* t = find_long_type(); |
1142 | guarantee(t != NULL, "must be con")do { if (!(t != __null)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1142, "guarantee(" "t != NULL" ") failed", "must be con"); :: breakpoint(); } } while (0); |
1143 | return t->get_con(); |
1144 | } |
1145 | jlong find_long_con(jint value_if_unknown) const { |
1146 | const TypeLong* t = find_long_type(); |
1147 | return (t != NULL__null && t->is_con()) ? t->get_con() : value_if_unknown; |
1148 | } |
1149 | const TypeLong* find_long_type() const; |
1150 | |
1151 | jlong get_integer_as_long(BasicType bt) const { |
1152 | const TypeInteger* t = find_integer_type(bt); |
1153 | guarantee(t != NULL, "must be con")do { if (!(t != __null)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1153, "guarantee(" "t != NULL" ") failed", "must be con"); :: breakpoint(); } } while (0); |
1154 | return t->get_con_as_long(bt); |
1155 | } |
1156 | const TypePtr* get_ptr_type() const; |
1157 | |
1158 | // These guys are called by code generated by ADLC: |
1159 | intptr_t get_ptr() const; |
1160 | intptr_t get_narrowcon() const; |
1161 | jdouble getd() const; |
1162 | jfloat getf() const; |
1163 | |
1164 | // Nodes which are pinned into basic blocks |
1165 | virtual bool pinned() const { return false; } |
1166 | |
1167 | // Nodes which use memory without consuming it, hence need antidependences |
1168 | // More specifically, needs_anti_dependence_check returns true iff the node |
1169 | // (a) does a load, and (b) does not perform a store (except perhaps to a |
1170 | // stack slot or some other unaliased location). |
1171 | bool needs_anti_dependence_check() const; |
1172 | |
1173 | // Return which operand this instruction may cisc-spill. In other words, |
1174 | // return operand position that can convert from reg to memory access |
1175 | virtual int cisc_operand() const { return AdlcVMDeps::Not_cisc_spillable; } |
1176 | bool is_cisc_alternate() const { return (_flags & Flag_is_cisc_alternate) != 0; } |
1177 | |
1178 | // Whether this is a memory-writing machine node. |
1179 | bool is_memory_writer() const { return is_Mach() && bottom_type()->has_memory(); } |
1180 | |
1181 | //----------------- Printing, etc |
1182 | #ifndef PRODUCT |
1183 | private: |
1184 | int _indent; |
1185 | |
1186 | public: |
1187 | void set_indent(int indent) { _indent = indent; } |
1188 | |
1189 | private: |
1190 | static bool add_to_worklist(Node* n, Node_List* worklist, Arena* old_arena, VectorSet* old_space, VectorSet* new_space); |
1191 | public: |
1192 | Node* find(int idx, bool only_ctrl = false); // Search the graph for the given idx. |
1193 | Node* find_ctrl(int idx); // Search control ancestors for the given idx. |
1194 | void dump() const { dump("\n"); } // Print this node. |
1195 | void dump(const char* suffix, bool mark = false, outputStream *st = tty) const; // Print this node. |
1196 | void dump(int depth) const; // Print this node, recursively to depth d |
1197 | void dump_ctrl(int depth) const; // Print control nodes, to depth d |
1198 | void dump_comp() const; // Print this node in compact representation. |
1199 | // Print this node in compact representation. |
1200 | void dump_comp(const char* suffix, outputStream *st = tty) const; |
1201 | virtual void dump_req(outputStream *st = tty) const; // Print required-edge info |
1202 | virtual void dump_prec(outputStream *st = tty) const; // Print precedence-edge info |
1203 | virtual void dump_out(outputStream *st = tty) const; // Print the output edge info |
1204 | virtual void dump_spec(outputStream *st) const {}; // Print per-node info |
1205 | // Print compact per-node info |
1206 | virtual void dump_compact_spec(outputStream *st) const { dump_spec(st); } |
1207 | void dump_related() const; // Print related nodes (depends on node at hand). |
1208 | // Print related nodes up to given depths for input and output nodes. |
1209 | void dump_related(uint d_in, uint d_out) const; |
1210 | void dump_related_compact() const; // Print related nodes in compact representation. |
1211 | // Collect related nodes. |
1212 | virtual void related(GrowableArray<Node*> *in_rel, GrowableArray<Node*> *out_rel, bool compact) const; |
1213 | // Collect nodes starting from this node, explicitly including/excluding control and data links. |
1214 | void collect_nodes(GrowableArray<Node*> *ns, int d, bool ctrl, bool data) const; |
1215 | |
1216 | // Node collectors, to be used in implementations of Node::rel(). |
1217 | // Collect the entire data input graph. Include control inputs if requested. |
1218 | void collect_nodes_in_all_data(GrowableArray<Node*> *ns, bool ctrl) const; |
1219 | // Collect the entire control input graph. Include data inputs if requested. |
1220 | void collect_nodes_in_all_ctrl(GrowableArray<Node*> *ns, bool data) const; |
1221 | // Collect the entire output graph until hitting and including control nodes. |
1222 | void collect_nodes_out_all_ctrl_boundary(GrowableArray<Node*> *ns) const; |
1223 | |
1224 | void verify_edges(Unique_Node_List &visited); // Verify bi-directional edges |
1225 | static void verify(int verify_depth, VectorSet& visited, Node_List& worklist); |
1226 | |
1227 | // This call defines a class-unique string used to identify class instances |
1228 | virtual const char *Name() const; |
1229 | |
1230 | void dump_format(PhaseRegAlloc *ra) const; // debug access to MachNode::format(...) |
1231 | // RegMask Print Functions |
1232 | void dump_in_regmask(int idx) { in_RegMask(idx).dump(); } |
1233 | void dump_out_regmask() { out_RegMask().dump(); } |
1234 | static bool in_dump() { return Compile::current()->_in_dump_cnt > 0; } |
1235 | void fast_dump() const { |
1236 | tty->print("%4d: %-17s", _idx, Name()); |
1237 | for (uint i = 0; i < len(); i++) |
1238 | if (in(i)) |
1239 | tty->print(" %4d", in(i)->_idx); |
1240 | else |
1241 | tty->print(" NULL"); |
1242 | tty->print("\n"); |
1243 | } |
1244 | #endif |
1245 | #ifdef ASSERT1 |
1246 | void verify_construction(); |
1247 | bool verify_jvms(const JVMState* jvms) const; |
1248 | int _debug_idx; // Unique value assigned to every node. |
1249 | int debug_idx() const { return _debug_idx; } |
1250 | void set_debug_idx( int debug_idx ) { _debug_idx = debug_idx; } |
1251 | |
1252 | Node* _debug_orig; // Original version of this, if any. |
1253 | Node* debug_orig() const { return _debug_orig; } |
1254 | void set_debug_orig(Node* orig); // _debug_orig = orig |
1255 | void dump_orig(outputStream *st, bool print_key = true) const; |
1256 | |
1257 | int _hash_lock; // Barrier to modifications of nodes in the hash table |
1258 | void enter_hash_lock() { ++_hash_lock; assert(_hash_lock < 99, "in too many hash tables?")do { if (!(_hash_lock < 99)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1258, "assert(" "_hash_lock < 99" ") failed", "in too many hash tables?" ); ::breakpoint(); } } while (0); } |
1259 | void exit_hash_lock() { --_hash_lock; assert(_hash_lock >= 0, "mispaired hash locks")do { if (!(_hash_lock >= 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1259, "assert(" "_hash_lock >= 0" ") failed", "mispaired hash locks" ); ::breakpoint(); } } while (0); } |
1260 | |
1261 | static void init_NodeProperty(); |
1262 | |
1263 | #if OPTO_DU_ITERATOR_ASSERT1 |
1264 | const Node* _last_del; // The last deleted node. |
1265 | uint _del_tick; // Bumped when a deletion happens.. |
1266 | #endif |
1267 | #endif |
1268 | }; |
1269 | |
1270 | inline bool not_a_node(const Node* n) { |
1271 | if (n == NULL__null) return true; |
1272 | if (((intptr_t)n & 1) != 0) return true; // uninitialized, etc. |
1273 | if (*(address*)n == badAddress((address)::badAddressVal)) return true; // kill by Node::destruct |
1274 | return false; |
1275 | } |
1276 | |
1277 | //----------------------------------------------------------------------------- |
1278 | // Iterators over DU info, and associated Node functions. |
1279 | |
1280 | #if OPTO_DU_ITERATOR_ASSERT1 |
1281 | |
// State shared by the DU iterators for assertion checking.  All members exist
// only under ASSERT; otherwise the class is empty.
class DUIterator_Common {
#ifdef ASSERT
 protected:
  bool        _vdui;      // cached value of VerifyDUIterators
  const Node* _node;      // the node containing the _out array
  uint        _outcnt;    // cached node->_outcnt
  uint        _del_tick;  // cached node->_del_tick
  Node*       _last;      // last value produced by the iterator

  void sample(const Node* node);   // used by c'tor to set up for verifies
  void verify(const Node* node, bool at_end_ok = false);
  void verify_resync();
  void reset(const DUIterator_Common& that);

  // I_VDUI_ONLY(i, x) executes x only when iterator i has _vdui set; it
  // protects code conditionalized on VerifyDUIterators.
  #define I_VDUI_ONLY(i,x) { if ((i)._vdui) { x; } }
#else
  #define I_VDUI_ONLY(i,x) { }
#endif //ASSERT
};
1303 | |
1304 | #define VDUI_ONLY(x) I_VDUI_ONLY(*this, x) |
1305 | |
1306 | // Default DU iterator. Allows appends onto the out array. |
1307 | // Allows deletion from the out array only at the current point. |
1308 | // Usage: |
1309 | // for (DUIterator i = x->outs(); x->has_out(i); i++) { |
1310 | // Node* y = x->out(i); |
1311 | // ... |
1312 | // } |
1313 | // Compiles in product mode to a unsigned integer index, which indexes |
1314 | // onto a repeatedly reloaded base pointer of x->_out. The loop predicate |
1315 | // also reloads x->_outcnt. If you delete, you must perform "--i" just |
1316 | // before continuing the loop. You must delete only the last-produced |
1317 | // edge. You must delete only a single copy of the last-produced edge, |
1318 | // or else you must delete all copies at once (the first time the edge |
1319 | // is produced by the iterator). |
1320 | class DUIterator : public DUIterator_Common { |
1321 | friend class Node; |
1322 | |
1323 | // This is the index which provides the product-mode behavior. |
1324 | // Whatever the product-mode version of the system does to the |
1325 | // DUI index is done to this index. All other fields in |
1326 | // this class are used only for assertion checking. |
1327 | uint _idx; |
1328 | |
1329 | #ifdef ASSERT1 |
1330 | uint _refresh_tick; // Records the refresh activity. |
1331 | |
1332 | void sample(const Node* node); // Initialize _refresh_tick etc. |
1333 | void verify(const Node* node, bool at_end_ok = false); |
1334 | void verify_increment(); // Verify an increment operation. |
1335 | void verify_resync(); // Verify that we can back up over a deletion. |
1336 | void verify_finish(); // Verify that the loop terminated properly. |
1337 | void refresh(); // Resample verification info. |
1338 | void reset(const DUIterator& that); // Resample after assignment. |
1339 | #endif |
1340 | |
1341 | DUIterator(const Node* node, int dummy_to_avoid_conversion) |
1342 | { _idx = 0; debug_only(sample(node))sample(node); } |
1343 | |
1344 | public: |
1345 | // initialize to garbage; clear _vdui to disable asserts |
1346 | DUIterator() |
1347 | { /*initialize to garbage*/ debug_only(_vdui = false)_vdui = false; } |
1348 | |
1349 | DUIterator(const DUIterator& that) |
1350 | { _idx = that._idx; debug_only(_vdui = false; reset(that))_vdui = false; reset(that); } |
1351 | |
1352 | void operator++(int dummy_to_specify_postfix_op) |
1353 | { _idx++; VDUI_ONLY(verify_increment()); } |
1354 | |
1355 | void operator--() |
1356 | { VDUI_ONLY(verify_resync()); --_idx; } |
1357 | |
1358 | ~DUIterator() |
1359 | { VDUI_ONLY(verify_finish()); } |
1360 | |
1361 | void operator=(const DUIterator& that) |
1362 | { _idx = that._idx; debug_only(reset(that))reset(that); } |
1363 | }; |
1364 | |
1365 | DUIterator Node::outs() const |
1366 | { return DUIterator(this, 0); } |
1367 | DUIterator& Node::refresh_out_pos(DUIterator& i) const |
1368 | { I_VDUI_ONLY(i, i.refresh()); return i; } |
1369 | bool Node::has_out(DUIterator& i) const |
1370 | { I_VDUI_ONLY(i, i.verify(this,true));return i._idx < _outcnt; } |
1371 | Node* Node::out(DUIterator& i) const |
1372 | { I_VDUI_ONLY(i, i.verify(this)); return debug_only(i._last=)i._last= _out[i._idx]; } |
1373 | |
1374 | |
1375 | // Faster DU iterator. Disallows insertions into the out array. |
1376 | // Allows deletion from the out array only at the current point. |
1377 | // Usage: |
1378 | // for (DUIterator_Fast imax, i = x->fast_outs(imax); i < imax; i++) { |
1379 | // Node* y = x->fast_out(i); |
1380 | // ... |
1381 | // } |
1382 | // Compiles in product mode to raw Node** pointer arithmetic, with |
1383 | // no reloading of pointers from the original node x. If you delete, |
1384 | // you must perform "--i; --imax" just before continuing the loop. |
1385 | // If you delete multiple copies of the same edge, you must decrement |
1386 | // imax, but not i, multiple times: "--i, imax -= num_edges". |
1387 | class DUIterator_Fast : public DUIterator_Common { |
1388 | friend class Node; |
1389 | friend class DUIterator_Last; |
1390 | |
1391 | // This is the pointer which provides the product-mode behavior. |
1392 | // Whatever the product-mode version of the system does to the |
1393 | // DUI pointer is done to this pointer. All other fields in |
1394 | // this class are used only for assertion checking. |
1395 | Node** _outp; |
1396 | |
1397 | #ifdef ASSERT1 |
1398 | void verify(const Node* node, bool at_end_ok = false); |
1399 | void verify_limit(); |
1400 | void verify_resync(); |
1401 | void verify_relimit(uint n); |
1402 | void reset(const DUIterator_Fast& that); |
1403 | #endif |
1404 | |
1405 | // Note: offset must be signed, since -1 is sometimes passed |
1406 | DUIterator_Fast(const Node* node, ptrdiff_t offset) |
1407 | { _outp = node->_out + offset; debug_only(sample(node))sample(node); } |
1408 | |
1409 | public: |
1410 | // initialize to garbage; clear _vdui to disable asserts |
1411 | DUIterator_Fast() |
1412 | { /*initialize to garbage*/ debug_only(_vdui = false)_vdui = false; } |
1413 | |
1414 | DUIterator_Fast(const DUIterator_Fast& that) |
1415 | { _outp = that._outp; debug_only(_vdui = false; reset(that))_vdui = false; reset(that); } |
1416 | |
1417 | void operator++(int dummy_to_specify_postfix_op) |
1418 | { _outp++; VDUI_ONLY(verify(_node, true)); } |
1419 | |
1420 | void operator--() |
1421 | { VDUI_ONLY(verify_resync()); --_outp; } |
1422 | |
1423 | void operator-=(uint n) // applied to the limit only |
1424 | { _outp -= n; VDUI_ONLY(verify_relimit(n)); } |
1425 | |
1426 | bool operator<(DUIterator_Fast& limit) { |
1427 | I_VDUI_ONLY(*this, this->verify(_node, true)); |
1428 | I_VDUI_ONLY(limit, limit.verify_limit()); |
1429 | return _outp < limit._outp; |
1430 | } |
1431 | |
1432 | void operator=(const DUIterator_Fast& that) |
1433 | { _outp = that._outp; debug_only(reset(that))reset(that); } |
1434 | }; |
1435 | |
1436 | DUIterator_Fast Node::fast_outs(DUIterator_Fast& imax) const { |
1437 | // Assign a limit pointer to the reference argument: |
1438 | imax = DUIterator_Fast(this, (ptrdiff_t)_outcnt); |
1439 | // Return the base pointer: |
1440 | return DUIterator_Fast(this, 0); |
1441 | } |
1442 | Node* Node::fast_out(DUIterator_Fast& i) const { |
1443 | I_VDUI_ONLY(i, i.verify(this)); |
1444 | return debug_only(i._last=)i._last= *i._outp; |
1445 | } |
1446 | |
1447 | |
1448 | // Faster DU iterator. Requires each successive edge to be removed. |
1449 | // Does not allow insertion of any edges. |
1450 | // Usage: |
1451 | // for (DUIterator_Last imin, i = x->last_outs(imin); i >= imin; i -= num_edges) { |
1452 | // Node* y = x->last_out(i); |
1453 | // ... |
1454 | // } |
1455 | // Compiles in product mode to raw Node** pointer arithmetic, with |
1456 | // no reloading of pointers from the original node x. |
1457 | class DUIterator_Last : private DUIterator_Fast { |
1458 | friend class Node; |
1459 | |
1460 | #ifdef ASSERT1 |
1461 | void verify(const Node* node, bool at_end_ok = false); |
1462 | void verify_limit(); |
1463 | void verify_step(uint num_edges); |
1464 | #endif |
1465 | |
1466 | // Note: offset must be signed, since -1 is sometimes passed |
1467 | DUIterator_Last(const Node* node, ptrdiff_t offset) |
1468 | : DUIterator_Fast(node, offset) { } |
1469 | |
1470 | void operator++(int dummy_to_specify_postfix_op) {} // do not use |
1471 | void operator<(int) {} // do not use |
1472 | |
1473 | public: |
1474 | DUIterator_Last() { } |
1475 | // initialize to garbage |
1476 | |
1477 | DUIterator_Last(const DUIterator_Last& that) = default; |
1478 | |
1479 | void operator--() |
1480 | { _outp--; VDUI_ONLY(verify_step(1)); } |
1481 | |
1482 | void operator-=(uint n) |
1483 | { _outp -= n; VDUI_ONLY(verify_step(n)); } |
1484 | |
1485 | bool operator>=(DUIterator_Last& limit) { |
1486 | I_VDUI_ONLY(*this, this->verify(_node, true)); |
1487 | I_VDUI_ONLY(limit, limit.verify_limit()); |
1488 | return _outp >= limit._outp; |
1489 | } |
1490 | |
1491 | DUIterator_Last& operator=(const DUIterator_Last& that) = default; |
1492 | }; |
1493 | |
1494 | DUIterator_Last Node::last_outs(DUIterator_Last& imin) const { |
1495 | // Assign a limit pointer to the reference argument: |
1496 | imin = DUIterator_Last(this, 0); |
1497 | // Return the initial pointer: |
1498 | return DUIterator_Last(this, (ptrdiff_t)_outcnt - 1); |
1499 | } |
1500 | Node* Node::last_out(DUIterator_Last& i) const { |
1501 | I_VDUI_ONLY(i, i.verify(this)); |
1502 | return debug_only(i._last=)i._last= *i._outp; |
1503 | } |
1504 | |
1505 | #endif //OPTO_DU_ITERATOR_ASSERT |
1506 | |
1507 | #undef I_VDUI_ONLY |
1508 | #undef VDUI_ONLY |
1509 | |
1510 | // An Iterator that truly follows the iterator pattern. Doesn't |
1511 | // support deletion but could be made to. |
1512 | // |
1513 | // for (SimpleDUIterator i(n); i.has_next(); i.next()) { |
1514 | // Node* m = i.get(); |
1515 | // |
1516 | class SimpleDUIterator : public StackObj { |
1517 | private: |
1518 | Node* node; |
1519 | DUIterator_Fast i; |
1520 | DUIterator_Fast imax; |
1521 | public: |
1522 | SimpleDUIterator(Node* n): node(n), i(n->fast_outs(imax)) {} |
1523 | bool has_next() { return i < imax; } |
1524 | void next() { i++; } |
1525 | Node* get() { return node->fast_out(i); } |
1526 | }; |
1527 | |
1528 | |
1529 | //----------------------------------------------------------------------------- |
1530 | // Map dense integer indices to Nodes. Uses classic doubling-array trick. |
1531 | // Abstractly provides an infinite array of Node*'s, initialized to NULL. |
1532 | // Note that the constructor just zeros things, and since I use Arena |
1533 | // allocation I do not need a destructor to reclaim storage. |
1534 | class Node_Array : public ResourceObj { |
1535 | friend class VMStructs; |
1536 | protected: |
1537 | Arena* _a; // Arena to allocate in |
1538 | uint _max; |
1539 | Node** _nodes; |
1540 | void grow( uint i ); // Grow array node to fit |
1541 | public: |
1542 | Node_Array(Arena* a, uint max = OptoNodeListSize) : _a(a), _max(max) { |
1543 | _nodes = NEW_ARENA_ARRAY(a, Node*, max)(Node**) (a)->Amalloc((max) * sizeof(Node*)); |
1544 | clear(); |
1545 | } |
1546 | |
1547 | Node_Array(Node_Array* na) : _a(na->_a), _max(na->_max), _nodes(na->_nodes) {} |
1548 | Node *operator[] ( uint i ) const // Lookup, or NULL for not mapped |
1549 | { return (i<_max) ? _nodes[i] : (Node*)NULL__null; } |
1550 | Node* at(uint i) const { assert(i<_max,"oob")do { if (!(i<_max)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1550, "assert(" "i<_max" ") failed", "oob"); ::breakpoint (); } } while (0); return _nodes[i]; } |
1551 | Node** adr() { return _nodes; } |
1552 | // Extend the mapping: index i maps to Node *n. |
1553 | void map( uint i, Node *n ) { if( i>=_max ) grow(i); _nodes[i] = n; } |
1554 | void insert( uint i, Node *n ); |
1555 | void remove( uint i ); // Remove, preserving order |
1556 | // Clear all entries in _nodes to NULL but keep storage |
1557 | void clear() { |
1558 | Copy::zero_to_bytes(_nodes, _max * sizeof(Node*)); |
1559 | } |
1560 | |
1561 | uint Size() const { return _max; } |
1562 | void dump() const; |
1563 | }; |
1564 | |
1565 | class Node_List : public Node_Array { |
1566 | friend class VMStructs; |
1567 | uint _cnt; |
1568 | public: |
1569 | Node_List(uint max = OptoNodeListSize) : Node_Array(Thread::current()->resource_area(), max), _cnt(0) {} |
1570 | Node_List(Arena *a, uint max = OptoNodeListSize) : Node_Array(a, max), _cnt(0) {} |
1571 | bool contains(const Node* n) const { |
1572 | for (uint e = 0; e < size(); e++) { |
1573 | if (at(e) == n) return true; |
1574 | } |
1575 | return false; |
1576 | } |
1577 | void insert( uint i, Node *n ) { Node_Array::insert(i,n); _cnt++; } |
1578 | void remove( uint i ) { Node_Array::remove(i); _cnt--; } |
1579 | void push( Node *b ) { map(_cnt++,b); } |
1580 | void yank( Node *n ); // Find and remove |
1581 | Node *pop() { return _nodes[--_cnt]; } |
1582 | void clear() { _cnt = 0; Node_Array::clear(); } // retain storage |
1583 | void copy(const Node_List& from) { |
1584 | if (from._max > _max) { |
1585 | grow(from._max); |
1586 | } |
1587 | _cnt = from._cnt; |
1588 | Copy::conjoint_words_to_higher((HeapWord*)&from._nodes[0], (HeapWord*)&_nodes[0], from._max * sizeof(Node*)); |
1589 | } |
1590 | |
1591 | uint size() const { return _cnt; } |
1592 | void dump() const; |
1593 | void dump_simple() const; |
1594 | }; |
1595 | |
1596 | //------------------------------Unique_Node_List------------------------------- |
1597 | class Unique_Node_List : public Node_List { |
1598 | friend class VMStructs; |
1599 | VectorSet _in_worklist; |
1600 | uint _clock_index; // Index in list where to pop from next |
1601 | public: |
1602 | Unique_Node_List() : Node_List(), _clock_index(0) {} |
1603 | Unique_Node_List(Arena *a) : Node_List(a), _in_worklist(a), _clock_index(0) {} |
1604 | |
1605 | void remove( Node *n ); |
1606 | bool member( Node *n ) { return _in_worklist.test(n->_idx) != 0; } |
1607 | VectorSet& member_set(){ return _in_worklist; } |
1608 | |
1609 | void push(Node* b) { |
1610 | if( !_in_worklist.test_set(b->_idx) ) |
1611 | Node_List::push(b); |
1612 | } |
1613 | Node *pop() { |
1614 | if( _clock_index >= size() ) _clock_index = 0; |
1615 | Node *b = at(_clock_index); |
1616 | map( _clock_index, Node_List::pop()); |
1617 | if (size() != 0) _clock_index++; // Always start from 0 |
1618 | _in_worklist.remove(b->_idx); |
1619 | return b; |
1620 | } |
1621 | Node *remove(uint i) { |
1622 | Node *b = Node_List::at(i); |
1623 | _in_worklist.remove(b->_idx); |
1624 | map(i,Node_List::pop()); |
1625 | return b; |
1626 | } |
1627 | void yank(Node *n) { |
1628 | _in_worklist.remove(n->_idx); |
1629 | Node_List::yank(n); |
1630 | } |
1631 | void clear() { |
1632 | _in_worklist.clear(); // Discards storage but grows automatically |
1633 | Node_List::clear(); |
1634 | _clock_index = 0; |
1635 | } |
1636 | |
1637 | // Used after parsing to remove useless nodes before Iterative GVN |
1638 | void remove_useless_nodes(VectorSet& useful); |
1639 | |
1640 | bool contains(const Node* n) const { |
1641 | fatal("use faster member() instead")do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1641, "use faster member() instead"); ::breakpoint(); } while (0); |
1642 | return false; |
1643 | } |
1644 | |
1645 | #ifndef PRODUCT |
1646 | void print_set() const { _in_worklist.print(); } |
1647 | #endif |
1648 | }; |
1649 | |
1650 | // Inline definition of Compile::record_for_igvn must be deferred to this point. |
1651 | inline void Compile::record_for_igvn(Node* n) { |
1652 | _for_igvn->push(n); |
1653 | } |
1654 | |
1655 | //------------------------------Node_Stack------------------------------------- |
1656 | class Node_Stack { |
1657 | friend class VMStructs; |
1658 | protected: |
1659 | struct INode { |
1660 | Node *node; // Processed node |
1661 | uint indx; // Index of next node's child |
1662 | }; |
1663 | INode *_inode_top; // tos, stack grows up |
1664 | INode *_inode_max; // End of _inodes == _inodes + _max |
1665 | INode *_inodes; // Array storage for the stack |
1666 | Arena *_a; // Arena to allocate in |
1667 | void grow(); |
1668 | public: |
1669 | Node_Stack(int size) { |
1670 | size_t max = (size > OptoNodeListSize) ? size : OptoNodeListSize; |
1671 | _a = Thread::current()->resource_area(); |
1672 | _inodes = NEW_ARENA_ARRAY( _a, INode, max )(INode*) (_a)->Amalloc((max) * sizeof(INode)); |
1673 | _inode_max = _inodes + max; |
1674 | _inode_top = _inodes - 1; // stack is empty |
1675 | } |
1676 | |
1677 | Node_Stack(Arena *a, int size) : _a(a) { |
1678 | size_t max = (size > OptoNodeListSize) ? size : OptoNodeListSize; |
1679 | _inodes = NEW_ARENA_ARRAY( _a, INode, max )(INode*) (_a)->Amalloc((max) * sizeof(INode)); |
1680 | _inode_max = _inodes + max; |
1681 | _inode_top = _inodes - 1; // stack is empty |
1682 | } |
1683 | |
1684 | void pop() { |
1685 | assert(_inode_top >= _inodes, "node stack underflow")do { if (!(_inode_top >= _inodes)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1685, "assert(" "_inode_top >= _inodes" ") failed", "node stack underflow" ); ::breakpoint(); } } while (0); |
1686 | --_inode_top; |
1687 | } |
1688 | void push(Node *n, uint i) { |
1689 | ++_inode_top; |
1690 | if (_inode_top >= _inode_max) grow(); |
1691 | INode *top = _inode_top; // optimization |
1692 | top->node = n; |
1693 | top->indx = i; |
1694 | } |
1695 | Node *node() const { |
1696 | return _inode_top->node; |
1697 | } |
1698 | Node* node_at(uint i) const { |
1699 | assert(_inodes + i <= _inode_top, "in range")do { if (!(_inodes + i <= _inode_top)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1699, "assert(" "_inodes + i <= _inode_top" ") failed", "in range" ); ::breakpoint(); } } while (0); |
1700 | return _inodes[i].node; |
1701 | } |
1702 | uint index() const { |
1703 | return _inode_top->indx; |
1704 | } |
1705 | uint index_at(uint i) const { |
1706 | assert(_inodes + i <= _inode_top, "in range")do { if (!(_inodes + i <= _inode_top)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1706, "assert(" "_inodes + i <= _inode_top" ") failed", "in range" ); ::breakpoint(); } } while (0); |
1707 | return _inodes[i].indx; |
1708 | } |
1709 | void set_node(Node *n) { |
1710 | _inode_top->node = n; |
1711 | } |
1712 | void set_index(uint i) { |
1713 | _inode_top->indx = i; |
1714 | } |
1715 | uint size_max() const { return (uint)pointer_delta(_inode_max, _inodes, sizeof(INode)); } // Max size |
1716 | uint size() const { return (uint)pointer_delta((_inode_top+1), _inodes, sizeof(INode)); } // Current size |
1717 | bool is_nonempty() const { return (_inode_top >= _inodes); } |
1718 | bool is_empty() const { return (_inode_top < _inodes); } |
1719 | void clear() { _inode_top = _inodes - 1; } // retain storage |
1720 | |
1721 | // Node_Stack is used to map nodes. |
1722 | Node* find(uint idx) const; |
1723 | }; |
1724 | |
1725 | |
1726 | //-----------------------------Node_Notes-------------------------------------- |
1727 | // Debugging or profiling annotations loosely and sparsely associated |
1728 | // with some nodes. See Compile::node_notes_at for the accessor. |
1729 | class Node_Notes { |
1730 | friend class VMStructs; |
1731 | JVMState* _jvms; |
1732 | |
1733 | public: |
1734 | Node_Notes(JVMState* jvms = NULL__null) { |
1735 | _jvms = jvms; |
1736 | } |
1737 | |
1738 | JVMState* jvms() { return _jvms; } |
1739 | void set_jvms(JVMState* x) { _jvms = x; } |
1740 | |
1741 | // True if there is nothing here. |
1742 | bool is_clear() { |
1743 | return (_jvms == NULL__null); |
1744 | } |
1745 | |
1746 | // Make there be nothing here. |
1747 | void clear() { |
1748 | _jvms = NULL__null; |
1749 | } |
1750 | |
1751 | // Make a new, clean node notes. |
1752 | static Node_Notes* make(Compile* C) { |
1753 | Node_Notes* nn = NEW_ARENA_ARRAY(C->comp_arena(), Node_Notes, 1)(Node_Notes*) (C->comp_arena())->Amalloc((1) * sizeof(Node_Notes )); |
1754 | nn->clear(); |
1755 | return nn; |
1756 | } |
1757 | |
1758 | Node_Notes* clone(Compile* C) { |
1759 | Node_Notes* nn = NEW_ARENA_ARRAY(C->comp_arena(), Node_Notes, 1)(Node_Notes*) (C->comp_arena())->Amalloc((1) * sizeof(Node_Notes )); |
1760 | (*nn) = (*this); |
1761 | return nn; |
1762 | } |
1763 | |
1764 | // Absorb any information from source. |
1765 | bool update_from(Node_Notes* source) { |
1766 | bool changed = false; |
1767 | if (source != NULL__null) { |
1768 | if (source->jvms() != NULL__null) { |
1769 | set_jvms(source->jvms()); |
1770 | changed = true; |
1771 | } |
1772 | } |
1773 | return changed; |
1774 | } |
1775 | }; |
1776 | |
1777 | // Inlined accessors for Compile::node_nodes that require the preceding class: |
1778 | inline Node_Notes* |
1779 | Compile::locate_node_notes(GrowableArray<Node_Notes*>* arr, |
1780 | int idx, bool can_grow) { |
1781 | assert(idx >= 0, "oob")do { if (!(idx >= 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1781, "assert(" "idx >= 0" ") failed", "oob"); ::breakpoint (); } } while (0); |
1782 | int block_idx = (idx >> _log2_node_notes_block_size); |
1783 | int grow_by = (block_idx - (arr == NULL__null? 0: arr->length())); |
1784 | if (grow_by >= 0) { |
1785 | if (!can_grow) return NULL__null; |
1786 | grow_node_notes(arr, grow_by + 1); |
1787 | } |
1788 | if (arr == NULL__null) return NULL__null; |
1789 | // (Every element of arr is a sub-array of length _node_notes_block_size.) |
1790 | return arr->at(block_idx) + (idx & (_node_notes_block_size-1)); |
1791 | } |
1792 | |
1793 | inline bool |
1794 | Compile::set_node_notes_at(int idx, Node_Notes* value) { |
1795 | if (value == NULL__null || value->is_clear()) |
1796 | return false; // nothing to write => write nothing |
1797 | Node_Notes* loc = locate_node_notes(_node_note_array, idx, true); |
1798 | assert(loc != NULL, "")do { if (!(loc != __null)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1798, "assert(" "loc != __null" ") failed", ""); ::breakpoint (); } } while (0); |
1799 | return loc->update_from(value); |
1800 | } |
1801 | |
1802 | |
1803 | //------------------------------TypeNode--------------------------------------- |
1804 | // Node with a Type constant. |
1805 | class TypeNode : public Node { |
1806 | protected: |
1807 | virtual uint hash() const; // Check the type |
1808 | virtual bool cmp( const Node &n ) const; |
1809 | virtual uint size_of() const; // Size is bigger |
1810 | const Type* const _type; |
1811 | public: |
1812 | void set_type(const Type* t) { |
1813 | assert(t != NULL, "sanity")do { if (!(t != __null)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1813, "assert(" "t != __null" ") failed", "sanity"); ::breakpoint (); } } while (0); |
1814 | debug_only(uint check_hash = (VerifyHashTableKeys && _hash_lock) ? hash() : NO_HASH)uint check_hash = (VerifyHashTableKeys && _hash_lock) ? hash() : NO_HASH; |
1815 | *(const Type**)&_type = t; // cast away const-ness |
1816 | // If this node is in the hash table, make sure it doesn't need a rehash. |
1817 | assert(check_hash == NO_HASH || check_hash == hash(), "type change must preserve hash code")do { if (!(check_hash == NO_HASH || check_hash == hash())) { ( *g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1817, "assert(" "check_hash == NO_HASH || check_hash == hash()" ") failed", "type change must preserve hash code"); ::breakpoint (); } } while (0); |
1818 | } |
1819 | const Type* type() const { assert(_type != NULL, "sanity")do { if (!(_type != __null)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1819, "assert(" "_type != __null" ") failed", "sanity"); :: breakpoint(); } } while (0); return _type; }; |
1820 | TypeNode( const Type *t, uint required ) : Node(required), _type(t) { |
1821 | init_class_id(Class_Type); |
1822 | } |
1823 | virtual const Type* Value(PhaseGVN* phase) const; |
1824 | virtual const Type *bottom_type() const; |
1825 | virtual uint ideal_reg() const; |
1826 | #ifndef PRODUCT |
1827 | virtual void dump_spec(outputStream *st) const; |
1828 | virtual void dump_compact_spec(outputStream *st) const; |
1829 | #endif |
1830 | }; |
1831 | |
1832 | #include "opto/opcodes.hpp" |
1833 | |
1834 | #define Op_IL(op)inline int Op_op(BasicType bt) { do { if (!(bt == T_INT || bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1834, "assert(" "bt == T_INT || bt == T_LONG" ") failed", "only for int or longs" ); ::breakpoint(); } } while (0); if (bt == T_INT) { return Op_opI ; } return Op_opL; } \ |
1835 | inline int Op_ ## op(BasicType bt) { \ |
1836 | assert(bt == T_INT || bt == T_LONG, "only for int or longs")do { if (!(bt == T_INT || bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1836, "assert(" "bt == T_INT || bt == T_LONG" ") failed", "only for int or longs" ); ::breakpoint(); } } while (0); \ |
1837 | if (bt == T_INT) { \ |
1838 | return Op_## op ## I; \ |
1839 | } \ |
1840 | return Op_## op ## L; \ |
1841 | } |
1842 | |
1843 | Op_IL(Add)inline int Op_Add(BasicType bt) { do { if (!(bt == T_INT || bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1843, "assert(" "bt == T_INT || bt == T_LONG" ") failed", "only for int or longs" ); ::breakpoint(); } } while (0); if (bt == T_INT) { return Op_AddI ; } return Op_AddL; } |
1844 | Op_IL(Sub)inline int Op_Sub(BasicType bt) { do { if (!(bt == T_INT || bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1844, "assert(" "bt == T_INT || bt == T_LONG" ") failed", "only for int or longs" ); ::breakpoint(); } } while (0); if (bt == T_INT) { return Op_SubI ; } return Op_SubL; } |
1845 | Op_IL(Mul)inline int Op_Mul(BasicType bt) { do { if (!(bt == T_INT || bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1845, "assert(" "bt == T_INT || bt == T_LONG" ") failed", "only for int or longs" ); ::breakpoint(); } } while (0); if (bt == T_INT) { return Op_MulI ; } return Op_MulL; } |
1846 | Op_IL(URShift)inline int Op_URShift(BasicType bt) { do { if (!(bt == T_INT || bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error( "/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1846, "assert(" "bt == T_INT || bt == T_LONG" ") failed", "only for int or longs" ); ::breakpoint(); } } while (0); if (bt == T_INT) { return Op_URShiftI ; } return Op_URShiftL; } |
1847 | Op_IL(LShift)inline int Op_LShift(BasicType bt) { do { if (!(bt == T_INT || bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error( "/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1847, "assert(" "bt == T_INT || bt == T_LONG" ") failed", "only for int or longs" ); ::breakpoint(); } } while (0); if (bt == T_INT) { return Op_LShiftI ; } return Op_LShiftL; } |
1848 | Op_IL(Xor)inline int Op_Xor(BasicType bt) { do { if (!(bt == T_INT || bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1848, "assert(" "bt == T_INT || bt == T_LONG" ") failed", "only for int or longs" ); ::breakpoint(); } } while (0); if (bt == T_INT) { return Op_XorI ; } return Op_XorL; } |
1849 | Op_IL(Cmp)inline int Op_Cmp(BasicType bt) { do { if (!(bt == T_INT || bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1849, "assert(" "bt == T_INT || bt == T_LONG" ") failed", "only for int or longs" ); ::breakpoint(); } } while (0); if (bt == T_INT) { return Op_CmpI ; } return Op_CmpL; } |
1850 | |
1851 | inline int Op_Cmp_unsigned(BasicType bt) { |
1852 | assert(bt == T_INT || bt == T_LONG, "only for int or longs")do { if (!(bt == T_INT || bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1852, "assert(" "bt == T_INT || bt == T_LONG" ") failed", "only for int or longs" ); ::breakpoint(); } } while (0); |
1853 | if (bt == T_INT) { |
1854 | return Op_CmpU; |
1855 | } |
1856 | return Op_CmpUL; |
1857 | } |
1858 | |
1859 | inline int Op_Cast(BasicType bt) { |
1860 | assert(bt == T_INT || bt == T_LONG, "only for int or longs")do { if (!(bt == T_INT || bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/share/opto/node.hpp" , 1860, "assert(" "bt == T_INT || bt == T_LONG" ") failed", "only for int or longs" ); ::breakpoint(); } } while (0); |
1861 | if (bt == T_INT) { |
1862 | return Op_CastII; |
1863 | } |
1864 | return Op_CastLL; |
1865 | } |
1866 | |
1867 | #endif // SHARE_OPTO_NODE_HPP |