diff options
author | Richard Sandiford <richard.sandiford@arm.com> | 2022-06-15 11:12:51 +0100 |
---|---|---|
committer | Kewen Lin <linkw@linux.ibm.com> | 2022-06-22 21:23:59 -0500 |
commit | bb1a6d92989a50df13b1a21085c86625089c9e53 (patch) | |
tree | 10c914d395b17b691ecf0be1b7465cbf1e38b474 | |
parent | vect: Move suggested_unroll_factor applying [PR105940] (diff) | |
download | gcc-bb1a6d92989a50df13b1a21085c86625089c9e53.tar.gz gcc-bb1a6d92989a50df13b1a21085c86625089c9e53.tar.bz2 gcc-bb1a6d92989a50df13b1a21085c86625089c9e53.tar.xz |
aarch64: Revert bogus fix for PR105254
In f2ebf2d98efe0ac2314b58cf474f44cb8ebd5244 I'd forced the
chosen unroll factor (UF) to be a factor of the vectorization
factor (VF), in order to work around an exact_div ICE in PR105254.
This was completely bogus -- clearly I didn't look in enough detail
at why we ended up with an unrolled VF that wasn't a multiple of the UF.
Kewen has since fixed the bug properly for PR105940, so this
patch reverts my earlier attempt. Sorry for the stupidity.
gcc/
PR tree-optimization/105254
PR tree-optimization/105940
Revert:
* config/aarch64/aarch64.cc
(aarch64_vector_costs::determine_suggested_unroll_factor): Take a
loop_vec_info as argument. Restrict the unroll factor to values
that divide the VF.
(aarch64_vector_costs::finish_cost): Update call accordingly.
gcc/testsuite/
* gcc.target/aarch64/sve/cost_model_14.c: New test.
(cherry picked from commit 2636660b6f35423e0cfbf53bfad5c5fed6ae6471)
-rw-r--r-- | gcc/config/aarch64/aarch64.cc | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c | 13 |
2 files changed, 17 insertions, 8 deletions
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index f4d2a800f39..5c9e7791a12 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc | |||
@@ -15637,7 +15637,7 @@ private: | |||
15637 | unsigned int adjust_body_cost (loop_vec_info, const aarch64_vector_costs *, | 15637 | unsigned int adjust_body_cost (loop_vec_info, const aarch64_vector_costs *, |
15638 | unsigned int); | 15638 | unsigned int); |
15639 | bool prefer_unrolled_loop () const; | 15639 | bool prefer_unrolled_loop () const; |
15640 | unsigned int determine_suggested_unroll_factor (loop_vec_info); | 15640 | unsigned int determine_suggested_unroll_factor (); |
15641 | 15641 | ||
15642 | /* True if we have performed one-time initialization based on the | 15642 | /* True if we have performed one-time initialization based on the |
15643 | vec_info. */ | 15643 | vec_info. */ |
@@ -16746,8 +16746,7 @@ adjust_body_cost_sve (const aarch64_vec_op_count *ops, | |||
16746 | } | 16746 | } |
16747 | 16747 | ||
16748 | unsigned int | 16748 | unsigned int |
16749 | aarch64_vector_costs:: | 16749 | aarch64_vector_costs::determine_suggested_unroll_factor () |
16750 | determine_suggested_unroll_factor (loop_vec_info loop_vinfo) | ||
16751 | { | 16750 | { |
16752 | bool sve = m_vec_flags & VEC_ANY_SVE; | 16751 | bool sve = m_vec_flags & VEC_ANY_SVE; |
16753 | /* If we are trying to unroll an Advanced SIMD main loop that contains | 16752 | /* If we are trying to unroll an Advanced SIMD main loop that contains |
@@ -16761,7 +16760,6 @@ determine_suggested_unroll_factor (loop_vec_info loop_vinfo) | |||
16761 | return 1; | 16760 | return 1; |
16762 | 16761 | ||
16763 | unsigned int max_unroll_factor = 1; | 16762 | unsigned int max_unroll_factor = 1; |
16764 | auto vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); | ||
16765 | for (auto vec_ops : m_ops) | 16763 | for (auto vec_ops : m_ops) |
16766 | { | 16764 | { |
16767 | aarch64_simd_vec_issue_info const *vec_issue | 16765 | aarch64_simd_vec_issue_info const *vec_issue |
@@ -16770,8 +16768,7 @@ determine_suggested_unroll_factor (loop_vec_info loop_vinfo) | |||
16770 | return 1; | 16768 | return 1; |
16771 | /* Limit unroll factor to a value adjustable by the user, the default | 16769 | /* Limit unroll factor to a value adjustable by the user, the default |
16772 | value is 4. */ | 16770 | value is 4. */ |
16773 | unsigned int unroll_factor = MIN (aarch64_vect_unroll_limit, | 16771 | unsigned int unroll_factor = aarch64_vect_unroll_limit; |
16774 | (int) known_alignment (vf)); | ||
16775 | unsigned int factor | 16772 | unsigned int factor |
16776 | = vec_ops.reduction_latency > 1 ? vec_ops.reduction_latency : 1; | 16773 | = vec_ops.reduction_latency > 1 ? vec_ops.reduction_latency : 1; |
16777 | unsigned int temp; | 16774 | unsigned int temp; |
@@ -16949,8 +16946,7 @@ aarch64_vector_costs::finish_cost (const vector_costs *uncast_scalar_costs) | |||
16949 | { | 16946 | { |
16950 | m_costs[vect_body] = adjust_body_cost (loop_vinfo, scalar_costs, | 16947 | m_costs[vect_body] = adjust_body_cost (loop_vinfo, scalar_costs, |
16951 | m_costs[vect_body]); | 16948 | m_costs[vect_body]); |
16952 | m_suggested_unroll_factor | 16949 | m_suggested_unroll_factor = determine_suggested_unroll_factor (); |
16953 | = determine_suggested_unroll_factor (loop_vinfo); | ||
16954 | } | 16950 | } |
16955 | 16951 | ||
16956 | /* Apply the heuristic described above m_stp_sequence_cost. Prefer | 16952 | /* Apply the heuristic described above m_stp_sequence_cost. Prefer |
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c new file mode 100644 index 00000000000..b65826b0889 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c | |||
@@ -0,0 +1,13 @@ | |||
1 | /* { dg-options "-O3 -mtune=neoverse-v1" } */ | ||
2 | |||
3 | #include <stdint.h> | ||
4 | |||
5 | uint64_t f2(uint64_t *ptr, int n) { | ||
6 | uint64_t res = 0; | ||
7 | for (int i = 0; i < n; ++i) | ||
8 | res += ptr[i]; | ||
9 | return res; | ||
10 | } | ||
11 | |||
12 | /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 5 } } */ | ||
13 | /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d,} 8 } } */ | ||