summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Richard Sandiford <richard.sandiford@arm.com> 2022-06-15 11:12:51 +0100
committer: Kewen Lin <linkw@linux.ibm.com> 2022-06-22 21:23:59 -0500
commit: bb1a6d92989a50df13b1a21085c86625089c9e53 (patch)
tree: 10c914d395b17b691ecf0be1b7465cbf1e38b474
parent: vect: Move suggested_unroll_factor applying [PR105940] (diff)
download: gcc-bb1a6d92989a50df13b1a21085c86625089c9e53.tar.gz
gcc-bb1a6d92989a50df13b1a21085c86625089c9e53.tar.bz2
gcc-bb1a6d92989a50df13b1a21085c86625089c9e53.tar.xz
aarch64: Revert bogus fix for PR105254
In f2ebf2d98efe0ac2314b58cf474f44cb8ebd5244 I'd forced the
chosen unroll factor to be a factor of the VF, in order to
work around an exact_div ICE in PR105254. This was completely
bogus -- clearly I didn't look in enough detail at why we ended
up with an unrolled VF that wasn't a multiple of the UF.

Kewen has since fixed the bug properly for PR105940, so this
patch reverts my earlier attempt. Sorry for the stupidity.

gcc/
	PR tree-optimization/105254
	PR tree-optimization/105940
	Revert:
	* config/aarch64/aarch64.cc
	(aarch64_vector_costs::determine_suggested_unroll_factor): Take a
	loop_vec_info as argument. Restrict the unroll factor to values
	that divide the VF.
	(aarch64_vector_costs::finish_cost): Update call accordingly.

gcc/testsuite/
	* gcc.target/aarch64/sve/cost_model_14.c: New test.

(cherry picked from commit 2636660b6f35423e0cfbf53bfad5c5fed6ae6471)
-rw-r--r-- gcc/config/aarch64/aarch64.cc 12
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c 13
2 files changed, 17 insertions, 8 deletions
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index f4d2a800f39..5c9e7791a12 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -15637,7 +15637,7 @@ private:
15637 unsigned int adjust_body_cost (loop_vec_info, const aarch64_vector_costs *, 15637 unsigned int adjust_body_cost (loop_vec_info, const aarch64_vector_costs *,
15638 unsigned int); 15638 unsigned int);
15639 bool prefer_unrolled_loop () const; 15639 bool prefer_unrolled_loop () const;
15640 unsigned int determine_suggested_unroll_factor (loop_vec_info); 15640 unsigned int determine_suggested_unroll_factor ();
15641 15641
15642 /* True if we have performed one-time initialization based on the 15642 /* True if we have performed one-time initialization based on the
15643 vec_info. */ 15643 vec_info. */
@@ -16746,8 +16746,7 @@ adjust_body_cost_sve (const aarch64_vec_op_count *ops,
16746} 16746}
16747 16747
16748unsigned int 16748unsigned int
16749aarch64_vector_costs:: 16749aarch64_vector_costs::determine_suggested_unroll_factor ()
16750determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
16751{ 16750{
16752 bool sve = m_vec_flags & VEC_ANY_SVE; 16751 bool sve = m_vec_flags & VEC_ANY_SVE;
16753 /* If we are trying to unroll an Advanced SIMD main loop that contains 16752 /* If we are trying to unroll an Advanced SIMD main loop that contains
@@ -16761,7 +16760,6 @@ determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
16761 return 1; 16760 return 1;
16762 16761
16763 unsigned int max_unroll_factor = 1; 16762 unsigned int max_unroll_factor = 1;
16764 auto vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
16765 for (auto vec_ops : m_ops) 16763 for (auto vec_ops : m_ops)
16766 { 16764 {
16767 aarch64_simd_vec_issue_info const *vec_issue 16765 aarch64_simd_vec_issue_info const *vec_issue
@@ -16770,8 +16768,7 @@ determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
16770 return 1; 16768 return 1;
16771 /* Limit unroll factor to a value adjustable by the user, the default 16769 /* Limit unroll factor to a value adjustable by the user, the default
16772 value is 4. */ 16770 value is 4. */
16773 unsigned int unroll_factor = MIN (aarch64_vect_unroll_limit, 16771 unsigned int unroll_factor = aarch64_vect_unroll_limit;
16774 (int) known_alignment (vf));
16775 unsigned int factor 16772 unsigned int factor
16776 = vec_ops.reduction_latency > 1 ? vec_ops.reduction_latency : 1; 16773 = vec_ops.reduction_latency > 1 ? vec_ops.reduction_latency : 1;
16777 unsigned int temp; 16774 unsigned int temp;
@@ -16949,8 +16946,7 @@ aarch64_vector_costs::finish_cost (const vector_costs *uncast_scalar_costs)
16949 { 16946 {
16950 m_costs[vect_body] = adjust_body_cost (loop_vinfo, scalar_costs, 16947 m_costs[vect_body] = adjust_body_cost (loop_vinfo, scalar_costs,
16951 m_costs[vect_body]); 16948 m_costs[vect_body]);
16952 m_suggested_unroll_factor 16949 m_suggested_unroll_factor = determine_suggested_unroll_factor ();
16953 = determine_suggested_unroll_factor (loop_vinfo);
16954 } 16950 }
16955 16951
16956 /* Apply the heuristic described above m_stp_sequence_cost. Prefer 16952 /* Apply the heuristic described above m_stp_sequence_cost. Prefer
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c
new file mode 100644
index 00000000000..b65826b0889
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c
@@ -0,0 +1,13 @@
1/* { dg-options "-O3 -mtune=neoverse-v1" } */
2
3#include <stdint.h>
4
5uint64_t f2(uint64_t *ptr, int n) {
6 uint64_t res = 0;
7 for (int i = 0; i < n; ++i)
8 res += ptr[i];
9 return res;
10}
11
12/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 5 } } */
13/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d,} 8 } } */