summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author liuhongt <hongtao.liu@intel.com> 2022-05-19 15:32:22 +0800
committer liuhongt <hongtao.liu@intel.com> 2022-05-23 09:57:04 +0800
commit 657612fb9f58c9cca44b091e3cf40d704fe3ec75 (patch)
tree d906d8bf40ac9c803b936c1bb7656df992c629b2
parent Daily bump. (diff)
downloadgcc-657612fb9f58c9cca44b091e3cf40d704fe3ec75.tar.gz
gcc-657612fb9f58c9cca44b091e3cf40d704fe3ec75.tar.bz2
gcc-657612fb9f58c9cca44b091e3cf40d704fe3ec75.tar.xz
Increase move cost between mask and gpr.
kmovd only uses port5, which is often the performance bottleneck. Also, from a latency perspective, a spill and reload can mostly be handled by STLF (store-to-load forwarding) or even MRN (memory renaming), which only take 1 cycle. So the patch increases the move cost between gpr and mask to be the same as gpr <-> sse register. gcc/ChangeLog: * config/i386/x86-tune-costs.h (skylake_cost): Increase gpr <-> mask cost from 5 to 6. (icelake_cost): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/spill_to_mask-1.c: New test.
-rw-r--r--gcc/config/i386/x86-tune-costs.h4
-rw-r--r--gcc/testsuite/gcc.target/i386/spill_to_mask-1.c2
2 files changed, 3 insertions, 3 deletions
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 017ffa69958..05cbd49ec87 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1866,7 +1866,7 @@ struct processor_costs skylake_cost = {
1866 {8, 8, 8, 12, 24}, /* cost of storing SSE registers 1866 {8, 8, 8, 12, 24}, /* cost of storing SSE registers
1867 in 32,64,128,256 and 512-bit */ 1867 in 32,64,128,256 and 512-bit */
1868 6, 6, /* SSE->integer and integer->SSE moves */ 1868 6, 6, /* SSE->integer and integer->SSE moves */
1869 5, 5, /* mask->integer and integer->mask moves */ 1869 6, 6, /* mask->integer and integer->mask moves */
1870 {8, 8, 8}, /* cost of loading mask register 1870 {8, 8, 8}, /* cost of loading mask register
1871 in QImode, HImode, SImode. */ 1871 in QImode, HImode, SImode. */
1872 {6, 6, 6}, /* cost if storing mask register 1872 {6, 6, 6}, /* cost if storing mask register
@@ -1992,7 +1992,7 @@ struct processor_costs icelake_cost = {
1992 {8, 8, 8, 12, 24}, /* cost of storing SSE registers 1992 {8, 8, 8, 12, 24}, /* cost of storing SSE registers
1993 in 32,64,128,256 and 512-bit */ 1993 in 32,64,128,256 and 512-bit */
1994 6, 6, /* SSE->integer and integer->SSE moves */ 1994 6, 6, /* SSE->integer and integer->SSE moves */
1995 5, 5, /* mask->integer and integer->mask moves */ 1995 6, 6, /* mask->integer and integer->mask moves */
1996 {8, 8, 8}, /* cost of loading mask register 1996 {8, 8, 8}, /* cost of loading mask register
1997 in QImode, HImode, SImode. */ 1997 in QImode, HImode, SImode. */
1998 {6, 6, 6}, /* cost if storing mask register 1998 {6, 6, 6}, /* cost if storing mask register
diff --git a/gcc/testsuite/gcc.target/i386/spill_to_mask-1.c b/gcc/testsuite/gcc.target/i386/spill_to_mask-1.c
index 94d6764fc56..be19239a685 100644
--- a/gcc/testsuite/gcc.target/i386/spill_to_mask-1.c
+++ b/gcc/testsuite/gcc.target/i386/spill_to_mask-1.c
@@ -120,7 +120,7 @@ void foo (DTYPE in[16], DTYPE out[8], const DTYPE C[16])
120 out[7] += h; 120 out[7] += h;
121} 121}
122 122
123/* { dg-final { scan-assembler "kmovd" } } */ 123/* { dg-final { scan-assembler "kmovd" { xfail *-*-* } } } */
124/* { dg-final { scan-assembler-not "knot" } } */ 124/* { dg-final { scan-assembler-not "knot" } } */
125/* { dg-final { scan-assembler-not "kxor" } } */ 125/* { dg-final { scan-assembler-not "kxor" } } */
126/* { dg-final { scan-assembler-not "kor" } } */ 126/* { dg-final { scan-assembler-not "kor" } } */