author     Ivo Raisr <ivosh@ivosh.net>  2017-09-22 22:50:11 +0200
committer  Ivo Raisr <ivosh@ivosh.net>  2017-10-11 20:56:48 +0200
commit     83cabd32492e6d19d483a63522e4e874fa64b617 (patch)
tree       0c09cdab7d5c550e3d87ff21f66ad257b9c22afd
parent     mips: add support for bi-arch build on mips64 (diff)
Refactor tracking of MOV coalescing.
Reg<->Reg MOV coalescing status is now part of HRegUsage, so the register allocator can query it twice without incurring a performance penalty. This in turn allows better tracking of vreg<->vreg MOV coalescing: all vregs in a coalescing chain get the effective |dead_before| of the last vreg in the chain. A small performance improvement has been observed, because even spilled vregs can now be coalesced (previously only assigned ones could).
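In outline (a condensed sketch assembled from the hunks below, not code that appears verbatim in any single file): each backend's getRegUsage_*Instr() fills in the move hint while it is already decoding the instruction, and the allocator derives the vreg<->vreg case once per instruction, replacing the per-backend isMove callback that previously had to re-decode the instruction on every query.

    /* New fields in HRegUsage (VEX/priv/host_generic_regs.h) */
    Bool isRegRegMove;   /* hint: insn copies regMoveSrc -> regMoveDst */
    HReg regMoveSrc;
    HReg regMoveDst;
    Bool isVregVregMove; /* derived by the allocator, not by backends */

    /* A backend sets the hint in its getRegUsage function; the amd64
       integer MOV case shown as the example: */
    if (i->Ain.Alu64R.op == Aalu_MOV && i->Ain.Alu64R.src->tag == Armi_Reg) {
       u->isRegRegMove = True;
       u->regMoveSrc   = i->Ain.Alu64R.src->Armi.Reg.reg;
       u->regMoveDst   = i->Ain.Alu64R.dst;
    }

    /* The allocator then refines the hint once, during its stage 1 scan: */
    reg_usage[ii].isVregVregMove
       = reg_usage[ii].isRegRegMove
         && hregIsVirtual(reg_usage[ii].regMoveSrc)
         && hregIsVirtual(reg_usage[ii].regMoveDst);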
-rw-r--r--  VEX/priv/host_amd64_defs.c         |  55
-rw-r--r--  VEX/priv/host_amd64_defs.h         |   1
-rw-r--r--  VEX/priv/host_arm64_defs.c         |  29
-rw-r--r--  VEX/priv/host_arm64_defs.h         |   1
-rw-r--r--  VEX/priv/host_arm_defs.c           |  68
-rw-r--r--  VEX/priv/host_arm_defs.h           |   1
-rw-r--r--  VEX/priv/host_generic_reg_alloc2.c |  16
-rw-r--r--  VEX/priv/host_generic_reg_alloc3.c | 292
-rw-r--r--  VEX/priv/host_generic_regs.c       |   3
-rw-r--r--  VEX/priv/host_generic_regs.h       |  21
-rw-r--r--  VEX/priv/host_mips_defs.c          |  31
-rw-r--r--  VEX/priv/host_mips_defs.h          |   1
-rw-r--r--  VEX/priv/host_ppc_defs.c           |  46
-rw-r--r--  VEX/priv/host_ppc_defs.h           |   1
-rw-r--r--  VEX/priv/host_s390_defs.c          |  34
-rw-r--r--  VEX/priv/host_s390_defs.h          |   1
-rw-r--r--  VEX/priv/host_x86_defs.c           |  53
-rw-r--r--  VEX/priv/host_x86_defs.h           |   1
-rw-r--r--  VEX/priv/main_main.c               |  20
19 files changed, 342 insertions, 333 deletions
diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c
index d9949d4..a554e28 100644
--- a/VEX/priv/host_amd64_defs.c
+++ b/VEX/priv/host_amd64_defs.c
@@ -1406,6 +1406,12 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
          addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src);
          if (i->Ain.Alu64R.op == Aalu_MOV) {
             addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst);
+
+            if (i->Ain.Alu64R.src->tag == Armi_Reg) {
+               u->isRegRegMove = True;
+               u->regMoveSrc   = i->Ain.Alu64R.src->Armi.Reg.reg;
+               u->regMoveDst   = i->Ain.Alu64R.dst;
+            }
             return;
          }
          if (i->Ain.Alu64R.op == Aalu_CMP) {
@@ -1668,6 +1674,12 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
             addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV
                           ? HRmWrite : HRmModify,
                        i->Ain.SseReRg.dst);
+
+            if (i->Ain.SseReRg.op == Asse_MOV) {
+               u->isRegRegMove = True;
+               u->regMoveSrc   = i->Ain.SseReRg.src;
+               u->regMoveDst   = i->Ain.SseReRg.dst;
+            }
          }
          return;
       case Ain_SseCMov:
@@ -1694,6 +1706,12 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
       //uu          addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV
       //uu                        ? HRmWrite : HRmModify,
       //uu                     i->Ain.AvxReRg.dst);
+      //uu
+      //uu          if (i->Ain.AvxReRg.op == Asse_MOV) {
+      //uu             u->isRegRegMove = True;
+      //uu             u->regMoveSrc   = i->Ain.AvxReRg.src;
+      //uu             u->regMoveDst   = i->Ain.AvxReRg.dst;
+      //uu          }
       //uu       }
       //uu       return;
       case Ain_EvCheck:
@@ -1910,43 +1928,6 @@ void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
    }
 }
 
-/* Figure out if i represents a reg-reg move, and if so assign the
-   source and destination to *src and *dst. If in doubt say No. Used
-   by the register allocator to do move coalescing.
-*/
-Bool isMove_AMD64Instr ( const AMD64Instr* i, HReg* src, HReg* dst )
-{
-   switch (i->tag) {
-      case Ain_Alu64R:
-         /* Moves between integer regs */
-         if (i->Ain.Alu64R.op != Aalu_MOV)
-            return False;
-         if (i->Ain.Alu64R.src->tag != Armi_Reg)
-            return False;
-         *src = i->Ain.Alu64R.src->Armi.Reg.reg;
-         *dst = i->Ain.Alu64R.dst;
-         return True;
-      case Ain_SseReRg:
-         /* Moves between SSE regs */
-         if (i->Ain.SseReRg.op != Asse_MOV)
-            return False;
-         *src = i->Ain.SseReRg.src;
-         *dst = i->Ain.SseReRg.dst;
-         return True;
-      //uu case Ain_AvxReRg:
-      //uu    /* Moves between AVX regs */
-      //uu    if (i->Ain.AvxReRg.op != Asse_MOV)
-      //uu       return False;
-      //uu    *src = i->Ain.AvxReRg.src;
-      //uu    *dst = i->Ain.AvxReRg.dst;
-      //uu    return True;
-      default:
-         return False;
-   }
-   /*NOTREACHED*/
-}
-
-
 /* Generate amd64 spill/reload instructions under the direction of the
    register allocator. Note it's critical these don't write the
    condition codes. */
diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h
index 92730fa..68e199a 100644
--- a/VEX/priv/host_amd64_defs.h
+++ b/VEX/priv/host_amd64_defs.h
@@ -785,7 +785,6 @@ extern void ppAMD64Instr ( const AMD64Instr*, Bool );
    of the underlying instruction set. */
 extern void getRegUsage_AMD64Instr ( HRegUsage*, const AMD64Instr*, Bool );
 extern void mapRegs_AMD64Instr ( HRegRemap*, AMD64Instr*, Bool );
-extern Bool isMove_AMD64Instr ( const AMD64Instr*, HReg*, HReg* );
 extern Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
                              UChar* buf, Int nbuf,
                              const AMD64Instr* i,
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
index 2506512..4d088c7 100644
--- a/VEX/priv/host_arm64_defs.c
+++ b/VEX/priv/host_arm64_defs.c
@@ -1958,6 +1958,9 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
       case ARM64in_MovI:
          addHRegUse(u, HRmWrite, i->ARM64in.MovI.dst);
          addHRegUse(u, HRmRead, i->ARM64in.MovI.src);
+         u->isRegRegMove = True;
+         u->regMoveSrc   = i->ARM64in.MovI.src;
+         u->regMoveDst   = i->ARM64in.MovI.dst;
         return;
       case ARM64in_Imm64:
          addHRegUse(u, HRmWrite, i->ARM64in.Imm64.dst);
@@ -2238,6 +2241,9 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
       case ARM64in_VMov:
          addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst);
          addHRegUse(u, HRmRead, i->ARM64in.VMov.src);
+         u->isRegRegMove = True;
+         u->regMoveSrc   = i->ARM64in.VMov.src;
+         u->regMoveDst   = i->ARM64in.VMov.dst;
          return;
       case ARM64in_EvCheck:
          /* We expect both amodes only to mention x21, so this is in
@@ -2510,29 +2516,6 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
    }
 }
 
-/* Figure out if i represents a reg-reg move, and if so assign the
-   source and destination to *src and *dst. If in doubt say No. Used
-   by the register allocator to do move coalescing.
-*/
-Bool isMove_ARM64Instr ( const ARM64Instr* i, HReg* src, HReg* dst )
-{
-   switch (i->tag) {
-      case ARM64in_MovI:
-         *src = i->ARM64in.MovI.src;
-         *dst = i->ARM64in.MovI.dst;
-         return True;
-      case ARM64in_VMov:
-         *src = i->ARM64in.VMov.src;
-         *dst = i->ARM64in.VMov.dst;
-         return True;
-      default:
-         break;
-   }
-
-   return False;
-}
-
-
 /* Generate arm spill/reload instructions under the direction of the
    register allocator. Note it's critical these don't write the
    condition codes. */
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
index e7da4f9..277a55b 100644
--- a/VEX/priv/host_arm64_defs.h
+++ b/VEX/priv/host_arm64_defs.h
@@ -993,7 +993,6 @@ extern void ppARM64Instr ( const ARM64Instr* );
    of the underlying instruction set. */
 extern void getRegUsage_ARM64Instr ( HRegUsage*, const ARM64Instr*, Bool );
 extern void mapRegs_ARM64Instr ( HRegRemap*, ARM64Instr*, Bool );
-extern Bool isMove_ARM64Instr ( const ARM64Instr*, HReg*, HReg* );
 extern Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
                              UChar* buf, Int nbuf, const ARM64Instr* i,
                              Bool mode64,
diff --git a/VEX/priv/host_arm_defs.c b/VEX/priv/host_arm_defs.c
index 9bf87cd..3de6d50 100644
--- a/VEX/priv/host_arm_defs.c
+++ b/VEX/priv/host_arm_defs.c
@@ -2108,6 +2108,12 @@ void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 )
       case ARMin_Mov:
          addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
          addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
+
+         if (i->ARMin.Mov.src->tag == ARMri84_R) {
+            u->isRegRegMove = True;
+            u->regMoveSrc   = i->ARMin.Mov.src->ARMri84.R.reg;
+            u->regMoveDst   = i->ARMin.Mov.dst;
+         }
          return;
       case ARMin_Imm32:
          addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
@@ -2256,10 +2262,22 @@ void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 )
       case ARMin_VUnaryD:
          addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
         addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
+
+         if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
+            u->isRegRegMove = True;
+            u->regMoveSrc   = i->ARMin.VUnaryD.src;
+            u->regMoveDst   = i->ARMin.VUnaryD.dst;
+         }
          return;
       case ARMin_VUnaryS:
          addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
          addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
+
+         if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
+            u->isRegRegMove = True;
+            u->regMoveSrc   = i->ARMin.VUnaryS.src;
+            u->regMoveDst   = i->ARMin.VUnaryS.dst;
+         }
          return;
       case ARMin_VCmpD:
          addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
@@ -2350,6 +2368,12 @@ void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 )
       case ARMin_NUnary:
          addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
          addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
+
+         if (i->ARMin.NUnary.op == ARMneon_COPY) {
+            u->isRegRegMove = True;
+            u->regMoveSrc   = i->ARMin.NUnary.src;
+            u->regMoveDst   = i->ARMin.NUnary.dst;
+         }
          return;
       case ARMin_NUnaryS:
          addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
@@ -2620,50 +2644,6 @@ void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
    }
 }
 
-/* Figure out if i represents a reg-reg move, and if so assign the
-   source and destination to *src and *dst. If in doubt say No. Used
-   by the register allocator to do move coalescing.
-*/
-Bool isMove_ARMInstr ( const ARMInstr* i, HReg* src, HReg* dst )
-{
-   /* Moves between integer regs */
-   switch (i->tag) {
-      case ARMin_Mov:
-         if (i->ARMin.Mov.src->tag == ARMri84_R) {
-            *src = i->ARMin.Mov.src->ARMri84.R.reg;
-            *dst = i->ARMin.Mov.dst;
-            return True;
-         }
-         break;
-      case ARMin_VUnaryD:
-         if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
-            *src = i->ARMin.VUnaryD.src;
-            *dst = i->ARMin.VUnaryD.dst;
-            return True;
-         }
-         break;
-      case ARMin_VUnaryS:
-         if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
-            *src = i->ARMin.VUnaryS.src;
-            *dst = i->ARMin.VUnaryS.dst;
-            return True;
-         }
-         break;
-      case ARMin_NUnary:
-         if (i->ARMin.NUnary.op == ARMneon_COPY) {
-            *src = i->ARMin.NUnary.src;
-            *dst = i->ARMin.NUnary.dst;
-            return True;
-         }
-         break;
-      default:
-         break;
-   }
-
-   return False;
-}
-
-
 /* Generate arm spill/reload instructions under the direction of the
    register allocator. Note it's critical these don't write the
    condition codes. */
diff --git a/VEX/priv/host_arm_defs.h b/VEX/priv/host_arm_defs.h
index 56c4ec5..b88c85a 100644
--- a/VEX/priv/host_arm_defs.h
+++ b/VEX/priv/host_arm_defs.h
@@ -1056,7 +1056,6 @@ extern void ppARMInstr ( const ARMInstr* );
    of the underlying instruction set. */
 extern void getRegUsage_ARMInstr ( HRegUsage*, const ARMInstr*, Bool );
 extern void mapRegs_ARMInstr ( HRegRemap*, ARMInstr*, Bool );
-extern Bool isMove_ARMInstr ( const ARMInstr*, HReg*, HReg* );
 extern Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc,
                            UChar* buf, Int nbuf, const ARMInstr* i,
                            Bool mode64,
diff --git a/VEX/priv/host_generic_reg_alloc2.c b/VEX/priv/host_generic_reg_alloc2.c
index eb4600e..166f52b 100644
--- a/VEX/priv/host_generic_reg_alloc2.c
+++ b/VEX/priv/host_generic_reg_alloc2.c
@@ -45,8 +45,6 @@
 
 /* TODO 27 Oct 04:
 
-   Better consistency checking from what isMove tells us.
-
    We can possibly do V-V coalescing even when the src is spilled,
    providing we can arrange for the dst to have the same spill slot.
 
@@ -515,6 +513,10 @@ HInstrArray* doRegisterAllocation_v2 (
    for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
 
      con->getRegUsage(&reg_usage_arr[ii], instrs_in->arr[ii], con->mode64);
+      reg_usage_arr[ii].isVregVregMove
+         = reg_usage_arr[ii].isRegRegMove
+           && hregIsVirtual(reg_usage_arr[ii].regMoveSrc)
+           && hregIsVirtual(reg_usage_arr[ii].regMoveDst);
 
       if (0) {
          vex_printf("\n%d stage1: ", ii);
@@ -1025,12 +1027,10 @@ HInstrArray* doRegisterAllocation_v2 (
       /* If doing a reg-reg move between two vregs, and the src's live
          range ends here and the dst's live range starts here, bind
          the dst to the src's rreg, and that's all. */
-      HReg vregS = INVALID_HREG;
-      HReg vregD = INVALID_HREG;
-      if ( con->isMove(instrs_in->arr[ii], &vregS, &vregD) ) {
-         if (!hregIsVirtual(vregS)) goto cannot_coalesce;
-         if (!hregIsVirtual(vregD)) goto cannot_coalesce;
-         /* Check that *isMove is not telling us a bunch of lies ... */
+      if (reg_usage_arr[ii].isVregVregMove) {
+         HReg vregS = reg_usage_arr[ii].regMoveSrc;
+         HReg vregD = reg_usage_arr[ii].regMoveDst;
+         /* Check that |isVregVregMove| is not telling us a bunch of lies ... */
          vassert(hregClass(vregS) == hregClass(vregD));
         Int k = hregIndex(vregS);
         Int m = hregIndex(vregD);
diff --git a/VEX/priv/host_generic_reg_alloc3.c b/VEX/priv/host_generic_reg_alloc3.c
index 929dee5..9ab9549 100644
--- a/VEX/priv/host_generic_reg_alloc3.c
+++ b/VEX/priv/host_generic_reg_alloc3.c
@@ -72,6 +72,18 @@ typedef
       /* The "home" spill slot. The offset is relative to the beginning of
          the guest state. */
       UShort spill_offset;
+
+      /* This vreg (vregS) is coalesced to another vreg
+         if |coalescedTo| != INVALID_HREG.
+         Coalescing means that there is a MOV instruction which occurs in the
+         instruction stream right at vregS' dead_before
+         and vregD's live_after. */
+      HReg coalescedTo;    /* Which vreg it is coalesced to. */
+      HReg coalescedFirst; /* First vreg in the coalescing chain. */
+
+      /* If this vregS is coalesced to another vregD, what is the combined
+         dead_before for vregS+vregD. Used to effectively allocate registers. */
+      Short effective_dead_before;
    }
    VRegState;
 
@@ -190,13 +202,20 @@ static inline void print_state(
    const RRegLRState* rreg_lr_state,
    UShort current_ii)
 {
+#  define RIGHT_JUSTIFY(_total, _written)                  \
+      do {                                                 \
+         for (Int w = (_total) - (_written); w > 0; w--) { \
+            vex_printf(" ");                               \
+         }                                                 \
+      } while (0)
+
    for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
       const VRegState* vreg = &vreg_state[v_idx];
 
       if (vreg->live_after == INVALID_INSTRNO) {
         continue; /* This is a dead vreg. Never comes into live. */
      }
-      vex_printf("vreg_state[%3u] \t", v_idx);
+      vex_printf("vreg_state[%3u] ", v_idx);
 
       UInt written;
       switch (vreg->disp) {
@@ -213,15 +232,26 @@ static inline void print_state(
          default:
            vassert(0);
       }
+      RIGHT_JUSTIFY(25, written);
 
-      for (Int w = 30 - written; w > 0; w--) {
-         vex_printf(" ");
-      }
+      written = vex_printf("lr: [%d, %d) ",
+                           vreg->live_after, vreg->dead_before);
+      RIGHT_JUSTIFY(15, written);
+
+      written = vex_printf("effective lr: [%d, %d)",
+                           vreg->live_after, vreg->effective_dead_before);
+      RIGHT_JUSTIFY(25, written);
 
       if (vreg->live_after > (Short) current_ii) {
          vex_printf("[not live yet]\n");
       } else if ((Short) current_ii >= vreg->dead_before) {
-         vex_printf("[now dead]\n");
+         if (hregIsInvalid(vreg->coalescedTo)) {
+            vex_printf("[now dead]\n");
+         } else {
+            vex_printf("[now dead, coalesced to ");
+            con->ppReg(vreg->coalescedTo);
+            vex_printf("]\n");
+         }
       } else {
          vex_printf("[live]\n");
       }
@@ -232,9 +262,7 @@ static inline void print_state(
       const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
       vex_printf("rreg_state[%2u] = ", r_idx);
       UInt written = con->ppReg(con->univ->regs[r_idx]);
-      for (Int w = 10 - written; w > 0; w--) {
-         vex_printf(" ");
-      }
+      RIGHT_JUSTIFY(10, written);
 
       switch (rreg->disp) {
          case Free:
@@ -255,6 +283,8 @@ static inline void print_state(
             break;
       }
    }
+
+#  undef RIGHT_JUSTIFY
 }
 
 static inline void emit_instr(HInstr* instr, HInstrArray* instrs_out,
@@ -383,8 +413,8 @@ static inline HReg find_vreg_to_spill(
    a callee-save register because it won't be used for parameter passing
    around helper function calls. */
 static Bool find_free_rreg(
-   VRegState* vreg_state, UInt n_vregs,
-   RRegState* rreg_state, UInt n_rregs,
+   const VRegState* vreg_state, UInt n_vregs,
+   const RRegState* rreg_state, UInt n_rregs,
    const RRegLRState* rreg_lr_state,
    UInt current_ii, HRegClass target_hregclass,
    Bool reserve_phase, const RegAllocControl* con, UInt* r_idx_found)
@@ -476,6 +506,10 @@ HInstrArray* doRegisterAllocation_v3(
    HRegUsage* reg_usage
      = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used);
 
+   /* Mark vreg indexes where coalesce chains start at. */
+   UInt* coalesce_heads = LibVEX_Alloc_inline(n_vregs * sizeof(UInt));
+   UInt nr_coalesce_heads = 0;
+
    /* The live range numbers are signed shorts, and so limiting the
       number of instructions to 15000 comfortably guards against them
      overflowing 32k. */
@@ -512,9 +546,9 @@ HInstrArray* doRegisterAllocation_v3(
      instruction and makes free the corresponding rreg. */
 #  define FIND_OR_MAKE_FREE_RREG(_ii, _v_idx, _reg_class, _reserve_phase)  \
    ({                                                                      \
-      UInt _r_free_idx = -1;                                               \
+      UInt _r_free_idx;                                                    \
      Bool free_rreg_found = find_free_rreg(                                \
         vreg_state, n_vregs, rreg_state, n_rregs, rreg_lr_state,           \
         (_ii), (_reg_class), (_reserve_phase), con, &_r_free_idx);         \
      if (!free_rreg_found) {                                               \
         HReg vreg_to_spill = find_vreg_to_spill(                           \
@@ -536,12 +570,15 @@ HInstrArray* doRegisterAllocation_v3(
 
    /* --- Stage 0. Initialize the state. --- */
    for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
      vreg_state[v_idx].live_after = INVALID_INSTRNO;
      vreg_state[v_idx].dead_before = INVALID_INSTRNO;
      vreg_state[v_idx].reg_class = HRcINVALID;
      vreg_state[v_idx].disp = Unallocated;
      vreg_state[v_idx].rreg = INVALID_HREG;
      vreg_state[v_idx].spill_offset = 0;
+      vreg_state[v_idx].coalescedTo = INVALID_HREG;
+      vreg_state[v_idx].coalescedFirst = INVALID_HREG;
+      vreg_state[v_idx].effective_dead_before = INVALID_INSTRNO;
    }
 
    for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
@@ -565,6 +602,10 @@ HInstrArray* doRegisterAllocation_v3(
      const HInstr* instr = instrs_in->arr[ii];
 
      con->getRegUsage(&reg_usage[ii], instr, con->mode64);
+      reg_usage[ii].isVregVregMove
+         = reg_usage[ii].isRegRegMove
+           && hregIsVirtual(reg_usage[ii].regMoveSrc)
+           && hregIsVirtual(reg_usage[ii].regMoveDst);
 
      if (0) {
         vex_printf("\n%u stage 1: ", ii);
@@ -602,23 +643,24 @@ HInstrArray* doRegisterAllocation_v3(
            if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
               OFFENDING_VREG(v_idx, instr, "Read");
            }
-            vreg_state[v_idx].dead_before = toShort(ii + 1);
            break;
         case HRmWrite:
            if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
               vreg_state[v_idx].live_after = toShort(ii);
            }
-            vreg_state[v_idx].dead_before = toShort(ii + 1);
            break;
         case HRmModify:
            if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
               OFFENDING_VREG(v_idx, instr, "Modify");
            }
-            vreg_state[v_idx].dead_before = toShort(ii + 1);
            break;
         default:
            vassert(0);
         }
+
+         vreg_state[v_idx].dead_before = toShort(ii + 1);
+         vreg_state[v_idx].effective_dead_before
+            = vreg_state[v_idx].dead_before;
      }
 
      /* Process real registers mentioned in the instruction. */
@@ -703,7 +745,59 @@ HInstrArray* doRegisterAllocation_v3(
      }
   }
 
-   /* --- Stage 2. Allocate spill slots. --- */
+
+   /* --- Stage 2. MOV coalescing (preparation). --- */
+   /* Optimise register coalescing:
+         MOV v <-> v coalescing (done here).
+         MOV v <-> r coalescing (TODO: not yet, not here). */
+   /* If doing a reg-reg move between two vregs, and the src's live range ends
+      here and the dst's live range starts here, coalesce the src vreg
+      to the dst vreg. */
+   Bool coalesce_happened = False;
+   for (UShort ii = 0; ii < instrs_in->arr_used; ii++) {
+      if (reg_usage[ii].isVregVregMove) {
+         HReg vregS = reg_usage[ii].regMoveSrc;
+         HReg vregD = reg_usage[ii].regMoveDst;
+
+         /* Check that |isVregVregMove| is not telling us a bunch of lies ... */
+         vassert(hregClass(vregS) == hregClass(vregD));
+         UInt vs_idx = hregIndex(vregS);
+         UInt vd_idx = hregIndex(vregD);
+         vassert(IS_VALID_VREGNO(vs_idx));
+         vassert(IS_VALID_VREGNO(vd_idx));
+         vassert(! sameHReg(vregS, vregD));
+         VRegState* vs_st = &vreg_state[vs_idx];
+         VRegState* vd_st = &vreg_state[vd_idx];
+
+         if ((vs_st->dead_before == ii + 1) && (vd_st->live_after == ii)) {
+            /* Live ranges are adjacent. */
+
+            vs_st->coalescedTo = vregD;
+            if (hregIsInvalid(vs_st->coalescedFirst)) {
+               vd_st->coalescedFirst = vregS;
+               coalesce_heads[nr_coalesce_heads] = vs_idx;
+               nr_coalesce_heads += 1;
+            } else {
+               vd_st->coalescedFirst = vs_st->coalescedFirst;
+            }
+
+            vreg_state[hregIndex(vd_st->coalescedFirst)].effective_dead_before
+               = vd_st->dead_before;
+
+            if (DEBUG_REGALLOC) {
+               vex_printf("vreg coalescing: ");
+               con->ppReg(vregS);
+               vex_printf(" -> ");
+               con->ppReg(vregD);
+               vex_printf("\n");
+            }
+
+            coalesce_happened = True;
+         }
+      }
+   }
+
+   /* --- Stage 3. Allocate spill slots. --- */
 
    /* Each spill slot is 8 bytes long. For vregs which take more than 64 bits
       to spill (for example classes Flt64 and Vec128), we have to allocate two
@@ -742,6 +836,11 @@ HInstrArray* doRegisterAllocation_v3(
         vassert(vreg_state[v_idx].reg_class == HRcINVALID);
         continue;
      }
+      if (! hregIsInvalid(vreg_state[v_idx].coalescedFirst)) {
+         /* Coalesced vregs should share the same spill slot with the first vreg
+            in the coalescing chain. But we don't have that information, yet. */
+         continue;
+      }
 
      /* The spill slots are 64 bits in size. As per the comment on definition
        of HRegClass in host_generic_regs.h, that means, to spill a vreg of
@@ -763,8 +862,10 @@ HInstrArray* doRegisterAllocation_v3(
         if (ss_no >= N_SPILL64S - 1) {
            vpanic("N_SPILL64S is too low in VEX. Increase and recompile.");
         }
-         ss_busy_until_before[ss_no + 0] = vreg_state[v_idx].dead_before;
-         ss_busy_until_before[ss_no + 1] = vreg_state[v_idx].dead_before;
+         ss_busy_until_before[ss_no + 0]
+            = vreg_state[v_idx].effective_dead_before;
+         ss_busy_until_before[ss_no + 1]
+            = vreg_state[v_idx].effective_dead_before;
         break;
      default:
        /* The ordinary case -- just find a single lowest-numbered spill
@@ -777,7 +878,8 @@ HInstrArray* doRegisterAllocation_v3(
        if (ss_no == N_SPILL64S) {
           vpanic("N_SPILL64S is too low in VEX. Increase and recompile.");
        }
-        ss_busy_until_before[ss_no] = vreg_state[v_idx].dead_before;
+        ss_busy_until_before[ss_no]
+           = vreg_state[v_idx].effective_dead_before;
        break;
      }
 
@@ -798,15 +900,38 @@ HInstrArray* doRegisterAllocation_v3(
      }
   }
 
+   /* Fill in the spill offsets and effective_dead_before for coalesced vregs.*/
+   for (UInt i = 0; i < nr_coalesce_heads; i++) {
+      UInt vs_idx = coalesce_heads[i];
+      Short effective_dead_before = vreg_state[vs_idx].effective_dead_before;
+      UShort spill_offset = vreg_state[vs_idx].spill_offset;
+      HReg vregD = vreg_state[vs_idx].coalescedTo;
+      while (! hregIsInvalid(vregD)) {
+         UInt vd_idx = hregIndex(vregD);
+         vreg_state[vd_idx].effective_dead_before = effective_dead_before;
+         vreg_state[vd_idx].spill_offset = spill_offset;
+         vregD = vreg_state[vd_idx].coalescedTo;
+      }
+   }
+
+   if (DEBUG_REGALLOC && coalesce_happened) {
+      UInt ii = 0;
+      vex_printf("After vreg<->vreg MOV coalescing:\n");
+      PRINT_STATE;
+   }
+
    if (0) {
      vex_printf("\n\n");
-      for (UInt v_idx = 0; v_idx < n_vregs; v_idx++)
-         vex_printf("vreg %3u --> spill offset %u\n",
-                    v_idx, vreg_state[v_idx].spill_offset);
+      for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
+         if (vreg_state[v_idx].live_after != INVALID_INSTRNO) {
+            vex_printf("vreg %3u --> spill offset %u\n",
+                       v_idx, vreg_state[v_idx].spill_offset);
+         }
+      }
    }
 
 
-   /* --- State 3. Process instructions. --- */
+   /* --- State 4. Process instructions. --- */
    for (UShort ii = 0; ii < instrs_in->arr_used; ii++) {
      HInstr* instr = instrs_in->arr[ii];
 
@@ -873,65 +998,82 @@ HInstrArray* doRegisterAllocation_v3(
               vassert((Short) ii < rreg_lrs->lr_current->dead_before);
            }
         }
+
+         /* Sanity check: if vregS has been marked as coalesced to vregD,
+            then the effective live range of vregS must also cover live range
+            of vregD. */
+         /* The following sanity check is quite expensive. Some basic blocks
+            contain very lengthy coalescing chains... */
+         if (SANITY_CHECKS_EVERY_INSTR) {
+            for (UInt vs_idx = 0; vs_idx < n_vregs; vs_idx++) {
+               const VRegState* vS_st = &vreg_state[vs_idx];
+               HReg vregD = vS_st->coalescedTo;
+               while (! hregIsInvalid(vregD)) {
+                  const VRegState* vD_st = &vreg_state[hregIndex(vregD)];
+                  vassert(vS_st->live_after <= vD_st->live_after);
+                  vassert(vS_st->effective_dead_before >= vD_st->dead_before);
+                  vregD = vD_st->coalescedTo;
+               }
+            }
+         }
      }
 
 
-      /* --- MOV coalescing --- */
+      /* --- MOV coalescing (finishing) --- */
      /* Optimise register coalescing:
-            MOV v <-> v coalescing (done here).
+            MOV v <-> v coalescing (finished here).
            MOV v <-> r coalescing (TODO: not yet). */
-      /* If doing a reg-reg move between two vregs, and the src's live
-         range ends here and the dst's live range starts here, bind the dst
-         to the src's rreg, and that's all. */
-      HReg vregS = INVALID_HREG;
-      HReg vregD = INVALID_HREG;
-      if (con->isMove(instr, &vregS, &vregD)) {
-         if (hregIsVirtual(vregS) && hregIsVirtual(vregD)) {
-            /* Check that |isMove| is not telling us a bunch of lies ... */
-            vassert(hregClass(vregS) == hregClass(vregD));
-            UInt vs_idx = hregIndex(vregS);
-            UInt vd_idx = hregIndex(vregD);
-            vassert(IS_VALID_VREGNO(vs_idx));
-            vassert(IS_VALID_VREGNO(vd_idx));
-
-            if ((vreg_state[vs_idx].dead_before == ii + 1)
-                && (vreg_state[vd_idx].live_after == ii)
-                && (vreg_state[vs_idx].disp == Assigned)) {
-
-               /* Live ranges are adjacent and source vreg is bound.
-                  Finally we can do the coalescing. */
-               HReg rreg = vreg_state[vs_idx].rreg;
-               vreg_state[vd_idx].disp = Assigned;
-               vreg_state[vd_idx].rreg = rreg;
-               FREE_VREG(&vreg_state[vs_idx]);
-
-               UInt r_idx = hregIndex(rreg);
-               vassert(rreg_state[r_idx].disp == Bound);
-               rreg_state[r_idx].vreg = vregD;
-               rreg_state[r_idx].eq_spill_slot = False;
-
-               if (DEBUG_REGALLOC) {
-                  vex_printf("coalesced: ");
-                  con->ppReg(vregS);
-                  vex_printf(" -> ");
-                  con->ppReg(vregD);
-                  vex_printf("\n\n");
-               }
-
-               /* In rare cases it can happen that vregD's live range ends
-                  here. Check and eventually free the vreg and rreg.
-                  This effectively means that either the translated program
-                  contained dead code (but VEX iropt passes are pretty good
-                  at eliminating it) or the VEX backend generated dead code. */
-               if (vreg_state[vd_idx].dead_before <= (Short) ii + 1) {
-                  FREE_VREG(&vreg_state[vd_idx]);
-                  FREE_RREG(&rreg_state[r_idx]);
-               }
-
-               /* Move on to the next instruction. We skip the post-instruction
-                  stuff because all required house-keeping was done here. */
-               continue;
-            }
-         }
+      if (reg_usage[ii].isVregVregMove) {
+         HReg vregS = reg_usage[ii].regMoveSrc;
+         HReg vregD = reg_usage[ii].regMoveDst;
+         UInt vs_idx = hregIndex(vregS);
+         UInt vd_idx = hregIndex(vregD);
+
+         if (sameHReg(vreg_state[vs_idx].coalescedTo, vregD)) {
+            /* Finally do the coalescing. */
+
+            HReg rreg = vreg_state[vs_idx].rreg;
+            switch (vreg_state[vs_idx].disp) {
+            case Assigned:
+               vreg_state[vd_idx].rreg = rreg;
+               UInt r_idx = hregIndex(rreg);
+               vassert(rreg_state[r_idx].disp == Bound);
+               rreg_state[r_idx].vreg = vregD;
+               break;
+            case Spilled:
+               vassert(hregIsInvalid(vreg_state[vs_idx].rreg));
+               break;
+            default:
+               vassert(0);
+            }
+
+            vreg_state[vd_idx].disp = vreg_state[vs_idx].disp;
+            FREE_VREG(&vreg_state[vs_idx]);
+
+            if (DEBUG_REGALLOC) {
+               vex_printf("coalesced: ");
+               con->ppReg(vregS);
+               vex_printf(" -> ");
+               con->ppReg(vregD);
+               vex_printf("\n\n");
+            }
+
+            /* In rare cases it can happen that vregD's live range ends here.
+               Check and eventually free the vreg and rreg.
+               This effectively means that either the translated program
+               contained dead code (but VEX iropt passes are pretty good
+               at eliminating it) or the VEX backend generated dead code. */
+            if (vreg_state[vd_idx].dead_before <= (Short) ii + 1) {
+               if (vreg_state[vd_idx].disp == Assigned) {
+                  UInt r_idx = hregIndex(rreg);
+                  FREE_RREG(&rreg_state[r_idx]);
+               }
+               FREE_VREG(&vreg_state[vd_idx]);
+            }
+
+            /* Move on to the next instruction. We skip the post-instruction
+               stuff because all required house-keeping was done here. */
+            continue;
+         }
      }
 
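To make the chain bookkeeping in host_generic_reg_alloc3.c concrete, a hypothetical walk-through (illustration only, not part of the patch): suppose instruction 10 is a MOV v1 -> v2 and instruction 20 is a MOV v2 -> v3, with live ranges v1:[0,11), v2:[10,21), v3:[20,30).

    /* Stage 2 records the chain (vreg indexes shown symbolically): */
    vreg_state[v1].coalescedTo    = v2;
    vreg_state[v2].coalescedTo    = v3;
    vreg_state[v2].coalescedFirst = v1;   /* v1 is the chain head */
    vreg_state[v3].coalescedFirst = v1;
    vreg_state[v1].effective_dead_before = 30; /* head carries combined lifetime */

Stage 3 then allocates a single spill slot for the head v1 (skipping v2 and v3) and propagates the offset down the chain, so spill-slot and rreg decisions for v1 account for the whole chain's lifetime. This shared state is also what lets the finishing step coalesce through a Spilled source vreg, which the old isMove-based scheme (requiring disp == Assigned) could not.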
diff --git a/VEX/priv/host_generic_regs.c b/VEX/priv/host_generic_regs.c
index 67d2ea2..cd5d222 100644
--- a/VEX/priv/host_generic_regs.c
+++ b/VEX/priv/host_generic_regs.c
@@ -184,6 +184,9 @@ void ppHRegUsage ( const RRegUniverse* univ, HRegUsage* tab )
       ppHReg(tab->vRegs[i]);
       vex_printf("\n");
    }
+   if (tab->isRegRegMove) {
+      vex_printf(" (is a reg-reg move)\n");
+   }
    vex_printf("}\n");
 }
 
diff --git a/VEX/priv/host_generic_regs.h b/VEX/priv/host_generic_regs.h
index 3db9ea0..8f6b2d6 100644
--- a/VEX/priv/host_generic_regs.h
+++ b/VEX/priv/host_generic_regs.h
@@ -300,6 +300,16 @@ typedef
       HReg     vRegs[N_HREGUSAGE_VREGS];
       HRegMode vMode[N_HREGUSAGE_VREGS];
       UInt     n_vRegs;
+
+      /* Hint to the register allocator: this instruction is actually a move
+         between two registers: regMoveSrc -> regMoveDst. */
+      Bool isRegRegMove;
+      HReg regMoveSrc;
+      HReg regMoveDst;
+
+      /* Used internally by the register allocator. The reg-reg move is
+         actually a vreg-vreg move. */
+      Bool isVregVregMove;
    }
    HRegUsage;
 
@@ -307,9 +317,10 @@ extern void ppHRegUsage ( const RRegUniverse*, HRegUsage* );
 
 static inline void initHRegUsage ( HRegUsage* tab )
 {
    tab->rRead = 0;
    tab->rWritten = 0;
    tab->n_vRegs = 0;
+   tab->isRegRegMove = False;
 }
 
 /* Add a register to a usage table. Combine incoming read uses with
@@ -471,10 +482,6 @@ typedef
       allocation. */
    const RRegUniverse* univ;
 
-   /* Return True iff the given insn is a reg-reg move, in which case also
-      return the src and dst regs. */
-   Bool (*isMove)(const HInstr*, HReg*, HReg*);
-
    /* Get info about register usage in this insn. */
    void (*getRegUsage)(HRegUsage*, const HInstr*, Bool);
 
diff --git a/VEX/priv/host_mips_defs.c b/VEX/priv/host_mips_defs.c
index 66c226d..35a293b 100644
--- a/VEX/priv/host_mips_defs.c
+++ b/VEX/priv/host_mips_defs.c
@@ -1606,6 +1606,15 @@ void getRegUsage_MIPSInstr(HRegUsage * u, const MIPSInstr * i, Bool mode64)
          addHRegUse(u, HRmRead, i->Min.Alu.srcL);
          addRegUsage_MIPSRH(u, i->Min.Alu.srcR);
          addHRegUse(u, HRmWrite, i->Min.Alu.dst);
+
+         /* or Rd,Rs,Rs == mr Rd,Rs */
+         if ((i->Min.Alu.op == Malu_OR)
+             && (i->Min.Alu.srcR->tag == Mrh_Reg)
+             && sameHReg(i->Min.Alu.srcR->Mrh.Reg.reg, i->Min.Alu.srcL)) {
+            u->isRegRegMove = True;
+            u->regMoveSrc   = i->Min.Alu.srcL;
+            u->regMoveDst   = i->Min.Alu.dst;
+         }
          return;
       case Min_Shft:
          addHRegUse(u, HRmRead, i->Min.Shft.srcL);
@@ -1990,28 +1999,6 @@ void mapRegs_MIPSInstr(HRegRemap * m, MIPSInstr * i, Bool mode64)
 
 }
 
-/* Figure out if i represents a reg-reg move, and if so assign the
-   source and destination to *src and *dst. If in doubt say No. Used
-   by the register allocator to do move coalescing.
-*/
-Bool isMove_MIPSInstr(const MIPSInstr * i, HReg * src, HReg * dst)
-{
-   /* Moves between integer regs */
-   if (i->tag == Min_Alu) {
-      /* or Rd,Rs,Rs == mr Rd,Rs */
-      if (i->Min.Alu.op != Malu_OR)
-         return False;
-      if (i->Min.Alu.srcR->tag != Mrh_Reg)
-         return False;
-      if (!sameHReg(i->Min.Alu.srcR->Mrh.Reg.reg, i->Min.Alu.srcL))
-         return False;
-      *src = i->Min.Alu.srcL;
-      *dst = i->Min.Alu.dst;
-      return True;
-   }
-   return False;
-}
-
 /* Generate mips spill/reload instructions under the direction of the
    register allocator. */
 void genSpill_MIPS( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2, HReg rreg,
diff --git a/VEX/priv/host_mips_defs.h b/VEX/priv/host_mips_defs.h
index be1e3a8..fb681ac 100644
--- a/VEX/priv/host_mips_defs.h
+++ b/VEX/priv/host_mips_defs.h
@@ -701,7 +701,6 @@ extern void ppMIPSInstr(const MIPSInstr *, Bool mode64);
    of the underlying instruction set. */
 extern void getRegUsage_MIPSInstr (HRegUsage *, const MIPSInstr *, Bool);
 extern void mapRegs_MIPSInstr (HRegRemap *, MIPSInstr *, Bool mode64);
-extern Bool isMove_MIPSInstr (const MIPSInstr *, HReg *, HReg *);
 extern Int emit_MIPSInstr (/*MB_MOD*/Bool* is_profInc,
                            UChar* buf, Int nbuf, const MIPSInstr* i,
                            Bool mode64,
diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c
index 1ef9c5c..b073c1d 100644
--- a/VEX/priv/host_ppc_defs.c
+++ b/VEX/priv/host_ppc_defs.c
@@ -2362,6 +2362,15 @@ void getRegUsage_PPCInstr ( HRegUsage* u, const PPCInstr* i, Bool mode64 )
          addHRegUse(u, HRmRead, i->Pin.Alu.srcL);
          addRegUsage_PPCRH(u, i->Pin.Alu.srcR);
          addHRegUse(u, HRmWrite, i->Pin.Alu.dst);
+
+         // or Rd,Rs,Rs == mr Rd,Rs
+         if ((i->Pin.Alu.op == Palu_OR)
+             && (i->Pin.Alu.srcR->tag == Prh_Reg)
+             && sameHReg(i->Pin.Alu.srcR->Prh.Reg.reg, i->Pin.Alu.srcL)) {
+            u->isRegRegMove = True;
+            u->regMoveSrc   = i->Pin.Alu.srcL;
+            u->regMoveDst   = i->Pin.Alu.dst;
+         }
          return;
       case Pin_Shft:
          addHRegUse(u, HRmRead, i->Pin.Shft.srcL);
@@ -2489,6 +2498,12 @@ void getRegUsage_PPCInstr ( HRegUsage* u, const PPCInstr* i, Bool mode64 )
       case Pin_FpUnary:
          addHRegUse(u, HRmWrite, i->Pin.FpUnary.dst);
         addHRegUse(u, HRmRead, i->Pin.FpUnary.src);
+
+         if (i->Pin.FpUnary.op == Pfp_MOV) {
+            u->isRegRegMove = True;
+            u->regMoveSrc   = i->Pin.FpUnary.src;
+            u->regMoveDst   = i->Pin.FpUnary.dst;
+         }
         return;
       case Pin_FpBinary:
          addHRegUse(u, HRmWrite, i->Pin.FpBinary.dst);
@@ -3119,37 +3134,6 @@ void mapRegs_PPCInstr ( HRegRemap* m, PPCInstr* i, Bool mode64 )
    }
 }
 
-/* Figure out if i represents a reg-reg move, and if so assign the
-   source and destination to *src and *dst. If in doubt say No. Used
-   by the register allocator to do move coalescing.
-*/
-Bool isMove_PPCInstr ( const PPCInstr* i, HReg* src, HReg* dst )
-{
-   /* Moves between integer regs */
-   if (i->tag == Pin_Alu) {
-      // or Rd,Rs,Rs == mr Rd,Rs
-      if (i->Pin.Alu.op != Palu_OR)
-         return False;
-      if (i->Pin.Alu.srcR->tag != Prh_Reg)
-         return False;
-      if (! sameHReg(i->Pin.Alu.srcR->Prh.Reg.reg, i->Pin.Alu.srcL))
-         return False;
-      *src = i->Pin.Alu.srcL;
-      *dst = i->Pin.Alu.dst;
-      return True;
-   }
-   /* Moves between FP regs */
-   if (i->tag == Pin_FpUnary) {
-      if (i->Pin.FpUnary.op != Pfp_MOV)
-         return False;
-      *src = i->Pin.FpUnary.src;
-      *dst = i->Pin.FpUnary.dst;
-      return True;
-   }
-   return False;
-}
-
-
 /* Generate ppc spill/reload instructions under the direction of the
    register allocator. Note it's critical these don't write the
    condition codes. */
diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h
index 27b3b38..17baff5 100644
--- a/VEX/priv/host_ppc_defs.h
+++ b/VEX/priv/host_ppc_defs.h
@@ -1201,7 +1201,6 @@ extern void ppPPCInstr(const PPCInstr*, Bool mode64);
    of the underlying instruction set. */
 extern void getRegUsage_PPCInstr ( HRegUsage*, const PPCInstr*, Bool mode64 );
 extern void mapRegs_PPCInstr ( HRegRemap*, PPCInstr* , Bool mode64);
-extern Bool isMove_PPCInstr ( const PPCInstr*, HReg*, HReg* );
 extern Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
                            UChar* buf, Int nbuf, const PPCInstr* i,
                            Bool mode64,
diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
index 327674a..f9a9557 100644
--- a/VEX/priv/host_s390_defs.c
+++ b/VEX/priv/host_s390_defs.c
@@ -48,7 +48,6 @@
 /*--- Forward declarations                                 ---*/
 /*------------------------------------------------------------*/
 
-static Bool s390_insn_is_reg_reg_move(const s390_insn *, HReg *src, HReg *dst);
 static void s390_insn_map_regs(HRegRemap *, s390_insn *);
 static void s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *);
 static UInt s390_tchain_load64_len(void);
@@ -467,16 +466,6 @@ mapRegs_S390Instr(HRegRemap *m, s390_insn *insn, Bool mode64)
 }
 
 
-/* Figure out if the given insn represents a reg-reg move, and if so
-   assign the source and destination to *src and *dst. If in doubt say No.
-   Used by the register allocator to do move coalescing. */
-Bool
-isMove_S390Instr(const s390_insn *insn, HReg *src, HReg *dst)
-{
-   return s390_insn_is_reg_reg_move(insn, src, dst);
-}
-
-
 /* Generate s390 spill/reload instructions under the direction of the
    register allocator. Note it's critical these don't write the
    condition codes. This is like an Ist_Put */
@@ -587,6 +576,12 @@ s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *insn)
    case S390_INSN_MOVE:
       addHRegUse(u, HRmRead, insn->variant.move.src);
       addHRegUse(u, HRmWrite, insn->variant.move.dst);
+
+      if (hregClass(insn->variant.move.src) == hregClass(insn->variant.move.dst)) {
+         u->isRegRegMove = True;
+         u->regMoveSrc   = insn->variant.move.src;
+         u->regMoveDst   = insn->variant.move.dst;
+      }
       break;
 
    case S390_INSN_MEMCPY:
@@ -1218,23 +1213,6 @@ s390_insn_map_regs(HRegRemap *m, s390_insn *insn)
 }
 
 
-/* Return True, if INSN is a move between two registers of the same class.
-   In that case assign the source and destination registers to SRC and DST,
-   respectively. */
-static Bool
-s390_insn_is_reg_reg_move(const s390_insn *insn, HReg *src, HReg *dst)
-{
-   if (insn->tag == S390_INSN_MOVE &&
-       hregClass(insn->variant.move.src) == hregClass(insn->variant.move.dst)) {
-      *src = insn->variant.move.src;
-      *dst = insn->variant.move.dst;
-      return True;
-   }
-
-   return False;
-}
-
-
 /*------------------------------------------------------------*/
 /*--- Functions to emit a sequence of bytes               ---*/
 /*------------------------------------------------------------*/
diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h
index 937829c..254275a 100644
--- a/VEX/priv/host_s390_defs.h
+++ b/VEX/priv/host_s390_defs.h
@@ -742,7 +742,6 @@ UInt ppHRegS390(HReg);
    of the underlying instruction set. */
 void getRegUsage_S390Instr( HRegUsage *, const s390_insn *, Bool );
 void mapRegs_S390Instr ( HRegRemap *, s390_insn *, Bool );
-Bool isMove_S390Instr ( const s390_insn *, HReg *, HReg * );
 Int emit_S390Instr ( Bool *, UChar *, Int, const s390_insn *, Bool,
                      VexEndness, const void *, const void *,
                      const void *, const void *);
diff --git a/VEX/priv/host_x86_defs.c b/VEX/priv/host_x86_defs.c
index 2457cc1..eb8e020 100644
--- a/VEX/priv/host_x86_defs.c
+++ b/VEX/priv/host_x86_defs.c
@@ -1234,6 +1234,12 @@ void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64)
          addRegUsage_X86RMI(u, i->Xin.Alu32R.src);
          if (i->Xin.Alu32R.op == Xalu_MOV) {
            addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst);
+
+            if (i->Xin.Alu32R.src->tag == Xrmi_Reg) {
+               u->isRegRegMove = True;
+               u->regMoveSrc   = i->Xin.Alu32R.src->Xrmi.Reg.reg;
+               u->regMoveDst   = i->Xin.Alu32R.dst;
+            }
            return;
         }
         if (i->Xin.Alu32R.op == Xalu_CMP) {
@@ -1374,6 +1380,12 @@ void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64)
       case Xin_FpUnary:
          addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
         addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
+
+         if (i->Xin.FpUnary.op == Xfp_MOV) {
+            u->isRegRegMove = True;
+            u->regMoveSrc   = i->Xin.FpUnary.src;
+            u->regMoveDst   = i->Xin.FpUnary.dst;
+         }
         return;
       case Xin_FpBinary:
          addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
@@ -1469,6 +1481,12 @@ void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64)
            addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV
                          ? HRmWrite : HRmModify,
                       i->Xin.SseReRg.dst);
+
+            if (i->Xin.SseReRg.op == Xsse_MOV) {
+               u->isRegRegMove = True;
+               u->regMoveSrc   = i->Xin.SseReRg.src;
+               u->regMoveDst   = i->Xin.SseReRg.dst;
+            }
         }
         return;
       case Xin_SseCMov:
@@ -1668,41 +1686,6 @@ void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 )
    }
 }
 
-/* Figure out if i represents a reg-reg move, and if so assign the
-   source and destination to *src and *dst. If in doubt say No. Used
-   by the register allocator to do move coalescing.
-*/
-Bool isMove_X86Instr ( const X86Instr* i, HReg* src, HReg* dst )
-{
-   /* Moves between integer regs */
-   if (i->tag == Xin_Alu32R) {
-      if (i->Xin.Alu32R.op != Xalu_MOV)
-         return False;
-      if (i->Xin.Alu32R.src->tag != Xrmi_Reg)
-         return False;
-      *src = i->Xin.Alu32R.src->Xrmi.Reg.reg;
-      *dst = i->Xin.Alu32R.dst;
-      return True;
-   }
-   /* Moves between FP regs */
-   if (i->tag == Xin_FpUnary) {
-      if (i->Xin.FpUnary.op != Xfp_MOV)
-         return False;
-      *src = i->Xin.FpUnary.src;
-      *dst = i->Xin.FpUnary.dst;
-      return True;
-   }
-   if (i->tag == Xin_SseReRg) {
-      if (i->Xin.SseReRg.op != Xsse_MOV)
-         return False;
-      *src = i->Xin.SseReRg.src;
-      *dst = i->Xin.SseReRg.dst;
-      return True;
-   }
-   return False;
-}
-
-
 /* Generate x86 spill/reload instructions under the direction of the
    register allocator. Note it's critical these don't write the
    condition codes. */
diff --git a/VEX/priv/host_x86_defs.h b/VEX/priv/host_x86_defs.h
index e1a5767..6812d5f 100644
--- a/VEX/priv/host_x86_defs.h
+++ b/VEX/priv/host_x86_defs.h
@@ -716,7 +716,6 @@ extern void ppX86Instr ( const X86Instr*, Bool );
    of the underlying instruction set. */
 extern void getRegUsage_X86Instr ( HRegUsage*, const X86Instr*, Bool );
 extern void mapRegs_X86Instr ( HRegRemap*, X86Instr*, Bool );
-extern Bool isMove_X86Instr ( const X86Instr*, HReg*, HReg* );
 extern Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc,
                            UChar* buf, Int nbuf, const X86Instr* i,
                            Bool mode64,
diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c
index b27d6ca..107a6a6 100644
--- a/VEX/priv/main_main.c
+++ b/VEX/priv/main_main.c
@@ -709,7 +709,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
    /* This the bundle of functions we need to do the back-end stuff
       (insn selection, reg-alloc, assembly) whilst being insulated
       from the target instruction set. */
-   Bool (*isMove) ( const HInstr*, HReg*, HReg* );
    void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool );
    void (*mapRegs) ( HRegRemap*, HInstr*, Bool );
    void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool );
@@ -739,7 +738,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
    HInstrArray* vcode;
    HInstrArray* rcode;
 
-   isMove = NULL;
    getRegUsage = NULL;
    mapRegs = NULL;
    genSpill = NULL;
@@ -857,7 +855,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
       case VexArchX86:
         mode64 = False;
         rRegUniv = X86FN(getRRegUniverse_X86());
-         isMove = CAST_TO_TYPEOF(isMove) X86FN(isMove_X86Instr);
         getRegUsage
            = CAST_TO_TYPEOF(getRegUsage) X86FN(getRegUsage_X86Instr);
         mapRegs = CAST_TO_TYPEOF(mapRegs) X86FN(mapRegs_X86Instr);
@@ -875,7 +872,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
       case VexArchAMD64:
         mode64 = True;
         rRegUniv = AMD64FN(getRRegUniverse_AMD64());
-         isMove = CAST_TO_TYPEOF(isMove) AMD64FN(isMove_AMD64Instr);
         getRegUsage
            = CAST_TO_TYPEOF(getRegUsage) AMD64FN(getRegUsage_AMD64Instr);
         mapRegs = CAST_TO_TYPEOF(mapRegs) AMD64FN(mapRegs_AMD64Instr);
@@ -893,7 +889,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
       case VexArchPPC32:
         mode64 = False;
         rRegUniv = PPC32FN(getRRegUniverse_PPC(mode64));
-         isMove = CAST_TO_TYPEOF(isMove) PPC32FN(isMove_PPCInstr);
         getRegUsage
            = CAST_TO_TYPEOF(getRegUsage) PPC32FN(getRegUsage_PPCInstr);
         mapRegs = CAST_TO_TYPEOF(mapRegs) PPC32FN(mapRegs_PPCInstr);
@@ -910,7 +905,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
       case VexArchPPC64:
         mode64 = True;
         rRegUniv = PPC64FN(getRRegUniverse_PPC(mode64));
-         isMove = CAST_TO_TYPEOF(isMove) PPC64FN(isMove_PPCInstr);
         getRegUsage
            = CAST_TO_TYPEOF(getRegUsage) PPC64FN(getRegUsage_PPCInstr);
         mapRegs = CAST_TO_TYPEOF(mapRegs) PPC64FN(mapRegs_PPCInstr);
@@ -928,7 +922,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
       case VexArchS390X:
         mode64 = True;
         rRegUniv = S390FN(getRRegUniverse_S390());
-         isMove = CAST_TO_TYPEOF(isMove) S390FN(isMove_S390Instr);
         getRegUsage
            = CAST_TO_TYPEOF(getRegUsage) S390FN(getRegUsage_S390Instr);
         mapRegs = CAST_TO_TYPEOF(mapRegs) S390FN(mapRegs_S390Instr);
@@ -946,7 +939,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
       case VexArchARM:
         mode64 = False;
         rRegUniv = ARMFN(getRRegUniverse_ARM());
-         isMove = CAST_TO_TYPEOF(isMove) ARMFN(isMove_ARMInstr);
         getRegUsage
            = CAST_TO_TYPEOF(getRegUsage) ARMFN(getRegUsage_ARMInstr);
         mapRegs = CAST_TO_TYPEOF(mapRegs) ARMFN(mapRegs_ARMInstr);
@@ -963,7 +955,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
       case VexArchARM64:
         mode64 = True;
         rRegUniv = ARM64FN(getRRegUniverse_ARM64());
-         isMove = CAST_TO_TYPEOF(isMove) ARM64FN(isMove_ARM64Instr);
         getRegUsage
            = CAST_TO_TYPEOF(getRegUsage) ARM64FN(getRegUsage_ARM64Instr);
         mapRegs = CAST_TO_TYPEOF(mapRegs) ARM64FN(mapRegs_ARM64Instr);
@@ -980,7 +971,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
       case VexArchMIPS32:
         mode64 = False;
         rRegUniv = MIPS32FN(getRRegUniverse_MIPS(mode64));
-         isMove = CAST_TO_TYPEOF(isMove) MIPS32FN(isMove_MIPSInstr);
         getRegUsage
            = CAST_TO_TYPEOF(getRegUsage) MIPS32FN(getRegUsage_MIPSInstr);
         mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS32FN(mapRegs_MIPSInstr);
@@ -998,7 +988,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
       case VexArchMIPS64:
         mode64 = True;
         rRegUniv = MIPS64FN(getRRegUniverse_MIPS(mode64));
-         isMove = CAST_TO_TYPEOF(isMove) MIPS64FN(isMove_MIPSInstr);
         getRegUsage
            = CAST_TO_TYPEOF(getRegUsage) MIPS64FN(getRegUsage_MIPSInstr);
         mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS64FN(mapRegs_MIPSInstr);
@@ -1082,11 +1071,10 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
 
    /* Register allocate. */
    RegAllocControl con = {
-      .univ = rRegUniv, .isMove = isMove, .getRegUsage = getRegUsage,
-      .mapRegs = mapRegs, .genSpill = genSpill, .genReload = genReload,
-      .genMove = genMove, .directReload = directReload,
-      .guest_sizeB = guest_sizeB, .ppInstr = ppInstr, .ppReg = ppReg,
-      .mode64 = mode64};
+      .univ = rRegUniv, .getRegUsage = getRegUsage, .mapRegs = mapRegs,
+      .genSpill = genSpill, .genReload = genReload, .genMove = genMove,
+      .directReload = directReload, .guest_sizeB = guest_sizeB,
+      .ppInstr = ppInstr, .ppReg = ppReg, .mode64 = mode64};
    switch (vex_control.regalloc_version) {
       case 2:
         rcode = doRegisterAllocation_v2(vcode, &con);