You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
gentoo-overlay/dev-lang/ghc/files/ghc-7.8.3-ppc32-fPIC.patch

352 lines
14 KiB

commit fa31e8f4a0f853848d96549a429083941877bf8d
Author: Sergei Trofimovich <siarheit@google.com>
Date: Sun Dec 14 14:30:12 2014 +0000
powerpc: fix and enable shared libraries by default on linux
Summary:
And fix things all the way down to it. Namely:
- remove 'r30' from free registers, it's an .LCTOC1 register
for gcc. generated .plt stubs expect it to be initialised.
- fix PicBase computation, which originally forgot to use 'tmp'
reg in 'initializePicBase_ppc.fetchPC'
- mark 'ForeighTarget's as implicitly using 'PicBase' register
(see comment for details)
- add 64-bit MO_Sub and test on alloclimit3/4 regtests
- fix dynamic label offsets to match with .LCTOC1 offset
Signed-off-by: Sergei Trofimovich <siarheit@google.com>
Test Plan: validate passes equal amount of vanilla/dyn tests
Reviewers: simonmar, erikd, austin
Reviewed By: erikd, austin
Subscribers: carter, thomie
Differential Revision: https://phabricator.haskell.org/D560
GHC Trac Issues: #8024, #9831
diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs
index 0f2c0ae..37b8ada 100644
--- a/compiler/cmm/CLabel.hs
+++ b/compiler/cmm/CLabel.hs
@@ -1170,7 +1170,8 @@ pprDynamicLinkerAsmLabel platform dllInfo lbl
else if osElfTarget (platformOS platform)
then if platformArch platform == ArchPPC
then case dllInfo of
- CodeStub -> ppr lbl <> text "@plt"
+ CodeStub -> -- See Note [.LCTOC1 in PPC PIC code]
+ ppr lbl <> text "+32768@plt"
SymbolPtr -> text ".LC_" <> ppr lbl
_ -> panic "pprDynamicLinkerAsmLabel"
else if platformArch platform == ArchX86_64
diff --git a/compiler/nativeGen/PIC.hs b/compiler/nativeGen/PIC.hs
index 9b5c080..6326a8b 100644
--- a/compiler/nativeGen/PIC.hs
+++ b/compiler/nativeGen/PIC.hs
@@ -54,7 +54,6 @@ import qualified X86.Instr as X86
import Platform
import Instruction
-import Size
import Reg
import NCGMonad
@@ -468,11 +467,8 @@ pprGotDeclaration dflags ArchX86 OSDarwin
pprGotDeclaration _ _ OSDarwin
= empty
--- pprGotDeclaration
+-- Emit GOT declaration
-- Output whatever needs to be output once per .s file.
--- The .LCTOC1 label is defined to point 32768 bytes into the table,
--- to make the most of the PPC's 16-bit displacements.
--- Only needed for PIC.
pprGotDeclaration dflags arch os
| osElfTarget os
, arch /= ArchPPC_64
@@ -482,6 +478,7 @@ pprGotDeclaration dflags arch os
| osElfTarget os
, arch /= ArchPPC_64
= vcat [
+ -- See Note [.LCTOC1 in PPC PIC code]
ptext (sLit ".section \".got2\",\"aw\""),
ptext (sLit ".LCTOC1 = .+32768") ]
@@ -688,12 +685,7 @@ pprImportedSymbol _ _ _
-- Get a pointer to our own fake GOT, which is defined on a per-module basis.
--- This is exactly how GCC does it, and it's quite horrible:
--- We first fetch the address of a local label (mkPicBaseLabel).
--- Then we add a 16-bit offset to that to get the address of a .long that we
--- define in .text space right next to the proc. This .long literal contains
--- the (32-bit) offset from our local label to our global offset table
--- (.LCTOC1 aka gotOffLabel).
+-- This is exactly how GCC does it in linux.
initializePicBase_ppc
:: Arch -> OS -> Reg
@@ -704,18 +696,9 @@ initializePicBase_ppc ArchPPC os picReg
(CmmProc info lab live (ListGraph blocks) : statics)
| osElfTarget os
= do
- dflags <- getDynFlags
- gotOffLabel <- getNewLabelNat
- tmp <- getNewRegNat $ intSize (wordWidth dflags)
let
- gotOffset = CmmData Text $ Statics gotOffLabel [
- CmmStaticLit (CmmLabelDiffOff gotLabel
- mkPicBaseLabel
- 0)
- ]
- offsetToOffset
- = PPC.ImmConstantDiff
- (PPC.ImmCLbl gotOffLabel)
+ gotOffset = PPC.ImmConstantDiff
+ (PPC.ImmCLbl gotLabel)
(PPC.ImmCLbl mkPicBaseLabel)
blocks' = case blocks of
@@ -726,15 +709,23 @@ initializePicBase_ppc ArchPPC os picReg
| bID `mapMember` info = fetchPC b
| otherwise = b
+ -- GCC does PIC prologs thusly:
+ -- bcl 20,31,.L1
+ -- .L1:
+ -- mflr 30
+ -- addis 30,30,.LCTOC1-.L1@ha
+ -- addi 30,30,.LCTOC1-.L1@l
+ -- TODO: below we use it over temporary register,
+ -- it can and should be optimised by picking
+ -- correct PIC reg.
fetchPC (BasicBlock bID insns) =
BasicBlock bID (PPC.FETCHPC picReg
- : PPC.ADDIS tmp picReg (PPC.HI offsetToOffset)
- : PPC.LD PPC.archWordSize tmp
- (PPC.AddrRegImm tmp (PPC.LO offsetToOffset))
- : PPC.ADD picReg picReg (PPC.RIReg picReg)
+ : PPC.ADDIS picReg picReg (PPC.HA gotOffset)
+ : PPC.ADDI picReg picReg (PPC.LO gotOffset)
+ : PPC.MR PPC.r30 picReg
: insns)
- return (CmmProc info lab live (ListGraph blocks') : gotOffset : statics)
+ return (CmmProc info lab live (ListGraph blocks') : statics)
initializePicBase_ppc ArchPPC OSDarwin picReg
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index ddf483a..c1c4a74 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -54,7 +54,7 @@ import Outputable
import Unique
import DynFlags
-import Control.Monad ( mapAndUnzipM )
+import Control.Monad ( mapAndUnzipM, when )
import Data.Bits
import Data.Word
@@ -355,6 +355,19 @@ iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
ADDE rhi r1hi r2hi ]
return (ChildCode64 code rlo)
+iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
+ ChildCode64 code1 r1lo <- iselExpr64 e1
+ ChildCode64 code2 r2lo <- iselExpr64 e2
+ (rlo,rhi) <- getNewRegPairNat II32
+ let
+ r1hi = getHiVRegFromLo r1lo
+ r2hi = getHiVRegFromLo r2lo
+ code = code1 `appOL`
+ code2 `appOL`
+ toOL [ SUBFC rlo r2lo r1lo,
+ SUBFE rhi r2hi r1hi ]
+ return (ChildCode64 code rlo)
+
iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
(expr_reg,expr_code) <- getSomeReg expr
(rlo, rhi) <- getNewRegPairNat II32
@@ -918,8 +931,12 @@ genCCall' dflags gcp target dest_regs args0
(toOL []) []
(labelOrExpr, reduceToFF32) <- case target of
- ForeignTarget (CmmLit (CmmLabel lbl)) _ -> return (Left lbl, False)
- ForeignTarget expr _ -> return (Right expr, False)
+ ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
+ uses_pic_base_implicitly
+ return (Left lbl, False)
+ ForeignTarget expr _ -> do
+ uses_pic_base_implicitly
+ return (Right expr, False)
PrimTarget mop -> outOfLineMachOp mop
let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
@@ -940,6 +957,13 @@ genCCall' dflags gcp target dest_regs args0
where
platform = targetPlatform dflags
+ uses_pic_base_implicitly = do
+ -- See Note [implicit register in PPC PIC code]
+ -- on why we claim to use PIC register here
+ when (gopt Opt_PIC dflags) $ do
+ _ <- getPicBaseNat archWordSize
+ return ()
+
initialStackOffset = case gcp of
GCPDarwin -> 24
GCPLinux -> 8
@@ -1431,3 +1455,21 @@ coerceFP2Int _ toRep x = do
-- read low word of value (high word is undefined)
LD II32 dst (spRel dflags 3)]
return (Any (intSize toRep) code')
+
+-- Note [.LCTOC1 in PPC PIC code]
+-- The .LCTOC1 label is defined to point 32768 bytes into the GOT table
+-- to make the most of the PPC's 16-bit displacements.
+-- As 16-bit signed offset is used (usually via addi/lwz instructions)
+-- first element will have '-32768' offset against .LCTOC1.
+
+-- Note [implicit register in PPC PIC code]
+-- PPC generates calls by labels in assembly
+-- in form of:
+-- bl puts+32768@plt
+-- in this form it's not seen directly (by GHC NCG)
+-- that r30 (PicBaseReg) is used,
+-- but r30 is a required part of PLT code setup:
+-- puts+32768@plt:
+-- lwz r11,-30484(r30) ; offset in .LCTOC1
+-- mtctr r11
+-- bctr
diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs
index f5b9506..b7081f9 100644
--- a/compiler/nativeGen/PPC/Instr.hs
+++ b/compiler/nativeGen/PPC/Instr.hs
@@ -205,8 +205,11 @@ data Instr
| ADD Reg Reg RI -- dst, src1, src2
| ADDC Reg Reg Reg -- (carrying) dst, src1, src2
| ADDE Reg Reg Reg -- (extend) dst, src1, src2
+ | ADDI Reg Reg Imm -- Add Immediate dst, src1, src2
| ADDIS Reg Reg Imm -- Add Immediate Shifted dst, src1, src2
| SUBF Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1
+ | SUBFC Reg Reg Reg -- (carrying) dst, src1, src2 ; dst = src2 - src1
+ | SUBFE Reg Reg Reg -- (extend) dst, src1, src2 ; dst = src2 - src1
| MULLW Reg Reg RI
| DIVW Reg Reg Reg
| DIVWU Reg Reg Reg
@@ -284,8 +287,11 @@ ppc_regUsageOfInstr platform instr
ADD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
ADDC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
ADDE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
+ ADDI reg1 reg2 _ -> usage ([reg2], [reg1])
ADDIS reg1 reg2 _ -> usage ([reg2], [reg1])
SUBF reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
+ SUBFC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
+ SUBFE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
MULLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
DIVW reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
DIVWU reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
@@ -358,8 +364,11 @@ ppc_patchRegsOfInstr instr env
ADD reg1 reg2 ri -> ADD (env reg1) (env reg2) (fixRI ri)
ADDC reg1 reg2 reg3 -> ADDC (env reg1) (env reg2) (env reg3)
ADDE reg1 reg2 reg3 -> ADDE (env reg1) (env reg2) (env reg3)
+ ADDI reg1 reg2 imm -> ADDI (env reg1) (env reg2) imm
ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm
SUBF reg1 reg2 reg3 -> SUBF (env reg1) (env reg2) (env reg3)
+ SUBFC reg1 reg2 reg3 -> SUBFC (env reg1) (env reg2) (env reg3)
+ SUBFE reg1 reg2 reg3 -> SUBFE (env reg1) (env reg2) (env reg3)
MULLW reg1 reg2 ri -> MULLW (env reg1) (env reg2) (fixRI ri)
DIVW reg1 reg2 reg3 -> DIVW (env reg1) (env reg2) (env reg3)
DIVWU reg1 reg2 reg3 -> DIVWU (env reg1) (env reg2) (env reg3)
diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs
index 6851769..f59d51f 100644
--- a/compiler/nativeGen/PPC/Ppr.hs
+++ b/compiler/nativeGen/PPC/Ppr.hs
@@ -525,6 +525,16 @@ pprInstr (BCTRL _) = hcat [
ptext (sLit "bctrl")
]
pprInstr (ADD reg1 reg2 ri) = pprLogic (sLit "add") reg1 reg2 ri
+pprInstr (ADDI reg1 reg2 imm) = hcat [
+ char '\t',
+ ptext (sLit "addi"),
+ char '\t',
+ pprReg reg1,
+ ptext (sLit ", "),
+ pprReg reg2,
+ ptext (sLit ", "),
+ pprImm imm
+ ]
pprInstr (ADDIS reg1 reg2 imm) = hcat [
char '\t',
ptext (sLit "addis"),
@@ -539,6 +549,8 @@ pprInstr (ADDIS reg1 reg2 imm) = hcat [
pprInstr (ADDC reg1 reg2 reg3) = pprLogic (sLit "addc") reg1 reg2 (RIReg reg3)
pprInstr (ADDE reg1 reg2 reg3) = pprLogic (sLit "adde") reg1 reg2 (RIReg reg3)
pprInstr (SUBF reg1 reg2 reg3) = pprLogic (sLit "subf") reg1 reg2 (RIReg reg3)
+pprInstr (SUBFC reg1 reg2 reg3) = pprLogic (sLit "subfc") reg1 reg2 (RIReg reg3)
+pprInstr (SUBFE reg1 reg2 reg3) = pprLogic (sLit "subfe") reg1 reg2 (RIReg reg3)
pprInstr (MULLW reg1 reg2 ri@(RIReg _)) = pprLogic (sLit "mullw") reg1 reg2 ri
pprInstr (MULLW reg1 reg2 ri@(RIImm _)) = pprLogic (sLit "mull") reg1 reg2 ri
pprInstr (DIVW reg1 reg2 reg3) = pprLogic (sLit "divw") reg1 reg2 (RIReg reg3)
diff --git a/compiler/nativeGen/PPC/Regs.hs b/compiler/nativeGen/PPC/Regs.hs
index 0f636bf..69e69c0 100644
--- a/compiler/nativeGen/PPC/Regs.hs
+++ b/compiler/nativeGen/PPC/Regs.hs
@@ -37,7 +37,8 @@ module PPC.Regs (
fits16Bits,
makeImmediate,
fReg,
- sp, r3, r4, r27, r28, f1, f20, f21,
+ sp, r3, r4, r27, r28, r30,
+ f1, f20, f21,
allocatableRegs
@@ -295,12 +296,13 @@ point registers.
fReg :: Int -> RegNo
fReg x = (32 + x)
-sp, r3, r4, r27, r28, f1, f20, f21 :: Reg
+sp, r3, r4, r27, r28, r30, f1, f20, f21 :: Reg
sp = regSingle 1
r3 = regSingle 3
r4 = regSingle 4
r27 = regSingle 27
r28 = regSingle 28
+r30 = regSingle 30
f1 = regSingle $ fReg 1
f20 = regSingle $ fReg 20
f21 = regSingle $ fReg 21
diff --git a/includes/CodeGen.Platform.hs b/includes/CodeGen.Platform.hs
index 9916e0e..1d46a01 100644
--- a/includes/CodeGen.Platform.hs
+++ b/includes/CodeGen.Platform.hs
@@ -881,6 +881,8 @@ freeReg 1 = fastBool False -- The Stack Pointer
# if !MACHREGS_darwin
-- most non-darwin powerpc OSes use r2 as a TOC pointer or something like that
freeReg 2 = fastBool False
+-- at least linux in -fPIC relies on r30 in PLT stubs
+freeReg 30 = fastBool False
# endif
# ifdef REG_Base
freeReg REG_Base = fastBool False
diff --git a/mk/config.mk.in b/mk/config.mk.in
index 0f5820f..8f134bc 100644
--- a/mk/config.mk.in
+++ b/mk/config.mk.in
@@ -95,7 +95,7 @@ TargetElf = YES
endif
# Some platforms don't support shared libraries
-NoSharedLibsPlatformList = powerpc-unknown-linux \
+NoSharedLibsPlatformList = \
x86_64-unknown-mingw32 \
i386-unknown-mingw32