diff --git a/asterius/src/Asterius/Ar.hs b/asterius/src/Asterius/Ar.hs index b1569e3bb3..9c3dfcd171 100644 --- a/asterius/src/Asterius/Ar.hs +++ b/asterius/src/Asterius/Ar.hs @@ -19,7 +19,9 @@ -- roll out our own implementation of @loadArchive@/@createArchive@ (based on -- that of GHC). module Asterius.Ar - ( loadArchive, + ( loadArchiveRep, + loadArchiveFile, + loadCompleteArchiveFile, createArchive, ) where @@ -32,7 +34,7 @@ import Data.Binary.Put import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as CBS import qualified Data.ByteString.Lazy as LBS -import Data.Foldable +import qualified Data.Set as Set import Data.Traversable import GHC.IO.Unsafe import qualified IfaceEnv as GHC @@ -94,16 +96,38 @@ writeArchiveToFile fp = LBS.writeFile fp . runPut . putArchive -- files in the archive that cannot be parsed. Also, the metadata of the -- contained files are ignored ('createArchive' always sets them to default -- values anyway). -loadArchive :: GHC.NameCacheUpdater -> FilePath -> IO AsteriusCachedModule -loadArchive ncu p = do - Archive entries <- walkArchiveFile p - foldlM - ( \acc entry -> tryGetBS ncu entry >>= \case - Left _ -> pure acc - Right m -> pure $ m <> acc - ) - mempty - entries +loadArchiveRep :: GHC.NameCacheUpdater -> FilePath -> IO AsteriusRepModule +loadArchiveRep ncu path = do + Archive entries <- walkArchiveFile path + ms <- for entries $ \entry -> tryGetBS ncu entry >>= \case + Left {} -> pure mempty -- Note [Malformed object files] in Asterius.Ld + Right m -> pure + AsteriusRepModule + { dependencyMap = onDiskDependencyMap m, + moduleExports = onDiskModuleExports m, + objectSources = mempty, -- Set it once and for all afterwards + archiveSources = mempty, + inMemoryModule = mempty + } + let combined = mconcat ms + pure combined {archiveSources = Set.singleton path} + +loadArchiveFile :: GHC.NameCacheUpdater -> FilePath -> (AsteriusModule -> AsteriusModule) -> IO AsteriusModule +loadArchiveFile ncu path fn = do + Archive entries <- walkArchiveFile path + ms <- for entries $ \entry -> tryGetBS ncu entry >>= \case + Left {} -> pure mempty -- Note [Malformed object files] in Asterius.Ld + Right m -> pure $ fn $ onDiskToInMemory m + pure $ mconcat ms + +-- could also be loadArchiveFile ncu path id, but let's see if this is more efficient. +loadCompleteArchiveFile :: GHC.NameCacheUpdater -> FilePath -> IO AsteriusModule +loadCompleteArchiveFile ncu path = do + Archive entries <- walkArchiveFile path + ms <- for entries $ \entry -> tryGetBS ncu entry >>= \case + Left {} -> pure mempty -- Note [Malformed object files] in Asterius.Ld + Right m -> pure $ onDiskToInMemory m + pure $ mconcat ms -- | Archives have numeric values padded with '\x20' to the right. getPaddedInt :: BS.ByteString -> Int diff --git a/asterius/src/Asterius/Boot.hs b/asterius/src/Asterius/Boot.hs index c9f0d65d78..3e6805a6cb 100644 --- a/asterius/src/Asterius/Boot.hs +++ b/asterius/src/Asterius/Boot.hs @@ -135,7 +135,7 @@ bootRTSCmm BootArgs {..} = in runCodeGen (marshalCmmIR ms_mod ir) dflags ms_mod >>= \case Left err -> throwIO err Right m -> do - putFile obj_path $ toCachedModule m + putFile obj_path $ inMemoryToOnDisk m modifyIORef' obj_paths_ref (obj_path :) when is_debug $ do let p = (obj_path -<.>) diff --git a/asterius/src/Asterius/Builtins.hs b/asterius/src/Asterius/Builtins.hs index c1d5228aa7..155808ec80 100644 --- a/asterius/src/Asterius/Builtins.hs +++ b/asterius/src/Asterius/Builtins.hs @@ -799,6 +799,9 @@ generateRTSWrapper mod_sym func_sym param_vts ret_vts = [I64] -> ([F64], truncSFloat64ToInt64) _ -> (ret_vts, id) +-- | Create a wrapper function for a JSFFI function import. The wrapper +-- function takes care of the I64/F64 conversion for both the arguments and the +-- results, and internally calls the function import. generateWrapperFunction :: EntitySymbol -> Function -> Function generateWrapperFunction func_sym Function {functionType = FunctionType {..}} = Function @@ -835,8 +838,10 @@ generateWrapperFunction func_sym Function {functionType = FunctionType {..}} = [I64] -> ([F64], convertSInt64ToFloat64) _ -> (returnTypes, id) --- Renames each function in the module to _wrapper, and --- edits their implementation using 'generateWrapperFunction' +-- | Rename each function in the module to _wrapper, and edit its +-- implementation using 'generateWrapperFunction'. Essentially, for each JSFFI +-- import we have two things: a Wasm function import, and a function wrapper +-- that takes care of the I64/F64 conversion. generateWrapperModule :: AsteriusModule -> AsteriusModule generateWrapperModule m = m diff --git a/asterius/src/Asterius/FrontendPlugin.hs b/asterius/src/Asterius/FrontendPlugin.hs index 8deb51b5ee..67ee19dc5c 100644 --- a/asterius/src/Asterius/FrontendPlugin.hs +++ b/asterius/src/Asterius/FrontendPlugin.hs @@ -83,7 +83,7 @@ frontendPlugin = makeFrontendPlugin $ do Left err -> throwIO err Right m' -> do let m = ffi_mod <> m' - putFile obj_path $ toCachedModule m + putFile obj_path $ inMemoryToOnDisk m when is_debug $ do let p = (obj_path -<.>) writeFile (p "dump-wasm-ast") =<< prettyShow m @@ -100,7 +100,7 @@ frontendPlugin = makeFrontendPlugin $ do runCodeGen (marshalCmmIR ms_mod ir) dflags ms_mod >>= \case Left err -> throwIO err Right m -> do - putFile obj_path $ toCachedModule m + putFile obj_path $ inMemoryToOnDisk m when is_debug $ do let p = (obj_path -<.>) writeFile (p "dump-wasm-ast") =<< prettyShow m diff --git a/asterius/src/Asterius/GHCi/Internals.hs b/asterius/src/Asterius/GHCi/Internals.hs index b62d6cc151..e971b137a9 100644 --- a/asterius/src/Asterius/GHCi/Internals.hs +++ b/asterius/src/Asterius/GHCi/Internals.hs @@ -94,9 +94,9 @@ import qualified VarEnv as GHC data GHCiState = GHCiState { ghciUniqSupply :: GHC.UniqSupply, ghciNameCacheUpdater :: GHC.NameCacheUpdater, - ghciLibs :: AsteriusCachedModule, - ghciObjs :: M.Map FilePath AsteriusCachedModule, - ghciCompiledCoreExprs :: IM.IntMap (EntitySymbol, AsteriusCachedModule), + ghciLibs :: AsteriusRepModule, + ghciObjs :: M.Map FilePath AsteriusRepModule, + ghciCompiledCoreExprs :: IM.IntMap (EntitySymbol, AsteriusRepModule), ghciLastCompiledCoreExpr :: Int, ghciSession :: ~(Session, Pipe, JSVal) } @@ -196,10 +196,10 @@ asteriusIservCall hsc_env _ msg = do GHC.InitLinker -> pure () GHC.LoadDLL _ -> pure Nothing GHC.LoadArchive p -> modifyMVar_ globalGHCiState $ \s -> do - lib <- loadArchive (ghciNameCacheUpdater s) p + lib <- loadArchiveRep (ghciNameCacheUpdater s) p evaluate s {ghciLibs = lib <> ghciLibs s} GHC.LoadObj p -> modifyMVar_ globalGHCiState $ \s -> do - obj <- getFile (ghciNameCacheUpdater s) p + obj <- onDiskToObjRep p <$> getFile (ghciNameCacheUpdater s) p evaluate s {ghciObjs = M.insert p obj $ ghciObjs s} GHC.AddLibrarySearchPath _ -> pure $ GHC.RemotePtr 0 GHC.RemoveLibrarySearchPath _ -> pure True @@ -490,7 +490,7 @@ asteriusHscCompileCoreExpr hsc_env srcspan ds_expr = do pure ( s { ghciCompiledCoreExprs = - IM.insert this_id (sym, toCachedModule m) $ + IM.insert this_id (sym, toAsteriusRepModule m) $ ghciCompiledCoreExprs s, ghciLastCompiledCoreExpr = this_id }, diff --git a/asterius/src/Asterius/JSRun/NonMain.hs b/asterius/src/Asterius/JSRun/NonMain.hs index 5788c6bc2f..55e3d74bde 100644 --- a/asterius/src/Asterius/JSRun/NonMain.hs +++ b/asterius/src/Asterius/JSRun/NonMain.hs @@ -17,33 +17,33 @@ import Asterius.Main.Task import Asterius.Resolve import Asterius.Types ( EntitySymbol, - AsteriusCachedModule, + AsteriusRepModule, Module, ) import Data.String import Language.JavaScript.Inline.Core import System.FilePath -linkNonMain :: AsteriusCachedModule -> [EntitySymbol] -> (Module, LinkReport) -linkNonMain store_m extra_syms = (m, link_report) - where - (_, m, link_report) = - linkModules - LinkTask - { progName = "", - linkOutput = "", - linkObjs = [], - linkLibs = [], - linkModule = mempty, - Asterius.Ld.hasMain = False, - Asterius.Ld.debug = False, - Asterius.Ld.gcSections = True, - Asterius.Ld.verboseErr = True, - Asterius.Ld.outputIR = Nothing, - rootSymbols = extra_syms, - Asterius.Ld.exportFunctions = [] - } - store_m +linkNonMain :: AsteriusRepModule -> [EntitySymbol] -> IO (Module, LinkReport) +linkNonMain module_rep extra_syms = do + (_, m, link_report) <- + linkModules + LinkTask + { progName = "", + linkOutput = "", + linkObjs = [], + linkLibs = [], + linkModule = mempty, + Asterius.Ld.hasMain = False, + Asterius.Ld.debug = False, + Asterius.Ld.gcSections = True, + Asterius.Ld.verboseErr = True, + Asterius.Ld.outputIR = Nothing, + rootSymbols = extra_syms, + Asterius.Ld.exportFunctions = [] + } + module_rep + return (m, link_report) distNonMain :: FilePath -> [EntitySymbol] -> (Module, LinkReport) -> IO () @@ -68,10 +68,10 @@ newAsteriusInstanceNonMain :: Session -> FilePath -> [EntitySymbol] -> - AsteriusCachedModule -> + AsteriusRepModule -> IO JSVal -newAsteriusInstanceNonMain s p extra_syms m = do - distNonMain p extra_syms $ linkNonMain m extra_syms +newAsteriusInstanceNonMain s p extra_syms module_rep = do + linkNonMain module_rep extra_syms >>= distNonMain p extra_syms let rts_path = dataDir "rts" "rts.mjs" req_path = p -<.> "req.mjs" wasm_path = p -<.> "wasm" diff --git a/asterius/src/Asterius/Ld.hs b/asterius/src/Asterius/Ld.hs index e7e5aaae80..cee889b7fe 100644 --- a/asterius/src/Asterius/Ld.hs +++ b/asterius/src/Asterius/Ld.hs @@ -1,4 +1,5 @@ {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE RecordWildCards #-} @@ -22,32 +23,37 @@ import Asterius.Resolve import Asterius.Types import qualified Asterius.Types.SymbolSet as SS import Control.Exception -import Data.Either import Data.Traversable data LinkTask = LinkTask { progName, linkOutput :: FilePath, linkObjs, linkLibs :: [FilePath], - linkModule :: AsteriusCachedModule, + linkModule :: AsteriusRepModule, hasMain, debug, gcSections, verboseErr :: Bool, outputIR :: Maybe FilePath, rootSymbols, exportFunctions :: [EntitySymbol] } deriving (Show) +{- +Note [Malformed object files] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Object files in Haskell package directories can also originate from gcc being +called on cbits in packages. This in the past gave deserialization failures. +Hence, when we deserialize objects to be linked in 'loadTheWorld', we choose to +be overpermissive and silently ignore deserialization failures. This has worked +well so far. +-} + -- | Load all the library and object dependencies for a 'LinkTask' into a --- single module. NOTE: object files in Haskell package directories can also --- originate from gcc being called on cbits in packages. This in the past gave --- deserialization failures. Hence, when we deserialize objects to be linked in --- 'loadTheWorld', we choose to be overpermissive and silently ignore --- deserialization failures. This has worked well so far. -loadTheWorld :: LinkTask -> IO AsteriusCachedModule +-- single module. +loadTheWorld :: LinkTask -> IO AsteriusRepModule loadTheWorld LinkTask {..} = do ncu <- newNameCacheUpdater - lib <- mconcat <$> for linkLibs (loadArchive ncu) - objs <- rights <$> for linkObjs (tryGetFile ncu) - evaluate $ linkModule <> mconcat objs <> lib + libs <- for linkLibs (loadArchiveRep ncu) + objs <- for linkObjs (loadObjectRep ncu) + evaluate $ linkModule <> mconcat objs <> mconcat libs -- | The *_info are generated from Cmm using the INFO_TABLE macro. -- For example, see StgMiscClosures.cmm / Exception.cmm @@ -89,13 +95,13 @@ rtsPrivateSymbols = ] linkModules :: - LinkTask -> AsteriusCachedModule -> (AsteriusModule, Module, LinkReport) -linkModules LinkTask {..} m = + LinkTask -> AsteriusRepModule -> IO (AsteriusModule, Module, LinkReport) +linkModules LinkTask {..} module_rep = linkStart debug gcSections verboseErr - ( toCachedModule + ( toAsteriusRepModule ( (if hasMain then mainBuiltins else mempty) <> rtsAsteriusModule defaultBuiltinsOptions @@ -103,7 +109,7 @@ linkModules LinkTask {..} m = Asterius.Builtins.debug = debug } ) - <> m + <> module_rep ) ( SS.unions [ SS.fromList rootSymbols, @@ -119,13 +125,13 @@ linkModules LinkTask {..} m = linkExeInMemory :: LinkTask -> IO (AsteriusModule, Module, LinkReport) linkExeInMemory ld_task = do - final_store <- loadTheWorld ld_task - evaluate $ linkModules ld_task final_store + module_rep <- loadTheWorld ld_task + linkModules ld_task module_rep linkExe :: LinkTask -> IO () linkExe ld_task@LinkTask {..} = do (pre_m, m, link_report) <- linkExeInMemory ld_task putFile linkOutput (m, link_report) case outputIR of - Just p -> putFile p $ toCachedModule pre_m + Just p -> putFile p $ inMemoryToOnDisk pre_m _ -> pure () diff --git a/asterius/src/Asterius/Passes/GCSections.hs b/asterius/src/Asterius/Passes/GCSections.hs index 59af42e774..d6872dd94e 100644 --- a/asterius/src/Asterius/Passes/GCSections.hs +++ b/asterius/src/Asterius/Passes/GCSections.hs @@ -1,40 +1,50 @@ -{-# LANGUAGE GADTs #-} -{-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE BangPatterns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE TypeApplications #-} {-# LANGUAGE ViewPatterns #-} +-- | +-- Module : Asterius.Passes.GCSections +-- Copyright : (c) 2018 EURL Tweag +-- License : All rights reserved (see LICENCE file in the distribution). +-- +-- Given a set of root symbols and exported functions, load an 'AsteriusModule' +-- from disk, while eliminating unreachable code ('gcSections'). If the +-- complete module is needed, use 'fromAsteriusRepModule' instead. module Asterius.Passes.GCSections ( gcSections, + fromAsteriusRepModule, ) where +import Asterius.Ar +import Asterius.Binary.NameCache import Asterius.Types import Asterius.Types.EntitySymbol import qualified Asterius.Types.SymbolMap as SM import qualified Asterius.Types.SymbolSet as SS +import Control.DeepSeq import Data.Maybe +import qualified Data.Set as Set +import Data.Traversable +-- | Build an 'AsteriusModule' from an 'AsteriusRepModule', by keeping only the +-- parts of the program that are reachable from the given root symbols and +-- exported functions. Notice that this operation needs to be in 'IO', since +-- most parts of the generated 'AsteriusModule' need to be read from disk. gcSections :: Bool -> - AsteriusCachedModule -> + AsteriusRepModule -> SS.SymbolSet -> [EntitySymbol] -> - AsteriusModule -gcSections verbose_err store_mod root_syms (SS.fromList -> export_funcs) = - buildGCModule mod_syms err_syms (fromCachedModule store_mod) export_funcs + IO AsteriusModule +gcSections verbose_err module_rep root_syms export_funcs = + buildGCModule mod_syms err_syms module_rep (SS.fromList export_funcs) where - all_root_syms = root_syms <> module_exports - (mod_syms, err_syms) = resolveSyms verbose_err all_root_syms $ dependencyMap store_mod - module_exports = - SS.fromList - [ ffiExportClosure - | FFIExportDecl {..} <- - SM.elems $ - ffiExportDecls (ffiMarshalState $ fromCachedModule store_mod) - `SM.restrictKeys` export_funcs - ] + all_root_syms = root_syms <> moduleExports module_rep + (force -> !mod_syms, err_syms) = + resolveSyms verbose_err all_root_syms $ + dependencyMap module_rep -- | Resolve all symbols that are reachable from the given root symbols. This -- includes 2 categories: symbols that refer to statics and functions, and @@ -86,11 +96,37 @@ gcModule mod_syms export_funcs m = ffi_imports = ffiImportDecls (ffiMarshalState m) `SM.restrictKeys` wrapper_fn_syms ffi_exports = ffiExportDecls (ffiMarshalState m) `SM.restrictKeys` export_funcs -buildGCModule :: SS.SymbolSet -> SS.SymbolSet -> AsteriusModule -> SS.SymbolSet -> AsteriusModule -buildGCModule mod_syms err_syms store_mod export_funcs = - everything {staticsMap = staticsMap everything <> err_statics} +-- | NOTE: This function only loads object and archive files specified by +-- @accessed_files@, and filters those __per module__. The correctness of the +-- approach thus depends on two important assumptions: (a) for each JSFFI +-- import, @@ and @_wrapper@ are always in the same module, and (b) +-- the keys of the @sptMap@ refer only to statics in the same module. +loadFilterEverything :: AsteriusRepModule -> (AsteriusModule -> AsteriusModule) -> IO AsteriusModule +loadFilterEverything AsteriusRepModule {..} fn = do + ncu <- newNameCacheUpdater + objs <- for (Set.toList objectSources) $ \path -> + fn <$> loadObjectFile ncu path + arcs <- for (Set.toList archiveSources) $ \path -> + loadArchiveFile ncu path fn + pure $ fn inMemoryModule <> mconcat objs <> mconcat arcs + +-- | Convert an 'AsteriusRepModule' to a self-contained 'AsteriusModule' by +-- loading everything remaining from disk and combining it with the parts of +-- 'AsteriusModule' we have in memory (in 'inMemoryModule'). This function is +-- useful if the entirety of the module is needed (@gcSections@ is set to +-- @False@). Otherwise, use @loadFilterEverything@ instead. +fromAsteriusRepModule :: AsteriusRepModule -> IO AsteriusModule +fromAsteriusRepModule AsteriusRepModule {..} = do + ncu <- newNameCacheUpdater + objs <- for (Set.toList objectSources) $ loadObjectFile ncu + arcs <- for (Set.toList archiveSources) $ loadCompleteArchiveFile ncu + pure $ inMemoryModule <> mconcat objs <> mconcat arcs + +buildGCModule :: SS.SymbolSet -> SS.SymbolSet -> AsteriusRepModule -> SS.SymbolSet -> IO AsteriusModule +buildGCModule mod_syms err_syms module_rep export_funcs = do + everything <- loadFilterEverything module_rep (gcModule mod_syms export_funcs) + pure everything {staticsMap = staticsMap everything <> err_statics} where - everything = gcModule mod_syms export_funcs store_mod err_statics = SM.fromList [ ( "__asterius_barf_" <> sym, diff --git a/asterius/src/Asterius/Resolve.hs b/asterius/src/Asterius/Resolve.hs index 1bf7bea11a..405067c777 100644 --- a/asterius/src/Asterius/Resolve.hs +++ b/asterius/src/Asterius/Resolve.hs @@ -108,11 +108,23 @@ linkStart :: Bool -> Bool -> Bool -> - AsteriusCachedModule -> + AsteriusRepModule -> SS.SymbolSet -> [EntitySymbol] -> + IO (AsteriusModule, Module, LinkReport) +linkStart debug gc_sections verbose_err module_rep root_syms export_funcs = do + (force -> !merged_m0) <- + if gc_sections + then gcSections verbose_err module_rep root_syms export_funcs + else fromAsteriusRepModule module_rep + return $ linkStart' debug verbose_err merged_m0 + +linkStart' :: + Bool -> + Bool -> + AsteriusModule -> (AsteriusModule, Module, LinkReport) -linkStart debug gc_sections verbose_err store root_syms export_funcs = +linkStart' debug verbose_err merged_m0 = ( merged_m, result_m, LinkReport @@ -126,13 +138,9 @@ linkStart debug gc_sections verbose_err store root_syms export_funcs = } ) where - merged_m0 - | gc_sections = gcSections verbose_err store root_syms export_funcs - | otherwise = fromCachedModule store - !merged_m0_evaluated = force merged_m0 merged_m1 - | debug = addMemoryTrap merged_m0_evaluated - | otherwise = merged_m0_evaluated + | debug = addMemoryTrap merged_m0 + | otherwise = merged_m0 !merged_m | verbose_err = merged_m1 | otherwise = diff --git a/asterius/src/Asterius/Types.hs b/asterius/src/Asterius/Types.hs index 1be07647f7..19d3b3cff3 100644 --- a/asterius/src/Asterius/Types.hs +++ b/asterius/src/Asterius/Types.hs @@ -3,6 +3,7 @@ {-# LANGUAGE DuplicateRecordFields #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -18,8 +19,14 @@ module Asterius.Types AsteriusStaticsType (..), AsteriusStatics (..), AsteriusModule (..), - AsteriusCachedModule (..), - toCachedModule, + AsteriusOnDiskModule (..), + onDiskToInMemory, + inMemoryToOnDisk, + onDiskToObjRep, + AsteriusRepModule (..), + toAsteriusRepModule, + loadObjectRep, + loadObjectFile, EntitySymbol, entityName, mkEntitySymbol, @@ -58,6 +65,7 @@ module Asterius.Types ) where +import Asterius.Binary.File import Asterius.Binary.Orphans () import Asterius.Binary.TH import Asterius.Monoid.TH @@ -75,7 +83,9 @@ import qualified Data.ByteString as BS import Data.Data import Data.Foldable import qualified Data.Map.Lazy as LM +import qualified Data.Set as Set import Foreign +import qualified IfaceEnv as GHC import qualified Type.Reflection as TR type BinaryenIndex = Word32 @@ -115,6 +125,8 @@ data AsteriusStatics } deriving (Show, Data) +---------------------------------------------------------------------------- + data AsteriusModule = AsteriusModule { staticsMap :: SymbolMap AsteriusStatics, @@ -125,27 +137,96 @@ data AsteriusModule } deriving (Show, Data) --- | An 'AsteriusCachedModule' in an 'AsteriusModule' along with with all of --- its 'EntitySymbol' dependencies, as they are appear in the modules data --- segments and function definitions (see function 'toCachedModule'). -data AsteriusCachedModule - = AsteriusCachedModule - { dependencyMap :: SymbolMap SymbolSet, - fromCachedModule :: AsteriusModule +---------------------------------------------------------------------------- + +mkModuleExports :: AsteriusModule -> SymbolSet +mkModuleExports m = + SS.fromList + [ ffiExportClosure + | FFIExportDecl {..} <- + SM.elems + $ ffiExportDecls + $ ffiMarshalState m + ] + +mkModuleDependencyMap :: AsteriusModule -> SymbolMap SymbolSet +mkModuleDependencyMap m = + mkDependencyMap (staticsMap m) + <> mkDependencyMap (functionMap m) + <> mkDependencyMap (globalsMap m) + where + mkDependencyMap :: Data a => SymbolMap a -> SymbolMap SymbolSet + mkDependencyMap = flip SM.foldrWithKey' SM.empty $ \k e -> + SM.insert k (collectEntitySymbols e) + -- Collect all entity symbols from an entity. + collectEntitySymbols :: Data a => a -> SymbolSet + collectEntitySymbols t + | Just TR.HRefl <- TR.eqTypeRep (TR.typeOf t) (TR.typeRep @EntitySymbol) = + SS.singleton t + | otherwise = + gmapQl (<>) SS.empty collectEntitySymbols t + +---------------------------------------------------------------------------- + +-- | Load the representation of an object file from disk. +loadObjectRep :: GHC.NameCacheUpdater -> FilePath -> IO AsteriusRepModule +loadObjectRep ncu path = tryGetFile ncu path >>= \case + Left {} -> pure mempty -- Note [Malformed object files] in Asterius.Ld + Right m -> pure $ onDiskToObjRep path m + +-- | Load a module in its entirety from disk. +loadObjectFile :: GHC.NameCacheUpdater -> FilePath -> IO AsteriusModule +loadObjectFile ncu path = tryGetFile ncu path >>= \case + Left {} -> pure mempty -- Note [Malformed object files] in Asterius.Ld + Right m -> pure $ onDiskToInMemory m + +---------------------------------------------------------------------------- + +-- | Asterius modules, as represented on disk. +data AsteriusOnDiskModule + = AsteriusOnDiskModule + { onDiskDependencyMap :: ~(SymbolMap SymbolSet), + onDiskModuleExports :: ~SymbolSet, + onDiskStaticsMap :: ~(SymbolMap AsteriusStatics), + onDiskFunctionMap :: ~(SymbolMap Function), + onDiskGlobalsMap :: ~(SymbolMap Global), + onDiskSptMap :: ~(SymbolMap (Word64, Word64)), + onDiskFFIMarshalState :: ~FFIMarshalState } deriving (Show, Data) -instance GHC.Binary AsteriusCachedModule where +-- data AsteriusModule +-- = AsteriusModule +-- { staticsMap :: SymbolMap AsteriusStatics, +-- functionMap :: SymbolMap Function, +-- globalsMap :: SymbolMap Global, +-- sptMap :: SymbolMap (Word64, Word64), +-- ffiMarshalState :: FFIMarshalState +-- } +-- deriving (Show, Data) + + +instance GHC.Binary AsteriusOnDiskModule where get bh = do getObjectMagic bh - dependencyMap <- GHC.get bh - fromCachedModule <- GHC.get bh - pure AsteriusCachedModule {..} - - put_ bh AsteriusCachedModule {..} = do + onDiskDependencyMap <- GHC.lazyGet bh + onDiskModuleExports <- GHC.lazyGet bh + onDiskStaticsMap <- GHC.lazyGet bh + onDiskFunctionMap <- GHC.lazyGet bh + onDiskGlobalsMap <- GHC.lazyGet bh + onDiskSptMap <- GHC.lazyGet bh + onDiskFFIMarshalState <- GHC.lazyGet bh + return AsteriusOnDiskModule {..} + + put_ bh AsteriusOnDiskModule {..} = do putObjectMagic bh - GHC.put_ bh dependencyMap - GHC.put_ bh fromCachedModule + GHC.lazyPut bh onDiskDependencyMap + GHC.lazyPut bh onDiskModuleExports + GHC.lazyPut bh onDiskStaticsMap + GHC.lazyPut bh onDiskFunctionMap + GHC.lazyPut bh onDiskGlobalsMap + GHC.lazyPut bh onDiskSptMap + GHC.lazyPut bh onDiskFFIMarshalState objectMagic :: BS.ByteString objectMagic = "!\n" @@ -159,28 +240,73 @@ getObjectMagic bh = do when (BS.pack magic /= objectMagic) $ fail "Not an Asterius object file." --- | Convert an 'AsteriusModule' to an 'AsteriusCachedModule' by laboriously --- computing the dependency graph for each 'EntitySymbol'. Historical note: we --- used to compute the dependency graph during link time but that were quite --- inefficient (see isssue #568). Instead, we now do the same work at --- compile-time, thus creating object files containing 'AsteriusCachedModule's --- instead of 'AsteriusModule's. -toCachedModule :: AsteriusModule -> AsteriusCachedModule -toCachedModule m = - AsteriusCachedModule - { fromCachedModule = m, - dependencyMap = staticsMap m `add` (functionMap m `add` (globalsMap m `add` SM.empty)) +onDiskToInMemory :: AsteriusOnDiskModule -> AsteriusModule +onDiskToInMemory AsteriusOnDiskModule {..} = + AsteriusModule + { staticsMap = onDiskStaticsMap, + functionMap = onDiskFunctionMap, + globalsMap = onDiskGlobalsMap, + sptMap = onDiskSptMap, + ffiMarshalState = onDiskFFIMarshalState + } + +inMemoryToOnDisk :: AsteriusModule -> AsteriusOnDiskModule +inMemoryToOnDisk m@AsteriusModule {..} = + AsteriusOnDiskModule + { onDiskDependencyMap = mkModuleDependencyMap m, + onDiskModuleExports = mkModuleExports m, + onDiskStaticsMap = staticsMap, + onDiskFunctionMap = functionMap, + onDiskGlobalsMap = globalsMap, + onDiskSptMap = sptMap, + onDiskFFIMarshalState = ffiMarshalState + } + +onDiskToObjRep :: FilePath -> AsteriusOnDiskModule -> AsteriusRepModule +onDiskToObjRep obj_path m = + AsteriusRepModule + { dependencyMap = onDiskDependencyMap m, + moduleExports = onDiskModuleExports m, + objectSources = Set.singleton obj_path, + archiveSources = mempty, + inMemoryModule = mempty } - where - add :: Data a => SymbolMap a -> SymbolMap SymbolSet -> SymbolMap SymbolSet - add = flip $ SM.foldrWithKey' (\k e -> SM.insert k (collectEntitySymbols e)) - -- Collect all entity symbols from an entity. - collectEntitySymbols :: Data a => a -> SymbolSet - collectEntitySymbols t - | Just TR.HRefl <- TR.eqTypeRep (TR.typeOf t) (TR.typeRep @EntitySymbol) = - SS.singleton t - | otherwise = - gmapQl (<>) SS.empty collectEntitySymbols t + +---------------------------------------------------------------------------- + +-- | An 'AsteriusRepModule' is the representation of an 'AsteriusModule' before +-- @gcSections@ has processed it. This representation is supposed to capture +-- __all__ data, whether it comes from object files, archive files, or +-- in-memory entities created using our EDSL. +data AsteriusRepModule + = AsteriusRepModule + { -- | (Cached, on disk) 'EntitySymbol' dependencies. + dependencyMap :: SymbolMap SymbolSet, + -- | (Cached, on disk) Exported symbols. + moduleExports :: SymbolSet, + -- | (not on disk) Object file dependencies. + objectSources :: Set.Set FilePath, + -- | (not on disk) Archive file dependencies. + archiveSources :: Set.Set FilePath, + -- | (not on disk) In-memory parts of the module that are not yet stored anywhere on disk yet. + inMemoryModule :: AsteriusModule + } + deriving (Show, Data) + +-- | Convert an 'AsteriusModule' to an 'AsteriusRepModule' by laboriously +-- computing the dependency graph for each 'EntitySymbol' and all the +-- 'EntitySymbol's the module exports. +toAsteriusRepModule :: AsteriusModule -> AsteriusRepModule +toAsteriusRepModule m = + AsteriusRepModule + { dependencyMap = mkModuleDependencyMap m, + moduleExports = mkModuleExports m, + objectSources = mempty, + archiveSources = mempty, + inMemoryModule = m + } + +---------------------------------------------------------------------------- data UnresolvedLocalReg = UniqueLocalReg Int ValueType @@ -678,7 +804,7 @@ $(genNFData ''AsteriusStatics) $(genNFData ''AsteriusModule) -$(genNFData ''AsteriusCachedModule) +$(genNFData ''AsteriusRepModule) $(genNFData ''UnresolvedLocalReg) @@ -752,8 +878,6 @@ $(genBinary ''AsteriusStaticsType) $(genBinary ''AsteriusStatics) -$(genBinary ''AsteriusModule) - $(genBinary ''UnresolvedLocalReg) $(genBinary ''UnresolvedGlobalReg) @@ -820,14 +944,14 @@ $(genBinary ''FFIMarshalState) $(genSemigroup ''AsteriusModule) -$(genSemigroup ''AsteriusCachedModule) +$(genSemigroup ''AsteriusRepModule) $(genSemigroup ''FFIMarshalState) --- Semigroup instances +-- Monoid instances $(genMonoid ''AsteriusModule) -$(genMonoid ''AsteriusCachedModule) +$(genMonoid ''AsteriusRepModule) $(genMonoid ''FFIMarshalState) diff --git a/asterius/test/nomain.hs b/asterius/test/nomain.hs index d48f5c5338..085403ffd1 100644 --- a/asterius/test/nomain.hs +++ b/asterius/test/nomain.hs @@ -1,8 +1,8 @@ {-# LANGUAGE OverloadedStrings #-} -import Asterius.Binary.File import Asterius.Binary.NameCache import Asterius.JSRun.NonMain +import Asterius.Types import Control.Exception import qualified Data.ByteString.Lazy as LBS import Language.JavaScript.Inline.Core @@ -22,7 +22,7 @@ main = do ] <> args ncu <- newNameCacheUpdater - m <- getFile ncu "test/nomain/NoMain.unlinked.bin" + m <- loadObjectRep ncu "test/nomain/NoMain.unlinked.bin" bracket ( newSession defaultConfig