-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This will enable caching analysis results in a text format that can be parsed much more efficiently than re-running the entire fixed point analysis.
- Loading branch information
Showing
4 changed files
with
567 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
{-# LANGUAGE OverloadedStrings #-} | ||
-- | The atoms of the concrete syntax for macaw | ||
module Data.Macaw.Syntax.Atom ( | ||
Keyword(..) | ||
, keywords | ||
, AtomName(..) | ||
, Atom(..) | ||
) | ||
where | ||
|
||
import qualified Data.Map.Strict as Map | ||
import qualified Data.Text as T | ||
import Numeric.Natural ( Natural ) | ||
|
||
-- | Macaw syntax keywords | ||
-- | ||
-- These are the keywords for the *base* macaw IR (i.e., without | ||
-- architecture-specific extensions). The architecture-specific operations are | ||
-- parsed as 'AtomName's initially and resolved by architecture-specific parsers | ||
-- at the atom level. | ||
data Keyword = BVAdd | ||
| BVSub | ||
| BVMul | ||
| BVSDiv | ||
| BVUDiv | ||
| Lt | ||
| Le | ||
| Sle | ||
| Slt | ||
-- Syntax | ||
| Assign | ||
-- Statements | ||
| Comment | ||
| InstructionStart | ||
| WriteMemory | ||
| CondWriteMemory | ||
-- Boolean operations | ||
| Not_ | ||
| And_ | ||
| Or_ | ||
| Xor_ | ||
-- Endianness | ||
| BigEndian | ||
| LittleEndian | ||
-- MemRepr | ||
| BVMemRepr | ||
-- Types | ||
| Bool_ | ||
| BV_ | ||
| Float_ | ||
| Tuple_ | ||
| Vec_ | ||
-- Values | ||
| True_ | ||
| False_ | ||
| BV | ||
deriving (Eq, Ord, Show) | ||
|
||
-- | Uninterpreted atoms | ||
newtype AtomName = AtomText T.Text | ||
deriving (Eq, Ord, Show) | ||
|
||
data Atom = Keyword !Keyword -- ^ Keywords include all of the built-in expressions and operators | ||
| AtomName !AtomName -- ^ Non-keyword syntax atoms (to be interpreted at parse time) | ||
| Register !Natural -- ^ A numbered local register (e.g., @r12@) | ||
| AddressWord !Natural -- ^ An arbitrary address rendered in hex ('ArchAddrWord') | ||
| SegmentOffset !Natural -- ^ A segment offset address rendered in hex (validation against the Memory object is required) | ||
| Integer_ !Integer -- ^ Literal integers | ||
| Natural_ !Natural -- ^ Literal naturals | ||
| String_ !T.Text -- ^ Literal strings | ||
deriving (Eq, Ord, Show) | ||
|
||
keywords :: Map.Map T.Text Keyword | ||
keywords = Map.fromList [ ("bv-add", BVAdd) | ||
, ("bv-sub", BVSub) | ||
, ("bv-mul", BVMul) | ||
, ("bv-sdiv", BVSDiv) | ||
, ("bv-udiv", BVUDiv) | ||
, ("<", Lt) | ||
, ("<=", Le) | ||
, ("<$", Slt) | ||
, ("<=$", Sle) | ||
, ("not", Not_) | ||
, ("and", And_) | ||
, ("or", Or_) | ||
, ("xor", Xor_) | ||
, ("Bool", Bool_) | ||
, ("BV", BV_) | ||
, ("Float", Float_) | ||
, ("Tuple", Tuple_) | ||
, ("Vec", Vec_) | ||
, ("true", True_) | ||
, ("false", False_) | ||
, ("bv", BV) | ||
, (":=", Assign) | ||
, ("comment", Comment) | ||
, ("instruction-start", InstructionStart) | ||
, ("write-memory", WriteMemory) | ||
, ("cond-write-memory", CondWriteMemory) | ||
, ("big-endian", BigEndian) | ||
, ("little-endian", LittleEndian) | ||
, ("bv-mem", BVMemRepr) | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,230 @@ | ||
{-# LANGUAGE DataKinds #-} | ||
{-# LANGUAGE FlexibleContexts #-} | ||
{-# LANGUAGE GADTs #-} | ||
{-# LANGUAGE RankNTypes #-} | ||
{-# LANGUAGE ScopedTypeVariables #-} | ||
{-# LANGUAGE TypeApplications #-} | ||
{-# LANGUAGE TypeOperators #-} | ||
{-# LANGUAGE ViewPatterns #-} | ||
-- | This module defines a concrete syntax parser for macaw, which is meant to | ||
-- make it easy to persist analysis results and reuse them without recomputing | ||
-- everything | ||
-- | ||
-- The persisted artifact is a zip file with one file per function, plus a | ||
-- manifest file that records metadata about the binary to enable the loader to | ||
-- check to ensure that the loaded macaw IR actually matches the given binary. | ||
module Data.Macaw.Syntax.Parser ( | ||
parseDiscoveryFunInfo | ||
, parseApp | ||
, parseType | ||
) where | ||
|
||
import Control.Applicative ( (<|>) ) | ||
import qualified Data.ByteString as BS | ||
import qualified Data.Map as Map | ||
import qualified Data.Parameterized.Classes as PC | ||
import qualified Data.Parameterized.NatRepr as PN | ||
import Data.Parameterized.Some ( Some(..) ) | ||
import qualified Data.Text as T | ||
import GHC.TypeLits ( type (<=) ) | ||
import Numeric.Natural ( Natural ) | ||
import qualified Text.Megaparsec as TM | ||
|
||
import qualified Data.Macaw.CFG as MC | ||
import qualified Data.Macaw.Discovery as MD | ||
import qualified Data.Macaw.Discovery.State as MDS | ||
import qualified Data.Macaw.Discovery.ParsedContents as Parsed | ||
import qualified Data.Macaw.Memory as MM | ||
import Data.Macaw.Syntax.Atom | ||
import qualified Data.Macaw.Syntax.SExpr as SExpr | ||
import qualified Data.Macaw.Types as MT | ||
|
||
|
||
parse | ||
:: (SExpr.Syntax Atom -> Either SExpr.MacawSyntaxError a) | ||
-> SExpr.Parser arch ids a | ||
parse asSomething = do | ||
at <- SExpr.sexp parseAtom | ||
case asSomething at of | ||
Left err -> TM.customFailure err | ||
Right r -> return r | ||
|
||
parseSExpr | ||
:: forall arch ids a | ||
. SExpr.Syntax Atom | ||
-> (SExpr.Syntax Atom -> Either SExpr.MacawSyntaxError a) | ||
-> SExpr.Parser arch ids a | ||
parseSExpr sexp asSomething = | ||
case asSomething sexp of | ||
Left err -> TM.customFailure err | ||
Right r -> return r | ||
|
||
data WidthRepr where | ||
WidthRepr :: (1 <= n) => PN.NatRepr n -> WidthRepr | ||
|
||
-- | Attempt to convert a 'Natural' into a non-zero 'PN.NatRepr' | ||
asWidthRepr :: Natural -> Maybe WidthRepr | ||
asWidthRepr nat = | ||
case PN.mkNatRepr nat of | ||
Some nr | ||
| Right PN.LeqProof <- PN.isZeroOrGT1 nr -> Just (WidthRepr nr) | ||
| otherwise -> Nothing | ||
|
||
asEndianness :: SExpr.Syntax Atom -> Either SExpr.MacawSyntaxError MM.Endianness | ||
asEndianness at = | ||
case at of | ||
SExpr.A (Keyword BigEndian) -> Right MM.BigEndian | ||
SExpr.A (Keyword LittleEndian) -> Right MM.LittleEndian | ||
_ -> Left (SExpr.InvalidEndianness at) | ||
|
||
asMemRepr :: SExpr.Syntax Atom -> Either SExpr.MacawSyntaxError (SomeTyped MC.MemRepr) | ||
asMemRepr at = | ||
case at of | ||
SExpr.L [ SExpr.A (Keyword BVMemRepr), SExpr.A (Natural_ w), send ] | ||
| Just (WidthRepr nr) <- asWidthRepr w -> do | ||
end <- asEndianness send | ||
let nr8 = PN.natMultiply (PN.knownNat @8) nr | ||
-- This cannot fail because we already checked that @w@ is >= 1 above | ||
case PN.isZeroOrGT1 nr8 of | ||
Left _ -> error ("Impossible; w was >= 1, so w * 8 must be >= 1: " ++ show nr8) | ||
Right PN.LeqProof -> do | ||
let tyRep = MT.BVTypeRepr nr8 | ||
return (SomeTyped tyRep (MC.BVMemRepr nr end)) | ||
|
||
data SomeTyped f where | ||
SomeTyped :: MT.TypeRepr tp -> f tp -> SomeTyped f | ||
|
||
asValue :: SExpr.Syntax Atom -> Either SExpr.MacawSyntaxError (SomeTyped (MC.Value arch ids)) | ||
asValue at = | ||
case at of | ||
SExpr.A (Keyword True_) -> Right (SomeTyped MT.BoolTypeRepr (MC.BoolValue True)) | ||
SExpr.A (Keyword False_) -> Right (SomeTyped MT.BoolTypeRepr (MC.BoolValue False)) | ||
SExpr.L [SExpr.A (Keyword BV), SExpr.A (Natural_ w), SExpr.A (Integer_ i)] | ||
| Just (WidthRepr nr) <- asWidthRepr w -> Right (SomeTyped (MT.BVTypeRepr nr) (MC.BVValue nr i)) | ||
| otherwise -> Left SExpr.InvalidZeroWidthBV | ||
|
||
parseApp :: SExpr.Parser arch ids (SomeTyped (MC.App (MC.Value arch ids))) | ||
parseApp = undefined | ||
|
||
parseAtom :: SExpr.Parser arch ids Atom | ||
parseAtom = TM.try parseKeywordOrAtom | ||
|
||
parseIdentifier :: SExpr.Parser arch ids T.Text | ||
parseIdentifier = undefined | ||
|
||
parseKeywordOrAtom :: SExpr.Parser arch ids Atom | ||
parseKeywordOrAtom = do | ||
x <- parseIdentifier | ||
return $! maybe (AtomName (AtomText x)) Keyword (Map.lookup x keywords) | ||
|
||
-- | Parse a single type as a 'MT.TypeRepr' | ||
-- | ||
-- The type forms are: | ||
-- | ||
-- * @Bool@ | ||
-- * @(BV n)@ | ||
-- * @(Vec n ty)@ | ||
asTypeRepr :: SExpr.Syntax Atom -> Either SExpr.MacawSyntaxError (Some MT.TypeRepr) | ||
asTypeRepr at = | ||
case at of | ||
SExpr.A (Keyword Bool_) -> Right (Some MT.BoolTypeRepr) | ||
SExpr.L [SExpr.A (Keyword BV_), SExpr.A (Natural_ w)] | ||
| Just (WidthRepr nr) <- asWidthRepr w -> Right (Some (MT.BVTypeRepr nr)) | ||
| otherwise -> Left SExpr.InvalidZeroWidthBV | ||
SExpr.L [SExpr.A (Keyword Vec_), SExpr.A (Natural_ w), mty] -> | ||
case PN.mkNatRepr w of | ||
Some nr -> | ||
-- Note that zero-width vectors are technically allowed | ||
case asTypeRepr mty of | ||
Right (Some ty) -> Right (Some (MT.VecTypeRepr nr ty)) | ||
Left _errs -> Left (SExpr.InvalidVectorPayload mty) | ||
_ -> Left (SExpr.InvalidType at) | ||
|
||
asRHS :: SExpr.Syntax Atom -> Either SExpr.MacawSyntaxError (Some (MC.AssignRhs arch (MC.Value arch ids))) | ||
asRHS = undefined | ||
|
||
parseType :: SExpr.Parser arch ids (Some MT.TypeRepr) | ||
parseType = parse asTypeRepr | ||
|
||
-- | Forms: | ||
-- | ||
-- * @(comment "msg")@ | ||
-- * @(instruction-start addr decoded-asm-text)@ | ||
-- * @(write-memory addr mem-rep value)@ | ||
-- * @(cond-write-memory cond addr mem-rep value)@ | ||
-- * @(reg := rhs)@ | ||
parseStmt :: forall arch ids . SExpr.Parser arch ids (MC.Stmt arch ids) | ||
parseStmt = do | ||
at <- SExpr.sexp parseAtom | ||
case at of | ||
SExpr.L [ SExpr.A (Keyword Comment), SExpr.A (String_ txt) ] -> | ||
return (MC.Comment txt) | ||
SExpr.L [ SExpr.A (Keyword InstructionStart), SExpr.A (AddressWord addr), SExpr.A (String_ txt) ] -> | ||
return (MC.InstructionStart (MM.memWord (fromIntegral addr)) txt) | ||
SExpr.L [ SExpr.A (Keyword WriteMemory), stargetAddr, smemRepr, svalue ] -> do | ||
SomeTyped addrTy addr <- parseSExpr stargetAddr (asValue @arch @ids) | ||
SomeTyped vtp value <- parseSExpr svalue (asValue @arch @ids) | ||
SomeTyped mtp memRepr <- parseSExpr smemRepr asMemRepr | ||
memWidth <- SExpr.archMemWidth | ||
let addrRepr = MT.BVTypeRepr memWidth | ||
case (PC.testEquality vtp mtp, PC.testEquality addrTy addrRepr) of | ||
(Just PC.Refl, Just PC.Refl) -> do | ||
return (MC.WriteMem addr memRepr value) | ||
-- FIXME: Make a more-specific error | ||
_ -> TM.customFailure (SExpr.InvalidStatement at) | ||
SExpr.L [ SExpr.A (Keyword CondWriteMemory), scond, stargetAddr, smemRepr, svalue ] -> do | ||
SomeTyped condTy cond <- parseSExpr scond (asValue @arch @ids) | ||
SomeTyped addrTy addr <- parseSExpr stargetAddr (asValue @arch @ids) | ||
SomeTyped vtp value <- parseSExpr svalue (asValue @arch @ids) | ||
SomeTyped mtp memRepr <- parseSExpr smemRepr asMemRepr | ||
memWidth <- SExpr.archMemWidth | ||
let addrRepr = MT.BVTypeRepr memWidth | ||
case (PC.testEquality vtp mtp, PC.testEquality addrTy addrRepr, PC.testEquality condTy MT.BoolTypeRepr) of | ||
(Just PC.Refl, Just PC.Refl, Just PC.Refl) -> do | ||
return (MC.CondWriteMem cond addr memRepr value) | ||
-- FIXME: Make a more-specific error | ||
_ -> TM.customFailure (SExpr.InvalidStatement at) | ||
SExpr.L [ SExpr.A (Register r), SExpr.A (Keyword Assign), srhs ] -> do | ||
Some rhs <- parseSExpr srhs (asRHS @arch @ids) | ||
Some asgn <- SExpr.defineRegister r rhs | ||
return (MC.AssignStmt asgn) | ||
_ -> TM.customFailure (SExpr.InvalidStatement at) | ||
|
||
parseBlock :: SExpr.Parser arch ids (Parsed.ParsedBlock arch ids) | ||
parseBlock = do | ||
|
||
stmts <- TM.many parseStmt | ||
|
||
return Parsed.ParsedBlock { Parsed.pblockAddr = undefined | ||
, Parsed.pblockPrecond = undefined | ||
, Parsed.blockSize = undefined | ||
, Parsed.blockReason = undefined | ||
, Parsed.blockAbstractState = undefined | ||
, Parsed.blockJumpBounds = undefined | ||
, Parsed.pblockStmts = stmts | ||
, Parsed.pblockTermStmt = undefined | ||
} | ||
|
||
parseFunction :: SExpr.Parser arch ids (Some (MD.DiscoveryFunInfo arch)) | ||
parseFunction = do | ||
|
||
blocks <- TM.many parseBlock | ||
|
||
let dfi = MDS.DiscoveryFunInfo { MDS.discoveredFunReason = undefined | ||
, MDS.discoveredFunAddr = undefined | ||
, MDS.discoveredFunSymbol = undefined | ||
, MDS._parsedBlocks = Map.fromList [ (Parsed.pblockAddr pb, pb) | ||
| pb <- blocks | ||
] | ||
, MDS.discoveredClassifyFailureResolutions = undefined | ||
} | ||
return (Some dfi) | ||
|
||
parseDiscoveryFunInfo | ||
:: (MC.ArchConstraints arch) | ||
=> MM.Memory (MC.ArchAddrWidth arch) | ||
-- ^ The memory of the binary we are loading results for | ||
-> BS.ByteString | ||
-- ^ The bytes of the persisted file | ||
-> Either SExpr.MacawParseError (Some (MD.DiscoveryFunInfo arch)) | ||
parseDiscoveryFunInfo mem bytes = SExpr.runParser mem bytes parseFunction |
Oops, something went wrong.