manually copy the "c-runtime" branch from the old repository.

This commit is contained in:
Krasimir Angelov
2018-11-02 14:38:44 +01:00
parent bf5abe2948
commit 5a2b200948
80 changed files with 2618 additions and 1527 deletions

View File

@@ -16,8 +16,7 @@
module PGF(
-- * PGF
PGF,
readPGF,
parsePGF,
readPGF, showPGF,
-- * Identifiers
CId, mkCId, wildCId,
@@ -54,12 +53,14 @@ module PGF(
mkDouble, unDouble,
mkFloat, unFloat,
mkMeta, unMeta,
exprSubstitute,
-- extra
pExpr, exprSize, exprFunctions,
-- * Operations
-- ** Linearization
linearize, linearizeAllLang, linearizeAll, bracketedLinearize, bracketedLinearizeAll, tabularLinearizes,
linearize, linearizeAllLang, linearizeAll, bracketedLinearize, tabularLinearizes,
groupResults, -- lins of trees by language, removing duplicates
showPrintName,
@@ -166,17 +167,18 @@ import PGF.Macros
import PGF.Expr (Tree)
import PGF.Morphology
import PGF.Data
import PGF.Binary ()
import PGF.Binary()
import qualified PGF.Forest as Forest
import qualified PGF.Parse as Parse
import PGF.Utilities(replace)
import PGF.Printer
import Text.PrettyPrint
--import Data.Char
import qualified Data.Map as Map
--import qualified Data.IntMap as IntMap
--import Data.Maybe
import Data.Binary
import Data.ByteString.Lazy (ByteString)
import Data.List(mapAccumL)
--import System.Random (newStdGen)
--import Control.Monad
@@ -192,11 +194,6 @@ import Text.PrettyPrint
-- > $ gf -make <grammar file name>
readPGF :: FilePath -> IO PGF
-- | Like @readPGF@ but you have the manage file-handling.
--
-- @since 3.9.1
parsePGF :: ByteString -> PGF
-- | Tries to parse the given string in the specified language
-- and to produce abstract syntax expression.
parse :: PGF -> Language -> Type -> String -> [Tree]
@@ -261,9 +258,9 @@ functionType :: PGF -> CId -> Maybe Type
-- Implementation
---------------------------------------------------
readPGF = decodeFile
readPGF f = decodeFile f
parsePGF = decode
showPGF pgf = render (ppPGF pgf)
parse pgf lang typ s =
case parse_ pgf lang typ (Just 4) s of

View File

@@ -2,7 +2,7 @@ module PGF.ByteCode(Literal(..),
CodeLabel, Instr(..), IVal(..), TailInfo(..),
ppLit, ppCode, ppInstr
) where
import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint
import PGF.CId
import Text.PrettyPrint

View File

@@ -74,7 +74,7 @@ data Production
deriving (Eq,Ord,Show)
data PArg = PArg [(FId,FId)] {-# UNPACK #-} !FId deriving (Eq,Ord,Show)
data CncCat = CncCat {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !(Array LIndex String)
data CncFun = CncFun [CId] {-# UNPACK #-} !(UArray LIndex SeqId) deriving (Eq,Ord,Show)
data CncFun = CncFun CId {-# UNPACK #-} !(UArray LIndex SeqId) deriving (Eq,Ord,Show)
type Sequence = Array DotPos Symbol
type FunId = Int
type SeqId = Int
@@ -93,14 +93,6 @@ msgUnionPGF one two = case absname one of
_ -> (two, -- abstracts don't match, discard the old one -- error msg in Importing.ioUnionPGF
Just "Abstract changed, previous concretes discarded.")
emptyPGF :: PGF
emptyPGF = PGF {
gflags = Map.empty,
absname = wildCId,
abstract = error "empty grammar, no abstract",
concretes = Map.empty
}
-- sameness of function type signatures, checked when importing a new concrete in env
haveSameFunsPGF :: PGF -> PGF -> Bool
haveSameFunsPGF one two =

View File

@@ -8,6 +8,7 @@ module PGF.Expr(Tree, BindType(..), Expr(..), Literal(..), Patt(..), Equation(..
mkDouble, unDouble,
mkFloat, unFloat,
mkMeta, unMeta,
exprSubstitute,
normalForm,
@@ -169,6 +170,16 @@ unMeta (ETyped e ty) = unMeta e
unMeta (EImplArg e) = unMeta e
unMeta _ = Nothing
exprSubstitute :: Expr -> [Expr] -> Expr
exprSubstitute e es =
case e of
EAbs b x e -> EAbs b x (exprSubstitute e es)
EApp e1 e2 -> EApp (exprSubstitute e1 es) (exprSubstitute e2 es)
ELit l -> ELit l
EMeta i -> es !! i
EFun x -> EFun x
-----------------------------------------------------
-- Parsing
-----------------------------------------------------

View File

@@ -71,10 +71,10 @@ bracketedTokn dp f@(Forest abs cnc forest root) =
in (ct,fid',fun,es,(map getVar hypos,lin))
Nothing -> error ("wrong forest id " ++ show fid)
where
descend forest (PApply funid args) = let (CncFun pfuns _lins) = cncfuns cnc ! funid
cat = case pfuns of
[] -> wildCId
(pfun:_) -> case Map.lookup pfun (funs abs) of
descend forest (PApply funid args) = let (CncFun fun _lins) = cncfuns cnc ! funid
cat = case isLindefCId fun of
Just cat -> cat
Nothing -> case Map.lookup fun (funs abs) of
Just (DTyp _ cat _,_,_,_) -> cat
largs = map (render forest) args
ltable = mkLinTable cnc isTrusted [] funid largs
@@ -103,6 +103,14 @@ bracketedTokn dp f@(Forest abs cnc forest root) =
descend (PCoerce fid) = trustedSpots parents' (PArg [] fid)
descend (PConst c e _) = IntSet.empty
isLindefCId id
| take l s == lindef = Just (mkCId (drop l s))
| otherwise = Nothing
where
s = showCId id
lindef = "lindef "
l = length lindef
-- | This function extracts the list of all completed parse trees
-- that spans the whole input consumed so far. The trees are also
-- limited by the category specified, which is usually
@@ -124,13 +132,13 @@ getAbsTrees (Forest abs cnc forest root) arg@(PArg _ fid) ty dp =
| otherwise = do fid0 <- get
put fid
x <- foldForest (\funid args trees ->
do let CncFun fns _lins = cncfuns cnc ! funid
case fns of
[] -> do arg <- go (Set.insert fid rec_) scope mb_tty (head args)
do let CncFun fn _lins = cncfuns cnc ! funid
case isLindefCId fn of
Just _ -> do arg <- go (Set.insert fid rec_) scope mb_tty (head args)
return (mkAbs arg)
fns -> do ty_fn <- lookupFunType (head fns)
Nothing -> do ty_fn <- lookupFunType fn
(e,tty0) <- foldM (\(e1,tty) arg -> goArg (Set.insert fid rec_) scope fid e1 arg tty)
(EFun (head fns),TTyp [] ty_fn) args
(EFun fn,TTyp [] ty_fn) args
case mb_tty of
Just tty -> do i <- newGuardedMeta e
eqType scope (scopeSize scope) i tty tty0

View File

@@ -1,19 +1,169 @@
{-# OPTIONS_HADDOCK hide #-}
{-# LANGUAGE ImplicitParams, RankNTypes #-}
-------------------------------------------------
-- |
-- Stability : unstable
--
-------------------------------------------------
module PGF.Internal(module Internal) where
import PGF.Binary as Internal
import PGF.Data as Internal
import PGF.Macros as Internal
import PGF.Optimize as Internal
import PGF.Printer as Internal
import PGF.Utilities as Internal
import PGF.ByteCode as Internal
module PGF.Internal(CId,Language,PGF,
Concr,lookConcr,
FId,isPredefFId,
FunId,SeqId,LIndex,Token,
Production(..),PArg(..),Symbol(..),Literal(..),BindType(..),PGF.Internal.Sequence,
globalFlags, abstrFlags, concrFlags,
concrTotalCats, concrCategories, concrProductions,
concrTotalFuns, concrFunction,
concrTotalSeqs, concrSequence,
import Data.Binary as Internal
import Data.Binary.Get as Internal
import Data.Binary.IEEE754 as Internal
import Data.Binary.Put as Internal
CodeLabel, Instr(..), IVal(..), TailInfo(..),
Builder, B, build,
eAbs, eApp, eMeta, eFun, eVar, eLit, eTyped, eImplArg, dTyp, hypo,
AbstrInfo, newAbstr, ConcrInfo, newConcr, newPGF,
dTyp, hypo,
fidString, fidInt, fidFloat, fidVar, fidStart,
ppFunId, ppSeqId, ppFId, ppMeta, ppLit, PGF.Internal.ppSeq
) where
import PGF.Data
import PGF.Macros
import PGF.Printer
import PGF.ByteCode
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import Data.Array.IArray
import Text.PrettyPrint
globalFlags pgf = gflags pgf
abstrFlags pgf = aflags (abstract pgf)
concrFlags concr = cflags concr
concrTotalCats = totalCats
concrCategories :: Concr -> [(CId,FId,FId,[String])]
concrCategories c = [(cat,start,end,elems lbls) | (cat,CncCat start end lbls) <- Map.toList (cnccats c)]
concrTotalFuns c =
let (s,e) = bounds (cncfuns c)
in e-s+1
concrFunction :: Concr -> FunId -> (CId,[SeqId])
concrFunction c funid =
let CncFun fun lins = cncfuns c ! funid
in (fun,elems lins)
concrTotalSeqs :: Concr -> SeqId
concrTotalSeqs c =
let (s,e) = bounds (sequences c)
in e-s+1
type Sequence = [Symbol]
concrSequence :: Concr -> SeqId -> [Symbol]
concrSequence c seqid = elems (sequences c ! seqid)
concrProductions :: Concr -> FId -> [Production]
concrProductions c fid =
case IntMap.lookup fid (productions c) of
Just set -> Set.toList set
Nothing -> []
data Builder s
newtype B s a = B a
build :: (forall s . (?builder :: Builder s) => B s a) -> a
build x = let ?builder = undefined
in case x of
B x -> x
eAbs :: (?builder :: Builder s) => BindType -> CId -> B s Expr -> B s Expr
eAbs bind_type var (B body) = B (EAbs bind_type var body)
eApp :: (?builder :: Builder s) => B s Expr -> B s Expr -> B s Expr
eApp (B f) (B x) = B (EApp f x)
eMeta :: (?builder :: Builder s) => Int -> B s Expr
eMeta i = B (EMeta i)
eFun :: (?builder :: Builder s) => CId -> B s Expr
eFun f = B (EFun f)
eVar :: (?builder :: Builder s) => Int -> B s Expr
eVar i = B (EVar i)
eLit :: (?builder :: Builder s) => Literal -> B s Expr
eLit l = B (ELit l)
eTyped :: (?builder :: Builder s) => B s Expr -> B s Type -> B s Expr
eTyped (B e) (B ty) = B (ETyped e ty)
eImplArg :: (?builder :: Builder s) => B s Expr -> B s Expr
eImplArg (B e) = B (EImplArg e)
hypo :: BindType -> CId -> B s Type -> (B s Hypo)
hypo bind_type var (B ty) = B (bind_type,var,ty)
dTyp :: (?builder :: Builder s) => [B s Hypo] -> CId -> [B s Expr] -> B s Type
dTyp hypos cat es = B (DTyp [hypo | B hypo <- hypos] cat [e | B e <- es])
type AbstrInfo = Abstr
newAbstr :: (?builder :: Builder s) => [(CId,Literal)] ->
[(CId,[B s Hypo],Float)] ->
[(CId,B s Type,Int,Float)] ->
B s AbstrInfo
newAbstr aflags cats funs = B (Abstr (Map.fromList aflags)
(Map.fromList [(fun,(ty,arity,Nothing,realToFrac prob)) | (fun,B ty,arity,prob) <- funs])
(Map.fromList [(cat,([hypo | B hypo <- hypos],[],realToFrac prob)) | (cat,hypos,prob) <- cats]))
type ConcrInfo = Concr
newConcr :: (?builder :: Builder s) => B s AbstrInfo ->
[(CId,Literal)] -> -- ^ Concrete syntax flags
[(CId,String)] -> -- ^ Printnames
[(FId,[FunId])] -> -- ^ Lindefs
[(FId,[FunId])] -> -- ^ Linrefs
[(FId,[Production])] -> -- ^ Productions
[(CId,[SeqId])] -> -- ^ Concrete functions (must be sorted by Fun)
[[Symbol]] -> -- ^ Sequences (must be sorted)
[(CId,FId,FId,[String])] -> -- ^ Concrete categories
FId -> -- ^ The total count of the categories
B s ConcrInfo
newConcr _ cflags printnames lindefs linrefs productions cncfuns sequences cnccats totalCats =
B (Concr {cflags = Map.fromList cflags
,printnames = Map.fromList printnames
,lindefs = IntMap.fromList lindefs
,linrefs = IntMap.fromList linrefs
,productions = IntMap.fromList [(fid,Set.fromList prods) | (fid,prods) <- productions]
,cncfuns = mkArray [CncFun fun (mkArray lins) | (fun,lins) <- cncfuns]
,sequences = mkArray (map mkArray sequences)
,cnccats = Map.fromList [(cat,CncCat s e (mkArray lbls)) | (cat,s,e,lbls) <- cnccats]
,totalCats = totalCats
})
{-
pproductions :: IntMap.IntMap (Set.Set Production), -- productions needed for parsing
lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)), -- productions needed for linearization
lexicon :: IntMap.IntMap (IntMap.IntMap (TMap.TrieMap Token IntSet.IntSet)),
-}
newPGF :: (?builder :: Builder s) => [(CId,Literal)] ->
CId ->
B s AbstrInfo ->
[(CId,B s ConcrInfo)] ->
B s PGF
newPGF gflags absname (B abstract) concretes =
B (PGF {gflags = Map.fromList gflags
,absname = absname
,abstract = abstract
,concretes = Map.fromList [(cname,concr) | (cname,B concr) <- concretes]
})
ppSeq (seqid,seq) = PGF.Printer.ppSeq (seqid,mkArray seq)
mkArray l = listArray (0,length l-1) l

View File

@@ -4,7 +4,6 @@ module PGF.Linearize
, linearizeAll
, linearizeAllLang
, bracketedLinearize
, bracketedLinearizeAll
, tabularLinearizes
) where
@@ -48,12 +47,6 @@ bracketedLinearize pgf lang = head . map (snd . untokn Nothing . firstLin cnc) .
head [] = []
head (bs:bss) = bs
-- | Linearizes given expression as a bracketed string in the language
bracketedLinearizeAll :: PGF -> Language -> Tree -> [[BracketedString]]
bracketedLinearizeAll pgf lang = map (snd . untokn Nothing . firstLin cnc) . linTree pgf cnc
where
cnc = lookMap (error "no lang") lang (concretes pgf)
firstLin cnc arg@(ct@(cat,n_fid),fid,fun,es,(xs,lin)) =
case IntMap.lookup fid (linrefs cnc) of
Just (funid:_) -> snd (mkLinTable cnc (const True) [] funid [arg]) ! 0

View File

@@ -1,5 +1,4 @@
module PGF.Macros where
import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint
import PGF.CId
import PGF.Data

View File

@@ -31,8 +31,7 @@ collectWords pinfo = Map.fromListWith (++)
[(t, [(fun,lbls ! l)]) | (CncCat s e lbls) <- Map.elems (cnccats pinfo)
, fid <- [s..e]
, PApply funid _ <- maybe [] Set.toList (IntMap.lookup fid (productions pinfo))
, let CncFun funs lins = cncfuns pinfo ! funid
, fun <- funs
, let CncFun fun lins = cncfuns pinfo ! funid
, (l,seqid) <- assocs lins
, sym <- elems (sequences pinfo ! seqid)
, t <- sym2tokns sym]

View File

@@ -60,7 +60,7 @@ getConcr =
cnccats <- getMap getCId getCncCat
totalCats <- get
let rseq = listToArray [SymCat 0 0]
rfun = CncFun [mkCId "linref"] (listToArray [scnt])
rfun = CncFun (mkCId "linref") (listToArray [scnt])
linrefs = IntMap.fromList [(i,[fcnt])|i<-[0..totalCats-1]]
return (Concr{ cflags=cflags, printnames=printnames
, sequences=toArray (scnt+1,seqs++[rseq])
@@ -110,7 +110,7 @@ getBindType =
1 -> return Implicit
_ -> decodingError "getBindType"
getCncFun = liftM2 CncFun (fmap (:[]) getCId) (getArray get)
getCncFun = liftM2 CncFun getCId (getArray get)
getCncCat = liftM3 CncCat get get (getArray get)

View File

@@ -21,7 +21,6 @@ import qualified Data.IntMap as IntMap
import qualified PGF.TrieMap as TrieMap
import qualified Data.List as List
import Control.Monad.ST
import Debug.Trace
optimizePGF :: PGF -> PGF
optimizePGF pgf = pgf{concretes=fmap (updateConcrete (abstract pgf) .
@@ -179,26 +178,26 @@ topDownFilter startCat cnc =
bottomUpFilter :: Concr -> Concr
bottomUpFilter cnc = cnc{productions=filterProductions IntMap.empty (productions cnc)}
bottomUpFilter cnc = cnc{productions=filterProductions IntMap.empty IntSet.empty (productions cnc)}
filterProductions prods0 prods
filterProductions prods0 hoc0 prods
| prods0 == prods1 = prods0
| otherwise = filterProductions prods1 prods
| otherwise = filterProductions prods1 hoc1 prods
where
prods1 = IntMap.foldWithKey foldProdSet IntMap.empty prods
hoc = IntMap.fold (\set !hoc -> Set.fold accumHOC hoc set) IntSet.empty prods
(prods1,hoc1) = IntMap.foldWithKey foldProdSet (IntMap.empty,IntSet.empty) prods
foldProdSet fid set !prods
| Set.null set1 = prods
| otherwise = IntMap.insert fid set1 prods
foldProdSet fid set (!prods,!hoc)
| Set.null set1 = (prods,hoc)
| otherwise = (IntMap.insert fid set1 prods,hoc1)
where
set1 = Set.filter filterRule set
hoc1 = Set.fold accumHOC hoc set1
filterRule (PApply funid args) = all (\(PArg _ fid) -> isLive fid) args
filterRule (PCoerce fid) = isLive fid
filterRule _ = True
isLive fid = isPredefFId fid || IntMap.member fid prods0 || IntSet.member fid hoc
isLive fid = isPredefFId fid || IntMap.member fid prods0 || IntSet.member fid hoc0
accumHOC (PApply funid args) hoc = List.foldl' (\hoc (PArg hypos _) -> List.foldl' (\hoc (_,fid) -> IntSet.insert fid hoc) hoc hypos) hoc args
accumHOC _ hoc = hoc
@@ -242,7 +241,7 @@ splitLexicalRules cnc p_prods =
seq2prefix (SymALL_CAPIT :syms) = TrieMap.fromList [wf ["&|"]]
updateConcrete abs cnc =
let p_prods0 = filterProductions IntMap.empty (productions cnc)
let p_prods0 = filterProductions IntMap.empty IntSet.empty (productions cnc)
(lex,p_prods) = splitLexicalRules cnc p_prods0
l_prods = linIndex cnc p_prods0
in cnc{pproductions = p_prods, lproductions = l_prods, lexicon = lex}
@@ -253,7 +252,7 @@ updateConcrete abs cnc =
, prod <- Set.toList prods
, fun <- getFunctions prod]
where
getFunctions (PApply funid args) = let CncFun funs _ = cncfuns cnc ! funid in funs
getFunctions (PApply funid args) = let CncFun fun _ = cncfuns cnc ! funid in [fun]
getFunctions (PCoerce fid) = case IntMap.lookup fid productions of
Nothing -> []
Just prods -> [fun | prod <- Set.toList prods, fun <- getFunctions prod]

View File

@@ -503,14 +503,14 @@ type Continuation = TrieMap.TrieMap Token ActiveSet
-- | Return the Continuation of a Parsestate with exportable types
-- Used by PGFService
getContinuationInfo :: ParseState -> Map.Map [Token] [(FunId, CId, String)]
getContinuationInfo pstate = Map.map (concatMap f . Set.toList) contMap
getContinuationInfo pstate = Map.map (map f . Set.toList) contMap
where
PState _abstr concr _chart cont = pstate
contMap = Map.fromList (TrieMap.toList cont) -- always get [([], _::ActiveSet)]
f :: Active -> [(FunId,CId,String)]
f (Active int dotpos funid seqid pargs ak) = [(funid, fn, seq) | fn <- fns]
f :: Active -> (FunId,CId,String)
f (Active int dotpos funid seqid pargs ak) = (funid, cid, seq)
where
CncFun fns _ = cncfuns concr ! funid
CncFun cid _ = cncfuns concr ! funid
seq = showSeq dotpos (sequences concr ! seqid)
showSeq :: DotPos -> Sequence -> String

View File

@@ -1,6 +1,5 @@
{-# LANGUAGE FlexibleContexts #-}
module PGF.Printer (ppPGF,ppCat,ppFId,ppFunId,ppSeqId,ppSeq,ppFun) where
import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint
import PGF.CId
import PGF.Data
@@ -73,8 +72,8 @@ ppProduction (fid,PCoerce arg) =
ppProduction (fid,PConst _ _ ss) =
ppFId fid <+> text "->" <+> ppStrs ss
ppCncFun (funid,CncFun funs arr) =
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (hsep (map ppCId funs))
ppCncFun (funid,CncFun fun arr) =
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
ppLinDefs (fid,funids) =
[ppFId fid <+> text "->" <+> ppFunId funid <> brackets (ppFId fidVar) | funid <- funids]
@@ -82,6 +81,7 @@ ppLinDefs (fid,funids) =
ppLinRefs (fid,funids) =
[ppFId fidVar <+> text "->" <+> ppFunId funid <> brackets (ppFId fid) | funid <- funids]
ppSeq :: (SeqId,Sequence) -> Doc
ppSeq (seqid,seq) =
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))

View File

@@ -23,7 +23,6 @@ module PGF.VisualizeTree
, gizaAlignment
, conlls2latexDoc
) where
import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint
import PGF.CId (wildCId,showCId,ppCId,mkCId) --CId,pCId,
import PGF.Data

View File

@@ -1,5 +1,5 @@
name: pgf
version: 3.9.1-git
version: 3.9-git
cabal-version: >= 1.20
build-type: Simple
@@ -8,7 +8,7 @@ category: Natural Language Processing
synopsis: Grammatical Framework
description: A library for interpreting the Portable Grammar Format (PGF)
homepage: http://www.grammaticalframework.org/
bug-reports: https://github.com/GrammaticalFramework/gf-core/issues
bug-reports: https://github.com/GrammaticalFramework/GF/issues
maintainer: Thomas Hallgren
tested-with: GHC==7.6.3, GHC==7.8.3, GHC==7.10.3, GHC==8.0.2
@@ -30,6 +30,7 @@ Library
exceptions
if flag(custom-binary)
hs-source-dirs: ., binary
other-modules:
-- not really part of GF but I have changed the original binary library
-- and we have to keep the copy for now.
@@ -45,9 +46,9 @@ Library
--if impl(ghc>=7.8)
-- ghc-options: +RTS -A20M -RTS
ghc-prof-options: -fprof-auto
extensions:
extensions:
exposed-modules:
exposed-modules:
PGF
PGF.Internal
PGF.Haskell