1
0
forked from GitHub/gf-core

the compiler now compiles with the new runtime

This commit is contained in:
krangelov
2021-09-13 18:32:57 +02:00
parent c5ce2fd4b7
commit cf7673525f
26 changed files with 765 additions and 105 deletions

View File

@@ -6,7 +6,6 @@ module GF.Command.Commands (
import Prelude hiding (putStrLn,(<>)) import Prelude hiding (putStrLn,(<>))
import PGF2 import PGF2
import PGF2.Internal(writePGF)
import GF.Compile.Export import GF.Compile.Export
import GF.Compile.ToAPI import GF.Compile.ToAPI
@@ -666,7 +665,7 @@ pgfCommands = Map.fromList [
[e] -> case unApp e of [e] -> case unApp e of
Just (id, []) -> case functionType pgf id of Just (id, []) -> case functionType pgf id of
Just ty -> do putStrLn (showFun pgf id ty) Just ty -> do putStrLn (showFun pgf id ty)
putStrLn ("Probability: "++show (treeProbability pgf e)) putStrLn ("Probability: "++show (exprProbability pgf e))
return void return void
Nothing -> case categoryContext pgf id of Nothing -> case categoryContext pgf id of
Just hypos -> do putStrLn ("cat "++id++if null hypos then "" else ' ':showContext [] hypos) Just hypos -> do putStrLn ("cat "++id++if null hypos then "" else ' ':showContext [] hypos)
@@ -682,7 +681,7 @@ pgfCommands = Map.fromList [
Left err -> error err Left err -> error err
Right (e,ty) -> do putStrLn ("Expression: "++showExpr [] e) Right (e,ty) -> do putStrLn ("Expression: "++showExpr [] e)
putStrLn ("Type: "++showType [] ty) putStrLn ("Type: "++showType [] ty)
putStrLn ("Probability: "++show (treeProbability pgf e)) putStrLn ("Probability: "++show (exprProbability pgf e))
return void return void
_ -> do putStrLn "a single identifier or expression is expected from the command" _ -> do putStrLn "a single identifier or expression is expected from the command"
return void, return void,
@@ -800,8 +799,8 @@ pgfCommands = Map.fromList [
showFun pgf id ty = kwd++" "++ id ++ " : " ++ showType [] ty showFun pgf id ty = kwd++" "++ id ++ " : " ++ showType [] ty
where where
kwd | functionIsDataCon pgf id = "data" kwd | functionIsConstructor pgf id = "data"
| otherwise = "fun" | otherwise = "fun"
morphos pgf opts s = morphos pgf opts s =
[(s,lookupMorpho concr s) | concr <- optLangs pgf opts] [(s,lookupMorpho concr s) | concr <- optLangs pgf opts]

View File

@@ -21,7 +21,7 @@ import Data.Maybe(fromMaybe)
-------------------------- --------------------------
cf2pgf :: Options -> FilePath -> ParamCFG -> Map.Map Fun Double -> PGF cf2pgf :: Options -> FilePath -> ParamCFG -> Map.Map Fun Double -> PGF
cf2pgf opts fpath cf probs = cf2pgf opts fpath cf probs = error "TODO: cf2pgf" {-
build (let abstr = cf2abstr cf probs build (let abstr = cf2abstr cf probs
in newPGF [] aname abstr [(cname, cf2concr opts abstr cf)]) in newPGF [] aname abstr [(cname, cf2concr opts abstr cf)])
where where
@@ -134,3 +134,4 @@ mkRuleName rule =
case ruleName rule of case ruleName rule of
CFObj n _ -> n CFObj n _ -> n
_ -> "_" _ -> "_"
-}

View File

@@ -4,7 +4,7 @@ module GF.Compile.Compute.Predef(predef,predefName,delta) where
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.Array(array,(!)) import Data.Array(array,(!))
import Data.List (isInfixOf) import Data.List (isInfixOf,genericTake,genericDrop,genericLength)
import Data.Char (isUpper,toLower,toUpper) import Data.Char (isUpper,toLower,toUpper)
import Control.Monad(ap) import Control.Monad(ap)
@@ -20,7 +20,7 @@ class Predef a where
toValue :: a -> Value toValue :: a -> Value
fromValue :: Value -> Err a fromValue :: Value -> Err a
instance Predef Int where instance Predef Integer where
toValue = VInt toValue = VInt
fromValue (VInt i) = return i fromValue (VInt i) = return i
fromValue v = verror "Int" v fromValue v = verror "Int" v
@@ -87,8 +87,8 @@ predefList =
delta f vs = delta f vs =
case f of case f of
Drop -> fromNonExist vs NonExist (ap2 (drop::Int->String->String)) Drop -> fromNonExist vs NonExist (ap2 (genericDrop::Integer->String->String))
Take -> fromNonExist vs NonExist (ap2 (take::Int->String->String)) Take -> fromNonExist vs NonExist (ap2 (genericTake::Integer->String->String))
Tk -> fromNonExist vs NonExist (ap2 tk) Tk -> fromNonExist vs NonExist (ap2 tk)
Dp -> fromNonExist vs NonExist (ap2 dp) Dp -> fromNonExist vs NonExist (ap2 dp)
EqStr -> fromNonExist vs PFalse (ap2 ((==)::String->String->Bool)) EqStr -> fromNonExist vs PFalse (ap2 ((==)::String->String->Bool))
@@ -97,10 +97,10 @@ delta f vs =
ToUpper -> fromNonExist vs NonExist (ap1 (map toUpper)) ToUpper -> fromNonExist vs NonExist (ap1 (map toUpper))
ToLower -> fromNonExist vs NonExist (ap1 (map toLower)) ToLower -> fromNonExist vs NonExist (ap1 (map toLower))
IsUpper -> fromNonExist vs PFalse (ap1 (all' isUpper)) IsUpper -> fromNonExist vs PFalse (ap1 (all' isUpper))
Length -> fromNonExist vs (0::Int) (ap1 (length::String->Int)) Length -> fromNonExist vs (0::Integer) (ap1 (genericLength::String->Integer))
Plus -> ap2 ((+)::Int->Int->Int) Plus -> ap2 ((+)::Integer->Integer->Integer)
EqInt -> ap2 ((==)::Int->Int->Bool) EqInt -> ap2 ((==)::Integer->Integer->Bool)
LessInt -> ap2 ((<)::Int->Int->Bool) LessInt -> ap2 ((<)::Integer->Integer->Bool)
{- -- | Show | Read | ToStr | MapStr | EqVal -} {- -- | Show | Read | ToStr | MapStr | EqVal -}
Error -> ap1 VError Error -> ap1 VError
Trace -> ap2 vtrace Trace -> ap2 vtrace
@@ -139,8 +139,12 @@ delta f vs =
-- unimpl id = bug $ "unimplemented predefined function: "++showIdent id -- unimpl id = bug $ "unimplemented predefined function: "++showIdent id
-- problem id vs = bug $ "unexpected arguments: Predef."++showIdent id++" "++show vs -- problem id vs = bug $ "unexpected arguments: Predef."++showIdent id++" "++show vs
tk i s = take (max 0 (length s - i)) s :: String tk :: Integer -> String -> String
dp i s = drop (max 0 (length s - i)) s :: String tk i s = genericTake (max 0 (genericLength s - i)) s
dp :: Integer -> String -> String
dp i s = genericDrop (max 0 (genericLength s - i)) s
occur s t = isInfixOf (s::String) (t::String) occur s t = isInfixOf (s::String) (t::String)
occurs s t = any (`elem` (t::String)) (s::String) occurs s t = any (`elem` (t::String)) (s::String)
all' = all :: (a->Bool) -> [a] -> Bool all' = all :: (a->Bool) -> [a] -> Bool

View File

@@ -14,7 +14,7 @@ data Value
-- -- | VClosure Env Term -- used in Typecheck.ConcreteNew -- -- | VClosure Env Term -- used in Typecheck.ConcreteNew
| VAbs BindType Ident Binding -- used in Compute.Concrete | VAbs BindType Ident Binding -- used in Compute.Concrete
| VProd BindType Value Ident Binding -- used in Compute.Concrete | VProd BindType Value Ident Binding -- used in Compute.Concrete
| VInt Int | VInt Integer
| VFloat Double | VFloat Double
| VString String | VString String
| VSort Ident | VSort Ident

View File

@@ -1,5 +1,7 @@
-- | Translate concrete syntax to Haskell -- | Translate concrete syntax to Haskell
module GF.Compile.ConcreteToHaskell(concretes2haskell,concrete2haskell) where module GF.Compile.ConcreteToHaskell(concretes2haskell,concrete2haskell) where
import PGF2(Literal(..))
import Data.List(isPrefixOf,sort,sortOn) import Data.List(isPrefixOf,sort,sortOn)
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.Set as S import qualified Data.Set as S
@@ -181,9 +183,9 @@ concrete2haskell opts
ppL l = ppL l =
case l of case l of
FloatConstant x -> pure (lit x) LFlt x -> pure (lit x)
IntConstant n -> pure (lit n) LInt n -> pure (lit n)
StrConstant s -> pure (token s) LStr s -> pure (token s)
pId p@(ParamId s) = pId p@(ParamId s) =
if "to_R_" `isPrefixOf` unqual s then toIdent p else gId p -- !! a hack if "to_R_" `isPrefixOf` unqual s then toIdent p else gId p -- !! a hack

View File

@@ -4,7 +4,8 @@ module GF.Compile.GenerateBC(generateByteCode) where
import GF.Grammar import GF.Grammar
import GF.Grammar.Lookup(lookupAbsDef,lookupFunType) import GF.Grammar.Lookup(lookupAbsDef,lookupFunType)
import GF.Data.Operations import GF.Data.Operations
import PGF2.Internal(CodeLabel,Instr(..),IVal(..),TailInfo(..),Literal(..)) import PGF2(Literal(..))
import PGF2.Internal(CodeLabel,Instr(..),IVal(..),TailInfo(..))
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.List(nub,mapAccumL) import Data.List(nub,mapAccumL)
import Data.Maybe(fromMaybe) import Data.Maybe(fromMaybe)

View File

@@ -19,7 +19,7 @@ import GF.Compile.Compute.Predef(predef)
import GF.Compile.Compute.Value(Predefined(..)) import GF.Compile.Compute.Value(Predefined(..))
import GF.Infra.Ident(ModuleName(..),Ident,ident2raw,rawIdentS,showIdent,isWildIdent) import GF.Infra.Ident(ModuleName(..),Ident,ident2raw,rawIdentS,showIdent,isWildIdent)
import GF.Infra.Option(Options,optionsPGF) import GF.Infra.Option(Options,optionsPGF)
import PGF2.Internal(Literal(..)) import PGF2(Literal(..))
import GF.Compile.Compute.Concrete(GlobalEnv,normalForm,resourceValues) import GF.Compile.Compute.Concrete(GlobalEnv,normalForm,resourceValues)
import GF.Grammar.Canonical as C import GF.Grammar.Canonical as C
import System.FilePath ((</>), (<.>)) import System.FilePath ((</>), (<.>))
@@ -193,11 +193,11 @@ convert' gr vs = ppT
Cn x -> VarValue (gId x) -- hmm Cn x -> VarValue (gId x) -- hmm
Con c -> ParamConstant (Param (gId c) []) Con c -> ParamConstant (Param (gId c) [])
Sort k -> VarValue (gId k) Sort k -> VarValue (gId k)
EInt n -> LiteralValue (IntConstant n) EInt n -> LiteralValue (LInt n)
Q (m,n) -> if m==cPredef then ppPredef n else VarValue (gQId m n) Q (m,n) -> if m==cPredef then ppPredef n else VarValue (gQId m n)
QC (m,n) -> ParamConstant (Param (gQId m n) []) QC (m,n) -> ParamConstant (Param (gQId m n) [])
K s -> LiteralValue (StrConstant s) K s -> LiteralValue (LStr s)
Empty -> LiteralValue (StrConstant "") Empty -> LiteralValue (LStr "")
FV ts -> VariantValue (map ppT ts) FV ts -> VariantValue (map ppT ts)
Alts t' vs -> alts vs (ppT t') Alts t' vs -> alts vs (ppT t')
_ -> error $ "convert' ppT: " ++ show t _ -> error $ "convert' ppT: " ++ show t
@@ -265,8 +265,8 @@ convert' gr vs = ppT
concatValue :: LinValue -> LinValue -> LinValue concatValue :: LinValue -> LinValue -> LinValue
concatValue v1 v2 = concatValue v1 v2 =
case (v1,v2) of case (v1,v2) of
(LiteralValue (StrConstant ""),_) -> v2 (LiteralValue (LStr ""),_) -> v2
(_,LiteralValue (StrConstant "")) -> v1 (_,LiteralValue (LStr "")) -> v1
_ -> ConcatValue v1 v2 _ -> ConcatValue v1 v2
-- | Smart constructor for projections -- | Smart constructor for projections
@@ -429,11 +429,5 @@ unqual n = Unqual (ident2raw n)
convFlags :: G.Grammar -> ModuleName -> Flags convFlags :: G.Grammar -> ModuleName -> Flags
convFlags gr mn = convFlags gr mn =
Flags [(rawIdentS n,convLit v) | Flags [(rawIdentS n,v) |
(n,v)<-err (const []) (optionsPGF.mflags) (lookupModule gr mn)] (n,v)<-err (const []) (optionsPGF.mflags) (lookupModule gr mn)]
where
convLit l =
case l of
LStr s -> Str s
LInt i -> C.Int i
LFlt d -> Flt d

View File

@@ -30,7 +30,7 @@ import GHC.Prim
import GHC.Base(getTag) import GHC.Base(getTag)
grammar2PGF :: Options -> SourceGrammar -> ModuleName -> Map.Map PGF2.Fun Double -> IO PGF grammar2PGF :: Options -> SourceGrammar -> ModuleName -> Map.Map PGF2.Fun Double -> IO PGF
grammar2PGF opts gr am probs = do grammar2PGF opts gr am probs = error "TODO: grammar2PGF" {-do
cnc_infos <- getConcreteInfos gr am cnc_infos <- getConcreteInfos gr am
return $ return $
build (let gflags = if flag optSplitPGF opts build (let gflags = if flag optSplitPGF opts
@@ -445,3 +445,4 @@ compareCaseInsensitive (x:xs) (y:ys) =
EQ -> compare x y EQ -> compare x y
x -> x x -> x
x -> x x -> x
-}

View File

@@ -20,7 +20,7 @@ type ConcrData = ([(FId,[FunId])], -- ^ Lindefs
[(Cat,FId,FId,[String])]) -- ^ Concrete categories [(Cat,FId,FId,[String])]) -- ^ Concrete categories
optimizePGF :: Cat -> ConcrData -> ConcrData optimizePGF :: Cat -> ConcrData -> ConcrData
optimizePGF startCat = topDownFilter startCat . bottomUpFilter optimizePGF startCat = error "TODO: optimizePGF" {- topDownFilter startCat . bottomUpFilter
catString = "String" catString = "String"
catInt = "Int" catInt = "Int"
@@ -187,3 +187,4 @@ filterProductions prods0 hoc0 prods
accumHOC hoc (PApply funid args) = List.foldl' (\hoc (PArg hypos _) -> List.foldl' (\hoc fid -> IntSet.insert fid hoc) hoc (map snd hypos)) hoc args accumHOC hoc (PApply funid args) = List.foldl' (\hoc (PArg hypos _) -> List.foldl' (\hoc fid -> IntSet.insert fid hoc) hoc (map snd hypos)) hoc args
accumHOC hoc _ = hoc accumHOC hoc _ = hoc
-}

View File

@@ -6,8 +6,8 @@ import Text.JSON
import qualified Data.Map as Map import qualified Data.Map as Map
pgf2json :: PGF -> String pgf2json :: PGF -> String
pgf2json pgf = pgf2json pgf = error "TODO: pgf2json"
encode $ makeObj {- encode $ makeObj
[ ("abstract", abstract2json pgf) [ ("abstract", abstract2json pgf)
, ("concretes", makeObj $ map concrete2json , ("concretes", makeObj $ map concrete2json
(Map.toList (languages pgf))) (Map.toList (languages pgf)))
@@ -108,3 +108,4 @@ new f xs =
[ ("type", showJSON f) [ ("type", showJSON f)
, ("args", showJSON xs) , ("args", showJSON xs)
] ]
-}

View File

@@ -35,7 +35,7 @@ data AExp =
AVr Ident Val AVr Ident Val
| ACn QIdent Val | ACn QIdent Val
| AType | AType
| AInt Int | AInt Integer
| AFloat Double | AFloat Double
| AStr String | AStr String
| AMeta MetaId Val | AMeta MetaId Val

View File

@@ -1,7 +1,7 @@
module GF.Compiler (mainGFC, linkGrammars, writeGrammar, writeOutputs) where module GF.Compiler (mainGFC, linkGrammars, writeGrammar, writeOutputs) where
import PGF2 import PGF2
import PGF2.Internal(unionPGF,writePGF,writeConcr) import PGF2.Internal(unionPGF,writeConcr)
import GF.Compile as S(batchCompile,link,srcAbsName) import GF.Compile as S(batchCompile,link,srcAbsName)
import GF.CompileInParallel as P(parallelBatchCompile) import GF.CompileInParallel as P(parallelBatchCompile)
import GF.Compile.Export import GF.Compile.Export

View File

@@ -25,7 +25,7 @@ data BNFCRule = BNFCRule {
ruleName :: CFTerm } ruleName :: CFTerm }
| BNFCCoercions { | BNFCCoercions {
coerCat :: Cat, coerCat :: Cat,
coerNum :: Int } coerNum :: Integer }
| BNFCTerminator { | BNFCTerminator {
termNonEmpty :: Bool, termNonEmpty :: Bool,
termCat :: Cat, termCat :: Cat,

View File

@@ -22,7 +22,8 @@ import GF.Infra.Option
import GF.Infra.UseIO(MonadIO(..)) import GF.Infra.UseIO(MonadIO(..))
import GF.Grammar.Grammar import GF.Grammar.Grammar
import PGF2.Internal(Literal(..),Symbol(..)) import PGF2(Literal(..))
import PGF2.Internal(Symbol(..))
-- Please change this every time when the GFO format is changed -- Please change this every time when the GFO format is changed
gfoVersion = "GF04" gfoVersion = "GF04"

View File

@@ -9,9 +9,11 @@
{-# LANGUAGE DeriveTraversable #-} {-# LANGUAGE DeriveTraversable #-}
module GF.Grammar.Canonical where module GF.Grammar.Canonical where
import Prelude hiding ((<>)) import Prelude hiding ((<>))
import GF.Text.Pretty import GF.Text.Pretty
import GF.Infra.Ident (RawIdent) import GF.Infra.Ident (RawIdent)
import PGF(Literal(..))
-- | A Complete grammar -- | A Complete grammar
data Grammar = Grammar Abstract [Concrete] deriving Show data Grammar = Grammar Abstract [Concrete] deriving Show
@@ -58,7 +60,7 @@ newtype ParamType = ParamTypeId ParamId deriving (Eq,Ord,Show)
-- | Linearization value, RHS of @lin@ -- | Linearization value, RHS of @lin@
data LinValue = ConcatValue LinValue LinValue data LinValue = ConcatValue LinValue LinValue
| LiteralValue LinLiteral | LiteralValue Literal
| ErrorValue String | ErrorValue String
| ParamConstant ParamValue | ParamConstant ParamValue
| PredefValue PredefId | PredefValue PredefId
@@ -74,11 +76,6 @@ data LinValue = ConcatValue LinValue LinValue
| CommentedValue String LinValue | CommentedValue String LinValue
deriving (Eq,Ord,Show) deriving (Eq,Ord,Show)
data LinLiteral = FloatConstant Float
| IntConstant Int
| StrConstant String
deriving (Eq,Ord,Show)
data LinPattern = ParamPattern ParamPattern data LinPattern = ParamPattern ParamPattern
| RecordPattern [RecordRow LinPattern] | RecordPattern [RecordRow LinPattern]
| TuplePattern [LinPattern] | TuplePattern [LinPattern]
@@ -120,9 +117,8 @@ newtype FunId = FunId Id deriving (Eq,Show)
data VarId = Anonymous | VarId Id deriving Show data VarId = Anonymous | VarId Id deriving Show
newtype Flags = Flags [(FlagName,FlagValue)] deriving Show newtype Flags = Flags [(FlagName,Literal)] deriving Show
type FlagName = Id type FlagName = Id
data FlagValue = Str String | Int Int | Flt Double deriving Show
-- *** Identifiers -- *** Identifiers
@@ -243,13 +239,13 @@ instance PPA LinValue where
VarValue v -> pp v VarValue v -> pp v
_ -> parens lv _ -> parens lv
instance Pretty LinLiteral where pp = ppA instance Pretty Literal where pp = ppA
instance PPA LinLiteral where instance PPA Literal where
ppA l = case l of ppA l = case l of
FloatConstant f -> pp f LFlt f -> pp f
IntConstant n -> pp n LInt n -> pp n
StrConstant s -> doubleQuotes s -- hmm LStr s -> doubleQuotes s -- hmm
instance RhsSeparator LinValue where rhsSep _ = pp "=" instance RhsSeparator LinValue where rhsSep _ = pp "="
@@ -298,11 +294,6 @@ instance Pretty Flags where
where where
ppFlag (name,value) = name <+> "=" <+> value <>";" ppFlag (name,value) = name <+> "=" <+> value <>";"
instance Pretty FlagValue where
pp (Str s) = pp s
pp (Int i) = pp i
pp (Flt d) = pp d
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-- | Pretty print atomically (i.e. wrap it in parentheses if necessary) -- | Pretty print atomically (i.e. wrap it in parentheses if necessary)
class Pretty a => PPA a where ppA :: a -> Doc class Pretty a => PPA a where ppA :: a -> Doc

View File

@@ -8,7 +8,7 @@ import Data.Ratio (denominator, numerator)
import GF.Grammar.Canonical import GF.Grammar.Canonical
import Control.Monad (guard) import Control.Monad (guard)
import GF.Infra.Ident (RawIdent,showRawIdent,rawIdentS) import GF.Infra.Ident (RawIdent,showRawIdent,rawIdentS)
import PGF(Literal(..))
encodeJSON :: FilePath -> Grammar -> IO () encodeJSON :: FilePath -> Grammar -> IO ()
encodeJSON fpath g = writeFile fpath (encode g) encodeJSON fpath g = writeFile fpath (encode g)
@@ -171,13 +171,13 @@ instance JSON LinValue where
<|> do vs <- readJSON o :: Result [LinValue] <|> do vs <- readJSON o :: Result [LinValue]
return (foldr1 ConcatValue vs) return (foldr1 ConcatValue vs)
instance JSON LinLiteral where instance JSON Literal where
-- basic values (Str, Float, Int) are encoded as JSON strings/numbers: -- basic values (Str, Float, Int) are encoded as JSON strings/numbers:
showJSON (StrConstant s) = showJSON s showJSON (LStr s) = showJSON s
showJSON (FloatConstant f) = showJSON f showJSON (LFlt f) = showJSON f
showJSON (IntConstant n) = showJSON n showJSON (LInt n) = showJSON n
readJSON = readBasicJSON StrConstant IntConstant FloatConstant readJSON = readBasicJSON LStr LInt LFlt
instance JSON LinPattern where instance JSON LinPattern where
-- wildcards and patterns without arguments are encoded as strings: -- wildcards and patterns without arguments are encoded as strings:
@@ -262,15 +262,6 @@ instance JSON Flags where
where fromRow (lbl, jsvalue) = do value <- readJSON jsvalue where fromRow (lbl, jsvalue) = do value <- readJSON jsvalue
return (rawIdentS lbl, value) return (rawIdentS lbl, value)
instance JSON FlagValue where
-- flag values are encoded as basic JSON types:
showJSON (Str s) = showJSON s
showJSON (Int i) = showJSON i
showJSON (Flt f) = showJSON f
readJSON = readBasicJSON Str Int Flt
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-- ** Convenience functions -- ** Convenience functions

View File

@@ -354,7 +354,7 @@ data Term =
| Cn Ident -- ^ constant | Cn Ident -- ^ constant
| Con Ident -- ^ constructor | Con Ident -- ^ constructor
| Sort Ident -- ^ basic type | Sort Ident -- ^ basic type
| EInt Int -- ^ integer literal | EInt Integer -- ^ integer literal
| EFloat Double -- ^ floating point literal | EFloat Double -- ^ floating point literal
| K String -- ^ string literal or token: @\"foo\"@ | K String -- ^ string literal or token: @\"foo\"@
| Empty -- ^ the empty string @[]@ | Empty -- ^ the empty string @[]@
@@ -409,7 +409,7 @@ data Patt =
| PW -- ^ wild card pattern: @_@ | PW -- ^ wild card pattern: @_@
| PR [(Label,Patt)] -- ^ record pattern: @{r = p ; ...}@ -- only concrete | PR [(Label,Patt)] -- ^ record pattern: @{r = p ; ...}@ -- only concrete
| PString String -- ^ string literal pattern: @\"foo\"@ -- only abstract | PString String -- ^ string literal pattern: @\"foo\"@ -- only abstract
| PInt Int -- ^ integer literal pattern: @12@ -- only abstract | PInt Integer -- ^ integer literal pattern: @12@ -- only abstract
| PFloat Double -- ^ float literal pattern: @1.2@ -- only abstract | PFloat Double -- ^ float literal pattern: @1.2@ -- only abstract
| PT Type Patt -- ^ type-annotated pattern | PT Type Patt -- ^ type-annotated pattern

View File

@@ -130,7 +130,7 @@ data Token
| T_separator | T_separator
| T_nonempty | T_nonempty
| T_String String -- string literals | T_String String -- string literals
| T_Integer Int -- integer literals | T_Integer Integer -- integer literals
| T_Double Double -- double precision float literals | T_Double Double -- double precision float literals
| T_Ident Ident | T_Ident Ident
| T_EOF | T_EOF

View File

@@ -216,7 +216,7 @@ typeTok = Sort cTok
typeStrs = Sort cStrs typeStrs = Sort cStrs
typeString, typeFloat, typeInt :: Type typeString, typeFloat, typeInt :: Type
typeInts :: Int -> Type typeInts :: Integer -> Type
typePBool :: Type typePBool :: Type
typeError :: Type typeError :: Type
@@ -227,7 +227,7 @@ typeInts i = App (cnPredef cInts) (EInt i)
typePBool = cnPredef cPBool typePBool = cnPredef cPBool
typeError = cnPredef cErrorType typeError = cnPredef cErrorType
isTypeInts :: Type -> Maybe Int isTypeInts :: Type -> Maybe Integer
isTypeInts (App c (EInt i)) | c == cnPredef cInts = Just i isTypeInts (App c (EInt i)) | c == cnPredef cInts = Just i
isTypeInts _ = Nothing isTypeInts _ = Nothing
@@ -324,7 +324,7 @@ freshAsTerm s = Vr (varX (readIntArg s))
string2term :: String -> Term string2term :: String -> Term
string2term = K string2term = K
int2term :: Int -> Term int2term :: Integer -> Term
int2term = EInt int2term = EInt
float2term :: Double -> Term float2term :: Double -> Term

View File

@@ -35,7 +35,7 @@ import GF.Infra.Ident
import GF.Infra.GetOpt import GF.Infra.GetOpt
import GF.Grammar.Predef import GF.Grammar.Predef
import System.FilePath import System.FilePath
import PGF2.Internal(Literal(..)) import PGF2(Literal(..))
import GF.Data.Operations(Err,ErrorMonad(..),liftErr) import GF.Data.Operations(Err,ErrorMonad(..),liftErr)

View File

@@ -25,7 +25,7 @@ toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc
type Profile = [Int] type Profile = [Int]
pgfToCFG :: PGF -> Concr -> CFG pgfToCFG :: PGF -> Concr -> CFG
pgfToCFG pgf cnc = mkCFG start_cat extCats (startRules ++ concatMap ruleToCFRule rules) pgfToCFG pgf cnc = error "TODO: pgfToCFG" {- mkCFG start_cat extCats (startRules ++ concatMap ruleToCFRule rules)
where where
(_,start_cat,_) = unType (startCat pgf) (_,start_cat,_) = unType (startCat pgf)
@@ -116,3 +116,4 @@ pgfToCFG pgf cnc = mkCFG start_cat extCats (startRules ++ concatMap ruleToCFRule
ruleToCFRule (c,PCoerce c') = ruleToCFRule (c,PCoerce c') =
[Rule (fcatToCat c l) [NonTerminal (fcatToCat c' l)] (CFRes 0) [Rule (fcatToCat c l) [NonTerminal (fcatToCat c' l)] (CFRes 0)
| l <- [0..catLinArity c-1]] | l <- [0..catLinArity c-1]]
-}

View File

@@ -14,17 +14,20 @@
------------------------------------------------- -------------------------------------------------
module PGF2 (-- * PGF module PGF2 (-- * PGF
PGF,readPGF,bootNGF,readNGF, PGF,readPGF,bootNGF,readNGF,writePGF,showPGF,
-- * Abstract syntax -- * Abstract syntax
AbsName,abstractName,globalFlag,abstractFlag, AbsName,abstractName,globalFlag,abstractFlag,
-- ** Categories -- ** Categories
Cat,categories,categoryContext,categoryProb, Cat,categories,categoryContext,categoryProbability,
-- ** Functions -- ** Functions
Fun, functions, functionsByCat, Fun, functions, functionsByCat,
functionType, functionIsConstructor, functionProb, functionType, functionIsConstructor, functionProbability,
-- ** Expressions -- ** Expressions
Expr(..), Literal(..), showExpr, readExpr, Expr(..), Literal(..), showExpr, readExpr, pExpr, pIdent,
mkAbs, unAbs, mkAbs, unAbs,
mkApp, unApp, unapply, mkApp, unApp, unapply,
mkStr, unStr, mkStr, unStr,
@@ -33,20 +36,58 @@ module PGF2 (-- * PGF
mkFloat, unFloat, mkFloat, unFloat,
mkMeta, unMeta, mkMeta, unMeta,
-- extra -- extra
exprSize, exprFunctions, exprSize, exprFunctions, exprSubstitute, exprProbability,
-- ** Types -- ** Types
Type(..), Hypo, BindType(..), startCat, Type(..), Hypo, BindType(..), startCat,
readType, showType, readType, showType, showContext,
mkType, unType, mkType, unType,
mkHypo, mkDepHypo, mkImplHypo, mkHypo, mkDepHypo, mkImplHypo,
-- ** Type checking
-- | Dynamically-built expressions should always be type-checked before using in other functions,
-- as the exceptions thrown by using invalid expressions may not catchable.
checkExpr, inferExpr, checkType,
-- ** Computing
compute,
-- ** Generation
generateAll, generateAllFrom, generateRandom, generateRandomFrom,
-- ** Morphological Analysis
MorphoAnalysis, lookupMorpho, lookupCohorts, fullFormLexicon,
filterBest, filterLongest,
-- ** Visualizations
GraphvizOptions(..), graphvizDefaults,
graphvizAbstractTree, graphvizParseTree,
Labels, getDepLabels,
graphvizDependencyTree, conlls2latexDoc, getCncDepLabels,
graphvizWordAlignment,
-- * Concrete syntax -- * Concrete syntax
ConcName, ConcName,Concr,languages,concreteName,languageCode,
-- ** Linearization
linearize, linearizeAll, tabularLinearize, tabularLinearizeAll,
FId, BracketedString(..), showBracketedString, flattenBracketedString,
bracketedLinearize, bracketedLinearizeAll,
hasLinearization,
printName, alignWords, gizaAlignment,
-- ** Parsing
ParseOutput(..), parse, parseWithHeuristics, complete,
-- * Exceptions -- * Exceptions
PGFError(..) PGFError(..),
-- * Auxiliaries
readProbabilitiesFromFile
) where ) where
import Prelude hiding ((<>))
import PGF2.Expr import PGF2.Expr
import PGF2.FFI import PGF2.FFI
@@ -54,15 +95,16 @@ import Foreign
import Foreign.C import Foreign.C
import Control.Exception(mask_,bracket) import Control.Exception(mask_,bracket)
import System.IO.Unsafe(unsafePerformIO) import System.IO.Unsafe(unsafePerformIO)
import System.Random
import qualified Foreign.Concurrent as C import qualified Foreign.Concurrent as C
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.IORef import Data.IORef
import Data.List(intersperse,groupBy)
import Data.Char(isUpper,isSpace,isPunctuation)
import Text.PrettyPrint
#include <pgf/pgf.h> #include <pgf/pgf.h>
type AbsName = String -- ^ Name of abstract syntax
type ConcName = String -- ^ Name of concrete syntax
-- | Reads a PGF file and keeps it in memory. -- | Reads a PGF file and keeps it in memory.
readPGF :: FilePath -> IO PGF readPGF :: FilePath -> IO PGF
readPGF fpath = readPGF fpath =
@@ -106,6 +148,12 @@ readNGF fpath =
fptr2 <- C.newForeignPtr c_revision (withForeignPtr fptr1 (\c_db -> pgf_free_revision c_db c_revision)) fptr2 <- C.newForeignPtr c_revision (withForeignPtr fptr1 (\c_db -> pgf_free_revision c_db c_revision))
return (PGF fptr1 fptr2 Map.empty) return (PGF fptr1 fptr2 Map.empty)
writePGF :: FilePath -> PGF -> IO ()
writePGF = error "TODO: writePGF"
showPGF :: PGF -> String
showPGF = error "TODO: showPGF"
-- | The abstract language name is the name of the top-level -- | The abstract language name is the name of the top-level
-- abstract module -- abstract module
abstractName :: PGF -> AbsName abstractName :: PGF -> AbsName
@@ -156,14 +204,273 @@ functionIsConstructor p fun =
do res <- withPgfExn (pgf_function_is_constructor c_db c_revision c_fun) do res <- withPgfExn (pgf_function_is_constructor c_db c_revision c_fun)
return (res /= 0) return (res /= 0)
functionProb :: PGF -> Fun -> Float functionProbability :: PGF -> Fun -> Float
functionProb p fun = functionProbability p fun =
unsafePerformIO $ unsafePerformIO $
withText fun $ \c_fun -> withText fun $ \c_fun ->
withForeignPtr (a_db p) $ \c_db -> withForeignPtr (a_db p) $ \c_db ->
withForeignPtr (revision p) $ \c_revision -> withForeignPtr (revision p) $ \c_revision ->
withPgfExn (pgf_function_prob c_db c_revision c_fun) withPgfExn (pgf_function_prob c_db c_revision c_fun)
exprProbability :: PGF -> Expr -> Float
exprProbability = error "TODO: exprProbability"
checkExpr :: PGF -> Expr -> Type -> Either String Expr
checkExpr = error "TODO: checkExpr"
-- | Tries to infer the type of an expression. Note that
-- even if the expression is type correct it is not always
-- possible to infer its type in the GF type system.
-- In this case the function returns an error.
inferExpr :: PGF -> Expr -> Either String (Expr, Type)
inferExpr = error "TODO: inferExpr"
-- | Check whether a type is consistent with the abstract
-- syntax of the grammar.
checkType :: PGF -> Type -> Either String Type
checkType = error "TODO: checkType"
compute :: PGF -> Expr -> Expr
compute = error "TODO: compute"
concreteName :: Concr -> ConcName
concreteName c = error "TODO: concreteName"
languageCode :: Concr -> Maybe String
languageCode c = error "TODO: languageCode"
printName :: Concr -> Fun -> Maybe String
printName lang fun = error "TODO: printName"
alignWords :: Concr -> Expr -> [(String, [Int])]
alignWords = error "TODO: alignWords"
gizaAlignment = error "TODO: gizaAlignment"
-----------------------------------------------------------------------------
-- Functions using Concr
-- Morpho analyses, parsing & linearization
-- | This triple is returned by all functions that deal with
-- the grammar's lexicon. Its first element is the name of an abstract
-- lexical function which can produce a given word or
-- a multiword expression (i.e. this is the lemma).
-- After that follows a string which describes
-- the particular inflection form.
--
-- The last element is a logarithm from the
-- the probability of the function. The probability is not
-- conditionalized on the category of the function. This makes it
-- possible to compare the likelihood of two functions even if they
-- have different types.
type MorphoAnalysis = (Fun,String,Float)
-- | 'lookupMorpho' takes a string which must be a single word or
-- a multiword expression. It then computes the list of all possible
-- morphological analyses.
lookupMorpho :: Concr -> String -> [MorphoAnalysis]
lookupMorpho = error "TODO: lookupMorpho"
-- | 'lookupCohorts' takes an arbitrary string an produces
-- a list of all places where lexical items from the grammar have been
-- identified (i.e. cohorts). The list consists of triples of the format @(start,ans,end)@,
-- where @start-end@ identifies the span in the text and @ans@ is
-- the list of possible morphological analyses similar to 'lookupMorpho'.
--
-- The list is sorted first by the @start@ position and after than
-- by the @end@ position. This can be used for instance if you want to
-- filter only the longest matches.
lookupCohorts :: Concr -> String -> [(Int,String,[MorphoAnalysis],Int)]
lookupCohorts = error "TODO: lookupCohorts"
filterBest :: [(Int,String,[MorphoAnalysis],Int)] -> [(Int,String,[MorphoAnalysis],Int)]
filterBest ans =
reverse (iterate (maxBound :: Int) [(0,0,[],ans)] [] [])
where
iterate v0 [] [] res = res
iterate v0 [] new res = iterate v0 new [] res
iterate v0 ((_,v,conf, []):old) new res =
case compare v0 v of
LT -> res
EQ -> iterate v0 old new (merge conf res)
GT -> iterate v old new conf
iterate v0 ((_,v,conf,an:ans):old) new res = iterate v0 old (insert (v+valueOf an) conf an ans [] new) res
valueOf (_,_,[],_) = 2
valueOf _ = 1
insert v conf an@(start,_,_,end) ans l_new [] =
match start v conf ans ((end,v,comb conf an,filter end ans):l_new) []
insert v conf an@(start,_,_,end) ans l_new (new@(end0,v0,conf0,ans0):r_new) =
case compare end0 end of
LT -> insert v conf an ans (new:l_new) r_new
EQ -> case compare v0 v of
LT -> match start v conf ans ((end,v, conf0,ans0): l_new) r_new
EQ -> match start v conf ans ((end,v,merge (comb conf an) conf0,ans0): l_new) r_new
GT -> match start v conf ans ((end,v,comb conf an, ans0): l_new) r_new
GT -> match start v conf ans ((end,v,comb conf an, filter end ans):new:l_new) r_new
match start0 v conf (an@(start,_,_,end):ans) l_new r_new
| start0 == start = insert v conf an ans l_new r_new
match start0 v conf ans l_new r_new = revOn l_new r_new
comb ((start0,w0,an0,end0):conf) (start,w,an,end)
| end0 == start && (unk w0 an0 || unk w an) = (start0,w0++w,[],end):conf
comb conf an = an:conf
filter end [] = []
filter end (next@(start,_,_,_):ans)
| end <= start = next:ans
| otherwise = filter end ans
revOn [] ys = ys
revOn (x:xs) ys = revOn xs (x:ys)
merge [] ans = ans
merge ans [] = ans
merge (an1@(start1,_,_,end1):ans1) (an2@(start2,_,_,end2):ans2) =
case compare (start1,end1) (start2,end2) of
GT -> an1 : merge ans1 (an2:ans2)
EQ -> an1 : merge ans1 ans2
LT -> an2 : merge (an1:ans1) ans2
filterLongest :: [(Int,String,[MorphoAnalysis],Int)] -> [(Int,String,[MorphoAnalysis],Int)]
filterLongest [] = []
filterLongest (an:ans) = longest an ans
where
longest prev [] = [prev]
longest prev@(start0,_,_,end0) (next@(start,_,_,end):ans)
| start0 == start = longest next ans
| otherwise = filter prev (next:ans)
filter prev [] = [prev]
filter prev@(start0,w0,an0,end0) (next@(start,w,an,end):ans)
| end0 == start && (unk w0 an0 || unk w an)
= filter (start0,w0++w,[],end) ans
| end0 <= start = prev : longest next ans
| otherwise = filter prev ans
unk w [] | any (not . isPunctuation) w = True
unk _ _ = False
fullFormLexicon :: Concr -> [(String, [MorphoAnalysis])]
fullFormLexicon lang = error "TODO: fullFormLexicon"
-- | This data type encodes the different outcomes which you could get from the parser.
data ParseOutput a
= ParseFailed Int String -- ^ The integer is the position in number of unicode characters where the parser failed.
-- The string is the token where the parser have failed.
| ParseOk a -- ^ If the parsing and the type checking are successful
-- we get the abstract syntax trees as either a list or a chart.
| ParseIncomplete -- ^ The sentence is not complete.
parse :: Concr -> Type -> String -> ParseOutput [(Expr,Float)]
parse lang ty sent = parseWithHeuristics lang ty sent (-1.0) []
parseWithHeuristics :: Concr -- ^ the language with which we parse
-> Type -- ^ the start category
-> String -- ^ the input sentence
-> Double -- ^ the heuristic factor.
-- A negative value tells the parser
-- to lookup up the default from
-- the grammar flags
-> [(Cat, String -> Int -> Maybe (Expr,Float,Int))]
-- ^ a list of callbacks for literal categories.
-- The arguments of the callback are:
-- the index of the constituent for the literal category;
-- the input sentence; the current offset in the sentence.
-- If a literal has been recognized then the output should
-- be Just (expr,probability,end_offset)
-> ParseOutput [(Expr,Float)]
parseWithHeuristics = error "TODO: parseWithHeuristics"
-- | Returns possible completions of the current partial input.
complete :: Concr -- ^ the language with which we parse
-> Type -- ^ the start category
-> String -- ^ the input sentence (excluding token being completed)
-> String -- ^ prefix (partial token being completed)
-> ParseOutput [(String, Fun, Cat, Float)] -- ^ (token, category, function, probability)
complete = error "TODO: complete"
-- | Returns True if there is a linearization defined for that function in that language
hasLinearization :: Concr -> Fun -> Bool
hasLinearization = error "TODO: linearize"
-- | Linearizes an expression as a string in the language
linearize :: Concr -> Expr -> String
linearize lang e = error "TODO: linearize"
-- | Generates all possible linearizations of an expression
linearizeAll :: Concr -> Expr -> [String]
linearizeAll lang e = error "TODO: linearizeAll"
-- | Generates a table of linearizations for an expression
tabularLinearize :: Concr -> Expr -> [(String, String)]
tabularLinearize lang e =
case tabularLinearizeAll lang e of
(lins:_) -> lins
_ -> []
-- | Generates a table of linearizations for an expression
tabularLinearizeAll :: Concr -> Expr -> [[(String, String)]]
tabularLinearizeAll lang e = error "TODO: tabularLinearizeAll"
type FId = Int
-- | BracketedString represents a sentence that is linearized
-- as usual but we also want to retain the ''brackets'' that
-- mark the beginning and the end of each constituent.
data BracketedString
= Leaf String -- ^ this is the leaf i.e. a single token
| BIND -- ^ the surrounding tokens must be bound together
| Bracket Cat {-# UNPACK #-} !FId String Fun [BracketedString]
-- ^ this is a bracket. The 'Cat' is the category of
-- the phrase. The 'FId' is an unique identifier for
-- every phrase in the sentence. For context-free grammars
-- i.e. without discontinuous constituents this identifier
-- is also unique for every bracket. When there are discontinuous
-- phrases then the identifiers are unique for every phrase but
-- not for every bracket since the bracket represents a constituent.
-- The different constituents could still be distinguished by using
-- the analysis string. If the grammar is reduplicating
-- then the constituent indices will be the same for all brackets
-- that represents the same constituent.
-- The 'Fun' is the name of the abstract function that generated
-- this phrase.
-- | Renders the bracketed string as a string where
-- the brackets are shown as @(S ...)@ where
-- @S@ is the category.
showBracketedString :: BracketedString -> String
showBracketedString = render . ppBracketedString
ppBracketedString (Leaf t) = text t
ppBracketedString BIND = text "&+"
ppBracketedString (Bracket cat fid _ _ bss) = parens (text cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
-- | Extracts the sequence of tokens from the bracketed string
flattenBracketedString :: BracketedString -> [String]
flattenBracketedString (Leaf w) = [w]
flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString bss
bracketedLinearize :: Concr -> Expr -> [BracketedString]
bracketedLinearize = error "TODO: bracketedLinearize"
bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]]
bracketedLinearizeAll = error "TODO: bracketedLinearizeAll"
generateAll :: PGF -> Type -> [(Expr,Float)]
generateAll p ty = error "TODO: generateAll"
generateAllFrom :: PGF -> Expr -> [(Expr,Float)]
generateAllFrom p ty = error "TODO: generateAllFrom"
generateRandom :: StdGen -> PGF -> Type -> [a]
generateRandom = error "TODO: generateRandom"
generateRandomFrom :: StdGen -> PGF -> Expr -> [a]
generateRandomFrom = error "TODO: generateRandomFrom"
-- | List of all functions defined in the abstract syntax -- | List of all functions defined in the abstract syntax
categories :: PGF -> [Cat] categories :: PGF -> [Cat]
categories p = categories p =
@@ -184,7 +491,7 @@ categories p =
name <- peekText key name <- peekText key
writeIORef ref $ (name : names) writeIORef ref $ (name : names)
categoryContext :: PGF -> Cat -> [Hypo] categoryContext :: PGF -> Cat -> Maybe [Hypo]
categoryContext p cat = categoryContext p cat =
unsafePerformIO $ unsafePerformIO $
withText cat $ \c_cat -> withText cat $ \c_cat ->
@@ -195,11 +502,11 @@ categoryContext p cat =
mask_ $ do mask_ $ do
c_hypos <- withPgfExn (pgf_category_context c_db c_revision c_cat p_n_hypos u) c_hypos <- withPgfExn (pgf_category_context c_db c_revision c_cat p_n_hypos u)
if c_hypos == nullPtr if c_hypos == nullPtr
then return [] then return Nothing
else do n_hypos <- peek p_n_hypos else do n_hypos <- peek p_n_hypos
hypos <- peekHypos c_hypos 0 n_hypos hypos <- peekHypos c_hypos 0 n_hypos
free c_hypos free c_hypos
return hypos return (Just hypos)
where where
peekHypos :: Ptr PgfTypeHypo -> CSize -> CSize -> IO [Hypo] peekHypos :: Ptr PgfTypeHypo -> CSize -> CSize -> IO [Hypo]
peekHypos c_hypo i n peekHypos c_hypo i n
@@ -214,8 +521,8 @@ categoryContext p cat =
return ((bt,cat,ty) : hs) return ((bt,cat,ty) : hs)
| otherwise = return [] | otherwise = return []
categoryProb :: PGF -> Cat -> Float categoryProbability :: PGF -> Cat -> Float
categoryProb p cat = categoryProbability p cat =
unsafePerformIO $ unsafePerformIO $
withText cat $ \c_cat -> withText cat $ \c_cat ->
withForeignPtr (a_db p) $ \c_db -> withForeignPtr (a_db p) $ \c_db ->
@@ -291,6 +598,256 @@ abstractFlag p name =
freeStablePtr c_lit freeStablePtr c_lit
return (Just lit) return (Just lit)
-----------------------------------------------------------------------------
-- Graphviz
data GraphvizOptions = GraphvizOptions {noLeaves :: Bool,
noFun :: Bool,
noCat :: Bool,
noDep :: Bool,
nodeFont :: String,
leafFont :: String,
nodeColor :: String,
leafColor :: String,
nodeEdgeStyle :: String,
leafEdgeStyle :: String
}
graphvizDefaults = GraphvizOptions False False False True "" "" "" "" "" ""
-- | Renders an abstract syntax tree in a Graphviz format.
graphvizAbstractTree :: PGF -> GraphvizOptions -> Expr -> String
graphvizAbstractTree p opts e = error "TODO: graphvizAbstractTree"
graphvizParseTree :: Concr -> GraphvizOptions -> Expr -> String
graphvizParseTree c opts e = error "TODO: graphvizParseTree"
graphvizWordAlignment :: [Concr] -> GraphvizOptions -> Expr -> String
graphvizWordAlignment cs opts e = error "TODO: graphvizWordAlignment"
type Labels = Map.Map Fun [String]
getDepLabels :: String -> Labels
getDepLabels s = Map.fromList [(f,ls) | f:ls <- map words (lines s)]
-- | Visualize word dependency tree.
graphvizDependencyTree
:: String -- ^ Output format: @"latex"@, @"conll"@, @"malt_tab"@, @"malt_input"@ or @"dot"@
-> Bool -- ^ Include extra information (debug)
-> Maybe Labels -- ^ abstract label information obtained with 'getDepLabels'
-> Maybe CncLabels -- ^ concrete label information obtained with ' ' (was: unused (was: @Maybe String@))
-> Concr
-> Expr
-> String -- ^ Rendered output in the specified format
graphvizDependencyTree format debug mlab mclab concr t = error "TODO: graphvizDependencyTree"
---------------------- should be a separate module?
-- visualization with latex output. AR Nov 2015
conlls2latexDoc :: [String] -> String
conlls2latexDoc =
render .
latexDoc .
vcat .
intersperse (text "" $+$ app "vspace" (text "4mm")) .
map conll2latex .
filter (not . null)
conll2latex :: String -> Doc
conll2latex = ppLaTeX . conll2latex' . parseCoNLL
conll2latex' :: CoNLL -> [LaTeX]
conll2latex' = dep2latex . conll2dep'
data Dep = Dep {
wordLength :: Int -> Double -- length of word at position int -- was: fixed width, millimetres (>= 20.0)
, tokens :: [(String,String)] -- word, pos (0..)
, deps :: [((Int,Int),String)] -- from, to, label
, root :: Int -- root word position
}
-- some general measures
defaultWordLength = 20.0 -- the default fixed width word length, making word 100 units
defaultUnit = 0.2 -- unit in latex pictures, 0.2 millimetres
spaceLength = 10.0
charWidth = 1.8
wsize rwld w = 100 * rwld w + spaceLength -- word length, units
wpos rwld i = sum [wsize rwld j | j <- [0..i-1]] -- start position of the i'th word
wdist rwld x y = sum [wsize rwld i | i <- [min x y .. max x y - 1]] -- distance between words x and y
labelheight h = h + arcbase + 3 -- label just above arc; 25 would put it just below
labelstart c = c - 15.0 -- label starts 15u left of arc centre
arcbase = 30.0 -- arcs start and end 40u above the bottom
arcfactor r = r * 600 -- reduction of arc size from word distance
xyratio = 3 -- width/height ratio of arcs
putArc :: (Int -> Double) -> Int -> Int -> Int -> String -> [DrawingCommand]
putArc frwld height x y label = [oval,arrowhead,labelling] where
oval = Put (ctr,arcbase) (OvalTop (wdth,hght))
arrowhead = Put (endp,arcbase + 5) (ArrowDown 5) -- downgoing arrow 5u above the arc base
labelling = Put (labelstart ctr,labelheight (hght/2)) (TinyText label)
dxy = wdist frwld x y -- distance between words, >>= 20.0
ndxy = 100 * rwld * fromIntegral height -- distance that is indep of word length
hdxy = dxy / 2 -- half the distance
wdth = dxy - (arcfactor rwld)/dxy -- longer arcs are wider in proportion
hght = ndxy / (xyratio * rwld) -- arc height is independent of word length
begp = min x y -- begin position of oval
ctr = wpos frwld begp + hdxy + (if x < y then 20 else 10) -- LR arcs are farther right from center of oval
endp = (if x < y then (+) else (-)) ctr (wdth/2) -- the point of the arrow
rwld = 0.5 ----
dep2latex :: Dep -> [LaTeX]
dep2latex d =
[Comment (unwords (map fst (tokens d))),
Picture defaultUnit (width,height) (
[Put (wpos rwld i,0) (Text w) | (i,w) <- zip [0..] (map fst (tokens d))] -- words
++ [Put (wpos rwld i,15) (TinyText w) | (i,w) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom
++ concat [putArc rwld (aheight x y) x y label | ((x,y),label) <- deps d] -- arcs and labels
++ [Put (wpos rwld (root d) + 15,height) (ArrowDown (height-arcbase))]
++ [Put (wpos rwld (root d) + 20,height - 10) (TinyText "ROOT")]
)]
where
wld i = wordLength d i -- >= 20.0
rwld i = (wld i) / defaultWordLength -- >= 1.0
aheight x y = depth (min x y) (max x y) + 1 ---- abs (x-y)
arcs = [(min u v, max u v) | ((u,v),_) <- deps d]
depth x y = case [(u,v) | (u,v) <- arcs, (x < u && v <= y) || (x == u && v < y)] of ---- only projective arcs counted
[] -> 0
uvs -> 1 + maximum (0:[depth u v | (u,v) <- uvs])
width = {-round-} (sum [wsize rwld w | (w,_) <- zip [0..] (tokens d)]) + {-round-} spaceLength * fromIntegral ((length (tokens d)) - 1)
height = 50 + 20 * {-round-} (maximum (0:[aheight x y | ((x,y),_) <- deps d]))
type CoNLL = [[String]]
parseCoNLL :: String -> CoNLL
parseCoNLL = map words . lines
--conll2dep :: String -> Dep
--conll2dep = conll2dep' . parseCoNLL
conll2dep' :: CoNLL -> Dep
conll2dep' ls = Dep {
wordLength = wld
, tokens = toks
, deps = dps
, root = head $ [read x-1 | x:_:_:_:_:_:"0":_ <- ls] ++ [1]
}
where
wld i = maximum (0:[charWidth * fromIntegral (length w) | w <- let (tok,pos) = toks !! i in [tok,pos]])
toks = [(w,c) | _:w:_:c:_ <- ls]
dps = [((read y-1, read x-1),lab) | x:_:_:_:_:_:y:lab:_ <- ls, y /="0"]
--maxdist = maximum [abs (x-y) | ((x,y),_) <- dps]
-- * LaTeX Pictures (see https://en.wikibooks.org/wiki/LaTeX/Picture)
-- We render both LaTeX and SVG from this intermediate representation of
-- LaTeX pictures.
data LaTeX = Comment String | Picture UnitLengthMM Size [DrawingCommand]
data DrawingCommand = Put Position Object
data Object = Text String | TinyText String | OvalTop Size | ArrowDown Length
type UnitLengthMM = Double
type Size = (Double,Double)
type Position = (Double,Double)
type Length = Double
-- * latex formatting
ppLaTeX = vcat . map ppLaTeX1
where
ppLaTeX1 el =
case el of
Comment s -> comment s
Picture unit size cmds ->
app "setlength{\\unitlength}" (text (show unit ++ "mm"))
$$ hang (app "begin" (text "picture")<>text (show size)) 2
(vcat (map ppDrawingCommand cmds))
$$ app "end" (text "picture")
$$ text ""
ppDrawingCommand (Put pos obj) = put pos (ppObject obj)
ppObject obj =
case obj of
Text s -> text s
TinyText s -> small (text s)
OvalTop size -> text "\\oval" <> text (show size) <> text "[t]"
ArrowDown len -> app "vector(0,-1)" (text (show len))
put p@(_,_) = app ("put" ++ show p)
small w = text "{\\tiny" <+> w <> text "}"
comment s = text "%%" <+> text s -- line break show follow
app macro arg = text "\\" <> text macro <> text "{" <> arg <> text "}"
latexDoc :: Doc -> Doc
latexDoc body =
vcat [text "\\documentclass{article}",
text "\\usepackage[utf8]{inputenc}",
text "\\begin{document}",
body,
text "\\end{document}"]
----------------------------------
-- concrete syntax annotations (local) on top of conll
-- examples of annotations:
-- UseComp {"not"} PART neg head
-- UseComp {*} AUX cop head
type CncLabels = [(String, String -> Maybe (String -> String,String,String))]
-- (fun, word -> (pos,label,target))
-- the pos can remain unchanged, as in the current notation in the article
fixCoNLL :: CncLabels -> CoNLL -> CoNLL
fixCoNLL labels conll = map fixc conll where
fixc row = case row of
(i:word:fun:pos:cat:x_:"0":"dep":xs) -> (i:word:fun:pos:cat:x_:"0":"root":xs) --- change the root label from dep to root
(i:word:fun:pos:cat:x_:j:label:xs) -> case look (fun,word) of
Just (pos',label',"head") -> (i:word:fun:pos' pos:cat:x_:j :label':xs)
Just (pos',label',target) -> (i:word:fun:pos' pos:cat:x_: getDep j target:label':xs)
_ -> row
_ -> row
look (fun,word) = case lookup fun labels of
Just relabel -> case relabel word of
Just row -> Just row
_ -> case lookup "*" labels of
Just starlabel -> starlabel word
_ -> Nothing
_ -> case lookup "*" labels of
Just starlabel -> starlabel word
_ -> Nothing
getDep j label = maybe j id $ lookup (label,j) [((label,j),i) | i:word:fun:pos:cat:x_:j:label:xs <- conll]
getCncDepLabels :: String -> CncLabels
getCncDepLabels = map merge . groupBy (\ (x,_) (a,_) -> x == a) . concatMap analyse . filter choose . lines where
--- choose is for compatibility with the general notation
choose line = notElem '(' line && elem '{' line --- ignoring non-local (with "(") and abstract (without "{") rules
analyse line = case break (=='{') line of
(beg,_:ws) -> case break (=='}') ws of
(toks,_:target) -> case (words beg, words target) of
(fun:_,[ label,j]) -> [(fun, (tok, (id, label,j))) | tok <- getToks toks]
(fun:_,[pos,label,j]) -> [(fun, (tok, (const pos,label,j))) | tok <- getToks toks]
_ -> []
_ -> []
_ -> []
merge rules@((fun,_):_) = (fun, \tok ->
case lookup tok (map snd rules) of
Just new -> return new
_ -> lookup "*" (map snd rules)
)
getToks = words . map (\c -> if elem c "\"," then ' ' else c)
printCoNLL :: CoNLL -> String
printCoNLL = unlines . map (concat . intersperse "\t")
----------------------------------------------------------------------- -----------------------------------------------------------------------
-- Expressions & types -- Expressions & types
@@ -335,6 +892,12 @@ readExpr str =
freeStablePtr c_expr freeStablePtr c_expr
return (Just expr) return (Just expr)
pIdent :: ReadS String
pIdent = error "TODO: pIdent"
pExpr :: ReadS Expr
pExpr = error "TODO: pExpr"
-- | renders a type as a 'String'. The list -- | renders a type as a 'String'. The list
-- of identifiers is the list of all free variables -- of identifiers is the list of all free variables
-- in the type in order reverse to the order -- in the type in order reverse to the order
@@ -348,6 +911,9 @@ showType scope ty =
bracket (pgf_print_type c_ty pctxt 0 m) free $ \c_text -> bracket (pgf_print_type c_ty pctxt 0 m) free $ \c_text ->
peekText c_text peekText c_text
showContext :: [Var] -> [(BindType,Var,Type)] -> String
showContext = error "TODO: showContext"
-- | parses a 'String' as a type -- | parses a 'String' as a type
readType :: String -> Maybe Type readType :: String -> Maybe Type
readType str = readType str =
@@ -360,3 +926,8 @@ readType str =
else do ty <- deRefStablePtr c_ty else do ty <- deRefStablePtr c_ty
freeStablePtr c_ty freeStablePtr c_ty
return (Just ty) return (Just ty)
readProbabilitiesFromFile :: FilePath -> IO (Map.Map String Double)
readProbabilitiesFromFile fpath = do
s <- readFile fpath
return $ Map.fromList [(f,read p) | f:p:_ <- map words (lines s)]

View File

@@ -11,7 +11,7 @@ module PGF2.Expr(Var, Cat, Fun,
mkFloat, unFloat, mkFloat, unFloat,
mkMeta, unMeta, mkMeta, unMeta,
exprSize, exprFunctions, exprSize, exprFunctions, exprSubstitute,
mkType, unType, mkType, unType,
mkHypo, mkDepHypo, mkImplHypo mkHypo, mkDepHypo, mkImplHypo
@@ -169,6 +169,15 @@ exprFunctions (EImplArg e) = exprFunctions e
exprFunctions (EFun f) = [f] exprFunctions (EFun f) = [f]
exprFunctions _ = [] exprFunctions _ = []
exprSubstitute :: Expr -> [Expr] -> Expr
exprSubstitute (EAbs bt x e) vs = EAbs bt x (exprSubstitute e vs)
exprSubstitute (EApp e1 e2) vs = EApp (exprSubstitute e1 vs) (exprSubstitute e2 vs)
exprSubstitute (EMeta i) vs = vs !! i
exprSubstitute (ETyped e ty) vs = ETyped (exprSubstitute e vs) ty
exprSubstitute (EImplArg e) vs = EImplArg (exprSubstitute e vs)
exprSubstitute e vs = e
-- | creates a type from list of hypothesises, category and -- | creates a type from list of hypothesises, category and
-- list of arguments for the category. The operation -- list of arguments for the category. The operation
-- @mkType [h_1,...,h_n] C [e_1,...,e_m]@ will create -- @mkType [h_1,...,h_n] C [e_1,...,e_m]@ will create

View File

@@ -18,11 +18,14 @@ import PGF2.Expr
#include <pgf/pgf.h> #include <pgf/pgf.h>
type AbsName = String -- ^ Name of abstract syntax
type ConcName = String -- ^ Name of concrete syntax
-- | An abstract data type representing multilingual grammar -- | An abstract data type representing multilingual grammar
-- in Portable Grammar Format. -- in Portable Grammar Format.
data PGF = PGF { a_db :: ForeignPtr PgfDB data PGF = PGF { a_db :: ForeignPtr PgfDB
, revision :: ForeignPtr PgfRevision , revision :: ForeignPtr PgfRevision
, langs :: Map.Map String Concr , languages:: Map.Map ConcName Concr
} }
data Concr = Concr {c_pgf :: ForeignPtr PgfDB, concr :: Ptr PgfConcr} data Concr = Concr {c_pgf :: ForeignPtr PgfDB, concr :: Ptr PgfConcr}

View File

@@ -1,4 +1,93 @@
{-# LANGUAGE ImplicitParams, RankNTypes #-} {-# LANGUAGE ImplicitParams, RankNTypes #-}
module PGF2.Internal(-- * Access the internal structures module PGF2.Internal(-- * Access the internal structures
FId,isPredefFId,
fidString,fidInt,fidFloat,fidVar,fidStart,
-- * Byte code
CodeLabel, Instr(..), IVal(..), TailInfo(..),
SeqId,LIndex,
FunId,Token,Production(..),PArg(..),Symbol(..),
unionPGF, writeConcr
) where ) where
import PGF2.FFI
import PGF2.Expr
type Token = String
type LIndex = Int
data Symbol
= SymCat {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex
| SymLit {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex
| SymVar {-# UNPACK #-} !Int {-# UNPACK #-} !Int
| SymKS Token
| SymKP [Symbol] [([Symbol],[String])]
| SymBIND -- the special BIND token
| SymNE -- non exist
| SymSOFT_BIND -- the special SOFT_BIND token
| SymSOFT_SPACE -- the special SOFT_SPACE token
| SymCAPIT -- the special CAPIT token
| SymALL_CAPIT -- the special ALL_CAPIT token
deriving (Eq,Ord,Show)
data Production
= PApply {-# UNPACK #-} !FunId [PArg]
| PCoerce {-# UNPACK #-} !FId
deriving (Eq,Ord,Show)
type FunId = Int
type SeqId = Int
type FId = Int
data PArg = PArg [FId] {-# UNPACK #-} !FId deriving (Eq,Ord,Show)
fidString, fidInt, fidFloat, fidVar, fidStart :: FId
fidString = (-1)
fidInt = (-2)
fidFloat = (-3)
fidVar = (-4)
fidStart = (-5)
isPredefFId :: FId -> Bool
isPredefFId = (`elem` [fidString, fidInt, fidFloat, fidVar])
type CodeLabel = Int
data Instr
= CHECK_ARGS {-# UNPACK #-} !Int
| CASE Fun {-# UNPACK #-} !CodeLabel
| CASE_LIT Literal {-# UNPACK #-} !CodeLabel
| SAVE {-# UNPACK #-} !Int
| ALLOC {-# UNPACK #-} !Int
| PUT_CONSTR Fun
| PUT_CLOSURE {-# UNPACK #-} !CodeLabel
| PUT_LIT Literal
| SET IVal
| SET_PAD
| PUSH_FRAME
| PUSH IVal
| TUCK IVal {-# UNPACK #-} !Int
| EVAL IVal TailInfo
| DROP {-# UNPACK #-} !Int
| JUMP {-# UNPACK #-} !CodeLabel
| FAIL
| PUSH_ACCUM Literal
| POP_ACCUM
| ADD
data IVal
= HEAP {-# UNPACK #-} !Int
| ARG_VAR {-# UNPACK #-} !Int
| FREE_VAR {-# UNPACK #-} !Int
| GLOBAL Fun
deriving Eq
data TailInfo
= RecCall
| TailCall {-# UNPACK #-} !Int
| UpdateCall
unionPGF :: PGF -> PGF -> Maybe PGF
unionPGF = error "TODO: unionPGF"
writeConcr :: FilePath -> Concr -> IO ()
writeConcr = error "TODO: writeConcr"

View File

@@ -91,7 +91,7 @@ branchPGF_ c_name p (Transaction f) =
ex_type <- (#peek PgfExn, type) c_exn ex_type <- (#peek PgfExn, type) c_exn
if (ex_type :: (#type PgfExnType)) == (#const PGF_EXN_NONE) if (ex_type :: (#type PgfExnType)) == (#const PGF_EXN_NONE)
then do fptr2 <- C.newForeignPtr c_revision (withForeignPtr (a_db p) (\c_db -> pgf_free_revision c_db c_revision)) then do fptr2 <- C.newForeignPtr c_revision (withForeignPtr (a_db p) (\c_db -> pgf_free_revision c_db c_revision))
return (PGF (a_db p) fptr2 (langs p)) return (PGF (a_db p) fptr2 (languages p))
else do pgf_free_revision c_db c_revision else do pgf_free_revision c_db c_revision
return p return p
else do pgf_free_revision c_db c_revision else do pgf_free_revision c_db c_revision
@@ -107,7 +107,7 @@ checkoutPGF p name =
if c_revision == nullPtr if c_revision == nullPtr
then return Nothing then return Nothing
else do fptr2 <- C.newForeignPtr c_revision (withForeignPtr (a_db p) (\c_db -> pgf_free_revision c_db c_revision)) else do fptr2 <- C.newForeignPtr c_revision (withForeignPtr (a_db p) (\c_db -> pgf_free_revision c_db c_revision))
return (Just (PGF (a_db p) fptr2 (langs p))) return (Just (PGF (a_db p) fptr2 (languages p)))
createFunction :: Fun -> Type -> Float -> Transaction () createFunction :: Fun -> Type -> Float -> Transaction ()
createFunction name ty prob = Transaction $ \c_db c_revision c_exn -> createFunction name ty prob = Transaction $ \c_db c_revision c_exn ->