forked from GitHub/gf-core
the compiler now compiles with the new runtime
This commit is contained in:
@@ -6,7 +6,6 @@ module GF.Command.Commands (
|
|||||||
import Prelude hiding (putStrLn,(<>))
|
import Prelude hiding (putStrLn,(<>))
|
||||||
|
|
||||||
import PGF2
|
import PGF2
|
||||||
import PGF2.Internal(writePGF)
|
|
||||||
|
|
||||||
import GF.Compile.Export
|
import GF.Compile.Export
|
||||||
import GF.Compile.ToAPI
|
import GF.Compile.ToAPI
|
||||||
@@ -666,7 +665,7 @@ pgfCommands = Map.fromList [
|
|||||||
[e] -> case unApp e of
|
[e] -> case unApp e of
|
||||||
Just (id, []) -> case functionType pgf id of
|
Just (id, []) -> case functionType pgf id of
|
||||||
Just ty -> do putStrLn (showFun pgf id ty)
|
Just ty -> do putStrLn (showFun pgf id ty)
|
||||||
putStrLn ("Probability: "++show (treeProbability pgf e))
|
putStrLn ("Probability: "++show (exprProbability pgf e))
|
||||||
return void
|
return void
|
||||||
Nothing -> case categoryContext pgf id of
|
Nothing -> case categoryContext pgf id of
|
||||||
Just hypos -> do putStrLn ("cat "++id++if null hypos then "" else ' ':showContext [] hypos)
|
Just hypos -> do putStrLn ("cat "++id++if null hypos then "" else ' ':showContext [] hypos)
|
||||||
@@ -682,7 +681,7 @@ pgfCommands = Map.fromList [
|
|||||||
Left err -> error err
|
Left err -> error err
|
||||||
Right (e,ty) -> do putStrLn ("Expression: "++showExpr [] e)
|
Right (e,ty) -> do putStrLn ("Expression: "++showExpr [] e)
|
||||||
putStrLn ("Type: "++showType [] ty)
|
putStrLn ("Type: "++showType [] ty)
|
||||||
putStrLn ("Probability: "++show (treeProbability pgf e))
|
putStrLn ("Probability: "++show (exprProbability pgf e))
|
||||||
return void
|
return void
|
||||||
_ -> do putStrLn "a single identifier or expression is expected from the command"
|
_ -> do putStrLn "a single identifier or expression is expected from the command"
|
||||||
return void,
|
return void,
|
||||||
@@ -800,8 +799,8 @@ pgfCommands = Map.fromList [
|
|||||||
|
|
||||||
showFun pgf id ty = kwd++" "++ id ++ " : " ++ showType [] ty
|
showFun pgf id ty = kwd++" "++ id ++ " : " ++ showType [] ty
|
||||||
where
|
where
|
||||||
kwd | functionIsDataCon pgf id = "data"
|
kwd | functionIsConstructor pgf id = "data"
|
||||||
| otherwise = "fun"
|
| otherwise = "fun"
|
||||||
|
|
||||||
morphos pgf opts s =
|
morphos pgf opts s =
|
||||||
[(s,lookupMorpho concr s) | concr <- optLangs pgf opts]
|
[(s,lookupMorpho concr s) | concr <- optLangs pgf opts]
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ import Data.Maybe(fromMaybe)
|
|||||||
--------------------------
|
--------------------------
|
||||||
|
|
||||||
cf2pgf :: Options -> FilePath -> ParamCFG -> Map.Map Fun Double -> PGF
|
cf2pgf :: Options -> FilePath -> ParamCFG -> Map.Map Fun Double -> PGF
|
||||||
cf2pgf opts fpath cf probs =
|
cf2pgf opts fpath cf probs = error "TODO: cf2pgf" {-
|
||||||
build (let abstr = cf2abstr cf probs
|
build (let abstr = cf2abstr cf probs
|
||||||
in newPGF [] aname abstr [(cname, cf2concr opts abstr cf)])
|
in newPGF [] aname abstr [(cname, cf2concr opts abstr cf)])
|
||||||
where
|
where
|
||||||
@@ -134,3 +134,4 @@ mkRuleName rule =
|
|||||||
case ruleName rule of
|
case ruleName rule of
|
||||||
CFObj n _ -> n
|
CFObj n _ -> n
|
||||||
_ -> "_"
|
_ -> "_"
|
||||||
|
-}
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ module GF.Compile.Compute.Predef(predef,predefName,delta) where
|
|||||||
|
|
||||||
import qualified Data.Map as Map
|
import qualified Data.Map as Map
|
||||||
import Data.Array(array,(!))
|
import Data.Array(array,(!))
|
||||||
import Data.List (isInfixOf)
|
import Data.List (isInfixOf,genericTake,genericDrop,genericLength)
|
||||||
import Data.Char (isUpper,toLower,toUpper)
|
import Data.Char (isUpper,toLower,toUpper)
|
||||||
import Control.Monad(ap)
|
import Control.Monad(ap)
|
||||||
|
|
||||||
@@ -20,7 +20,7 @@ class Predef a where
|
|||||||
toValue :: a -> Value
|
toValue :: a -> Value
|
||||||
fromValue :: Value -> Err a
|
fromValue :: Value -> Err a
|
||||||
|
|
||||||
instance Predef Int where
|
instance Predef Integer where
|
||||||
toValue = VInt
|
toValue = VInt
|
||||||
fromValue (VInt i) = return i
|
fromValue (VInt i) = return i
|
||||||
fromValue v = verror "Int" v
|
fromValue v = verror "Int" v
|
||||||
@@ -87,8 +87,8 @@ predefList =
|
|||||||
|
|
||||||
delta f vs =
|
delta f vs =
|
||||||
case f of
|
case f of
|
||||||
Drop -> fromNonExist vs NonExist (ap2 (drop::Int->String->String))
|
Drop -> fromNonExist vs NonExist (ap2 (genericDrop::Integer->String->String))
|
||||||
Take -> fromNonExist vs NonExist (ap2 (take::Int->String->String))
|
Take -> fromNonExist vs NonExist (ap2 (genericTake::Integer->String->String))
|
||||||
Tk -> fromNonExist vs NonExist (ap2 tk)
|
Tk -> fromNonExist vs NonExist (ap2 tk)
|
||||||
Dp -> fromNonExist vs NonExist (ap2 dp)
|
Dp -> fromNonExist vs NonExist (ap2 dp)
|
||||||
EqStr -> fromNonExist vs PFalse (ap2 ((==)::String->String->Bool))
|
EqStr -> fromNonExist vs PFalse (ap2 ((==)::String->String->Bool))
|
||||||
@@ -97,10 +97,10 @@ delta f vs =
|
|||||||
ToUpper -> fromNonExist vs NonExist (ap1 (map toUpper))
|
ToUpper -> fromNonExist vs NonExist (ap1 (map toUpper))
|
||||||
ToLower -> fromNonExist vs NonExist (ap1 (map toLower))
|
ToLower -> fromNonExist vs NonExist (ap1 (map toLower))
|
||||||
IsUpper -> fromNonExist vs PFalse (ap1 (all' isUpper))
|
IsUpper -> fromNonExist vs PFalse (ap1 (all' isUpper))
|
||||||
Length -> fromNonExist vs (0::Int) (ap1 (length::String->Int))
|
Length -> fromNonExist vs (0::Integer) (ap1 (genericLength::String->Integer))
|
||||||
Plus -> ap2 ((+)::Int->Int->Int)
|
Plus -> ap2 ((+)::Integer->Integer->Integer)
|
||||||
EqInt -> ap2 ((==)::Int->Int->Bool)
|
EqInt -> ap2 ((==)::Integer->Integer->Bool)
|
||||||
LessInt -> ap2 ((<)::Int->Int->Bool)
|
LessInt -> ap2 ((<)::Integer->Integer->Bool)
|
||||||
{- -- | Show | Read | ToStr | MapStr | EqVal -}
|
{- -- | Show | Read | ToStr | MapStr | EqVal -}
|
||||||
Error -> ap1 VError
|
Error -> ap1 VError
|
||||||
Trace -> ap2 vtrace
|
Trace -> ap2 vtrace
|
||||||
@@ -139,8 +139,12 @@ delta f vs =
|
|||||||
-- unimpl id = bug $ "unimplemented predefined function: "++showIdent id
|
-- unimpl id = bug $ "unimplemented predefined function: "++showIdent id
|
||||||
-- problem id vs = bug $ "unexpected arguments: Predef."++showIdent id++" "++show vs
|
-- problem id vs = bug $ "unexpected arguments: Predef."++showIdent id++" "++show vs
|
||||||
|
|
||||||
tk i s = take (max 0 (length s - i)) s :: String
|
tk :: Integer -> String -> String
|
||||||
dp i s = drop (max 0 (length s - i)) s :: String
|
tk i s = genericTake (max 0 (genericLength s - i)) s
|
||||||
|
|
||||||
|
dp :: Integer -> String -> String
|
||||||
|
dp i s = genericDrop (max 0 (genericLength s - i)) s
|
||||||
|
|
||||||
occur s t = isInfixOf (s::String) (t::String)
|
occur s t = isInfixOf (s::String) (t::String)
|
||||||
occurs s t = any (`elem` (t::String)) (s::String)
|
occurs s t = any (`elem` (t::String)) (s::String)
|
||||||
all' = all :: (a->Bool) -> [a] -> Bool
|
all' = all :: (a->Bool) -> [a] -> Bool
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ data Value
|
|||||||
-- -- | VClosure Env Term -- used in Typecheck.ConcreteNew
|
-- -- | VClosure Env Term -- used in Typecheck.ConcreteNew
|
||||||
| VAbs BindType Ident Binding -- used in Compute.Concrete
|
| VAbs BindType Ident Binding -- used in Compute.Concrete
|
||||||
| VProd BindType Value Ident Binding -- used in Compute.Concrete
|
| VProd BindType Value Ident Binding -- used in Compute.Concrete
|
||||||
| VInt Int
|
| VInt Integer
|
||||||
| VFloat Double
|
| VFloat Double
|
||||||
| VString String
|
| VString String
|
||||||
| VSort Ident
|
| VSort Ident
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
-- | Translate concrete syntax to Haskell
|
-- | Translate concrete syntax to Haskell
|
||||||
module GF.Compile.ConcreteToHaskell(concretes2haskell,concrete2haskell) where
|
module GF.Compile.ConcreteToHaskell(concretes2haskell,concrete2haskell) where
|
||||||
|
|
||||||
|
import PGF2(Literal(..))
|
||||||
import Data.List(isPrefixOf,sort,sortOn)
|
import Data.List(isPrefixOf,sort,sortOn)
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
@@ -181,9 +183,9 @@ concrete2haskell opts
|
|||||||
|
|
||||||
ppL l =
|
ppL l =
|
||||||
case l of
|
case l of
|
||||||
FloatConstant x -> pure (lit x)
|
LFlt x -> pure (lit x)
|
||||||
IntConstant n -> pure (lit n)
|
LInt n -> pure (lit n)
|
||||||
StrConstant s -> pure (token s)
|
LStr s -> pure (token s)
|
||||||
|
|
||||||
pId p@(ParamId s) =
|
pId p@(ParamId s) =
|
||||||
if "to_R_" `isPrefixOf` unqual s then toIdent p else gId p -- !! a hack
|
if "to_R_" `isPrefixOf` unqual s then toIdent p else gId p -- !! a hack
|
||||||
|
|||||||
@@ -4,7 +4,8 @@ module GF.Compile.GenerateBC(generateByteCode) where
|
|||||||
import GF.Grammar
|
import GF.Grammar
|
||||||
import GF.Grammar.Lookup(lookupAbsDef,lookupFunType)
|
import GF.Grammar.Lookup(lookupAbsDef,lookupFunType)
|
||||||
import GF.Data.Operations
|
import GF.Data.Operations
|
||||||
import PGF2.Internal(CodeLabel,Instr(..),IVal(..),TailInfo(..),Literal(..))
|
import PGF2(Literal(..))
|
||||||
|
import PGF2.Internal(CodeLabel,Instr(..),IVal(..),TailInfo(..))
|
||||||
import qualified Data.Map as Map
|
import qualified Data.Map as Map
|
||||||
import Data.List(nub,mapAccumL)
|
import Data.List(nub,mapAccumL)
|
||||||
import Data.Maybe(fromMaybe)
|
import Data.Maybe(fromMaybe)
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ import GF.Compile.Compute.Predef(predef)
|
|||||||
import GF.Compile.Compute.Value(Predefined(..))
|
import GF.Compile.Compute.Value(Predefined(..))
|
||||||
import GF.Infra.Ident(ModuleName(..),Ident,ident2raw,rawIdentS,showIdent,isWildIdent)
|
import GF.Infra.Ident(ModuleName(..),Ident,ident2raw,rawIdentS,showIdent,isWildIdent)
|
||||||
import GF.Infra.Option(Options,optionsPGF)
|
import GF.Infra.Option(Options,optionsPGF)
|
||||||
import PGF2.Internal(Literal(..))
|
import PGF2(Literal(..))
|
||||||
import GF.Compile.Compute.Concrete(GlobalEnv,normalForm,resourceValues)
|
import GF.Compile.Compute.Concrete(GlobalEnv,normalForm,resourceValues)
|
||||||
import GF.Grammar.Canonical as C
|
import GF.Grammar.Canonical as C
|
||||||
import System.FilePath ((</>), (<.>))
|
import System.FilePath ((</>), (<.>))
|
||||||
@@ -193,11 +193,11 @@ convert' gr vs = ppT
|
|||||||
Cn x -> VarValue (gId x) -- hmm
|
Cn x -> VarValue (gId x) -- hmm
|
||||||
Con c -> ParamConstant (Param (gId c) [])
|
Con c -> ParamConstant (Param (gId c) [])
|
||||||
Sort k -> VarValue (gId k)
|
Sort k -> VarValue (gId k)
|
||||||
EInt n -> LiteralValue (IntConstant n)
|
EInt n -> LiteralValue (LInt n)
|
||||||
Q (m,n) -> if m==cPredef then ppPredef n else VarValue (gQId m n)
|
Q (m,n) -> if m==cPredef then ppPredef n else VarValue (gQId m n)
|
||||||
QC (m,n) -> ParamConstant (Param (gQId m n) [])
|
QC (m,n) -> ParamConstant (Param (gQId m n) [])
|
||||||
K s -> LiteralValue (StrConstant s)
|
K s -> LiteralValue (LStr s)
|
||||||
Empty -> LiteralValue (StrConstant "")
|
Empty -> LiteralValue (LStr "")
|
||||||
FV ts -> VariantValue (map ppT ts)
|
FV ts -> VariantValue (map ppT ts)
|
||||||
Alts t' vs -> alts vs (ppT t')
|
Alts t' vs -> alts vs (ppT t')
|
||||||
_ -> error $ "convert' ppT: " ++ show t
|
_ -> error $ "convert' ppT: " ++ show t
|
||||||
@@ -265,8 +265,8 @@ convert' gr vs = ppT
|
|||||||
concatValue :: LinValue -> LinValue -> LinValue
|
concatValue :: LinValue -> LinValue -> LinValue
|
||||||
concatValue v1 v2 =
|
concatValue v1 v2 =
|
||||||
case (v1,v2) of
|
case (v1,v2) of
|
||||||
(LiteralValue (StrConstant ""),_) -> v2
|
(LiteralValue (LStr ""),_) -> v2
|
||||||
(_,LiteralValue (StrConstant "")) -> v1
|
(_,LiteralValue (LStr "")) -> v1
|
||||||
_ -> ConcatValue v1 v2
|
_ -> ConcatValue v1 v2
|
||||||
|
|
||||||
-- | Smart constructor for projections
|
-- | Smart constructor for projections
|
||||||
@@ -429,11 +429,5 @@ unqual n = Unqual (ident2raw n)
|
|||||||
|
|
||||||
convFlags :: G.Grammar -> ModuleName -> Flags
|
convFlags :: G.Grammar -> ModuleName -> Flags
|
||||||
convFlags gr mn =
|
convFlags gr mn =
|
||||||
Flags [(rawIdentS n,convLit v) |
|
Flags [(rawIdentS n,v) |
|
||||||
(n,v)<-err (const []) (optionsPGF.mflags) (lookupModule gr mn)]
|
(n,v)<-err (const []) (optionsPGF.mflags) (lookupModule gr mn)]
|
||||||
where
|
|
||||||
convLit l =
|
|
||||||
case l of
|
|
||||||
LStr s -> Str s
|
|
||||||
LInt i -> C.Int i
|
|
||||||
LFlt d -> Flt d
|
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ import GHC.Prim
|
|||||||
import GHC.Base(getTag)
|
import GHC.Base(getTag)
|
||||||
|
|
||||||
grammar2PGF :: Options -> SourceGrammar -> ModuleName -> Map.Map PGF2.Fun Double -> IO PGF
|
grammar2PGF :: Options -> SourceGrammar -> ModuleName -> Map.Map PGF2.Fun Double -> IO PGF
|
||||||
grammar2PGF opts gr am probs = do
|
grammar2PGF opts gr am probs = error "TODO: grammar2PGF" {-do
|
||||||
cnc_infos <- getConcreteInfos gr am
|
cnc_infos <- getConcreteInfos gr am
|
||||||
return $
|
return $
|
||||||
build (let gflags = if flag optSplitPGF opts
|
build (let gflags = if flag optSplitPGF opts
|
||||||
@@ -445,3 +445,4 @@ compareCaseInsensitive (x:xs) (y:ys) =
|
|||||||
EQ -> compare x y
|
EQ -> compare x y
|
||||||
x -> x
|
x -> x
|
||||||
x -> x
|
x -> x
|
||||||
|
-}
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ type ConcrData = ([(FId,[FunId])], -- ^ Lindefs
|
|||||||
[(Cat,FId,FId,[String])]) -- ^ Concrete categories
|
[(Cat,FId,FId,[String])]) -- ^ Concrete categories
|
||||||
|
|
||||||
optimizePGF :: Cat -> ConcrData -> ConcrData
|
optimizePGF :: Cat -> ConcrData -> ConcrData
|
||||||
optimizePGF startCat = topDownFilter startCat . bottomUpFilter
|
optimizePGF startCat = error "TODO: optimizePGF" {- topDownFilter startCat . bottomUpFilter
|
||||||
|
|
||||||
catString = "String"
|
catString = "String"
|
||||||
catInt = "Int"
|
catInt = "Int"
|
||||||
@@ -187,3 +187,4 @@ filterProductions prods0 hoc0 prods
|
|||||||
|
|
||||||
accumHOC hoc (PApply funid args) = List.foldl' (\hoc (PArg hypos _) -> List.foldl' (\hoc fid -> IntSet.insert fid hoc) hoc (map snd hypos)) hoc args
|
accumHOC hoc (PApply funid args) = List.foldl' (\hoc (PArg hypos _) -> List.foldl' (\hoc fid -> IntSet.insert fid hoc) hoc (map snd hypos)) hoc args
|
||||||
accumHOC hoc _ = hoc
|
accumHOC hoc _ = hoc
|
||||||
|
-}
|
||||||
|
|||||||
@@ -6,8 +6,8 @@ import Text.JSON
|
|||||||
import qualified Data.Map as Map
|
import qualified Data.Map as Map
|
||||||
|
|
||||||
pgf2json :: PGF -> String
|
pgf2json :: PGF -> String
|
||||||
pgf2json pgf =
|
pgf2json pgf = error "TODO: pgf2json"
|
||||||
encode $ makeObj
|
{- encode $ makeObj
|
||||||
[ ("abstract", abstract2json pgf)
|
[ ("abstract", abstract2json pgf)
|
||||||
, ("concretes", makeObj $ map concrete2json
|
, ("concretes", makeObj $ map concrete2json
|
||||||
(Map.toList (languages pgf)))
|
(Map.toList (languages pgf)))
|
||||||
@@ -108,3 +108,4 @@ new f xs =
|
|||||||
[ ("type", showJSON f)
|
[ ("type", showJSON f)
|
||||||
, ("args", showJSON xs)
|
, ("args", showJSON xs)
|
||||||
]
|
]
|
||||||
|
-}
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ data AExp =
|
|||||||
AVr Ident Val
|
AVr Ident Val
|
||||||
| ACn QIdent Val
|
| ACn QIdent Val
|
||||||
| AType
|
| AType
|
||||||
| AInt Int
|
| AInt Integer
|
||||||
| AFloat Double
|
| AFloat Double
|
||||||
| AStr String
|
| AStr String
|
||||||
| AMeta MetaId Val
|
| AMeta MetaId Val
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
module GF.Compiler (mainGFC, linkGrammars, writeGrammar, writeOutputs) where
|
module GF.Compiler (mainGFC, linkGrammars, writeGrammar, writeOutputs) where
|
||||||
|
|
||||||
import PGF2
|
import PGF2
|
||||||
import PGF2.Internal(unionPGF,writePGF,writeConcr)
|
import PGF2.Internal(unionPGF,writeConcr)
|
||||||
import GF.Compile as S(batchCompile,link,srcAbsName)
|
import GF.Compile as S(batchCompile,link,srcAbsName)
|
||||||
import GF.CompileInParallel as P(parallelBatchCompile)
|
import GF.CompileInParallel as P(parallelBatchCompile)
|
||||||
import GF.Compile.Export
|
import GF.Compile.Export
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ data BNFCRule = BNFCRule {
|
|||||||
ruleName :: CFTerm }
|
ruleName :: CFTerm }
|
||||||
| BNFCCoercions {
|
| BNFCCoercions {
|
||||||
coerCat :: Cat,
|
coerCat :: Cat,
|
||||||
coerNum :: Int }
|
coerNum :: Integer }
|
||||||
| BNFCTerminator {
|
| BNFCTerminator {
|
||||||
termNonEmpty :: Bool,
|
termNonEmpty :: Bool,
|
||||||
termCat :: Cat,
|
termCat :: Cat,
|
||||||
|
|||||||
@@ -22,7 +22,8 @@ import GF.Infra.Option
|
|||||||
import GF.Infra.UseIO(MonadIO(..))
|
import GF.Infra.UseIO(MonadIO(..))
|
||||||
import GF.Grammar.Grammar
|
import GF.Grammar.Grammar
|
||||||
|
|
||||||
import PGF2.Internal(Literal(..),Symbol(..))
|
import PGF2(Literal(..))
|
||||||
|
import PGF2.Internal(Symbol(..))
|
||||||
|
|
||||||
-- Please change this every time when the GFO format is changed
|
-- Please change this every time when the GFO format is changed
|
||||||
gfoVersion = "GF04"
|
gfoVersion = "GF04"
|
||||||
|
|||||||
@@ -9,9 +9,11 @@
|
|||||||
|
|
||||||
{-# LANGUAGE DeriveTraversable #-}
|
{-# LANGUAGE DeriveTraversable #-}
|
||||||
module GF.Grammar.Canonical where
|
module GF.Grammar.Canonical where
|
||||||
|
|
||||||
import Prelude hiding ((<>))
|
import Prelude hiding ((<>))
|
||||||
import GF.Text.Pretty
|
import GF.Text.Pretty
|
||||||
import GF.Infra.Ident (RawIdent)
|
import GF.Infra.Ident (RawIdent)
|
||||||
|
import PGF(Literal(..))
|
||||||
|
|
||||||
-- | A Complete grammar
|
-- | A Complete grammar
|
||||||
data Grammar = Grammar Abstract [Concrete] deriving Show
|
data Grammar = Grammar Abstract [Concrete] deriving Show
|
||||||
@@ -58,7 +60,7 @@ newtype ParamType = ParamTypeId ParamId deriving (Eq,Ord,Show)
|
|||||||
|
|
||||||
-- | Linearization value, RHS of @lin@
|
-- | Linearization value, RHS of @lin@
|
||||||
data LinValue = ConcatValue LinValue LinValue
|
data LinValue = ConcatValue LinValue LinValue
|
||||||
| LiteralValue LinLiteral
|
| LiteralValue Literal
|
||||||
| ErrorValue String
|
| ErrorValue String
|
||||||
| ParamConstant ParamValue
|
| ParamConstant ParamValue
|
||||||
| PredefValue PredefId
|
| PredefValue PredefId
|
||||||
@@ -74,11 +76,6 @@ data LinValue = ConcatValue LinValue LinValue
|
|||||||
| CommentedValue String LinValue
|
| CommentedValue String LinValue
|
||||||
deriving (Eq,Ord,Show)
|
deriving (Eq,Ord,Show)
|
||||||
|
|
||||||
data LinLiteral = FloatConstant Float
|
|
||||||
| IntConstant Int
|
|
||||||
| StrConstant String
|
|
||||||
deriving (Eq,Ord,Show)
|
|
||||||
|
|
||||||
data LinPattern = ParamPattern ParamPattern
|
data LinPattern = ParamPattern ParamPattern
|
||||||
| RecordPattern [RecordRow LinPattern]
|
| RecordPattern [RecordRow LinPattern]
|
||||||
| TuplePattern [LinPattern]
|
| TuplePattern [LinPattern]
|
||||||
@@ -120,9 +117,8 @@ newtype FunId = FunId Id deriving (Eq,Show)
|
|||||||
|
|
||||||
data VarId = Anonymous | VarId Id deriving Show
|
data VarId = Anonymous | VarId Id deriving Show
|
||||||
|
|
||||||
newtype Flags = Flags [(FlagName,FlagValue)] deriving Show
|
newtype Flags = Flags [(FlagName,Literal)] deriving Show
|
||||||
type FlagName = Id
|
type FlagName = Id
|
||||||
data FlagValue = Str String | Int Int | Flt Double deriving Show
|
|
||||||
|
|
||||||
|
|
||||||
-- *** Identifiers
|
-- *** Identifiers
|
||||||
@@ -243,13 +239,13 @@ instance PPA LinValue where
|
|||||||
VarValue v -> pp v
|
VarValue v -> pp v
|
||||||
_ -> parens lv
|
_ -> parens lv
|
||||||
|
|
||||||
instance Pretty LinLiteral where pp = ppA
|
instance Pretty Literal where pp = ppA
|
||||||
|
|
||||||
instance PPA LinLiteral where
|
instance PPA Literal where
|
||||||
ppA l = case l of
|
ppA l = case l of
|
||||||
FloatConstant f -> pp f
|
LFlt f -> pp f
|
||||||
IntConstant n -> pp n
|
LInt n -> pp n
|
||||||
StrConstant s -> doubleQuotes s -- hmm
|
LStr s -> doubleQuotes s -- hmm
|
||||||
|
|
||||||
instance RhsSeparator LinValue where rhsSep _ = pp "="
|
instance RhsSeparator LinValue where rhsSep _ = pp "="
|
||||||
|
|
||||||
@@ -298,11 +294,6 @@ instance Pretty Flags where
|
|||||||
where
|
where
|
||||||
ppFlag (name,value) = name <+> "=" <+> value <>";"
|
ppFlag (name,value) = name <+> "=" <+> value <>";"
|
||||||
|
|
||||||
instance Pretty FlagValue where
|
|
||||||
pp (Str s) = pp s
|
|
||||||
pp (Int i) = pp i
|
|
||||||
pp (Flt d) = pp d
|
|
||||||
|
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
-- | Pretty print atomically (i.e. wrap it in parentheses if necessary)
|
-- | Pretty print atomically (i.e. wrap it in parentheses if necessary)
|
||||||
class Pretty a => PPA a where ppA :: a -> Doc
|
class Pretty a => PPA a where ppA :: a -> Doc
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import Data.Ratio (denominator, numerator)
|
|||||||
import GF.Grammar.Canonical
|
import GF.Grammar.Canonical
|
||||||
import Control.Monad (guard)
|
import Control.Monad (guard)
|
||||||
import GF.Infra.Ident (RawIdent,showRawIdent,rawIdentS)
|
import GF.Infra.Ident (RawIdent,showRawIdent,rawIdentS)
|
||||||
|
import PGF(Literal(..))
|
||||||
|
|
||||||
encodeJSON :: FilePath -> Grammar -> IO ()
|
encodeJSON :: FilePath -> Grammar -> IO ()
|
||||||
encodeJSON fpath g = writeFile fpath (encode g)
|
encodeJSON fpath g = writeFile fpath (encode g)
|
||||||
@@ -171,13 +171,13 @@ instance JSON LinValue where
|
|||||||
<|> do vs <- readJSON o :: Result [LinValue]
|
<|> do vs <- readJSON o :: Result [LinValue]
|
||||||
return (foldr1 ConcatValue vs)
|
return (foldr1 ConcatValue vs)
|
||||||
|
|
||||||
instance JSON LinLiteral where
|
instance JSON Literal where
|
||||||
-- basic values (Str, Float, Int) are encoded as JSON strings/numbers:
|
-- basic values (Str, Float, Int) are encoded as JSON strings/numbers:
|
||||||
showJSON (StrConstant s) = showJSON s
|
showJSON (LStr s) = showJSON s
|
||||||
showJSON (FloatConstant f) = showJSON f
|
showJSON (LFlt f) = showJSON f
|
||||||
showJSON (IntConstant n) = showJSON n
|
showJSON (LInt n) = showJSON n
|
||||||
|
|
||||||
readJSON = readBasicJSON StrConstant IntConstant FloatConstant
|
readJSON = readBasicJSON LStr LInt LFlt
|
||||||
|
|
||||||
instance JSON LinPattern where
|
instance JSON LinPattern where
|
||||||
-- wildcards and patterns without arguments are encoded as strings:
|
-- wildcards and patterns without arguments are encoded as strings:
|
||||||
@@ -262,15 +262,6 @@ instance JSON Flags where
|
|||||||
where fromRow (lbl, jsvalue) = do value <- readJSON jsvalue
|
where fromRow (lbl, jsvalue) = do value <- readJSON jsvalue
|
||||||
return (rawIdentS lbl, value)
|
return (rawIdentS lbl, value)
|
||||||
|
|
||||||
instance JSON FlagValue where
|
|
||||||
-- flag values are encoded as basic JSON types:
|
|
||||||
showJSON (Str s) = showJSON s
|
|
||||||
showJSON (Int i) = showJSON i
|
|
||||||
showJSON (Flt f) = showJSON f
|
|
||||||
|
|
||||||
readJSON = readBasicJSON Str Int Flt
|
|
||||||
|
|
||||||
|
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
-- ** Convenience functions
|
-- ** Convenience functions
|
||||||
|
|
||||||
|
|||||||
@@ -354,7 +354,7 @@ data Term =
|
|||||||
| Cn Ident -- ^ constant
|
| Cn Ident -- ^ constant
|
||||||
| Con Ident -- ^ constructor
|
| Con Ident -- ^ constructor
|
||||||
| Sort Ident -- ^ basic type
|
| Sort Ident -- ^ basic type
|
||||||
| EInt Int -- ^ integer literal
|
| EInt Integer -- ^ integer literal
|
||||||
| EFloat Double -- ^ floating point literal
|
| EFloat Double -- ^ floating point literal
|
||||||
| K String -- ^ string literal or token: @\"foo\"@
|
| K String -- ^ string literal or token: @\"foo\"@
|
||||||
| Empty -- ^ the empty string @[]@
|
| Empty -- ^ the empty string @[]@
|
||||||
@@ -409,7 +409,7 @@ data Patt =
|
|||||||
| PW -- ^ wild card pattern: @_@
|
| PW -- ^ wild card pattern: @_@
|
||||||
| PR [(Label,Patt)] -- ^ record pattern: @{r = p ; ...}@ -- only concrete
|
| PR [(Label,Patt)] -- ^ record pattern: @{r = p ; ...}@ -- only concrete
|
||||||
| PString String -- ^ string literal pattern: @\"foo\"@ -- only abstract
|
| PString String -- ^ string literal pattern: @\"foo\"@ -- only abstract
|
||||||
| PInt Int -- ^ integer literal pattern: @12@ -- only abstract
|
| PInt Integer -- ^ integer literal pattern: @12@ -- only abstract
|
||||||
| PFloat Double -- ^ float literal pattern: @1.2@ -- only abstract
|
| PFloat Double -- ^ float literal pattern: @1.2@ -- only abstract
|
||||||
| PT Type Patt -- ^ type-annotated pattern
|
| PT Type Patt -- ^ type-annotated pattern
|
||||||
|
|
||||||
|
|||||||
@@ -130,7 +130,7 @@ data Token
|
|||||||
| T_separator
|
| T_separator
|
||||||
| T_nonempty
|
| T_nonempty
|
||||||
| T_String String -- string literals
|
| T_String String -- string literals
|
||||||
| T_Integer Int -- integer literals
|
| T_Integer Integer -- integer literals
|
||||||
| T_Double Double -- double precision float literals
|
| T_Double Double -- double precision float literals
|
||||||
| T_Ident Ident
|
| T_Ident Ident
|
||||||
| T_EOF
|
| T_EOF
|
||||||
|
|||||||
@@ -216,7 +216,7 @@ typeTok = Sort cTok
|
|||||||
typeStrs = Sort cStrs
|
typeStrs = Sort cStrs
|
||||||
|
|
||||||
typeString, typeFloat, typeInt :: Type
|
typeString, typeFloat, typeInt :: Type
|
||||||
typeInts :: Int -> Type
|
typeInts :: Integer -> Type
|
||||||
typePBool :: Type
|
typePBool :: Type
|
||||||
typeError :: Type
|
typeError :: Type
|
||||||
|
|
||||||
@@ -227,7 +227,7 @@ typeInts i = App (cnPredef cInts) (EInt i)
|
|||||||
typePBool = cnPredef cPBool
|
typePBool = cnPredef cPBool
|
||||||
typeError = cnPredef cErrorType
|
typeError = cnPredef cErrorType
|
||||||
|
|
||||||
isTypeInts :: Type -> Maybe Int
|
isTypeInts :: Type -> Maybe Integer
|
||||||
isTypeInts (App c (EInt i)) | c == cnPredef cInts = Just i
|
isTypeInts (App c (EInt i)) | c == cnPredef cInts = Just i
|
||||||
isTypeInts _ = Nothing
|
isTypeInts _ = Nothing
|
||||||
|
|
||||||
@@ -324,7 +324,7 @@ freshAsTerm s = Vr (varX (readIntArg s))
|
|||||||
string2term :: String -> Term
|
string2term :: String -> Term
|
||||||
string2term = K
|
string2term = K
|
||||||
|
|
||||||
int2term :: Int -> Term
|
int2term :: Integer -> Term
|
||||||
int2term = EInt
|
int2term = EInt
|
||||||
|
|
||||||
float2term :: Double -> Term
|
float2term :: Double -> Term
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ import GF.Infra.Ident
|
|||||||
import GF.Infra.GetOpt
|
import GF.Infra.GetOpt
|
||||||
import GF.Grammar.Predef
|
import GF.Grammar.Predef
|
||||||
import System.FilePath
|
import System.FilePath
|
||||||
import PGF2.Internal(Literal(..))
|
import PGF2(Literal(..))
|
||||||
|
|
||||||
import GF.Data.Operations(Err,ErrorMonad(..),liftErr)
|
import GF.Data.Operations(Err,ErrorMonad(..),liftErr)
|
||||||
|
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc
|
|||||||
type Profile = [Int]
|
type Profile = [Int]
|
||||||
|
|
||||||
pgfToCFG :: PGF -> Concr -> CFG
|
pgfToCFG :: PGF -> Concr -> CFG
|
||||||
pgfToCFG pgf cnc = mkCFG start_cat extCats (startRules ++ concatMap ruleToCFRule rules)
|
pgfToCFG pgf cnc = error "TODO: pgfToCFG" {- mkCFG start_cat extCats (startRules ++ concatMap ruleToCFRule rules)
|
||||||
where
|
where
|
||||||
(_,start_cat,_) = unType (startCat pgf)
|
(_,start_cat,_) = unType (startCat pgf)
|
||||||
|
|
||||||
@@ -116,3 +116,4 @@ pgfToCFG pgf cnc = mkCFG start_cat extCats (startRules ++ concatMap ruleToCFRule
|
|||||||
ruleToCFRule (c,PCoerce c') =
|
ruleToCFRule (c,PCoerce c') =
|
||||||
[Rule (fcatToCat c l) [NonTerminal (fcatToCat c' l)] (CFRes 0)
|
[Rule (fcatToCat c l) [NonTerminal (fcatToCat c' l)] (CFRes 0)
|
||||||
| l <- [0..catLinArity c-1]]
|
| l <- [0..catLinArity c-1]]
|
||||||
|
-}
|
||||||
|
|||||||
@@ -14,17 +14,20 @@
|
|||||||
-------------------------------------------------
|
-------------------------------------------------
|
||||||
|
|
||||||
module PGF2 (-- * PGF
|
module PGF2 (-- * PGF
|
||||||
PGF,readPGF,bootNGF,readNGF,
|
PGF,readPGF,bootNGF,readNGF,writePGF,showPGF,
|
||||||
|
|
||||||
-- * Abstract syntax
|
-- * Abstract syntax
|
||||||
AbsName,abstractName,globalFlag,abstractFlag,
|
AbsName,abstractName,globalFlag,abstractFlag,
|
||||||
|
|
||||||
-- ** Categories
|
-- ** Categories
|
||||||
Cat,categories,categoryContext,categoryProb,
|
Cat,categories,categoryContext,categoryProbability,
|
||||||
|
|
||||||
-- ** Functions
|
-- ** Functions
|
||||||
Fun, functions, functionsByCat,
|
Fun, functions, functionsByCat,
|
||||||
functionType, functionIsConstructor, functionProb,
|
functionType, functionIsConstructor, functionProbability,
|
||||||
|
|
||||||
-- ** Expressions
|
-- ** Expressions
|
||||||
Expr(..), Literal(..), showExpr, readExpr,
|
Expr(..), Literal(..), showExpr, readExpr, pExpr, pIdent,
|
||||||
mkAbs, unAbs,
|
mkAbs, unAbs,
|
||||||
mkApp, unApp, unapply,
|
mkApp, unApp, unapply,
|
||||||
mkStr, unStr,
|
mkStr, unStr,
|
||||||
@@ -33,20 +36,58 @@ module PGF2 (-- * PGF
|
|||||||
mkFloat, unFloat,
|
mkFloat, unFloat,
|
||||||
mkMeta, unMeta,
|
mkMeta, unMeta,
|
||||||
-- extra
|
-- extra
|
||||||
exprSize, exprFunctions,
|
exprSize, exprFunctions, exprSubstitute, exprProbability,
|
||||||
|
|
||||||
-- ** Types
|
-- ** Types
|
||||||
Type(..), Hypo, BindType(..), startCat,
|
Type(..), Hypo, BindType(..), startCat,
|
||||||
readType, showType,
|
readType, showType, showContext,
|
||||||
mkType, unType,
|
mkType, unType,
|
||||||
mkHypo, mkDepHypo, mkImplHypo,
|
mkHypo, mkDepHypo, mkImplHypo,
|
||||||
|
|
||||||
|
-- ** Type checking
|
||||||
|
-- | Dynamically-built expressions should always be type-checked before using in other functions,
|
||||||
|
-- as the exceptions thrown by using invalid expressions may not catchable.
|
||||||
|
checkExpr, inferExpr, checkType,
|
||||||
|
|
||||||
|
-- ** Computing
|
||||||
|
compute,
|
||||||
|
|
||||||
|
-- ** Generation
|
||||||
|
generateAll, generateAllFrom, generateRandom, generateRandomFrom,
|
||||||
|
|
||||||
|
-- ** Morphological Analysis
|
||||||
|
MorphoAnalysis, lookupMorpho, lookupCohorts, fullFormLexicon,
|
||||||
|
filterBest, filterLongest,
|
||||||
|
|
||||||
|
-- ** Visualizations
|
||||||
|
GraphvizOptions(..), graphvizDefaults,
|
||||||
|
graphvizAbstractTree, graphvizParseTree,
|
||||||
|
Labels, getDepLabels,
|
||||||
|
graphvizDependencyTree, conlls2latexDoc, getCncDepLabels,
|
||||||
|
graphvizWordAlignment,
|
||||||
|
|
||||||
-- * Concrete syntax
|
-- * Concrete syntax
|
||||||
ConcName,
|
ConcName,Concr,languages,concreteName,languageCode,
|
||||||
|
|
||||||
|
-- ** Linearization
|
||||||
|
linearize, linearizeAll, tabularLinearize, tabularLinearizeAll,
|
||||||
|
FId, BracketedString(..), showBracketedString, flattenBracketedString,
|
||||||
|
bracketedLinearize, bracketedLinearizeAll,
|
||||||
|
hasLinearization,
|
||||||
|
printName, alignWords, gizaAlignment,
|
||||||
|
|
||||||
|
-- ** Parsing
|
||||||
|
ParseOutput(..), parse, parseWithHeuristics, complete,
|
||||||
|
|
||||||
-- * Exceptions
|
-- * Exceptions
|
||||||
PGFError(..)
|
PGFError(..),
|
||||||
|
|
||||||
|
-- * Auxiliaries
|
||||||
|
readProbabilitiesFromFile
|
||||||
) where
|
) where
|
||||||
|
|
||||||
|
import Prelude hiding ((<>))
|
||||||
|
|
||||||
import PGF2.Expr
|
import PGF2.Expr
|
||||||
import PGF2.FFI
|
import PGF2.FFI
|
||||||
|
|
||||||
@@ -54,15 +95,16 @@ import Foreign
|
|||||||
import Foreign.C
|
import Foreign.C
|
||||||
import Control.Exception(mask_,bracket)
|
import Control.Exception(mask_,bracket)
|
||||||
import System.IO.Unsafe(unsafePerformIO)
|
import System.IO.Unsafe(unsafePerformIO)
|
||||||
|
import System.Random
|
||||||
import qualified Foreign.Concurrent as C
|
import qualified Foreign.Concurrent as C
|
||||||
import qualified Data.Map as Map
|
import qualified Data.Map as Map
|
||||||
import Data.IORef
|
import Data.IORef
|
||||||
|
import Data.List(intersperse,groupBy)
|
||||||
|
import Data.Char(isUpper,isSpace,isPunctuation)
|
||||||
|
import Text.PrettyPrint
|
||||||
|
|
||||||
#include <pgf/pgf.h>
|
#include <pgf/pgf.h>
|
||||||
|
|
||||||
type AbsName = String -- ^ Name of abstract syntax
|
|
||||||
type ConcName = String -- ^ Name of concrete syntax
|
|
||||||
|
|
||||||
-- | Reads a PGF file and keeps it in memory.
|
-- | Reads a PGF file and keeps it in memory.
|
||||||
readPGF :: FilePath -> IO PGF
|
readPGF :: FilePath -> IO PGF
|
||||||
readPGF fpath =
|
readPGF fpath =
|
||||||
@@ -106,6 +148,12 @@ readNGF fpath =
|
|||||||
fptr2 <- C.newForeignPtr c_revision (withForeignPtr fptr1 (\c_db -> pgf_free_revision c_db c_revision))
|
fptr2 <- C.newForeignPtr c_revision (withForeignPtr fptr1 (\c_db -> pgf_free_revision c_db c_revision))
|
||||||
return (PGF fptr1 fptr2 Map.empty)
|
return (PGF fptr1 fptr2 Map.empty)
|
||||||
|
|
||||||
|
writePGF :: FilePath -> PGF -> IO ()
|
||||||
|
writePGF = error "TODO: writePGF"
|
||||||
|
|
||||||
|
showPGF :: PGF -> String
|
||||||
|
showPGF = error "TODO: showPGF"
|
||||||
|
|
||||||
-- | The abstract language name is the name of the top-level
|
-- | The abstract language name is the name of the top-level
|
||||||
-- abstract module
|
-- abstract module
|
||||||
abstractName :: PGF -> AbsName
|
abstractName :: PGF -> AbsName
|
||||||
@@ -156,14 +204,273 @@ functionIsConstructor p fun =
|
|||||||
do res <- withPgfExn (pgf_function_is_constructor c_db c_revision c_fun)
|
do res <- withPgfExn (pgf_function_is_constructor c_db c_revision c_fun)
|
||||||
return (res /= 0)
|
return (res /= 0)
|
||||||
|
|
||||||
functionProb :: PGF -> Fun -> Float
|
functionProbability :: PGF -> Fun -> Float
|
||||||
functionProb p fun =
|
functionProbability p fun =
|
||||||
unsafePerformIO $
|
unsafePerformIO $
|
||||||
withText fun $ \c_fun ->
|
withText fun $ \c_fun ->
|
||||||
withForeignPtr (a_db p) $ \c_db ->
|
withForeignPtr (a_db p) $ \c_db ->
|
||||||
withForeignPtr (revision p) $ \c_revision ->
|
withForeignPtr (revision p) $ \c_revision ->
|
||||||
withPgfExn (pgf_function_prob c_db c_revision c_fun)
|
withPgfExn (pgf_function_prob c_db c_revision c_fun)
|
||||||
|
|
||||||
|
exprProbability :: PGF -> Expr -> Float
|
||||||
|
exprProbability = error "TODO: exprProbability"
|
||||||
|
|
||||||
|
checkExpr :: PGF -> Expr -> Type -> Either String Expr
|
||||||
|
checkExpr = error "TODO: checkExpr"
|
||||||
|
|
||||||
|
-- | Tries to infer the type of an expression. Note that
|
||||||
|
-- even if the expression is type correct it is not always
|
||||||
|
-- possible to infer its type in the GF type system.
|
||||||
|
-- In this case the function returns an error.
|
||||||
|
inferExpr :: PGF -> Expr -> Either String (Expr, Type)
|
||||||
|
inferExpr = error "TODO: inferExpr"
|
||||||
|
|
||||||
|
-- | Check whether a type is consistent with the abstract
|
||||||
|
-- syntax of the grammar.
|
||||||
|
checkType :: PGF -> Type -> Either String Type
|
||||||
|
checkType = error "TODO: checkType"
|
||||||
|
|
||||||
|
compute :: PGF -> Expr -> Expr
|
||||||
|
compute = error "TODO: compute"
|
||||||
|
|
||||||
|
concreteName :: Concr -> ConcName
|
||||||
|
concreteName c = error "TODO: concreteName"
|
||||||
|
|
||||||
|
languageCode :: Concr -> Maybe String
|
||||||
|
languageCode c = error "TODO: languageCode"
|
||||||
|
|
||||||
|
printName :: Concr -> Fun -> Maybe String
|
||||||
|
printName lang fun = error "TODO: printName"
|
||||||
|
|
||||||
|
alignWords :: Concr -> Expr -> [(String, [Int])]
|
||||||
|
alignWords = error "TODO: alignWords"
|
||||||
|
|
||||||
|
gizaAlignment = error "TODO: gizaAlignment"
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
-- Functions using Concr
|
||||||
|
-- Morpho analyses, parsing & linearization
|
||||||
|
|
||||||
|
-- | This triple is returned by all functions that deal with
|
||||||
|
-- the grammar's lexicon. Its first element is the name of an abstract
|
||||||
|
-- lexical function which can produce a given word or
|
||||||
|
-- a multiword expression (i.e. this is the lemma).
|
||||||
|
-- After that follows a string which describes
|
||||||
|
-- the particular inflection form.
|
||||||
|
--
|
||||||
|
-- The last element is a logarithm from the
|
||||||
|
-- the probability of the function. The probability is not
|
||||||
|
-- conditionalized on the category of the function. This makes it
|
||||||
|
-- possible to compare the likelihood of two functions even if they
|
||||||
|
-- have different types.
|
||||||
|
type MorphoAnalysis = (Fun,String,Float)
|
||||||
|
|
||||||
|
-- | 'lookupMorpho' takes a string which must be a single word or
|
||||||
|
-- a multiword expression. It then computes the list of all possible
|
||||||
|
-- morphological analyses.
|
||||||
|
lookupMorpho :: Concr -> String -> [MorphoAnalysis]
|
||||||
|
lookupMorpho = error "TODO: lookupMorpho"
|
||||||
|
|
||||||
|
-- | 'lookupCohorts' takes an arbitrary string an produces
|
||||||
|
-- a list of all places where lexical items from the grammar have been
|
||||||
|
-- identified (i.e. cohorts). The list consists of triples of the format @(start,ans,end)@,
|
||||||
|
-- where @start-end@ identifies the span in the text and @ans@ is
|
||||||
|
-- the list of possible morphological analyses similar to 'lookupMorpho'.
|
||||||
|
--
|
||||||
|
-- The list is sorted first by the @start@ position and after than
|
||||||
|
-- by the @end@ position. This can be used for instance if you want to
|
||||||
|
-- filter only the longest matches.
|
||||||
|
lookupCohorts :: Concr -> String -> [(Int,String,[MorphoAnalysis],Int)]
|
||||||
|
lookupCohorts = error "TODO: lookupCohorts"
|
||||||
|
|
||||||
|
filterBest :: [(Int,String,[MorphoAnalysis],Int)] -> [(Int,String,[MorphoAnalysis],Int)]
|
||||||
|
filterBest ans =
|
||||||
|
reverse (iterate (maxBound :: Int) [(0,0,[],ans)] [] [])
|
||||||
|
where
|
||||||
|
iterate v0 [] [] res = res
|
||||||
|
iterate v0 [] new res = iterate v0 new [] res
|
||||||
|
iterate v0 ((_,v,conf, []):old) new res =
|
||||||
|
case compare v0 v of
|
||||||
|
LT -> res
|
||||||
|
EQ -> iterate v0 old new (merge conf res)
|
||||||
|
GT -> iterate v old new conf
|
||||||
|
iterate v0 ((_,v,conf,an:ans):old) new res = iterate v0 old (insert (v+valueOf an) conf an ans [] new) res
|
||||||
|
|
||||||
|
valueOf (_,_,[],_) = 2
|
||||||
|
valueOf _ = 1
|
||||||
|
|
||||||
|
insert v conf an@(start,_,_,end) ans l_new [] =
|
||||||
|
match start v conf ans ((end,v,comb conf an,filter end ans):l_new) []
|
||||||
|
insert v conf an@(start,_,_,end) ans l_new (new@(end0,v0,conf0,ans0):r_new) =
|
||||||
|
case compare end0 end of
|
||||||
|
LT -> insert v conf an ans (new:l_new) r_new
|
||||||
|
EQ -> case compare v0 v of
|
||||||
|
LT -> match start v conf ans ((end,v, conf0,ans0): l_new) r_new
|
||||||
|
EQ -> match start v conf ans ((end,v,merge (comb conf an) conf0,ans0): l_new) r_new
|
||||||
|
GT -> match start v conf ans ((end,v,comb conf an, ans0): l_new) r_new
|
||||||
|
GT -> match start v conf ans ((end,v,comb conf an, filter end ans):new:l_new) r_new
|
||||||
|
|
||||||
|
match start0 v conf (an@(start,_,_,end):ans) l_new r_new
|
||||||
|
| start0 == start = insert v conf an ans l_new r_new
|
||||||
|
match start0 v conf ans l_new r_new = revOn l_new r_new
|
||||||
|
|
||||||
|
comb ((start0,w0,an0,end0):conf) (start,w,an,end)
|
||||||
|
| end0 == start && (unk w0 an0 || unk w an) = (start0,w0++w,[],end):conf
|
||||||
|
comb conf an = an:conf
|
||||||
|
|
||||||
|
filter end [] = []
|
||||||
|
filter end (next@(start,_,_,_):ans)
|
||||||
|
| end <= start = next:ans
|
||||||
|
| otherwise = filter end ans
|
||||||
|
|
||||||
|
revOn [] ys = ys
|
||||||
|
revOn (x:xs) ys = revOn xs (x:ys)
|
||||||
|
|
||||||
|
merge [] ans = ans
|
||||||
|
merge ans [] = ans
|
||||||
|
merge (an1@(start1,_,_,end1):ans1) (an2@(start2,_,_,end2):ans2) =
|
||||||
|
case compare (start1,end1) (start2,end2) of
|
||||||
|
GT -> an1 : merge ans1 (an2:ans2)
|
||||||
|
EQ -> an1 : merge ans1 ans2
|
||||||
|
LT -> an2 : merge (an1:ans1) ans2
|
||||||
|
|
||||||
|
filterLongest :: [(Int,String,[MorphoAnalysis],Int)] -> [(Int,String,[MorphoAnalysis],Int)]
|
||||||
|
filterLongest [] = []
|
||||||
|
filterLongest (an:ans) = longest an ans
|
||||||
|
where
|
||||||
|
longest prev [] = [prev]
|
||||||
|
longest prev@(start0,_,_,end0) (next@(start,_,_,end):ans)
|
||||||
|
| start0 == start = longest next ans
|
||||||
|
| otherwise = filter prev (next:ans)
|
||||||
|
|
||||||
|
filter prev [] = [prev]
|
||||||
|
filter prev@(start0,w0,an0,end0) (next@(start,w,an,end):ans)
|
||||||
|
| end0 == start && (unk w0 an0 || unk w an)
|
||||||
|
= filter (start0,w0++w,[],end) ans
|
||||||
|
| end0 <= start = prev : longest next ans
|
||||||
|
| otherwise = filter prev ans
|
||||||
|
|
||||||
|
unk w [] | any (not . isPunctuation) w = True
|
||||||
|
unk _ _ = False
|
||||||
|
|
||||||
|
fullFormLexicon :: Concr -> [(String, [MorphoAnalysis])]
|
||||||
|
fullFormLexicon lang = error "TODO: fullFormLexicon"
|
||||||
|
|
||||||
|
-- | This data type encodes the different outcomes which you could get from the parser.
|
||||||
|
data ParseOutput a
|
||||||
|
= ParseFailed Int String -- ^ The integer is the position in number of unicode characters where the parser failed.
|
||||||
|
-- The string is the token where the parser have failed.
|
||||||
|
| ParseOk a -- ^ If the parsing and the type checking are successful
|
||||||
|
-- we get the abstract syntax trees as either a list or a chart.
|
||||||
|
| ParseIncomplete -- ^ The sentence is not complete.
|
||||||
|
|
||||||
|
parse :: Concr -> Type -> String -> ParseOutput [(Expr,Float)]
|
||||||
|
parse lang ty sent = parseWithHeuristics lang ty sent (-1.0) []
|
||||||
|
|
||||||
|
parseWithHeuristics :: Concr -- ^ the language with which we parse
|
||||||
|
-> Type -- ^ the start category
|
||||||
|
-> String -- ^ the input sentence
|
||||||
|
-> Double -- ^ the heuristic factor.
|
||||||
|
-- A negative value tells the parser
|
||||||
|
-- to lookup up the default from
|
||||||
|
-- the grammar flags
|
||||||
|
-> [(Cat, String -> Int -> Maybe (Expr,Float,Int))]
|
||||||
|
-- ^ a list of callbacks for literal categories.
|
||||||
|
-- The arguments of the callback are:
|
||||||
|
-- the index of the constituent for the literal category;
|
||||||
|
-- the input sentence; the current offset in the sentence.
|
||||||
|
-- If a literal has been recognized then the output should
|
||||||
|
-- be Just (expr,probability,end_offset)
|
||||||
|
-> ParseOutput [(Expr,Float)]
|
||||||
|
parseWithHeuristics = error "TODO: parseWithHeuristics"
|
||||||
|
|
||||||
|
-- | Returns possible completions of the current partial input.
|
||||||
|
complete :: Concr -- ^ the language with which we parse
|
||||||
|
-> Type -- ^ the start category
|
||||||
|
-> String -- ^ the input sentence (excluding token being completed)
|
||||||
|
-> String -- ^ prefix (partial token being completed)
|
||||||
|
-> ParseOutput [(String, Fun, Cat, Float)] -- ^ (token, category, function, probability)
|
||||||
|
complete = error "TODO: complete"
|
||||||
|
|
||||||
|
-- | Returns True if there is a linearization defined for that function in that language
|
||||||
|
hasLinearization :: Concr -> Fun -> Bool
|
||||||
|
hasLinearization = error "TODO: linearize"
|
||||||
|
|
||||||
|
-- | Linearizes an expression as a string in the language
|
||||||
|
linearize :: Concr -> Expr -> String
|
||||||
|
linearize lang e = error "TODO: linearize"
|
||||||
|
|
||||||
|
-- | Generates all possible linearizations of an expression
|
||||||
|
linearizeAll :: Concr -> Expr -> [String]
|
||||||
|
linearizeAll lang e = error "TODO: linearizeAll"
|
||||||
|
|
||||||
|
-- | Generates a table of linearizations for an expression
|
||||||
|
tabularLinearize :: Concr -> Expr -> [(String, String)]
|
||||||
|
tabularLinearize lang e =
|
||||||
|
case tabularLinearizeAll lang e of
|
||||||
|
(lins:_) -> lins
|
||||||
|
_ -> []
|
||||||
|
|
||||||
|
-- | Generates a table of linearizations for an expression
|
||||||
|
tabularLinearizeAll :: Concr -> Expr -> [[(String, String)]]
|
||||||
|
tabularLinearizeAll lang e = error "TODO: tabularLinearizeAll"
|
||||||
|
|
||||||
|
type FId = Int
|
||||||
|
|
||||||
|
-- | BracketedString represents a sentence that is linearized
|
||||||
|
-- as usual but we also want to retain the ''brackets'' that
|
||||||
|
-- mark the beginning and the end of each constituent.
|
||||||
|
data BracketedString
|
||||||
|
= Leaf String -- ^ this is the leaf i.e. a single token
|
||||||
|
| BIND -- ^ the surrounding tokens must be bound together
|
||||||
|
| Bracket Cat {-# UNPACK #-} !FId String Fun [BracketedString]
|
||||||
|
-- ^ this is a bracket. The 'Cat' is the category of
|
||||||
|
-- the phrase. The 'FId' is an unique identifier for
|
||||||
|
-- every phrase in the sentence. For context-free grammars
|
||||||
|
-- i.e. without discontinuous constituents this identifier
|
||||||
|
-- is also unique for every bracket. When there are discontinuous
|
||||||
|
-- phrases then the identifiers are unique for every phrase but
|
||||||
|
-- not for every bracket since the bracket represents a constituent.
|
||||||
|
-- The different constituents could still be distinguished by using
|
||||||
|
-- the analysis string. If the grammar is reduplicating
|
||||||
|
-- then the constituent indices will be the same for all brackets
|
||||||
|
-- that represents the same constituent.
|
||||||
|
-- The 'Fun' is the name of the abstract function that generated
|
||||||
|
-- this phrase.
|
||||||
|
|
||||||
|
-- | Renders the bracketed string as a string where
|
||||||
|
-- the brackets are shown as @(S ...)@ where
|
||||||
|
-- @S@ is the category.
|
||||||
|
showBracketedString :: BracketedString -> String
|
||||||
|
showBracketedString = render . ppBracketedString
|
||||||
|
|
||||||
|
ppBracketedString (Leaf t) = text t
|
||||||
|
ppBracketedString BIND = text "&+"
|
||||||
|
ppBracketedString (Bracket cat fid _ _ bss) = parens (text cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
|
||||||
|
|
||||||
|
-- | Extracts the sequence of tokens from the bracketed string
|
||||||
|
flattenBracketedString :: BracketedString -> [String]
|
||||||
|
flattenBracketedString (Leaf w) = [w]
|
||||||
|
flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString bss
|
||||||
|
|
||||||
|
bracketedLinearize :: Concr -> Expr -> [BracketedString]
|
||||||
|
bracketedLinearize = error "TODO: bracketedLinearize"
|
||||||
|
|
||||||
|
bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]]
|
||||||
|
bracketedLinearizeAll = error "TODO: bracketedLinearizeAll"
|
||||||
|
|
||||||
|
generateAll :: PGF -> Type -> [(Expr,Float)]
|
||||||
|
generateAll p ty = error "TODO: generateAll"
|
||||||
|
|
||||||
|
generateAllFrom :: PGF -> Expr -> [(Expr,Float)]
|
||||||
|
generateAllFrom p ty = error "TODO: generateAllFrom"
|
||||||
|
|
||||||
|
generateRandom :: StdGen -> PGF -> Type -> [a]
|
||||||
|
generateRandom = error "TODO: generateRandom"
|
||||||
|
|
||||||
|
generateRandomFrom :: StdGen -> PGF -> Expr -> [a]
|
||||||
|
generateRandomFrom = error "TODO: generateRandomFrom"
|
||||||
|
|
||||||
-- | List of all functions defined in the abstract syntax
|
-- | List of all functions defined in the abstract syntax
|
||||||
categories :: PGF -> [Cat]
|
categories :: PGF -> [Cat]
|
||||||
categories p =
|
categories p =
|
||||||
@@ -184,7 +491,7 @@ categories p =
|
|||||||
name <- peekText key
|
name <- peekText key
|
||||||
writeIORef ref $ (name : names)
|
writeIORef ref $ (name : names)
|
||||||
|
|
||||||
categoryContext :: PGF -> Cat -> [Hypo]
|
categoryContext :: PGF -> Cat -> Maybe [Hypo]
|
||||||
categoryContext p cat =
|
categoryContext p cat =
|
||||||
unsafePerformIO $
|
unsafePerformIO $
|
||||||
withText cat $ \c_cat ->
|
withText cat $ \c_cat ->
|
||||||
@@ -195,11 +502,11 @@ categoryContext p cat =
|
|||||||
mask_ $ do
|
mask_ $ do
|
||||||
c_hypos <- withPgfExn (pgf_category_context c_db c_revision c_cat p_n_hypos u)
|
c_hypos <- withPgfExn (pgf_category_context c_db c_revision c_cat p_n_hypos u)
|
||||||
if c_hypos == nullPtr
|
if c_hypos == nullPtr
|
||||||
then return []
|
then return Nothing
|
||||||
else do n_hypos <- peek p_n_hypos
|
else do n_hypos <- peek p_n_hypos
|
||||||
hypos <- peekHypos c_hypos 0 n_hypos
|
hypos <- peekHypos c_hypos 0 n_hypos
|
||||||
free c_hypos
|
free c_hypos
|
||||||
return hypos
|
return (Just hypos)
|
||||||
where
|
where
|
||||||
peekHypos :: Ptr PgfTypeHypo -> CSize -> CSize -> IO [Hypo]
|
peekHypos :: Ptr PgfTypeHypo -> CSize -> CSize -> IO [Hypo]
|
||||||
peekHypos c_hypo i n
|
peekHypos c_hypo i n
|
||||||
@@ -214,8 +521,8 @@ categoryContext p cat =
|
|||||||
return ((bt,cat,ty) : hs)
|
return ((bt,cat,ty) : hs)
|
||||||
| otherwise = return []
|
| otherwise = return []
|
||||||
|
|
||||||
categoryProb :: PGF -> Cat -> Float
|
categoryProbability :: PGF -> Cat -> Float
|
||||||
categoryProb p cat =
|
categoryProbability p cat =
|
||||||
unsafePerformIO $
|
unsafePerformIO $
|
||||||
withText cat $ \c_cat ->
|
withText cat $ \c_cat ->
|
||||||
withForeignPtr (a_db p) $ \c_db ->
|
withForeignPtr (a_db p) $ \c_db ->
|
||||||
@@ -291,6 +598,256 @@ abstractFlag p name =
|
|||||||
freeStablePtr c_lit
|
freeStablePtr c_lit
|
||||||
return (Just lit)
|
return (Just lit)
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
-- Graphviz
|
||||||
|
|
||||||
|
data GraphvizOptions = GraphvizOptions {noLeaves :: Bool,
|
||||||
|
noFun :: Bool,
|
||||||
|
noCat :: Bool,
|
||||||
|
noDep :: Bool,
|
||||||
|
nodeFont :: String,
|
||||||
|
leafFont :: String,
|
||||||
|
nodeColor :: String,
|
||||||
|
leafColor :: String,
|
||||||
|
nodeEdgeStyle :: String,
|
||||||
|
leafEdgeStyle :: String
|
||||||
|
}
|
||||||
|
|
||||||
|
graphvizDefaults = GraphvizOptions False False False True "" "" "" "" "" ""
|
||||||
|
|
||||||
|
-- | Renders an abstract syntax tree in a Graphviz format.
|
||||||
|
graphvizAbstractTree :: PGF -> GraphvizOptions -> Expr -> String
|
||||||
|
graphvizAbstractTree p opts e = error "TODO: graphvizAbstractTree"
|
||||||
|
|
||||||
|
graphvizParseTree :: Concr -> GraphvizOptions -> Expr -> String
|
||||||
|
graphvizParseTree c opts e = error "TODO: graphvizParseTree"
|
||||||
|
|
||||||
|
graphvizWordAlignment :: [Concr] -> GraphvizOptions -> Expr -> String
|
||||||
|
graphvizWordAlignment cs opts e = error "TODO: graphvizWordAlignment"
|
||||||
|
|
||||||
|
|
||||||
|
type Labels = Map.Map Fun [String]
|
||||||
|
|
||||||
|
getDepLabels :: String -> Labels
|
||||||
|
getDepLabels s = Map.fromList [(f,ls) | f:ls <- map words (lines s)]
|
||||||
|
|
||||||
|
-- | Visualize word dependency tree.
|
||||||
|
graphvizDependencyTree
|
||||||
|
:: String -- ^ Output format: @"latex"@, @"conll"@, @"malt_tab"@, @"malt_input"@ or @"dot"@
|
||||||
|
-> Bool -- ^ Include extra information (debug)
|
||||||
|
-> Maybe Labels -- ^ abstract label information obtained with 'getDepLabels'
|
||||||
|
-> Maybe CncLabels -- ^ concrete label information obtained with ' ' (was: unused (was: @Maybe String@))
|
||||||
|
-> Concr
|
||||||
|
-> Expr
|
||||||
|
-> String -- ^ Rendered output in the specified format
|
||||||
|
graphvizDependencyTree format debug mlab mclab concr t = error "TODO: graphvizDependencyTree"
|
||||||
|
|
||||||
|
---------------------- should be a separate module?
|
||||||
|
|
||||||
|
-- visualization with latex output. AR Nov 2015
|
||||||
|
|
||||||
|
conlls2latexDoc :: [String] -> String
|
||||||
|
conlls2latexDoc =
|
||||||
|
render .
|
||||||
|
latexDoc .
|
||||||
|
vcat .
|
||||||
|
intersperse (text "" $+$ app "vspace" (text "4mm")) .
|
||||||
|
map conll2latex .
|
||||||
|
filter (not . null)
|
||||||
|
|
||||||
|
conll2latex :: String -> Doc
|
||||||
|
conll2latex = ppLaTeX . conll2latex' . parseCoNLL
|
||||||
|
|
||||||
|
conll2latex' :: CoNLL -> [LaTeX]
|
||||||
|
conll2latex' = dep2latex . conll2dep'
|
||||||
|
|
||||||
|
data Dep = Dep {
|
||||||
|
wordLength :: Int -> Double -- length of word at position int -- was: fixed width, millimetres (>= 20.0)
|
||||||
|
, tokens :: [(String,String)] -- word, pos (0..)
|
||||||
|
, deps :: [((Int,Int),String)] -- from, to, label
|
||||||
|
, root :: Int -- root word position
|
||||||
|
}
|
||||||
|
|
||||||
|
-- some general measures
|
||||||
|
defaultWordLength = 20.0 -- the default fixed width word length, making word 100 units
|
||||||
|
defaultUnit = 0.2 -- unit in latex pictures, 0.2 millimetres
|
||||||
|
spaceLength = 10.0
|
||||||
|
charWidth = 1.8
|
||||||
|
|
||||||
|
wsize rwld w = 100 * rwld w + spaceLength -- word length, units
|
||||||
|
wpos rwld i = sum [wsize rwld j | j <- [0..i-1]] -- start position of the i'th word
|
||||||
|
wdist rwld x y = sum [wsize rwld i | i <- [min x y .. max x y - 1]] -- distance between words x and y
|
||||||
|
labelheight h = h + arcbase + 3 -- label just above arc; 25 would put it just below
|
||||||
|
labelstart c = c - 15.0 -- label starts 15u left of arc centre
|
||||||
|
arcbase = 30.0 -- arcs start and end 40u above the bottom
|
||||||
|
arcfactor r = r * 600 -- reduction of arc size from word distance
|
||||||
|
xyratio = 3 -- width/height ratio of arcs
|
||||||
|
|
||||||
|
putArc :: (Int -> Double) -> Int -> Int -> Int -> String -> [DrawingCommand]
|
||||||
|
putArc frwld height x y label = [oval,arrowhead,labelling] where
|
||||||
|
oval = Put (ctr,arcbase) (OvalTop (wdth,hght))
|
||||||
|
arrowhead = Put (endp,arcbase + 5) (ArrowDown 5) -- downgoing arrow 5u above the arc base
|
||||||
|
labelling = Put (labelstart ctr,labelheight (hght/2)) (TinyText label)
|
||||||
|
dxy = wdist frwld x y -- distance between words, >>= 20.0
|
||||||
|
ndxy = 100 * rwld * fromIntegral height -- distance that is indep of word length
|
||||||
|
hdxy = dxy / 2 -- half the distance
|
||||||
|
wdth = dxy - (arcfactor rwld)/dxy -- longer arcs are wider in proportion
|
||||||
|
hght = ndxy / (xyratio * rwld) -- arc height is independent of word length
|
||||||
|
begp = min x y -- begin position of oval
|
||||||
|
ctr = wpos frwld begp + hdxy + (if x < y then 20 else 10) -- LR arcs are farther right from center of oval
|
||||||
|
endp = (if x < y then (+) else (-)) ctr (wdth/2) -- the point of the arrow
|
||||||
|
rwld = 0.5 ----
|
||||||
|
|
||||||
|
dep2latex :: Dep -> [LaTeX]
|
||||||
|
dep2latex d =
|
||||||
|
[Comment (unwords (map fst (tokens d))),
|
||||||
|
Picture defaultUnit (width,height) (
|
||||||
|
[Put (wpos rwld i,0) (Text w) | (i,w) <- zip [0..] (map fst (tokens d))] -- words
|
||||||
|
++ [Put (wpos rwld i,15) (TinyText w) | (i,w) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom
|
||||||
|
++ concat [putArc rwld (aheight x y) x y label | ((x,y),label) <- deps d] -- arcs and labels
|
||||||
|
++ [Put (wpos rwld (root d) + 15,height) (ArrowDown (height-arcbase))]
|
||||||
|
++ [Put (wpos rwld (root d) + 20,height - 10) (TinyText "ROOT")]
|
||||||
|
)]
|
||||||
|
where
|
||||||
|
wld i = wordLength d i -- >= 20.0
|
||||||
|
rwld i = (wld i) / defaultWordLength -- >= 1.0
|
||||||
|
aheight x y = depth (min x y) (max x y) + 1 ---- abs (x-y)
|
||||||
|
arcs = [(min u v, max u v) | ((u,v),_) <- deps d]
|
||||||
|
depth x y = case [(u,v) | (u,v) <- arcs, (x < u && v <= y) || (x == u && v < y)] of ---- only projective arcs counted
|
||||||
|
[] -> 0
|
||||||
|
uvs -> 1 + maximum (0:[depth u v | (u,v) <- uvs])
|
||||||
|
width = {-round-} (sum [wsize rwld w | (w,_) <- zip [0..] (tokens d)]) + {-round-} spaceLength * fromIntegral ((length (tokens d)) - 1)
|
||||||
|
height = 50 + 20 * {-round-} (maximum (0:[aheight x y | ((x,y),_) <- deps d]))
|
||||||
|
|
||||||
|
type CoNLL = [[String]]
|
||||||
|
parseCoNLL :: String -> CoNLL
|
||||||
|
parseCoNLL = map words . lines
|
||||||
|
|
||||||
|
--conll2dep :: String -> Dep
|
||||||
|
--conll2dep = conll2dep' . parseCoNLL
|
||||||
|
|
||||||
|
conll2dep' :: CoNLL -> Dep
|
||||||
|
conll2dep' ls = Dep {
|
||||||
|
wordLength = wld
|
||||||
|
, tokens = toks
|
||||||
|
, deps = dps
|
||||||
|
, root = head $ [read x-1 | x:_:_:_:_:_:"0":_ <- ls] ++ [1]
|
||||||
|
}
|
||||||
|
where
|
||||||
|
wld i = maximum (0:[charWidth * fromIntegral (length w) | w <- let (tok,pos) = toks !! i in [tok,pos]])
|
||||||
|
toks = [(w,c) | _:w:_:c:_ <- ls]
|
||||||
|
dps = [((read y-1, read x-1),lab) | x:_:_:_:_:_:y:lab:_ <- ls, y /="0"]
|
||||||
|
--maxdist = maximum [abs (x-y) | ((x,y),_) <- dps]
|
||||||
|
|
||||||
|
|
||||||
|
-- * LaTeX Pictures (see https://en.wikibooks.org/wiki/LaTeX/Picture)
|
||||||
|
|
||||||
|
-- We render both LaTeX and SVG from this intermediate representation of
|
||||||
|
-- LaTeX pictures.
|
||||||
|
|
||||||
|
data LaTeX = Comment String | Picture UnitLengthMM Size [DrawingCommand]
|
||||||
|
data DrawingCommand = Put Position Object
|
||||||
|
data Object = Text String | TinyText String | OvalTop Size | ArrowDown Length
|
||||||
|
|
||||||
|
type UnitLengthMM = Double
|
||||||
|
type Size = (Double,Double)
|
||||||
|
type Position = (Double,Double)
|
||||||
|
type Length = Double
|
||||||
|
|
||||||
|
|
||||||
|
-- * latex formatting
|
||||||
|
ppLaTeX = vcat . map ppLaTeX1
|
||||||
|
where
|
||||||
|
ppLaTeX1 el =
|
||||||
|
case el of
|
||||||
|
Comment s -> comment s
|
||||||
|
Picture unit size cmds ->
|
||||||
|
app "setlength{\\unitlength}" (text (show unit ++ "mm"))
|
||||||
|
$$ hang (app "begin" (text "picture")<>text (show size)) 2
|
||||||
|
(vcat (map ppDrawingCommand cmds))
|
||||||
|
$$ app "end" (text "picture")
|
||||||
|
$$ text ""
|
||||||
|
|
||||||
|
ppDrawingCommand (Put pos obj) = put pos (ppObject obj)
|
||||||
|
|
||||||
|
ppObject obj =
|
||||||
|
case obj of
|
||||||
|
Text s -> text s
|
||||||
|
TinyText s -> small (text s)
|
||||||
|
OvalTop size -> text "\\oval" <> text (show size) <> text "[t]"
|
||||||
|
ArrowDown len -> app "vector(0,-1)" (text (show len))
|
||||||
|
|
||||||
|
put p@(_,_) = app ("put" ++ show p)
|
||||||
|
small w = text "{\\tiny" <+> w <> text "}"
|
||||||
|
comment s = text "%%" <+> text s -- line break show follow
|
||||||
|
|
||||||
|
app macro arg = text "\\" <> text macro <> text "{" <> arg <> text "}"
|
||||||
|
|
||||||
|
|
||||||
|
latexDoc :: Doc -> Doc
|
||||||
|
latexDoc body =
|
||||||
|
vcat [text "\\documentclass{article}",
|
||||||
|
text "\\usepackage[utf8]{inputenc}",
|
||||||
|
text "\\begin{document}",
|
||||||
|
body,
|
||||||
|
text "\\end{document}"]
|
||||||
|
|
||||||
|
|
||||||
|
----------------------------------
|
||||||
|
-- concrete syntax annotations (local) on top of conll
|
||||||
|
-- examples of annotations:
|
||||||
|
-- UseComp {"not"} PART neg head
|
||||||
|
-- UseComp {*} AUX cop head
|
||||||
|
|
||||||
|
type CncLabels = [(String, String -> Maybe (String -> String,String,String))]
|
||||||
|
-- (fun, word -> (pos,label,target))
|
||||||
|
-- the pos can remain unchanged, as in the current notation in the article
|
||||||
|
|
||||||
|
fixCoNLL :: CncLabels -> CoNLL -> CoNLL
|
||||||
|
fixCoNLL labels conll = map fixc conll where
|
||||||
|
fixc row = case row of
|
||||||
|
(i:word:fun:pos:cat:x_:"0":"dep":xs) -> (i:word:fun:pos:cat:x_:"0":"root":xs) --- change the root label from dep to root
|
||||||
|
(i:word:fun:pos:cat:x_:j:label:xs) -> case look (fun,word) of
|
||||||
|
Just (pos',label',"head") -> (i:word:fun:pos' pos:cat:x_:j :label':xs)
|
||||||
|
Just (pos',label',target) -> (i:word:fun:pos' pos:cat:x_: getDep j target:label':xs)
|
||||||
|
_ -> row
|
||||||
|
_ -> row
|
||||||
|
|
||||||
|
look (fun,word) = case lookup fun labels of
|
||||||
|
Just relabel -> case relabel word of
|
||||||
|
Just row -> Just row
|
||||||
|
_ -> case lookup "*" labels of
|
||||||
|
Just starlabel -> starlabel word
|
||||||
|
_ -> Nothing
|
||||||
|
_ -> case lookup "*" labels of
|
||||||
|
Just starlabel -> starlabel word
|
||||||
|
_ -> Nothing
|
||||||
|
|
||||||
|
getDep j label = maybe j id $ lookup (label,j) [((label,j),i) | i:word:fun:pos:cat:x_:j:label:xs <- conll]
|
||||||
|
|
||||||
|
getCncDepLabels :: String -> CncLabels
|
||||||
|
getCncDepLabels = map merge . groupBy (\ (x,_) (a,_) -> x == a) . concatMap analyse . filter choose . lines where
|
||||||
|
--- choose is for compatibility with the general notation
|
||||||
|
choose line = notElem '(' line && elem '{' line --- ignoring non-local (with "(") and abstract (without "{") rules
|
||||||
|
|
||||||
|
analyse line = case break (=='{') line of
|
||||||
|
(beg,_:ws) -> case break (=='}') ws of
|
||||||
|
(toks,_:target) -> case (words beg, words target) of
|
||||||
|
(fun:_,[ label,j]) -> [(fun, (tok, (id, label,j))) | tok <- getToks toks]
|
||||||
|
(fun:_,[pos,label,j]) -> [(fun, (tok, (const pos,label,j))) | tok <- getToks toks]
|
||||||
|
_ -> []
|
||||||
|
_ -> []
|
||||||
|
_ -> []
|
||||||
|
merge rules@((fun,_):_) = (fun, \tok ->
|
||||||
|
case lookup tok (map snd rules) of
|
||||||
|
Just new -> return new
|
||||||
|
_ -> lookup "*" (map snd rules)
|
||||||
|
)
|
||||||
|
getToks = words . map (\c -> if elem c "\"," then ' ' else c)
|
||||||
|
|
||||||
|
printCoNLL :: CoNLL -> String
|
||||||
|
printCoNLL = unlines . map (concat . intersperse "\t")
|
||||||
|
|
||||||
-----------------------------------------------------------------------
|
-----------------------------------------------------------------------
|
||||||
-- Expressions & types
|
-- Expressions & types
|
||||||
|
|
||||||
@@ -335,6 +892,12 @@ readExpr str =
|
|||||||
freeStablePtr c_expr
|
freeStablePtr c_expr
|
||||||
return (Just expr)
|
return (Just expr)
|
||||||
|
|
||||||
|
pIdent :: ReadS String
|
||||||
|
pIdent = error "TODO: pIdent"
|
||||||
|
|
||||||
|
pExpr :: ReadS Expr
|
||||||
|
pExpr = error "TODO: pExpr"
|
||||||
|
|
||||||
-- | renders a type as a 'String'. The list
|
-- | renders a type as a 'String'. The list
|
||||||
-- of identifiers is the list of all free variables
|
-- of identifiers is the list of all free variables
|
||||||
-- in the type in order reverse to the order
|
-- in the type in order reverse to the order
|
||||||
@@ -348,6 +911,9 @@ showType scope ty =
|
|||||||
bracket (pgf_print_type c_ty pctxt 0 m) free $ \c_text ->
|
bracket (pgf_print_type c_ty pctxt 0 m) free $ \c_text ->
|
||||||
peekText c_text
|
peekText c_text
|
||||||
|
|
||||||
|
showContext :: [Var] -> [(BindType,Var,Type)] -> String
|
||||||
|
showContext = error "TODO: showContext"
|
||||||
|
|
||||||
-- | parses a 'String' as a type
|
-- | parses a 'String' as a type
|
||||||
readType :: String -> Maybe Type
|
readType :: String -> Maybe Type
|
||||||
readType str =
|
readType str =
|
||||||
@@ -360,3 +926,8 @@ readType str =
|
|||||||
else do ty <- deRefStablePtr c_ty
|
else do ty <- deRefStablePtr c_ty
|
||||||
freeStablePtr c_ty
|
freeStablePtr c_ty
|
||||||
return (Just ty)
|
return (Just ty)
|
||||||
|
|
||||||
|
readProbabilitiesFromFile :: FilePath -> IO (Map.Map String Double)
|
||||||
|
readProbabilitiesFromFile fpath = do
|
||||||
|
s <- readFile fpath
|
||||||
|
return $ Map.fromList [(f,read p) | f:p:_ <- map words (lines s)]
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ module PGF2.Expr(Var, Cat, Fun,
|
|||||||
mkFloat, unFloat,
|
mkFloat, unFloat,
|
||||||
mkMeta, unMeta,
|
mkMeta, unMeta,
|
||||||
|
|
||||||
exprSize, exprFunctions,
|
exprSize, exprFunctions, exprSubstitute,
|
||||||
|
|
||||||
mkType, unType,
|
mkType, unType,
|
||||||
mkHypo, mkDepHypo, mkImplHypo
|
mkHypo, mkDepHypo, mkImplHypo
|
||||||
@@ -169,6 +169,15 @@ exprFunctions (EImplArg e) = exprFunctions e
|
|||||||
exprFunctions (EFun f) = [f]
|
exprFunctions (EFun f) = [f]
|
||||||
exprFunctions _ = []
|
exprFunctions _ = []
|
||||||
|
|
||||||
|
exprSubstitute :: Expr -> [Expr] -> Expr
|
||||||
|
exprSubstitute (EAbs bt x e) vs = EAbs bt x (exprSubstitute e vs)
|
||||||
|
exprSubstitute (EApp e1 e2) vs = EApp (exprSubstitute e1 vs) (exprSubstitute e2 vs)
|
||||||
|
exprSubstitute (EMeta i) vs = vs !! i
|
||||||
|
exprSubstitute (ETyped e ty) vs = ETyped (exprSubstitute e vs) ty
|
||||||
|
exprSubstitute (EImplArg e) vs = EImplArg (exprSubstitute e vs)
|
||||||
|
exprSubstitute e vs = e
|
||||||
|
|
||||||
|
|
||||||
-- | creates a type from list of hypothesises, category and
|
-- | creates a type from list of hypothesises, category and
|
||||||
-- list of arguments for the category. The operation
|
-- list of arguments for the category. The operation
|
||||||
-- @mkType [h_1,...,h_n] C [e_1,...,e_m]@ will create
|
-- @mkType [h_1,...,h_n] C [e_1,...,e_m]@ will create
|
||||||
|
|||||||
@@ -18,11 +18,14 @@ import PGF2.Expr
|
|||||||
|
|
||||||
#include <pgf/pgf.h>
|
#include <pgf/pgf.h>
|
||||||
|
|
||||||
|
type AbsName = String -- ^ Name of abstract syntax
|
||||||
|
type ConcName = String -- ^ Name of concrete syntax
|
||||||
|
|
||||||
-- | An abstract data type representing multilingual grammar
|
-- | An abstract data type representing multilingual grammar
|
||||||
-- in Portable Grammar Format.
|
-- in Portable Grammar Format.
|
||||||
data PGF = PGF { a_db :: ForeignPtr PgfDB
|
data PGF = PGF { a_db :: ForeignPtr PgfDB
|
||||||
, revision :: ForeignPtr PgfRevision
|
, revision :: ForeignPtr PgfRevision
|
||||||
, langs :: Map.Map String Concr
|
, languages:: Map.Map ConcName Concr
|
||||||
}
|
}
|
||||||
data Concr = Concr {c_pgf :: ForeignPtr PgfDB, concr :: Ptr PgfConcr}
|
data Concr = Concr {c_pgf :: ForeignPtr PgfDB, concr :: Ptr PgfConcr}
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,93 @@
|
|||||||
{-# LANGUAGE ImplicitParams, RankNTypes #-}
|
{-# LANGUAGE ImplicitParams, RankNTypes #-}
|
||||||
|
|
||||||
module PGF2.Internal(-- * Access the internal structures
|
module PGF2.Internal(-- * Access the internal structures
|
||||||
|
FId,isPredefFId,
|
||||||
|
fidString,fidInt,fidFloat,fidVar,fidStart,
|
||||||
|
|
||||||
|
-- * Byte code
|
||||||
|
CodeLabel, Instr(..), IVal(..), TailInfo(..),
|
||||||
|
|
||||||
|
SeqId,LIndex,
|
||||||
|
FunId,Token,Production(..),PArg(..),Symbol(..),
|
||||||
|
|
||||||
|
unionPGF, writeConcr
|
||||||
) where
|
) where
|
||||||
|
|
||||||
|
import PGF2.FFI
|
||||||
|
import PGF2.Expr
|
||||||
|
|
||||||
|
type Token = String
|
||||||
|
type LIndex = Int
|
||||||
|
data Symbol
|
||||||
|
= SymCat {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex
|
||||||
|
| SymLit {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex
|
||||||
|
| SymVar {-# UNPACK #-} !Int {-# UNPACK #-} !Int
|
||||||
|
| SymKS Token
|
||||||
|
| SymKP [Symbol] [([Symbol],[String])]
|
||||||
|
| SymBIND -- the special BIND token
|
||||||
|
| SymNE -- non exist
|
||||||
|
| SymSOFT_BIND -- the special SOFT_BIND token
|
||||||
|
| SymSOFT_SPACE -- the special SOFT_SPACE token
|
||||||
|
| SymCAPIT -- the special CAPIT token
|
||||||
|
| SymALL_CAPIT -- the special ALL_CAPIT token
|
||||||
|
deriving (Eq,Ord,Show)
|
||||||
|
data Production
|
||||||
|
= PApply {-# UNPACK #-} !FunId [PArg]
|
||||||
|
| PCoerce {-# UNPACK #-} !FId
|
||||||
|
deriving (Eq,Ord,Show)
|
||||||
|
type FunId = Int
|
||||||
|
type SeqId = Int
|
||||||
|
type FId = Int
|
||||||
|
data PArg = PArg [FId] {-# UNPACK #-} !FId deriving (Eq,Ord,Show)
|
||||||
|
|
||||||
|
fidString, fidInt, fidFloat, fidVar, fidStart :: FId
|
||||||
|
fidString = (-1)
|
||||||
|
fidInt = (-2)
|
||||||
|
fidFloat = (-3)
|
||||||
|
fidVar = (-4)
|
||||||
|
fidStart = (-5)
|
||||||
|
|
||||||
|
isPredefFId :: FId -> Bool
|
||||||
|
isPredefFId = (`elem` [fidString, fidInt, fidFloat, fidVar])
|
||||||
|
|
||||||
|
type CodeLabel = Int
|
||||||
|
|
||||||
|
data Instr
|
||||||
|
= CHECK_ARGS {-# UNPACK #-} !Int
|
||||||
|
| CASE Fun {-# UNPACK #-} !CodeLabel
|
||||||
|
| CASE_LIT Literal {-# UNPACK #-} !CodeLabel
|
||||||
|
| SAVE {-# UNPACK #-} !Int
|
||||||
|
| ALLOC {-# UNPACK #-} !Int
|
||||||
|
| PUT_CONSTR Fun
|
||||||
|
| PUT_CLOSURE {-# UNPACK #-} !CodeLabel
|
||||||
|
| PUT_LIT Literal
|
||||||
|
| SET IVal
|
||||||
|
| SET_PAD
|
||||||
|
| PUSH_FRAME
|
||||||
|
| PUSH IVal
|
||||||
|
| TUCK IVal {-# UNPACK #-} !Int
|
||||||
|
| EVAL IVal TailInfo
|
||||||
|
| DROP {-# UNPACK #-} !Int
|
||||||
|
| JUMP {-# UNPACK #-} !CodeLabel
|
||||||
|
| FAIL
|
||||||
|
| PUSH_ACCUM Literal
|
||||||
|
| POP_ACCUM
|
||||||
|
| ADD
|
||||||
|
|
||||||
|
data IVal
|
||||||
|
= HEAP {-# UNPACK #-} !Int
|
||||||
|
| ARG_VAR {-# UNPACK #-} !Int
|
||||||
|
| FREE_VAR {-# UNPACK #-} !Int
|
||||||
|
| GLOBAL Fun
|
||||||
|
deriving Eq
|
||||||
|
|
||||||
|
data TailInfo
|
||||||
|
= RecCall
|
||||||
|
| TailCall {-# UNPACK #-} !Int
|
||||||
|
| UpdateCall
|
||||||
|
|
||||||
|
unionPGF :: PGF -> PGF -> Maybe PGF
|
||||||
|
unionPGF = error "TODO: unionPGF"
|
||||||
|
|
||||||
|
writeConcr :: FilePath -> Concr -> IO ()
|
||||||
|
writeConcr = error "TODO: writeConcr"
|
||||||
|
|||||||
@@ -91,7 +91,7 @@ branchPGF_ c_name p (Transaction f) =
|
|||||||
ex_type <- (#peek PgfExn, type) c_exn
|
ex_type <- (#peek PgfExn, type) c_exn
|
||||||
if (ex_type :: (#type PgfExnType)) == (#const PGF_EXN_NONE)
|
if (ex_type :: (#type PgfExnType)) == (#const PGF_EXN_NONE)
|
||||||
then do fptr2 <- C.newForeignPtr c_revision (withForeignPtr (a_db p) (\c_db -> pgf_free_revision c_db c_revision))
|
then do fptr2 <- C.newForeignPtr c_revision (withForeignPtr (a_db p) (\c_db -> pgf_free_revision c_db c_revision))
|
||||||
return (PGF (a_db p) fptr2 (langs p))
|
return (PGF (a_db p) fptr2 (languages p))
|
||||||
else do pgf_free_revision c_db c_revision
|
else do pgf_free_revision c_db c_revision
|
||||||
return p
|
return p
|
||||||
else do pgf_free_revision c_db c_revision
|
else do pgf_free_revision c_db c_revision
|
||||||
@@ -107,7 +107,7 @@ checkoutPGF p name =
|
|||||||
if c_revision == nullPtr
|
if c_revision == nullPtr
|
||||||
then return Nothing
|
then return Nothing
|
||||||
else do fptr2 <- C.newForeignPtr c_revision (withForeignPtr (a_db p) (\c_db -> pgf_free_revision c_db c_revision))
|
else do fptr2 <- C.newForeignPtr c_revision (withForeignPtr (a_db p) (\c_db -> pgf_free_revision c_db c_revision))
|
||||||
return (Just (PGF (a_db p) fptr2 (langs p)))
|
return (Just (PGF (a_db p) fptr2 (languages p)))
|
||||||
|
|
||||||
createFunction :: Fun -> Type -> Float -> Transaction ()
|
createFunction :: Fun -> Type -> Float -> Transaction ()
|
||||||
createFunction name ty prob = Transaction $ \c_db c_revision c_exn ->
|
createFunction name ty prob = Transaction $ \c_db c_revision c_exn ->
|
||||||
|
|||||||
Reference in New Issue
Block a user