diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs index df8dc7246..a2bbcfdeb 100644 --- a/src/compiler/GF/Command/Commands.hs +++ b/src/compiler/GF/Command/Commands.hs @@ -6,7 +6,6 @@ module GF.Command.Commands ( import Prelude hiding (putStrLn,(<>)) import PGF2 -import PGF2.Internal(writePGF) import GF.Compile.Export import GF.Compile.ToAPI @@ -666,7 +665,7 @@ pgfCommands = Map.fromList [ [e] -> case unApp e of Just (id, []) -> case functionType pgf id of Just ty -> do putStrLn (showFun pgf id ty) - putStrLn ("Probability: "++show (treeProbability pgf e)) + putStrLn ("Probability: "++show (exprProbability pgf e)) return void Nothing -> case categoryContext pgf id of Just hypos -> do putStrLn ("cat "++id++if null hypos then "" else ' ':showContext [] hypos) @@ -682,7 +681,7 @@ pgfCommands = Map.fromList [ Left err -> error err Right (e,ty) -> do putStrLn ("Expression: "++showExpr [] e) putStrLn ("Type: "++showType [] ty) - putStrLn ("Probability: "++show (treeProbability pgf e)) + putStrLn ("Probability: "++show (exprProbability pgf e)) return void _ -> do putStrLn "a single identifier or expression is expected from the command" return void, @@ -800,8 +799,8 @@ pgfCommands = Map.fromList [ showFun pgf id ty = kwd++" "++ id ++ " : " ++ showType [] ty where - kwd | functionIsDataCon pgf id = "data" - | otherwise = "fun" + kwd | functionIsConstructor pgf id = "data" + | otherwise = "fun" morphos pgf opts s = [(s,lookupMorpho concr s) | concr <- optLangs pgf opts] diff --git a/src/compiler/GF/Compile/CFGtoPGF.hs b/src/compiler/GF/Compile/CFGtoPGF.hs index e7bb6f050..ccc596104 100644 --- a/src/compiler/GF/Compile/CFGtoPGF.hs +++ b/src/compiler/GF/Compile/CFGtoPGF.hs @@ -21,7 +21,7 @@ import Data.Maybe(fromMaybe) -------------------------- cf2pgf :: Options -> FilePath -> ParamCFG -> Map.Map Fun Double -> PGF -cf2pgf opts fpath cf probs = +cf2pgf opts fpath cf probs = error "TODO: cf2pgf" {- build (let abstr = cf2abstr cf probs in newPGF [] aname abstr [(cname, cf2concr opts abstr cf)]) where @@ -134,3 +134,4 @@ mkRuleName rule = case ruleName rule of CFObj n _ -> n _ -> "_" +-} diff --git a/src/compiler/GF/Compile/Compute/Predef.hs b/src/compiler/GF/Compile/Compute/Predef.hs index 58b9b3447..fe75be09d 100644 --- a/src/compiler/GF/Compile/Compute/Predef.hs +++ b/src/compiler/GF/Compile/Compute/Predef.hs @@ -4,7 +4,7 @@ module GF.Compile.Compute.Predef(predef,predefName,delta) where import qualified Data.Map as Map import Data.Array(array,(!)) -import Data.List (isInfixOf) +import Data.List (isInfixOf,genericTake,genericDrop,genericLength) import Data.Char (isUpper,toLower,toUpper) import Control.Monad(ap) @@ -20,7 +20,7 @@ class Predef a where toValue :: a -> Value fromValue :: Value -> Err a -instance Predef Int where +instance Predef Integer where toValue = VInt fromValue (VInt i) = return i fromValue v = verror "Int" v @@ -87,8 +87,8 @@ predefList = delta f vs = case f of - Drop -> fromNonExist vs NonExist (ap2 (drop::Int->String->String)) - Take -> fromNonExist vs NonExist (ap2 (take::Int->String->String)) + Drop -> fromNonExist vs NonExist (ap2 (genericDrop::Integer->String->String)) + Take -> fromNonExist vs NonExist (ap2 (genericTake::Integer->String->String)) Tk -> fromNonExist vs NonExist (ap2 tk) Dp -> fromNonExist vs NonExist (ap2 dp) EqStr -> fromNonExist vs PFalse (ap2 ((==)::String->String->Bool)) @@ -97,10 +97,10 @@ delta f vs = ToUpper -> fromNonExist vs NonExist (ap1 (map toUpper)) ToLower -> fromNonExist vs NonExist (ap1 (map toLower)) IsUpper -> fromNonExist vs PFalse (ap1 (all' isUpper)) - Length -> fromNonExist vs (0::Int) (ap1 (length::String->Int)) - Plus -> ap2 ((+)::Int->Int->Int) - EqInt -> ap2 ((==)::Int->Int->Bool) - LessInt -> ap2 ((<)::Int->Int->Bool) + Length -> fromNonExist vs (0::Integer) (ap1 (genericLength::String->Integer)) + Plus -> ap2 ((+)::Integer->Integer->Integer) + EqInt -> ap2 ((==)::Integer->Integer->Bool) + LessInt -> ap2 ((<)::Integer->Integer->Bool) {- -- | Show | Read | ToStr | MapStr | EqVal -} Error -> ap1 VError Trace -> ap2 vtrace @@ -139,8 +139,12 @@ delta f vs = -- unimpl id = bug $ "unimplemented predefined function: "++showIdent id -- problem id vs = bug $ "unexpected arguments: Predef."++showIdent id++" "++show vs - tk i s = take (max 0 (length s - i)) s :: String - dp i s = drop (max 0 (length s - i)) s :: String + tk :: Integer -> String -> String + tk i s = genericTake (max 0 (genericLength s - i)) s + + dp :: Integer -> String -> String + dp i s = genericDrop (max 0 (genericLength s - i)) s + occur s t = isInfixOf (s::String) (t::String) occurs s t = any (`elem` (t::String)) (s::String) all' = all :: (a->Bool) -> [a] -> Bool diff --git a/src/compiler/GF/Compile/Compute/Value.hs b/src/compiler/GF/Compile/Compute/Value.hs index b5cf37f78..ea3db72a2 100644 --- a/src/compiler/GF/Compile/Compute/Value.hs +++ b/src/compiler/GF/Compile/Compute/Value.hs @@ -14,7 +14,7 @@ data Value -- -- | VClosure Env Term -- used in Typecheck.ConcreteNew | VAbs BindType Ident Binding -- used in Compute.Concrete | VProd BindType Value Ident Binding -- used in Compute.Concrete - | VInt Int + | VInt Integer | VFloat Double | VString String | VSort Ident diff --git a/src/compiler/GF/Compile/ConcreteToHaskell.hs b/src/compiler/GF/Compile/ConcreteToHaskell.hs index c9f0438e6..3df28492f 100644 --- a/src/compiler/GF/Compile/ConcreteToHaskell.hs +++ b/src/compiler/GF/Compile/ConcreteToHaskell.hs @@ -1,5 +1,7 @@ -- | Translate concrete syntax to Haskell module GF.Compile.ConcreteToHaskell(concretes2haskell,concrete2haskell) where + +import PGF2(Literal(..)) import Data.List(isPrefixOf,sort,sortOn) import qualified Data.Map as M import qualified Data.Set as S @@ -181,9 +183,9 @@ concrete2haskell opts ppL l = case l of - FloatConstant x -> pure (lit x) - IntConstant n -> pure (lit n) - StrConstant s -> pure (token s) + LFlt x -> pure (lit x) + LInt n -> pure (lit n) + LStr s -> pure (token s) pId p@(ParamId s) = if "to_R_" `isPrefixOf` unqual s then toIdent p else gId p -- !! a hack diff --git a/src/compiler/GF/Compile/GenerateBC.hs b/src/compiler/GF/Compile/GenerateBC.hs index befd7a4f8..5ef93dce0 100644 --- a/src/compiler/GF/Compile/GenerateBC.hs +++ b/src/compiler/GF/Compile/GenerateBC.hs @@ -4,7 +4,8 @@ module GF.Compile.GenerateBC(generateByteCode) where import GF.Grammar import GF.Grammar.Lookup(lookupAbsDef,lookupFunType) import GF.Data.Operations -import PGF2.Internal(CodeLabel,Instr(..),IVal(..),TailInfo(..),Literal(..)) +import PGF2(Literal(..)) +import PGF2.Internal(CodeLabel,Instr(..),IVal(..),TailInfo(..)) import qualified Data.Map as Map import Data.List(nub,mapAccumL) import Data.Maybe(fromMaybe) diff --git a/src/compiler/GF/Compile/GrammarToCanonical.hs b/src/compiler/GF/Compile/GrammarToCanonical.hs index f2f2edaa1..05b6a858b 100644 --- a/src/compiler/GF/Compile/GrammarToCanonical.hs +++ b/src/compiler/GF/Compile/GrammarToCanonical.hs @@ -19,7 +19,7 @@ import GF.Compile.Compute.Predef(predef) import GF.Compile.Compute.Value(Predefined(..)) import GF.Infra.Ident(ModuleName(..),Ident,ident2raw,rawIdentS,showIdent,isWildIdent) import GF.Infra.Option(Options,optionsPGF) -import PGF2.Internal(Literal(..)) +import PGF2(Literal(..)) import GF.Compile.Compute.Concrete(GlobalEnv,normalForm,resourceValues) import GF.Grammar.Canonical as C import System.FilePath ((), (<.>)) @@ -193,11 +193,11 @@ convert' gr vs = ppT Cn x -> VarValue (gId x) -- hmm Con c -> ParamConstant (Param (gId c) []) Sort k -> VarValue (gId k) - EInt n -> LiteralValue (IntConstant n) + EInt n -> LiteralValue (LInt n) Q (m,n) -> if m==cPredef then ppPredef n else VarValue (gQId m n) QC (m,n) -> ParamConstant (Param (gQId m n) []) - K s -> LiteralValue (StrConstant s) - Empty -> LiteralValue (StrConstant "") + K s -> LiteralValue (LStr s) + Empty -> LiteralValue (LStr "") FV ts -> VariantValue (map ppT ts) Alts t' vs -> alts vs (ppT t') _ -> error $ "convert' ppT: " ++ show t @@ -265,8 +265,8 @@ convert' gr vs = ppT concatValue :: LinValue -> LinValue -> LinValue concatValue v1 v2 = case (v1,v2) of - (LiteralValue (StrConstant ""),_) -> v2 - (_,LiteralValue (StrConstant "")) -> v1 + (LiteralValue (LStr ""),_) -> v2 + (_,LiteralValue (LStr "")) -> v1 _ -> ConcatValue v1 v2 -- | Smart constructor for projections @@ -429,11 +429,5 @@ unqual n = Unqual (ident2raw n) convFlags :: G.Grammar -> ModuleName -> Flags convFlags gr mn = - Flags [(rawIdentS n,convLit v) | + Flags [(rawIdentS n,v) | (n,v)<-err (const []) (optionsPGF.mflags) (lookupModule gr mn)] - where - convLit l = - case l of - LStr s -> Str s - LInt i -> C.Int i - LFlt d -> Flt d diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs index 7002677be..29c98b03d 100644 --- a/src/compiler/GF/Compile/GrammarToPGF.hs +++ b/src/compiler/GF/Compile/GrammarToPGF.hs @@ -30,7 +30,7 @@ import GHC.Prim import GHC.Base(getTag) grammar2PGF :: Options -> SourceGrammar -> ModuleName -> Map.Map PGF2.Fun Double -> IO PGF -grammar2PGF opts gr am probs = do +grammar2PGF opts gr am probs = error "TODO: grammar2PGF" {-do cnc_infos <- getConcreteInfos gr am return $ build (let gflags = if flag optSplitPGF opts @@ -445,3 +445,4 @@ compareCaseInsensitive (x:xs) (y:ys) = EQ -> compare x y x -> x x -> x +-} diff --git a/src/compiler/GF/Compile/OptimizePGF.hs b/src/compiler/GF/Compile/OptimizePGF.hs index f440c58d2..730322649 100644 --- a/src/compiler/GF/Compile/OptimizePGF.hs +++ b/src/compiler/GF/Compile/OptimizePGF.hs @@ -20,7 +20,7 @@ type ConcrData = ([(FId,[FunId])], -- ^ Lindefs [(Cat,FId,FId,[String])]) -- ^ Concrete categories optimizePGF :: Cat -> ConcrData -> ConcrData -optimizePGF startCat = topDownFilter startCat . bottomUpFilter +optimizePGF startCat = error "TODO: optimizePGF" {- topDownFilter startCat . bottomUpFilter catString = "String" catInt = "Int" @@ -187,3 +187,4 @@ filterProductions prods0 hoc0 prods accumHOC hoc (PApply funid args) = List.foldl' (\hoc (PArg hypos _) -> List.foldl' (\hoc fid -> IntSet.insert fid hoc) hoc (map snd hypos)) hoc args accumHOC hoc _ = hoc +-} diff --git a/src/compiler/GF/Compile/PGFtoJSON.hs b/src/compiler/GF/Compile/PGFtoJSON.hs index dbb65908b..4bc5b9c01 100644 --- a/src/compiler/GF/Compile/PGFtoJSON.hs +++ b/src/compiler/GF/Compile/PGFtoJSON.hs @@ -6,8 +6,8 @@ import Text.JSON import qualified Data.Map as Map pgf2json :: PGF -> String -pgf2json pgf = - encode $ makeObj +pgf2json pgf = error "TODO: pgf2json" +{- encode $ makeObj [ ("abstract", abstract2json pgf) , ("concretes", makeObj $ map concrete2json (Map.toList (languages pgf))) @@ -108,3 +108,4 @@ new f xs = [ ("type", showJSON f) , ("args", showJSON xs) ] +-} diff --git a/src/compiler/GF/Compile/TypeCheck/TC.hs b/src/compiler/GF/Compile/TypeCheck/TC.hs index c0df83394..e06c5b5a9 100644 --- a/src/compiler/GF/Compile/TypeCheck/TC.hs +++ b/src/compiler/GF/Compile/TypeCheck/TC.hs @@ -35,7 +35,7 @@ data AExp = AVr Ident Val | ACn QIdent Val | AType - | AInt Int + | AInt Integer | AFloat Double | AStr String | AMeta MetaId Val diff --git a/src/compiler/GF/Compiler.hs b/src/compiler/GF/Compiler.hs index 8479fe28a..6eac757ae 100644 --- a/src/compiler/GF/Compiler.hs +++ b/src/compiler/GF/Compiler.hs @@ -1,7 +1,7 @@ module GF.Compiler (mainGFC, linkGrammars, writeGrammar, writeOutputs) where import PGF2 -import PGF2.Internal(unionPGF,writePGF,writeConcr) +import PGF2.Internal(unionPGF,writeConcr) import GF.Compile as S(batchCompile,link,srcAbsName) import GF.CompileInParallel as P(parallelBatchCompile) import GF.Compile.Export diff --git a/src/compiler/GF/Grammar/BNFC.hs b/src/compiler/GF/Grammar/BNFC.hs index b9d2b3169..6f8f3d753 100644 --- a/src/compiler/GF/Grammar/BNFC.hs +++ b/src/compiler/GF/Grammar/BNFC.hs @@ -25,7 +25,7 @@ data BNFCRule = BNFCRule { ruleName :: CFTerm } | BNFCCoercions { coerCat :: Cat, - coerNum :: Int } + coerNum :: Integer } | BNFCTerminator { termNonEmpty :: Bool, termCat :: Cat, diff --git a/src/compiler/GF/Grammar/Binary.hs b/src/compiler/GF/Grammar/Binary.hs index 12eef3fbb..06e941674 100644 --- a/src/compiler/GF/Grammar/Binary.hs +++ b/src/compiler/GF/Grammar/Binary.hs @@ -22,7 +22,8 @@ import GF.Infra.Option import GF.Infra.UseIO(MonadIO(..)) import GF.Grammar.Grammar -import PGF2.Internal(Literal(..),Symbol(..)) +import PGF2(Literal(..)) +import PGF2.Internal(Symbol(..)) -- Please change this every time when the GFO format is changed gfoVersion = "GF04" diff --git a/src/compiler/GF/Grammar/Canonical.hs b/src/compiler/GF/Grammar/Canonical.hs index e62424f6a..cc581f826 100644 --- a/src/compiler/GF/Grammar/Canonical.hs +++ b/src/compiler/GF/Grammar/Canonical.hs @@ -9,9 +9,11 @@ {-# LANGUAGE DeriveTraversable #-} module GF.Grammar.Canonical where + import Prelude hiding ((<>)) import GF.Text.Pretty import GF.Infra.Ident (RawIdent) +import PGF(Literal(..)) -- | A Complete grammar data Grammar = Grammar Abstract [Concrete] deriving Show @@ -58,7 +60,7 @@ newtype ParamType = ParamTypeId ParamId deriving (Eq,Ord,Show) -- | Linearization value, RHS of @lin@ data LinValue = ConcatValue LinValue LinValue - | LiteralValue LinLiteral + | LiteralValue Literal | ErrorValue String | ParamConstant ParamValue | PredefValue PredefId @@ -74,11 +76,6 @@ data LinValue = ConcatValue LinValue LinValue | CommentedValue String LinValue deriving (Eq,Ord,Show) -data LinLiteral = FloatConstant Float - | IntConstant Int - | StrConstant String - deriving (Eq,Ord,Show) - data LinPattern = ParamPattern ParamPattern | RecordPattern [RecordRow LinPattern] | TuplePattern [LinPattern] @@ -120,9 +117,8 @@ newtype FunId = FunId Id deriving (Eq,Show) data VarId = Anonymous | VarId Id deriving Show -newtype Flags = Flags [(FlagName,FlagValue)] deriving Show +newtype Flags = Flags [(FlagName,Literal)] deriving Show type FlagName = Id -data FlagValue = Str String | Int Int | Flt Double deriving Show -- *** Identifiers @@ -243,13 +239,13 @@ instance PPA LinValue where VarValue v -> pp v _ -> parens lv -instance Pretty LinLiteral where pp = ppA +instance Pretty Literal where pp = ppA -instance PPA LinLiteral where +instance PPA Literal where ppA l = case l of - FloatConstant f -> pp f - IntConstant n -> pp n - StrConstant s -> doubleQuotes s -- hmm + LFlt f -> pp f + LInt n -> pp n + LStr s -> doubleQuotes s -- hmm instance RhsSeparator LinValue where rhsSep _ = pp "=" @@ -298,11 +294,6 @@ instance Pretty Flags where where ppFlag (name,value) = name <+> "=" <+> value <>";" -instance Pretty FlagValue where - pp (Str s) = pp s - pp (Int i) = pp i - pp (Flt d) = pp d - -------------------------------------------------------------------------------- -- | Pretty print atomically (i.e. wrap it in parentheses if necessary) class Pretty a => PPA a where ppA :: a -> Doc diff --git a/src/compiler/GF/Grammar/CanonicalJSON.hs b/src/compiler/GF/Grammar/CanonicalJSON.hs index 04c13df5e..5c2457350 100644 --- a/src/compiler/GF/Grammar/CanonicalJSON.hs +++ b/src/compiler/GF/Grammar/CanonicalJSON.hs @@ -8,7 +8,7 @@ import Data.Ratio (denominator, numerator) import GF.Grammar.Canonical import Control.Monad (guard) import GF.Infra.Ident (RawIdent,showRawIdent,rawIdentS) - +import PGF(Literal(..)) encodeJSON :: FilePath -> Grammar -> IO () encodeJSON fpath g = writeFile fpath (encode g) @@ -171,13 +171,13 @@ instance JSON LinValue where <|> do vs <- readJSON o :: Result [LinValue] return (foldr1 ConcatValue vs) -instance JSON LinLiteral where +instance JSON Literal where -- basic values (Str, Float, Int) are encoded as JSON strings/numbers: - showJSON (StrConstant s) = showJSON s - showJSON (FloatConstant f) = showJSON f - showJSON (IntConstant n) = showJSON n + showJSON (LStr s) = showJSON s + showJSON (LFlt f) = showJSON f + showJSON (LInt n) = showJSON n - readJSON = readBasicJSON StrConstant IntConstant FloatConstant + readJSON = readBasicJSON LStr LInt LFlt instance JSON LinPattern where -- wildcards and patterns without arguments are encoded as strings: @@ -262,15 +262,6 @@ instance JSON Flags where where fromRow (lbl, jsvalue) = do value <- readJSON jsvalue return (rawIdentS lbl, value) -instance JSON FlagValue where - -- flag values are encoded as basic JSON types: - showJSON (Str s) = showJSON s - showJSON (Int i) = showJSON i - showJSON (Flt f) = showJSON f - - readJSON = readBasicJSON Str Int Flt - - -------------------------------------------------------------------------------- -- ** Convenience functions diff --git a/src/compiler/GF/Grammar/Grammar.hs b/src/compiler/GF/Grammar/Grammar.hs index c272afc2b..758cdd270 100644 --- a/src/compiler/GF/Grammar/Grammar.hs +++ b/src/compiler/GF/Grammar/Grammar.hs @@ -354,7 +354,7 @@ data Term = | Cn Ident -- ^ constant | Con Ident -- ^ constructor | Sort Ident -- ^ basic type - | EInt Int -- ^ integer literal + | EInt Integer -- ^ integer literal | EFloat Double -- ^ floating point literal | K String -- ^ string literal or token: @\"foo\"@ | Empty -- ^ the empty string @[]@ @@ -409,7 +409,7 @@ data Patt = | PW -- ^ wild card pattern: @_@ | PR [(Label,Patt)] -- ^ record pattern: @{r = p ; ...}@ -- only concrete | PString String -- ^ string literal pattern: @\"foo\"@ -- only abstract - | PInt Int -- ^ integer literal pattern: @12@ -- only abstract + | PInt Integer -- ^ integer literal pattern: @12@ -- only abstract | PFloat Double -- ^ float literal pattern: @1.2@ -- only abstract | PT Type Patt -- ^ type-annotated pattern diff --git a/src/compiler/GF/Grammar/Lexer.x b/src/compiler/GF/Grammar/Lexer.x index 365388726..b6b024d7b 100644 --- a/src/compiler/GF/Grammar/Lexer.x +++ b/src/compiler/GF/Grammar/Lexer.x @@ -130,7 +130,7 @@ data Token | T_separator | T_nonempty | T_String String -- string literals - | T_Integer Int -- integer literals + | T_Integer Integer -- integer literals | T_Double Double -- double precision float literals | T_Ident Ident | T_EOF diff --git a/src/compiler/GF/Grammar/Macros.hs b/src/compiler/GF/Grammar/Macros.hs index 280aee141..d7869f507 100644 --- a/src/compiler/GF/Grammar/Macros.hs +++ b/src/compiler/GF/Grammar/Macros.hs @@ -216,7 +216,7 @@ typeTok = Sort cTok typeStrs = Sort cStrs typeString, typeFloat, typeInt :: Type -typeInts :: Int -> Type +typeInts :: Integer -> Type typePBool :: Type typeError :: Type @@ -227,7 +227,7 @@ typeInts i = App (cnPredef cInts) (EInt i) typePBool = cnPredef cPBool typeError = cnPredef cErrorType -isTypeInts :: Type -> Maybe Int +isTypeInts :: Type -> Maybe Integer isTypeInts (App c (EInt i)) | c == cnPredef cInts = Just i isTypeInts _ = Nothing @@ -324,7 +324,7 @@ freshAsTerm s = Vr (varX (readIntArg s)) string2term :: String -> Term string2term = K -int2term :: Int -> Term +int2term :: Integer -> Term int2term = EInt float2term :: Double -> Term diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs index cf3e72c36..a2b76008f 100644 --- a/src/compiler/GF/Infra/Option.hs +++ b/src/compiler/GF/Infra/Option.hs @@ -35,7 +35,7 @@ import GF.Infra.Ident import GF.Infra.GetOpt import GF.Grammar.Predef import System.FilePath -import PGF2.Internal(Literal(..)) +import PGF2(Literal(..)) import GF.Data.Operations(Err,ErrorMonad(..),liftErr) diff --git a/src/compiler/GF/Speech/PGFToCFG.hs b/src/compiler/GF/Speech/PGFToCFG.hs index f67ad0dbd..24f177824 100644 --- a/src/compiler/GF/Speech/PGFToCFG.hs +++ b/src/compiler/GF/Speech/PGFToCFG.hs @@ -25,7 +25,7 @@ toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc type Profile = [Int] pgfToCFG :: PGF -> Concr -> CFG -pgfToCFG pgf cnc = mkCFG start_cat extCats (startRules ++ concatMap ruleToCFRule rules) +pgfToCFG pgf cnc = error "TODO: pgfToCFG" {- mkCFG start_cat extCats (startRules ++ concatMap ruleToCFRule rules) where (_,start_cat,_) = unType (startCat pgf) @@ -116,3 +116,4 @@ pgfToCFG pgf cnc = mkCFG start_cat extCats (startRules ++ concatMap ruleToCFRule ruleToCFRule (c,PCoerce c') = [Rule (fcatToCat c l) [NonTerminal (fcatToCat c' l)] (CFRes 0) | l <- [0..catLinArity c-1]] +-} diff --git a/src/runtime/haskell/PGF2.hsc b/src/runtime/haskell/PGF2.hsc index eee6639f8..758ae39a6 100644 --- a/src/runtime/haskell/PGF2.hsc +++ b/src/runtime/haskell/PGF2.hsc @@ -14,17 +14,20 @@ ------------------------------------------------- module PGF2 (-- * PGF - PGF,readPGF,bootNGF,readNGF, + PGF,readPGF,bootNGF,readNGF,writePGF,showPGF, -- * Abstract syntax AbsName,abstractName,globalFlag,abstractFlag, + -- ** Categories - Cat,categories,categoryContext,categoryProb, + Cat,categories,categoryContext,categoryProbability, + -- ** Functions Fun, functions, functionsByCat, - functionType, functionIsConstructor, functionProb, + functionType, functionIsConstructor, functionProbability, + -- ** Expressions - Expr(..), Literal(..), showExpr, readExpr, + Expr(..), Literal(..), showExpr, readExpr, pExpr, pIdent, mkAbs, unAbs, mkApp, unApp, unapply, mkStr, unStr, @@ -33,20 +36,58 @@ module PGF2 (-- * PGF mkFloat, unFloat, mkMeta, unMeta, -- extra - exprSize, exprFunctions, + exprSize, exprFunctions, exprSubstitute, exprProbability, + -- ** Types Type(..), Hypo, BindType(..), startCat, - readType, showType, + readType, showType, showContext, mkType, unType, mkHypo, mkDepHypo, mkImplHypo, + -- ** Type checking + -- | Dynamically-built expressions should always be type-checked before using in other functions, + -- as the exceptions thrown by using invalid expressions may not catchable. + checkExpr, inferExpr, checkType, + + -- ** Computing + compute, + + -- ** Generation + generateAll, generateAllFrom, generateRandom, generateRandomFrom, + + -- ** Morphological Analysis + MorphoAnalysis, lookupMorpho, lookupCohorts, fullFormLexicon, + filterBest, filterLongest, + + -- ** Visualizations + GraphvizOptions(..), graphvizDefaults, + graphvizAbstractTree, graphvizParseTree, + Labels, getDepLabels, + graphvizDependencyTree, conlls2latexDoc, getCncDepLabels, + graphvizWordAlignment, + -- * Concrete syntax - ConcName, + ConcName,Concr,languages,concreteName,languageCode, + + -- ** Linearization + linearize, linearizeAll, tabularLinearize, tabularLinearizeAll, + FId, BracketedString(..), showBracketedString, flattenBracketedString, + bracketedLinearize, bracketedLinearizeAll, + hasLinearization, + printName, alignWords, gizaAlignment, + + -- ** Parsing + ParseOutput(..), parse, parseWithHeuristics, complete, -- * Exceptions - PGFError(..) + PGFError(..), + + -- * Auxiliaries + readProbabilitiesFromFile ) where +import Prelude hiding ((<>)) + import PGF2.Expr import PGF2.FFI @@ -54,15 +95,16 @@ import Foreign import Foreign.C import Control.Exception(mask_,bracket) import System.IO.Unsafe(unsafePerformIO) +import System.Random import qualified Foreign.Concurrent as C import qualified Data.Map as Map import Data.IORef +import Data.List(intersperse,groupBy) +import Data.Char(isUpper,isSpace,isPunctuation) +import Text.PrettyPrint #include -type AbsName = String -- ^ Name of abstract syntax -type ConcName = String -- ^ Name of concrete syntax - -- | Reads a PGF file and keeps it in memory. readPGF :: FilePath -> IO PGF readPGF fpath = @@ -106,6 +148,12 @@ readNGF fpath = fptr2 <- C.newForeignPtr c_revision (withForeignPtr fptr1 (\c_db -> pgf_free_revision c_db c_revision)) return (PGF fptr1 fptr2 Map.empty) +writePGF :: FilePath -> PGF -> IO () +writePGF = error "TODO: writePGF" + +showPGF :: PGF -> String +showPGF = error "TODO: showPGF" + -- | The abstract language name is the name of the top-level -- abstract module abstractName :: PGF -> AbsName @@ -156,14 +204,273 @@ functionIsConstructor p fun = do res <- withPgfExn (pgf_function_is_constructor c_db c_revision c_fun) return (res /= 0) -functionProb :: PGF -> Fun -> Float -functionProb p fun = +functionProbability :: PGF -> Fun -> Float +functionProbability p fun = unsafePerformIO $ withText fun $ \c_fun -> withForeignPtr (a_db p) $ \c_db -> withForeignPtr (revision p) $ \c_revision -> withPgfExn (pgf_function_prob c_db c_revision c_fun) +exprProbability :: PGF -> Expr -> Float +exprProbability = error "TODO: exprProbability" + +checkExpr :: PGF -> Expr -> Type -> Either String Expr +checkExpr = error "TODO: checkExpr" + +-- | Tries to infer the type of an expression. Note that +-- even if the expression is type correct it is not always +-- possible to infer its type in the GF type system. +-- In this case the function returns an error. +inferExpr :: PGF -> Expr -> Either String (Expr, Type) +inferExpr = error "TODO: inferExpr" + +-- | Check whether a type is consistent with the abstract +-- syntax of the grammar. +checkType :: PGF -> Type -> Either String Type +checkType = error "TODO: checkType" + +compute :: PGF -> Expr -> Expr +compute = error "TODO: compute" + +concreteName :: Concr -> ConcName +concreteName c = error "TODO: concreteName" + +languageCode :: Concr -> Maybe String +languageCode c = error "TODO: languageCode" + +printName :: Concr -> Fun -> Maybe String +printName lang fun = error "TODO: printName" + +alignWords :: Concr -> Expr -> [(String, [Int])] +alignWords = error "TODO: alignWords" + +gizaAlignment = error "TODO: gizaAlignment" + +----------------------------------------------------------------------------- +-- Functions using Concr +-- Morpho analyses, parsing & linearization + +-- | This triple is returned by all functions that deal with +-- the grammar's lexicon. Its first element is the name of an abstract +-- lexical function which can produce a given word or +-- a multiword expression (i.e. this is the lemma). +-- After that follows a string which describes +-- the particular inflection form. +-- +-- The last element is a logarithm from the +-- the probability of the function. The probability is not +-- conditionalized on the category of the function. This makes it +-- possible to compare the likelihood of two functions even if they +-- have different types. +type MorphoAnalysis = (Fun,String,Float) + +-- | 'lookupMorpho' takes a string which must be a single word or +-- a multiword expression. It then computes the list of all possible +-- morphological analyses. +lookupMorpho :: Concr -> String -> [MorphoAnalysis] +lookupMorpho = error "TODO: lookupMorpho" + +-- | 'lookupCohorts' takes an arbitrary string an produces +-- a list of all places where lexical items from the grammar have been +-- identified (i.e. cohorts). The list consists of triples of the format @(start,ans,end)@, +-- where @start-end@ identifies the span in the text and @ans@ is +-- the list of possible morphological analyses similar to 'lookupMorpho'. +-- +-- The list is sorted first by the @start@ position and after than +-- by the @end@ position. This can be used for instance if you want to +-- filter only the longest matches. +lookupCohorts :: Concr -> String -> [(Int,String,[MorphoAnalysis],Int)] +lookupCohorts = error "TODO: lookupCohorts" + +filterBest :: [(Int,String,[MorphoAnalysis],Int)] -> [(Int,String,[MorphoAnalysis],Int)] +filterBest ans = + reverse (iterate (maxBound :: Int) [(0,0,[],ans)] [] []) + where + iterate v0 [] [] res = res + iterate v0 [] new res = iterate v0 new [] res + iterate v0 ((_,v,conf, []):old) new res = + case compare v0 v of + LT -> res + EQ -> iterate v0 old new (merge conf res) + GT -> iterate v old new conf + iterate v0 ((_,v,conf,an:ans):old) new res = iterate v0 old (insert (v+valueOf an) conf an ans [] new) res + + valueOf (_,_,[],_) = 2 + valueOf _ = 1 + + insert v conf an@(start,_,_,end) ans l_new [] = + match start v conf ans ((end,v,comb conf an,filter end ans):l_new) [] + insert v conf an@(start,_,_,end) ans l_new (new@(end0,v0,conf0,ans0):r_new) = + case compare end0 end of + LT -> insert v conf an ans (new:l_new) r_new + EQ -> case compare v0 v of + LT -> match start v conf ans ((end,v, conf0,ans0): l_new) r_new + EQ -> match start v conf ans ((end,v,merge (comb conf an) conf0,ans0): l_new) r_new + GT -> match start v conf ans ((end,v,comb conf an, ans0): l_new) r_new + GT -> match start v conf ans ((end,v,comb conf an, filter end ans):new:l_new) r_new + + match start0 v conf (an@(start,_,_,end):ans) l_new r_new + | start0 == start = insert v conf an ans l_new r_new + match start0 v conf ans l_new r_new = revOn l_new r_new + + comb ((start0,w0,an0,end0):conf) (start,w,an,end) + | end0 == start && (unk w0 an0 || unk w an) = (start0,w0++w,[],end):conf + comb conf an = an:conf + + filter end [] = [] + filter end (next@(start,_,_,_):ans) + | end <= start = next:ans + | otherwise = filter end ans + + revOn [] ys = ys + revOn (x:xs) ys = revOn xs (x:ys) + + merge [] ans = ans + merge ans [] = ans + merge (an1@(start1,_,_,end1):ans1) (an2@(start2,_,_,end2):ans2) = + case compare (start1,end1) (start2,end2) of + GT -> an1 : merge ans1 (an2:ans2) + EQ -> an1 : merge ans1 ans2 + LT -> an2 : merge (an1:ans1) ans2 + +filterLongest :: [(Int,String,[MorphoAnalysis],Int)] -> [(Int,String,[MorphoAnalysis],Int)] +filterLongest [] = [] +filterLongest (an:ans) = longest an ans + where + longest prev [] = [prev] + longest prev@(start0,_,_,end0) (next@(start,_,_,end):ans) + | start0 == start = longest next ans + | otherwise = filter prev (next:ans) + + filter prev [] = [prev] + filter prev@(start0,w0,an0,end0) (next@(start,w,an,end):ans) + | end0 == start && (unk w0 an0 || unk w an) + = filter (start0,w0++w,[],end) ans + | end0 <= start = prev : longest next ans + | otherwise = filter prev ans + +unk w [] | any (not . isPunctuation) w = True +unk _ _ = False + +fullFormLexicon :: Concr -> [(String, [MorphoAnalysis])] +fullFormLexicon lang = error "TODO: fullFormLexicon" + +-- | This data type encodes the different outcomes which you could get from the parser. +data ParseOutput a + = ParseFailed Int String -- ^ The integer is the position in number of unicode characters where the parser failed. + -- The string is the token where the parser have failed. + | ParseOk a -- ^ If the parsing and the type checking are successful + -- we get the abstract syntax trees as either a list or a chart. + | ParseIncomplete -- ^ The sentence is not complete. + +parse :: Concr -> Type -> String -> ParseOutput [(Expr,Float)] +parse lang ty sent = parseWithHeuristics lang ty sent (-1.0) [] + +parseWithHeuristics :: Concr -- ^ the language with which we parse + -> Type -- ^ the start category + -> String -- ^ the input sentence + -> Double -- ^ the heuristic factor. + -- A negative value tells the parser + -- to lookup up the default from + -- the grammar flags + -> [(Cat, String -> Int -> Maybe (Expr,Float,Int))] + -- ^ a list of callbacks for literal categories. + -- The arguments of the callback are: + -- the index of the constituent for the literal category; + -- the input sentence; the current offset in the sentence. + -- If a literal has been recognized then the output should + -- be Just (expr,probability,end_offset) + -> ParseOutput [(Expr,Float)] +parseWithHeuristics = error "TODO: parseWithHeuristics" + +-- | Returns possible completions of the current partial input. +complete :: Concr -- ^ the language with which we parse + -> Type -- ^ the start category + -> String -- ^ the input sentence (excluding token being completed) + -> String -- ^ prefix (partial token being completed) + -> ParseOutput [(String, Fun, Cat, Float)] -- ^ (token, category, function, probability) +complete = error "TODO: complete" + +-- | Returns True if there is a linearization defined for that function in that language +hasLinearization :: Concr -> Fun -> Bool +hasLinearization = error "TODO: linearize" + +-- | Linearizes an expression as a string in the language +linearize :: Concr -> Expr -> String +linearize lang e = error "TODO: linearize" + +-- | Generates all possible linearizations of an expression +linearizeAll :: Concr -> Expr -> [String] +linearizeAll lang e = error "TODO: linearizeAll" + +-- | Generates a table of linearizations for an expression +tabularLinearize :: Concr -> Expr -> [(String, String)] +tabularLinearize lang e = + case tabularLinearizeAll lang e of + (lins:_) -> lins + _ -> [] + +-- | Generates a table of linearizations for an expression +tabularLinearizeAll :: Concr -> Expr -> [[(String, String)]] +tabularLinearizeAll lang e = error "TODO: tabularLinearizeAll" + +type FId = Int + +-- | BracketedString represents a sentence that is linearized +-- as usual but we also want to retain the ''brackets'' that +-- mark the beginning and the end of each constituent. +data BracketedString + = Leaf String -- ^ this is the leaf i.e. a single token + | BIND -- ^ the surrounding tokens must be bound together + | Bracket Cat {-# UNPACK #-} !FId String Fun [BracketedString] + -- ^ this is a bracket. The 'Cat' is the category of + -- the phrase. The 'FId' is an unique identifier for + -- every phrase in the sentence. For context-free grammars + -- i.e. without discontinuous constituents this identifier + -- is also unique for every bracket. When there are discontinuous + -- phrases then the identifiers are unique for every phrase but + -- not for every bracket since the bracket represents a constituent. + -- The different constituents could still be distinguished by using + -- the analysis string. If the grammar is reduplicating + -- then the constituent indices will be the same for all brackets + -- that represents the same constituent. + -- The 'Fun' is the name of the abstract function that generated + -- this phrase. + +-- | Renders the bracketed string as a string where +-- the brackets are shown as @(S ...)@ where +-- @S@ is the category. +showBracketedString :: BracketedString -> String +showBracketedString = render . ppBracketedString + +ppBracketedString (Leaf t) = text t +ppBracketedString BIND = text "&+" +ppBracketedString (Bracket cat fid _ _ bss) = parens (text cat <> colon <> int fid <+> hsep (map ppBracketedString bss)) + +-- | Extracts the sequence of tokens from the bracketed string +flattenBracketedString :: BracketedString -> [String] +flattenBracketedString (Leaf w) = [w] +flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString bss + +bracketedLinearize :: Concr -> Expr -> [BracketedString] +bracketedLinearize = error "TODO: bracketedLinearize" + +bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]] +bracketedLinearizeAll = error "TODO: bracketedLinearizeAll" + +generateAll :: PGF -> Type -> [(Expr,Float)] +generateAll p ty = error "TODO: generateAll" + +generateAllFrom :: PGF -> Expr -> [(Expr,Float)] +generateAllFrom p ty = error "TODO: generateAllFrom" + +generateRandom :: StdGen -> PGF -> Type -> [a] +generateRandom = error "TODO: generateRandom" + +generateRandomFrom :: StdGen -> PGF -> Expr -> [a] +generateRandomFrom = error "TODO: generateRandomFrom" + -- | List of all functions defined in the abstract syntax categories :: PGF -> [Cat] categories p = @@ -184,7 +491,7 @@ categories p = name <- peekText key writeIORef ref $ (name : names) -categoryContext :: PGF -> Cat -> [Hypo] +categoryContext :: PGF -> Cat -> Maybe [Hypo] categoryContext p cat = unsafePerformIO $ withText cat $ \c_cat -> @@ -195,11 +502,11 @@ categoryContext p cat = mask_ $ do c_hypos <- withPgfExn (pgf_category_context c_db c_revision c_cat p_n_hypos u) if c_hypos == nullPtr - then return [] + then return Nothing else do n_hypos <- peek p_n_hypos hypos <- peekHypos c_hypos 0 n_hypos free c_hypos - return hypos + return (Just hypos) where peekHypos :: Ptr PgfTypeHypo -> CSize -> CSize -> IO [Hypo] peekHypos c_hypo i n @@ -214,8 +521,8 @@ categoryContext p cat = return ((bt,cat,ty) : hs) | otherwise = return [] -categoryProb :: PGF -> Cat -> Float -categoryProb p cat = +categoryProbability :: PGF -> Cat -> Float +categoryProbability p cat = unsafePerformIO $ withText cat $ \c_cat -> withForeignPtr (a_db p) $ \c_db -> @@ -291,6 +598,256 @@ abstractFlag p name = freeStablePtr c_lit return (Just lit) +----------------------------------------------------------------------------- +-- Graphviz + +data GraphvizOptions = GraphvizOptions {noLeaves :: Bool, + noFun :: Bool, + noCat :: Bool, + noDep :: Bool, + nodeFont :: String, + leafFont :: String, + nodeColor :: String, + leafColor :: String, + nodeEdgeStyle :: String, + leafEdgeStyle :: String + } + +graphvizDefaults = GraphvizOptions False False False True "" "" "" "" "" "" + +-- | Renders an abstract syntax tree in a Graphviz format. +graphvizAbstractTree :: PGF -> GraphvizOptions -> Expr -> String +graphvizAbstractTree p opts e = error "TODO: graphvizAbstractTree" + +graphvizParseTree :: Concr -> GraphvizOptions -> Expr -> String +graphvizParseTree c opts e = error "TODO: graphvizParseTree" + +graphvizWordAlignment :: [Concr] -> GraphvizOptions -> Expr -> String +graphvizWordAlignment cs opts e = error "TODO: graphvizWordAlignment" + + +type Labels = Map.Map Fun [String] + +getDepLabels :: String -> Labels +getDepLabels s = Map.fromList [(f,ls) | f:ls <- map words (lines s)] + +-- | Visualize word dependency tree. +graphvizDependencyTree + :: String -- ^ Output format: @"latex"@, @"conll"@, @"malt_tab"@, @"malt_input"@ or @"dot"@ + -> Bool -- ^ Include extra information (debug) + -> Maybe Labels -- ^ abstract label information obtained with 'getDepLabels' + -> Maybe CncLabels -- ^ concrete label information obtained with ' ' (was: unused (was: @Maybe String@)) + -> Concr + -> Expr + -> String -- ^ Rendered output in the specified format +graphvizDependencyTree format debug mlab mclab concr t = error "TODO: graphvizDependencyTree" + +---------------------- should be a separate module? + +-- visualization with latex output. AR Nov 2015 + +conlls2latexDoc :: [String] -> String +conlls2latexDoc = + render . + latexDoc . + vcat . + intersperse (text "" $+$ app "vspace" (text "4mm")) . + map conll2latex . + filter (not . null) + +conll2latex :: String -> Doc +conll2latex = ppLaTeX . conll2latex' . parseCoNLL + +conll2latex' :: CoNLL -> [LaTeX] +conll2latex' = dep2latex . conll2dep' + +data Dep = Dep { + wordLength :: Int -> Double -- length of word at position int -- was: fixed width, millimetres (>= 20.0) + , tokens :: [(String,String)] -- word, pos (0..) + , deps :: [((Int,Int),String)] -- from, to, label + , root :: Int -- root word position + } + +-- some general measures +defaultWordLength = 20.0 -- the default fixed width word length, making word 100 units +defaultUnit = 0.2 -- unit in latex pictures, 0.2 millimetres +spaceLength = 10.0 +charWidth = 1.8 + +wsize rwld w = 100 * rwld w + spaceLength -- word length, units +wpos rwld i = sum [wsize rwld j | j <- [0..i-1]] -- start position of the i'th word +wdist rwld x y = sum [wsize rwld i | i <- [min x y .. max x y - 1]] -- distance between words x and y +labelheight h = h + arcbase + 3 -- label just above arc; 25 would put it just below +labelstart c = c - 15.0 -- label starts 15u left of arc centre +arcbase = 30.0 -- arcs start and end 40u above the bottom +arcfactor r = r * 600 -- reduction of arc size from word distance +xyratio = 3 -- width/height ratio of arcs + +putArc :: (Int -> Double) -> Int -> Int -> Int -> String -> [DrawingCommand] +putArc frwld height x y label = [oval,arrowhead,labelling] where + oval = Put (ctr,arcbase) (OvalTop (wdth,hght)) + arrowhead = Put (endp,arcbase + 5) (ArrowDown 5) -- downgoing arrow 5u above the arc base + labelling = Put (labelstart ctr,labelheight (hght/2)) (TinyText label) + dxy = wdist frwld x y -- distance between words, >>= 20.0 + ndxy = 100 * rwld * fromIntegral height -- distance that is indep of word length + hdxy = dxy / 2 -- half the distance + wdth = dxy - (arcfactor rwld)/dxy -- longer arcs are wider in proportion + hght = ndxy / (xyratio * rwld) -- arc height is independent of word length + begp = min x y -- begin position of oval + ctr = wpos frwld begp + hdxy + (if x < y then 20 else 10) -- LR arcs are farther right from center of oval + endp = (if x < y then (+) else (-)) ctr (wdth/2) -- the point of the arrow + rwld = 0.5 ---- + +dep2latex :: Dep -> [LaTeX] +dep2latex d = + [Comment (unwords (map fst (tokens d))), + Picture defaultUnit (width,height) ( + [Put (wpos rwld i,0) (Text w) | (i,w) <- zip [0..] (map fst (tokens d))] -- words + ++ [Put (wpos rwld i,15) (TinyText w) | (i,w) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom + ++ concat [putArc rwld (aheight x y) x y label | ((x,y),label) <- deps d] -- arcs and labels + ++ [Put (wpos rwld (root d) + 15,height) (ArrowDown (height-arcbase))] + ++ [Put (wpos rwld (root d) + 20,height - 10) (TinyText "ROOT")] + )] + where + wld i = wordLength d i -- >= 20.0 + rwld i = (wld i) / defaultWordLength -- >= 1.0 + aheight x y = depth (min x y) (max x y) + 1 ---- abs (x-y) + arcs = [(min u v, max u v) | ((u,v),_) <- deps d] + depth x y = case [(u,v) | (u,v) <- arcs, (x < u && v <= y) || (x == u && v < y)] of ---- only projective arcs counted + [] -> 0 + uvs -> 1 + maximum (0:[depth u v | (u,v) <- uvs]) + width = {-round-} (sum [wsize rwld w | (w,_) <- zip [0..] (tokens d)]) + {-round-} spaceLength * fromIntegral ((length (tokens d)) - 1) + height = 50 + 20 * {-round-} (maximum (0:[aheight x y | ((x,y),_) <- deps d])) + +type CoNLL = [[String]] +parseCoNLL :: String -> CoNLL +parseCoNLL = map words . lines + +--conll2dep :: String -> Dep +--conll2dep = conll2dep' . parseCoNLL + +conll2dep' :: CoNLL -> Dep +conll2dep' ls = Dep { + wordLength = wld + , tokens = toks + , deps = dps + , root = head $ [read x-1 | x:_:_:_:_:_:"0":_ <- ls] ++ [1] + } + where + wld i = maximum (0:[charWidth * fromIntegral (length w) | w <- let (tok,pos) = toks !! i in [tok,pos]]) + toks = [(w,c) | _:w:_:c:_ <- ls] + dps = [((read y-1, read x-1),lab) | x:_:_:_:_:_:y:lab:_ <- ls, y /="0"] + --maxdist = maximum [abs (x-y) | ((x,y),_) <- dps] + + +-- * LaTeX Pictures (see https://en.wikibooks.org/wiki/LaTeX/Picture) + +-- We render both LaTeX and SVG from this intermediate representation of +-- LaTeX pictures. + +data LaTeX = Comment String | Picture UnitLengthMM Size [DrawingCommand] +data DrawingCommand = Put Position Object +data Object = Text String | TinyText String | OvalTop Size | ArrowDown Length + +type UnitLengthMM = Double +type Size = (Double,Double) +type Position = (Double,Double) +type Length = Double + + +-- * latex formatting +ppLaTeX = vcat . map ppLaTeX1 + where + ppLaTeX1 el = + case el of + Comment s -> comment s + Picture unit size cmds -> + app "setlength{\\unitlength}" (text (show unit ++ "mm")) + $$ hang (app "begin" (text "picture")<>text (show size)) 2 + (vcat (map ppDrawingCommand cmds)) + $$ app "end" (text "picture") + $$ text "" + + ppDrawingCommand (Put pos obj) = put pos (ppObject obj) + + ppObject obj = + case obj of + Text s -> text s + TinyText s -> small (text s) + OvalTop size -> text "\\oval" <> text (show size) <> text "[t]" + ArrowDown len -> app "vector(0,-1)" (text (show len)) + + put p@(_,_) = app ("put" ++ show p) + small w = text "{\\tiny" <+> w <> text "}" + comment s = text "%%" <+> text s -- line break show follow + +app macro arg = text "\\" <> text macro <> text "{" <> arg <> text "}" + + +latexDoc :: Doc -> Doc +latexDoc body = + vcat [text "\\documentclass{article}", + text "\\usepackage[utf8]{inputenc}", + text "\\begin{document}", + body, + text "\\end{document}"] + + +---------------------------------- +-- concrete syntax annotations (local) on top of conll +-- examples of annotations: +-- UseComp {"not"} PART neg head +-- UseComp {*} AUX cop head + +type CncLabels = [(String, String -> Maybe (String -> String,String,String))] +-- (fun, word -> (pos,label,target)) +-- the pos can remain unchanged, as in the current notation in the article + +fixCoNLL :: CncLabels -> CoNLL -> CoNLL +fixCoNLL labels conll = map fixc conll where + fixc row = case row of + (i:word:fun:pos:cat:x_:"0":"dep":xs) -> (i:word:fun:pos:cat:x_:"0":"root":xs) --- change the root label from dep to root + (i:word:fun:pos:cat:x_:j:label:xs) -> case look (fun,word) of + Just (pos',label',"head") -> (i:word:fun:pos' pos:cat:x_:j :label':xs) + Just (pos',label',target) -> (i:word:fun:pos' pos:cat:x_: getDep j target:label':xs) + _ -> row + _ -> row + + look (fun,word) = case lookup fun labels of + Just relabel -> case relabel word of + Just row -> Just row + _ -> case lookup "*" labels of + Just starlabel -> starlabel word + _ -> Nothing + _ -> case lookup "*" labels of + Just starlabel -> starlabel word + _ -> Nothing + + getDep j label = maybe j id $ lookup (label,j) [((label,j),i) | i:word:fun:pos:cat:x_:j:label:xs <- conll] + +getCncDepLabels :: String -> CncLabels +getCncDepLabels = map merge . groupBy (\ (x,_) (a,_) -> x == a) . concatMap analyse . filter choose . lines where + --- choose is for compatibility with the general notation + choose line = notElem '(' line && elem '{' line --- ignoring non-local (with "(") and abstract (without "{") rules + + analyse line = case break (=='{') line of + (beg,_:ws) -> case break (=='}') ws of + (toks,_:target) -> case (words beg, words target) of + (fun:_,[ label,j]) -> [(fun, (tok, (id, label,j))) | tok <- getToks toks] + (fun:_,[pos,label,j]) -> [(fun, (tok, (const pos,label,j))) | tok <- getToks toks] + _ -> [] + _ -> [] + _ -> [] + merge rules@((fun,_):_) = (fun, \tok -> + case lookup tok (map snd rules) of + Just new -> return new + _ -> lookup "*" (map snd rules) + ) + getToks = words . map (\c -> if elem c "\"," then ' ' else c) + +printCoNLL :: CoNLL -> String +printCoNLL = unlines . map (concat . intersperse "\t") + ----------------------------------------------------------------------- -- Expressions & types @@ -335,6 +892,12 @@ readExpr str = freeStablePtr c_expr return (Just expr) +pIdent :: ReadS String +pIdent = error "TODO: pIdent" + +pExpr :: ReadS Expr +pExpr = error "TODO: pExpr" + -- | renders a type as a 'String'. The list -- of identifiers is the list of all free variables -- in the type in order reverse to the order @@ -348,6 +911,9 @@ showType scope ty = bracket (pgf_print_type c_ty pctxt 0 m) free $ \c_text -> peekText c_text +showContext :: [Var] -> [(BindType,Var,Type)] -> String +showContext = error "TODO: showContext" + -- | parses a 'String' as a type readType :: String -> Maybe Type readType str = @@ -360,3 +926,8 @@ readType str = else do ty <- deRefStablePtr c_ty freeStablePtr c_ty return (Just ty) + +readProbabilitiesFromFile :: FilePath -> IO (Map.Map String Double) +readProbabilitiesFromFile fpath = do + s <- readFile fpath + return $ Map.fromList [(f,read p) | f:p:_ <- map words (lines s)] diff --git a/src/runtime/haskell/PGF2/Expr.hs b/src/runtime/haskell/PGF2/Expr.hs index a8b4abe73..66661c785 100644 --- a/src/runtime/haskell/PGF2/Expr.hs +++ b/src/runtime/haskell/PGF2/Expr.hs @@ -11,7 +11,7 @@ module PGF2.Expr(Var, Cat, Fun, mkFloat, unFloat, mkMeta, unMeta, - exprSize, exprFunctions, + exprSize, exprFunctions, exprSubstitute, mkType, unType, mkHypo, mkDepHypo, mkImplHypo @@ -169,6 +169,15 @@ exprFunctions (EImplArg e) = exprFunctions e exprFunctions (EFun f) = [f] exprFunctions _ = [] +exprSubstitute :: Expr -> [Expr] -> Expr +exprSubstitute (EAbs bt x e) vs = EAbs bt x (exprSubstitute e vs) +exprSubstitute (EApp e1 e2) vs = EApp (exprSubstitute e1 vs) (exprSubstitute e2 vs) +exprSubstitute (EMeta i) vs = vs !! i +exprSubstitute (ETyped e ty) vs = ETyped (exprSubstitute e vs) ty +exprSubstitute (EImplArg e) vs = EImplArg (exprSubstitute e vs) +exprSubstitute e vs = e + + -- | creates a type from list of hypothesises, category and -- list of arguments for the category. The operation -- @mkType [h_1,...,h_n] C [e_1,...,e_m]@ will create diff --git a/src/runtime/haskell/PGF2/FFI.hsc b/src/runtime/haskell/PGF2/FFI.hsc index 1e9e843a9..c2b477a6b 100644 --- a/src/runtime/haskell/PGF2/FFI.hsc +++ b/src/runtime/haskell/PGF2/FFI.hsc @@ -18,11 +18,14 @@ import PGF2.Expr #include +type AbsName = String -- ^ Name of abstract syntax +type ConcName = String -- ^ Name of concrete syntax + -- | An abstract data type representing multilingual grammar -- in Portable Grammar Format. data PGF = PGF { a_db :: ForeignPtr PgfDB , revision :: ForeignPtr PgfRevision - , langs :: Map.Map String Concr + , languages:: Map.Map ConcName Concr } data Concr = Concr {c_pgf :: ForeignPtr PgfDB, concr :: Ptr PgfConcr} diff --git a/src/runtime/haskell/PGF2/Internal.hsc b/src/runtime/haskell/PGF2/Internal.hsc index 67d7b4d9c..f700e225e 100644 --- a/src/runtime/haskell/PGF2/Internal.hsc +++ b/src/runtime/haskell/PGF2/Internal.hsc @@ -1,4 +1,93 @@ {-# LANGUAGE ImplicitParams, RankNTypes #-} module PGF2.Internal(-- * Access the internal structures + FId,isPredefFId, + fidString,fidInt,fidFloat,fidVar,fidStart, + + -- * Byte code + CodeLabel, Instr(..), IVal(..), TailInfo(..), + + SeqId,LIndex, + FunId,Token,Production(..),PArg(..),Symbol(..), + + unionPGF, writeConcr ) where + +import PGF2.FFI +import PGF2.Expr + +type Token = String +type LIndex = Int +data Symbol + = SymCat {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex + | SymLit {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex + | SymVar {-# UNPACK #-} !Int {-# UNPACK #-} !Int + | SymKS Token + | SymKP [Symbol] [([Symbol],[String])] + | SymBIND -- the special BIND token + | SymNE -- non exist + | SymSOFT_BIND -- the special SOFT_BIND token + | SymSOFT_SPACE -- the special SOFT_SPACE token + | SymCAPIT -- the special CAPIT token + | SymALL_CAPIT -- the special ALL_CAPIT token + deriving (Eq,Ord,Show) +data Production + = PApply {-# UNPACK #-} !FunId [PArg] + | PCoerce {-# UNPACK #-} !FId + deriving (Eq,Ord,Show) +type FunId = Int +type SeqId = Int +type FId = Int +data PArg = PArg [FId] {-# UNPACK #-} !FId deriving (Eq,Ord,Show) + +fidString, fidInt, fidFloat, fidVar, fidStart :: FId +fidString = (-1) +fidInt = (-2) +fidFloat = (-3) +fidVar = (-4) +fidStart = (-5) + +isPredefFId :: FId -> Bool +isPredefFId = (`elem` [fidString, fidInt, fidFloat, fidVar]) + +type CodeLabel = Int + +data Instr + = CHECK_ARGS {-# UNPACK #-} !Int + | CASE Fun {-# UNPACK #-} !CodeLabel + | CASE_LIT Literal {-# UNPACK #-} !CodeLabel + | SAVE {-# UNPACK #-} !Int + | ALLOC {-# UNPACK #-} !Int + | PUT_CONSTR Fun + | PUT_CLOSURE {-# UNPACK #-} !CodeLabel + | PUT_LIT Literal + | SET IVal + | SET_PAD + | PUSH_FRAME + | PUSH IVal + | TUCK IVal {-# UNPACK #-} !Int + | EVAL IVal TailInfo + | DROP {-# UNPACK #-} !Int + | JUMP {-# UNPACK #-} !CodeLabel + | FAIL + | PUSH_ACCUM Literal + | POP_ACCUM + | ADD + +data IVal + = HEAP {-# UNPACK #-} !Int + | ARG_VAR {-# UNPACK #-} !Int + | FREE_VAR {-# UNPACK #-} !Int + | GLOBAL Fun + deriving Eq + +data TailInfo + = RecCall + | TailCall {-# UNPACK #-} !Int + | UpdateCall + +unionPGF :: PGF -> PGF -> Maybe PGF +unionPGF = error "TODO: unionPGF" + +writeConcr :: FilePath -> Concr -> IO () +writeConcr = error "TODO: writeConcr" diff --git a/src/runtime/haskell/PGF2/Transactions.hsc b/src/runtime/haskell/PGF2/Transactions.hsc index 5e9a9542d..24582cdd3 100644 --- a/src/runtime/haskell/PGF2/Transactions.hsc +++ b/src/runtime/haskell/PGF2/Transactions.hsc @@ -91,7 +91,7 @@ branchPGF_ c_name p (Transaction f) = ex_type <- (#peek PgfExn, type) c_exn if (ex_type :: (#type PgfExnType)) == (#const PGF_EXN_NONE) then do fptr2 <- C.newForeignPtr c_revision (withForeignPtr (a_db p) (\c_db -> pgf_free_revision c_db c_revision)) - return (PGF (a_db p) fptr2 (langs p)) + return (PGF (a_db p) fptr2 (languages p)) else do pgf_free_revision c_db c_revision return p else do pgf_free_revision c_db c_revision @@ -107,7 +107,7 @@ checkoutPGF p name = if c_revision == nullPtr then return Nothing else do fptr2 <- C.newForeignPtr c_revision (withForeignPtr (a_db p) (\c_db -> pgf_free_revision c_db c_revision)) - return (Just (PGF (a_db p) fptr2 (langs p))) + return (Just (PGF (a_db p) fptr2 (languages p))) createFunction :: Fun -> Type -> Float -> Transaction () createFunction name ty prob = Transaction $ \c_db c_revision c_exn ->