modules for embedded GF

This commit is contained in:
aarne
2005-05-10 11:49:13 +00:00
parent b2357c9ddc
commit 8b0e66b45b
6 changed files with 363 additions and 6 deletions

View File

@@ -18,11 +18,14 @@ November 8, 2004.
</p><h2>News</h2>
<!--
<i>May 9, 2005</i>. Version 2.2 coming soon. Here is a
<a href="download-2.2/GF-2.2.tgz">preliminary source release</a>.
Here are the <a href="doc/gf2.2-highlights.html">highlights</a>.
<p>
-->
<i>May 9, 2005</i>.
PhD Thesis by

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:45:57 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.33 $
-- > CVS $Date: 2005/05/10 12:49:13 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.34 $
--
-- Application Programmer's Interface to GF; also used by Shell. AR 10/11/2001
-----------------------------------------------------------------------------
@@ -88,6 +88,12 @@ linearize :: GFGrammar -> Tree -> String
linearize sgr = err id id . optLinearizeTree opts sgr where
opts = addOption firstLin $ stateOptions sgr
term2tree :: GFGrammar -> G.Term -> Tree
term2tree gr = errVal uTree . annotate (grammar gr) . qualifTerm (absId gr)
tree2term :: Tree -> G.Term
tree2term = tree2exp
linearizeToAll :: [GFGrammar] -> Tree -> [String]
linearizeToAll grs t = [linearize gr t | gr <- grs]

78
src/GF/Embed/EmbedAPI.hs Normal file
View File

@@ -0,0 +1,78 @@
----------------------------------------------------------------------
-- |
-- Module : EmbedAPI
-- Maintainer : Aarne Ranta
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date:
-- > CVS $Author:
-- > CVS $Revision:
--
-- Reduced Application Programmer's Interface to GF, meant for
-- embedded GF systems. AR 10/5/2005
-----------------------------------------------------------------------------
module GF.Embed.EmbedAPI where
import GF.Compile.ShellState (ShellState,grammar2shellState,canModules,stateGrammarOfLang,abstract,grammar)
import GF.UseGrammar.Linear (linTree2string)
import GF.Embed.EmbedParsing (parseString)
import GF.Canon.CMacros (noMark)
import GF.Grammar.Grammar (Trm)
import GF.Grammar.MMacros (exp2tree)
import GF.Grammar.Macros (zIdent)
import GF.Grammar.Values (tree2exp)
import GF.Grammar.TypeCheck (annotate)
import GF.Canon.GetGFC (getCanonGrammar)
import GF.Infra.Modules (emptyMGrammar)
import GF.CF.CFIdent (string2CFCat)
import GF.Infra.UseIO
import GF.Data.Operations
import GF.Infra.Option (noOptions,useUntokenizer)
import GF.Infra.Ident (prIdent)
import GF.Embed.EmbedCustom
-- This API is meant to be used when embedding GF grammars in Haskell
-- programs. The embedded system is supposed to use the
-- .gfcm grammar format, which is first produced by the gf program.
---------------------------------------------------
-- Interface
---------------------------------------------------
type MultiGrammar = ShellState
type Language = String
type Category = String
type Tree = Trm
file2grammar :: FilePath -> IO MultiGrammar
linearize :: MultiGrammar -> Language -> Tree -> String
parse :: MultiGrammar -> Language -> Category -> String -> [Tree]
---------------------------------------------------
-- Implementation
---------------------------------------------------
file2grammar file = do
can <- useIOE (error "cannot parse grammar file") $ getCanonGrammar file
return $ errVal (error "cannot build multigrammar") $ grammar2shellState noOptions (can,emptyMGrammar)
linearize mgr lang =
untok .
linTree2string noMark (canModules mgr) (zIdent lang) .
errVal (error "illegal tree") .
annotate gr
where
gr = grammar sgr
sgr = stateGrammarOfLang mgr (zIdent lang)
untok = customOrDefault noOptions useUntokenizer customUntokenizer sgr
parse mgr lang cat =
map tree2exp .
errVal [] .
parseString noOptions sgr cfcat
where
sgr = stateGrammarOfLang mgr (zIdent lang)
cfcat = string2CFCat abs cat
abs = maybe (error "no abstract syntax") prIdent $ abstract mgr

113
src/GF/Embed/EmbedCustom.hs Normal file
View File

@@ -0,0 +1,113 @@
----------------------------------------------------------------------
-- |
-- Module : EmbedCustom
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date:
-- > CVS $Author:
-- > CVS $Revision:
--
-- A database for customizable lexers and unlexers. Reduced version of
-- GF.API, intended for embedded GF grammars.
-----------------------------------------------------------------------------
module GF.Embed.EmbedCustom where
import GF.Data.Operations
import GF.Text.Text
import GF.UseGrammar.Tokenize
import GF.UseGrammar.Morphology
import GF.Infra.Option
import GF.CF.CFIdent
import GF.Compile.ShellState
import Data.Char
-- | useTokenizer, \"-lexer=x\"
customTokenizer :: CustomData (StateGrammar -> String -> [CFTok])
-- | useUntokenizer, \"-unlexer=x\" --- should be from token list to string
customUntokenizer :: CustomData (StateGrammar -> String -> String)
-- | this is the way of selecting an item
customOrDefault :: Options -> OptFun -> CustomData a -> a
customOrDefault opts optfun db = maybe (defaultCustomVal db) id $
customAsOptVal opts optfun db
-- | to produce menus of custom operations
customInfo :: CustomData a -> (String, [String])
customInfo c = (titleCustomData c, map (ciStr . fst) (dbCustomData c))
type CommandId = String
strCI :: String -> CommandId
strCI = id
ciStr :: CommandId -> String
ciStr = id
ciOpt :: CommandId -> Option
ciOpt = iOpt
newtype CustomData a = CustomData (String, [(CommandId,a)])
customData :: String -> [(CommandId, a)] -> CustomData a
customData title db = CustomData (title,db)
dbCustomData :: CustomData a -> [(CommandId, a)]
dbCustomData (CustomData (_,db)) = db
titleCustomData :: CustomData a -> String
titleCustomData (CustomData (t,_)) = t
lookupCustom :: CustomData a -> CommandId -> Maybe a
lookupCustom = flip lookup . dbCustomData
customAsOptVal :: Options -> OptFun -> CustomData a -> Maybe a
customAsOptVal opts optfun db = do
arg <- getOptVal opts optfun
lookupCustom db (strCI arg)
-- | take the first entry from the database
defaultCustomVal :: CustomData a -> a
defaultCustomVal (CustomData (s,db)) =
ifNull (error ("empty database:" +++ s)) (snd . head) db
customTokenizer =
customData "Tokenizers, selected by option -lexer=x" $
[
(strCI "words", const $ tokWords)
,(strCI "literals", const $ tokLits)
,(strCI "vars", const $ tokVars)
,(strCI "chars", const $ map (tS . singleton))
,(strCI "code", const $ lexHaskell)
,(strCI "codevars", lexHaskellVar . stateIsWord)
,(strCI "text", const $ lexText)
,(strCI "unglue", \gr -> map tS . decomposeWords (stateMorpho gr))
,(strCI "codelit", lexHaskellLiteral . stateIsWord)
,(strCI "textlit", lexTextLiteral . stateIsWord)
,(strCI "codeC", const $ lexC2M)
,(strCI "codeCHigh", const $ lexC2M' True)
-- add your own tokenizers here
]
customUntokenizer =
customData "Untokenizers, selected by option -unlexer=x" $
[
(strCI "unwords", const $ id) -- DEFAULT
,(strCI "text", const $ formatAsText)
,(strCI "html", const $ formatAsHTML)
,(strCI "latex", const $ formatAsLatex)
,(strCI "code", const $ formatAsCode)
,(strCI "concat", const $ filter (not . isSpace))
,(strCI "textlit", const $ formatAsTextLit)
,(strCI "codelit", const $ formatAsCodeLit)
,(strCI "concat", const $ concatRemSpace)
,(strCI "glue", const $ performBinds)
,(strCI "reverse", const $ reverse)
,(strCI "bind", const $ performBinds) -- backward compat
-- add your own untokenizers here
]

View File

@@ -0,0 +1,137 @@
----------------------------------------------------------------------
-- |
-- Module : EmbedParsing
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date:
-- > CVS $Author:
-- > CVS $Revision:
--
-- just one parse method, for use in embedded GF systems
-----------------------------------------------------------------------------
module GF.Embed.EmbedParsing where
import GF.Infra.CheckM
import qualified GF.Canon.AbsGFC as C
import GF.Canon.GFC
import GF.Canon.MkGFC (trExp) ----
import GF.Canon.CMacros
import GF.Grammar.MMacros (refreshMetas)
import GF.UseGrammar.Linear
import GF.Data.Str
import GF.CF.CF
import GF.CF.CFIdent
import GF.Infra.Ident
import GF.Grammar.TypeCheck
import GF.Grammar.Values
import GF.UseGrammar.Tokenize
import GF.CF.Profile
import GF.Infra.Option
import GF.Compile.ShellState
import GF.Embed.EmbedCustom
import GF.CF.PPrCF (prCFTree)
import qualified GF.OldParsing.ParseCF as PCFOld -- OBSOLETE
-- import qualified GF.Parsing.GFC as New
import GF.Data.Operations
import Data.List (nub)
import Control.Monad (liftM)
-- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002
parseString :: Options -> StateGrammar -> CFCat -> String -> Err [Tree]
parseString os sg cat = liftM fst . parseStringMsg os sg cat
parseStringMsg :: Options -> StateGrammar -> CFCat -> String -> Err ([Tree],String)
parseStringMsg os sg cat s = do
(ts,(_,ss)) <- checkStart $ parseStringC os sg cat s
return (ts,unlines ss)
parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree]
parseStringC opts0 sg cat s
| otherwise = do
let opts = unionOptions opts0 $ stateOptions sg
cf = stateCF sg
gr = stateGrammarST sg
cn = cncId sg
tok = customOrDefault opts useTokenizer customTokenizer sg
parser = PCFOld.parse "ibn" (stateCF sg) cat -- customOrDefault opts useParser customParser sg cat
tokens2trms opts sg cn parser (tok s)
tokens2trms :: Options ->StateGrammar ->Ident -> CFParser -> [CFTok] -> Check [Tree]
tokens2trms opts sg cn parser toks = trees2trms opts sg cn toks trees info
where result = parser toks
info = snd result
trees = {- nub $ -} cfParseResults result -- peb 25/5-04: removed nub (O(n^2))
trees2trms :: Options -> StateGrammar -> Ident -> [CFTok] -> [CFTree] -> String -> Check [Tree]
trees2trms opts sg cn as ts0 info = do
ts <- case () of
_ | null ts0 -> checkWarn "No success in cf parsing" >> return []
_ | raw -> do
ts1 <- return (map cf2trm0 ts0) ----- should not need annot
checks [
mapM (checkErr . (annotate gr) . trExp) ts1 ---- complicated, often fails
,checkWarn (unlines ("Raw CF trees:":(map prCFTree ts0))) >> return []
]
_ -> do
let num = optIntOrN opts flagRawtrees 99999
let (ts01,rest) = splitAt num ts0
if null rest then return ()
else checkWarn ("Warning: only" +++ show num +++ "raw parses out of" +++
show (length ts0) +++
"considered; use -rawtrees=<Int> to see more"
)
(ts1,ss) <- checkErr $ mapErrN 1 postParse ts01
if null ts1 then raise ss else return ()
ts2 <- mapM (checkErr . annotate gr . refreshMetas [] . trExp) ts1 ----
if forgive then return ts2 else do
let tsss = [(t, allLinsOfTree gr cn t) | t <- ts2]
ps = [t | (t,ss) <- tsss,
any (compatToks as) (map str2cftoks ss)]
if null ps
then raise $ "Failure in morphology." ++
if verb
then "\nPossible corrections: " +++++
unlines (nub (map sstr (concatMap snd tsss)))
else ""
else return ps
if verb
then checkWarn $ " the token list" +++ show as ++++ unknown as +++++ info
else return ()
return $ optIntOrAll opts flagNumber $ nub ts
where
gr = stateGrammarST sg
raw = oElem rawParse opts
verb = oElem beVerbose opts
forgive = oElem forgiveParse opts
unknown ts = case filter noMatch [t | t@(TS _) <- ts] of
[] -> "where all words are known"
us -> "with the unknown tokens" +++ show us --- needs to be fixed for literals
terminals = map TS $ stateGrammarWords sg
noMatch t = all (not . compatTok t) terminals
--- too much type checking in building term info? return FullTerm to save work?
-- | raw parsing: so simple it is for a context-free CF grammar
cf2trm0 :: CFTree -> C.Exp
cf2trm0 (CFTree (fun, (_, trees))) = mkAppAtom (cffun2trm fun) (map cf2trm0 trees)
where
cffun2trm (CFFun (fun,_)) = fun
mkApp = foldl C.EApp
mkAppAtom a = mkApp (C.EAtom a)

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:22:24 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.8 $
-- > CVS $Date: 2005/05/10 12:49:13 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.9 $
--
-- some more abstractions on grammars, esp. for Edit
-----------------------------------------------------------------------------
@@ -318,3 +318,23 @@ reindexTerm = qualif (0,[]) where
_ -> composSafeOp (qualif dg) t
look x = maybe x id . lookup x --- if x is not in scope it is unchanged
ind x d = identC $ prIdent x ++ "_" ++ show d
-- this method works for context-free abstract syntax
-- and is meant to be used in simple embedded GF applications
exp2tree :: Exp -> Err Tree
exp2tree e = do
(bs,f,xs) <- termForm e
cont <- case bs of
[] -> return []
_ -> prtBad "cannot convert bindings in" e
at <- case f of
Q m c -> return $ AtC (m,c)
QC m c -> return $ AtC (m,c)
Meta m -> return $ AtM m
K s -> return $ AtL s
EInt n -> return $ AtI n
_ -> prtBad "cannot convert to atom" f
ts <- mapM exp2tree xs
return $ Tr (N (cont,at,uVal,([],[]),True),ts)