forked from GitHub/gf-core
modules for embedded GF
This commit is contained in:
@@ -18,11 +18,14 @@ November 8, 2004.
|
|||||||
|
|
||||||
</p><h2>News</h2>
|
</p><h2>News</h2>
|
||||||
|
|
||||||
|
<!--
|
||||||
<i>May 9, 2005</i>. Version 2.2 coming soon. Here is a
|
<i>May 9, 2005</i>. Version 2.2 coming soon. Here is a
|
||||||
<a href="download-2.2/GF-2.2.tgz">preliminary source release</a>.
|
<a href="download-2.2/GF-2.2.tgz">preliminary source release</a>.
|
||||||
Here are the <a href="doc/gf2.2-highlights.html">highlights</a>.
|
Here are the <a href="doc/gf2.2-highlights.html">highlights</a>.
|
||||||
|
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
|
-->
|
||||||
|
|
||||||
<i>May 9, 2005</i>.
|
<i>May 9, 2005</i>.
|
||||||
PhD Thesis by
|
PhD Thesis by
|
||||||
|
|||||||
@@ -5,9 +5,9 @@
|
|||||||
-- Stability : (stable)
|
-- Stability : (stable)
|
||||||
-- Portability : (portable)
|
-- Portability : (portable)
|
||||||
--
|
--
|
||||||
-- > CVS $Date: 2005/04/21 16:45:57 $
|
-- > CVS $Date: 2005/05/10 12:49:13 $
|
||||||
-- > CVS $Author: bringert $
|
-- > CVS $Author: aarne $
|
||||||
-- > CVS $Revision: 1.33 $
|
-- > CVS $Revision: 1.34 $
|
||||||
--
|
--
|
||||||
-- Application Programmer's Interface to GF; also used by Shell. AR 10/11/2001
|
-- Application Programmer's Interface to GF; also used by Shell. AR 10/11/2001
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
@@ -88,6 +88,12 @@ linearize :: GFGrammar -> Tree -> String
|
|||||||
linearize sgr = err id id . optLinearizeTree opts sgr where
|
linearize sgr = err id id . optLinearizeTree opts sgr where
|
||||||
opts = addOption firstLin $ stateOptions sgr
|
opts = addOption firstLin $ stateOptions sgr
|
||||||
|
|
||||||
|
term2tree :: GFGrammar -> G.Term -> Tree
|
||||||
|
term2tree gr = errVal uTree . annotate (grammar gr) . qualifTerm (absId gr)
|
||||||
|
|
||||||
|
tree2term :: Tree -> G.Term
|
||||||
|
tree2term = tree2exp
|
||||||
|
|
||||||
linearizeToAll :: [GFGrammar] -> Tree -> [String]
|
linearizeToAll :: [GFGrammar] -> Tree -> [String]
|
||||||
linearizeToAll grs t = [linearize gr t | gr <- grs]
|
linearizeToAll grs t = [linearize gr t | gr <- grs]
|
||||||
|
|
||||||
|
|||||||
78
src/GF/Embed/EmbedAPI.hs
Normal file
78
src/GF/Embed/EmbedAPI.hs
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
----------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : EmbedAPI
|
||||||
|
-- Maintainer : Aarne Ranta
|
||||||
|
-- Stability : (stable)
|
||||||
|
-- Portability : (portable)
|
||||||
|
--
|
||||||
|
-- > CVS $Date:
|
||||||
|
-- > CVS $Author:
|
||||||
|
-- > CVS $Revision:
|
||||||
|
--
|
||||||
|
-- Reduced Application Programmer's Interface to GF, meant for
|
||||||
|
-- embedded GF systems. AR 10/5/2005
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
module GF.Embed.EmbedAPI where
|
||||||
|
|
||||||
|
import GF.Compile.ShellState (ShellState,grammar2shellState,canModules,stateGrammarOfLang,abstract,grammar)
|
||||||
|
import GF.UseGrammar.Linear (linTree2string)
|
||||||
|
import GF.Embed.EmbedParsing (parseString)
|
||||||
|
import GF.Canon.CMacros (noMark)
|
||||||
|
import GF.Grammar.Grammar (Trm)
|
||||||
|
import GF.Grammar.MMacros (exp2tree)
|
||||||
|
import GF.Grammar.Macros (zIdent)
|
||||||
|
import GF.Grammar.Values (tree2exp)
|
||||||
|
import GF.Grammar.TypeCheck (annotate)
|
||||||
|
import GF.Canon.GetGFC (getCanonGrammar)
|
||||||
|
import GF.Infra.Modules (emptyMGrammar)
|
||||||
|
import GF.CF.CFIdent (string2CFCat)
|
||||||
|
import GF.Infra.UseIO
|
||||||
|
import GF.Data.Operations
|
||||||
|
import GF.Infra.Option (noOptions,useUntokenizer)
|
||||||
|
import GF.Infra.Ident (prIdent)
|
||||||
|
import GF.Embed.EmbedCustom
|
||||||
|
|
||||||
|
-- This API is meant to be used when embedding GF grammars in Haskell
|
||||||
|
-- programs. The embedded system is supposed to use the
|
||||||
|
-- .gfcm grammar format, which is first produced by the gf program.
|
||||||
|
|
||||||
|
---------------------------------------------------
|
||||||
|
-- Interface
|
||||||
|
---------------------------------------------------
|
||||||
|
|
||||||
|
type MultiGrammar = ShellState
|
||||||
|
type Language = String
|
||||||
|
type Category = String
|
||||||
|
type Tree = Trm
|
||||||
|
|
||||||
|
file2grammar :: FilePath -> IO MultiGrammar
|
||||||
|
linearize :: MultiGrammar -> Language -> Tree -> String
|
||||||
|
parse :: MultiGrammar -> Language -> Category -> String -> [Tree]
|
||||||
|
|
||||||
|
---------------------------------------------------
|
||||||
|
-- Implementation
|
||||||
|
---------------------------------------------------
|
||||||
|
|
||||||
|
file2grammar file = do
|
||||||
|
can <- useIOE (error "cannot parse grammar file") $ getCanonGrammar file
|
||||||
|
return $ errVal (error "cannot build multigrammar") $ grammar2shellState noOptions (can,emptyMGrammar)
|
||||||
|
|
||||||
|
linearize mgr lang =
|
||||||
|
untok .
|
||||||
|
linTree2string noMark (canModules mgr) (zIdent lang) .
|
||||||
|
errVal (error "illegal tree") .
|
||||||
|
annotate gr
|
||||||
|
where
|
||||||
|
gr = grammar sgr
|
||||||
|
sgr = stateGrammarOfLang mgr (zIdent lang)
|
||||||
|
untok = customOrDefault noOptions useUntokenizer customUntokenizer sgr
|
||||||
|
|
||||||
|
parse mgr lang cat =
|
||||||
|
map tree2exp .
|
||||||
|
errVal [] .
|
||||||
|
parseString noOptions sgr cfcat
|
||||||
|
where
|
||||||
|
sgr = stateGrammarOfLang mgr (zIdent lang)
|
||||||
|
cfcat = string2CFCat abs cat
|
||||||
|
abs = maybe (error "no abstract syntax") prIdent $ abstract mgr
|
||||||
113
src/GF/Embed/EmbedCustom.hs
Normal file
113
src/GF/Embed/EmbedCustom.hs
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
----------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : EmbedCustom
|
||||||
|
-- Maintainer : AR
|
||||||
|
-- Stability : (stable)
|
||||||
|
-- Portability : (portable)
|
||||||
|
--
|
||||||
|
-- > CVS $Date:
|
||||||
|
-- > CVS $Author:
|
||||||
|
-- > CVS $Revision:
|
||||||
|
--
|
||||||
|
-- A database for customizable lexers and unlexers. Reduced version of
|
||||||
|
-- GF.API, intended for embedded GF grammars.
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
module GF.Embed.EmbedCustom where
|
||||||
|
|
||||||
|
import GF.Data.Operations
|
||||||
|
import GF.Text.Text
|
||||||
|
import GF.UseGrammar.Tokenize
|
||||||
|
import GF.UseGrammar.Morphology
|
||||||
|
import GF.Infra.Option
|
||||||
|
import GF.CF.CFIdent
|
||||||
|
import GF.Compile.ShellState
|
||||||
|
import Data.Char
|
||||||
|
|
||||||
|
-- | useTokenizer, \"-lexer=x\"
|
||||||
|
customTokenizer :: CustomData (StateGrammar -> String -> [CFTok])
|
||||||
|
|
||||||
|
-- | useUntokenizer, \"-unlexer=x\" --- should be from token list to string
|
||||||
|
customUntokenizer :: CustomData (StateGrammar -> String -> String)
|
||||||
|
|
||||||
|
-- | this is the way of selecting an item
|
||||||
|
customOrDefault :: Options -> OptFun -> CustomData a -> a
|
||||||
|
customOrDefault opts optfun db = maybe (defaultCustomVal db) id $
|
||||||
|
customAsOptVal opts optfun db
|
||||||
|
|
||||||
|
-- | to produce menus of custom operations
|
||||||
|
customInfo :: CustomData a -> (String, [String])
|
||||||
|
customInfo c = (titleCustomData c, map (ciStr . fst) (dbCustomData c))
|
||||||
|
|
||||||
|
type CommandId = String
|
||||||
|
|
||||||
|
strCI :: String -> CommandId
|
||||||
|
strCI = id
|
||||||
|
|
||||||
|
ciStr :: CommandId -> String
|
||||||
|
ciStr = id
|
||||||
|
|
||||||
|
ciOpt :: CommandId -> Option
|
||||||
|
ciOpt = iOpt
|
||||||
|
|
||||||
|
newtype CustomData a = CustomData (String, [(CommandId,a)])
|
||||||
|
|
||||||
|
customData :: String -> [(CommandId, a)] -> CustomData a
|
||||||
|
customData title db = CustomData (title,db)
|
||||||
|
|
||||||
|
dbCustomData :: CustomData a -> [(CommandId, a)]
|
||||||
|
dbCustomData (CustomData (_,db)) = db
|
||||||
|
|
||||||
|
titleCustomData :: CustomData a -> String
|
||||||
|
titleCustomData (CustomData (t,_)) = t
|
||||||
|
|
||||||
|
lookupCustom :: CustomData a -> CommandId -> Maybe a
|
||||||
|
lookupCustom = flip lookup . dbCustomData
|
||||||
|
|
||||||
|
customAsOptVal :: Options -> OptFun -> CustomData a -> Maybe a
|
||||||
|
customAsOptVal opts optfun db = do
|
||||||
|
arg <- getOptVal opts optfun
|
||||||
|
lookupCustom db (strCI arg)
|
||||||
|
|
||||||
|
-- | take the first entry from the database
|
||||||
|
defaultCustomVal :: CustomData a -> a
|
||||||
|
defaultCustomVal (CustomData (s,db)) =
|
||||||
|
ifNull (error ("empty database:" +++ s)) (snd . head) db
|
||||||
|
|
||||||
|
customTokenizer =
|
||||||
|
customData "Tokenizers, selected by option -lexer=x" $
|
||||||
|
[
|
||||||
|
(strCI "words", const $ tokWords)
|
||||||
|
,(strCI "literals", const $ tokLits)
|
||||||
|
,(strCI "vars", const $ tokVars)
|
||||||
|
,(strCI "chars", const $ map (tS . singleton))
|
||||||
|
,(strCI "code", const $ lexHaskell)
|
||||||
|
,(strCI "codevars", lexHaskellVar . stateIsWord)
|
||||||
|
,(strCI "text", const $ lexText)
|
||||||
|
,(strCI "unglue", \gr -> map tS . decomposeWords (stateMorpho gr))
|
||||||
|
,(strCI "codelit", lexHaskellLiteral . stateIsWord)
|
||||||
|
,(strCI "textlit", lexTextLiteral . stateIsWord)
|
||||||
|
,(strCI "codeC", const $ lexC2M)
|
||||||
|
,(strCI "codeCHigh", const $ lexC2M' True)
|
||||||
|
-- add your own tokenizers here
|
||||||
|
]
|
||||||
|
|
||||||
|
customUntokenizer =
|
||||||
|
customData "Untokenizers, selected by option -unlexer=x" $
|
||||||
|
[
|
||||||
|
(strCI "unwords", const $ id) -- DEFAULT
|
||||||
|
,(strCI "text", const $ formatAsText)
|
||||||
|
,(strCI "html", const $ formatAsHTML)
|
||||||
|
,(strCI "latex", const $ formatAsLatex)
|
||||||
|
,(strCI "code", const $ formatAsCode)
|
||||||
|
,(strCI "concat", const $ filter (not . isSpace))
|
||||||
|
,(strCI "textlit", const $ formatAsTextLit)
|
||||||
|
,(strCI "codelit", const $ formatAsCodeLit)
|
||||||
|
,(strCI "concat", const $ concatRemSpace)
|
||||||
|
,(strCI "glue", const $ performBinds)
|
||||||
|
,(strCI "reverse", const $ reverse)
|
||||||
|
,(strCI "bind", const $ performBinds) -- backward compat
|
||||||
|
-- add your own untokenizers here
|
||||||
|
]
|
||||||
|
|
||||||
137
src/GF/Embed/EmbedParsing.hs
Normal file
137
src/GF/Embed/EmbedParsing.hs
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
----------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : EmbedParsing
|
||||||
|
-- Maintainer : AR
|
||||||
|
-- Stability : (stable)
|
||||||
|
-- Portability : (portable)
|
||||||
|
--
|
||||||
|
-- > CVS $Date:
|
||||||
|
-- > CVS $Author:
|
||||||
|
-- > CVS $Revision:
|
||||||
|
--
|
||||||
|
-- just one parse method, for use in embedded GF systems
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
module GF.Embed.EmbedParsing where
|
||||||
|
|
||||||
|
import GF.Infra.CheckM
|
||||||
|
import qualified GF.Canon.AbsGFC as C
|
||||||
|
import GF.Canon.GFC
|
||||||
|
import GF.Canon.MkGFC (trExp) ----
|
||||||
|
import GF.Canon.CMacros
|
||||||
|
import GF.Grammar.MMacros (refreshMetas)
|
||||||
|
import GF.UseGrammar.Linear
|
||||||
|
import GF.Data.Str
|
||||||
|
import GF.CF.CF
|
||||||
|
import GF.CF.CFIdent
|
||||||
|
import GF.Infra.Ident
|
||||||
|
import GF.Grammar.TypeCheck
|
||||||
|
import GF.Grammar.Values
|
||||||
|
import GF.UseGrammar.Tokenize
|
||||||
|
import GF.CF.Profile
|
||||||
|
import GF.Infra.Option
|
||||||
|
import GF.Compile.ShellState
|
||||||
|
import GF.Embed.EmbedCustom
|
||||||
|
import GF.CF.PPrCF (prCFTree)
|
||||||
|
|
||||||
|
import qualified GF.OldParsing.ParseCF as PCFOld -- OBSOLETE
|
||||||
|
|
||||||
|
|
||||||
|
-- import qualified GF.Parsing.GFC as New
|
||||||
|
|
||||||
|
import GF.Data.Operations
|
||||||
|
|
||||||
|
import Data.List (nub)
|
||||||
|
import Control.Monad (liftM)
|
||||||
|
|
||||||
|
-- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002
|
||||||
|
|
||||||
|
parseString :: Options -> StateGrammar -> CFCat -> String -> Err [Tree]
|
||||||
|
parseString os sg cat = liftM fst . parseStringMsg os sg cat
|
||||||
|
|
||||||
|
parseStringMsg :: Options -> StateGrammar -> CFCat -> String -> Err ([Tree],String)
|
||||||
|
parseStringMsg os sg cat s = do
|
||||||
|
(ts,(_,ss)) <- checkStart $ parseStringC os sg cat s
|
||||||
|
return (ts,unlines ss)
|
||||||
|
|
||||||
|
parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree]
|
||||||
|
parseStringC opts0 sg cat s
|
||||||
|
|
||||||
|
| otherwise = do
|
||||||
|
let opts = unionOptions opts0 $ stateOptions sg
|
||||||
|
cf = stateCF sg
|
||||||
|
gr = stateGrammarST sg
|
||||||
|
cn = cncId sg
|
||||||
|
tok = customOrDefault opts useTokenizer customTokenizer sg
|
||||||
|
parser = PCFOld.parse "ibn" (stateCF sg) cat -- customOrDefault opts useParser customParser sg cat
|
||||||
|
tokens2trms opts sg cn parser (tok s)
|
||||||
|
|
||||||
|
tokens2trms :: Options ->StateGrammar ->Ident -> CFParser -> [CFTok] -> Check [Tree]
|
||||||
|
tokens2trms opts sg cn parser toks = trees2trms opts sg cn toks trees info
|
||||||
|
where result = parser toks
|
||||||
|
info = snd result
|
||||||
|
trees = {- nub $ -} cfParseResults result -- peb 25/5-04: removed nub (O(n^2))
|
||||||
|
|
||||||
|
trees2trms :: Options -> StateGrammar -> Ident -> [CFTok] -> [CFTree] -> String -> Check [Tree]
|
||||||
|
trees2trms opts sg cn as ts0 info = do
|
||||||
|
ts <- case () of
|
||||||
|
_ | null ts0 -> checkWarn "No success in cf parsing" >> return []
|
||||||
|
_ | raw -> do
|
||||||
|
ts1 <- return (map cf2trm0 ts0) ----- should not need annot
|
||||||
|
checks [
|
||||||
|
mapM (checkErr . (annotate gr) . trExp) ts1 ---- complicated, often fails
|
||||||
|
,checkWarn (unlines ("Raw CF trees:":(map prCFTree ts0))) >> return []
|
||||||
|
]
|
||||||
|
_ -> do
|
||||||
|
let num = optIntOrN opts flagRawtrees 99999
|
||||||
|
let (ts01,rest) = splitAt num ts0
|
||||||
|
if null rest then return ()
|
||||||
|
else checkWarn ("Warning: only" +++ show num +++ "raw parses out of" +++
|
||||||
|
show (length ts0) +++
|
||||||
|
"considered; use -rawtrees=<Int> to see more"
|
||||||
|
)
|
||||||
|
(ts1,ss) <- checkErr $ mapErrN 1 postParse ts01
|
||||||
|
if null ts1 then raise ss else return ()
|
||||||
|
ts2 <- mapM (checkErr . annotate gr . refreshMetas [] . trExp) ts1 ----
|
||||||
|
if forgive then return ts2 else do
|
||||||
|
let tsss = [(t, allLinsOfTree gr cn t) | t <- ts2]
|
||||||
|
ps = [t | (t,ss) <- tsss,
|
||||||
|
any (compatToks as) (map str2cftoks ss)]
|
||||||
|
if null ps
|
||||||
|
then raise $ "Failure in morphology." ++
|
||||||
|
if verb
|
||||||
|
then "\nPossible corrections: " +++++
|
||||||
|
unlines (nub (map sstr (concatMap snd tsss)))
|
||||||
|
else ""
|
||||||
|
else return ps
|
||||||
|
|
||||||
|
if verb
|
||||||
|
then checkWarn $ " the token list" +++ show as ++++ unknown as +++++ info
|
||||||
|
else return ()
|
||||||
|
|
||||||
|
return $ optIntOrAll opts flagNumber $ nub ts
|
||||||
|
where
|
||||||
|
gr = stateGrammarST sg
|
||||||
|
|
||||||
|
raw = oElem rawParse opts
|
||||||
|
verb = oElem beVerbose opts
|
||||||
|
forgive = oElem forgiveParse opts
|
||||||
|
|
||||||
|
unknown ts = case filter noMatch [t | t@(TS _) <- ts] of
|
||||||
|
[] -> "where all words are known"
|
||||||
|
us -> "with the unknown tokens" +++ show us --- needs to be fixed for literals
|
||||||
|
terminals = map TS $ stateGrammarWords sg
|
||||||
|
noMatch t = all (not . compatTok t) terminals
|
||||||
|
|
||||||
|
|
||||||
|
--- too much type checking in building term info? return FullTerm to save work?
|
||||||
|
|
||||||
|
-- | raw parsing: so simple it is for a context-free CF grammar
|
||||||
|
cf2trm0 :: CFTree -> C.Exp
|
||||||
|
cf2trm0 (CFTree (fun, (_, trees))) = mkAppAtom (cffun2trm fun) (map cf2trm0 trees)
|
||||||
|
where
|
||||||
|
cffun2trm (CFFun (fun,_)) = fun
|
||||||
|
mkApp = foldl C.EApp
|
||||||
|
mkAppAtom a = mkApp (C.EAtom a)
|
||||||
|
|
||||||
|
|
||||||
@@ -5,9 +5,9 @@
|
|||||||
-- Stability : (stable)
|
-- Stability : (stable)
|
||||||
-- Portability : (portable)
|
-- Portability : (portable)
|
||||||
--
|
--
|
||||||
-- > CVS $Date: 2005/04/21 16:22:24 $
|
-- > CVS $Date: 2005/05/10 12:49:13 $
|
||||||
-- > CVS $Author: bringert $
|
-- > CVS $Author: aarne $
|
||||||
-- > CVS $Revision: 1.8 $
|
-- > CVS $Revision: 1.9 $
|
||||||
--
|
--
|
||||||
-- some more abstractions on grammars, esp. for Edit
|
-- some more abstractions on grammars, esp. for Edit
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
@@ -318,3 +318,23 @@ reindexTerm = qualif (0,[]) where
|
|||||||
_ -> composSafeOp (qualif dg) t
|
_ -> composSafeOp (qualif dg) t
|
||||||
look x = maybe x id . lookup x --- if x is not in scope it is unchanged
|
look x = maybe x id . lookup x --- if x is not in scope it is unchanged
|
||||||
ind x d = identC $ prIdent x ++ "_" ++ show d
|
ind x d = identC $ prIdent x ++ "_" ++ show d
|
||||||
|
|
||||||
|
|
||||||
|
-- this method works for context-free abstract syntax
|
||||||
|
-- and is meant to be used in simple embedded GF applications
|
||||||
|
|
||||||
|
exp2tree :: Exp -> Err Tree
|
||||||
|
exp2tree e = do
|
||||||
|
(bs,f,xs) <- termForm e
|
||||||
|
cont <- case bs of
|
||||||
|
[] -> return []
|
||||||
|
_ -> prtBad "cannot convert bindings in" e
|
||||||
|
at <- case f of
|
||||||
|
Q m c -> return $ AtC (m,c)
|
||||||
|
QC m c -> return $ AtC (m,c)
|
||||||
|
Meta m -> return $ AtM m
|
||||||
|
K s -> return $ AtL s
|
||||||
|
EInt n -> return $ AtI n
|
||||||
|
_ -> prtBad "cannot convert to atom" f
|
||||||
|
ts <- mapM exp2tree xs
|
||||||
|
return $ Tr (N (cont,at,uVal,([],[]),True),ts)
|
||||||
|
|||||||
Reference in New Issue
Block a user