forked from GitHub/gf-core
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
This commit is contained in:
114
src-3.0/GF/Embed/EmbedAPI.hs
Normal file
114
src-3.0/GF/Embed/EmbedAPI.hs
Normal file
@@ -0,0 +1,114 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : EmbedAPI
|
||||
-- Maintainer : Aarne Ranta
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date:
|
||||
-- > CVS $Author:
|
||||
-- > CVS $Revision:
|
||||
--
|
||||
-- Reduced Application Programmer's Interface to GF, meant for
|
||||
-- embedded GF systems. AR 10/5/2005
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Embed.EmbedAPI where
|
||||
|
||||
import GF.Compile.ShellState (ShellState,grammar2shellState,canModules,stateGrammarOfLang,abstract,grammar,firstStateGrammar,allLanguages,allCategories,stateOptions,firstAbsCat)
|
||||
import GF.UseGrammar.Linear (linTree2string)
|
||||
import GF.UseGrammar.GetTree (string2tree)
|
||||
import GF.Embed.EmbedParsing (parseString)
|
||||
import GF.Canon.CMacros (noMark)
|
||||
import GF.Grammar.Grammar (Trm)
|
||||
import GF.Grammar.MMacros (exp2tree)
|
||||
import GF.Grammar.Macros (zIdent)
|
||||
import GF.Grammar.PrGrammar (prt_)
|
||||
import GF.Grammar.Values (tree2exp)
|
||||
import GF.Grammar.TypeCheck (annotate)
|
||||
import GF.Canon.GetGFC (getCanonGrammar)
|
||||
import GF.Infra.Modules (emptyMGrammar)
|
||||
import GF.CF.CFIdent (string2CFCat)
|
||||
import GF.Infra.UseIO
|
||||
import GF.Data.Operations
|
||||
import GF.Infra.Option (noOptions,useUntokenizer,options,iOpt)
|
||||
import GF.Infra.Ident (prIdent)
|
||||
import GF.Embed.EmbedCustom
|
||||
|
||||
-- This API is meant to be used when embedding GF grammars in Haskell
|
||||
-- programs. The embedded system is supposed to use the
|
||||
-- .gfcm grammar format, which is first produced by the gf program.
|
||||
|
||||
---------------------------------------------------
|
||||
-- Interface
|
||||
---------------------------------------------------
|
||||
|
||||
type MultiGrammar = ShellState
|
||||
type Language = String
|
||||
type Category = String
|
||||
type Tree = Trm
|
||||
|
||||
file2grammar :: FilePath -> IO MultiGrammar
|
||||
|
||||
linearize :: MultiGrammar -> Language -> Tree -> String
|
||||
parse :: MultiGrammar -> Language -> Category -> String -> [Tree]
|
||||
|
||||
linearizeAll :: MultiGrammar -> Tree -> [String]
|
||||
linearizeAllLang :: MultiGrammar -> Tree -> [(Language,String)]
|
||||
|
||||
parseAll :: MultiGrammar -> Category -> String -> [[Tree]]
|
||||
parseAllLang :: MultiGrammar -> Category -> String -> [(Language,[Tree])]
|
||||
|
||||
readTree :: MultiGrammar -> String -> Tree
|
||||
showTree :: Tree -> String
|
||||
|
||||
languages :: MultiGrammar -> [Language]
|
||||
categories :: MultiGrammar -> [Category]
|
||||
|
||||
startCat :: MultiGrammar -> Category
|
||||
|
||||
---------------------------------------------------
|
||||
-- Implementation
|
||||
---------------------------------------------------
|
||||
|
||||
file2grammar file = do
|
||||
can <- useIOE (error "cannot parse grammar file") $ getCanonGrammar file
|
||||
return $ errVal (error "cannot build multigrammar") $
|
||||
grammar2shellState (options [iOpt "docf"]) (can,emptyMGrammar)
|
||||
|
||||
linearize mgr lang =
|
||||
untok .
|
||||
linTree2string noMark (canModules mgr) (zIdent lang) .
|
||||
errVal (error "illegal tree") .
|
||||
annotate gr
|
||||
where
|
||||
gr = grammar sgr
|
||||
sgr = stateGrammarOfLang mgr (zIdent lang)
|
||||
untok = customOrDefault (stateOptions sgr) useUntokenizer customUntokenizer sgr
|
||||
|
||||
parse mgr lang cat =
|
||||
map tree2exp .
|
||||
errVal [] .
|
||||
parseString (stateOptions sgr) sgr cfcat
|
||||
where
|
||||
sgr = stateGrammarOfLang mgr (zIdent lang)
|
||||
cfcat = string2CFCat abs cat
|
||||
abs = maybe (error "no abstract syntax") prIdent $ abstract mgr
|
||||
|
||||
linearizeAll mgr = map snd . linearizeAllLang mgr
|
||||
linearizeAllLang mgr t = [(lang,linearize mgr lang t) | lang <- languages mgr]
|
||||
|
||||
parseAll mgr cat = map snd . parseAllLang mgr cat
|
||||
|
||||
parseAllLang mgr cat s =
|
||||
[(lang,ts) | lang <- languages mgr, let ts = parse mgr lang cat s, not (null ts)]
|
||||
|
||||
readTree mgr s = tree2exp $ string2tree (firstStateGrammar mgr) s
|
||||
|
||||
showTree t = prt_ t
|
||||
|
||||
languages mgr = [prt_ l | l <- allLanguages mgr]
|
||||
|
||||
categories mgr = [prt_ c | (_,c) <- allCategories mgr]
|
||||
|
||||
startCat = prt_ . snd . firstAbsCat noOptions . firstStateGrammar
|
||||
113
src-3.0/GF/Embed/EmbedCustom.hs
Normal file
113
src-3.0/GF/Embed/EmbedCustom.hs
Normal file
@@ -0,0 +1,113 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : EmbedCustom
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date:
|
||||
-- > CVS $Author:
|
||||
-- > CVS $Revision:
|
||||
--
|
||||
-- A database for customizable lexers and unlexers. Reduced version of
|
||||
-- GF.API, intended for embedded GF grammars.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Embed.EmbedCustom where
|
||||
|
||||
import GF.Data.Operations
|
||||
import GF.Text.Text
|
||||
import GF.UseGrammar.Tokenize
|
||||
import GF.UseGrammar.Morphology
|
||||
import GF.Infra.Option
|
||||
import GF.CF.CFIdent
|
||||
import GF.Compile.ShellState
|
||||
import Data.Char
|
||||
|
||||
-- | useTokenizer, \"-lexer=x\"
|
||||
customTokenizer :: CustomData (StateGrammar -> String -> [CFTok])
|
||||
|
||||
-- | useUntokenizer, \"-unlexer=x\" --- should be from token list to string
|
||||
customUntokenizer :: CustomData (StateGrammar -> String -> String)
|
||||
|
||||
-- | this is the way of selecting an item
|
||||
customOrDefault :: Options -> OptFun -> CustomData a -> a
|
||||
customOrDefault opts optfun db = maybe (defaultCustomVal db) id $
|
||||
customAsOptVal opts optfun db
|
||||
|
||||
-- | to produce menus of custom operations
|
||||
customInfo :: CustomData a -> (String, [String])
|
||||
customInfo c = (titleCustomData c, map (ciStr . fst) (dbCustomData c))
|
||||
|
||||
type CommandId = String
|
||||
|
||||
strCI :: String -> CommandId
|
||||
strCI = id
|
||||
|
||||
ciStr :: CommandId -> String
|
||||
ciStr = id
|
||||
|
||||
ciOpt :: CommandId -> Option
|
||||
ciOpt = iOpt
|
||||
|
||||
newtype CustomData a = CustomData (String, [(CommandId,a)])
|
||||
|
||||
customData :: String -> [(CommandId, a)] -> CustomData a
|
||||
customData title db = CustomData (title,db)
|
||||
|
||||
dbCustomData :: CustomData a -> [(CommandId, a)]
|
||||
dbCustomData (CustomData (_,db)) = db
|
||||
|
||||
titleCustomData :: CustomData a -> String
|
||||
titleCustomData (CustomData (t,_)) = t
|
||||
|
||||
lookupCustom :: CustomData a -> CommandId -> Maybe a
|
||||
lookupCustom = flip lookup . dbCustomData
|
||||
|
||||
customAsOptVal :: Options -> OptFun -> CustomData a -> Maybe a
|
||||
customAsOptVal opts optfun db = do
|
||||
arg <- getOptVal opts optfun
|
||||
lookupCustom db (strCI arg)
|
||||
|
||||
-- | take the first entry from the database
|
||||
defaultCustomVal :: CustomData a -> a
|
||||
defaultCustomVal (CustomData (s,db)) =
|
||||
ifNull (error ("empty database:" +++ s)) (snd . head) db
|
||||
|
||||
customTokenizer =
|
||||
customData "Tokenizers, selected by option -lexer=x" $
|
||||
[
|
||||
(strCI "words", const $ tokWords)
|
||||
,(strCI "literals", const $ tokLits)
|
||||
,(strCI "vars", const $ tokVars)
|
||||
,(strCI "chars", const $ map (tS . singleton))
|
||||
,(strCI "code", const $ lexHaskell)
|
||||
,(strCI "codevars", lexHaskellVar . stateIsWord)
|
||||
,(strCI "text", const $ lexText)
|
||||
,(strCI "unglue", \gr -> map tS . decomposeWords (stateMorpho gr))
|
||||
,(strCI "codelit", lexHaskellLiteral . stateIsWord)
|
||||
,(strCI "textlit", lexTextLiteral . stateIsWord)
|
||||
,(strCI "codeC", const $ lexC2M)
|
||||
,(strCI "codeCHigh", const $ lexC2M' True)
|
||||
-- add your own tokenizers here
|
||||
]
|
||||
|
||||
customUntokenizer =
|
||||
customData "Untokenizers, selected by option -unlexer=x" $
|
||||
[
|
||||
(strCI "unwords", const $ id) -- DEFAULT
|
||||
,(strCI "text", const $ formatAsText)
|
||||
,(strCI "html", const $ formatAsHTML)
|
||||
,(strCI "latex", const $ formatAsLatex)
|
||||
,(strCI "code", const $ formatAsCode)
|
||||
,(strCI "concat", const $ filter (not . isSpace))
|
||||
,(strCI "textlit", const $ formatAsTextLit)
|
||||
,(strCI "codelit", const $ formatAsCodeLit)
|
||||
,(strCI "concat", const $ concatRemSpace)
|
||||
,(strCI "glue", const $ performBinds)
|
||||
,(strCI "reverse", const $ reverse)
|
||||
,(strCI "bind", const $ performBinds) -- backward compat
|
||||
-- add your own untokenizers here
|
||||
]
|
||||
|
||||
65
src-3.0/GF/Embed/EmbedParsing.hs
Normal file
65
src-3.0/GF/Embed/EmbedParsing.hs
Normal file
@@ -0,0 +1,65 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : EmbedParsing
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date:
|
||||
-- > CVS $Author:
|
||||
-- > CVS $Revision:
|
||||
--
|
||||
-- just one parse method, for use in embedded GF systems
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Embed.EmbedParsing where
|
||||
|
||||
import GF.Infra.CheckM
|
||||
import qualified GF.Canon.AbsGFC as C
|
||||
import GF.Canon.GFC
|
||||
import GF.Canon.MkGFC (trExp) ----
|
||||
import GF.Canon.CMacros
|
||||
import GF.Grammar.MMacros (refreshMetas)
|
||||
import GF.UseGrammar.Linear
|
||||
import GF.Data.Str
|
||||
import GF.CF.CF
|
||||
import GF.CF.CFIdent
|
||||
import GF.Infra.Ident
|
||||
import GF.Grammar.TypeCheck
|
||||
import GF.Grammar.Values
|
||||
import GF.UseGrammar.Tokenize
|
||||
import GF.CF.Profile
|
||||
import GF.Infra.Option
|
||||
import GF.Compile.ShellState
|
||||
import GF.Embed.EmbedCustom
|
||||
import GF.CF.PPrCF (prCFTree)
|
||||
import qualified GF.Parsing.GFC as New
|
||||
|
||||
|
||||
-- import qualified GF.Parsing.GFC as New
|
||||
|
||||
import GF.Data.Operations
|
||||
|
||||
import Data.List (nub)
|
||||
import Control.Monad (liftM)
|
||||
|
||||
-- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002
|
||||
|
||||
parseString :: Options -> StateGrammar -> CFCat -> String -> Err [Tree]
|
||||
parseString os sg cat = liftM fst . parseStringMsg os sg cat
|
||||
|
||||
parseStringMsg :: Options -> StateGrammar -> CFCat -> String -> Err ([Tree],String)
|
||||
parseStringMsg os sg cat s = do
|
||||
(ts,(_,ss)) <- checkStart $ parseStringC os sg cat s
|
||||
return (ts,unlines ss)
|
||||
|
||||
parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree]
|
||||
parseStringC opts0 sg cat s = do
|
||||
let opts = unionOptions opts0 $ stateOptions sg
|
||||
algorithm = "f" -- default algorithm: FCFG
|
||||
strategy = "bottomup"
|
||||
tokenizer = customOrDefault opts useTokenizer customTokenizer sg
|
||||
toks = tokenizer s
|
||||
ts <- checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
|
||||
checkErr $ allChecks $ map (annotate (stateGrammarST sg) . refreshMetas []) ts
|
||||
|
||||
44
src-3.0/GF/Embed/TemplateApp.hs
Normal file
44
src-3.0/GF/Embed/TemplateApp.hs
Normal file
@@ -0,0 +1,44 @@
|
||||
module Main where
|
||||
|
||||
import GF.Embed.EmbedAPI
|
||||
import System
|
||||
|
||||
-- Simple translation application built on EmbedAPI. AR 7/10/2005
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
file:_ <- getArgs
|
||||
grammar <- file2grammar file
|
||||
translate grammar
|
||||
|
||||
translate :: MultiGrammar -> IO ()
|
||||
translate grammar = do
|
||||
s <- getLine
|
||||
if s == "quit" then return () else do
|
||||
treat grammar s
|
||||
translate grammar
|
||||
|
||||
treat :: MultiGrammar -> String -> IO ()
|
||||
treat grammar s = putStrLn $ case comm of
|
||||
["lin"] -> unlines $ linearizeAll grammar $ readTree grammar rest
|
||||
["lin",lang] -> linearize grammar lang $ readTree grammar rest
|
||||
["parse",cat] -> unlines $ map showTree $ concat $ parseAll grammar cat rest
|
||||
["parse",lang,cat] -> unlines $ map showTree $ parse grammar lang cat rest
|
||||
["langs"] -> unwords $ languages grammar
|
||||
["cats"] -> unwords $ categories grammar
|
||||
["help"] -> helpMsg
|
||||
_ -> "command not interpreted: " ++ s
|
||||
where
|
||||
(comm,rest) = (words c,drop 1 r) where
|
||||
(c,r) = span (/=':') s
|
||||
|
||||
helpMsg = unlines [
|
||||
"lin : <Tree>",
|
||||
"lin <Lang> : <Tree>",
|
||||
"parse <Cat> : <String>",
|
||||
"parse <Lang> <Cat> : <String>",
|
||||
"langs",
|
||||
"cats",
|
||||
"help",
|
||||
"quit"
|
||||
]
|
||||
Reference in New Issue
Block a user