1
0
forked from GitHub/gf-core

GF/src now holds the 2.9 sources, and the new sources are in src-3.0 — keep it this way until the release of GF 3.

This commit is contained in:
aarne
2008-05-21 09:26:44 +00:00
parent 915a1de717
commit 055c0d0d5a
536 changed files with 0 additions and 0 deletions

View File

@@ -0,0 +1,114 @@
----------------------------------------------------------------------
-- |
-- Module : EmbedAPI
-- Maintainer : Aarne Ranta
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date:
-- > CVS $Author:
-- > CVS $Revision:
--
-- Reduced Application Programmer's Interface to GF, meant for
-- embedded GF systems. AR 10/5/2005
-----------------------------------------------------------------------------
module GF.Embed.EmbedAPI where
import GF.Compile.ShellState (ShellState,grammar2shellState,canModules,stateGrammarOfLang,abstract,grammar,firstStateGrammar,allLanguages,allCategories,stateOptions,firstAbsCat)
import GF.UseGrammar.Linear (linTree2string)
import GF.UseGrammar.GetTree (string2tree)
import GF.Embed.EmbedParsing (parseString)
import GF.Canon.CMacros (noMark)
import GF.Grammar.Grammar (Trm)
import GF.Grammar.MMacros (exp2tree)
import GF.Grammar.Macros (zIdent)
import GF.Grammar.PrGrammar (prt_)
import GF.Grammar.Values (tree2exp)
import GF.Grammar.TypeCheck (annotate)
import GF.Canon.GetGFC (getCanonGrammar)
import GF.Infra.Modules (emptyMGrammar)
import GF.CF.CFIdent (string2CFCat)
import GF.Infra.UseIO
import GF.Data.Operations
import GF.Infra.Option (noOptions,useUntokenizer,options,iOpt)
import GF.Infra.Ident (prIdent)
import GF.Embed.EmbedCustom
-- This API is meant to be used when embedding GF grammars in Haskell
-- programs. The embedded system is supposed to use the
-- .gfcm grammar format, which is first produced by the gf program.
---------------------------------------------------
-- Interface
---------------------------------------------------
-- | A multilingual grammar: the compiler's shell state, bundling the
-- abstract syntax with all its concrete languages.
type MultiGrammar = ShellState
-- | A concrete language, identified by its name (see 'languages').
type Language = String
-- | An abstract-syntax category, identified by its name (see 'categories').
type Category = String
-- | An abstract syntax tree.
type Tree = Trm

-- | Load a compiled grammar file into a 'MultiGrammar'.
file2grammar :: FilePath -> IO MultiGrammar
-- | Linearize a tree in one language.
linearize :: MultiGrammar -> Language -> Tree -> String
-- | Parse a string in one language as the given category.
parse :: MultiGrammar -> Language -> Category -> String -> [Tree]
-- | Linearize a tree in all languages.
linearizeAll :: MultiGrammar -> Tree -> [String]
-- | Linearize a tree in all languages, tagged with the language name.
linearizeAllLang :: MultiGrammar -> Tree -> [(Language,String)]
-- | Parse a string in all languages as the given category.
parseAll :: MultiGrammar -> Category -> String -> [[Tree]]
-- | Parse in all languages, keeping only languages with some parse.
parseAllLang :: MultiGrammar -> Category -> String -> [(Language,[Tree])]
-- | Read a tree from its string representation.
readTree :: MultiGrammar -> String -> Tree
-- | Print a tree as a string.
showTree :: Tree -> String
-- | The names of all languages in the grammar.
languages :: MultiGrammar -> [Language]
-- | The names of all abstract-syntax categories.
categories :: MultiGrammar -> [Category]
-- | The default start category for parsing.
startCat :: MultiGrammar -> Category
---------------------------------------------------
-- Implementation
---------------------------------------------------
-- | Load a compiled (.gfcm) grammar file and build a 'MultiGrammar'.
-- NOTE(review): failures are not recoverable — both an unparsable file
-- and a failed multigrammar build surface as calls to 'error'.
-- The "docf" option is passed so that a context-free parser is built.
file2grammar file = do
  can <- useIOE (error "cannot parse grammar file") $ getCanonGrammar file
  return $ errVal (error "cannot build multigrammar") $
    grammar2shellState (options [iOpt "docf"]) (can,emptyMGrammar)
-- | Linearize a tree in the given language, then apply the grammar's
-- customizable untokenizer (option -unlexer=x) to the result.
-- An ill-typed tree surfaces as a call to 'error'.
linearize mgr lang t = untok (linTree2string noMark (canModules mgr) lid annotated)
  where
    lid       = zIdent lang
    annotated = errVal (error "illegal tree") (annotate gr t)
    gr        = grammar sgr
    sgr       = stateGrammarOfLang mgr lid
    untok     = customOrDefault (stateOptions sgr) useUntokenizer customUntokenizer sgr
-- | Parse a string in the given language as the given category.
-- Returns the empty list when the parser fails; a missing abstract
-- syntax surfaces as a call to 'error'.
parse mgr lang cat s = map tree2exp parses
  where
    parses  = errVal [] (parseString (stateOptions sgr) sgr cfcat s)
    sgr     = stateGrammarOfLang mgr (zIdent lang)
    cfcat   = string2CFCat absName cat
    absName = maybe (error "no abstract syntax") prIdent (abstract mgr)
-- | Linearize a tree in every language of the grammar.
linearizeAll mgr t = map snd (linearizeAllLang mgr t)

-- | Linearize a tree in every language, tagged with the language name.
linearizeAllLang mgr t = map (\lang -> (lang, linearize mgr lang t)) (languages mgr)

-- | Parse a string in every language as the given category.
parseAll mgr cat s = map snd (parseAllLang mgr cat s)

-- | Parse a string in every language, tagged with the language name;
-- languages in which the string has no parse are dropped.
parseAllLang mgr cat s =
  filter (not . null . snd)
         [ (lang, parse mgr lang cat s) | lang <- languages mgr ]
-- | Read a tree from its string representation, interpreted against
-- the first grammar in the state.
readTree mgr s = tree2exp (string2tree (firstStateGrammar mgr) s)

-- | Print a tree as a string.
showTree = prt_

-- | The names of all languages in the grammar.
languages mgr = map prt_ (allLanguages mgr)

-- | The names of all categories of the abstract syntax.
categories mgr = map (prt_ . snd) (allCategories mgr)

-- | The default start category: the first abstract category of the
-- first grammar in the state.
startCat mgr = prt_ (snd (firstAbsCat noOptions (firstStateGrammar mgr)))

View File

@@ -0,0 +1,113 @@
----------------------------------------------------------------------
-- |
-- Module : EmbedCustom
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date:
-- > CVS $Author:
-- > CVS $Revision:
--
-- A database for customizable lexers and unlexers. Reduced version of
-- GF.API, intended for embedded GF grammars.
-----------------------------------------------------------------------------
module GF.Embed.EmbedCustom where
import GF.Data.Operations
import GF.Text.Text
import GF.UseGrammar.Tokenize
import GF.UseGrammar.Morphology
import GF.Infra.Option
import GF.CF.CFIdent
import GF.Compile.ShellState
import Data.Char
-- | Database of tokenizers, selected with 'useTokenizer' (\"-lexer=x\").
customTokenizer :: CustomData (StateGrammar -> String -> [CFTok])
-- | Database of untokenizers, selected with 'useUntokenizer'
-- (\"-unlexer=x\") --- should arguably be from token list to string.
customUntokenizer :: CustomData (StateGrammar -> String -> String)
-- | Select an item: the operation named by the given option, falling
-- back to the database's first entry when the option is absent or its
-- value is unknown.
customOrDefault :: Options -> OptFun -> CustomData a -> a
customOrDefault opts optfun db =
  case customAsOptVal opts optfun db of
    Just op -> op
    Nothing -> defaultCustomVal db
-- | Menu information for a database of custom operations: its title
-- together with the names of all entries.
customInfo :: CustomData a -> (String, [String])
customInfo db = (titleCustomData db, [ciStr name | (name, _) <- dbCustomData db])
-- | The name of a custom operation. Currently a plain string.
type CommandId = String

-- | Make a 'CommandId' from a string.
strCI :: String -> CommandId
strCI s = s

-- | Show a 'CommandId' as a string.
ciStr :: CommandId -> String
ciStr c = c

-- | Interpret a 'CommandId' as an option.
ciOpt :: CommandId -> Option
ciOpt c = iOpt c
-- | A titled database of custom operations, keyed by 'CommandId'.
newtype CustomData a = CustomData (String, [(CommandId,a)])

-- | Build a database from a title and its entries.
customData :: String -> [(CommandId, a)] -> CustomData a
customData title entries = CustomData (title, entries)

-- | The entries of a database.
dbCustomData :: CustomData a -> [(CommandId, a)]
dbCustomData (CustomData pair) = snd pair

-- | The title of a database.
titleCustomData :: CustomData a -> String
titleCustomData (CustomData pair) = fst pair
-- | Look up an operation by name; the first matching entry wins.
lookupCustom :: CustomData a -> CommandId -> Maybe a
lookupCustom db name = lookup name (dbCustomData db)

-- | The operation named by the value of a command-line option, if the
-- option is present and the name is found in the database.
customAsOptVal :: Options -> OptFun -> CustomData a -> Maybe a
customAsOptVal opts optfun db =
  getOptVal opts optfun >>= \name -> lookupCustom db (strCI name)
-- | Take the first entry of the database as the default operation;
-- an empty database is a call to 'error' mentioning the title.
-- NOTE(review): assumes @ifNull def f xs@ returns @def@ for an empty
-- list and @f xs@ otherwise — confirm against GF.Data.Operations.
defaultCustomVal :: CustomData a -> a
defaultCustomVal (CustomData (s,db)) =
  ifNull (error ("empty database:" +++ s)) (snd . head) db
-- | Database of tokenizers, selected by option @-lexer=x@.
-- The first entry ("words") is the default (see 'defaultCustomVal').
-- Entries built with @const@ ignore the grammar; the remaining ones
-- consult it (its word predicate or morphology).
customTokenizer =
  customData "Tokenizers, selected by option -lexer=x" $
  [
   (strCI "words",     const $ tokWords)
  ,(strCI "literals",  const $ tokLits)
  ,(strCI "vars",      const $ tokVars)
  ,(strCI "chars",     const $ map (tS . singleton))
  ,(strCI "code",      const $ lexHaskell)
  ,(strCI "codevars",  lexHaskellVar . stateIsWord)
  ,(strCI "text",      const $ lexText)
  ,(strCI "unglue",    \gr -> map tS . decomposeWords (stateMorpho gr))
  ,(strCI "codelit",   lexHaskellLiteral . stateIsWord)
  ,(strCI "textlit",   lexTextLiteral . stateIsWord)
  ,(strCI "codeC",     const $ lexC2M)
  ,(strCI "codeCHigh", const $ lexC2M' True)
  -- add your own tokenizers here
  ]
-- | Database of untokenizers, selected by option @-unlexer=x@.
-- The first entry ("unwords") is the default (see 'defaultCustomVal');
-- 'lookupCustom' returns the first entry matching a name.
--
-- BUG FIX: there were two "concat" entries; the second
-- (@const concatRemSpace@) was unreachable because 'lookup' takes the
-- first match, so it has been removed as dead code.
customUntokenizer =
  customData "Untokenizers, selected by option -unlexer=x" $
  [
   (strCI "unwords", const $ id) -- DEFAULT
  ,(strCI "text",    const $ formatAsText)
  ,(strCI "html",    const $ formatAsHTML)
  ,(strCI "latex",   const $ formatAsLatex)
  ,(strCI "code",    const $ formatAsCode)
  ,(strCI "concat",  const $ filter (not . isSpace))
  ,(strCI "textlit", const $ formatAsTextLit)
  ,(strCI "codelit", const $ formatAsCodeLit)
  ,(strCI "glue",    const $ performBinds)
  ,(strCI "reverse", const $ reverse)
  ,(strCI "bind",    const $ performBinds) -- backward compat
  -- add your own untokenizers here
  ]

View File

@@ -0,0 +1,65 @@
----------------------------------------------------------------------
-- |
-- Module : EmbedParsing
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date:
-- > CVS $Author:
-- > CVS $Revision:
--
-- just one parse method, for use in embedded GF systems
-----------------------------------------------------------------------------
module GF.Embed.EmbedParsing where
import GF.Infra.CheckM
import qualified GF.Canon.AbsGFC as C
import GF.Canon.GFC
import GF.Canon.MkGFC (trExp) ----
import GF.Canon.CMacros
import GF.Grammar.MMacros (refreshMetas)
import GF.UseGrammar.Linear
import GF.Data.Str
import GF.CF.CF
import GF.CF.CFIdent
import GF.Infra.Ident
import GF.Grammar.TypeCheck
import GF.Grammar.Values
import GF.UseGrammar.Tokenize
import GF.CF.Profile
import GF.Infra.Option
import GF.Compile.ShellState
import GF.Embed.EmbedCustom
import GF.CF.PPrCF (prCFTree)
import qualified GF.Parsing.GFC as New
-- import qualified GF.Parsing.GFC as New
import GF.Data.Operations
import Data.List (nub)
import Control.Monad (liftM)
-- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002
-- | Parse a string into all trees of the given category, discarding
-- the accumulated parser messages.
parseString :: Options -> StateGrammar -> CFCat -> String -> Err [Tree]
parseString os sg cat s = liftM fst (parseStringMsg os sg cat s)
-- | Like 'parseString', but also return the parser's accumulated
-- messages joined into a single string.
parseStringMsg :: Options -> StateGrammar -> CFCat -> String -> Err ([Tree],String)
parseStringMsg os sg cat s = do
  (trees, (_, msgs)) <- checkStart (parseStringC os sg cat s)
  return (trees, unlines msgs)
-- | Parse a string inside the 'Check' monad, so that messages are
-- accumulated alongside the resulting trees.
parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree]
parseStringC opts0 sg cat s = do
  -- Grammar-level options are merged with the caller's options.
  let opts = unionOptions opts0 $ stateOptions sg
      -- NOTE(review): algorithm and strategy are hard-wired here;
      -- they are not taken from the option set.
      algorithm = "f" -- default algorithm: FCFG
      strategy = "bottomup"
      -- Tokenizer is customizable via -lexer=x (see 'customTokenizer').
      tokenizer = customOrDefault opts useTokenizer customTokenizer sg
      toks = tokenizer s
  ts <- checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
  -- Refresh metavariables and re-annotate each raw parse tree against
  -- the abstract syntax before returning.
  checkErr $ allChecks $ map (annotate (stateGrammarST sg) . refreshMetas []) ts

View File

@@ -0,0 +1,44 @@
module Main where
import GF.Embed.EmbedAPI
import System
-- Simple translation application built on EmbedAPI. AR 7/10/2005
-- | Load the grammar file named by the first command-line argument and
-- start the interactive translation loop.
--
-- BUG FIX: the original @file:_ <- getArgs@ was a partial pattern that
-- crashed with a pattern-match failure when no argument was given;
-- a usage message is now printed instead.
main :: IO ()
main = do
  args <- getArgs
  case args of
    file : _ -> do
      grammar <- file2grammar file
      translate grammar
    [] -> putStrLn "usage: translator <grammar-file>"
-- | Read one command per line from stdin and execute it against the
-- grammar, until the line "quit" is entered.
translate :: MultiGrammar -> IO ()
translate grammar = loop
  where
    loop = do
      line <- getLine
      if line == "quit"
        then return ()
        else do
          treat grammar line
          loop
-- | Interpret one input line of the form @command : argument@ and
-- print the result. Unknown commands are echoed back with a note.
treat :: MultiGrammar -> String -> IO ()
treat grammar input = putStrLn output
  where
    output = case command of
      ["lin"]              -> unlines (linearizeAll grammar (readTree grammar payload))
      ["lin", lang]        -> linearize grammar lang (readTree grammar payload)
      ["parse", cat]       -> unlines (map showTree (concat (parseAll grammar cat payload)))
      ["parse", lang, cat] -> unlines (map showTree (parse grammar lang cat payload))
      ["langs"]            -> unwords (languages grammar)
      ["cats"]             -> unwords (categories grammar)
      ["help"]             -> helpMsg
      _                    -> "command not interpreted: " ++ input
    -- Split at the first colon: words before it form the command,
    -- everything after it (colon dropped) is the argument.
    (beforeColon, afterColon) = span (/=':') input
    command = words beforeColon
    payload = drop 1 afterColon
    helpMsg = unlines [
      "lin : <Tree>",
      "lin <Lang> : <Tree>",
      "parse <Cat> : <String>",
      "parse <Lang> <Cat> : <String>",
      "langs",
      "cats",
      "help",
      "quit"
      ]