forked from GitHub/gf-core
example based also with treebank, with real term parser
This commit is contained in:
@@ -12,15 +12,16 @@
|
||||
-- Compile a gfe file into a concrete syntax by using the parser on a resource grammar.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Compile.MkConcrete (mkConcretes,mkCncLine) where
|
||||
module GF.Compile.MkConcrete (mkConcretes) where
|
||||
|
||||
import GF.Grammar.Values (Tree,tree2exp)
|
||||
import GF.Grammar.PrGrammar (prt_)
|
||||
import GF.Grammar.Grammar (Term(Q,QC)) ---
|
||||
import GF.Grammar.Macros (composSafeOp, record2subst)
|
||||
import GF.Grammar.PrGrammar (prt_,prModule)
|
||||
import GF.Grammar.Grammar --- (Term(..),SourceModule)
|
||||
import GF.Grammar.Macros (composSafeOp, composOp, record2subst, zIdent)
|
||||
import GF.Compile.ShellState --(firstStateGrammar,stateGrammarWords)
|
||||
import GF.Compile.PGrammar (pTerm)
|
||||
import GF.Compile.PGrammar (pTerm,pTrm)
|
||||
import GF.Compile.Compile
|
||||
import GF.Compile.GetGrammar
|
||||
import GF.API
|
||||
import GF.API.IOGrammar
|
||||
import qualified GF.Embed.EmbedAPI as EA
|
||||
@@ -28,8 +29,10 @@ import qualified GF.Embed.EmbedAPI as EA
|
||||
import GF.Data.Operations
|
||||
import GF.Infra.UseIO
|
||||
import GF.Infra.Option
|
||||
import GF.Infra.Modules
|
||||
import GF.Infra.ReadFiles
|
||||
import GF.System.Arch
|
||||
import GF.UseGrammar.Treebank
|
||||
|
||||
import System.Directory
|
||||
import Data.Char
|
||||
@@ -50,38 +53,40 @@ import Data.List
|
||||
-- notice: we use a hand-crafted lexer and parser in order to preserve
|
||||
-- the layout and comments in the rest of the file.
|
||||
|
||||
mkConcretes :: [FilePath] -> IO ()
|
||||
mkConcretes files = do
|
||||
mkConcretes :: Options -> [FilePath] -> IO ()
|
||||
mkConcretes opts files = do
|
||||
ress <- mapM getResPath files
|
||||
let grps = groupBy (\a b -> fst a == fst b) $
|
||||
sortBy (\a b -> compare (fst a) (fst b)) $ zip ress files
|
||||
mapM_ mkCncGroups [(rp,map snd gs) | gs@((rp,_):_) <- grps]
|
||||
mapM_ (mkCncGroups opts) [(rp,map snd gs) | gs@((rp,_):_) <- grps]
|
||||
|
||||
mkCncGroups ((res,path),files) = do
|
||||
mkCncGroups opts0 ((res,path),files) = do
|
||||
putStrLnFlush $ "Going to preprocess examples in " ++ unwords files
|
||||
putStrLn $ "Compiling resource " ++ res
|
||||
let opts = options [beSilent,pathList path]
|
||||
let opts = addOptions (options [beSilent,pathList path]) opts0
|
||||
let treebank = oElem (iOpt "treebank") opts
|
||||
egr <- appIOE $ shellStateFromFiles opts emptyShellState res
|
||||
gr <- err (\s -> putStrLn s >> error "resource grammar rejected")
|
||||
(return . firstStateGrammar) egr
|
||||
let parser cat =
|
||||
errVal ([],"No parse") .
|
||||
optParseArgErrMsg (options [newMParser, firstCat cat, beVerbose]) gr
|
||||
let morpho = isKnownWord gr
|
||||
(parser,morpho) <- if treebank then do
|
||||
tb <- err (\_ -> error "no treebank")
|
||||
return
|
||||
(egr >>= flip findTreebank (zIdent (unsuffixFile res)))
|
||||
return (\_ -> flip (,) "Not in treebank" . map pTrm . lookupTreebank tb,
|
||||
isWordInTreebank tb)
|
||||
else do
|
||||
gr <- err (\s -> putStrLn s >> error "resource grammar rejected")
|
||||
(return . firstStateGrammar) egr
|
||||
return
|
||||
(\cat s ->
|
||||
errVal ([],"No parse") $
|
||||
optParseArgErrMsg (options [newMParser, firstCat cat, beVerbose]) gr s >>=
|
||||
(\ (ts,e) -> return (map tree2exp ts, e)) ,
|
||||
isKnownWord gr)
|
||||
putStrLn "Building parser"
|
||||
mapM_ (mkConcrete parser morpho) files
|
||||
|
||||
type Parser = String -> String -> ([Tree],String)
|
||||
type Parser = String -> String -> ([Term],String)
|
||||
type Morpho = String -> Bool
|
||||
|
||||
mkConcrete :: Parser -> Morpho -> FilePath -> IO ()
|
||||
mkConcrete parser morpho file = do
|
||||
cont <- liftM getExLines $ readFileIf file
|
||||
let out = suffixFile "gf" $ justModuleName file
|
||||
writeFile out $ "-- File generated by GF from " ++ file
|
||||
appendFile out "\n"
|
||||
mapM_ (mkCnc out parser morpho) cont
|
||||
|
||||
getResPath :: FilePath -> IO (String,String)
|
||||
getResPath file = do
|
||||
s <- liftM lines $ readFileIf file
|
||||
@@ -95,62 +100,46 @@ getResPath file = do
|
||||
"--#":w:_ -> isPrefixOf ('-':tag) w
|
||||
_ -> False
|
||||
|
||||
getExLines :: String -> [Either String String]
|
||||
getExLines = getl . lines where
|
||||
getl ls = case ls of
|
||||
s:ss | begEx (words s) -> case break endEx ls of
|
||||
(x,y:z) -> Left (unwords (x ++ [y])) : getl z
|
||||
_ -> Left s : getl ss
|
||||
s:ss -> Right s : getl ss
|
||||
[] -> []
|
||||
begEx s = case s of
|
||||
"=":"in":_ -> True
|
||||
_:ws -> begEx ws
|
||||
_ -> False
|
||||
endEx s = case dropWhile isSpace (reverse s) of
|
||||
';':_ -> True
|
||||
_ -> False
|
||||
|
||||
mkCnc :: FilePath -> Parser -> Morpho -> Either String String -> IO ()
|
||||
mkCnc out parser morpho line = do
|
||||
let (res,msg) = mkCncLine parser morpho line
|
||||
appendFile out res
|
||||
mkConcrete :: Parser -> Morpho -> FilePath -> IO ()
|
||||
mkConcrete parser morpho file = do
|
||||
src <- appIOE (getSourceModule noOptions file) >>= err error return
|
||||
let (src',msgs) = mkModule parser morpho src
|
||||
let out = suffixFile "gf" $ justModuleName file
|
||||
writeFile out $ "-- File generated by GF from " ++ file
|
||||
appendFile out "\n"
|
||||
ifNull (return ()) putStrLnFlush msg
|
||||
appendFile out (prModule src')
|
||||
appendFile out "{-\n"
|
||||
appendFile out $ unlines $ filter (not . null) msgs
|
||||
appendFile out "-}\n"
|
||||
|
||||
mkCncLine :: (String -> String -> ([Tree],String)) -> (String -> Bool) ->
|
||||
Either String String -> (String,String)
|
||||
mkCncLine parser morpho (Right line) = (line,[])
|
||||
mkCncLine parser morpho (Left line) = mkLinRule (words line) where
|
||||
mkLinRule s =
|
||||
let
|
||||
(pre,str) = span (/= "in") s
|
||||
([mcat],rest) = splitAt 1 $ tail str
|
||||
(lin,subst) = span (/= '"') $ tail $ unwords rest
|
||||
cat = reverse $ takeWhile (/= '.') $ reverse mcat
|
||||
substs = doSubst (init (tail subst))
|
||||
def
|
||||
| last pre /= "=" = line -- ordinary lin rule
|
||||
| otherwise = case parser cat lin of
|
||||
(t:ts,_) -> ind ++ unwords pre +++
|
||||
substs (tree2exp t) +++ ";" ++
|
||||
if null ts then [] else (" -- AMBIGUOUS:" ++++
|
||||
unlines ["-- " ++ substs (tree2exp s) +++ ";" | s <- ts])
|
||||
([],msg) -> "{-" ++ line ++++ morph lin ++++ "-}"
|
||||
in
|
||||
(def,def)
|
||||
mkModule :: Parser -> Morpho -> SourceModule -> (SourceModule,[String])
|
||||
mkModule parser morpho (name,src) = case src of
|
||||
ModMod m@(Module mt st fs me ops js) ->
|
||||
|
||||
let js1 = jments m
|
||||
(js2,msgs) = err error id $ appSTM (mapMTree mkInfo js1) []
|
||||
mod2 = ModMod $ Module mt st fs me ops $ js2
|
||||
in ((name,mod2), msgs)
|
||||
where
|
||||
mkInfo ni@(name,info) = case info of
|
||||
CncFun mt (Yes trm) ppr -> do
|
||||
trm' <- mkTrm trm
|
||||
return (name, CncFun mt (Yes trm') ppr)
|
||||
_ -> return ni
|
||||
where
|
||||
mkTrm t = case t of
|
||||
Example (P _ cat) s -> parse cat s t
|
||||
Example (Vr cat) s -> parse cat s t
|
||||
_ -> composOp mkTrm t
|
||||
parse cat s t = case parser (prt_ cat) s of
|
||||
(tr:[], _) -> return tr
|
||||
(tr:trs,_) -> do
|
||||
updateSTM ((("AMBIGUOUS" +++ prt_ name) : s : map prt_ trs) ++)
|
||||
return tr
|
||||
([],ms) -> do
|
||||
updateSTM ((("NO PARSE" +++ prt_ name) : s : ms : [morph s]) ++)
|
||||
return t
|
||||
morph s = case [w | w <- words s, not (morpho w)] of
|
||||
[] -> ""
|
||||
ws -> "unknown words: " ++ unwords ws
|
||||
ind = takeWhile isSpace line
|
||||
|
||||
doSubst :: String -> Term -> String
|
||||
doSubst subst0 trm = prt_ $ subt subst trm where
|
||||
subst
|
||||
| all isSpace subst0 = []
|
||||
| otherwise = err error id $ pTerm subst0 >>= record2subst
|
||||
subt g t = case t of
|
||||
Q _ c -> maybe t id $ lookup c g
|
||||
QC _ c -> maybe t id $ lookup c g
|
||||
_ -> composSafeOp (subt g) t
|
||||
|
||||
|
||||
Reference in New Issue
Block a user