forked from GitHub/gf-core
154 lines
5.4 KiB
Haskell
154 lines
5.4 KiB
Haskell
----------------------------------------------------------------------
|
|
-- |
|
|
-- Module : MkConcrete
|
|
-- Maintainer : AR
|
|
-- Stability : (stable)
|
|
-- Portability : (portable)
|
|
--
|
|
-- > CVS $Date:
|
|
-- > CVS $Author:
|
|
-- > CVS $Revision:
|
|
--
|
|
-- Compile a gfe file into a concrete syntax by using the parser on a resource grammar.
|
|
-----------------------------------------------------------------------------
|
|
|
|
module GF.Compile.MkConcrete (mkConcretes) where
|
|
|
|
import GF.Grammar.Values (Tree,tree2exp)
|
|
import GF.Grammar.PrGrammar (prt_,prModule)
|
|
import GF.Grammar.Grammar --- (Term(..),SourceModule)
|
|
import GF.Grammar.Macros (composSafeOp, composOp, record2subst, zIdent)
|
|
import GF.Compile.ShellState --(firstStateGrammar,stateGrammarWords)
|
|
import GF.Compile.PGrammar (pTerm,pTrm)
|
|
import GF.Compile.Compile
|
|
import GF.Compile.PrOld (stripTerm)
|
|
import GF.Compile.GetGrammar
|
|
import GF.API
|
|
import GF.API.IOGrammar
|
|
import qualified GF.Embed.EmbedAPI as EA
|
|
|
|
import GF.Data.Operations
|
|
import GF.Infra.UseIO
|
|
import GF.Infra.Option
|
|
import GF.Infra.Modules
|
|
import GF.Infra.ReadFiles
|
|
import GF.System.Arch
|
|
import GF.UseGrammar.Treebank
|
|
|
|
import System.Directory
|
|
import Data.Char
|
|
import Control.Monad
|
|
import Data.List
|
|
|
|
-- translate strings into lin rules by parsing in a resource
|
|
-- grammar. AR 2/6/2005
|
|
|
|
-- Format of rule (on one line):
|
|
-- lin F x y = in C "ssss" ;
|
|
-- Format of resource path (on first line):
|
|
-- --# -resource=PATH
|
|
-- Other lines are copied verbatim.
|
|
-- A sequence of files can be processed with the same resource without
|
|
-- rebuilding the grammar and parser.
|
|
|
|
-- notice: we use a hand-crafted lexer and parser in order to preserve
|
|
-- the layout and comments in the rest of the file.
|
|
|
|
mkConcretes :: Options -> [FilePath] -> IO ()
|
|
mkConcretes opts files = do
|
|
ress <- mapM getResPath files
|
|
let grps = groupBy (\a b -> fst a == fst b) $
|
|
sortBy (\a b -> compare (fst a) (fst b)) $ zip ress files
|
|
mapM_ (mkCncGroups opts) [(rp,map snd gs) | gs@((rp,_):_) <- grps]
|
|
|
|
mkCncGroups opts0 ((res,path),files) = do
|
|
putStrLnFlush $ "Going to preprocess examples in " ++ unwords files
|
|
putStrLn $ "Compiling resource " ++ res
|
|
let opts = addOptions (options [beSilent,pathList path]) opts0
|
|
let treebank = oElem (iOpt "treebank") opts
|
|
resf <- useIOE res $ do
|
|
(fp,_) <- readFileLibraryIOE gfLibraryPath res
|
|
return fp
|
|
egr <- appIOE $ shellStateFromFiles opts emptyShellState resf
|
|
(parser,morpho) <- if treebank then do
|
|
tb <- err (\_ -> error $ "no treebank of name" +++ path)
|
|
return
|
|
(egr >>= flip findTreebank (zIdent path))
|
|
return (\_ -> flip (,) "Not in treebank" . map pTrm . lookupTreebank tb,
|
|
isWordInTreebank tb)
|
|
else do
|
|
gr <- err (\s -> putStrLn s >> error "resource grammar rejected")
|
|
(return . firstStateGrammar) egr
|
|
return
|
|
(\cat s ->
|
|
errVal ([],"No parse") $
|
|
optParseArgErrMsg (options [newFParser, firstCat cat, beVerbose]) gr s >>=
|
|
(\ (ts,e) -> return (map tree2exp ts, e)) ,
|
|
isKnownWord gr)
|
|
putStrLn "Building parser"
|
|
mapM_ (mkConcrete parser morpho) files
|
|
|
|
type Parser = String -> String -> ([Term],String)
|
|
type Morpho = String -> Bool
|
|
|
|
getResPath :: FilePath -> IO (String,String)
|
|
getResPath file = do
|
|
s <- liftM lines $ readFileIf file
|
|
case filter (not . all isSpace) s of
|
|
res:path:_ | is "resource" res && is "path" path -> return (val res, val path)
|
|
res:path:_ | is "resource" res && is "treebank" path -> return (val res, val path)
|
|
res:_ | is "resource" res -> return (val res, "")
|
|
_ -> error
|
|
"expected --# -resource=FILE and optional --# -path=PATH or --# -treebank=IDENT"
|
|
where
|
|
val = dropWhile (isSpace) . tail . dropWhile (not . (=='='))
|
|
is tag s = case words s of
|
|
"--#":w:_ -> isPrefixOf ('-':tag) w
|
|
_ -> False
|
|
|
|
|
|
mkConcrete :: Parser -> Morpho -> FilePath -> IO ()
|
|
mkConcrete parser morpho file = do
|
|
src <- appIOE (getSourceModule noOptions file) >>= err error return
|
|
let (src',msgs) = mkModule parser morpho src
|
|
let out = suffixFile "gf" $ justModuleName file
|
|
writeFile out $ "-- File generated by GF from " ++ file
|
|
appendFile out "\n"
|
|
appendFile out (prModule src')
|
|
appendFile out "{-\n"
|
|
appendFile out $ unlines $ filter (not . null) msgs
|
|
appendFile out "-}\n"
|
|
|
|
mkModule :: Parser -> Morpho -> SourceModule -> (SourceModule,[String])
|
|
mkModule parser morpho (name,src) = case src of
|
|
ModMod m@(Module mt st fs me ops js) ->
|
|
|
|
let js1 = jments m
|
|
(js2,msgs) = err error id $ appSTM (mapMTree mkInfo js1) []
|
|
mod2 = ModMod $ Module mt st fs me ops $ js2
|
|
in ((name,mod2), msgs)
|
|
where
|
|
mkInfo ni@(name,info) = case info of
|
|
CncFun mt (Yes trm) ppr -> do
|
|
trm' <- mkTrm trm
|
|
return (name, CncFun mt (Yes trm') ppr)
|
|
_ -> return ni
|
|
where
|
|
mkTrm t = case t of
|
|
Example (P _ cat) s -> parse cat s t
|
|
Example (Vr cat) s -> parse cat s t
|
|
_ -> composOp mkTrm t
|
|
parse cat s t = case parser (prt_ cat) s of
|
|
(tr:[], _) -> do
|
|
updateSTM ((("PARSED in" +++ prt_ name) : s : [prt_ tr]) ++)
|
|
return $ stripTerm tr
|
|
(tr:trs,_) -> do
|
|
updateSTM ((("AMBIGUOUS in" +++ prt_ name) : s : map prt_ trs) ++)
|
|
return $ stripTerm tr
|
|
([],ms) -> do
|
|
updateSTM ((("NO PARSE in" +++ prt_ name) : s : ms : [morph s]) ++)
|
|
return t
|
|
morph s = case [w | w <- words s, not (morpho w)] of
|
|
[] -> ""
|
|
ws -> "unknown words: " ++ unwords ws
|