---------------------------------------------------------------------- -- | -- Module : MkConcrete -- Maintainer : AR -- Stability : (stable) -- Portability : (portable) -- -- > CVS $Date: -- > CVS $Author: -- > CVS $Revision: -- -- Compile a gfe file into a concrete syntax by using the parser on a resource grammar. ----------------------------------------------------------------------------- module GF.Compile.MkConcrete (mkConcretes) where import GF.Grammar.Values (Tree,tree2exp) import GF.Grammar.PrGrammar (prt_,prModule) import GF.Grammar.Grammar --- (Term(..),SourceModule) import GF.Grammar.Macros (composSafeOp, composOp, record2subst, zIdent) import GF.Compile.ShellState --(firstStateGrammar,stateGrammarWords) import GF.Compile.PGrammar (pTerm,pTrm) import GF.Compile.Compile import GF.Compile.PrOld (stripTerm) import GF.Compile.GetGrammar import GF.API import GF.API.IOGrammar import qualified GF.Embed.EmbedAPI as EA import GF.Data.Operations import GF.Infra.UseIO import GF.Infra.Option import GF.Infra.Modules import GF.Infra.ReadFiles import GF.System.Arch import GF.UseGrammar.Treebank import System.Directory import Data.Char import Control.Monad import Data.List -- translate strings into lin rules by parsing in a resource -- grammar. AR 2/6/2005 -- Format of rule (on one line): -- lin F x y = in C "ssss" ; -- Format of resource path (on first line): -- --# -resource=PATH -- Other lines are copied verbatim. -- A sequence of files can be processed with the same resource without -- rebuilding the grammar and parser. -- notice: we use a hand-crafted lexer and parser in order to preserve -- the layout and comments in the rest of the file. mkConcretes :: Options -> [FilePath] -> IO () mkConcretes opts files = do ress <- mapM getResPath files let grps = groupBy (\a b -> fst a == fst b) $ sortBy (\a b -> compare (fst a) (fst b)) $ zip ress files mapM_ (mkCncGroups opts) [(rp,map snd gs) | gs@((rp,_):_) <- grps] mkCncGroups opts0 ((res,path),files) = do putStrLnFlush $ "Going to preprocess examples in " ++ unwords files putStrLn $ "Compiling resource " ++ res let opts = addOptions (options [beSilent,pathList path]) opts0 let treebank = oElem (iOpt "treebank") opts resf <- useIOE res $ do (fp,_) <- readFileLibraryIOE gfLibraryPath res return fp egr <- appIOE $ shellStateFromFiles opts emptyShellState resf (parser,morpho) <- if treebank then do tb <- err (\_ -> error $ "no treebank of name" +++ path) return (egr >>= flip findTreebank (zIdent path)) return (\_ -> flip (,) "Not in treebank" . map pTrm . lookupTreebank tb, isWordInTreebank tb) else do gr <- err (\s -> putStrLn s >> error "resource grammar rejected") (return . firstStateGrammar) egr return (\cat s -> errVal ([],"No parse") $ optParseArgErrMsg (options [newFParser, firstCat cat, beVerbose]) gr s >>= (\ (ts,e) -> return (map tree2exp ts, e)) , isKnownWord gr) putStrLn "Building parser" mapM_ (mkConcrete parser morpho) files type Parser = String -> String -> ([Term],String) type Morpho = String -> Bool getResPath :: FilePath -> IO (String,String) getResPath file = do s <- liftM lines $ readFileIf file case filter (not . all isSpace) s of res:path:_ | is "resource" res && is "path" path -> return (val res, val path) res:path:_ | is "resource" res && is "treebank" path -> return (val res, val path) res:_ | is "resource" res -> return (val res, "") _ -> error "expected --# -resource=FILE and optional --# -path=PATH or --# -treebank=IDENT" where val = dropWhile (isSpace) . tail . dropWhile (not . (=='=')) is tag s = case words s of "--#":w:_ -> isPrefixOf ('-':tag) w _ -> False mkConcrete :: Parser -> Morpho -> FilePath -> IO () mkConcrete parser morpho file = do src <- appIOE (getSourceModule noOptions file) >>= err error return let (src',msgs) = mkModule parser morpho src let out = suffixFile "gf" $ justModuleName file writeFile out $ "-- File generated by GF from " ++ file appendFile out "\n" appendFile out (prModule src') appendFile out "{-\n" appendFile out $ unlines $ filter (not . null) msgs appendFile out "-}\n" mkModule :: Parser -> Morpho -> SourceModule -> (SourceModule,[String]) mkModule parser morpho (name,src) = case src of ModMod m@(Module mt st fs me ops js) -> let js1 = jments m (js2,msgs) = err error id $ appSTM (mapMTree mkInfo js1) [] mod2 = ModMod $ Module mt st fs me ops $ js2 in ((name,mod2), msgs) where mkInfo ni@(name,info) = case info of CncFun mt (Yes trm) ppr -> do trm' <- mkTrm trm return (name, CncFun mt (Yes trm') ppr) _ -> return ni where mkTrm t = case t of Example (P _ cat) s -> parse cat s t Example (Vr cat) s -> parse cat s t _ -> composOp mkTrm t parse cat s t = case parser (prt_ cat) s of (tr:[], _) -> do updateSTM ((("PARSED in" +++ prt_ name) : s : [prt_ tr]) ++) return $ stripTerm tr (tr:trs,_) -> do updateSTM ((("AMBIGUOUS in" +++ prt_ name) : s : map prt_ trs) ++) return $ stripTerm tr ([],ms) -> do updateSTM ((("NO PARSE in" +++ prt_ name) : s : ms : [morph s]) ++) return t morph s = case [w | w <- words s, not (morpho w)] of [] -> "" ws -> "unknown words: " ++ unwords ws