diff --git a/examples/animal/QuestionsI.gf b/examples/animal/QuestionsI.gf index e79b2e0c2..bc8451681 100644 --- a/examples/animal/QuestionsI.gf +++ b/examples/animal/QuestionsI.gf @@ -1,23 +1,17 @@ -- File generated by GF from QuestionsI.gfe ---# -resource=../../lib/resource-1.0/english/LangEng.gf ---# -path=.:present:prelude - --- to compile: gf -examples QuestionsI.gfe --- or use directly gf \love_V2 -> \man_N -> % Lang.PhrUtt % Lang.NoPConj (% Lang.UttS (% Lang.UseCl % Lang.TPres % Lang.ASimul % Lang.PPos (% Lang.PredVP (% Lang.DetCN (% Lang.DetSg (% Lang.SgQuant % Lang.DefArt)% Lang.NoOrd)(% Lang.UseN % Lang.woman_N)) (% Lang.ComplV2 % Lang.love_V2 (% Lang.DetCN (% Lang.DetPl (% Lang.PlQuant % Lang.IndefArt)% Lang.NoNum % Lang.NoOrd)(% Lang.UseN % Lang.man_N)))))) % Lang.NoVoc ; + lincat Entity = N ; + lincat Phrase = Phr ; + lin Who = \love_V2 -> \man_N -> in Phr "who lovs men" ; + lin Whom = \man_N -> \love_V2 -> % Lang.PhrUtt % Lang.NoPConj (% Lang.UttQS (% Lang.UseQCl % Lang.TPres % Lang.ASimul % Lang.PPos (% Lang.QuestSlash % Lang.whoPl_IP (% Lang.SlashV2 (% Lang.DetCN (% Lang.DetSg (% Lang.SgQuant % Lang.DefArt)% Lang.NoOrd)(% Lang.UseN % Lang.man_N)) % Lang.love_V2)))) % Lang.NoVoc ; + } +{- +NO PARSE Who +who lovs men +unknown words: lovs +AMBIGUOUS Whom +whom does the man love +PhrUtt NoPConj (UttQS (UseQCl TPres ASimul PPos (QuestSlash whoSg_IP (SlashV2 (DetCN (DetSg (SgQuant DefArt) NoOrd) (UseN man_N)) love_V2)))) NoVoc +-} diff --git a/src/GF.hs b/src/GF.hs index 054ff6e89..a4bf22c59 100644 --- a/src/GF.hs +++ b/src/GF.hs @@ -63,7 +63,7 @@ main = do return () _ | opt makeConcrete -> do - mkConcretes fs + mkConcretes os fs _ | opt openEditor -> do system $ "jgf" +++ unwords xs @@ -88,7 +88,7 @@ main = do if opt fromExamples then do es <- liftM (nub . concat) $ mapM (getGFEFiles os) fs - mkConcretes es + mkConcretes os es doGF (removeOption fromExamples os) fs else doGF os fs diff --git a/src/GF/Compile/MkConcrete.hs b/src/GF/Compile/MkConcrete.hs index a0af24007..aafa56242 100644 --- a/src/GF/Compile/MkConcrete.hs +++ b/src/GF/Compile/MkConcrete.hs @@ -12,15 +12,16 @@ -- Compile a gfe file into a concrete syntax by using the parser on a resource grammar. ----------------------------------------------------------------------------- -module GF.Compile.MkConcrete (mkConcretes,mkCncLine) where +module GF.Compile.MkConcrete (mkConcretes) where import GF.Grammar.Values (Tree,tree2exp) -import GF.Grammar.PrGrammar (prt_) -import GF.Grammar.Grammar (Term(Q,QC)) --- -import GF.Grammar.Macros (composSafeOp, record2subst) +import GF.Grammar.PrGrammar (prt_,prModule) +import GF.Grammar.Grammar --- (Term(..),SourceModule) +import GF.Grammar.Macros (composSafeOp, composOp, record2subst, zIdent) import GF.Compile.ShellState --(firstStateGrammar,stateGrammarWords) -import GF.Compile.PGrammar (pTerm) +import GF.Compile.PGrammar (pTerm,pTrm) import GF.Compile.Compile +import GF.Compile.GetGrammar import GF.API import GF.API.IOGrammar import qualified GF.Embed.EmbedAPI as EA @@ -28,8 +29,10 @@ import qualified GF.Embed.EmbedAPI as EA import GF.Data.Operations import GF.Infra.UseIO import GF.Infra.Option +import GF.Infra.Modules import GF.Infra.ReadFiles import GF.System.Arch +import GF.UseGrammar.Treebank import System.Directory import Data.Char @@ -50,38 +53,40 @@ import Data.List -- notice: we use a hand-crafted lexer and parser in order to preserve -- the layout and comments in the rest of the file. -mkConcretes :: [FilePath] -> IO () -mkConcretes files = do +mkConcretes :: Options -> [FilePath] -> IO () +mkConcretes opts files = do ress <- mapM getResPath files let grps = groupBy (\a b -> fst a == fst b) $ sortBy (\a b -> compare (fst a) (fst b)) $ zip ress files - mapM_ mkCncGroups [(rp,map snd gs) | gs@((rp,_):_) <- grps] + mapM_ (mkCncGroups opts) [(rp,map snd gs) | gs@((rp,_):_) <- grps] -mkCncGroups ((res,path),files) = do +mkCncGroups opts0 ((res,path),files) = do putStrLnFlush $ "Going to preprocess examples in " ++ unwords files putStrLn $ "Compiling resource " ++ res - let opts = options [beSilent,pathList path] + let opts = addOptions (options [beSilent,pathList path]) opts0 + let treebank = oElem (iOpt "treebank") opts egr <- appIOE $ shellStateFromFiles opts emptyShellState res - gr <- err (\s -> putStrLn s >> error "resource grammar rejected") - (return . firstStateGrammar) egr - let parser cat = - errVal ([],"No parse") . - optParseArgErrMsg (options [newMParser, firstCat cat, beVerbose]) gr - let morpho = isKnownWord gr + (parser,morpho) <- if treebank then do + tb <- err (\_ -> error "no treebank") + return + (egr >>= flip findTreebank (zIdent (unsuffixFile res))) + return (\_ -> flip (,) "Not in treebank" . map pTrm . lookupTreebank tb, + isWordInTreebank tb) + else do + gr <- err (\s -> putStrLn s >> error "resource grammar rejected") + (return . firstStateGrammar) egr + return + (\cat s -> + errVal ([],"No parse") $ + optParseArgErrMsg (options [newMParser, firstCat cat, beVerbose]) gr s >>= + (\ (ts,e) -> return (map tree2exp ts, e)) , + isKnownWord gr) putStrLn "Building parser" mapM_ (mkConcrete parser morpho) files -type Parser = String -> String -> ([Tree],String) +type Parser = String -> String -> ([Term],String) type Morpho = String -> Bool -mkConcrete :: Parser -> Morpho -> FilePath -> IO () -mkConcrete parser morpho file = do - cont <- liftM getExLines $ readFileIf file - let out = suffixFile "gf" $ justModuleName file - writeFile out $ "-- File generated by GF from " ++ file - appendFile out "\n" - mapM_ (mkCnc out parser morpho) cont - getResPath :: FilePath -> IO (String,String) getResPath file = do s <- liftM lines $ readFileIf file @@ -95,62 +100,46 @@ getResPath file = do "--#":w:_ -> isPrefixOf ('-':tag) w _ -> False -getExLines :: String -> [Either String String] -getExLines = getl . lines where - getl ls = case ls of - s:ss | begEx (words s) -> case break endEx ls of - (x,y:z) -> Left (unwords (x ++ [y])) : getl z - _ -> Left s : getl ss - s:ss -> Right s : getl ss - [] -> [] - begEx s = case s of - "=":"in":_ -> True - _:ws -> begEx ws - _ -> False - endEx s = case dropWhile isSpace (reverse s) of - ';':_ -> True - _ -> False -mkCnc :: FilePath -> Parser -> Morpho -> Either String String -> IO () -mkCnc out parser morpho line = do - let (res,msg) = mkCncLine parser morpho line - appendFile out res +mkConcrete :: Parser -> Morpho -> FilePath -> IO () +mkConcrete parser morpho file = do + src <- appIOE (getSourceModule noOptions file) >>= err error return + let (src',msgs) = mkModule parser morpho src + let out = suffixFile "gf" $ justModuleName file + writeFile out $ "-- File generated by GF from " ++ file appendFile out "\n" - ifNull (return ()) putStrLnFlush msg + appendFile out (prModule src') + appendFile out "{-\n" + appendFile out $ unlines $ filter (not . null) msgs + appendFile out "-}\n" -mkCncLine :: (String -> String -> ([Tree],String)) -> (String -> Bool) -> - Either String String -> (String,String) -mkCncLine parser morpho (Right line) = (line,[]) -mkCncLine parser morpho (Left line) = mkLinRule (words line) where - mkLinRule s = - let - (pre,str) = span (/= "in") s - ([mcat],rest) = splitAt 1 $ tail str - (lin,subst) = span (/= '"') $ tail $ unwords rest - cat = reverse $ takeWhile (/= '.') $ reverse mcat - substs = doSubst (init (tail subst)) - def - | last pre /= "=" = line -- ordinary lin rule - | otherwise = case parser cat lin of - (t:ts,_) -> ind ++ unwords pre +++ - substs (tree2exp t) +++ ";" ++ - if null ts then [] else (" -- AMBIGUOUS:" ++++ - unlines ["-- " ++ substs (tree2exp s) +++ ";" | s <- ts]) - ([],msg) -> "{-" ++ line ++++ morph lin ++++ "-}" - in - (def,def) +mkModule :: Parser -> Morpho -> SourceModule -> (SourceModule,[String]) +mkModule parser morpho (name,src) = case src of + ModMod m@(Module mt st fs me ops js) -> + + let js1 = jments m + (js2,msgs) = err error id $ appSTM (mapMTree mkInfo js1) [] + mod2 = ModMod $ Module mt st fs me ops $ js2 + in ((name,mod2), msgs) + where + mkInfo ni@(name,info) = case info of + CncFun mt (Yes trm) ppr -> do + trm' <- mkTrm trm + return (name, CncFun mt (Yes trm') ppr) + _ -> return ni + where + mkTrm t = case t of + Example (P _ cat) s -> parse cat s t + Example (Vr cat) s -> parse cat s t + _ -> composOp mkTrm t + parse cat s t = case parser (prt_ cat) s of + (tr:[], _) -> return tr + (tr:trs,_) -> do + updateSTM ((("AMBIGUOUS" +++ prt_ name) : s : map prt_ trs) ++) + return tr + ([],ms) -> do + updateSTM ((("NO PARSE" +++ prt_ name) : s : ms : [morph s]) ++) + return t morph s = case [w | w <- words s, not (morpho w)] of [] -> "" ws -> "unknown words: " ++ unwords ws - ind = takeWhile isSpace line - -doSubst :: String -> Term -> String -doSubst subst0 trm = prt_ $ subt subst trm where - subst - | all isSpace subst0 = [] - | otherwise = err error id $ pTerm subst0 >>= record2subst - subt g t = case t of - Q _ c -> maybe t id $ lookup c g - QC _ c -> maybe t id $ lookup c g - _ -> composSafeOp (subt g) t - diff --git a/src/GF/Grammar/Grammar.hs b/src/GF/Grammar/Grammar.hs index 47970c882..1c963ac66 100644 --- a/src/GF/Grammar/Grammar.hs +++ b/src/GF/Grammar/Grammar.hs @@ -128,6 +128,7 @@ data Term = | Typed Term Term -- ^ type-annotated term -- -- /below this, the constructors are only for concrete syntax/ + | Example Term String -- ^ example-based term: @in M.C "foo" | RecType [Labelling] -- ^ record type: @{ p : A ; ...}@ | R [Assign] -- ^ record: @{ p = a ; ...}@ | P Term Label -- ^ projection: @r.p@ diff --git a/src/GF/Source/GrammarToSource.hs b/src/GF/Source/GrammarToSource.hs index 526a29f4c..38c658dc4 100644 --- a/src/GF/Source/GrammarToSource.hs +++ b/src/GF/Source/GrammarToSource.hs @@ -35,7 +35,9 @@ trGrammar (MGrammar ms) = P.Gr (map trModule ms) -- no includes trModule :: (Ident,SourceModInfo) -> P.ModDef trModule (i,mo) = case mo of ModMod m -> P.MModule compl typ body where - compl = P.CMCompl -- always complete module + compl = case mstatus m of + MSIncomplete -> P.CMIncompl + _ -> P.CMCompl i' = tri i typ = case typeOfModule mo of MTResource -> P.MTResource i' @@ -140,6 +142,7 @@ trt trm = case trm of Prod x a b | isWildIdent x -> P.EProd (P.DExp (trt a)) (trt b) Prod x a b -> P.EProd (P.DDec [trb x] (trt a)) (trt b) + Example t s -> P.EExample (trt t) s R [] -> P.ETuple [] --- to get correct parsing when read back R r -> P.ERecord $ map trAssign r RecType r -> P.ERecord $ map trLabelling r diff --git a/src/GF/Source/SourceToGrammar.hs b/src/GF/Source/SourceToGrammar.hs index ffba51d6e..4aa5b55a6 100644 --- a/src/GF/Source/SourceToGrammar.hs +++ b/src/GF/Source/SourceToGrammar.hs @@ -437,6 +437,7 @@ transExp x = case x of EExtend exp0 exp -> liftM2 G.ExtR (transExp exp0) (transExp exp) EAbstr binds exp -> liftM2 M.mkAbs (mapM transBind binds) (transExp exp) ETyped exp0 exp -> liftM2 G.Typed (transExp exp0) (transExp exp) + EExample exp str -> liftM2 G.Example (transExp exp) (return str) EProd decl exp -> liftM2 M.mkProdSimple (transDecl decl) (transExp exp) ETType exp0 exp -> liftM2 G.Table (transExp exp0) (transExp exp) diff --git a/src/GF/UseGrammar/Treebank.hs b/src/GF/UseGrammar/Treebank.hs index f1dd5b75b..54ac8fb04 100644 --- a/src/GF/UseGrammar/Treebank.hs +++ b/src/GF/UseGrammar/Treebank.hs @@ -25,6 +25,7 @@ module GF.UseGrammar.Treebank ( readMultiTreebank, lookupTreebank, assocsTreebank, + isWordInTreebank, printAssoc ) where @@ -45,6 +46,7 @@ import GF.Infra.Ident (Ident) import GF.Infra.UseIO import qualified GF.Grammar.Abstract as A import qualified Data.Map as M +import qualified Data.Set as S -- Generate a treebank with a multilingual grammar. AR 8/2/2006 -- (c) Aarne Ranta 2006 under GNU GPL @@ -142,6 +144,9 @@ ret = [] -- return () assocsTreebank :: UniTreebank -> [(String,[String])] assocsTreebank = M.assocs +isWordInTreebank :: UniTreebank -> String -> Bool +isWordInTreebank tb w = S.member w (S.fromList (concatMap words (M.keys tb))) + printAssoc (s, ts) = s ++ concat ["\t" ++ t | t <- ts] getTreebanks :: [String] -> [(String,String,String)]