1
0
forked from GitHub/gf-core

example based also with treebank, with real term parser

This commit is contained in:
aarne
2006-03-04 22:14:33 +00:00
parent ddc8a409ec
commit 8d4c4307b9
7 changed files with 95 additions and 102 deletions

View File

@@ -1,23 +1,17 @@
-- File generated by GF from QuestionsI.gfe -- File generated by GF from QuestionsI.gfe
--# -resource=../../lib/resource-1.0/english/LangEng.gf
--# -path=.:present:prelude
-- to compile: gf -examples QuestionsI.gfe
-- or use directly gf <mkAnimals.gfs
incomplete concrete QuestionsI of Questions = open Lang in { incomplete concrete QuestionsI of Questions = open Lang in {
lincat lincat Action = V2 ;
Phrase = Phr ; lin Answer = \woman_N -> \love_V2 -> \man_N -> % Lang.PhrUtt % Lang.NoPConj (% Lang.UttS (% Lang.UseCl % Lang.TPres % Lang.ASimul % Lang.PPos (% Lang.PredVP (% Lang.DetCN (% Lang.DetSg (% Lang.SgQuant % Lang.DefArt)% Lang.NoOrd)(% Lang.UseN % Lang.woman_N)) (% Lang.ComplV2 % Lang.love_V2 (% Lang.DetCN (% Lang.DetPl (% Lang.PlQuant % Lang.IndefArt)% Lang.NoNum % Lang.NoOrd)(% Lang.UseN % Lang.man_N)))))) % Lang.NoVoc ;
Entity = N ; lincat Entity = N ;
Action = V2 ; lincat Phrase = Phr ;
lin Who = \love_V2 -> \man_N -> in Phr "who lovs men" ;
lin lin Whom = \man_N -> \love_V2 -> % Lang.PhrUtt % Lang.NoPConj (% Lang.UttQS (% Lang.UseQCl % Lang.TPres % Lang.ASimul % Lang.PPos (% Lang.QuestSlash % Lang.whoPl_IP (% Lang.SlashV2 (% Lang.DetCN (% Lang.DetSg (% Lang.SgQuant % Lang.DefArt)% Lang.NoOrd)(% Lang.UseN % Lang.man_N)) % Lang.love_V2)))) % Lang.NoVoc ;
Who love_V2 man_N = PhrUtt NoPConj (UttAdv (AdvSC (EmbedQS (UseQCl TPres ASimul PPos (QuestVP whoSg_IP (ComplV2 love_V2 (DetCN (DetPl (PlQuant IndefArt) NoNum NoOrd) (UseN man_N)))))))) NoVoc ; -- AMBIGUOUS: }
-- PhrUtt NoPConj (UttQS (UseQCl TPres ASimul PPos (QuestVP whoSg_IP (ComplV2 love_V2 (DetCN (DetPl (PlQuant IndefArt) NoNum NoOrd) (UseN man_N)))))) NoVoc ; {-
NO PARSE Who
Whom man_N love_V2 = PhrUtt NoPConj (UttQS (UseQCl TPres ASimul PPos (QuestSlash whoPl_IP (SlashV2 (DetCN (DetSg (SgQuant DefArt) NoOrd) (UseN man_N)) love_V2)))) NoVoc ; -- AMBIGUOUS: who lovs men
-- PhrUtt NoPConj (UttQS (UseQCl TPres ASimul PPos (QuestSlash whoSg_IP (SlashV2 (DetCN (DetSg (SgQuant DefArt) NoOrd) (UseN man_N)) love_V2)))) NoVoc ; unknown words: lovs
AMBIGUOUS Whom
Answer woman_N love_V2 man_N = PhrUtt NoPConj (UttS (UseCl TPres ASimul PPos (PredVP (DetCN (DetSg (SgQuant DefArt) NoOrd) (UseN woman_N)) (ComplV2 love_V2 (DetCN (DetPl (PlQuant IndefArt) NoNum NoOrd) (UseN man_N)))))) NoVoc ; whom does the man love
PhrUtt NoPConj (UttQS (UseQCl TPres ASimul PPos (QuestSlash whoSg_IP (SlashV2 (DetCN (DetSg (SgQuant DefArt) NoOrd) (UseN man_N)) love_V2)))) NoVoc
} -}

View File

@@ -63,7 +63,7 @@ main = do
return () return ()
_ | opt makeConcrete -> do _ | opt makeConcrete -> do
mkConcretes fs mkConcretes os fs
_ | opt openEditor -> do _ | opt openEditor -> do
system $ "jgf" +++ unwords xs system $ "jgf" +++ unwords xs
@@ -88,7 +88,7 @@ main = do
if opt fromExamples if opt fromExamples
then do then do
es <- liftM (nub . concat) $ mapM (getGFEFiles os) fs es <- liftM (nub . concat) $ mapM (getGFEFiles os) fs
mkConcretes es mkConcretes os es
doGF (removeOption fromExamples os) fs doGF (removeOption fromExamples os) fs
else doGF os fs else doGF os fs

View File

@@ -12,15 +12,16 @@
-- Compile a gfe file into a concrete syntax by using the parser on a resource grammar. -- Compile a gfe file into a concrete syntax by using the parser on a resource grammar.
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
module GF.Compile.MkConcrete (mkConcretes,mkCncLine) where module GF.Compile.MkConcrete (mkConcretes) where
import GF.Grammar.Values (Tree,tree2exp) import GF.Grammar.Values (Tree,tree2exp)
import GF.Grammar.PrGrammar (prt_) import GF.Grammar.PrGrammar (prt_,prModule)
import GF.Grammar.Grammar (Term(Q,QC)) --- import GF.Grammar.Grammar --- (Term(..),SourceModule)
import GF.Grammar.Macros (composSafeOp, record2subst) import GF.Grammar.Macros (composSafeOp, composOp, record2subst, zIdent)
import GF.Compile.ShellState --(firstStateGrammar,stateGrammarWords) import GF.Compile.ShellState --(firstStateGrammar,stateGrammarWords)
import GF.Compile.PGrammar (pTerm) import GF.Compile.PGrammar (pTerm,pTrm)
import GF.Compile.Compile import GF.Compile.Compile
import GF.Compile.GetGrammar
import GF.API import GF.API
import GF.API.IOGrammar import GF.API.IOGrammar
import qualified GF.Embed.EmbedAPI as EA import qualified GF.Embed.EmbedAPI as EA
@@ -28,8 +29,10 @@ import qualified GF.Embed.EmbedAPI as EA
import GF.Data.Operations import GF.Data.Operations
import GF.Infra.UseIO import GF.Infra.UseIO
import GF.Infra.Option import GF.Infra.Option
import GF.Infra.Modules
import GF.Infra.ReadFiles import GF.Infra.ReadFiles
import GF.System.Arch import GF.System.Arch
import GF.UseGrammar.Treebank
import System.Directory import System.Directory
import Data.Char import Data.Char
@@ -50,38 +53,40 @@ import Data.List
-- notice: we use a hand-crafted lexer and parser in order to preserve -- notice: we use a hand-crafted lexer and parser in order to preserve
-- the layout and comments in the rest of the file. -- the layout and comments in the rest of the file.
mkConcretes :: [FilePath] -> IO () mkConcretes :: Options -> [FilePath] -> IO ()
mkConcretes files = do mkConcretes opts files = do
ress <- mapM getResPath files ress <- mapM getResPath files
let grps = groupBy (\a b -> fst a == fst b) $ let grps = groupBy (\a b -> fst a == fst b) $
sortBy (\a b -> compare (fst a) (fst b)) $ zip ress files sortBy (\a b -> compare (fst a) (fst b)) $ zip ress files
mapM_ mkCncGroups [(rp,map snd gs) | gs@((rp,_):_) <- grps] mapM_ (mkCncGroups opts) [(rp,map snd gs) | gs@((rp,_):_) <- grps]
mkCncGroups ((res,path),files) = do mkCncGroups opts0 ((res,path),files) = do
putStrLnFlush $ "Going to preprocess examples in " ++ unwords files putStrLnFlush $ "Going to preprocess examples in " ++ unwords files
putStrLn $ "Compiling resource " ++ res putStrLn $ "Compiling resource " ++ res
let opts = options [beSilent,pathList path] let opts = addOptions (options [beSilent,pathList path]) opts0
let treebank = oElem (iOpt "treebank") opts
egr <- appIOE $ shellStateFromFiles opts emptyShellState res egr <- appIOE $ shellStateFromFiles opts emptyShellState res
gr <- err (\s -> putStrLn s >> error "resource grammar rejected") (parser,morpho) <- if treebank then do
(return . firstStateGrammar) egr tb <- err (\_ -> error "no treebank")
let parser cat = return
errVal ([],"No parse") . (egr >>= flip findTreebank (zIdent (unsuffixFile res)))
optParseArgErrMsg (options [newMParser, firstCat cat, beVerbose]) gr return (\_ -> flip (,) "Not in treebank" . map pTrm . lookupTreebank tb,
let morpho = isKnownWord gr isWordInTreebank tb)
else do
gr <- err (\s -> putStrLn s >> error "resource grammar rejected")
(return . firstStateGrammar) egr
return
(\cat s ->
errVal ([],"No parse") $
optParseArgErrMsg (options [newMParser, firstCat cat, beVerbose]) gr s >>=
(\ (ts,e) -> return (map tree2exp ts, e)) ,
isKnownWord gr)
putStrLn "Building parser" putStrLn "Building parser"
mapM_ (mkConcrete parser morpho) files mapM_ (mkConcrete parser morpho) files
type Parser = String -> String -> ([Tree],String) type Parser = String -> String -> ([Term],String)
type Morpho = String -> Bool type Morpho = String -> Bool
mkConcrete :: Parser -> Morpho -> FilePath -> IO ()
mkConcrete parser morpho file = do
cont <- liftM getExLines $ readFileIf file
let out = suffixFile "gf" $ justModuleName file
writeFile out $ "-- File generated by GF from " ++ file
appendFile out "\n"
mapM_ (mkCnc out parser morpho) cont
getResPath :: FilePath -> IO (String,String) getResPath :: FilePath -> IO (String,String)
getResPath file = do getResPath file = do
s <- liftM lines $ readFileIf file s <- liftM lines $ readFileIf file
@@ -95,62 +100,46 @@ getResPath file = do
"--#":w:_ -> isPrefixOf ('-':tag) w "--#":w:_ -> isPrefixOf ('-':tag) w
_ -> False _ -> False
getExLines :: String -> [Either String String]
getExLines = getl . lines where
getl ls = case ls of
s:ss | begEx (words s) -> case break endEx ls of
(x,y:z) -> Left (unwords (x ++ [y])) : getl z
_ -> Left s : getl ss
s:ss -> Right s : getl ss
[] -> []
begEx s = case s of
"=":"in":_ -> True
_:ws -> begEx ws
_ -> False
endEx s = case dropWhile isSpace (reverse s) of
';':_ -> True
_ -> False
mkCnc :: FilePath -> Parser -> Morpho -> Either String String -> IO () mkConcrete :: Parser -> Morpho -> FilePath -> IO ()
mkCnc out parser morpho line = do mkConcrete parser morpho file = do
let (res,msg) = mkCncLine parser morpho line src <- appIOE (getSourceModule noOptions file) >>= err error return
appendFile out res let (src',msgs) = mkModule parser morpho src
let out = suffixFile "gf" $ justModuleName file
writeFile out $ "-- File generated by GF from " ++ file
appendFile out "\n" appendFile out "\n"
ifNull (return ()) putStrLnFlush msg appendFile out (prModule src')
appendFile out "{-\n"
appendFile out $ unlines $ filter (not . null) msgs
appendFile out "-}\n"
mkCncLine :: (String -> String -> ([Tree],String)) -> (String -> Bool) -> mkModule :: Parser -> Morpho -> SourceModule -> (SourceModule,[String])
Either String String -> (String,String) mkModule parser morpho (name,src) = case src of
mkCncLine parser morpho (Right line) = (line,[]) ModMod m@(Module mt st fs me ops js) ->
mkCncLine parser morpho (Left line) = mkLinRule (words line) where
mkLinRule s = let js1 = jments m
let (js2,msgs) = err error id $ appSTM (mapMTree mkInfo js1) []
(pre,str) = span (/= "in") s mod2 = ModMod $ Module mt st fs me ops $ js2
([mcat],rest) = splitAt 1 $ tail str in ((name,mod2), msgs)
(lin,subst) = span (/= '"') $ tail $ unwords rest where
cat = reverse $ takeWhile (/= '.') $ reverse mcat mkInfo ni@(name,info) = case info of
substs = doSubst (init (tail subst)) CncFun mt (Yes trm) ppr -> do
def trm' <- mkTrm trm
| last pre /= "=" = line -- ordinary lin rule return (name, CncFun mt (Yes trm') ppr)
| otherwise = case parser cat lin of _ -> return ni
(t:ts,_) -> ind ++ unwords pre +++ where
substs (tree2exp t) +++ ";" ++ mkTrm t = case t of
if null ts then [] else (" -- AMBIGUOUS:" ++++ Example (P _ cat) s -> parse cat s t
unlines ["-- " ++ substs (tree2exp s) +++ ";" | s <- ts]) Example (Vr cat) s -> parse cat s t
([],msg) -> "{-" ++ line ++++ morph lin ++++ "-}" _ -> composOp mkTrm t
in parse cat s t = case parser (prt_ cat) s of
(def,def) (tr:[], _) -> return tr
(tr:trs,_) -> do
updateSTM ((("AMBIGUOUS" +++ prt_ name) : s : map prt_ trs) ++)
return tr
([],ms) -> do
updateSTM ((("NO PARSE" +++ prt_ name) : s : ms : [morph s]) ++)
return t
morph s = case [w | w <- words s, not (morpho w)] of morph s = case [w | w <- words s, not (morpho w)] of
[] -> "" [] -> ""
ws -> "unknown words: " ++ unwords ws ws -> "unknown words: " ++ unwords ws
ind = takeWhile isSpace line
doSubst :: String -> Term -> String
doSubst subst0 trm = prt_ $ subt subst trm where
subst
| all isSpace subst0 = []
| otherwise = err error id $ pTerm subst0 >>= record2subst
subt g t = case t of
Q _ c -> maybe t id $ lookup c g
QC _ c -> maybe t id $ lookup c g
_ -> composSafeOp (subt g) t

View File

@@ -128,6 +128,7 @@ data Term =
| Typed Term Term -- ^ type-annotated term | Typed Term Term -- ^ type-annotated term
-- --
-- /below this, the constructors are only for concrete syntax/ -- /below this, the constructors are only for concrete syntax/
| Example Term String -- ^ example-based term: @in M.C "foo"
| RecType [Labelling] -- ^ record type: @{ p : A ; ...}@ | RecType [Labelling] -- ^ record type: @{ p : A ; ...}@
| R [Assign] -- ^ record: @{ p = a ; ...}@ | R [Assign] -- ^ record: @{ p = a ; ...}@
| P Term Label -- ^ projection: @r.p@ | P Term Label -- ^ projection: @r.p@

View File

@@ -35,7 +35,9 @@ trGrammar (MGrammar ms) = P.Gr (map trModule ms) -- no includes
trModule :: (Ident,SourceModInfo) -> P.ModDef trModule :: (Ident,SourceModInfo) -> P.ModDef
trModule (i,mo) = case mo of trModule (i,mo) = case mo of
ModMod m -> P.MModule compl typ body where ModMod m -> P.MModule compl typ body where
compl = P.CMCompl -- always complete module compl = case mstatus m of
MSIncomplete -> P.CMIncompl
_ -> P.CMCompl
i' = tri i i' = tri i
typ = case typeOfModule mo of typ = case typeOfModule mo of
MTResource -> P.MTResource i' MTResource -> P.MTResource i'
@@ -140,6 +142,7 @@ trt trm = case trm of
Prod x a b | isWildIdent x -> P.EProd (P.DExp (trt a)) (trt b) Prod x a b | isWildIdent x -> P.EProd (P.DExp (trt a)) (trt b)
Prod x a b -> P.EProd (P.DDec [trb x] (trt a)) (trt b) Prod x a b -> P.EProd (P.DDec [trb x] (trt a)) (trt b)
Example t s -> P.EExample (trt t) s
R [] -> P.ETuple [] --- to get correct parsing when read back R [] -> P.ETuple [] --- to get correct parsing when read back
R r -> P.ERecord $ map trAssign r R r -> P.ERecord $ map trAssign r
RecType r -> P.ERecord $ map trLabelling r RecType r -> P.ERecord $ map trLabelling r

View File

@@ -437,6 +437,7 @@ transExp x = case x of
EExtend exp0 exp -> liftM2 G.ExtR (transExp exp0) (transExp exp) EExtend exp0 exp -> liftM2 G.ExtR (transExp exp0) (transExp exp)
EAbstr binds exp -> liftM2 M.mkAbs (mapM transBind binds) (transExp exp) EAbstr binds exp -> liftM2 M.mkAbs (mapM transBind binds) (transExp exp)
ETyped exp0 exp -> liftM2 G.Typed (transExp exp0) (transExp exp) ETyped exp0 exp -> liftM2 G.Typed (transExp exp0) (transExp exp)
EExample exp str -> liftM2 G.Example (transExp exp) (return str)
EProd decl exp -> liftM2 M.mkProdSimple (transDecl decl) (transExp exp) EProd decl exp -> liftM2 M.mkProdSimple (transDecl decl) (transExp exp)
ETType exp0 exp -> liftM2 G.Table (transExp exp0) (transExp exp) ETType exp0 exp -> liftM2 G.Table (transExp exp0) (transExp exp)

View File

@@ -25,6 +25,7 @@ module GF.UseGrammar.Treebank (
readMultiTreebank, readMultiTreebank,
lookupTreebank, lookupTreebank,
assocsTreebank, assocsTreebank,
isWordInTreebank,
printAssoc printAssoc
) where ) where
@@ -45,6 +46,7 @@ import GF.Infra.Ident (Ident)
import GF.Infra.UseIO import GF.Infra.UseIO
import qualified GF.Grammar.Abstract as A import qualified GF.Grammar.Abstract as A
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.Set as S
-- Generate a treebank with a multilingual grammar. AR 8/2/2006 -- Generate a treebank with a multilingual grammar. AR 8/2/2006
-- (c) Aarne Ranta 2006 under GNU GPL -- (c) Aarne Ranta 2006 under GNU GPL
@@ -142,6 +144,9 @@ ret = [] -- return ()
assocsTreebank :: UniTreebank -> [(String,[String])] assocsTreebank :: UniTreebank -> [(String,[String])]
assocsTreebank = M.assocs assocsTreebank = M.assocs
isWordInTreebank :: UniTreebank -> String -> Bool
isWordInTreebank tb w = S.member w (S.fromList (concatMap words (M.keys tb)))
printAssoc (s, ts) = s ++ concat ["\t" ++ t | t <- ts] printAssoc (s, ts) = s ++ concat ["\t" ++ t | t <- ts]
getTreebanks :: [String] -> [(String,String,String)] getTreebanks :: [String] -> [(String,String,String)]