forked from GitHub/gf-core
example based also with treebank, with real term parser
This commit is contained in:
@@ -1,23 +1,17 @@
|
|||||||
-- File generated by GF from QuestionsI.gfe
|
-- File generated by GF from QuestionsI.gfe
|
||||||
--# -resource=../../lib/resource-1.0/english/LangEng.gf
|
|
||||||
--# -path=.:present:prelude
|
|
||||||
|
|
||||||
-- to compile: gf -examples QuestionsI.gfe
|
|
||||||
-- or use directly gf <mkAnimals.gfs
|
|
||||||
|
|
||||||
incomplete concrete QuestionsI of Questions = open Lang in {
|
incomplete concrete QuestionsI of Questions = open Lang in {
|
||||||
lincat
|
lincat Action = V2 ;
|
||||||
Phrase = Phr ;
|
lin Answer = \woman_N -> \love_V2 -> \man_N -> % Lang.PhrUtt % Lang.NoPConj (% Lang.UttS (% Lang.UseCl % Lang.TPres % Lang.ASimul % Lang.PPos (% Lang.PredVP (% Lang.DetCN (% Lang.DetSg (% Lang.SgQuant % Lang.DefArt)% Lang.NoOrd)(% Lang.UseN % Lang.woman_N)) (% Lang.ComplV2 % Lang.love_V2 (% Lang.DetCN (% Lang.DetPl (% Lang.PlQuant % Lang.IndefArt)% Lang.NoNum % Lang.NoOrd)(% Lang.UseN % Lang.man_N)))))) % Lang.NoVoc ;
|
||||||
Entity = N ;
|
lincat Entity = N ;
|
||||||
Action = V2 ;
|
lincat Phrase = Phr ;
|
||||||
|
lin Who = \love_V2 -> \man_N -> in Phr "who lovs men" ;
|
||||||
lin
|
lin Whom = \man_N -> \love_V2 -> % Lang.PhrUtt % Lang.NoPConj (% Lang.UttQS (% Lang.UseQCl % Lang.TPres % Lang.ASimul % Lang.PPos (% Lang.QuestSlash % Lang.whoPl_IP (% Lang.SlashV2 (% Lang.DetCN (% Lang.DetSg (% Lang.SgQuant % Lang.DefArt)% Lang.NoOrd)(% Lang.UseN % Lang.man_N)) % Lang.love_V2)))) % Lang.NoVoc ;
|
||||||
Who love_V2 man_N = PhrUtt NoPConj (UttAdv (AdvSC (EmbedQS (UseQCl TPres ASimul PPos (QuestVP whoSg_IP (ComplV2 love_V2 (DetCN (DetPl (PlQuant IndefArt) NoNum NoOrd) (UseN man_N)))))))) NoVoc ; -- AMBIGUOUS:
|
}
|
||||||
-- PhrUtt NoPConj (UttQS (UseQCl TPres ASimul PPos (QuestVP whoSg_IP (ComplV2 love_V2 (DetCN (DetPl (PlQuant IndefArt) NoNum NoOrd) (UseN man_N)))))) NoVoc ;
|
{-
|
||||||
|
NO PARSE Who
|
||||||
Whom man_N love_V2 = PhrUtt NoPConj (UttQS (UseQCl TPres ASimul PPos (QuestSlash whoPl_IP (SlashV2 (DetCN (DetSg (SgQuant DefArt) NoOrd) (UseN man_N)) love_V2)))) NoVoc ; -- AMBIGUOUS:
|
who lovs men
|
||||||
-- PhrUtt NoPConj (UttQS (UseQCl TPres ASimul PPos (QuestSlash whoSg_IP (SlashV2 (DetCN (DetSg (SgQuant DefArt) NoOrd) (UseN man_N)) love_V2)))) NoVoc ;
|
unknown words: lovs
|
||||||
|
AMBIGUOUS Whom
|
||||||
Answer woman_N love_V2 man_N = PhrUtt NoPConj (UttS (UseCl TPres ASimul PPos (PredVP (DetCN (DetSg (SgQuant DefArt) NoOrd) (UseN woman_N)) (ComplV2 love_V2 (DetCN (DetPl (PlQuant IndefArt) NoNum NoOrd) (UseN man_N)))))) NoVoc ;
|
whom does the man love
|
||||||
|
PhrUtt NoPConj (UttQS (UseQCl TPres ASimul PPos (QuestSlash whoSg_IP (SlashV2 (DetCN (DetSg (SgQuant DefArt) NoOrd) (UseN man_N)) love_V2)))) NoVoc
|
||||||
}
|
-}
|
||||||
|
|||||||
@@ -63,7 +63,7 @@ main = do
|
|||||||
return ()
|
return ()
|
||||||
|
|
||||||
_ | opt makeConcrete -> do
|
_ | opt makeConcrete -> do
|
||||||
mkConcretes fs
|
mkConcretes os fs
|
||||||
|
|
||||||
_ | opt openEditor -> do
|
_ | opt openEditor -> do
|
||||||
system $ "jgf" +++ unwords xs
|
system $ "jgf" +++ unwords xs
|
||||||
@@ -88,7 +88,7 @@ main = do
|
|||||||
if opt fromExamples
|
if opt fromExamples
|
||||||
then do
|
then do
|
||||||
es <- liftM (nub . concat) $ mapM (getGFEFiles os) fs
|
es <- liftM (nub . concat) $ mapM (getGFEFiles os) fs
|
||||||
mkConcretes es
|
mkConcretes os es
|
||||||
doGF (removeOption fromExamples os) fs
|
doGF (removeOption fromExamples os) fs
|
||||||
else doGF os fs
|
else doGF os fs
|
||||||
|
|
||||||
|
|||||||
@@ -12,15 +12,16 @@
|
|||||||
-- Compile a gfe file into a concrete syntax by using the parser on a resource grammar.
|
-- Compile a gfe file into a concrete syntax by using the parser on a resource grammar.
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
module GF.Compile.MkConcrete (mkConcretes,mkCncLine) where
|
module GF.Compile.MkConcrete (mkConcretes) where
|
||||||
|
|
||||||
import GF.Grammar.Values (Tree,tree2exp)
|
import GF.Grammar.Values (Tree,tree2exp)
|
||||||
import GF.Grammar.PrGrammar (prt_)
|
import GF.Grammar.PrGrammar (prt_,prModule)
|
||||||
import GF.Grammar.Grammar (Term(Q,QC)) ---
|
import GF.Grammar.Grammar --- (Term(..),SourceModule)
|
||||||
import GF.Grammar.Macros (composSafeOp, record2subst)
|
import GF.Grammar.Macros (composSafeOp, composOp, record2subst, zIdent)
|
||||||
import GF.Compile.ShellState --(firstStateGrammar,stateGrammarWords)
|
import GF.Compile.ShellState --(firstStateGrammar,stateGrammarWords)
|
||||||
import GF.Compile.PGrammar (pTerm)
|
import GF.Compile.PGrammar (pTerm,pTrm)
|
||||||
import GF.Compile.Compile
|
import GF.Compile.Compile
|
||||||
|
import GF.Compile.GetGrammar
|
||||||
import GF.API
|
import GF.API
|
||||||
import GF.API.IOGrammar
|
import GF.API.IOGrammar
|
||||||
import qualified GF.Embed.EmbedAPI as EA
|
import qualified GF.Embed.EmbedAPI as EA
|
||||||
@@ -28,8 +29,10 @@ import qualified GF.Embed.EmbedAPI as EA
|
|||||||
import GF.Data.Operations
|
import GF.Data.Operations
|
||||||
import GF.Infra.UseIO
|
import GF.Infra.UseIO
|
||||||
import GF.Infra.Option
|
import GF.Infra.Option
|
||||||
|
import GF.Infra.Modules
|
||||||
import GF.Infra.ReadFiles
|
import GF.Infra.ReadFiles
|
||||||
import GF.System.Arch
|
import GF.System.Arch
|
||||||
|
import GF.UseGrammar.Treebank
|
||||||
|
|
||||||
import System.Directory
|
import System.Directory
|
||||||
import Data.Char
|
import Data.Char
|
||||||
@@ -50,38 +53,40 @@ import Data.List
|
|||||||
-- notice: we use a hand-crafted lexer and parser in order to preserve
|
-- notice: we use a hand-crafted lexer and parser in order to preserve
|
||||||
-- the layout and comments in the rest of the file.
|
-- the layout and comments in the rest of the file.
|
||||||
|
|
||||||
mkConcretes :: [FilePath] -> IO ()
|
mkConcretes :: Options -> [FilePath] -> IO ()
|
||||||
mkConcretes files = do
|
mkConcretes opts files = do
|
||||||
ress <- mapM getResPath files
|
ress <- mapM getResPath files
|
||||||
let grps = groupBy (\a b -> fst a == fst b) $
|
let grps = groupBy (\a b -> fst a == fst b) $
|
||||||
sortBy (\a b -> compare (fst a) (fst b)) $ zip ress files
|
sortBy (\a b -> compare (fst a) (fst b)) $ zip ress files
|
||||||
mapM_ mkCncGroups [(rp,map snd gs) | gs@((rp,_):_) <- grps]
|
mapM_ (mkCncGroups opts) [(rp,map snd gs) | gs@((rp,_):_) <- grps]
|
||||||
|
|
||||||
mkCncGroups ((res,path),files) = do
|
mkCncGroups opts0 ((res,path),files) = do
|
||||||
putStrLnFlush $ "Going to preprocess examples in " ++ unwords files
|
putStrLnFlush $ "Going to preprocess examples in " ++ unwords files
|
||||||
putStrLn $ "Compiling resource " ++ res
|
putStrLn $ "Compiling resource " ++ res
|
||||||
let opts = options [beSilent,pathList path]
|
let opts = addOptions (options [beSilent,pathList path]) opts0
|
||||||
|
let treebank = oElem (iOpt "treebank") opts
|
||||||
egr <- appIOE $ shellStateFromFiles opts emptyShellState res
|
egr <- appIOE $ shellStateFromFiles opts emptyShellState res
|
||||||
gr <- err (\s -> putStrLn s >> error "resource grammar rejected")
|
(parser,morpho) <- if treebank then do
|
||||||
(return . firstStateGrammar) egr
|
tb <- err (\_ -> error "no treebank")
|
||||||
let parser cat =
|
return
|
||||||
errVal ([],"No parse") .
|
(egr >>= flip findTreebank (zIdent (unsuffixFile res)))
|
||||||
optParseArgErrMsg (options [newMParser, firstCat cat, beVerbose]) gr
|
return (\_ -> flip (,) "Not in treebank" . map pTrm . lookupTreebank tb,
|
||||||
let morpho = isKnownWord gr
|
isWordInTreebank tb)
|
||||||
|
else do
|
||||||
|
gr <- err (\s -> putStrLn s >> error "resource grammar rejected")
|
||||||
|
(return . firstStateGrammar) egr
|
||||||
|
return
|
||||||
|
(\cat s ->
|
||||||
|
errVal ([],"No parse") $
|
||||||
|
optParseArgErrMsg (options [newMParser, firstCat cat, beVerbose]) gr s >>=
|
||||||
|
(\ (ts,e) -> return (map tree2exp ts, e)) ,
|
||||||
|
isKnownWord gr)
|
||||||
putStrLn "Building parser"
|
putStrLn "Building parser"
|
||||||
mapM_ (mkConcrete parser morpho) files
|
mapM_ (mkConcrete parser morpho) files
|
||||||
|
|
||||||
type Parser = String -> String -> ([Tree],String)
|
type Parser = String -> String -> ([Term],String)
|
||||||
type Morpho = String -> Bool
|
type Morpho = String -> Bool
|
||||||
|
|
||||||
mkConcrete :: Parser -> Morpho -> FilePath -> IO ()
|
|
||||||
mkConcrete parser morpho file = do
|
|
||||||
cont <- liftM getExLines $ readFileIf file
|
|
||||||
let out = suffixFile "gf" $ justModuleName file
|
|
||||||
writeFile out $ "-- File generated by GF from " ++ file
|
|
||||||
appendFile out "\n"
|
|
||||||
mapM_ (mkCnc out parser morpho) cont
|
|
||||||
|
|
||||||
getResPath :: FilePath -> IO (String,String)
|
getResPath :: FilePath -> IO (String,String)
|
||||||
getResPath file = do
|
getResPath file = do
|
||||||
s <- liftM lines $ readFileIf file
|
s <- liftM lines $ readFileIf file
|
||||||
@@ -95,62 +100,46 @@ getResPath file = do
|
|||||||
"--#":w:_ -> isPrefixOf ('-':tag) w
|
"--#":w:_ -> isPrefixOf ('-':tag) w
|
||||||
_ -> False
|
_ -> False
|
||||||
|
|
||||||
getExLines :: String -> [Either String String]
|
|
||||||
getExLines = getl . lines where
|
|
||||||
getl ls = case ls of
|
|
||||||
s:ss | begEx (words s) -> case break endEx ls of
|
|
||||||
(x,y:z) -> Left (unwords (x ++ [y])) : getl z
|
|
||||||
_ -> Left s : getl ss
|
|
||||||
s:ss -> Right s : getl ss
|
|
||||||
[] -> []
|
|
||||||
begEx s = case s of
|
|
||||||
"=":"in":_ -> True
|
|
||||||
_:ws -> begEx ws
|
|
||||||
_ -> False
|
|
||||||
endEx s = case dropWhile isSpace (reverse s) of
|
|
||||||
';':_ -> True
|
|
||||||
_ -> False
|
|
||||||
|
|
||||||
mkCnc :: FilePath -> Parser -> Morpho -> Either String String -> IO ()
|
mkConcrete :: Parser -> Morpho -> FilePath -> IO ()
|
||||||
mkCnc out parser morpho line = do
|
mkConcrete parser morpho file = do
|
||||||
let (res,msg) = mkCncLine parser morpho line
|
src <- appIOE (getSourceModule noOptions file) >>= err error return
|
||||||
appendFile out res
|
let (src',msgs) = mkModule parser morpho src
|
||||||
|
let out = suffixFile "gf" $ justModuleName file
|
||||||
|
writeFile out $ "-- File generated by GF from " ++ file
|
||||||
appendFile out "\n"
|
appendFile out "\n"
|
||||||
ifNull (return ()) putStrLnFlush msg
|
appendFile out (prModule src')
|
||||||
|
appendFile out "{-\n"
|
||||||
|
appendFile out $ unlines $ filter (not . null) msgs
|
||||||
|
appendFile out "-}\n"
|
||||||
|
|
||||||
mkCncLine :: (String -> String -> ([Tree],String)) -> (String -> Bool) ->
|
mkModule :: Parser -> Morpho -> SourceModule -> (SourceModule,[String])
|
||||||
Either String String -> (String,String)
|
mkModule parser morpho (name,src) = case src of
|
||||||
mkCncLine parser morpho (Right line) = (line,[])
|
ModMod m@(Module mt st fs me ops js) ->
|
||||||
mkCncLine parser morpho (Left line) = mkLinRule (words line) where
|
|
||||||
mkLinRule s =
|
let js1 = jments m
|
||||||
let
|
(js2,msgs) = err error id $ appSTM (mapMTree mkInfo js1) []
|
||||||
(pre,str) = span (/= "in") s
|
mod2 = ModMod $ Module mt st fs me ops $ js2
|
||||||
([mcat],rest) = splitAt 1 $ tail str
|
in ((name,mod2), msgs)
|
||||||
(lin,subst) = span (/= '"') $ tail $ unwords rest
|
where
|
||||||
cat = reverse $ takeWhile (/= '.') $ reverse mcat
|
mkInfo ni@(name,info) = case info of
|
||||||
substs = doSubst (init (tail subst))
|
CncFun mt (Yes trm) ppr -> do
|
||||||
def
|
trm' <- mkTrm trm
|
||||||
| last pre /= "=" = line -- ordinary lin rule
|
return (name, CncFun mt (Yes trm') ppr)
|
||||||
| otherwise = case parser cat lin of
|
_ -> return ni
|
||||||
(t:ts,_) -> ind ++ unwords pre +++
|
where
|
||||||
substs (tree2exp t) +++ ";" ++
|
mkTrm t = case t of
|
||||||
if null ts then [] else (" -- AMBIGUOUS:" ++++
|
Example (P _ cat) s -> parse cat s t
|
||||||
unlines ["-- " ++ substs (tree2exp s) +++ ";" | s <- ts])
|
Example (Vr cat) s -> parse cat s t
|
||||||
([],msg) -> "{-" ++ line ++++ morph lin ++++ "-}"
|
_ -> composOp mkTrm t
|
||||||
in
|
parse cat s t = case parser (prt_ cat) s of
|
||||||
(def,def)
|
(tr:[], _) -> return tr
|
||||||
|
(tr:trs,_) -> do
|
||||||
|
updateSTM ((("AMBIGUOUS" +++ prt_ name) : s : map prt_ trs) ++)
|
||||||
|
return tr
|
||||||
|
([],ms) -> do
|
||||||
|
updateSTM ((("NO PARSE" +++ prt_ name) : s : ms : [morph s]) ++)
|
||||||
|
return t
|
||||||
morph s = case [w | w <- words s, not (morpho w)] of
|
morph s = case [w | w <- words s, not (morpho w)] of
|
||||||
[] -> ""
|
[] -> ""
|
||||||
ws -> "unknown words: " ++ unwords ws
|
ws -> "unknown words: " ++ unwords ws
|
||||||
ind = takeWhile isSpace line
|
|
||||||
|
|
||||||
doSubst :: String -> Term -> String
|
|
||||||
doSubst subst0 trm = prt_ $ subt subst trm where
|
|
||||||
subst
|
|
||||||
| all isSpace subst0 = []
|
|
||||||
| otherwise = err error id $ pTerm subst0 >>= record2subst
|
|
||||||
subt g t = case t of
|
|
||||||
Q _ c -> maybe t id $ lookup c g
|
|
||||||
QC _ c -> maybe t id $ lookup c g
|
|
||||||
_ -> composSafeOp (subt g) t
|
|
||||||
|
|
||||||
|
|||||||
@@ -128,6 +128,7 @@ data Term =
|
|||||||
| Typed Term Term -- ^ type-annotated term
|
| Typed Term Term -- ^ type-annotated term
|
||||||
--
|
--
|
||||||
-- /below this, the constructors are only for concrete syntax/
|
-- /below this, the constructors are only for concrete syntax/
|
||||||
|
| Example Term String -- ^ example-based term: @in M.C "foo"
|
||||||
| RecType [Labelling] -- ^ record type: @{ p : A ; ...}@
|
| RecType [Labelling] -- ^ record type: @{ p : A ; ...}@
|
||||||
| R [Assign] -- ^ record: @{ p = a ; ...}@
|
| R [Assign] -- ^ record: @{ p = a ; ...}@
|
||||||
| P Term Label -- ^ projection: @r.p@
|
| P Term Label -- ^ projection: @r.p@
|
||||||
|
|||||||
@@ -35,7 +35,9 @@ trGrammar (MGrammar ms) = P.Gr (map trModule ms) -- no includes
|
|||||||
trModule :: (Ident,SourceModInfo) -> P.ModDef
|
trModule :: (Ident,SourceModInfo) -> P.ModDef
|
||||||
trModule (i,mo) = case mo of
|
trModule (i,mo) = case mo of
|
||||||
ModMod m -> P.MModule compl typ body where
|
ModMod m -> P.MModule compl typ body where
|
||||||
compl = P.CMCompl -- always complete module
|
compl = case mstatus m of
|
||||||
|
MSIncomplete -> P.CMIncompl
|
||||||
|
_ -> P.CMCompl
|
||||||
i' = tri i
|
i' = tri i
|
||||||
typ = case typeOfModule mo of
|
typ = case typeOfModule mo of
|
||||||
MTResource -> P.MTResource i'
|
MTResource -> P.MTResource i'
|
||||||
@@ -140,6 +142,7 @@ trt trm = case trm of
|
|||||||
Prod x a b | isWildIdent x -> P.EProd (P.DExp (trt a)) (trt b)
|
Prod x a b | isWildIdent x -> P.EProd (P.DExp (trt a)) (trt b)
|
||||||
Prod x a b -> P.EProd (P.DDec [trb x] (trt a)) (trt b)
|
Prod x a b -> P.EProd (P.DDec [trb x] (trt a)) (trt b)
|
||||||
|
|
||||||
|
Example t s -> P.EExample (trt t) s
|
||||||
R [] -> P.ETuple [] --- to get correct parsing when read back
|
R [] -> P.ETuple [] --- to get correct parsing when read back
|
||||||
R r -> P.ERecord $ map trAssign r
|
R r -> P.ERecord $ map trAssign r
|
||||||
RecType r -> P.ERecord $ map trLabelling r
|
RecType r -> P.ERecord $ map trLabelling r
|
||||||
|
|||||||
@@ -437,6 +437,7 @@ transExp x = case x of
|
|||||||
EExtend exp0 exp -> liftM2 G.ExtR (transExp exp0) (transExp exp)
|
EExtend exp0 exp -> liftM2 G.ExtR (transExp exp0) (transExp exp)
|
||||||
EAbstr binds exp -> liftM2 M.mkAbs (mapM transBind binds) (transExp exp)
|
EAbstr binds exp -> liftM2 M.mkAbs (mapM transBind binds) (transExp exp)
|
||||||
ETyped exp0 exp -> liftM2 G.Typed (transExp exp0) (transExp exp)
|
ETyped exp0 exp -> liftM2 G.Typed (transExp exp0) (transExp exp)
|
||||||
|
EExample exp str -> liftM2 G.Example (transExp exp) (return str)
|
||||||
|
|
||||||
EProd decl exp -> liftM2 M.mkProdSimple (transDecl decl) (transExp exp)
|
EProd decl exp -> liftM2 M.mkProdSimple (transDecl decl) (transExp exp)
|
||||||
ETType exp0 exp -> liftM2 G.Table (transExp exp0) (transExp exp)
|
ETType exp0 exp -> liftM2 G.Table (transExp exp0) (transExp exp)
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ module GF.UseGrammar.Treebank (
|
|||||||
readMultiTreebank,
|
readMultiTreebank,
|
||||||
lookupTreebank,
|
lookupTreebank,
|
||||||
assocsTreebank,
|
assocsTreebank,
|
||||||
|
isWordInTreebank,
|
||||||
printAssoc
|
printAssoc
|
||||||
) where
|
) where
|
||||||
|
|
||||||
@@ -45,6 +46,7 @@ import GF.Infra.Ident (Ident)
|
|||||||
import GF.Infra.UseIO
|
import GF.Infra.UseIO
|
||||||
import qualified GF.Grammar.Abstract as A
|
import qualified GF.Grammar.Abstract as A
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
import qualified Data.Set as S
|
||||||
|
|
||||||
-- Generate a treebank with a multilingual grammar. AR 8/2/2006
|
-- Generate a treebank with a multilingual grammar. AR 8/2/2006
|
||||||
-- (c) Aarne Ranta 2006 under GNU GPL
|
-- (c) Aarne Ranta 2006 under GNU GPL
|
||||||
@@ -142,6 +144,9 @@ ret = [] -- return ()
|
|||||||
assocsTreebank :: UniTreebank -> [(String,[String])]
|
assocsTreebank :: UniTreebank -> [(String,[String])]
|
||||||
assocsTreebank = M.assocs
|
assocsTreebank = M.assocs
|
||||||
|
|
||||||
|
isWordInTreebank :: UniTreebank -> String -> Bool
|
||||||
|
isWordInTreebank tb w = S.member w (S.fromList (concatMap words (M.keys tb)))
|
||||||
|
|
||||||
printAssoc (s, ts) = s ++ concat ["\t" ++ t | t <- ts]
|
printAssoc (s, ts) = s ++ concat ["\t" ++ t | t <- ts]
|
||||||
|
|
||||||
getTreebanks :: [String] -> [(String,String,String)]
|
getTreebanks :: [String] -> [(String,String,String)]
|
||||||
|
|||||||
Reference in New Issue
Block a user