example_based returns the list of all unknown words

This commit is contained in:
aarne
2010-01-28 16:27:28 +00:00
parent 8f0df0442a
commit 53a081bf0b
6 changed files with 39 additions and 22 deletions

View File

@@ -7,7 +7,7 @@ concrete AnimalsFre of Animals = QuestionsFre **
Dog = regN "chien" ;
Cat = regN "chat" ;
Mouse = regGenN "souris" feminine ;
Lion = regN "lion" ;
Lion = mkN "lion" masculine ;
Zebra = regGenN "zèbre" masculine ;
Chase = dirV2 (regV "chasser") ;
Eat = dirV2 (regV "manger") ;

View File

@@ -1,4 +1,4 @@
-- to compile: echo "eb -file=QuestionsI.gfe" | gf $GF_LIB_PATH/present/LangEng.gfo
-- to compile: echo "eb -file=QuestionsI.gfe" -probs=probs | gf $GF_LIB_PATH/present/LangEng.gfo
-- or use directly gf <mkAnimals.gfs
incomplete concrete QuestionsI of Questions = open Lang in {
@@ -10,17 +10,20 @@ incomplete concrete QuestionsI of Questions = open Lang in {
lin
Who love_V2 man_N = (
--- WARNING: ambiguous example who loves men
UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestVP whoSg_IP (ComplSlash (SlashV2a love_V2) (DetCN (DetQuant IndefArt NumPl) (UseN man_N)))))
UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestVP whoSg_IP (ComplSlash (SlashV2a love_V2) (DetCN (DetQuant IndefArt NumPl) (UseN man_N))))) -- 2.122431752061382e-11
)
;
Whom man_N love_V2 = (
--- WARNING: ambiguous example whom does the man love
UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestSlash whoPl_IP (SlashVP (DetCN (DetQuant DefArt NumSg) (UseN man_N)) (SlashV2a love_V2))))
--- UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestSlash whoSg_IP (SlashVP (DetCN (DetQuant DefArt NumSg) (UseN man_N)) (SlashV2a love_V2))))
UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestSlash whoPl_IP (SlashVP (DetCN (DetQuant DefArt NumSg) (UseN man_N)) (SlashV2a love_V2)))) -- 1.3265198450383634e-11
--- UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestSlash whoSg_IP (SlashVP (DetCN (DetQuant DefArt NumSg) (UseN man_N)) (SlashV2a love_V2)))) -- 1.3265198450383634e-11
)
;
Answer woman_N love_V2 man_N = (
--- WARNING: cannot parse example the woman lovess men; unknown words: lovess
--- WARNING: ambiguous example the woman loves men
UttS (UseCl (TTAnt TPres ASimul) PPos (PredVP (DetCN (DetQuant DefArt NumSg) (UseN woman_N)) (ComplSlash (SlashV2a love_V2) (DetCN (DetQuant IndefArt NumPl) (UseN man_N))))) -- 1.1637456560533483e-14
--- UttNP (DetCN (DetQuant DefArt NumSg) (ApposCN (ApposCN (UseN woman_N) (DetCN (DetQuant IndefArt NumPl) (UseN love_N))) (DetCN (DetQuant IndefArt NumPl) (UseN man_N)))) -- 2.018579347059343e-20
--- UttNP (DetCN (DetQuant DefArt NumSg) (ApposCN (UseN woman_N) (DetCN (DetQuant IndefArt NumPl) (ApposCN (UseN love_N) (DetCN (DetQuant IndefArt NumPl) (UseN man_N)))))) -- 2.018579347059343e-20
)
;

View File

@@ -1,4 +1,4 @@
-- to compile: echo "eb -file=QuestionsI.gfe" | gf $GF_LIB_PATH/present/LangEng.gfo
-- to compile: echo "eb -file=QuestionsI.gfe" -probs=probs | gf $GF_LIB_PATH/present/LangEng.gfo
-- or use directly gf <mkAnimals.gfs
incomplete concrete QuestionsI of Questions = open Lang in {

View File

@@ -28,3 +28,5 @@ TFut 0.1
TCond 0.1
PPos 0.7
ApposCN 0.01

View File

@@ -255,7 +255,8 @@ allCommands cod env@(pgf, mos) = Map.fromList [
let file = optFile opts
mprobs <- optProbs opts pgf
let conf = configureExBased pgf (optMorpho opts) mprobs (optLang opts)
file' <- parseExamplesInGrammar conf file
(file',ws) <- parseExamplesInGrammar conf file
if null ws then return () else putStrLn ("unknown words: " ++ unwords ws)
return (fromString ("wrote " ++ file')),
needsTypeCheck = False
}),

View File

@@ -1,26 +1,31 @@
module GF.Compile.ExampleBased (parseExamplesInGrammar,configureExBased) where
module GF.Compile.ExampleBased (
parseExamplesInGrammar,
configureExBased
) where
import PGF
import PGF.Probabilistic
import PGF.Morphology
parseExamplesInGrammar :: ExConfiguration -> FilePath -> IO FilePath
import Data.List
parseExamplesInGrammar :: ExConfiguration -> FilePath -> IO (FilePath,[String])
parseExamplesInGrammar conf file = do
src <- readFile file -- .gfe
let file' = take (length file - 3) file ++ "gf" -- .gf
convertFile conf src file'
return file'
ws <- convertFile conf src file'
return (file',ws)
convertFile :: ExConfiguration -> String -> FilePath -> IO ()
convertFile :: ExConfiguration -> String -> FilePath -> IO [String]
convertFile conf src file = do
writeFile file "" -- "-- created by example-based grammar writing in GF\n"
conv src
conv [] src
where
conv s = do
conv ws s = do
(cex,end) <- findExample s
if null end then return () else do
convEx cex
conv end
if null end then return (nub (sort ws)) else do
ws2 <- convEx cex
conv (ws2 ++ ws) end
findExample s = case s of
'%':'e':'x':cs -> return $ getExample cs
c:cs -> appf [c] >> findExample cs
@@ -37,12 +42,18 @@ convertFile conf src file = do
appn "("
let typ = maybe (error "no valid cat") id $ readType cat
let ts = rank $ parse pgf lang typ ex
case ts of
[] -> appv ("WARNING: cannot parse example " ++ ex ++
missingWordMsg morpho (words ex))
ws <- case ts of
[] -> do
let ws = morphoMissing morpho (words ex)
appv ("WARNING: cannot parse example " ++ ex)
case ws of
[] -> return ()
_ -> appv (" missing words: " ++ unwords ws)
return ws
t:tt -> appv ("WARNING: ambiguous example " ++ ex) >>
appn t >> mapM_ (appn . (" --- " ++)) tt
appn t >> mapM_ (appn . (" --- " ++)) tt >> return []
appn ")"
return ws
rank ts = case probs conf of
Just probs -> [showExpr [] t ++ " -- " ++ show p | (t,p) <- rankTreesByProbs probs ts]
_ -> map (showExpr []) ts