From e056cc2bfd2e5e152edd9829b8d4d77637536a14 Mon Sep 17 00:00:00 2001 From: aarne Date: Thu, 28 Jan 2010 10:06:16 +0000 Subject: [PATCH] in example-based grammar conversion, warnings about unknown words --- examples/animals/QuestionsI.gf | 6 +++--- src/compiler/GF/Command/Commands.hs | 14 ++++++-------- src/compiler/GF/Compile/ExampleBased.hs | 18 +++++++++++------- src/runtime/haskell/PGF/Morphology.hs | 14 +++++++++++++- 4 files changed, 33 insertions(+), 19 deletions(-) diff --git a/examples/animals/QuestionsI.gf b/examples/animals/QuestionsI.gf index 30476ccdb..c35eeb796 100644 --- a/examples/animals/QuestionsI.gf +++ b/examples/animals/QuestionsI.gf @@ -9,18 +9,18 @@ incomplete concrete QuestionsI of Questions = open Lang in { lin Who love_V2 man_N = ( +--- WARNING: ambiguous example who loves men UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestVP whoSg_IP (ComplSlash (SlashV2a love_V2) (DetCN (DetQuant IndefArt NumPl) (UseN man_N))))) ) ; Whom man_N love_V2 = ( +--- WARNING: ambiguous example whom does the man love UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestSlash whoPl_IP (SlashVP (DetCN (DetQuant DefArt NumSg) (UseN man_N)) (SlashV2a love_V2)))) --- UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestSlash whoSg_IP (SlashVP (DetCN (DetQuant DefArt NumSg) (UseN man_N)) (SlashV2a love_V2)))) ) ; Answer woman_N love_V2 man_N = ( -UttNP (DetCN (DetQuant DefArt NumSg) (ApposCN (ApposCN (UseN woman_N) (DetCN (DetQuant IndefArt NumPl) (UseN love_N))) (DetCN (DetQuant IndefArt NumPl) (UseN man_N)))) - --- UttNP (DetCN (DetQuant DefArt NumSg) (ApposCN (UseN woman_N) (DetCN (DetQuant IndefArt NumPl) (ApposCN (UseN love_N) (DetCN (DetQuant IndefArt NumPl) (UseN man_N)))))) - --- UttS (UseCl (TTAnt TPres ASimul) PPos (PredVP (DetCN (DetQuant DefArt NumSg) (UseN woman_N)) (ComplSlash (SlashV2a love_V2) (DetCN (DetQuant IndefArt NumPl) (UseN man_N))))) +--- WARNING: cannot parse example the woman lovess men; unknown words: lovess ) ; diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs index 7f8722d00..b56c19d72 100644 --- a/src/compiler/GF/Command/Commands.hs +++ b/src/compiler/GF/Command/Commands.hs @@ -254,7 +254,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [ exec = \opts _ -> do let file = optFile opts mprobs <- optProbs opts pgf - let conf = configureExBased pgf mprobs (optLang opts) + let conf = configureExBased pgf (optMorpho opts) mprobs (optLang opts) file' <- parseExamplesInGrammar conf file return (fromString ("wrote " ++ file')), needsTypeCheck = False @@ -397,7 +397,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [ exec = \opts -> case opts of _ | isOpt "missing" opts -> return . fromString . unwords . - morphoMissing (theMorpho opts) . + morphoMissing (optMorpho opts) . concatMap words . toStrings _ -> return . fromString . unlines . map prMorphoAnalysis . concatMap (morphos opts) . @@ -925,9 +925,9 @@ allCommands cod env@(pgf, mos) = Map.fromList [ [] -> ([], "no trees found") _ -> fromExprs es returnFromExprsPar opts ts es = return $ case es of - [] -> ([], "no trees found; unknown words:" +++ - unwords (morphoMissing (theMorpho opts) - (concatMap words (toStrings ts)))) + [] -> ([], "no trees found" ++ + missingWordMsg (optMorpho opts) (concatMap words (toStrings ts)) + ) _ -> fromExprs es prGrammar opts @@ -944,7 +944,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [ morpho z f la = maybe z f $ Map.lookup la mos - theMorpho opts = morpho (error "no morpho") id (head (optLangs opts)) + optMorpho opts = morpho (error "no morpho") id (head (optLangs opts)) -- ps -f -g s returns g (f s) stringOps menv opts s = foldr (menvop . app) s (reverse opts) where @@ -1016,6 +1016,4 @@ prMorphoAnalysis :: (String,[(Lemma,Analysis)]) -> String prMorphoAnalysis (w,lps) = unlines (w:[showCId l ++ " : " ++ p | (l,p) <- lps]) -morphoMissing :: Morpho -> [String] -> [String] -morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w)] diff --git a/src/compiler/GF/Compile/ExampleBased.hs b/src/compiler/GF/Compile/ExampleBased.hs index 10d7cdc88..93116c4eb 100644 --- a/src/compiler/GF/Compile/ExampleBased.hs +++ b/src/compiler/GF/Compile/ExampleBased.hs @@ -2,6 +2,7 @@ module GF.Compile.ExampleBased (parseExamplesInGrammar,configureExBased) where import PGF import PGF.Probabilistic +import PGF.Morphology parseExamplesInGrammar :: ExConfiguration -> FilePath -> IO FilePath parseExamplesInGrammar conf file = do @@ -30,30 +31,33 @@ convertFile conf src file = do (ex, end) = break (=='"') (tail exend) in ((unwords (words cat),ex), tail end) -- quotes ignored pgf = resource_pgf conf + morpho = resource_morpho conf lang = language conf convEx (cat,ex) = do appn "(" let typ = maybe (error "no valid cat") id $ readType cat let ts = rank $ parse pgf lang typ ex case ts of - [] -> appv ("WARNING: cannot parse example " ++ ex) - t:tt -> appn t >> mapM_ (appn . (" --- " ++)) tt + [] -> appv ("WARNING: cannot parse example " ++ ex ++ + missingWordMsg morpho (words ex)) + t:tt -> appv ("WARNING: ambiguous example " ++ ex) >> + appn t >> mapM_ (appn . (" --- " ++)) tt appn ")" rank ts = case probs conf of Just probs -> [showExpr [] t ++ " -- " ++ show p | (t,p) <- rankTreesByProbs probs ts] _ -> map (showExpr []) ts appf = appendFile file appn s = appf s >> appf "\n" - appv s = appn s >> putStrLn s + appv s = appn ("--- " ++ s) >> putStrLn s data ExConfiguration = ExConf { - resource_file :: FilePath, - resource_pgf :: PGF, + resource_pgf :: PGF, + resource_morpho :: Morpho, probs :: Maybe Probabilities, verbose :: Bool, language :: Language } -configureExBased :: PGF -> Maybe Probabilities -> Language -> ExConfiguration -configureExBased pgf mprobs lang = ExConf [] pgf mprobs False lang +configureExBased :: PGF -> Morpho -> Maybe Probabilities -> Language -> ExConfiguration +configureExBased pgf morpho mprobs lang = ExConf pgf morpho mprobs False lang diff --git a/src/runtime/haskell/PGF/Morphology.hs b/src/runtime/haskell/PGF/Morphology.hs index f5c65b3ba..711f9c01d 100644 --- a/src/runtime/haskell/PGF/Morphology.hs +++ b/src/runtime/haskell/PGF/Morphology.hs @@ -1,6 +1,7 @@ module PGF.Morphology(Lemma,Analysis,Morpho, buildMorpho, - lookupMorpho,fullFormLexicon) where + lookupMorpho,fullFormLexicon, + morphoMissing,missingWordMsg) where import PGF.CId import PGF.Data @@ -10,6 +11,7 @@ import qualified Data.Set as Set import qualified Data.IntMap as IntMap import Data.Array.IArray import Data.List (intersperse) +import Data.Char (isDigit) ---- -- these 4 definitions depend on the datastructure used @@ -42,3 +44,13 @@ lookupMorpho (Morpho mo) s = maybe [] id $ Map.lookup s mo fullFormLexicon :: Morpho -> [(String,[(Lemma,Analysis)])] fullFormLexicon (Morpho mo) = Map.toList mo + +morphoMissing :: Morpho -> [String] -> [String] +morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w), notLiteral w] where + notLiteral w = not (all isDigit w) ---- should be defined somewhere + +missingWordMsg :: Morpho -> [String] -> String +missingWordMsg morpho ws = case morphoMissing morpho ws of + [] -> ", but all words are known" + ws -> "; unknown words: " ++ unwords ws +