mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 03:32:51 -06:00
in example-based grammar conversion, warnings about unknown words
This commit is contained in:
@@ -9,18 +9,18 @@ incomplete concrete QuestionsI of Questions = open Lang in {
|
|||||||
|
|
||||||
lin
|
lin
|
||||||
Who love_V2 man_N = (
|
Who love_V2 man_N = (
|
||||||
|
--- WARNING: ambiguous example who loves men
|
||||||
UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestVP whoSg_IP (ComplSlash (SlashV2a love_V2) (DetCN (DetQuant IndefArt NumPl) (UseN man_N)))))
|
UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestVP whoSg_IP (ComplSlash (SlashV2a love_V2) (DetCN (DetQuant IndefArt NumPl) (UseN man_N)))))
|
||||||
)
|
)
|
||||||
;
|
;
|
||||||
Whom man_N love_V2 = (
|
Whom man_N love_V2 = (
|
||||||
|
--- WARNING: ambiguous example whom does the man love
|
||||||
UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestSlash whoPl_IP (SlashVP (DetCN (DetQuant DefArt NumSg) (UseN man_N)) (SlashV2a love_V2))))
|
UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestSlash whoPl_IP (SlashVP (DetCN (DetQuant DefArt NumSg) (UseN man_N)) (SlashV2a love_V2))))
|
||||||
--- UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestSlash whoSg_IP (SlashVP (DetCN (DetQuant DefArt NumSg) (UseN man_N)) (SlashV2a love_V2))))
|
--- UttQS (UseQCl (TTAnt TPres ASimul) PPos (QuestSlash whoSg_IP (SlashVP (DetCN (DetQuant DefArt NumSg) (UseN man_N)) (SlashV2a love_V2))))
|
||||||
)
|
)
|
||||||
;
|
;
|
||||||
Answer woman_N love_V2 man_N = (
|
Answer woman_N love_V2 man_N = (
|
||||||
UttNP (DetCN (DetQuant DefArt NumSg) (ApposCN (ApposCN (UseN woman_N) (DetCN (DetQuant IndefArt NumPl) (UseN love_N))) (DetCN (DetQuant IndefArt NumPl) (UseN man_N))))
|
--- WARNING: cannot parse example the woman lovess men; unknown words: lovess
|
||||||
--- UttNP (DetCN (DetQuant DefArt NumSg) (ApposCN (UseN woman_N) (DetCN (DetQuant IndefArt NumPl) (ApposCN (UseN love_N) (DetCN (DetQuant IndefArt NumPl) (UseN man_N))))))
|
|
||||||
--- UttS (UseCl (TTAnt TPres ASimul) PPos (PredVP (DetCN (DetQuant DefArt NumSg) (UseN woman_N)) (ComplSlash (SlashV2a love_V2) (DetCN (DetQuant IndefArt NumPl) (UseN man_N)))))
|
|
||||||
)
|
)
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|||||||
@@ -254,7 +254,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
|
|||||||
exec = \opts _ -> do
|
exec = \opts _ -> do
|
||||||
let file = optFile opts
|
let file = optFile opts
|
||||||
mprobs <- optProbs opts pgf
|
mprobs <- optProbs opts pgf
|
||||||
let conf = configureExBased pgf mprobs (optLang opts)
|
let conf = configureExBased pgf (optMorpho opts) mprobs (optLang opts)
|
||||||
file' <- parseExamplesInGrammar conf file
|
file' <- parseExamplesInGrammar conf file
|
||||||
return (fromString ("wrote " ++ file')),
|
return (fromString ("wrote " ++ file')),
|
||||||
needsTypeCheck = False
|
needsTypeCheck = False
|
||||||
@@ -397,7 +397,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
|
|||||||
exec = \opts -> case opts of
|
exec = \opts -> case opts of
|
||||||
_ | isOpt "missing" opts ->
|
_ | isOpt "missing" opts ->
|
||||||
return . fromString . unwords .
|
return . fromString . unwords .
|
||||||
morphoMissing (theMorpho opts) .
|
morphoMissing (optMorpho opts) .
|
||||||
concatMap words . toStrings
|
concatMap words . toStrings
|
||||||
_ -> return . fromString . unlines .
|
_ -> return . fromString . unlines .
|
||||||
map prMorphoAnalysis . concatMap (morphos opts) .
|
map prMorphoAnalysis . concatMap (morphos opts) .
|
||||||
@@ -925,9 +925,9 @@ allCommands cod env@(pgf, mos) = Map.fromList [
|
|||||||
[] -> ([], "no trees found")
|
[] -> ([], "no trees found")
|
||||||
_ -> fromExprs es
|
_ -> fromExprs es
|
||||||
returnFromExprsPar opts ts es = return $ case es of
|
returnFromExprsPar opts ts es = return $ case es of
|
||||||
[] -> ([], "no trees found; unknown words:" +++
|
[] -> ([], "no trees found" ++
|
||||||
unwords (morphoMissing (theMorpho opts)
|
missingWordMsg (optMorpho opts) (concatMap words (toStrings ts))
|
||||||
(concatMap words (toStrings ts))))
|
)
|
||||||
_ -> fromExprs es
|
_ -> fromExprs es
|
||||||
|
|
||||||
prGrammar opts
|
prGrammar opts
|
||||||
@@ -944,7 +944,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
|
|||||||
|
|
||||||
morpho z f la = maybe z f $ Map.lookup la mos
|
morpho z f la = maybe z f $ Map.lookup la mos
|
||||||
|
|
||||||
theMorpho opts = morpho (error "no morpho") id (head (optLangs opts))
|
optMorpho opts = morpho (error "no morpho") id (head (optLangs opts))
|
||||||
|
|
||||||
-- ps -f -g s returns g (f s)
|
-- ps -f -g s returns g (f s)
|
||||||
stringOps menv opts s = foldr (menvop . app) s (reverse opts) where
|
stringOps menv opts s = foldr (menvop . app) s (reverse opts) where
|
||||||
@@ -1016,6 +1016,4 @@ prMorphoAnalysis :: (String,[(Lemma,Analysis)]) -> String
|
|||||||
prMorphoAnalysis (w,lps) =
|
prMorphoAnalysis (w,lps) =
|
||||||
unlines (w:[showCId l ++ " : " ++ p | (l,p) <- lps])
|
unlines (w:[showCId l ++ " : " ++ p | (l,p) <- lps])
|
||||||
|
|
||||||
morphoMissing :: Morpho -> [String] -> [String]
|
|
||||||
morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w)]
|
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ module GF.Compile.ExampleBased (parseExamplesInGrammar,configureExBased) where
|
|||||||
|
|
||||||
import PGF
|
import PGF
|
||||||
import PGF.Probabilistic
|
import PGF.Probabilistic
|
||||||
|
import PGF.Morphology
|
||||||
|
|
||||||
parseExamplesInGrammar :: ExConfiguration -> FilePath -> IO FilePath
|
parseExamplesInGrammar :: ExConfiguration -> FilePath -> IO FilePath
|
||||||
parseExamplesInGrammar conf file = do
|
parseExamplesInGrammar conf file = do
|
||||||
@@ -30,30 +31,33 @@ convertFile conf src file = do
|
|||||||
(ex, end) = break (=='"') (tail exend)
|
(ex, end) = break (=='"') (tail exend)
|
||||||
in ((unwords (words cat),ex), tail end) -- quotes ignored
|
in ((unwords (words cat),ex), tail end) -- quotes ignored
|
||||||
pgf = resource_pgf conf
|
pgf = resource_pgf conf
|
||||||
|
morpho = resource_morpho conf
|
||||||
lang = language conf
|
lang = language conf
|
||||||
convEx (cat,ex) = do
|
convEx (cat,ex) = do
|
||||||
appn "("
|
appn "("
|
||||||
let typ = maybe (error "no valid cat") id $ readType cat
|
let typ = maybe (error "no valid cat") id $ readType cat
|
||||||
let ts = rank $ parse pgf lang typ ex
|
let ts = rank $ parse pgf lang typ ex
|
||||||
case ts of
|
case ts of
|
||||||
[] -> appv ("WARNING: cannot parse example " ++ ex)
|
[] -> appv ("WARNING: cannot parse example " ++ ex ++
|
||||||
t:tt -> appn t >> mapM_ (appn . (" --- " ++)) tt
|
missingWordMsg morpho (words ex))
|
||||||
|
t:tt -> appv ("WARNING: ambiguous example " ++ ex) >>
|
||||||
|
appn t >> mapM_ (appn . (" --- " ++)) tt
|
||||||
appn ")"
|
appn ")"
|
||||||
rank ts = case probs conf of
|
rank ts = case probs conf of
|
||||||
Just probs -> [showExpr [] t ++ " -- " ++ show p | (t,p) <- rankTreesByProbs probs ts]
|
Just probs -> [showExpr [] t ++ " -- " ++ show p | (t,p) <- rankTreesByProbs probs ts]
|
||||||
_ -> map (showExpr []) ts
|
_ -> map (showExpr []) ts
|
||||||
appf = appendFile file
|
appf = appendFile file
|
||||||
appn s = appf s >> appf "\n"
|
appn s = appf s >> appf "\n"
|
||||||
appv s = appn s >> putStrLn s
|
appv s = appn ("--- " ++ s) >> putStrLn s
|
||||||
|
|
||||||
data ExConfiguration = ExConf {
|
data ExConfiguration = ExConf {
|
||||||
resource_file :: FilePath,
|
resource_pgf :: PGF,
|
||||||
resource_pgf :: PGF,
|
resource_morpho :: Morpho,
|
||||||
probs :: Maybe Probabilities,
|
probs :: Maybe Probabilities,
|
||||||
verbose :: Bool,
|
verbose :: Bool,
|
||||||
language :: Language
|
language :: Language
|
||||||
}
|
}
|
||||||
|
|
||||||
configureExBased :: PGF -> Maybe Probabilities -> Language -> ExConfiguration
|
configureExBased :: PGF -> Morpho -> Maybe Probabilities -> Language -> ExConfiguration
|
||||||
configureExBased pgf mprobs lang = ExConf [] pgf mprobs False lang
|
configureExBased pgf morpho mprobs lang = ExConf pgf morpho mprobs False lang
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
module PGF.Morphology(Lemma,Analysis,Morpho,
|
module PGF.Morphology(Lemma,Analysis,Morpho,
|
||||||
buildMorpho,
|
buildMorpho,
|
||||||
lookupMorpho,fullFormLexicon) where
|
lookupMorpho,fullFormLexicon,
|
||||||
|
morphoMissing,missingWordMsg) where
|
||||||
|
|
||||||
import PGF.CId
|
import PGF.CId
|
||||||
import PGF.Data
|
import PGF.Data
|
||||||
@@ -10,6 +11,7 @@ import qualified Data.Set as Set
|
|||||||
import qualified Data.IntMap as IntMap
|
import qualified Data.IntMap as IntMap
|
||||||
import Data.Array.IArray
|
import Data.Array.IArray
|
||||||
import Data.List (intersperse)
|
import Data.List (intersperse)
|
||||||
|
import Data.Char (isDigit) ----
|
||||||
|
|
||||||
-- these 4 definitions depend on the datastructure used
|
-- these 4 definitions depend on the datastructure used
|
||||||
|
|
||||||
@@ -42,3 +44,13 @@ lookupMorpho (Morpho mo) s = maybe [] id $ Map.lookup s mo
|
|||||||
|
|
||||||
fullFormLexicon :: Morpho -> [(String,[(Lemma,Analysis)])]
|
fullFormLexicon :: Morpho -> [(String,[(Lemma,Analysis)])]
|
||||||
fullFormLexicon (Morpho mo) = Map.toList mo
|
fullFormLexicon (Morpho mo) = Map.toList mo
|
||||||
|
|
||||||
|
morphoMissing :: Morpho -> [String] -> [String]
|
||||||
|
morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w), notLiteral w] where
|
||||||
|
notLiteral w = not (all isDigit w) ---- should be defined somewhere
|
||||||
|
|
||||||
|
missingWordMsg :: Morpho -> [String] -> String
|
||||||
|
missingWordMsg morpho ws = case morphoMissing morpho ws of
|
||||||
|
[] -> ", but all words are known"
|
||||||
|
ws -> "; unknown words: " ++ unwords ws
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user