1
0
forked from GitHub/gf-core

morpho analysis with -missing flag, shows words outside lexicon; also invoked if parsing fails; also added pg -words to show the list of words

This commit is contained in:
aarne
2009-12-31 11:02:26 +00:00
parent b92c34bafd
commit 34b839c0f9
3 changed files with 44 additions and 14 deletions

View File

@@ -359,12 +359,22 @@ allCommands cod env@(pgf, mos) = Map.fromList [
synopsis = "print the morphological analyses of all words in the string", synopsis = "print the morphological analyses of all words in the string",
explanation = unlines [ explanation = unlines [
"Prints all the analyses of space-separated words in the input string,", "Prints all the analyses of space-separated words in the input string,",
"using the morphological analyser of the actual grammar (see command pf)" "using the morphological analyser of the actual grammar (see command pg)"
], ],
exec = \opts -> exec = \opts -> case opts of
return . fromString . unlines . _ | isOpt "missing" opts ->
map prMorphoAnalysis . concatMap (morphos opts) . return . fromString . unwords .
concatMap words . toStrings morphoMissing (theMorpho opts) .
concatMap words . toStrings
_ -> return . fromString . unlines .
map prMorphoAnalysis . concatMap (morphos opts) .
concatMap words . toStrings ,
flags = [
("lang","the languages of analysis (comma-separated, no spaces)")
],
options = [
("missing","show the list of unknown words in the input")
]
}), }),
("mq", emptyCommandInfo { ("mq", emptyCommandInfo {
@@ -395,7 +405,8 @@ allCommands cod env@(pgf, mos) = Map.fromList [
"the parser. For example if -openclass=\"A,N,V\" is given, the parser", "the parser. For example if -openclass=\"A,N,V\" is given, the parser",
"will accept unknown adjectives, nouns and verbs with the resource grammar." "will accept unknown adjectives, nouns and verbs with the resource grammar."
], ],
exec = \opts -> returnFromExprs . concatMap (par opts) . toStrings, exec = \opts ts ->
returnFromExprsPar opts ts $ concatMap (par opts) $ toStrings ts,
flags = [ flags = [
("cat","target category of parsing"), ("cat","target category of parsing"),
("lang","the languages of parsing (comma-separated, no spaces)"), ("lang","the languages of parsing (comma-separated, no spaces)"),
@@ -424,7 +435,8 @@ allCommands cod env@(pgf, mos) = Map.fromList [
options = [ options = [
("cats", "show just the names of abstract syntax categories"), ("cats", "show just the names of abstract syntax categories"),
("fullform", "print the fullform lexicon"), ("fullform", "print the fullform lexicon"),
("missing","show just the names of functions that have no linearization") ("missing","show just the names of functions that have no linearization"),
("words", "print the list of words")
] ]
}), }),
("ph", emptyCommandInfo { ("ph", emptyCommandInfo {
@@ -770,6 +782,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
] ]
where where
enc = encodeUnicode cod enc = encodeUnicode cod
par opts s = case optOpenTypes opts of par opts s = case optOpenTypes opts of
[] -> concat [parse pgf lang (optType opts) s | lang <- optLangs opts, canParse pgf lang] [] -> concat [parse pgf lang (optType opts) s | lang <- optLangs opts, canParse pgf lang]
open_typs -> concat [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts, canParse pgf lang] open_typs -> concat [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts, canParse pgf lang]
@@ -847,20 +860,28 @@ allCommands cod env@(pgf, mos) = Map.fromList [
returnFromExprs es = return $ case es of returnFromExprs es = return $ case es of
[] -> ([], "no trees found") [] -> ([], "no trees found")
_ -> fromExprs es _ -> fromExprs es
returnFromExprsPar opts ts es = return $ case es of
[] -> ([], "no trees found; unknown words:" +++
unwords (morphoMissing (theMorpho opts)
(concatMap words (toStrings ts))))
_ -> fromExprs es
prGrammar opts prGrammar opts
| isOpt "cats" opts = return $ fromString $ unwords $ map showCId $ categories pgf | isOpt "cats" opts = return $ fromString $ unwords $ map showCId $ categories pgf
| isOpt "fullform" opts = return $ fromString $ concatMap (morpho "" prFullFormLexicon) $ optLangs opts | isOpt "fullform" opts = return $ fromString $ concatMap (morpho "" prFullFormLexicon) $ optLangs opts
| isOpt "missing" opts = return $ fromString $ unlines $ [unwords (showCId la:":": map showCId cs) | | isOpt "missing" opts = return $ fromString $ unlines $ [unwords (showCId la:":": map showCId cs) |
la <- optLangs opts, let cs = missingLins pgf la] la <- optLangs opts, let cs = missingLins pgf la]
| isOpt "words" opts = return $ fromString $ concatMap (morpho "" prAllWords) $ optLangs opts
| otherwise = do fmt <- readOutputFormat (valStrOpts "printer" "pgf_pretty" opts) | otherwise = do fmt <- readOutputFormat (valStrOpts "printer" "pgf_pretty" opts)
return $ fromString $ concatMap snd $ exportPGF noOptions fmt pgf return $ fromString $ concatMap snd $ exportPGF noOptions fmt pgf
morphos opts s = morphos opts s =
[morpho [] (\mo -> lookupMorpho mo s) la | la <- optLangs opts] [(s,morpho [] (\mo -> lookupMorpho mo s) la) | la <- optLangs opts]
morpho z f la = maybe z f $ Map.lookup la mos morpho z f la = maybe z f $ Map.lookup la mos
theMorpho opts = morpho (error "no morpho") id (head (optLangs opts))
-- ps -f -g s returns g (f s) -- ps -f -g s returns g (f s)
stringOps menv opts s = foldr (menvop . app) s (reverse opts) where stringOps menv opts s = foldr (menvop . app) s (reverse opts) where
app f = maybe id id (stringOp f) app f = maybe id id (stringOp f)
@@ -924,8 +945,16 @@ lookFlag pgf lang flag = lookConcrFlag pgf (mkCId lang) (mkCId flag)
prFullFormLexicon :: Morpho -> String prFullFormLexicon :: Morpho -> String
prFullFormLexicon mo = prFullFormLexicon mo =
unlines [w ++ " : " ++ prMorphoAnalysis ts | (w,ts) <- fullFormLexicon mo] unlines (map prMorphoAnalysis (fullFormLexicon mo))
prMorphoAnalysis :: [(Lemma,Analysis)] -> String prAllWords :: Morpho -> String
prMorphoAnalysis lps = unlines [showCId l ++ " " ++ p | (l,p) <- lps] prAllWords mo =
unwords [w | (w,_) <- fullFormLexicon mo]
prMorphoAnalysis :: (String,[(Lemma,Analysis)]) -> String
prMorphoAnalysis (w,lps) =
unlines (w:[showCId l ++ " : " ++ p | (l,p) <- lps])
morphoMissing :: Morpho -> [String] -> [String]
morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w)]

View File

@@ -75,7 +75,7 @@ linTree pgf lang e = lin (expr2tree e) Nothing
Just (DTyp _ cat _) -> compute pgf lang [K (KS (showCId x))] (lookMap tm0 cat (lindefs cnc)) Just (DTyp _ cat _) -> compute pgf lang [K (KS (showCId x))] (lookMap tm0 cat (lindefs cnc))
Nothing -> TM (showCId x) Nothing -> TM (showCId x)
lin (Meta i) mty = case mty of lin (Meta i) mty = case mty of
Just (DTyp _ cat _) -> compute pgf lang [K (KS (show i))] (lookMap tm0 cat (lindefs cnc)) Just (DTyp _ cat _) -> compute pgf lang [K (KS ("?" ++ show i))] (lookMap tm0 cat (lindefs cnc))
Nothing -> TM (show i) Nothing -> TM (show i)
variants :: [Term] -> Term variants :: [Term] -> Term

View File

@@ -100,7 +100,7 @@ markLinearize pgf lang = concat . take 1 . linearizesMark pgf lang
collectWords :: PGF -> Language -> [(String, [(CId,String)])] collectWords :: PGF -> Language -> [(String, [(CId,String)])]
collectWords pgf lang = collectWords pgf lang =
concatMap collOne concatMap collOne
[(f,c,0) | (f,(DTyp [] c _,_,_)) <- Map.toList $ funs $ abstract pgf] [(f,c,length xs) | (f,(DTyp xs c _,_,_)) <- Map.toList $ funs $ abstract pgf]
where where
collOne (f,c,i) = collOne (f,c,i) =
fromRec f [showCId c] (recLinearize pgf lang (foldl EApp (EFun f) (replicate i (EMeta 888)))) fromRec f [showCId c] (recLinearize pgf lang (foldl EApp (EFun f) (replicate i (EMeta 888))))
@@ -108,6 +108,7 @@ collectWords pgf lang =
RR rs -> concat [fromRec f v t | (_,t) <- rs] RR rs -> concat [fromRec f v t | (_,t) <- rs]
RT rs -> concat [fromRec f (p:v) t | (p,t) <- rs] RT rs -> concat [fromRec f (p:v) t | (p,t) <- rs]
RFV rs -> concatMap (fromRec f v) rs RFV rs -> concatMap (fromRec f v) rs
RS s -> [(s,[(f,unwords (reverse v))])] RS s -> [(w,[(f,unwords (reverse v))]) | w <- words s, w /= "?888"] ---
-- RS s -> [(s,[(f,unwords (reverse v))])]
RCon c -> [] ---- inherent RCon c -> [] ---- inherent