Add the -known option to the ma command to drop unknown words

This commit is contained in:
aarne
2012-06-10 10:43:57 +00:00
parent 8412792d66
commit 8ef8af479f
2 changed files with 16 additions and 4 deletions

View File

@@ -521,6 +521,10 @@ allCommands env@(pgf, mos) = Map.fromList [
return . fromString . unwords .
morphoMissing (optMorpho opts) .
concatMap words . toStrings
_ | isOpt "known" opts ->
return . fromString . unwords .
morphoKnown (optMorpho opts) .
concatMap words . toStrings
_ -> return . fromString . unlines .
map prMorphoAnalysis . concatMap (morphos opts) .
concatMap words . toStrings ,
@@ -528,7 +532,8 @@ allCommands env@(pgf, mos) = Map.fromList [
("lang","the languages of analysis (comma-separated, no spaces)")
],
options = [
("missing","show the list of unknown words in the input")
("known", "return only the known words, in order of appearance"),
("missing","show the list of unknown words, in order of appearance")
]
}),

View File

@@ -1,7 +1,8 @@
module PGF.Morphology(Lemma,Analysis,Morpho,
buildMorpho,isInMorpho,
lookupMorpho,fullFormLexicon,
morphoMissing,missingWordMsg) where
morphoMissing,morphoKnown,morphoClassify,
missingWordMsg) where
import PGF.CId
import PGF.Data
@@ -48,8 +49,14 @@ isInMorpho (Morpho mo) s = maybe False (const True) $ Map.lookup s mo
-- | List every entry stored in the morphology as a pair of its string key
-- (presumably a word form) and the lemma/analysis pairs recorded for it.
-- Entries come out in ascending key order.
fullFormLexicon :: Morpho -> [(String,[(Lemma,Analysis)])]
fullFormLexicon (Morpho entries) = Map.assocs entries
morphoMissing :: Morpho -> [String] -> [String]
morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w), notLiteral w] where
-- | Keep, in order of appearance, the words for which 'lookupMorpho' finds
-- nothing. Tokens consisting only of digits are never reported.
morphoMissing :: Morpho -> [String] -> [String]
morphoMissing mo ws = morphoClassify False mo ws
-- | Keep, in order of appearance, the words for which 'lookupMorpho' returns
-- at least one result. Tokens consisting only of digits are dropped.
morphoKnown :: Morpho -> [String] -> [String]
morphoKnown mo ws = morphoClassify True mo ws
-- | Filter a word list by whether the morphology has an entry for each word,
-- preserving the input order.
--
-- With @True@ the result holds the words for which 'lookupMorpho' returns a
-- non-empty list; with @False@ it holds the words for which it returns an
-- empty list. In both modes, tokens made up solely of digits are excluded.
morphoClassify :: Bool -> Morpho -> [String] -> [String]
morphoClassify k mo ws = filter wanted ws
  where
    -- A word is selected when its "found" status equals the requested flag.
    wanted w = found w == k && notLiteral w
    found = not . null . lookupMorpho mo
    notLiteral = not . all isDigit ---- should be defined somewhere
missingWordMsg :: Morpho -> [String] -> String