1
0
forked from GitHub/gf-core

ma command: add option -known to drop unknown words

This commit is contained in:
aarne
2012-06-10 10:43:57 +00:00
parent 12b2a0d665
commit 191ecc71b8
2 changed files with 16 additions and 4 deletions

View File

@@ -521,6 +521,10 @@ allCommands env@(pgf, mos) = Map.fromList [
return . fromString . unwords . return . fromString . unwords .
morphoMissing (optMorpho opts) . morphoMissing (optMorpho opts) .
concatMap words . toStrings concatMap words . toStrings
_ | isOpt "known" opts ->
return . fromString . unwords .
morphoKnown (optMorpho opts) .
concatMap words . toStrings
_ -> return . fromString . unlines . _ -> return . fromString . unlines .
map prMorphoAnalysis . concatMap (morphos opts) . map prMorphoAnalysis . concatMap (morphos opts) .
concatMap words . toStrings , concatMap words . toStrings ,
@@ -528,7 +532,8 @@ allCommands env@(pgf, mos) = Map.fromList [
("lang","the languages of analysis (comma-separated, no spaces)") ("lang","the languages of analysis (comma-separated, no spaces)")
], ],
options = [ options = [
("missing","show the list of unknown words in the input") ("known", "return only the known words, in order of appearance"),
("missing","show the list of unknown words, in order of appearance")
] ]
}), }),

View File

@@ -1,7 +1,8 @@
module PGF.Morphology(Lemma,Analysis,Morpho, module PGF.Morphology(Lemma,Analysis,Morpho,
buildMorpho,isInMorpho, buildMorpho,isInMorpho,
lookupMorpho,fullFormLexicon, lookupMorpho,fullFormLexicon,
morphoMissing,missingWordMsg) where morphoMissing,morphoKnown,morphoClassify,
missingWordMsg) where
import PGF.CId import PGF.CId
import PGF.Data import PGF.Data
@@ -48,8 +49,14 @@ isInMorpho (Morpho mo) s = maybe False (const True) $ Map.lookup s mo
fullFormLexicon :: Morpho -> [(String,[(Lemma,Analysis)])] fullFormLexicon :: Morpho -> [(String,[(Lemma,Analysis)])]
fullFormLexicon (Morpho mo) = Map.toList mo fullFormLexicon (Morpho mo) = Map.toList mo
morphoMissing :: Morpho -> [String] -> [String] morphoMissing :: Morpho -> [String] -> [String]
morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w), notLiteral w] where morphoMissing = morphoClassify False
-- | Counterpart of 'morphoMissing': delegates to 'morphoClassify' with the
-- flag set to 'True', which keeps (in input order) the words whose
-- 'lookupMorpho' result is non-empty and which are not made up of digits only.
morphoKnown :: Morpho -> [String] -> [String]
morphoKnown = morphoClassify True
-- | Filter a word list by whether the morphology has an analysis for each word.
--
-- The result preserves the order of @ws@ (and any duplicates in it).
-- With @k = True@ a word is kept when @lookupMorpho mo w@ is non-empty;
-- with @k = False@ it is kept when that lookup is empty.
-- In both cases words consisting solely of digits are dropped; since
-- @all isDigit ""@ is 'True', the empty string is dropped as well.
morphoClassify :: Bool -> Morpho -> [String] -> [String]
-- @k /= null (...)@ reads as "k is True exactly when the lookup is non-empty".
morphoClassify k mo ws = [w | w <- ws, k /= null (lookupMorpho mo w), notLiteral w] where
-- 'notLiteral' excludes all-digit tokens from either classification.
notLiteral w = not (all isDigit w) ---- should be defined somewhere notLiteral w = not (all isDigit w) ---- should be defined somewhere
missingWordMsg :: Morpho -> [String] -> String missingWordMsg :: Morpho -> [String] -> String