1
0
forked from GitHub/gf-core

new DictEngFin in finnished/stemmed. Only 9k lemmas missing.

This commit is contained in:
aarne
2013-03-28 14:22:09 +00:00
parent abd95ff3c1
commit 2eab210435
5 changed files with 58232 additions and 38821 deletions

View File

@@ -2,12 +2,21 @@ import qualified Data.Set as S
-- comment out words that are predefined in another lexicon
-- runghc ElimPredef.hs <DictEngFin.gf
-- removeFile = "predef.txt"
-- removeMsg = "PREDEF"
-- also used for temporarily eliminating whatever from compilation
--removeFile = "commentOut"
--removeMsg = "POSTPONE"
-- | File listing the words to comment out; only the first word of each
-- line in it is used.
removeFile :: FilePath
removeFile = "t-nouns"

-- | Tag written right after the leading @--@ of every commented-out line.
removeMsg :: String
removeMsg = "PLURNOUN"
-- | Read the removal list from 'removeFile', then copy standard input to
-- standard output, passing every line through 'elimPredef'.
--
-- The list is read only from 'removeFile'; the earlier hard-coded read of
-- @predef.txt@ was shadowed by this one and is dropped.
main :: IO ()
main = do
  -- Keep the first word of each list line. Blank or whitespace-only lines
  -- contribute nothing ('take 1' instead of the partial 'head').
  predefs <- fmap (S.fromList . concatMap (take 1 . words) . lines)
                  (readFile removeFile)
  interact (unlines . map (elimPredef predefs) . lines)
-- | Comment out a line whose first word is a member of the given set.
--
-- A matching line is returned prefixed with @--@, the 'removeMsg' tag and a
-- space. Every other line, including one with no words at all, is returned
-- unchanged.
--
-- The stale alternative that always wrote the fixed @--PREDEF @ prefix is
-- removed: it had the same pattern and guard and came first, so 'removeMsg'
-- was never used.
elimPredef :: S.Set String -> String -> String
elimPredef predefs line = case words line of
  w:_ | S.member w predefs -> "--" ++ removeMsg ++ " " ++ line
  _                        -> line