more accurate BigLexEng (V3 still to do)

This commit is contained in:
aarne
2006-11-02 21:45:32 +00:00
parent 13cd2b96be
commit d2897dd7d9
5 changed files with 11569 additions and 11998 deletions

View File

@@ -1,3 +1,5 @@
import Char
infile = "mywordlist1"
main = do
@@ -6,21 +8,24 @@ main = do
-- NOTE(review): this span is a diff hunk whose +/- markers and indentation are
-- not visible here; it appears to contain both the old and the new version of
-- some lines. Comments below mark which lines look superseded -- confirm
-- against the raw diff.
--
-- mkOne converts one line of the input word list into one output line.
-- The line is split into whitespace-separated words and dispatched on them:
--   * first word is "--"                      -> empty output line
--   * first word starts with '(' and at least
--     two more words (w, cat) follow          -> an entry built by mkCatf
--   * anything else                           -> the input line prefixed by "-- "
mkOne s = case words s of
"--":_ -> ""
-- Presumably the removed (pre-commit) form of the entry clause: mkCatf returned
-- a (category, function) pair and the word w was appended here.
('(':_):w:cat:ws ->
let
(c,f) = mkCatf (nopar cat) (more ws)
in unwords $ [c, f, w]
-- Presumably the added (post-commit) form: mkCatf now receives w and returns
-- the complete list of output words, which may be empty (unwords [] == "").
('(':_):w:cat:ws -> unwords $ mkCatf (nopar cat) (more ws) w
_ -> "-- " ++ s
where
-- more classifies the remaining words of the line: "irreg" if the token
-- "(REG" occurs among them, "reg" otherwise.
-- NOTE(review): the presence of "(REG" maps to "irreg"; confirm this matches
-- the word-list format.
more ws = case ws of
_ | elem "(REG" ws -> "irreg"
_ -> "reg"
-- nopar strips every '(' and ')' character from a string.
nopar = filter (flip notElem "()")
-- Presumably the removed version of mkCatf: maps a category name to a
-- (category, function-name) pair; the regularity argument r is not used.
mkCatf c r = case c of
"Noun" -> ("N","regN")
"Adject" -> ("A","regA")
"Adject_LONG" -> ("A","longA")
"Verb" -> ("V","regV")
"PNoun" -> ("PN","regPN")
_ -> (c,"mk" ++ c)
-- Presumably the added version of mkCatf: takes the category name c, the
-- regularity tag r ("reg"/"irreg", as produced by more) and the word w, and
-- returns the list of words that make up the output entry.
mkCatf c r w = case c of
"Noun" -> ["N","regN",w]
"Adject" -> ["A","regA",w]
"Adject_LONG" -> ["A","longA",w]
-- Irregular verbs produce no words at all, so the output line is empty.
"Verb" | r == "irreg" -> []
"Verb" -> ["V","regV",w]
"V2" | r == "irreg" -> ["V2","irreg", w, "_V"]
"V2" -> ["V2","regV2", w]
-- Upper-cases the first letter of the word; w comes from `words`, which never
-- yields an empty string, so head/tail are applied to a non-empty w.
"PNoun" -> ["PN","regPN",toUpper (head w): tail w]
-- Category names of the form "V2_<prep>" with an irregular verb: the suffix
-- after "V2_" is lower-cased and emitted as the last word.
'V':'2':'_':prep | r == "irreg" ->
["V2","mkV2", w, "_V", map toLower prep]
-- Any other "<x>2_<prep>" category (including regular "V2_<prep>"): the
-- category is "<x>2", the function is "prep<x>2", followed by w and the
-- lower-cased suffix.
x:'2':'_':prep -> [[x]++"2","prep" ++[x]++"2", w, map toLower prep]
-- Fallback: category name unchanged, function name "mk" ++ category.
_ -> [c,"mk" ++ c, w]