Started experimenting with shallow resource API.

Started experimenting with shallow resource API.
Added PossessPrep to Structural.
This commit is contained in:
aarne
2004-02-20 16:28:42 +00:00
parent 7f3009e2cc
commit 2ff53b2ffd
17 changed files with 45686 additions and 0 deletions

View File

@@ -0,0 +1,66 @@
module MkLex where
import System
import Monad
-- to massage an ispell word list into a GF lexicon. AR 20/2/2004
-- | Input word list; it is read line by line, one word form per line.
lexfile :: FilePath
lexfile = "linux.words"

-- | Target file for the generated abstract syntax module @Lex@.
absfile :: FilePath
absfile = "Lex.gf"

-- | Target file for the generated concrete syntax module @LexEng@.
cncfile :: FilePath
cncfile = "LexEng.gf"

-- | How many leading word forms of 'lexfile' are processed.
maxsize :: Int
maxsize = 10000 --- add more!
-- | Generate the GF lexicon files from the word list.
--
-- Starts 'absfile' and 'cncfile' afresh with their module headers, then hands
-- the first 'maxsize' lines of 'lexfile' (each one reversed) to 'sortWords',
-- which appends the entries and the closing braces.
massage :: IO ()
massage = do
  -- initialize target files; the leading "\n" is the blank first line that
  -- a shell @echo "" > file@ would leave, written here without a shell
  writeFile absfile $
    "\n"
      ++ "abstract Lex = Shallow ** {\nfun\n"
  writeFile cncfile $
    "\n"
      ++ "--# -path=.:..:../../../prelude:../../abstract:../../english\n\n"
      ++ "concrete LexEng of Lex = ShallowEng ** open ParadigmsEng in {\nlin\n"
  -- reverse to study endings: a suffix of the word becomes a list prefix
  ws <- fmap (map reverse . lines) (readFile lexfile)
  sortWords (take maxsize ws)
-- | Scan the reversed word forms and append one lexicon entry per recognised
-- group of forms; once the list is exhausted, close both target files'
-- module bodies.
--
-- We exploit the fact that the original list is sorted, so the different
-- forms of a word lie consecutively. All words are reversed here, which is
-- why endings show up as prefixes in the patterns.
sortWords :: [String] -> IO ()
sortWords [] = do
  appendFile absfile " }\n"
  appendFile cncfile " }\n"
sortWords (u : ('d':'e':v) : ('s':w) : rest)
  | u == v && u == w = do -- regular verb: X, Xed, Xs
      mkEntry verbReg u
      sortWords rest
sortWords (('e':u) : ('d':'e':v) : ('s':'e':w) : rest)
  | u == v && u == w = do -- e-verb: Xe, Xed, Xes
      mkEntry verbE ('e':u)
      sortWords rest
sortWords (u : ('y':'l':v) : rest)
  | u == v = do -- regular adjective: X, Xly
      mkEntry adjReg u
      sortWords rest
sortWords (u : ('s':v) : rest)
  | u == v = do -- regular noun: X, Xs
      mkEntry nounReg u
      sortWords rest
---- add more
-- no group starts at this word: drop it and carry on with the next one
sortWords (_ : rest) = sortWords rest
-- | Append one lexicon entry to both target files.
--
-- @abc@ turns a word into its pair of rule texts: the first component goes
-- to 'absfile', the second to 'cncfile'. @w0@ is the word in reversed
-- spelling; it is reversed again before @abc@ is applied.
mkEntry :: (String -> (String,String)) -> String -> IO ()
mkEntry abc w0 = do
  let (ab, cn) = abc (reverse w0)
  -- one append per file: the rule and its " ;" terminator are written
  -- together, so each file is opened once per entry instead of twice
  appendFile absfile (" " ++ ab ++ " ;\n")
  appendFile cncfile (" " ++ cn ++ " ;\n")
-- | Entry texts for a regular verb: category @Verb@, paradigm @vReg@.
verbReg :: String -> (String,String)
verbReg w = mkGF "Verb" "vReg" w

-- | Entry texts for a verb ending in @e@; for now the same as 'verbReg'. ----
verbE :: String -> (String,String)
verbE w = verbReg w

-- | Entry texts for a regular adjective: category @Adj@, paradigm @mkAdj1@.
adjReg :: String -> (String,String)
adjReg w = mkGF "Adj" "mkAdj1" w

-- | Entry texts for a regular noun: category @Noun@, paradigm @cnNonhuman@.
nounReg :: String -> (String,String)
nounReg w = mkGF "Noun" "cnNonhuman" w
-- | Build the two rule texts for one word.
--
-- The function name is the word followed by @L@. The first component is
-- the declaration @name : cat@; the second is the definition
-- @name = oper "word"@.
mkGF :: String -> String -> (String -> (String,String))
mkGF cat oper w = (declaration, definition)
  where
    name        = w ++ "L"
    declaration = name ++ " : " ++ cat
    definition  = name ++ " = " ++ oper ++ " \"" ++ w ++ "\""

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,31 @@
Project: to massage an ispell word list into a GF lexicon. AR 20/2/2004
Used MkLex on linux.words (45424 word forms)
The first run gave 6471 words, which was too many for GF.
Limiting the input with maxsize brought this down to 1587.
The grammar now compiles in 25sec, and parses as follows:
"every commander assumes of an abbot"
610 msec, 6 ws
"every accusing commander assumes of a coarse abbot"
840 msec, 8 ws
"every accusing commander assumes of a coarse abbot of all
cognitive colonizers"
820 msec, 12 ws
"every accusing commander of a comical careful colt assumes of
a coarse abbot of all cognitive colonizers"
3670 msec, 17 ws (nesting inside)
"every accusing commander assumes of a coarse abbot of all cognitive
colonizers of a colt of a careful carpenter of every carriage"
TIMEOUT!
Note: it is essential to import with the -cflexer flag!
Time spent: 58 min, includes writing MkLex, running tests,
and writing these notes.