mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-04 00:32:51 -06:00
Dutch lexicon generated via Google translate; method in lib/src/MkExx.hs
This commit is contained in:
89
lib/src/MkExx.hs
Normal file
89
lib/src/MkExx.hs
Normal file
@@ -0,0 +1,89 @@
|
||||
module Main where
|
||||
|
||||
-- to learn a lexicon from Google translate via sentence translation
|
||||
|
||||
import System
|
||||
|
||||
main = do
|
||||
xx <- getArgs
|
||||
case xx of
|
||||
"align":trees:lins:_ -> do
|
||||
ts <- readFile trees >>= return . lines
|
||||
ls <- readFile lins >>= return . lines
|
||||
mapM_ (putStrLn . align) (zip ts ls)
|
||||
n:f:_ -> do
|
||||
nouns <- readFile n >>= return . words
|
||||
preds <- readFile f >>= return . words
|
||||
interact (const (mkExx nouns preds))
|
||||
|
||||
type Ident = String
|
||||
|
||||
mkExx nouns preds = unlines $ map predic (zip nouns predss) where
|
||||
predss = preds ++ predss -- there are more nouns than predicates
|
||||
|
||||
predic :: (Ident,Ident) -> String
|
||||
predic (n,f) = case c of
|
||||
'A':_ -> predn n ("(UseComp (CompAP (PositA " ++ f ++ ")))")
|
||||
"V2" -> predn n ("(ComplSlash (SlashV2a " ++ f ++ ") (" ++ detn n ++ "))")
|
||||
'V':_ -> predn n ("(UseV " ++ f ++ ")")
|
||||
where
|
||||
c = tail $ dropWhile (/='_') f
|
||||
|
||||
predn n f = "PredVP (" ++ detn n ++ ") " ++ f
|
||||
detn n = "DetCN (DetQuant DefArt NumSg) (UseN " ++ n ++ ")"
|
||||
|
||||
align (t,s) = unlines [
|
||||
noun ++ " = mkN " ++ nargs ++ " ;",
|
||||
pred ++ " = mk" ++ cat ++ " " ++ fargs ++ " ;"
|
||||
]
|
||||
where
|
||||
(noun,(pred,cat)) = case words t of
|
||||
_:_:_:_:_:_:n:ps -> (
|
||||
takeWhile (/=')') n,
|
||||
case ps of
|
||||
"(UseComp":_:_:a:_ -> (takeWhile (/=')') a,"A")
|
||||
"(UseV":v:_ -> (takeWhile (/=')') v,"V")
|
||||
"(ComplSlash":_:v:_ -> (takeWhile (/=')') v,"V2")
|
||||
)
|
||||
(nargs,fargs) = case words s of
|
||||
de:n:"is":a:_ -> (nargsOf n de, quote (init a))
|
||||
de:n:v:_:_ -> (nargsOf n de, quote (verb v))
|
||||
de:n:v:_ -> (nargsOf n de, quote (verb (init v)))
|
||||
|
||||
nargsOf n d = unwords [quote n, if d == "Het" then "neuter" else "utrum"]
|
||||
|
||||
verb s = init s ++ "en"
|
||||
|
||||
quote s = "\"" ++ s ++ "\""
|
||||
|
||||
|
||||
|
||||
-- do this way:
|
||||
|
||||
{-
|
||||
gf LangEng
|
||||
> gt -cat=N | wf -file=ns
|
||||
> gt -cat=A | wf -file=fs
|
||||
> gt -cat=V | wf -append -file=fs
|
||||
> gt -cat=V2 | wf -append -file=fs
|
||||
|
||||
sort -u ns >nouns
|
||||
sort -u fs >preds
|
||||
|
||||
runghc MkExx.hs nouns preds >exx-input
|
||||
|
||||
gf
|
||||
> rf -file=exx-input -lines -tree | l | wf -file=all-exx
|
||||
|
||||
ghci
|
||||
> let mk (_:c:cs) = Data.Char.toUpper c : cs ++ "."
|
||||
> do {s <- readFile "all-exx" ; writeFile "trans-eng" (unlines (map mk (lines s)))}
|
||||
|
||||
-- google-translate trans-eng, obtaining trans-dut
|
||||
|
||||
-- align the files, producing LexiconDut.gf
|
||||
|
||||
runghc MkExx.hs align exx-input trans-dut | sort -u >newlex
|
||||
|
||||
-}
|
||||
|
||||
Reference in New Issue
Block a user