mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-10 13:29:32 -06:00
90 lines
2.3 KiB
Haskell
90 lines
2.3 KiB
Haskell
module Main where
|
|
|
|
-- to learn a lexicon from Google translate via sentence translation
|
|
|
|
import System
|
|
|
|
main = do
|
|
xx <- getArgs
|
|
case xx of
|
|
"align":trees:lins:_ -> do
|
|
ts <- readFile trees >>= return . lines
|
|
ls <- readFile lins >>= return . lines
|
|
mapM_ (putStrLn . align) (zip ts ls)
|
|
n:f:_ -> do
|
|
nouns <- readFile n >>= return . words
|
|
preds <- readFile f >>= return . words
|
|
interact (const (mkExx nouns preds))
|
|
|
|
type Ident = String
|
|
|
|
mkExx nouns preds = unlines $ map predic (zip nouns predss) where
|
|
predss = preds ++ predss -- there are more nouns than predicates
|
|
|
|
predic :: (Ident,Ident) -> String
|
|
predic (n,f) = case c of
|
|
'A':_ -> predn n ("(UseComp (CompAP (PositA " ++ f ++ ")))")
|
|
"V2" -> predn n ("(ComplSlash (SlashV2a " ++ f ++ ") (" ++ detn n ++ "))")
|
|
'V':_ -> predn n ("(UseV " ++ f ++ ")")
|
|
where
|
|
c = tail $ dropWhile (/='_') f
|
|
|
|
predn n f = "PredVP (" ++ detn n ++ ") " ++ f
|
|
detn n = "DetCN (DetQuant DefArt NumSg) (UseN " ++ n ++ ")"
|
|
|
|
align (t,s) = unlines [
|
|
noun ++ " = mkN " ++ nargs ++ " ;",
|
|
pred ++ " = mk" ++ cat ++ " " ++ fargs ++ " ;"
|
|
]
|
|
where
|
|
(noun,(pred,cat)) = case words t of
|
|
_:_:_:_:_:_:n:ps -> (
|
|
takeWhile (/=')') n,
|
|
case ps of
|
|
"(UseComp":_:_:a:_ -> (takeWhile (/=')') a,"A")
|
|
"(UseV":v:_ -> (takeWhile (/=')') v,"V")
|
|
"(ComplSlash":_:v:_ -> (takeWhile (/=')') v,"V2")
|
|
)
|
|
(nargs,fargs) = case words s of
|
|
de:n:"is":a:_ -> (nargsOf n de, quote (init a))
|
|
de:n:v:_:_ -> (nargsOf n de, quote (verb v))
|
|
de:n:v:_ -> (nargsOf n de, quote (verb (init v)))
|
|
|
|
nargsOf n d = unwords [quote n, if d == "Het" then "neuter" else "utrum"]
|
|
|
|
verb s = init s ++ "en"
|
|
|
|
quote s = "\"" ++ s ++ "\""
|
|
|
|
|
|
|
|
-- do this way:
|
|
|
|
{-
|
|
gf LangEng
|
|
> gt -cat=N | wf -file=ns
|
|
> gt -cat=A | wf -file=fs
|
|
> gt -cat=V | wf -append -file=fs
|
|
> gt -cat=V2 | wf -append -file=fs
|
|
|
|
sort -u ns >nouns
|
|
sort -u fs >preds
|
|
|
|
runghc MkExx.hs nouns preds >exx-input
|
|
|
|
gf
|
|
> rf -file=exx-input -lines -tree | l | wf -file=all-exx
|
|
|
|
ghci
|
|
> let mk (_:c:cs) = Data.Char.toUpper c : cs ++ "."
|
|
> do {s <- readFile "all-exx" ; writeFile "trans-eng" (unlines (map mk (lines s)))}
|
|
|
|
-- google-translate trans-eng, obtaining trans-dut
|
|
|
|
-- align the files, producing LexiconDut.gf
|
|
|
|
runghc MkExx.hs align exx-input trans-dut | sort -u >newlex
|
|
|
|
-}
|
|
|