mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-13 14:59:32 -06:00
305 lines
8.2 KiB
Haskell
305 lines
8.2 KiB
Haskell
import System.Directory
|
|
import System.FilePath
|
|
import Data.List
|
|
import Data.Char(toLower)
|
|
import Control.Monad
|
|
import qualified Data.Map as Map
|
|
|
|
import PGF (readPGF, readLanguage, buildMorpho, lookupMorpho, mkCId, functionType, unType)
|
|
import SusanneFormat
|
|
import Parser
|
|
import Idents
|
|
|
|
Just eng = readLanguage "DictEng"
|
|
|
|
main = do
|
|
gr <- readPGF "DictEngAbs.pgf"
|
|
let morpho = buildMorpho gr eng
|
|
fs <- getDirectoryContents "data"
|
|
txts <- (mapM (\f -> readFile ("data" </> f)) . filter ((/= ".") . take 1)) (sort fs)
|
|
--let ts' = readTreebank (lines (concat txts))
|
|
--writeFile "text" (unlines (map show ts'))
|
|
let (ts',rs') = combineRes (convert gr morpho) (readTreebank (lines (concat txts)))
|
|
let rm = Map.fromListWith (++) rs'
|
|
writeFile "susanne.gft" (unlines (map show ts'))
|
|
writeFile "rules" (unlines (concat [unwords ("-":cat:"->":cats) : map (\t -> " "++show t) rs'' | (cat :-> cats,rs'') <- Map.toList rm]))
|
|
|
|
data Rule = Tag :-> [Tag]
|
|
deriving (Eq,Ord)
|
|
|
|
convert pgf morpho w@(Word _ tag _ lemma)
|
|
| elem tag ["YB","YBL","YBR","YF","YIL","YIR","YTL","YTR", "YO"] = ([],[])
|
|
{- | tag == "NN1c" = convertLemma pgf morpho (mkCId "N") "s Sg Nom" w
|
|
| tag == "NN1n" = convertLemma pgf morpho (mkCId "N") "s Sg Nom" w
|
|
| tag == "NN2" = convertLemma pgf morpho (mkCId "N") "s Pl Nom" w
|
|
| tag == "JJ" = convertLemma pgf morpho (mkCId "A") "s (AAdj Posit Nom)" w
|
|
| tag == "JB" = convertLemma pgf morpho (mkCId "A") "s (AAdj Posit Nom)" w
|
|
| tag == "JBo" = convertLemma pgf morpho (mkCId "A") "s (AAdj Posit Nom)" w
|
|
| tag == "AT" = convertLemma pgf morpho (mkCId "Quant") "s False Sg" w
|
|
| tag == "VVDi" = convertLemma pgf morpho (mkCId "V") "s VPast" w
|
|
| tag == "VVDt" = convertLemma pgf morpho (mkCId "V2") "s VPast" w
|
|
| tag == "VVDv" = convertLemma pgf morpho (mkCId "V") "s VPast" w
|
|
| tag == "VVZi" = convertLemma pgf morpho (mkCId "V") "s VPres" w
|
|
| tag == "VVZt" = convertLemma pgf morpho (mkCId "V2") "s VPres" w
|
|
| tag == "VVZv" = convertLemma pgf morpho (mkCId "V") "s VPres" w
|
|
| tag == "PPHS2"= convertLemma pgf morpho (mkCId "Pron") "s (NCase Nom)" w
|
|
| tag == "PPHO2"= convertLemma pgf morpho (mkCId "Pron") "s NPAcc" w
|
|
| tag == "RR" = convertLemma pgf morpho (mkCId "Adv") "s" w
|
|
| tag == "II" = convertLemma pgf morpho (mkCId "Prep") "s" w
|
|
| tag == "IO" = convertLemma pgf morpho (mkCId "Prep") "s" w-}
|
|
| otherwise = ([w],[])
|
|
convert pgf morpho t@(Phrase tag mods fn idx ts)
|
|
| tag == "O" = (ts',rs')
|
|
| tag == "Q" = (ts',rs')
|
|
| tag == "S" = case runP pS pgf morpho ts' of
|
|
Just ([],x) -> ([x], rs')
|
|
_ -> ([Phrase tag mods fn idx ts'], (r,[t]) : rs')
|
|
| otherwise = ([Phrase tag mods fn idx ts'], (r,[t]) : rs')
|
|
where
|
|
(ts',rs') = combineRes (convert pgf morpho) ts
|
|
r = tag :-> map getTag ts
|
|
|
|
isExtra (Word _ "YIL" _ _) = True
|
|
isExtra (Word _ "YIR" _ _) = True
|
|
isExtra (Word _ "YTL" _ _) = True
|
|
isExtra (Word _ "YTR" _ _) = True
|
|
isExtra _ = False
|
|
|
|
getTag (Phrase tag mods fn idx ts) = tag++if null fn then "" else ":"++fn
|
|
getTag (Word _ tag _ _) = tag
|
|
|
|
convertLemma pgf morpho cat an0 w@(Word _ tag form _) =
|
|
case [f | (f,an) <- lookupMorpho morpho (map toLower form), hasCat pgf f cat, an == an0] of
|
|
[f] -> ([App f []], [])
|
|
_ -> ([w],[])
|
|
where
|
|
hasCat pgf f cat =
|
|
case functionType pgf f of
|
|
Just ty -> case unType ty of
|
|
(_,cat1,_) -> cat1 == cat
|
|
Nothing -> False
|
|
|
|
combineRes f ts = (ts',rs')
|
|
where
|
|
(x,y) = unzip (map f ts)
|
|
ts' = concat x
|
|
rs' = concat y
|
|
|
|
pS =
|
|
do mplus pConj (return ())
|
|
advs <- many pAdS
|
|
np <- pSubject
|
|
(t,p,vp) <- pVP
|
|
return (foldr ($) (cidUseCl (cidTTAnt t p) (cidPredVP np vp)) advs)
|
|
`mplus`
|
|
do mplus pConj (return ())
|
|
(t,p,vp) <- pVP
|
|
return (cidImpVP vp)
|
|
`mplus`
|
|
do mplus pConj (return ())
|
|
advs <- many pAdS
|
|
t1 <- match "EX"
|
|
(t,p,vp) <- pVP
|
|
return (foldr ($) (cidUseCl (cidTTAnt t p) (cidExistNP t1 vp)) advs)
|
|
|
|
pSubject =
|
|
do insideOpt "N:s" pNP
|
|
`mplus`
|
|
do insideOpt "N:S" pNP
|
|
`mplus`
|
|
do match "M:s"
|
|
`mplus`
|
|
do match "M:S"
|
|
`mplus`
|
|
do match "D:s"
|
|
`mplus`
|
|
do match "D:S"
|
|
|
|
pConj =
|
|
do match "CC"
|
|
return ()
|
|
`mplus`
|
|
do match "CCB"
|
|
return ()
|
|
|
|
pAdS =
|
|
do adv <- pAdv
|
|
match "YC"
|
|
return (\t -> cidExtAdvS adv t)
|
|
`mplus`
|
|
do adv <- pAdv
|
|
return (\t -> cidAdvS adv t)
|
|
|
|
pVP =
|
|
do adVs <- many pAdV
|
|
(t,p,vs) <- pV "VS"
|
|
advs <- many pAdv
|
|
s <- insideOpt "F:o"
|
|
(opt (match "CST") >> pS)
|
|
return (t,p,foldr (\adv t -> cidAdVVP adv t)
|
|
(foldl (\t adv -> cidAdvVP t adv)
|
|
(cidComplVS vs s)
|
|
advs)
|
|
adVs)
|
|
`mplus`
|
|
do adVs <- many pAdV
|
|
(t,p,vv) <- pV "VV"
|
|
advs <- many pAdv
|
|
vp <- match "Ti"
|
|
return (t,p,foldr (\adv t -> cidAdVVP adv t)
|
|
(foldl (\t adv -> cidAdvVP t adv)
|
|
(cidComplVV vv vp)
|
|
advs)
|
|
adVs)
|
|
`mplus`
|
|
do adVs <- many pAdV
|
|
(t,p,v2) <- pV "V2"
|
|
o <- pObject
|
|
opt (match "YC") -- what is this?
|
|
advs <- many pAdv
|
|
return (t,p,foldr (\adv t -> cidAdVVP adv t)
|
|
(foldl (\t adv -> cidAdvVP t adv)
|
|
(cidComplSlash (cidSlashV2a v2) o)
|
|
advs)
|
|
adVs)
|
|
`mplus`
|
|
do adVs <- many pAdV
|
|
(t,p,v) <- pV "V"
|
|
advs <- many pAdv
|
|
return (t,p,foldr (\adv t -> cidAdVVP adv t)
|
|
(foldl (\t adv -> cidAdvVP t adv)
|
|
(cidUseV v)
|
|
advs)
|
|
adVs)
|
|
|
|
pV cat =
|
|
do inside "V" $
|
|
do v <- lemma "VVDv" (mkCId cat) "s VPast"
|
|
return (cidTTAnt cidTPast cidASimul,cidPPos,v)
|
|
`mplus`
|
|
do v <- lemma "VVDt" (mkCId cat) "s VPast"
|
|
return (cidTTAnt cidTPast cidASimul,cidPPos,v)
|
|
`mplus`
|
|
do v <- lemma "VVZv" (mkCId cat) "s VPres"
|
|
return (cidTTAnt cidTPres cidASimul,cidPPos,v)
|
|
`mplus`
|
|
do match "VHD"
|
|
match "VHD"
|
|
v <- lemma "VVNv" (mkCId cat) "s VPPart"
|
|
return (cidTTAnt cidTPres cidAAnter,cidPPos,v)
|
|
`mplus`
|
|
do v <- match "V"
|
|
return (App (mkCId "XXX") [],App (mkCId "XXX") [],v)
|
|
|
|
pAdV =
|
|
do insideOpt "R:c" $
|
|
lemma "RRR" (mkCId "AdV") "s"
|
|
`mplus`
|
|
do match "R:m"
|
|
|
|
pObject =
|
|
match "P:u"
|
|
`mplus`
|
|
insideOpt "N:o" pNP
|
|
`mplus`
|
|
match "N:e"
|
|
`mplus`
|
|
match "M:e"
|
|
`mplus`
|
|
match "D:e"
|
|
`mplus`
|
|
match "P:e"
|
|
|
|
pAdv =
|
|
do match "N:t"
|
|
`mplus`
|
|
do match "N:h"
|
|
`mplus`
|
|
do match "P:p"
|
|
`mplus`
|
|
do match "P:q"
|
|
`mplus`
|
|
do match "P:a"
|
|
`mplus`
|
|
do match "P:t"
|
|
`mplus`
|
|
do match "P:h"
|
|
`mplus`
|
|
do match "P:m"
|
|
`mplus`
|
|
do match "P:r"
|
|
`mplus`
|
|
do match "R:p"
|
|
`mplus`
|
|
do match "R:q"
|
|
`mplus`
|
|
do match "R:a"
|
|
`mplus`
|
|
do match "R:t"
|
|
`mplus`
|
|
do match "R:h"
|
|
`mplus`
|
|
do match "R:m"
|
|
`mplus`
|
|
do match "R:c"
|
|
`mplus`
|
|
do match "R:r"
|
|
`mplus`
|
|
do match "F:p"
|
|
`mplus`
|
|
do match "F:q"
|
|
`mplus`
|
|
do match "F:a"
|
|
`mplus`
|
|
do match "F:t"
|
|
`mplus`
|
|
do match "F:h"
|
|
`mplus`
|
|
do match "F:m"
|
|
`mplus`
|
|
do match "F:r"
|
|
`mplus`
|
|
do match "W:b"
|
|
`mplus`
|
|
do match "L:b"
|
|
|
|
pNP = do
|
|
q <- pQuant
|
|
(n,cn) <- pCN
|
|
return (cidDetCN (cidDetQuant q n) cn)
|
|
|
|
pQuant =
|
|
do lemma "AT" (mkCId "Quant") "s False Sg"
|
|
`mplus`
|
|
do match "AT1"
|
|
return cidIndefArt
|
|
|
|
pCN =
|
|
do np <- insideOpt "N" pNP
|
|
(n,cn) <- pCN
|
|
return (n,App (mkCId "Appos") [np,cn])
|
|
`mplus`
|
|
do a <- lemma "JJ" (mkCId "A") "s (AAdj Posit Nom)"
|
|
(n,cn) <- pCN
|
|
return (n,cidAdjCN (cidPositA a) cn)
|
|
`mplus`
|
|
do (num,n) <- pN
|
|
advs <- many pPo
|
|
return (num,
|
|
foldl (\t adv -> cidAdvCN t adv)
|
|
(cidUseN n)
|
|
advs)
|
|
|
|
pN =
|
|
do n <- lemma "NN1c" (mkCId "N") "s Sg Nom"
|
|
return (cidNumSg, n)
|
|
`mplus`
|
|
do n <- lemma "NN1n" (mkCId "N") "s Sg Nom"
|
|
return (cidNumSg, n)
|
|
|
|
pPo =
|
|
insideOpt "Po" $ do
|
|
p <- match "IO"
|
|
np <- insideOpt "N" pNP
|
|
return (cidPrepNP p np)
|