forked from GitHub/gf-core
partial reconstruction for sentence structures in Susanne
This commit is contained in:
@@ -1,37 +1,90 @@
|
||||
module Parser where
|
||||
|
||||
import Data.Char
|
||||
import Control.Monad
|
||||
|
||||
import PGF(PGF,Morpho,lookupMorpho,functionType,unType)
|
||||
import SusanneFormat
|
||||
|
||||
newtype P a = P {runP :: [ParseTree] -> Maybe ([ParseTree], a)}
|
||||
newtype P a = P {runP :: PGF -> Morpho -> [ParseTree] -> Maybe ([ParseTree], a)}
|
||||
|
||||
instance Monad P where
|
||||
return x = P (\ts -> Just (ts, x))
|
||||
f >>= g = P (\ts -> case runP f ts of
|
||||
Nothing -> Nothing
|
||||
Just (ts,x) -> runP (g x) ts)
|
||||
return x = P (\pgf morpho ts -> Just (ts, x))
|
||||
f >>= g = P (\pgf morpho ts -> case runP f pgf morpho ts of
|
||||
Nothing -> Nothing
|
||||
Just (ts,x) -> runP (g x) pgf morpho ts)
|
||||
|
||||
instance MonadPlus P where
|
||||
mzero = P (\ts -> Nothing)
|
||||
mplus f g = P (\ts -> mplus (runP f ts) (runP g ts))
|
||||
mzero = P (\pgf morpho ts -> Nothing)
|
||||
mplus f g = P (\pgf morpho ts -> mplus (runP f pgf morpho ts) (runP g pgf morpho ts))
|
||||
|
||||
match tag_spec = P (\ts ->
|
||||
match tag_spec = P (\pgf morpho ts ->
|
||||
case ts of
|
||||
(Phrase tag1 mods1 fn1 _ _:ts)
|
||||
(t@(Phrase tag1 mods1 fn1 _ _):ts)
|
||||
| tag == tag1 &&
|
||||
all (flip elem mods1) mods &&
|
||||
(null fn || fn == fn1) -> Just (ts,())
|
||||
(Word _ tag1 _ _:ts)
|
||||
| tag == tag1 -> Just (ts,())
|
||||
(null fn || fn == fn1) -> Just (ts,t)
|
||||
(t@(Word _ tag1 _ _):ts)
|
||||
| tag == tag1 -> Just (ts,t)
|
||||
_ -> Nothing)
|
||||
where
|
||||
(f,_) = readTag (Word "<match>" undefined undefined undefined) tag_spec
|
||||
Phrase tag mods fn _ _ = f []
|
||||
|
||||
many1 f = do
|
||||
x <- f
|
||||
xs <- many f
|
||||
return (x:xs)
|
||||
|
||||
many f =
|
||||
do x <- f
|
||||
xs <- many f
|
||||
return (x:xs)
|
||||
`mplus`
|
||||
do return []
|
||||
|
||||
inside tag_spec p = P (\pgf morpho ts ->
|
||||
case ts of
|
||||
(t@(Phrase tag1 mods1 fn1 _ ts'):ts)
|
||||
| tag == tag1 &&
|
||||
all (flip elem mods1) mods &&
|
||||
(null fn || fn == fn1) -> case runP p pgf morpho ts' of
|
||||
Just ([],x) -> Just (ts,x)
|
||||
_ -> Nothing
|
||||
_ -> Nothing)
|
||||
where
|
||||
(f,_) = readTag (Word "<match>" undefined undefined undefined) tag_spec
|
||||
Phrase tag mods fn _ _ = f []
|
||||
|
||||
insideOpt tag_spec p = P (\pgf morpho ts ->
|
||||
case ts of
|
||||
(t@(Phrase tag1 mods1 fn1 _ ts'):ts)
|
||||
| tag == tag1 &&
|
||||
all (flip elem mods1) mods &&
|
||||
(null fn || fn == fn1) -> case runP p pgf morpho ts' of
|
||||
Just ([],x) -> Just (ts,x)
|
||||
_ -> Just (ts,t)
|
||||
_ -> Nothing)
|
||||
where
|
||||
(f,_) = readTag (Word "<match>" undefined undefined undefined) tag_spec
|
||||
Phrase tag mods fn _ _ = f []
|
||||
|
||||
lemma tag cat an0 = P (\pgf morpho ts ->
|
||||
case ts of
|
||||
(t@(Word _ tag1 form _):ts) | tag == tag1 ->
|
||||
case [f | (f,an) <- lookupMorpho morpho (map toLower form), hasCat pgf f cat, an == an0] of
|
||||
[f] -> Just (ts,App f [])
|
||||
_ -> Just (ts,t)
|
||||
_ -> Nothing)
|
||||
where
|
||||
hasCat pgf f cat =
|
||||
case functionType pgf f of
|
||||
Just ty -> case unType ty of
|
||||
(_,cat1,_) -> cat1 == cat
|
||||
Nothing -> False
|
||||
|
||||
opt f =
|
||||
do x <- f
|
||||
return (Just x)
|
||||
`mplus`
|
||||
do return Nothing
|
||||
|
||||
@@ -15,6 +15,7 @@ data ParseTree
|
||||
= Phrase Tag Mods Fn Index [ParseTree]
|
||||
| Word Id Tag Word Lemma
|
||||
| App CId [ParseTree]
|
||||
deriving Eq
|
||||
|
||||
data ParseTreePos
|
||||
= Root
|
||||
|
||||
@@ -2,24 +2,34 @@ import System.Directory
|
||||
import System.FilePath
|
||||
import Data.List
|
||||
import Data.Char(toLower)
|
||||
import Control.Monad
|
||||
import qualified Data.Map as Map
|
||||
|
||||
import PGF (readPGF, readLanguage, buildMorpho, lookupMorpho, mkCId, functionType, unType)
|
||||
import SusanneFormat
|
||||
import Parser
|
||||
import Idents
|
||||
|
||||
Just eng = readLanguage "ParseEng"
|
||||
Just eng = readLanguage "DictEng"
|
||||
|
||||
main = do
|
||||
gr <- readPGF "../../ParseEngAbs.pgf"
|
||||
gr <- readPGF "DictEngAbs.pgf"
|
||||
let morpho = buildMorpho gr eng
|
||||
fs <- getDirectoryContents "data"
|
||||
txts <- (mapM (\f -> readFile ("data" </> f)) . filter ((/= ".") . take 1)) (sort fs)
|
||||
--let ts = concatMap (convert gr morpho) (readTreebank (lines (concat txts)))
|
||||
let ts = readTreebank (lines (concat txts))
|
||||
writeFile "text" (unlines (map show ts))
|
||||
--let ts' = readTreebank (lines (concat txts))
|
||||
--writeFile "text" (unlines (map show ts'))
|
||||
let (ts',rs') = combineRes (convert gr morpho) (readTreebank (lines (concat txts)))
|
||||
let rm = Map.fromListWith (++) rs'
|
||||
writeFile "susanne.gft" (unlines (map show ts'))
|
||||
writeFile "rules" (unlines (concat [unwords ("-":cat:"->":cats) : map (\t -> " "++show t) rs'' | (cat :-> cats,rs'') <- Map.toList rm]))
|
||||
|
||||
data Rule = Tag :-> [Tag]
|
||||
deriving (Eq,Ord)
|
||||
|
||||
convert pgf morpho w@(Word _ tag _ lemma)
|
||||
| elem tag ["YB","YBL","YBR","YF","YIL","YIR","YTL","YTR", "YO"] = []
|
||||
| tag == "NN1c" = convertLemma pgf morpho (mkCId "N") "s Sg Nom" w
|
||||
| elem tag ["YB","YBL","YBR","YF","YIL","YIR","YTL","YTR", "YO"] = ([],[])
|
||||
{- | tag == "NN1c" = convertLemma pgf morpho (mkCId "N") "s Sg Nom" w
|
||||
| tag == "NN1n" = convertLemma pgf morpho (mkCId "N") "s Sg Nom" w
|
||||
| tag == "NN2" = convertLemma pgf morpho (mkCId "N") "s Pl Nom" w
|
||||
| tag == "JJ" = convertLemma pgf morpho (mkCId "A") "s (AAdj Posit Nom)" w
|
||||
@@ -36,19 +46,259 @@ convert pgf morpho w@(Word _ tag _ lemma)
|
||||
| tag == "PPHO2"= convertLemma pgf morpho (mkCId "Pron") "s NPAcc" w
|
||||
| tag == "RR" = convertLemma pgf morpho (mkCId "Adv") "s" w
|
||||
| tag == "II" = convertLemma pgf morpho (mkCId "Prep") "s" w
|
||||
| tag == "IO" = convertLemma pgf morpho (mkCId "Prep") "s" w
|
||||
| otherwise = [w]
|
||||
convert pgf morpho (Phrase tag mods fn idx ts)
|
||||
| tag == "O" = concatMap (convert pgf morpho) ts
|
||||
| otherwise = [Phrase tag mods fn idx (concatMap (convert pgf morpho) ts)]
|
||||
| tag == "IO" = convertLemma pgf morpho (mkCId "Prep") "s" w-}
|
||||
| otherwise = ([w],[])
|
||||
convert pgf morpho t@(Phrase tag mods fn idx ts)
|
||||
| tag == "O" = (ts',rs')
|
||||
| tag == "Q" = (ts',rs')
|
||||
| tag == "S" = case runP pS pgf morpho ts' of
|
||||
Just ([],x) -> ([x], rs')
|
||||
_ -> ([Phrase tag mods fn idx ts'], (r,[t]) : rs')
|
||||
| otherwise = ([Phrase tag mods fn idx ts'], (r,[t]) : rs')
|
||||
where
|
||||
(ts',rs') = combineRes (convert pgf morpho) ts
|
||||
r = tag :-> map getTag ts
|
||||
|
||||
isExtra (Word _ "YIL" _ _) = True
|
||||
isExtra (Word _ "YIR" _ _) = True
|
||||
isExtra (Word _ "YTL" _ _) = True
|
||||
isExtra (Word _ "YTR" _ _) = True
|
||||
isExtra _ = False
|
||||
|
||||
getTag (Phrase tag mods fn idx ts) = tag++if null fn then "" else ":"++fn
|
||||
getTag (Word _ tag _ _) = tag
|
||||
|
||||
convertLemma pgf morpho cat an0 w@(Word _ tag form _) =
|
||||
case [f | (f,an) <- lookupMorpho morpho (map toLower form), hasCat pgf f cat, an == an0] of
|
||||
[f] -> [App f []]
|
||||
_ -> [w]
|
||||
[f] -> ([App f []], [])
|
||||
_ -> ([w],[])
|
||||
where
|
||||
hasCat pgf f cat =
|
||||
case functionType pgf f of
|
||||
Just ty -> case unType ty of
|
||||
(_,cat1,_) -> cat1 == cat
|
||||
Nothing -> False
|
||||
|
||||
combineRes f ts = (ts',rs')
|
||||
where
|
||||
(x,y) = unzip (map f ts)
|
||||
ts' = concat x
|
||||
rs' = concat y
|
||||
|
||||
pS =
|
||||
do mplus pConj (return ())
|
||||
advs <- many pAdS
|
||||
np <- pSubject
|
||||
(t,p,vp) <- pVP
|
||||
return (foldr ($) (App cidUseCl [t,p,App cidPredVP [np, vp]]) advs)
|
||||
`mplus`
|
||||
do mplus pConj (return ())
|
||||
(t,p,vp) <- pVP
|
||||
return (App cidImpVP [vp])
|
||||
`mplus`
|
||||
do mplus pConj (return ())
|
||||
advs <- many pAdS
|
||||
t1 <- match "EX"
|
||||
(t,p,vp) <- pVP
|
||||
return (foldr ($) (App cidUseCl [t,p,App cidExistNP [t1,vp]]) advs)
|
||||
|
||||
pSubject =
|
||||
do insideOpt "N:s" pNP
|
||||
`mplus`
|
||||
do insideOpt "N:S" pNP
|
||||
`mplus`
|
||||
do match "M:s"
|
||||
`mplus`
|
||||
do match "M:S"
|
||||
`mplus`
|
||||
do match "D:s"
|
||||
`mplus`
|
||||
do match "D:S"
|
||||
|
||||
pConj =
|
||||
do match "CC"
|
||||
return ()
|
||||
`mplus`
|
||||
do match "CCB"
|
||||
return ()
|
||||
|
||||
pAdS =
|
||||
do adv <- pAdv
|
||||
match "YC"
|
||||
return (\t -> App cidExtAdvS [adv,t])
|
||||
`mplus`
|
||||
do adv <- pAdv
|
||||
return (\t -> App cidAdvS [adv,t])
|
||||
|
||||
pVP =
|
||||
do adVs <- many pAdV
|
||||
(t,p,vs) <- pV "VS"
|
||||
advs <- many pAdv
|
||||
s <- insideOpt "F:o"
|
||||
(opt (match "CST") >> pS)
|
||||
return (t,p,foldr (\adv t -> App cidAdVVP [adv,t])
|
||||
(foldl (\t adv -> App cidAdvVP [t, adv])
|
||||
(App cidComplVS [vs, s])
|
||||
advs)
|
||||
adVs)
|
||||
`mplus`
|
||||
do adVs <- many pAdV
|
||||
(t,p,vv) <- pV "VV"
|
||||
advs <- many pAdv
|
||||
vp <- match "Ti"
|
||||
return (t,p,foldr (\adv t -> App cidAdVVP [adv,t])
|
||||
(foldl (\t adv -> App cidAdvVP [t, adv])
|
||||
(App cidComplVV [vv, vp])
|
||||
advs)
|
||||
adVs)
|
||||
`mplus`
|
||||
do adVs <- many pAdV
|
||||
(t,p,v2) <- pV "V2"
|
||||
o <- pObject
|
||||
opt (match "YC") -- what is this?
|
||||
advs <- many pAdv
|
||||
return (t,p,foldr (\adv t -> App cidAdVVP [adv,t])
|
||||
(foldl (\t adv -> App cidAdvVP [t, adv])
|
||||
(App cidComplSlash [App cidSlashV2a [v2],o])
|
||||
advs)
|
||||
adVs)
|
||||
`mplus`
|
||||
do adVs <- many pAdV
|
||||
(t,p,v) <- pV "V"
|
||||
advs <- many pAdv
|
||||
return (t,p,foldr (\adv t -> App cidAdVVP [adv,t])
|
||||
(foldl (\t adv -> App cidAdvVP [t, adv])
|
||||
(App cidUseV [v])
|
||||
advs)
|
||||
adVs)
|
||||
|
||||
pV cat =
|
||||
do inside "V" $
|
||||
do v <- lemma "VVDv" (mkCId cat) "s VPast"
|
||||
return (App cidTTAnt [App cidTPast [],App cidASimul []],App cidPPos [],v)
|
||||
`mplus`
|
||||
do v <- lemma "VVDt" (mkCId cat) "s VPast"
|
||||
return (App cidTTAnt [App cidTPast [],App cidASimul []],App cidPPos [],v)
|
||||
`mplus`
|
||||
do v <- lemma "VVZv" (mkCId cat) "s VPres"
|
||||
return (App cidTTAnt [App cidTPres [],App cidASimul []],App cidPPos [],v)
|
||||
`mplus`
|
||||
do match "VHD"
|
||||
match "VHD"
|
||||
v <- lemma "VVNv" (mkCId cat) "s VPPart"
|
||||
return (App cidTTAnt [App cidTPres [],App cidAAnter []],App cidPPos [],v)
|
||||
`mplus`
|
||||
do v <- match "V"
|
||||
return (App (mkCId "XXX") [],App (mkCId "XXX") [],v)
|
||||
|
||||
pAdV =
|
||||
do insideOpt "R:c" $
|
||||
lemma "RRR" (mkCId "AdV") "s"
|
||||
`mplus`
|
||||
do match "R:m"
|
||||
|
||||
pObject =
|
||||
match "P:u"
|
||||
`mplus`
|
||||
insideOpt "N:o" pNP
|
||||
`mplus`
|
||||
match "N:e"
|
||||
`mplus`
|
||||
match "M:e"
|
||||
`mplus`
|
||||
match "D:e"
|
||||
`mplus`
|
||||
match "P:e"
|
||||
|
||||
pAdv =
|
||||
do match "N:t"
|
||||
`mplus`
|
||||
do match "N:h"
|
||||
`mplus`
|
||||
do match "P:p"
|
||||
`mplus`
|
||||
do match "P:q"
|
||||
`mplus`
|
||||
do match "P:a"
|
||||
`mplus`
|
||||
do match "P:t"
|
||||
`mplus`
|
||||
do match "P:h"
|
||||
`mplus`
|
||||
do match "P:m"
|
||||
`mplus`
|
||||
do match "P:r"
|
||||
`mplus`
|
||||
do match "R:p"
|
||||
`mplus`
|
||||
do match "R:q"
|
||||
`mplus`
|
||||
do match "R:a"
|
||||
`mplus`
|
||||
do match "R:t"
|
||||
`mplus`
|
||||
do match "R:h"
|
||||
`mplus`
|
||||
do match "R:m"
|
||||
`mplus`
|
||||
do match "R:c"
|
||||
`mplus`
|
||||
do match "R:r"
|
||||
`mplus`
|
||||
do match "F:p"
|
||||
`mplus`
|
||||
do match "F:q"
|
||||
`mplus`
|
||||
do match "F:a"
|
||||
`mplus`
|
||||
do match "F:t"
|
||||
`mplus`
|
||||
do match "F:h"
|
||||
`mplus`
|
||||
do match "F:m"
|
||||
`mplus`
|
||||
do match "F:r"
|
||||
`mplus`
|
||||
do match "W:b"
|
||||
`mplus`
|
||||
do match "L:b"
|
||||
|
||||
pNP = do
|
||||
q <- pQuant
|
||||
(n,cn) <- pCN
|
||||
return (App cidDetCN [App cidDetQuant [q,n],cn])
|
||||
|
||||
pQuant =
|
||||
do lemma "AT" (mkCId "Quant") "s False Sg"
|
||||
`mplus`
|
||||
do match "AT1"
|
||||
return (App cidIndefArt [])
|
||||
|
||||
pCN =
|
||||
do np <- insideOpt "N" pNP
|
||||
(n,cn) <- pCN
|
||||
return (n,App (mkCId "Appos") [np,cn])
|
||||
`mplus`
|
||||
do a <- lemma "JJ" (mkCId "A") "s (AAdj Posit Nom)"
|
||||
(n,cn) <- pCN
|
||||
return (n,App cidAdjCN [App cidPositA [a],cn])
|
||||
`mplus`
|
||||
do (num,n) <- pN
|
||||
advs <- many pPo
|
||||
return (num,
|
||||
foldl (\t adv -> App cidAdvCN [t, adv])
|
||||
(App cidUseN [n])
|
||||
advs)
|
||||
|
||||
pN =
|
||||
do n <- lemma "NN1c" (mkCId "N") "s Sg Nom"
|
||||
return (App cidNumSg [], n)
|
||||
`mplus`
|
||||
do n <- lemma "NN1n" (mkCId "N") "s Sg Nom"
|
||||
return (App cidNumSg [], n)
|
||||
|
||||
pPo =
|
||||
insideOpt "Po" $ do
|
||||
p <- match "IO"
|
||||
np <- insideOpt "N" pNP
|
||||
return (App cidPrepNP [p,np])
|
||||
|
||||
Reference in New Issue
Block a user