1
0
forked from GitHub/gf-core

partial reconstruction for sentence structures in Susanne

This commit is contained in:
kr.angelov
2013-12-06 15:05:56 +00:00
parent 0d047707fc
commit 1026824060
3 changed files with 330 additions and 26 deletions

View File

@@ -1,37 +1,90 @@
module Parser where
import Data.Char
import Control.Monad
import PGF(PGF,Morpho,lookupMorpho,functionType,unType)
import SusanneFormat
newtype P a = P {runP :: [ParseTree] -> Maybe ([ParseTree], a)}
newtype P a = P {runP :: PGF -> Morpho -> [ParseTree] -> Maybe ([ParseTree], a)}
instance Monad P where
return x = P (\ts -> Just (ts, x))
f >>= g = P (\ts -> case runP f ts of
Nothing -> Nothing
Just (ts,x) -> runP (g x) ts)
return x = P (\pgf morpho ts -> Just (ts, x))
f >>= g = P (\pgf morpho ts -> case runP f pgf morpho ts of
Nothing -> Nothing
Just (ts,x) -> runP (g x) pgf morpho ts)
instance MonadPlus P where
mzero = P (\ts -> Nothing)
mplus f g = P (\ts -> mplus (runP f ts) (runP g ts))
mzero = P (\pgf morpho ts -> Nothing)
mplus f g = P (\pgf morpho ts -> mplus (runP f pgf morpho ts) (runP g pgf morpho ts))
match tag_spec = P (\ts ->
match tag_spec = P (\pgf morpho ts ->
case ts of
(Phrase tag1 mods1 fn1 _ _:ts)
(t@(Phrase tag1 mods1 fn1 _ _):ts)
| tag == tag1 &&
all (flip elem mods1) mods &&
(null fn || fn == fn1) -> Just (ts,())
(Word _ tag1 _ _:ts)
| tag == tag1 -> Just (ts,())
(null fn || fn == fn1) -> Just (ts,t)
(t@(Word _ tag1 _ _):ts)
| tag == tag1 -> Just (ts,t)
_ -> Nothing)
where
(f,_) = readTag (Word "<match>" undefined undefined undefined) tag_spec
Phrase tag mods fn _ _ = f []
many1 f = do
x <- f
xs <- many f
return (x:xs)
many f =
do x <- f
xs <- many f
return (x:xs)
`mplus`
do return []
inside tag_spec p = P (\pgf morpho ts ->
case ts of
(t@(Phrase tag1 mods1 fn1 _ ts'):ts)
| tag == tag1 &&
all (flip elem mods1) mods &&
(null fn || fn == fn1) -> case runP p pgf morpho ts' of
Just ([],x) -> Just (ts,x)
_ -> Nothing
_ -> Nothing)
where
(f,_) = readTag (Word "<match>" undefined undefined undefined) tag_spec
Phrase tag mods fn _ _ = f []
insideOpt tag_spec p = P (\pgf morpho ts ->
case ts of
(t@(Phrase tag1 mods1 fn1 _ ts'):ts)
| tag == tag1 &&
all (flip elem mods1) mods &&
(null fn || fn == fn1) -> case runP p pgf morpho ts' of
Just ([],x) -> Just (ts,x)
_ -> Just (ts,t)
_ -> Nothing)
where
(f,_) = readTag (Word "<match>" undefined undefined undefined) tag_spec
Phrase tag mods fn _ _ = f []
lemma tag cat an0 = P (\pgf morpho ts ->
case ts of
(t@(Word _ tag1 form _):ts) | tag == tag1 ->
case [f | (f,an) <- lookupMorpho morpho (map toLower form), hasCat pgf f cat, an == an0] of
[f] -> Just (ts,App f [])
_ -> Just (ts,t)
_ -> Nothing)
where
hasCat pgf f cat =
case functionType pgf f of
Just ty -> case unType ty of
(_,cat1,_) -> cat1 == cat
Nothing -> False
opt f =
do x <- f
return (Just x)
`mplus`
do return Nothing

View File

@@ -15,6 +15,7 @@ data ParseTree
= Phrase Tag Mods Fn Index [ParseTree]
| Word Id Tag Word Lemma
| App CId [ParseTree]
deriving Eq
data ParseTreePos
= Root

View File

@@ -2,24 +2,34 @@ import System.Directory
import System.FilePath
import Data.List
import Data.Char(toLower)
import Control.Monad
import qualified Data.Map as Map
import PGF (readPGF, readLanguage, buildMorpho, lookupMorpho, mkCId, functionType, unType)
import SusanneFormat
import Parser
import Idents
Just eng = readLanguage "ParseEng"
Just eng = readLanguage "DictEng"
main = do
gr <- readPGF "../../ParseEngAbs.pgf"
gr <- readPGF "DictEngAbs.pgf"
let morpho = buildMorpho gr eng
fs <- getDirectoryContents "data"
txts <- (mapM (\f -> readFile ("data" </> f)) . filter ((/= ".") . take 1)) (sort fs)
--let ts = concatMap (convert gr morpho) (readTreebank (lines (concat txts)))
let ts = readTreebank (lines (concat txts))
writeFile "text" (unlines (map show ts))
--let ts' = readTreebank (lines (concat txts))
--writeFile "text" (unlines (map show ts'))
let (ts',rs') = combineRes (convert gr morpho) (readTreebank (lines (concat txts)))
let rm = Map.fromListWith (++) rs'
writeFile "susanne.gft" (unlines (map show ts'))
writeFile "rules" (unlines (concat [unwords ("-":cat:"->":cats) : map (\t -> " "++show t) rs'' | (cat :-> cats,rs'') <- Map.toList rm]))
data Rule = Tag :-> [Tag]
deriving (Eq,Ord)
convert pgf morpho w@(Word _ tag _ lemma)
| elem tag ["YB","YBL","YBR","YF","YIL","YIR","YTL","YTR", "YO"] = []
| tag == "NN1c" = convertLemma pgf morpho (mkCId "N") "s Sg Nom" w
| elem tag ["YB","YBL","YBR","YF","YIL","YIR","YTL","YTR", "YO"] = ([],[])
{- | tag == "NN1c" = convertLemma pgf morpho (mkCId "N") "s Sg Nom" w
| tag == "NN1n" = convertLemma pgf morpho (mkCId "N") "s Sg Nom" w
| tag == "NN2" = convertLemma pgf morpho (mkCId "N") "s Pl Nom" w
| tag == "JJ" = convertLemma pgf morpho (mkCId "A") "s (AAdj Posit Nom)" w
@@ -36,19 +46,259 @@ convert pgf morpho w@(Word _ tag _ lemma)
| tag == "PPHO2"= convertLemma pgf morpho (mkCId "Pron") "s NPAcc" w
| tag == "RR" = convertLemma pgf morpho (mkCId "Adv") "s" w
| tag == "II" = convertLemma pgf morpho (mkCId "Prep") "s" w
| tag == "IO" = convertLemma pgf morpho (mkCId "Prep") "s" w
| otherwise = [w]
convert pgf morpho (Phrase tag mods fn idx ts)
| tag == "O" = concatMap (convert pgf morpho) ts
| otherwise = [Phrase tag mods fn idx (concatMap (convert pgf morpho) ts)]
| tag == "IO" = convertLemma pgf morpho (mkCId "Prep") "s" w-}
| otherwise = ([w],[])
convert pgf morpho t@(Phrase tag mods fn idx ts)
| tag == "O" = (ts',rs')
| tag == "Q" = (ts',rs')
| tag == "S" = case runP pS pgf morpho ts' of
Just ([],x) -> ([x], rs')
_ -> ([Phrase tag mods fn idx ts'], (r,[t]) : rs')
| otherwise = ([Phrase tag mods fn idx ts'], (r,[t]) : rs')
where
(ts',rs') = combineRes (convert pgf morpho) ts
r = tag :-> map getTag ts
isExtra (Word _ "YIL" _ _) = True
isExtra (Word _ "YIR" _ _) = True
isExtra (Word _ "YTL" _ _) = True
isExtra (Word _ "YTR" _ _) = True
isExtra _ = False
getTag (Phrase tag mods fn idx ts) = tag++if null fn then "" else ":"++fn
getTag (Word _ tag _ _) = tag
convertLemma pgf morpho cat an0 w@(Word _ tag form _) =
case [f | (f,an) <- lookupMorpho morpho (map toLower form), hasCat pgf f cat, an == an0] of
[f] -> [App f []]
_ -> [w]
[f] -> ([App f []], [])
_ -> ([w],[])
where
hasCat pgf f cat =
case functionType pgf f of
Just ty -> case unType ty of
(_,cat1,_) -> cat1 == cat
Nothing -> False
combineRes f ts = (ts',rs')
where
(x,y) = unzip (map f ts)
ts' = concat x
rs' = concat y
pS =
do mplus pConj (return ())
advs <- many pAdS
np <- pSubject
(t,p,vp) <- pVP
return (foldr ($) (App cidUseCl [t,p,App cidPredVP [np, vp]]) advs)
`mplus`
do mplus pConj (return ())
(t,p,vp) <- pVP
return (App cidImpVP [vp])
`mplus`
do mplus pConj (return ())
advs <- many pAdS
t1 <- match "EX"
(t,p,vp) <- pVP
return (foldr ($) (App cidUseCl [t,p,App cidExistNP [t1,vp]]) advs)
pSubject =
do insideOpt "N:s" pNP
`mplus`
do insideOpt "N:S" pNP
`mplus`
do match "M:s"
`mplus`
do match "M:S"
`mplus`
do match "D:s"
`mplus`
do match "D:S"
pConj =
do match "CC"
return ()
`mplus`
do match "CCB"
return ()
pAdS =
do adv <- pAdv
match "YC"
return (\t -> App cidExtAdvS [adv,t])
`mplus`
do adv <- pAdv
return (\t -> App cidAdvS [adv,t])
pVP =
do adVs <- many pAdV
(t,p,vs) <- pV "VS"
advs <- many pAdv
s <- insideOpt "F:o"
(opt (match "CST") >> pS)
return (t,p,foldr (\adv t -> App cidAdVVP [adv,t])
(foldl (\t adv -> App cidAdvVP [t, adv])
(App cidComplVS [vs, s])
advs)
adVs)
`mplus`
do adVs <- many pAdV
(t,p,vv) <- pV "VV"
advs <- many pAdv
vp <- match "Ti"
return (t,p,foldr (\adv t -> App cidAdVVP [adv,t])
(foldl (\t adv -> App cidAdvVP [t, adv])
(App cidComplVV [vv, vp])
advs)
adVs)
`mplus`
do adVs <- many pAdV
(t,p,v2) <- pV "V2"
o <- pObject
opt (match "YC") -- what is this?
advs <- many pAdv
return (t,p,foldr (\adv t -> App cidAdVVP [adv,t])
(foldl (\t adv -> App cidAdvVP [t, adv])
(App cidComplSlash [App cidSlashV2a [v2],o])
advs)
adVs)
`mplus`
do adVs <- many pAdV
(t,p,v) <- pV "V"
advs <- many pAdv
return (t,p,foldr (\adv t -> App cidAdVVP [adv,t])
(foldl (\t adv -> App cidAdvVP [t, adv])
(App cidUseV [v])
advs)
adVs)
pV cat =
do inside "V" $
do v <- lemma "VVDv" (mkCId cat) "s VPast"
return (App cidTTAnt [App cidTPast [],App cidASimul []],App cidPPos [],v)
`mplus`
do v <- lemma "VVDt" (mkCId cat) "s VPast"
return (App cidTTAnt [App cidTPast [],App cidASimul []],App cidPPos [],v)
`mplus`
do v <- lemma "VVZv" (mkCId cat) "s VPres"
return (App cidTTAnt [App cidTPres [],App cidASimul []],App cidPPos [],v)
`mplus`
do match "VHD"
match "VHD"
v <- lemma "VVNv" (mkCId cat) "s VPPart"
return (App cidTTAnt [App cidTPres [],App cidAAnter []],App cidPPos [],v)
`mplus`
do v <- match "V"
return (App (mkCId "XXX") [],App (mkCId "XXX") [],v)
pAdV =
do insideOpt "R:c" $
lemma "RRR" (mkCId "AdV") "s"
`mplus`
do match "R:m"
pObject =
match "P:u"
`mplus`
insideOpt "N:o" pNP
`mplus`
match "N:e"
`mplus`
match "M:e"
`mplus`
match "D:e"
`mplus`
match "P:e"
pAdv =
do match "N:t"
`mplus`
do match "N:h"
`mplus`
do match "P:p"
`mplus`
do match "P:q"
`mplus`
do match "P:a"
`mplus`
do match "P:t"
`mplus`
do match "P:h"
`mplus`
do match "P:m"
`mplus`
do match "P:r"
`mplus`
do match "R:p"
`mplus`
do match "R:q"
`mplus`
do match "R:a"
`mplus`
do match "R:t"
`mplus`
do match "R:h"
`mplus`
do match "R:m"
`mplus`
do match "R:c"
`mplus`
do match "R:r"
`mplus`
do match "F:p"
`mplus`
do match "F:q"
`mplus`
do match "F:a"
`mplus`
do match "F:t"
`mplus`
do match "F:h"
`mplus`
do match "F:m"
`mplus`
do match "F:r"
`mplus`
do match "W:b"
`mplus`
do match "L:b"
pNP = do
q <- pQuant
(n,cn) <- pCN
return (App cidDetCN [App cidDetQuant [q,n],cn])
pQuant =
do lemma "AT" (mkCId "Quant") "s False Sg"
`mplus`
do match "AT1"
return (App cidIndefArt [])
pCN =
do np <- insideOpt "N" pNP
(n,cn) <- pCN
return (n,App (mkCId "Appos") [np,cn])
`mplus`
do a <- lemma "JJ" (mkCId "A") "s (AAdj Posit Nom)"
(n,cn) <- pCN
return (n,App cidAdjCN [App cidPositA [a],cn])
`mplus`
do (num,n) <- pN
advs <- many pPo
return (num,
foldl (\t adv -> App cidAdvCN [t, adv])
(App cidUseN [n])
advs)
pN =
do n <- lemma "NN1c" (mkCId "N") "s Sg Nom"
return (App cidNumSg [], n)
`mplus`
do n <- lemma "NN1n" (mkCId "N") "s Sg Nom"
return (App cidNumSg [], n)
pPo =
insideOpt "Po" $ do
p <- match "IO"
np <- insideOpt "N" pNP
return (App cidPrepNP [p,np])