a new version of the conversion script for the Susanne corpus which covers a bit less than half of the data

This commit is contained in:
krasimir
2015-11-13 13:05:21 +00:00
parent b33ea36c18
commit 6011fbc033
4 changed files with 903 additions and 199 deletions

148
treebanks/susanne/Idents.hs Normal file
View File

@@ -0,0 +1,148 @@
module Idents where
import SusanneFormat
cidASimul = app0 "ASimul"
cidAAnter = app0 "AAnter"
cidPositAdvAdj = app0 "PositAdvAdj"
cidPositAdVAdj = app0 "PositAdVAdj"
cidUseCl = app3 "UseCl"
cidPredVP = app2 "PredVP"
cidSlashVP = app2 "SlashVP"
cidVPSlashPrep = app2 "VPSlashPrep"
cidComplPredVP = app0 "ComplPredVP"
cidAdjCN = app2 "AdjCN"
cidUseN = app1 "UseN"
cidDetQuant = app2 "DetQuant"
cidDetQuantOrd = app3 "DetQuantOrd"
cidNumSg = app0 "NumSg"
cidNumPl = app0 "NumPl"
cidDetCN = app2 "DetCN"
cidIndefArt = app0 "IndefArt"
cidDefArt = app0 "DefArt"
cidUsePN = app1 "UsePN"
cidUseQuantPN = app0 "UseQuantPN"
cidSymbPN = app1 "SymbPN"
cidMkSymb = app1 "MkSymb"
cidUsePron = app1 "UsePron"
cidConjNP = app0 "ConjNP"
cidBaseNP = app0 "BaseNP"
cidConsNP = app0 "ConsNP"
cidConjCN = app0 "ConjCN"
cidBaseCN = app0 "BaseCN"
cidConsCN = app0 "ConsCN"
cidConjAdv = app0 "ConjAdv"
cidBaseAdv = app0 "BaseAdv"
cidConsAdv = app0 "ConsAdv"
cidBaseS = app0 "BaseS"
cidConsS = app0 "ConsS"
cidConjS = app0 "ConjS"
cidMassNP = app1 "MassNP"
cidAdvNP = app2 "AdvNP"
cidTPres = app0 "TPres"
cidTPast = app0 "TPast"
cidTFut = app0 "TFut"
cidTCond = app0 "TCond"
cidTTAnt = app2 "TTAnt"
cidPPos = app0 "PPos"
cidPNeg = app0 "PNeg"
cidComplSlash = app2 "ComplSlash"
cidSlashV2a = app1 "SlashV2a"
cidSlashV2A = app2 "SlashV2A"
cidComplVS = app2 "ComplVS"
cidComplVV = app4 "ComplVV"
cidUseV = app1 "UseV"
cidAdVVP = app2 "AdVVP"
cidAdvVP = app2 "AdvVP"
cidAdvVPSlash = app2 "AdvVPSlash"
cidPrepNP = app2 "PrepNP"
cidto_Prep = app0 "to_Prep"
cidsuch_as_Prep= app0 "such_as_Prep"
cidPastPartAP = app1 "PastPartAP"
cidPassVPSlash = app0 "PassVPSlash"
cidAdvS = app2 "AdvS"
cidPositA = app1 "PositA"
cidIDig = app0 "IDig"
cidIIDig = app0 "IIDig"
cidNumCard = app0 "NumCard"
cidNumDigits = app0 "NumDigits"
cidNumNumeral = app0 "NumNumeral"
cidnum = app0 "num"
cidpot2as3 = app0 "pot2as3"
cidpot1as2 = app0 "pot1as2"
cidpot0as1 = app0 "pot0as1"
cidpot01 = app0 "pot01"
cidpot0 = app0 "pot0"
cidn2 = app0 "n2"
cidn3 = app0 "n3"
cidn4 = app0 "n4"
cidn5 = app0 "n5"
cidn6 = app0 "n6"
cidn7 = app0 "n7"
cidn8 = app0 "n8"
cidn9 = app0 "n9"
cidPossPron = app1 "PossPron"
cidCompAP = app1 "CompAP"
cidCompNP = app1 "CompNP"
cidCompAdv = app1 "CompAdv"
cidCompS = app1 "CompS"
cidCompVP = app1 "CompVP"
cidUseComp = app1 "UseComp"
cidCompoundSgCN= app2 "CompoundSgCN"
cidCompoundPlCN= app2 "CompoundPlCN"
cidDashSgN = app2 "DashSgN"
cidDashPlN = app2 "DashPlN"
cidProgrVP = app0 "ProgrVP"
cidGerundN = app0 "GerundN"
cidGerundAP = app0 "GerundAP"
cidGenNP = app1 "GenNP"
cidPredetNP = app1 "PredetNP"
cidDetNP = app1 "DetNP"
cidAdAP = app2 "AdAP"
cidAdvAP = app2 "AdvAP"
cidPositAdAAdj = app1 "PositAdAAdj"
cideither7or_DConj = app0 "either7or_DConj"
cidboth7and_DConj = app0 "both7and_DConj"
cidor_Conj = app0 "or_Conj"
cidand_Conj = app0 "and_Conj"
cidamp_Conj = app0 "amp_Conj"
cidSlashV2V = app0 "SlashV2V"
cidComplVA = app0 "ComplVA"
cidAdNum = app0 "AdNum"
cidOrdSuperl = app1 "OrdSuperl"
cidno_RP = app0 "no_RP"
cidthat_RP = app0 "that_RP"
cidUseRCl = app3 "UseRCl"
cidRelSlash = app2 "RelSlash"
cidRelNP = app2 "RelNP"
cidRelCN = app2 "RelCN"
cidRelVP = app2 "RelVP"
cidIdRP = app0 "IdRP"
cidmany_Det = app0 "many_Det"
cidImpVP = app1 "ImpVP"
cidExistNP = app2 "ExistNP"
cidExtAdvS = app2 "ExtAdvS"
cidAdvCN = app2 "AdvCN"
cidNameCN = app2 "NameCN"
cidno_Quant = app0 "no_Quant"
cidSubjS = app2 "SubjS"
cidthat_Subj = app0 "that_Subj"
cidanySg_Det = app0 "anySg_Det"
cidanyPl_Det = app0 "anyPl_Det"
cidhave_V2 = app0 "have_V2"
cidby_Prep = app0 "by_Prep"
cidweekdayPunctualAdv = app1 "weekdayPunctualAdv"
cidi_Pron = app0 "i_Pron"
cidyouSg_Pron = app0 "youSg_Pron"
cidhe_Pron = app0 "he_Pron"
cidshe_Pron = app0 "she_Pron"
cidit_Pron = app0 "it_Pron"
cidwe_Pron = app0 "we_Pron"
cidthey_Pron = app0 "they_Pron"
cidUseComparA = app1 "UseComparA"
app0 f = App f []
app1 f = \x -> App f [x]
app2 f = \x y -> App f [x,y]
app3 f = \x y z -> App f [x,y,z]
app4 f = \w x y z -> App f [w,x,y,z]

View File

@@ -3,29 +3,30 @@ module Parser where
import Data.Char
import Control.Monad
import PGF(PGF,Morpho,lookupMorpho,functionType,unType)
import PGF2
import SusanneFormat
import Debug.Trace
newtype P a = P {runP :: PGF -> Morpho -> [ParseTree] -> Maybe ([ParseTree], a)}
newtype P a = P {runP :: PGF -> Concr -> [ParseTree] -> Maybe ([ParseTree], a)}
instance Monad P where
return x = P (\pgf morpho ts -> Just (ts, x))
f >>= g = P (\pgf morpho ts -> case runP f pgf morpho ts of
return x = P (\pgf cnc ts -> Just (ts, x))
f >>= g = P (\pgf cnc ts -> case runP f pgf cnc ts of
Nothing -> Nothing
Just (ts,x) -> runP (g x) pgf morpho ts)
Just (ts,x) -> runP (g x) pgf cnc ts)
instance MonadPlus P where
mzero = P (\pgf morpho ts -> Nothing)
mplus f g = P (\pgf morpho ts -> mplus (runP f pgf morpho ts) (runP g pgf morpho ts))
mzero = P (\pgf cnc ts -> Nothing)
mplus f g = P (\pgf cnc ts -> mplus (runP f pgf cnc ts) (runP g pgf cnc ts))
match tag_spec = P (\pgf morpho ts ->
match convert tag_spec = P (\pgf cnc ts ->
case ts of
(t@(Phrase tag1 mods1 fn1 _ _):ts)
| tag == tag1 &&
all (flip elem mods1) mods &&
(null fn || fn == fn1) -> Just (ts,t)
(null fn || fn == fn1) -> Just (ts,convert pgf cnc t)
(t@(Word _ tag1 _ _):ts)
| tag == tag1 -> Just (ts,t)
| tag == tag1 && null mods-> Just (ts,convert pgf cnc t)
_ -> Nothing)
where
(f,_) = readTag (Word "<match>" undefined undefined undefined) tag_spec
@@ -43,12 +44,12 @@ many f =
`mplus`
do return []
inside tag_spec p = P (\pgf morpho ts ->
inside tag_spec p = P (\pgf cnc ts ->
case ts of
(t@(Phrase tag1 mods1 fn1 _ ts'):ts)
| tag == tag1 &&
all (flip elem mods1) mods &&
(null fn || fn == fn1) -> case runP p pgf morpho ts' of
(null fn || fn == fn1) -> case runP p pgf cnc ts' of
Just ([],x) -> Just (ts,x)
_ -> Nothing
_ -> Nothing)
@@ -56,35 +57,45 @@ inside tag_spec p = P (\pgf morpho ts ->
(f,_) = readTag (Word "<match>" undefined undefined undefined) tag_spec
Phrase tag mods fn _ _ = f []
insideOpt tag_spec p = P (\pgf morpho ts ->
insideOpt convert tag_spec p = P (\pgf cnc ts ->
case ts of
(t@(Phrase tag1 mods1 fn1 _ ts'):ts)
| tag == tag1 &&
all (flip elem mods1) mods &&
(null fn || fn == fn1) -> case runP p pgf morpho ts' of
(null fn || fn == fn1) -> case runP p pgf cnc ts' of
Just ([],x) -> Just (ts,x)
_ -> Just (ts,t)
_ -> Just (ts,convert pgf cnc t)
_ -> Nothing)
where
(f,_) = readTag (Word "<match>" undefined undefined undefined) tag_spec
Phrase tag mods fn _ _ = f []
lemma tag cat an0 = P (\pgf morpho ts ->
lemma tag cat an0 = P (\pgf cnc ts ->
case ts of
(t@(Word _ tag1 form _):ts) | tag == tag1 ->
case [f | (f,an) <- lookupMorpho morpho (map toLower form), hasCat pgf f cat, an == an0] of
[f] -> Just (ts,App f [])
_ -> Just (ts,t)
_ -> Nothing)
(t@(Word _ tag1 form _):ts) | tag == tag1 -> case runP (lookupForm cat an0 form) pgf cnc ts of
Nothing -> Just (ts,t)
x -> x
_ -> Nothing)
lookupForm cat an0 form = P (\pgf cnc ts ->
case [f | (f,an,_) <- lookupMorpho cnc form, hasCat pgf f cat, an == an0] of
[] -> case [f | (f,an,_) <- lookupMorpho cnc (map toLower form), hasCat pgf f cat, an == an0] of
[f] -> Just (ts,App f [])
_ -> Nothing
[f] -> Just (ts,App f [])
_ -> Nothing)
where
hasCat pgf f cat =
case functionType pgf f of
Just ty -> case unType ty of
(_,cat1,_) -> cat1 == cat
Nothing -> False
(DTyp _ cat1 _) -> cat1 == cat
opt f =
do x <- f
return (Just x)
`mplus`
do return Nothing
word tag = P (\pgf cnc ts ->
case ts of
((Word _ tag1 form _):ts) | tag == tag1 -> Just (ts,form)
_ -> Nothing)

View File

@@ -1,7 +1,7 @@
module SusanneFormat(Tag,Id,Word,Lemma,ParseTree(..),readTreebank,readTag) where
import PGF(CId)
import Data.Char
import qualified Data.Map as Map
type Tag = String
type Mods = String
@@ -14,7 +14,8 @@ type Lemma = String
data ParseTree
= Phrase Tag Mods Fn Index [ParseTree]
| Word Id Tag Word Lemma
| App CId [ParseTree]
| App String [ParseTree]
| Lit String
deriving Eq
data ParseTreePos
@@ -28,14 +29,15 @@ instance Show ParseTree where
| otherwise = "["++tag++mods++":"++fn++show idx++" "++unwords (map show ts)++"]"
show (Word _ tag w _) = "["++tag++" "++w++"]"
show (App f ts)
| null ts = show f
| otherwise = "("++show f++" "++unwords (map show ts)++")"
| null ts = f
| otherwise = "("++f++" "++unwords (map show ts)++")"
show (Lit s) = show s
readTreebank ls = readLines Root (map words ls)
readLines p [] = []
readLines p ([id,_,tag,w,l,parse]:ls) =
readParse (Word id tag w l) p parse ls
readParse (Word id tag (readWord w) l) p parse ls
readParse w p [] ls = readLines p ls
readParse w p ('[':cs) ls =
@@ -81,3 +83,61 @@ readTag w (c:cs) -- phrase tag
readTag w cs = readError w
readError (Word id _ _ _) = error id
readWord w0 = replaceEntities w2
where
w1 | head w0 == '+' = tail w0
| otherwise = w0
w2 | last w1 == '+' = init w1
| otherwise = w1
replaceEntities [] = []
replaceEntities ('<':cs) =
let (e,'>':cs1) = break (=='>') cs
in case Map.lookup e entity_names of
Just c -> c : replaceEntities cs1
Nothing -> "<"++e++">"++ replaceEntities cs1
replaceEntities (c: cs) = c : replaceEntities cs
entity_names = Map.fromList
[("agr",'α')
,("agrave",'à')
,("apos",'\'')
,("auml",'ä')
,("bgr",'β')
,("blank",' ')
,("ccedil",'ç')
,("deg",'°')
,("dollar",'$')
,("eacute",'é')
,("egr",'ε')
,("egrave",'è')
,("frac12",'½')
,("frac14",'¼')
,("ggr",'γ')
,("hellip",'…')
,("hyphen",'-')
,("iuml",'ï')
,("khgr",'χ')
,("ldquo",'“')
,("lgr",'λ')
,("lsquo",'')
,("mdash",'—')
,("mgr",'μ')
,("minus",'-')
,("ntilde",'ñ')
,("oelig",'œ')
,("ouml",'ö')
,("para",'¶')
,("pgr",'π')
,("phgr",'φ')
,("prime",'')
,("Prime",'″')
,("rdquo",'”')
,("rgr",'ρ')
,("rsquo",'')
,("sect",'§')
,("sol",'/')
,("tggr",'θ')
]

View File

@@ -5,92 +5,63 @@ import Data.Char(toLower)
import Control.Monad
import qualified Data.Map as Map
import PGF (readPGF, readLanguage, buildMorpho, lookupMorpho, mkCId, functionType, unType)
import PGF2
import SusanneFormat
import Parser
import Idents
Just eng = readLanguage "DictEng"
main = do
gr <- readPGF "DictEngAbs.pgf"
let morpho = buildMorpho gr eng
gr <- readPGF "ParseEngAbs.pgf"
let Just eng = Map.lookup "ParseEng" (languages gr)
fs <- getDirectoryContents "data"
txts <- (mapM (\f -> readFile ("data" </> f)) . filter ((/= ".") . take 1)) (sort fs)
--let ts' = readTreebank (lines (concat txts))
--writeFile "text" (unlines (map show ts'))
let (ts',rs') = combineRes (convert gr morpho) (readTreebank (lines (concat txts)))
let rm = Map.fromListWith (++) rs'
writeFile "susanne.gft" (unlines (map show ts'))
writeFile "rules" (unlines (concat [unwords ("-":cat:"->":cats) : map (\t -> " "++show t) rs'' | (cat :-> cats,rs'') <- Map.toList rm]))
let ts = (map (convert gr eng) . concatMap filterTree) (readTreebank (lines (concat txts)))
writeFile "susanne.gft" (unlines (map show ts))
data Rule = Tag :-> [Tag]
deriving (Eq,Ord)
convert pgf morpho w@(Word _ tag _ lemma)
| elem tag ["YB","YBL","YBR","YF","YIL","YIR","YTL","YTR", "YO"] = ([],[])
{- | tag == "NN1c" = convertLemma pgf morpho (mkCId "N") "s Sg Nom" w
| tag == "NN1n" = convertLemma pgf morpho (mkCId "N") "s Sg Nom" w
| tag == "NN2" = convertLemma pgf morpho (mkCId "N") "s Pl Nom" w
| tag == "JJ" = convertLemma pgf morpho (mkCId "A") "s (AAdj Posit Nom)" w
| tag == "JB" = convertLemma pgf morpho (mkCId "A") "s (AAdj Posit Nom)" w
| tag == "JBo" = convertLemma pgf morpho (mkCId "A") "s (AAdj Posit Nom)" w
| tag == "AT" = convertLemma pgf morpho (mkCId "Quant") "s False Sg" w
| tag == "VVDi" = convertLemma pgf morpho (mkCId "V") "s VPast" w
| tag == "VVDt" = convertLemma pgf morpho (mkCId "V2") "s VPast" w
| tag == "VVDv" = convertLemma pgf morpho (mkCId "V") "s VPast" w
| tag == "VVZi" = convertLemma pgf morpho (mkCId "V") "s VPres" w
| tag == "VVZt" = convertLemma pgf morpho (mkCId "V2") "s VPres" w
| tag == "VVZv" = convertLemma pgf morpho (mkCId "V") "s VPres" w
| tag == "PPHS2"= convertLemma pgf morpho (mkCId "Pron") "s (NCase Nom)" w
| tag == "PPHO2"= convertLemma pgf morpho (mkCId "Pron") "s NPAcc" w
| tag == "RR" = convertLemma pgf morpho (mkCId "Adv") "s" w
| tag == "II" = convertLemma pgf morpho (mkCId "Prep") "s" w
| tag == "IO" = convertLemma pgf morpho (mkCId "Prep") "s" w-}
| otherwise = ([w],[])
convert pgf morpho t@(Phrase tag mods fn idx ts)
| tag == "O" = (ts',rs')
| tag == "Q" = (ts',rs')
| tag == "S" = case runP pS pgf morpho ts' of
Just ([],x) -> ([x], rs')
_ -> ([Phrase tag mods fn idx ts'], (r,[t]) : rs')
| otherwise = ([Phrase tag mods fn idx ts'], (r,[t]) : rs')
filterTree w@(Word _ tag _ lemma)
| elem tag ["YB","YBL","YBR","YF","YIL","YIR","YTL","YTR", "YO"] = []
| otherwise = [w]
filterTree (Phrase tag mods fn idx ts)
| tag == "O" = ts'
| tag == "Q" = ts'
| otherwise = [Phrase tag mods fn idx ts']
where
(ts',rs') = combineRes (convert pgf morpho) ts
r = tag :-> map getTag ts
ts' = concatMap filterTree ts
isExtra (Word _ "YIL" _ _) = True
isExtra (Word _ "YIR" _ _) = True
isExtra (Word _ "YTL" _ _) = True
isExtra (Word _ "YTR" _ _) = True
isExtra _ = False
getTag (Phrase tag mods fn idx ts) = tag++if null fn then "" else ":"++fn
getTag (Word _ tag _ _) = tag
convertLemma pgf morpho cat an0 w@(Word _ tag form _) =
case [f | (f,an) <- lookupMorpho morpho (map toLower form), hasCat pgf f cat, an == an0] of
[f] -> ([App f []], [])
_ -> ([w],[])
convert pgf eng t@(Phrase tag mods fn idx ts)
| tag == "S" = case runP pS pgf eng ts of
Just ([],x) -> x
_ -> Phrase tag mods fn idx ts'
| tag == "N" = case runP pNP pgf eng ts of
Just ([],x) -> x
_ -> Phrase tag mods fn idx ts'
| tag == "V" = case runP (pV "V") pgf eng [t] of
Just ([],(_,_,_,_,x)) -> x
_ -> Phrase tag mods fn idx ts'
| tag == "P" = case runP pPP pgf eng ts of
Just ([],x) -> x
_ -> Phrase tag mods fn idx ts'
| tag == "Po"= case runP pPP pgf eng ts of
Just ([],x) -> x
_ -> Phrase tag mods fn idx ts'
| otherwise = Phrase tag mods fn idx ts'
where
hasCat pgf f cat =
case functionType pgf f of
Just ty -> case unType ty of
(_,cat1,_) -> cat1 == cat
Nothing -> False
combineRes f ts = (ts',rs')
where
(x,y) = unzip (map f ts)
ts' = concat x
rs' = concat y
ts' = map (convert pgf eng) ts
convert pgf eng t@(Word _ tag _ lemma)
| take 2 tag == "NN" = case runP pN pgf eng [t] of
Just ([],(_,x)) -> x
_ -> t
| take 1 tag == "J" = case runP pAP pgf eng [t] of
Just ([],x) -> x
_ -> t
| otherwise = t
pS =
do mplus pConj (return ())
advs <- many pAdS
np <- pSubject
(t,p,vp) <- pVP
return (foldr ($) (cidUseCl (cidTTAnt t p) (cidPredVP np vp)) advs)
return (foldr ($) (cidUseCl t p (cidPredVP np vp)) advs)
`mplus`
do mplus pConj (return ())
(t,p,vp) <- pVP
@@ -98,44 +69,56 @@ pS =
`mplus`
do mplus pConj (return ())
advs <- many pAdS
t1 <- match "EX"
t1 <- match convert "EX"
(t,p,vp) <- pVP
return (foldr ($) (cidUseCl (cidTTAnt t p) (cidExistNP t1 vp)) advs)
return (foldr ($) (cidUseCl t p (cidExistNP t1 vp)) advs)
pSubject =
do insideOpt "N:s" pNP
do insideOpt convert "N:s" pNP
`mplus`
do insideOpt "N:S" pNP
do insideOpt convert "N:S" pNP
`mplus`
do match "M:s"
do match convert "M:s"
`mplus`
do match "M:S"
do match convert "M:S"
`mplus`
do match "D:s"
do insideOpt convert "Ds:s" $ do
det <- pDet
return (cidDetNP (det cidNumSg))
`mplus`
do match "D:S"
do insideOpt convert "Dp:s" $ do
det <- pDet
return (cidDetNP (det cidNumPl))
`mplus`
do insideOpt convert "Ds:S" $ do
det <- pDet
return (cidDetNP (det cidNumSg))
`mplus`
do insideOpt convert "Dp:S" $ do
det <- pDet
return (cidDetNP (det cidNumPl))
pConj =
do match "CC"
do match convert "CC"
return ()
`mplus`
do match "CCB"
do match convert "CCB"
return ()
pAdS =
do adv <- pAdv
match "YC"
do adv <- pVPMods
match convert "YC"
return (\t -> cidExtAdvS adv t)
`mplus`
do adv <- pAdv
do adv <- pVPMods
return (\t -> cidAdvS adv t)
pVP =
do adVs <- many pAdV
(t,p,vs) <- pV "VS"
advs <- many pAdv
s <- insideOpt "F:o"
(opt (match "CST") >> pS)
(t,p,voice,apect,vs) <- pV "VS"
advs <- many pVPMods
s <- insideOpt convert "F:o"
(opt (match convert "CST") >> pS)
return (t,p,foldr (\adv t -> cidAdVVP adv t)
(foldl (\t adv -> cidAdvVP t adv)
(cidComplVS vs s)
@@ -143,20 +126,22 @@ pVP =
adVs)
`mplus`
do adVs <- many pAdV
(t,p,vv) <- pV "VV"
advs <- many pAdv
vp <- match "Ti"
(t,p,voice,apect,vv) <- pV "VV"
advs <- many pVPMods
(p2,voice,aspect,vp) <- inside "Ti" $ do
match convert "s"
pVPInf
return (t,p,foldr (\adv t -> cidAdVVP adv t)
(foldl (\t adv -> cidAdvVP t adv)
(cidComplVV vv vp)
(cidComplVV vv cidASimul p2 vp)
advs)
adVs)
`mplus`
do adVs <- many pAdV
(t,p,v2) <- pV "V2"
(t,p,voice,apect,v2) <- pV "V2"
o <- pObject
opt (match "YC") -- what is this?
advs <- many pAdv
opt (match convert "YC") -- what is this?
advs <- many pVPMods
return (t,p,foldr (\adv t -> cidAdVVP adv t)
(foldl (\t adv -> cidAdvVP t adv)
(cidComplSlash (cidSlashV2a v2) o)
@@ -164,141 +149,641 @@ pVP =
adVs)
`mplus`
do adVs <- many pAdV
(t,p,v) <- pV "V"
advs <- many pAdv
(t,p,voice,apect,v) <- pV "V"
advs <- many pVPMods
return (t,p,foldr (\adv t -> cidAdVVP adv t)
(foldl (\t adv -> cidAdvVP t adv)
(cidUseV v)
advs)
adVs)
`mplus`
do inside "V" (match convert "VBZ")
adVs <- many pAdV
p <- pPol
comp <- pComp
advs <- many pVPMods
return (cidTTAnt cidTPres cidASimul,p,foldr (\adv t -> cidAdVVP adv t)
(foldl (\t adv -> cidAdvVP t adv)
(cidUseComp comp)
advs)
adVs)
pComp =
do adv <- insideOpt convert "R:e" pAdv
return (cidCompAdv adv)
`mplus`
do np <- insideOpt convert "N:e" pNP
return (cidCompNP np)
`mplus`
do ap <- pAP
return (cidCompAP ap)
pAdv =
do lemma "RP" "Adv" "s"
data Voice = Active | Passive
data Aspect = Simple | Progressive
pV cat =
do inside "V" $
do v <- lemma "VVDv" (mkCId cat) "s VPast"
return (cidTTAnt cidTPast cidASimul,cidPPos,v)
do v <- pVPres cat
return (cidTTAnt cidTPres cidASimul,cidPPos,Active,Simple,v)
`mplus`
do v <- lemma "VVDt" (mkCId cat) "s VPast"
return (cidTTAnt cidTPast cidASimul,cidPPos,v)
do v <- do lemma "VVZi" cat "s VPres"
`mplus`
do lemma "VVZt" cat "s VPres"
`mplus`
do lemma "VVZv" cat "s VPres"
return (cidTTAnt cidTPres cidASimul,cidPPos,Active,Simple,v)
`mplus`
do v <- lemma "VVZv" (mkCId cat) "s VPres"
return (cidTTAnt cidTPres cidASimul,cidPPos,v)
do v <- do lemma "VVDi" cat "s VPast"
`mplus`
do lemma "VVDt" cat "s VPast"
`mplus`
do lemma "VVDv" cat "s VPast"
return (cidTTAnt cidTPast cidASimul,cidPPos,Active,Simple,v)
`mplus`
do match "VHD"
match "VHD"
v <- lemma "VVNv" (mkCId cat) "s VPPart"
return (cidTTAnt cidTPres cidAAnter,cidPPos,v)
do (match convert "VBM" `mplus` match convert "VBR" `mplus` match convert "VBZ") -- am,are,is
pol <- pPol
(voice,aspect,v) <- pVPart cat
return (cidTTAnt cidTPres cidASimul,pol,voice,aspect,v)
`mplus`
do (match convert "VBDZ" `mplus` match convert "VBDR") -- was,were
pol <- pPol
(voice,aspect,v) <- pVPart cat
return (cidTTAnt cidTPast cidASimul,pol,voice,aspect,v)
`mplus`
do match convert "VH0" -- have
pol <- pPol
(voice,aspect,v) <- do v <- pVPastPart cat
return (Active,Simple,v)
`mplus`
do match convert "VBN" -- been
pVPart cat
return (cidTTAnt cidTPres cidAAnter,pol,voice,aspect,v)
`mplus`
do match convert "VH0" -- have
return (cidTTAnt cidTPres cidAAnter,cidPPos,Active,Simple,cidhave_V2)
`mplus`
do match convert "VHZ" -- has
pol <- pPol
(voice,aspect,v) <- do v <- pVPastPart cat
return (Active,Simple,v)
`mplus`
do match convert "VBN" -- been
pVPart cat
return (cidTTAnt cidTPres cidAAnter,pol,voice,aspect,v)
`mplus`
do match convert "VHZ" -- has
return (cidTTAnt cidTPres cidAAnter,cidPPos,Active,Simple,cidhave_V2)
`mplus`
do match convert "VHD" -- had
pol <- pPol
(voice,aspect,v) <- do v <- pVPastPart cat
return (Active,Simple,v)
`mplus`
do match convert "VBN" -- been
pVPart cat
return (cidTTAnt cidTPast cidAAnter,pol,voice,aspect,v)
`mplus`
do match convert "VHD" -- had
return (cidTTAnt cidTPast cidASimul,cidPPos,Active,Simple,cidhave_V2)
`mplus`
do w <- word "VMo" -- will
guard (w == "will")
pol <- pPol
(voice,apect,vp) <- pVInf cat
return (cidTTAnt cidTFut cidASimul,pol,voice,apect,vp)
`mplus`
do mplus (match convert "VD0") (match convert "VDZ") -- do,does
match convert "XX"
vp <- pVPres cat
return (cidTTAnt cidTPres cidASimul,cidPNeg,Active,Simple,vp)
`mplus`
do match convert "VDD" -- did
match convert "XX"
vp <- pVPres cat
return (cidTTAnt cidTPast cidASimul,cidPNeg,Active,Simple,vp)
pVPInf = do
adVs <- many pAdV
(pol,voice,apect,vp) <- do (pol,voice,apect,v2) <- inside "Vi" $
do pol <- pPol
match convert "TO"
(voice,aspect,v) <- pVInf "V2"
return (pol,voice,aspect,v)
o <- pObject
return (pol,voice,apect,cidComplSlash (cidSlashV2a v2) o)
`mplus`
do (pol,voice,apect,v2a) <- inside "Vi" $
do pol <- pPol
match convert "TO"
(voice,aspect,v) <- pVInf "V2A"
return (pol,voice,aspect,v)
o <- pObject
ap <- pAP
return (pol,voice,apect,cidComplSlash (cidSlashV2A v2a ap) o)
advs <- many pVPMods
return (pol,voice,apect,
foldr (\adv t -> cidAdVVP adv t)
(foldl (\t adv -> cidAdvVP t adv)
vp
advs)
adVs)
pVInf cat =
do v <- pVPres cat
return (Active,Simple,v)
`mplus`
do v <- match "V"
return (App (mkCId "XXX") [],App (mkCId "XXX") [],v)
do match convert "VB0" -- be
pVPart cat
`mplus`
do v <- match convert "VH0" -- have
return (Active,Simple,v)
pVPart cat =
do v <- do lemma "VVGi" cat "s VPresPart"
`mplus`
do lemma "VVGt" cat "s VPresPart"
`mplus`
do lemma "VVGv" cat "s VPresPart"
return (Active,Progressive,v)
`mplus`
do v <- pVPastPart cat
return (Passive,Simple,v)
pVPres cat =
do lemma "VV0i" cat "s VInf"
`mplus`
do lemma "VV0t" cat "s VInf"
`mplus`
do lemma "VV0v" cat "s VInf"
pVPastPart cat =
do lemma "VVNi" cat "s VPPart"
`mplus`
do lemma "VVNt" cat "s VPPart"
`mplus`
do lemma "VVNv" cat "s VPPart"
pPol =
do match convert "XX"
return cidPNeg
`mplus`
do return cidPPos
pAdV =
do insideOpt "R:c" $
lemma "RRR" (mkCId "AdV") "s"
do insideOpt convert "R:c" $
lemma "RRR" "AdV" "s"
`mplus`
do match "R:m"
do insideOpt convert "R:m" $
lemma "RRR" "AdV" "s"
pObject =
match "P:u"
match convert "P:u"
`mplus`
insideOpt "N:o" pNP
insideOpt convert "N:o" pNP
`mplus`
match "N:e"
match convert "N:e"
`mplus`
match "M:e"
match convert "M:e"
`mplus`
match "D:e"
do insideOpt convert "Ds:e" $ do
det <- pDet
return (cidDetNP (det cidNumSg))
`mplus`
match "P:e"
do insideOpt convert "Dp:e" $ do
det <- pDet
return (cidDetNP (det cidNumPl))
`mplus`
do insideOpt convert "P:e" $ pPP
pAdv =
do match "N:t"
pVPMods =
do insideOpt convert "N:t" pTimeNPAdv
`mplus`
do match "N:h"
do match convert "N:h"
`mplus`
do match "P:p"
do insideOpt convert "P:p" $ pPP
`mplus`
do match "P:q"
do insideOpt convert "P:q" $ pPP
`mplus`
do match "P:a"
do insideOpt convert "Pb:a" $ do
match convert "IIb"
np <- insideOpt convert "N" pNP
return (cidPrepNP cidby_Prep np)
`mplus`
do match "P:t"
do insideOpt convert "P:t" $ pPP
`mplus`
do match "P:h"
do insideOpt convert "P:h" $ pPP
`mplus`
do match "P:m"
do insideOpt convert "P:m" $ pPP
`mplus`
do match "P:r"
do insideOpt convert "P:r" $ pPP
`mplus`
do match "R:p"
do insideOpt convert "R:p" $ pAdv
`mplus`
do match "R:q"
do insideOpt convert "R:q" $ pAdv
`mplus`
do match "R:a"
do insideOpt convert "R:a" $ pAdv
`mplus`
do match "R:t"
do insideOpt convert "R:t" $ pAdv
`mplus`
do match "R:h"
do insideOpt convert "R:h" $ pAdv
`mplus`
do match "R:m"
do insideOpt convert "R:m" $ pAdv
`mplus`
do match "R:c"
do insideOpt convert "R:c" $ pAdv
`mplus`
do match "R:r"
do insideOpt convert "R:r" $ pAdv
`mplus`
do match "F:p"
do match convert "F:p"
`mplus`
do match "F:q"
do match convert "F:q"
`mplus`
do match "F:a"
do match convert "F:a"
`mplus`
do match "F:t"
do match convert "F:t"
`mplus`
do match "F:h"
do match convert "F:h"
`mplus`
do match "F:m"
do match convert "F:m"
`mplus`
do match "F:r"
do match convert "F:r"
`mplus`
do match "W:b"
do match convert "W:b"
`mplus`
do match "L:b"
do match convert "L:b"
pNP = do
q <- pQuant
(n,cn) <- pCN
return (cidDetCN (cidDetQuant q n) cn)
pPP =
do prep <- do lemma "ICS" "Prep" "s"
`mplus`
do lemma "ICSk" "Prep" "s"
`mplus`
do lemma "ICSt" "Prep" "s"
`mplus`
do lemma "ICSx" "Prep" "s"
`mplus`
do lemma "IF" "Prep" "s"
`mplus`
do lemma "II" "Prep" "s"
`mplus`
do lemma "IIa" "Prep" "s"
`mplus`
do lemma "IIb" "Prep" "s"
`mplus`
do lemma "IIg" "Prep" "s"
`mplus`
do lemma "IIp" "Prep" "s"
`mplus`
do lemma "IIt" "Prep" "s"
`mplus`
do lemma "IO" "Prep" "s"
`mplus`
do lemma "IW" "Prep" "s"
`mplus`
do insideOpt convert "II=" $ do
w1 <- word "II21"
w2 <- word "II22"
lookupForm "Prep" "s" (unwords [w1,w2])
np <- do insideOpt convert "N" pNP
`mplus`
do (mb_num,n) <- pN
case mb_num of
Just num | num == cidNumPl -> return (cidDetCN (cidDetQuant cidIndefArt num) (cidUseN n))
_ -> return (cidMassNP (cidUseN n)) -- we don't know the number
return (cidPrepNP prep np)
pNP =
do np <- pBaseNP
match convert "YC"
fr <- insideOpt convert "Fr" pRCl
return (cidRelNP np fr)
`mplus`
do pBaseNP
pBaseNP =
do det <- pDet
(mb_num,cn) <- pCN
case mb_num of
Just num -> return (cidDetCN (det num) cn)
Nothing -> mzero -- we don't know the number
`mplus`
do pn <- pName
return (cidUsePN pn)
`mplus`
do (mb_num,cn) <- pCN
case mb_num of
Just num | num == cidNumPl -> return (cidDetCN (cidDetQuant cidIndefArt num) cn)
_ -> return (cidMassNP cn) -- we don't know the number
`mplus`
do match convert "PPIS1"
return (cidUsePron cidi_Pron)
`mplus`
do match convert "PPY"
return (cidUsePron cidyouSg_Pron)
`mplus`
do match convert "PPHS1m"
return (cidUsePron cidhe_Pron)
`mplus`
do match convert "PPHS1f"
return (cidUsePron cidshe_Pron)
`mplus`
do match convert "PPH1"
return (cidUsePron cidit_Pron)
`mplus`
do match convert "PPIS2"
return (cidUsePron cidwe_Pron)
`mplus`
do match convert "PPHS2"
return (cidUsePron cidthey_Pron)
`mplus`
do match convert "Nn"
pDet =
do match convert "DDy"
return (\num -> if num == cidNumSg
then cidanySg_Det
else cidanyPl_Det)
`mplus`
do det <- lemma "DA2" "Det" "s"
return (\num -> det)
`mplus`
do q <- pQuant
ord <- pOrd
return (\num -> cidDetQuantOrd q num ord)
`mplus`
do q <- pQuant
return (\num -> cidDetQuant q num)
pQuant =
do lemma "AT" (mkCId "Quant") "s False Sg"
do match convert "AT"
return cidDefArt
`mplus`
do match "AT1"
do match convert "AT1"
return cidIndefArt
`mplus`
do match convert "ATn"
return cidno_Quant
`mplus`
do match convert "APPGi1"
return (cidPossPron cidi_Pron)
`mplus`
do match convert "APPGy"
return (cidPossPron cidyouSg_Pron)
`mplus`
do match convert "APPGm"
return (cidPossPron cidhe_Pron)
`mplus`
do match convert "APPGf"
return (cidPossPron cidshe_Pron)
`mplus`
do match convert "APPGh1"
return (cidPossPron cidit_Pron)
`mplus`
do match convert "APPGi2"
return (cidPossPron cidwe_Pron)
`mplus`
do match convert "APPGh2"
return (cidPossPron cidthey_Pron)
`mplus`
do lemma "DD1a" "Quant" "s True Sg"
`mplus`
do lemma "DD1i" "Quant" "s True Sg"
`mplus`
do lemma "DD2a" "Quant" "s True Pl"
`mplus`
do lemma "DD2i" "Quant" "s True Pl"
`mplus`
do lemma "DDi" "Quant" "s True Pl"
`mplus`
insideOpt convert "G" pGenitive
pOrd =
do a <- lemma "JJT" "A" "s (AAdj Superl Nom)"
return (cidOrdSuperl a)
pGenitive =
do np <- insideOpt convert "N" pNP
match convert "GG"
return (cidGenNP np)
pCN =
do np <- insideOpt "N" pNP
(n,cn) <- pCN
return (n,App (mkCId "Appos") [np,cn])
do pn <- mplus pName (insideOpt convert "Nn" pName)
(mb_num,cn) <- pCN
return (mb_num,cidNameCN pn cn)
`mplus`
do a <- lemma "JJ" (mkCId "A") "s (AAdj Posit Nom)"
(n,cn) <- pCN
return (n,cidAdjCN (cidPositA a) cn)
do (mb_num_n,n) <- mplus pN (inside "N" pCN)
(mb_num,cn) <- pCN
case mb_num_n of
Just num | num == cidNumPl -> return (mb_num,cidCompoundPlCN (cidUseN n) cn)
_ -> return (mb_num,cidCompoundSgCN (cidUseN n) cn) -- here we don't really know the number
`mplus`
do (num,n) <- pN
advs <- many pPo
return (num,
foldl (\t adv -> cidAdvCN t adv)
do ap <- pAP
(mb_num,cn) <- pCN
return (mb_num,cidAdjCN ap cn)
`mplus`
do t <- match convert "NN1u&"
mods <- many pCNMods
return (Just cidNumSg
,foldl (\t mod -> mod t)
t
mods)
`mplus`
do (mb_num,n) <- pN
mods <- many pCNMods
return (mb_num,
foldl (\t mod -> mod t)
(cidUseN n)
advs)
mods)
pN =
do n <- lemma "NN1c" (mkCId "N") "s Sg Nom"
return (cidNumSg, n)
pAP =
do a <- lemma "JJ" "A" "s (AAdj Posit Nom)"
`mplus`
lemma "JA" "A" "s (AAdj Posit Nom)"
`mplus`
lemma "JB" "A" "s (AAdj Posit Nom)"
`mplus`
lemma "JBo" "A" "s (AAdj Posit Nom)"
`mplus`
lemma "DAy" "A" "s (AAdj Posit Nom)"
`mplus`
lemma "DAz" "A" "s (AAdj Posit Nom)"
return (cidPositA a)
`mplus`
do n <- lemma "NN1n" (mkCId "N") "s Sg Nom"
return (cidNumSg, n)
do a <- lemma "JJR" "A" "s (AAdj Compar Nom)"
return (cidUseComparA a)
`mplus`
do vp <- match convert "Tn"
return (cidPastPartAP vp)
`mplus`
do insideOpt convert "J" $ do
adas <- many pAdA
ap <- pAP
mods <- many pAPMods
return (foldl (\t ada -> cidAdAP ada t)
(foldl (\t mod -> cidAdvAP t mod)
ap
mods)
adas)
pPo =
insideOpt "Po" $ do
p <- match "IO"
np <- insideOpt "N" pNP
return (cidPrepNP p np)
pAdA = do
a <- lemma "RR" "A" "s AAdv"
return (cidPositAdAAdj a)
pAPMods =
do insideOpt convert "P" pPP
pN =
do n <- lemma "NNn" "N" "s Sg Nom"
return (Nothing, n)
`mplus`
do n <- lemma "NNu" "N" "s Sg Nom"
return (Just cidNumPl, n)
`mplus`
do n <- lemma "NNux" "N" "s Sg Nom"
return (Nothing, n)
`mplus`
do n <- lemma "NN1c" "N" "s Sg Nom"
return (Just cidNumSg, n)
`mplus`
do n <- lemma "NN1m" "N" "s Sg Nom"
return (Just cidNumSg, n)
`mplus`
do n <- lemma "NN1n" "N" "s Sg Nom"
return (Just cidNumSg, n)
`mplus`
do n <- lemma "NN1u" "N" "s Sg Nom"
return (Just cidNumSg, n)
`mplus`
do n <- lemma "NN1ux" "N" "s Sg Nom"
return (Just cidNumSg, n)
`mplus`
do n <- lemma "NN2" "N" "s Pl Nom"
return (Just cidNumPl, n)
`mplus`
do n <- lemma "NNJ1c" "N" "s Sg Nom"
return (Just cidNumSg, n)
`mplus`
do n <- lemma "NNJ1n" "N" "s Sg Nom"
return (Just cidNumSg, n)
`mplus`
do n <- lemma "NNJ2" "N" "s Pl Nom"
return (Just cidNumPl, n)
`mplus`
do n <- lemma "NNLc" "N" "s Sg Nom"
return (Nothing, n)
`mplus`
do n <- lemma "NNL1c" "N" "s Sg Nom"
return (Just cidNumSg, n)
`mplus`
do n <- lemma "NNL1cb" "N" "s Sg Nom"
return (Just cidNumSg, n)
`mplus`
do n <- lemma "NNL1n" "N" "s Sg Nom"
return (Just cidNumSg, n)
`mplus`
do n <- lemma "NNL2" "N" "s Pl Nom"
return (Just cidNumPl, n)
`mplus`
do n <- lemma "NNS1c" "N" "s Sg Nom"
return (Just cidNumSg, n)
`mplus`
do n <- lemma "NNS1n" "N" "s Sg Nom"
return (Just cidNumSg, n)
`mplus`
do n <- lemma "NNS2" "N" "s Pl Nom"
return (Just cidNumPl, n)
`mplus`
do n <- lemma "NNT1h" "N" "s Sg Nom"
return (Just cidNumSg, n)
`mplus`
do n <- lemma "NNT1m" "N" "s Sg Nom"
return (Just cidNumSg, n)
`mplus`
do n <- lemma "NNT1c" "N" "s Sg Nom"
return (Just cidNumSg, n)
`mplus`
do n <- lemma "NNT2" "N" "s Pl Nom"
return (Just cidNumPl, n)
`mplus`
do n <- lemma "NNUc" "N" "s Sg Nom"
return (Nothing, n)
`mplus`
do n <- lemma "NNUn" "N" "s Sg Nom"
return (Nothing, n)
`mplus`
do n <- lemma "NNU1c" "N" "s Sg Nom"
return (Just cidNumSg, n)
`mplus`
do n <- lemma "NNU1n" "N" "s Sg Nom"
return (Just cidNumSg, n)
`mplus`
do n <- lemma "NNU2" "N" "s Pl Nom"
return (Just cidNumPl, n)
`mplus`
do inside "Ns" $ do
(mb_num1,n1) <- pN
match convert "YH"
(mb_num2,n2) <- pN
case mb_num1 of
Just num | num == cidNumPl -> return (mb_num2,cidDashPlN n1 n2)
_ -> return (mb_num2,cidDashSgN n1 n2) -- here we don't really know the number
pCNMods =
do adv <- insideOpt convert "Po" $ pPP
return (\t -> cidAdvCN t adv)
`mplus`
do adv <- insideOpt convert "P" $ pPP
return (\t -> cidAdvCN t adv)
`mplus`
do adv <- insideOpt convert "Fn" $ do
match convert "CST"
s <- pS
return (cidSubjS cidthat_Subj s)
return (\t -> cidAdvCN t adv)
`mplus`
do fr <- insideOpt convert "Fr" pRCl
return (\t -> cidRelCN t fr)
pName =
do w1 <- word "NP1s"
w2 <- word "NNL1cb"
return (cidSymbPN (cidMkSymb (Lit (unwords [w1,w2]))))
`mplus`
do w1 <- word "NPM1"
match convert "YH"
w2 <- word "NPM1"
return (cidSymbPN (cidMkSymb (Lit (unwords [w1,"-",w2]))))
`mplus`
do w1 <- msum [word "NP1c", word "NP1f", word "NP1g"
,word "NP1j", word "NP1m", word "NP1p"
,word "NP1s", word "NP1t", word "NP1x"
,word "NP1z", word "NP2c", word "NP2f"
,word "NP2g", word "NP2j", word "NP2m"
,word "NP2p", word "NP2s", word "NP2t"
,word "NP2x", word "NP2z"]
return (cidSymbPN (cidMkSymb (Lit w1)))
pRCl =
do rp <- pRP
(t,p,vp) <- pVP
opt (match convert "YC")
return (cidUseRCl t p (cidRelVP rp vp))
`mplus`
do (prep,rp) <- inside "Pq" $ do
prep <- lemma "II" "Prep" "s"
rp <- pRP
return (prep,rp)
np <- pSubject
(t,p,vp) <- pVP
opt (match convert "YC")
return (cidUseRCl t p (cidRelSlash rp (cidSlashVP np (cidVPSlashPrep vp prep))))
pRP =
do inside "Dq" (match convert "DDQr")
return cidIdRP
pTimeNPAdv = do
day <- lemma "NPD1" "Weekday" "s Sg Nom"
return (cidweekdayPunctualAdv day)