diff --git a/treebanks/susanne/Idents.hs b/treebanks/susanne/Idents.hs index 14a731f76..50fbc6d26 100644 --- a/treebanks/susanne/Idents.hs +++ b/treebanks/susanne/Idents.hs @@ -39,6 +39,7 @@ cidConsS = app0 "ConsS" cidConjS = app0 "ConjS" cidMassNP = app1 "MassNP" cidAdvNP = app2 "AdvNP" +cidExtAdvNP = app2 "ExtAdvNP" cidTPres = app0 "TPres" cidTPast = app0 "TPast" cidTFut = app0 "TFut" @@ -64,10 +65,10 @@ cidAdvS = app2 "AdvS" cidPositA = app1 "PositA" cidIDig = app0 "IDig" cidIIDig = app0 "IIDig" -cidNumCard = app0 "NumCard" -cidNumDigits = app0 "NumDigits" -cidNumNumeral = app0 "NumNumeral" -cidnum = app0 "num" +cidNumCard = app1 "NumCard" +cidNumDigits = app1 "NumDigits" +cidNumNumeral = app1 "NumNumeral" +cidnum = app1 "num" cidpot2as3 = app0 "pot2as3" cidpot1as2 = app0 "pot1as2" cidpot0as1 = app0 "pot0as1" @@ -94,7 +95,7 @@ cidDashSgN = app2 "DashSgN" cidDashPlN = app2 "DashPlN" cidProgrVP = app0 "ProgrVP" cidGerundN = app0 "GerundN" -cidGerundAP = app0 "GerundAP" +cidPresPartAP = app1 "PresPartAP" cidGenNP = app1 "GenNP" cidPredetNP = app1 "PredetNP" cidDetNP = app1 "DetNP" diff --git a/treebanks/susanne/Parser.hs b/treebanks/susanne/Parser.hs index f34bb3423..c3db1d3a2 100644 --- a/treebanks/susanne/Parser.hs +++ b/treebanks/susanne/Parser.hs @@ -19,6 +19,8 @@ instance MonadPlus P where mzero = P (\pgf cnc ts -> Nothing) mplus f g = P (\pgf cnc ts -> mplus (runP f pgf cnc ts) (runP g pgf cnc ts)) +getConcr = P (\pgf cnc ts -> Just (ts,cnc)) + match convert tag_spec = P (\pgf cnc ts -> case ts of (t@(Phrase tag1 mods1 fn1 _ _):ts) diff --git a/treebanks/susanne/convert.hs b/treebanks/susanne/convert.hs index edeea2825..509063379 100644 --- a/treebanks/susanne/convert.hs +++ b/treebanks/susanne/convert.hs @@ -2,6 +2,7 @@ import System.Directory import System.FilePath import Data.List import Data.Char(toLower) +import Data.Maybe(fromMaybe) import Control.Monad import qualified Data.Map as Map @@ -44,6 +45,9 @@ convert pgf eng t@(Phrase tag mods fn idx ts) | tag == "Po"= case runP pPP pgf eng ts of Just ([],x) -> x _ -> Phrase tag mods fn idx ts' + | tag == "Fr"= case runP pRS pgf eng ts of + Just ([],x) -> x + _ -> Phrase tag mods fn idx ts' | otherwise = Phrase tag mods fn idx ts' where ts' = map (convert pgf eng) ts @@ -78,9 +82,9 @@ pSubject = `mplus` do insideOpt convert "N:S" pNP `mplus` - do match convert "M:s" + do insideOpt convert "M:s" pM `mplus` - do match convert "M:S" + do insideOpt convert "M:S" pM `mplus` do insideOpt convert "Ds:s" $ do det <- pDet @@ -157,16 +161,21 @@ pVP = advs) adVs) `mplus` - do inside "V" (match convert "VBZ") - adVs <- many pAdV - p <- pPol + do (t,p) <- inside "V" pCopula comp <- pComp advs <- many pVPMods - return (cidTTAnt cidTPres cidASimul,p,foldr (\adv t -> cidAdVVP adv t) - (foldl (\t adv -> cidAdvVP t adv) - (cidUseComp comp) - advs) - adVs) + return (cidTTAnt t cidASimul,p,foldl (\t adv -> cidAdvVP t adv) + (cidUseComp comp) + advs) + +pCopula = + do match convert "VBZ" + p <- pPol + return (cidTPres,p) + `mplus` + do match convert "VBDZ" + p <- pPol + return (cidTPast,p) pComp = do adv <- insideOpt convert "R:e" pAdv @@ -302,12 +311,13 @@ pVInf cat = do v <- match convert "VH0" -- have return (Active,Simple,v) +pVPPresPart = + insideOpt convert "Vg" $ do + v <- pVPresPart "V" + return (cidUseV v) + pVPart cat = - do v <- do lemma "VVGi" cat "s VPresPart" - `mplus` - do lemma "VVGt" cat "s VPresPart" - `mplus` - do lemma "VVGv" cat "s VPresPart" + do v <- pVPresPart cat return (Active,Progressive,v) `mplus` do v <- pVPastPart cat @@ -320,6 +330,13 @@ pVPres cat = `mplus` do lemma "VV0v" cat "s VInf" +pVPresPart cat = + do lemma "VVGi" cat "s VPresPart" + `mplus` + do lemma "VVGt" cat "s VPresPart" + `mplus` + do lemma "VVGv" cat "s VPresPart" + pVPastPart cat = do lemma "VVNi" cat "s VPPart" `mplus` @@ -347,7 +364,7 @@ pObject = `mplus` match convert "N:e" `mplus` - match convert "M:e" + insideOpt convert "M:e" pM `mplus` do insideOpt convert "Ds:e" $ do det <- pDet @@ -454,11 +471,12 @@ pPP = _ -> return (cidMassNP (cidUseN n)) -- we don't know the number return (cidPrepNP prep np) -pNP = +pNP = do np <- pBaseNP - match convert "YC" - fr <- insideOpt convert "Fr" pRCl - return (cidRelNP np fr) + mods <- many pNPMods + return (foldl (\t mod -> mod t) + np + mods) `mplus` do pBaseNP @@ -509,12 +527,16 @@ pDet = do det <- lemma "DA2" "Det" "s" return (\num -> det) `mplus` + do num0 <- pNumeral + return (\num -> cidDetQuant cidIndefArt num0) + `mplus` do q <- pQuant ord <- pOrd return (\num -> cidDetQuantOrd q num ord) `mplus` - do q <- pQuant - return (\num -> cidDetQuant q num) + do q <- pQuant + mb_num <- opt pNumeral + return (\num -> cidDetQuant q (fromMaybe num mb_num)) pQuant = do match convert "AT" @@ -563,6 +585,27 @@ pOrd = do a <- lemma "JJT" "A" "s (AAdj Superl Nom)" return (cidOrdSuperl a) +pNumeral = + do w <- word "MC" `mplus` word "MC1" + cnc <- getConcr + case parse cnc "Numeral" w of + Right [(e,_)] -> do n <- toParseTree e + return (cidNumCard (cidNumNumeral n)) + _ -> mzero + `mplus` + do w <- word "MCn" + cnc <- getConcr + case parse cnc "Digits" w of + Right [(e,_)] -> do n <- toParseTree e + return (cidNumCard (cidNumDigits n)) + _ -> mzero + where + toParseTree e = + case unApp e of + Just (f,es) -> do ps <- mapM toParseTree es + return (App f ps) + Nothing -> mzero + pGenitive = do np <- insideOpt convert "N" pNP match convert "GG" @@ -617,6 +660,9 @@ pAP = do vp <- match convert "Tn" return (cidPastPartAP vp) `mplus` + do vp <- insideOpt convert "Tg" pVPPresPart + return (cidPresPartAP vp) + `mplus` do insideOpt convert "J" $ do adas <- many pAdA ap <- pAP @@ -743,9 +789,45 @@ pCNMods = return (cidSubjS cidthat_Subj s) return (\t -> cidAdvCN t adv) `mplus` - do fr <- insideOpt convert "Fr" pRCl + do fr <- insideOpt convert "Fr" pRS return (\t -> cidRelCN t fr) +pNPMods = + do adv <- insideOpt convert "Po" $ pPP + return (\t -> cidAdvNP t adv) + `mplus` + do adv <- insideOpt convert "P" $ pPP + return (\t -> cidAdvNP t adv) + `mplus` + do adv <- insideOpt convert "Fn" $ do + match convert "CST" + s <- pS + return (cidSubjS cidthat_Subj s) + return (\t -> cidAdvNP t adv) + `mplus` + do match convert "YC" + adv <- insideOpt convert "Po" $ pPP + opt (match convert "YC") + return (\t -> cidExtAdvNP t adv) + `mplus` + do match convert "YC" + adv <- insideOpt convert "P" $ pPP + opt (match convert "YC") + return (\t -> cidExtAdvNP t adv) + `mplus` + do match convert "YC" + adv <- insideOpt convert "Fn" $ do + match convert "CST" + s <- pS + return (cidSubjS cidthat_Subj s) + opt (match convert "YC") + return (\t -> cidExtAdvNP t adv) + `mplus` + do match convert "YC" + fr <- insideOpt convert "Fr" pRS + opt (match convert "YC") + return (\t -> cidRelNP t fr) + pName = do w1 <- word "NP1s" w2 <- word "NNL1cb" @@ -765,7 +847,19 @@ pName = ,word "NP2x", word "NP2z"] return (cidSymbPN (cidMkSymb (Lit w1))) -pRCl = +pM = do + num <- pNumeral + mods <- many pNPMods + return (foldl (\t mod -> mod t) + (cidDetNP (cidDetQuant cidIndefArt num)) + mods) + +pRS = + do rp <- pRP + np <- pSubject + (t,p,vp) <- pVP + return (cidUseRCl t p (cidRelSlash rp (cidSlashVP np vp))) + `mplus` do rp <- pRP (t,p,vp) <- pVP opt (match convert "YC")