mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
a new version of the conversion script for the Susanne corpus which covers a bit less than half of the data
This commit is contained in:
148
treebanks/susanne/Idents.hs
Normal file
148
treebanks/susanne/Idents.hs
Normal file
@@ -0,0 +1,148 @@
|
||||
module Idents where
|
||||
|
||||
import SusanneFormat
|
||||
|
||||
cidASimul = app0 "ASimul"
|
||||
cidAAnter = app0 "AAnter"
|
||||
cidPositAdvAdj = app0 "PositAdvAdj"
|
||||
cidPositAdVAdj = app0 "PositAdVAdj"
|
||||
cidUseCl = app3 "UseCl"
|
||||
cidPredVP = app2 "PredVP"
|
||||
cidSlashVP = app2 "SlashVP"
|
||||
cidVPSlashPrep = app2 "VPSlashPrep"
|
||||
cidComplPredVP = app0 "ComplPredVP"
|
||||
cidAdjCN = app2 "AdjCN"
|
||||
cidUseN = app1 "UseN"
|
||||
cidDetQuant = app2 "DetQuant"
|
||||
cidDetQuantOrd = app3 "DetQuantOrd"
|
||||
cidNumSg = app0 "NumSg"
|
||||
cidNumPl = app0 "NumPl"
|
||||
cidDetCN = app2 "DetCN"
|
||||
cidIndefArt = app0 "IndefArt"
|
||||
cidDefArt = app0 "DefArt"
|
||||
cidUsePN = app1 "UsePN"
|
||||
cidUseQuantPN = app0 "UseQuantPN"
|
||||
cidSymbPN = app1 "SymbPN"
|
||||
cidMkSymb = app1 "MkSymb"
|
||||
cidUsePron = app1 "UsePron"
|
||||
cidConjNP = app0 "ConjNP"
|
||||
cidBaseNP = app0 "BaseNP"
|
||||
cidConsNP = app0 "ConsNP"
|
||||
cidConjCN = app0 "ConjCN"
|
||||
cidBaseCN = app0 "BaseCN"
|
||||
cidConsCN = app0 "ConsCN"
|
||||
cidConjAdv = app0 "ConjAdv"
|
||||
cidBaseAdv = app0 "BaseAdv"
|
||||
cidConsAdv = app0 "ConsAdv"
|
||||
cidBaseS = app0 "BaseS"
|
||||
cidConsS = app0 "ConsS"
|
||||
cidConjS = app0 "ConjS"
|
||||
cidMassNP = app1 "MassNP"
|
||||
cidAdvNP = app2 "AdvNP"
|
||||
cidTPres = app0 "TPres"
|
||||
cidTPast = app0 "TPast"
|
||||
cidTFut = app0 "TFut"
|
||||
cidTCond = app0 "TCond"
|
||||
cidTTAnt = app2 "TTAnt"
|
||||
cidPPos = app0 "PPos"
|
||||
cidPNeg = app0 "PNeg"
|
||||
cidComplSlash = app2 "ComplSlash"
|
||||
cidSlashV2a = app1 "SlashV2a"
|
||||
cidSlashV2A = app2 "SlashV2A"
|
||||
cidComplVS = app2 "ComplVS"
|
||||
cidComplVV = app4 "ComplVV"
|
||||
cidUseV = app1 "UseV"
|
||||
cidAdVVP = app2 "AdVVP"
|
||||
cidAdvVP = app2 "AdvVP"
|
||||
cidAdvVPSlash = app2 "AdvVPSlash"
|
||||
cidPrepNP = app2 "PrepNP"
|
||||
cidto_Prep = app0 "to_Prep"
|
||||
cidsuch_as_Prep= app0 "such_as_Prep"
|
||||
cidPastPartAP = app1 "PastPartAP"
|
||||
cidPassVPSlash = app0 "PassVPSlash"
|
||||
cidAdvS = app2 "AdvS"
|
||||
cidPositA = app1 "PositA"
|
||||
cidIDig = app0 "IDig"
|
||||
cidIIDig = app0 "IIDig"
|
||||
cidNumCard = app0 "NumCard"
|
||||
cidNumDigits = app0 "NumDigits"
|
||||
cidNumNumeral = app0 "NumNumeral"
|
||||
cidnum = app0 "num"
|
||||
cidpot2as3 = app0 "pot2as3"
|
||||
cidpot1as2 = app0 "pot1as2"
|
||||
cidpot0as1 = app0 "pot0as1"
|
||||
cidpot01 = app0 "pot01"
|
||||
cidpot0 = app0 "pot0"
|
||||
cidn2 = app0 "n2"
|
||||
cidn3 = app0 "n3"
|
||||
cidn4 = app0 "n4"
|
||||
cidn5 = app0 "n5"
|
||||
cidn6 = app0 "n6"
|
||||
cidn7 = app0 "n7"
|
||||
cidn8 = app0 "n8"
|
||||
cidn9 = app0 "n9"
|
||||
cidPossPron = app1 "PossPron"
|
||||
cidCompAP = app1 "CompAP"
|
||||
cidCompNP = app1 "CompNP"
|
||||
cidCompAdv = app1 "CompAdv"
|
||||
cidCompS = app1 "CompS"
|
||||
cidCompVP = app1 "CompVP"
|
||||
cidUseComp = app1 "UseComp"
|
||||
cidCompoundSgCN= app2 "CompoundSgCN"
|
||||
cidCompoundPlCN= app2 "CompoundPlCN"
|
||||
cidDashSgN = app2 "DashSgN"
|
||||
cidDashPlN = app2 "DashPlN"
|
||||
cidProgrVP = app0 "ProgrVP"
|
||||
cidGerundN = app0 "GerundN"
|
||||
cidGerundAP = app0 "GerundAP"
|
||||
cidGenNP = app1 "GenNP"
|
||||
cidPredetNP = app1 "PredetNP"
|
||||
cidDetNP = app1 "DetNP"
|
||||
cidAdAP = app2 "AdAP"
|
||||
cidAdvAP = app2 "AdvAP"
|
||||
cidPositAdAAdj = app1 "PositAdAAdj"
|
||||
cideither7or_DConj = app0 "either7or_DConj"
|
||||
cidboth7and_DConj = app0 "both7and_DConj"
|
||||
cidor_Conj = app0 "or_Conj"
|
||||
cidand_Conj = app0 "and_Conj"
|
||||
cidamp_Conj = app0 "amp_Conj"
|
||||
cidSlashV2V = app0 "SlashV2V"
|
||||
cidComplVA = app0 "ComplVA"
|
||||
cidAdNum = app0 "AdNum"
|
||||
cidOrdSuperl = app1 "OrdSuperl"
|
||||
cidno_RP = app0 "no_RP"
|
||||
cidthat_RP = app0 "that_RP"
|
||||
cidUseRCl = app3 "UseRCl"
|
||||
cidRelSlash = app2 "RelSlash"
|
||||
cidRelNP = app2 "RelNP"
|
||||
cidRelCN = app2 "RelCN"
|
||||
cidRelVP = app2 "RelVP"
|
||||
cidIdRP = app0 "IdRP"
|
||||
cidmany_Det = app0 "many_Det"
|
||||
cidImpVP = app1 "ImpVP"
|
||||
cidExistNP = app2 "ExistNP"
|
||||
cidExtAdvS = app2 "ExtAdvS"
|
||||
cidAdvCN = app2 "AdvCN"
|
||||
cidNameCN = app2 "NameCN"
|
||||
cidno_Quant = app0 "no_Quant"
|
||||
cidSubjS = app2 "SubjS"
|
||||
cidthat_Subj = app0 "that_Subj"
|
||||
cidanySg_Det = app0 "anySg_Det"
|
||||
cidanyPl_Det = app0 "anyPl_Det"
|
||||
cidhave_V2 = app0 "have_V2"
|
||||
cidby_Prep = app0 "by_Prep"
|
||||
cidweekdayPunctualAdv = app1 "weekdayPunctualAdv"
|
||||
cidi_Pron = app0 "i_Pron"
|
||||
cidyouSg_Pron = app0 "youSg_Pron"
|
||||
cidhe_Pron = app0 "he_Pron"
|
||||
cidshe_Pron = app0 "she_Pron"
|
||||
cidit_Pron = app0 "it_Pron"
|
||||
cidwe_Pron = app0 "we_Pron"
|
||||
cidthey_Pron = app0 "they_Pron"
|
||||
cidUseComparA = app1 "UseComparA"
|
||||
|
||||
app0 f = App f []
|
||||
app1 f = \x -> App f [x]
|
||||
app2 f = \x y -> App f [x,y]
|
||||
app3 f = \x y z -> App f [x,y,z]
|
||||
app4 f = \w x y z -> App f [w,x,y,z]
|
||||
@@ -3,29 +3,30 @@ module Parser where
|
||||
import Data.Char
|
||||
import Control.Monad
|
||||
|
||||
import PGF(PGF,Morpho,lookupMorpho,functionType,unType)
|
||||
import PGF2
|
||||
import SusanneFormat
|
||||
import Debug.Trace
|
||||
|
||||
newtype P a = P {runP :: PGF -> Morpho -> [ParseTree] -> Maybe ([ParseTree], a)}
|
||||
newtype P a = P {runP :: PGF -> Concr -> [ParseTree] -> Maybe ([ParseTree], a)}
|
||||
|
||||
instance Monad P where
|
||||
return x = P (\pgf morpho ts -> Just (ts, x))
|
||||
f >>= g = P (\pgf morpho ts -> case runP f pgf morpho ts of
|
||||
return x = P (\pgf cnc ts -> Just (ts, x))
|
||||
f >>= g = P (\pgf cnc ts -> case runP f pgf cnc ts of
|
||||
Nothing -> Nothing
|
||||
Just (ts,x) -> runP (g x) pgf morpho ts)
|
||||
Just (ts,x) -> runP (g x) pgf cnc ts)
|
||||
|
||||
instance MonadPlus P where
|
||||
mzero = P (\pgf morpho ts -> Nothing)
|
||||
mplus f g = P (\pgf morpho ts -> mplus (runP f pgf morpho ts) (runP g pgf morpho ts))
|
||||
mzero = P (\pgf cnc ts -> Nothing)
|
||||
mplus f g = P (\pgf cnc ts -> mplus (runP f pgf cnc ts) (runP g pgf cnc ts))
|
||||
|
||||
match tag_spec = P (\pgf morpho ts ->
|
||||
match convert tag_spec = P (\pgf cnc ts ->
|
||||
case ts of
|
||||
(t@(Phrase tag1 mods1 fn1 _ _):ts)
|
||||
| tag == tag1 &&
|
||||
all (flip elem mods1) mods &&
|
||||
(null fn || fn == fn1) -> Just (ts,t)
|
||||
(null fn || fn == fn1) -> Just (ts,convert pgf cnc t)
|
||||
(t@(Word _ tag1 _ _):ts)
|
||||
| tag == tag1 -> Just (ts,t)
|
||||
| tag == tag1 && null mods-> Just (ts,convert pgf cnc t)
|
||||
_ -> Nothing)
|
||||
where
|
||||
(f,_) = readTag (Word "<match>" undefined undefined undefined) tag_spec
|
||||
@@ -43,12 +44,12 @@ many f =
|
||||
`mplus`
|
||||
do return []
|
||||
|
||||
inside tag_spec p = P (\pgf morpho ts ->
|
||||
inside tag_spec p = P (\pgf cnc ts ->
|
||||
case ts of
|
||||
(t@(Phrase tag1 mods1 fn1 _ ts'):ts)
|
||||
| tag == tag1 &&
|
||||
all (flip elem mods1) mods &&
|
||||
(null fn || fn == fn1) -> case runP p pgf morpho ts' of
|
||||
(null fn || fn == fn1) -> case runP p pgf cnc ts' of
|
||||
Just ([],x) -> Just (ts,x)
|
||||
_ -> Nothing
|
||||
_ -> Nothing)
|
||||
@@ -56,35 +57,45 @@ inside tag_spec p = P (\pgf morpho ts ->
|
||||
(f,_) = readTag (Word "<match>" undefined undefined undefined) tag_spec
|
||||
Phrase tag mods fn _ _ = f []
|
||||
|
||||
insideOpt tag_spec p = P (\pgf morpho ts ->
|
||||
insideOpt convert tag_spec p = P (\pgf cnc ts ->
|
||||
case ts of
|
||||
(t@(Phrase tag1 mods1 fn1 _ ts'):ts)
|
||||
| tag == tag1 &&
|
||||
all (flip elem mods1) mods &&
|
||||
(null fn || fn == fn1) -> case runP p pgf morpho ts' of
|
||||
(null fn || fn == fn1) -> case runP p pgf cnc ts' of
|
||||
Just ([],x) -> Just (ts,x)
|
||||
_ -> Just (ts,t)
|
||||
_ -> Just (ts,convert pgf cnc t)
|
||||
_ -> Nothing)
|
||||
where
|
||||
(f,_) = readTag (Word "<match>" undefined undefined undefined) tag_spec
|
||||
Phrase tag mods fn _ _ = f []
|
||||
|
||||
lemma tag cat an0 = P (\pgf morpho ts ->
|
||||
lemma tag cat an0 = P (\pgf cnc ts ->
|
||||
case ts of
|
||||
(t@(Word _ tag1 form _):ts) | tag == tag1 ->
|
||||
case [f | (f,an) <- lookupMorpho morpho (map toLower form), hasCat pgf f cat, an == an0] of
|
||||
[f] -> Just (ts,App f [])
|
||||
_ -> Just (ts,t)
|
||||
_ -> Nothing)
|
||||
(t@(Word _ tag1 form _):ts) | tag == tag1 -> case runP (lookupForm cat an0 form) pgf cnc ts of
|
||||
Nothing -> Just (ts,t)
|
||||
x -> x
|
||||
_ -> Nothing)
|
||||
|
||||
lookupForm cat an0 form = P (\pgf cnc ts ->
|
||||
case [f | (f,an,_) <- lookupMorpho cnc form, hasCat pgf f cat, an == an0] of
|
||||
[] -> case [f | (f,an,_) <- lookupMorpho cnc (map toLower form), hasCat pgf f cat, an == an0] of
|
||||
[f] -> Just (ts,App f [])
|
||||
_ -> Nothing
|
||||
[f] -> Just (ts,App f [])
|
||||
_ -> Nothing)
|
||||
where
|
||||
hasCat pgf f cat =
|
||||
case functionType pgf f of
|
||||
Just ty -> case unType ty of
|
||||
(_,cat1,_) -> cat1 == cat
|
||||
Nothing -> False
|
||||
(DTyp _ cat1 _) -> cat1 == cat
|
||||
|
||||
opt f =
|
||||
do x <- f
|
||||
return (Just x)
|
||||
`mplus`
|
||||
do return Nothing
|
||||
|
||||
word tag = P (\pgf cnc ts ->
|
||||
case ts of
|
||||
((Word _ tag1 form _):ts) | tag == tag1 -> Just (ts,form)
|
||||
_ -> Nothing)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
module SusanneFormat(Tag,Id,Word,Lemma,ParseTree(..),readTreebank,readTag) where
|
||||
|
||||
import PGF(CId)
|
||||
import Data.Char
|
||||
import qualified Data.Map as Map
|
||||
|
||||
type Tag = String
|
||||
type Mods = String
|
||||
@@ -14,7 +14,8 @@ type Lemma = String
|
||||
data ParseTree
|
||||
= Phrase Tag Mods Fn Index [ParseTree]
|
||||
| Word Id Tag Word Lemma
|
||||
| App CId [ParseTree]
|
||||
| App String [ParseTree]
|
||||
| Lit String
|
||||
deriving Eq
|
||||
|
||||
data ParseTreePos
|
||||
@@ -28,14 +29,15 @@ instance Show ParseTree where
|
||||
| otherwise = "["++tag++mods++":"++fn++show idx++" "++unwords (map show ts)++"]"
|
||||
show (Word _ tag w _) = "["++tag++" "++w++"]"
|
||||
show (App f ts)
|
||||
| null ts = show f
|
||||
| otherwise = "("++show f++" "++unwords (map show ts)++")"
|
||||
| null ts = f
|
||||
| otherwise = "("++f++" "++unwords (map show ts)++")"
|
||||
show (Lit s) = show s
|
||||
|
||||
readTreebank ls = readLines Root (map words ls)
|
||||
|
||||
readLines p [] = []
|
||||
readLines p ([id,_,tag,w,l,parse]:ls) =
|
||||
readParse (Word id tag w l) p parse ls
|
||||
readParse (Word id tag (readWord w) l) p parse ls
|
||||
|
||||
readParse w p [] ls = readLines p ls
|
||||
readParse w p ('[':cs) ls =
|
||||
@@ -81,3 +83,61 @@ readTag w (c:cs) -- phrase tag
|
||||
readTag w cs = readError w
|
||||
|
||||
readError (Word id _ _ _) = error id
|
||||
|
||||
readWord w0 = replaceEntities w2
|
||||
where
|
||||
w1 | head w0 == '+' = tail w0
|
||||
| otherwise = w0
|
||||
w2 | last w1 == '+' = init w1
|
||||
| otherwise = w1
|
||||
|
||||
replaceEntities [] = []
|
||||
replaceEntities ('<':cs) =
|
||||
let (e,'>':cs1) = break (=='>') cs
|
||||
in case Map.lookup e entity_names of
|
||||
Just c -> c : replaceEntities cs1
|
||||
Nothing -> "<"++e++">"++ replaceEntities cs1
|
||||
replaceEntities (c: cs) = c : replaceEntities cs
|
||||
|
||||
entity_names = Map.fromList
|
||||
[("agr",'α')
|
||||
,("agrave",'à')
|
||||
,("apos",'\'')
|
||||
,("auml",'ä')
|
||||
,("bgr",'β')
|
||||
,("blank",' ')
|
||||
,("ccedil",'ç')
|
||||
,("deg",'°')
|
||||
,("dollar",'$')
|
||||
,("eacute",'é')
|
||||
,("egr",'ε')
|
||||
,("egrave",'è')
|
||||
,("frac12",'½')
|
||||
,("frac14",'¼')
|
||||
,("ggr",'γ')
|
||||
,("hellip",'…')
|
||||
,("hyphen",'-')
|
||||
,("iuml",'ï')
|
||||
,("khgr",'χ')
|
||||
,("ldquo",'“')
|
||||
,("lgr",'λ')
|
||||
,("lsquo",'‘')
|
||||
,("mdash",'—')
|
||||
,("mgr",'μ')
|
||||
,("minus",'-')
|
||||
,("ntilde",'ñ')
|
||||
,("oelig",'œ')
|
||||
,("ouml",'ö')
|
||||
,("para",'¶')
|
||||
,("pgr",'π')
|
||||
,("phgr",'φ')
|
||||
,("prime",'′')
|
||||
,("Prime",'″')
|
||||
,("rdquo",'”')
|
||||
,("rgr",'ρ')
|
||||
,("rsquo",'’')
|
||||
,("sect",'§')
|
||||
,("sol",'/')
|
||||
,("tggr",'θ')
|
||||
]
|
||||
|
||||
|
||||
@@ -5,92 +5,63 @@ import Data.Char(toLower)
|
||||
import Control.Monad
|
||||
import qualified Data.Map as Map
|
||||
|
||||
import PGF (readPGF, readLanguage, buildMorpho, lookupMorpho, mkCId, functionType, unType)
|
||||
import PGF2
|
||||
import SusanneFormat
|
||||
import Parser
|
||||
import Idents
|
||||
|
||||
Just eng = readLanguage "DictEng"
|
||||
|
||||
main = do
|
||||
gr <- readPGF "DictEngAbs.pgf"
|
||||
let morpho = buildMorpho gr eng
|
||||
gr <- readPGF "ParseEngAbs.pgf"
|
||||
let Just eng = Map.lookup "ParseEng" (languages gr)
|
||||
fs <- getDirectoryContents "data"
|
||||
txts <- (mapM (\f -> readFile ("data" </> f)) . filter ((/= ".") . take 1)) (sort fs)
|
||||
--let ts' = readTreebank (lines (concat txts))
|
||||
--writeFile "text" (unlines (map show ts'))
|
||||
let (ts',rs') = combineRes (convert gr morpho) (readTreebank (lines (concat txts)))
|
||||
let rm = Map.fromListWith (++) rs'
|
||||
writeFile "susanne.gft" (unlines (map show ts'))
|
||||
writeFile "rules" (unlines (concat [unwords ("-":cat:"->":cats) : map (\t -> " "++show t) rs'' | (cat :-> cats,rs'') <- Map.toList rm]))
|
||||
let ts = (map (convert gr eng) . concatMap filterTree) (readTreebank (lines (concat txts)))
|
||||
writeFile "susanne.gft" (unlines (map show ts))
|
||||
|
||||
data Rule = Tag :-> [Tag]
|
||||
deriving (Eq,Ord)
|
||||
|
||||
convert pgf morpho w@(Word _ tag _ lemma)
|
||||
| elem tag ["YB","YBL","YBR","YF","YIL","YIR","YTL","YTR", "YO"] = ([],[])
|
||||
{- | tag == "NN1c" = convertLemma pgf morpho (mkCId "N") "s Sg Nom" w
|
||||
| tag == "NN1n" = convertLemma pgf morpho (mkCId "N") "s Sg Nom" w
|
||||
| tag == "NN2" = convertLemma pgf morpho (mkCId "N") "s Pl Nom" w
|
||||
| tag == "JJ" = convertLemma pgf morpho (mkCId "A") "s (AAdj Posit Nom)" w
|
||||
| tag == "JB" = convertLemma pgf morpho (mkCId "A") "s (AAdj Posit Nom)" w
|
||||
| tag == "JBo" = convertLemma pgf morpho (mkCId "A") "s (AAdj Posit Nom)" w
|
||||
| tag == "AT" = convertLemma pgf morpho (mkCId "Quant") "s False Sg" w
|
||||
| tag == "VVDi" = convertLemma pgf morpho (mkCId "V") "s VPast" w
|
||||
| tag == "VVDt" = convertLemma pgf morpho (mkCId "V2") "s VPast" w
|
||||
| tag == "VVDv" = convertLemma pgf morpho (mkCId "V") "s VPast" w
|
||||
| tag == "VVZi" = convertLemma pgf morpho (mkCId "V") "s VPres" w
|
||||
| tag == "VVZt" = convertLemma pgf morpho (mkCId "V2") "s VPres" w
|
||||
| tag == "VVZv" = convertLemma pgf morpho (mkCId "V") "s VPres" w
|
||||
| tag == "PPHS2"= convertLemma pgf morpho (mkCId "Pron") "s (NCase Nom)" w
|
||||
| tag == "PPHO2"= convertLemma pgf morpho (mkCId "Pron") "s NPAcc" w
|
||||
| tag == "RR" = convertLemma pgf morpho (mkCId "Adv") "s" w
|
||||
| tag == "II" = convertLemma pgf morpho (mkCId "Prep") "s" w
|
||||
| tag == "IO" = convertLemma pgf morpho (mkCId "Prep") "s" w-}
|
||||
| otherwise = ([w],[])
|
||||
convert pgf morpho t@(Phrase tag mods fn idx ts)
|
||||
| tag == "O" = (ts',rs')
|
||||
| tag == "Q" = (ts',rs')
|
||||
| tag == "S" = case runP pS pgf morpho ts' of
|
||||
Just ([],x) -> ([x], rs')
|
||||
_ -> ([Phrase tag mods fn idx ts'], (r,[t]) : rs')
|
||||
| otherwise = ([Phrase tag mods fn idx ts'], (r,[t]) : rs')
|
||||
filterTree w@(Word _ tag _ lemma)
|
||||
| elem tag ["YB","YBL","YBR","YF","YIL","YIR","YTL","YTR", "YO"] = []
|
||||
| otherwise = [w]
|
||||
filterTree (Phrase tag mods fn idx ts)
|
||||
| tag == "O" = ts'
|
||||
| tag == "Q" = ts'
|
||||
| otherwise = [Phrase tag mods fn idx ts']
|
||||
where
|
||||
(ts',rs') = combineRes (convert pgf morpho) ts
|
||||
r = tag :-> map getTag ts
|
||||
ts' = concatMap filterTree ts
|
||||
|
||||
isExtra (Word _ "YIL" _ _) = True
|
||||
isExtra (Word _ "YIR" _ _) = True
|
||||
isExtra (Word _ "YTL" _ _) = True
|
||||
isExtra (Word _ "YTR" _ _) = True
|
||||
isExtra _ = False
|
||||
|
||||
getTag (Phrase tag mods fn idx ts) = tag++if null fn then "" else ":"++fn
|
||||
getTag (Word _ tag _ _) = tag
|
||||
|
||||
convertLemma pgf morpho cat an0 w@(Word _ tag form _) =
|
||||
case [f | (f,an) <- lookupMorpho morpho (map toLower form), hasCat pgf f cat, an == an0] of
|
||||
[f] -> ([App f []], [])
|
||||
_ -> ([w],[])
|
||||
convert pgf eng t@(Phrase tag mods fn idx ts)
|
||||
| tag == "S" = case runP pS pgf eng ts of
|
||||
Just ([],x) -> x
|
||||
_ -> Phrase tag mods fn idx ts'
|
||||
| tag == "N" = case runP pNP pgf eng ts of
|
||||
Just ([],x) -> x
|
||||
_ -> Phrase tag mods fn idx ts'
|
||||
| tag == "V" = case runP (pV "V") pgf eng [t] of
|
||||
Just ([],(_,_,_,_,x)) -> x
|
||||
_ -> Phrase tag mods fn idx ts'
|
||||
| tag == "P" = case runP pPP pgf eng ts of
|
||||
Just ([],x) -> x
|
||||
_ -> Phrase tag mods fn idx ts'
|
||||
| tag == "Po"= case runP pPP pgf eng ts of
|
||||
Just ([],x) -> x
|
||||
_ -> Phrase tag mods fn idx ts'
|
||||
| otherwise = Phrase tag mods fn idx ts'
|
||||
where
|
||||
hasCat pgf f cat =
|
||||
case functionType pgf f of
|
||||
Just ty -> case unType ty of
|
||||
(_,cat1,_) -> cat1 == cat
|
||||
Nothing -> False
|
||||
|
||||
combineRes f ts = (ts',rs')
|
||||
where
|
||||
(x,y) = unzip (map f ts)
|
||||
ts' = concat x
|
||||
rs' = concat y
|
||||
ts' = map (convert pgf eng) ts
|
||||
convert pgf eng t@(Word _ tag _ lemma)
|
||||
| take 2 tag == "NN" = case runP pN pgf eng [t] of
|
||||
Just ([],(_,x)) -> x
|
||||
_ -> t
|
||||
| take 1 tag == "J" = case runP pAP pgf eng [t] of
|
||||
Just ([],x) -> x
|
||||
_ -> t
|
||||
| otherwise = t
|
||||
|
||||
pS =
|
||||
do mplus pConj (return ())
|
||||
advs <- many pAdS
|
||||
np <- pSubject
|
||||
(t,p,vp) <- pVP
|
||||
return (foldr ($) (cidUseCl (cidTTAnt t p) (cidPredVP np vp)) advs)
|
||||
return (foldr ($) (cidUseCl t p (cidPredVP np vp)) advs)
|
||||
`mplus`
|
||||
do mplus pConj (return ())
|
||||
(t,p,vp) <- pVP
|
||||
@@ -98,44 +69,56 @@ pS =
|
||||
`mplus`
|
||||
do mplus pConj (return ())
|
||||
advs <- many pAdS
|
||||
t1 <- match "EX"
|
||||
t1 <- match convert "EX"
|
||||
(t,p,vp) <- pVP
|
||||
return (foldr ($) (cidUseCl (cidTTAnt t p) (cidExistNP t1 vp)) advs)
|
||||
return (foldr ($) (cidUseCl t p (cidExistNP t1 vp)) advs)
|
||||
|
||||
pSubject =
|
||||
do insideOpt "N:s" pNP
|
||||
do insideOpt convert "N:s" pNP
|
||||
`mplus`
|
||||
do insideOpt "N:S" pNP
|
||||
do insideOpt convert "N:S" pNP
|
||||
`mplus`
|
||||
do match "M:s"
|
||||
do match convert "M:s"
|
||||
`mplus`
|
||||
do match "M:S"
|
||||
do match convert "M:S"
|
||||
`mplus`
|
||||
do match "D:s"
|
||||
do insideOpt convert "Ds:s" $ do
|
||||
det <- pDet
|
||||
return (cidDetNP (det cidNumSg))
|
||||
`mplus`
|
||||
do match "D:S"
|
||||
do insideOpt convert "Dp:s" $ do
|
||||
det <- pDet
|
||||
return (cidDetNP (det cidNumPl))
|
||||
`mplus`
|
||||
do insideOpt convert "Ds:S" $ do
|
||||
det <- pDet
|
||||
return (cidDetNP (det cidNumSg))
|
||||
`mplus`
|
||||
do insideOpt convert "Dp:S" $ do
|
||||
det <- pDet
|
||||
return (cidDetNP (det cidNumPl))
|
||||
|
||||
pConj =
|
||||
do match "CC"
|
||||
do match convert "CC"
|
||||
return ()
|
||||
`mplus`
|
||||
do match "CCB"
|
||||
do match convert "CCB"
|
||||
return ()
|
||||
|
||||
pAdS =
|
||||
do adv <- pAdv
|
||||
match "YC"
|
||||
do adv <- pVPMods
|
||||
match convert "YC"
|
||||
return (\t -> cidExtAdvS adv t)
|
||||
`mplus`
|
||||
do adv <- pAdv
|
||||
do adv <- pVPMods
|
||||
return (\t -> cidAdvS adv t)
|
||||
|
||||
pVP =
|
||||
do adVs <- many pAdV
|
||||
(t,p,vs) <- pV "VS"
|
||||
advs <- many pAdv
|
||||
s <- insideOpt "F:o"
|
||||
(opt (match "CST") >> pS)
|
||||
(t,p,voice,apect,vs) <- pV "VS"
|
||||
advs <- many pVPMods
|
||||
s <- insideOpt convert "F:o"
|
||||
(opt (match convert "CST") >> pS)
|
||||
return (t,p,foldr (\adv t -> cidAdVVP adv t)
|
||||
(foldl (\t adv -> cidAdvVP t adv)
|
||||
(cidComplVS vs s)
|
||||
@@ -143,20 +126,22 @@ pVP =
|
||||
adVs)
|
||||
`mplus`
|
||||
do adVs <- many pAdV
|
||||
(t,p,vv) <- pV "VV"
|
||||
advs <- many pAdv
|
||||
vp <- match "Ti"
|
||||
(t,p,voice,apect,vv) <- pV "VV"
|
||||
advs <- many pVPMods
|
||||
(p2,voice,aspect,vp) <- inside "Ti" $ do
|
||||
match convert "s"
|
||||
pVPInf
|
||||
return (t,p,foldr (\adv t -> cidAdVVP adv t)
|
||||
(foldl (\t adv -> cidAdvVP t adv)
|
||||
(cidComplVV vv vp)
|
||||
(cidComplVV vv cidASimul p2 vp)
|
||||
advs)
|
||||
adVs)
|
||||
`mplus`
|
||||
do adVs <- many pAdV
|
||||
(t,p,v2) <- pV "V2"
|
||||
(t,p,voice,apect,v2) <- pV "V2"
|
||||
o <- pObject
|
||||
opt (match "YC") -- what is this?
|
||||
advs <- many pAdv
|
||||
opt (match convert "YC") -- what is this?
|
||||
advs <- many pVPMods
|
||||
return (t,p,foldr (\adv t -> cidAdVVP adv t)
|
||||
(foldl (\t adv -> cidAdvVP t adv)
|
||||
(cidComplSlash (cidSlashV2a v2) o)
|
||||
@@ -164,141 +149,641 @@ pVP =
|
||||
adVs)
|
||||
`mplus`
|
||||
do adVs <- many pAdV
|
||||
(t,p,v) <- pV "V"
|
||||
advs <- many pAdv
|
||||
(t,p,voice,apect,v) <- pV "V"
|
||||
advs <- many pVPMods
|
||||
return (t,p,foldr (\adv t -> cidAdVVP adv t)
|
||||
(foldl (\t adv -> cidAdvVP t adv)
|
||||
(cidUseV v)
|
||||
advs)
|
||||
adVs)
|
||||
`mplus`
|
||||
do inside "V" (match convert "VBZ")
|
||||
adVs <- many pAdV
|
||||
p <- pPol
|
||||
comp <- pComp
|
||||
advs <- many pVPMods
|
||||
return (cidTTAnt cidTPres cidASimul,p,foldr (\adv t -> cidAdVVP adv t)
|
||||
(foldl (\t adv -> cidAdvVP t adv)
|
||||
(cidUseComp comp)
|
||||
advs)
|
||||
adVs)
|
||||
|
||||
pComp =
|
||||
do adv <- insideOpt convert "R:e" pAdv
|
||||
return (cidCompAdv adv)
|
||||
`mplus`
|
||||
do np <- insideOpt convert "N:e" pNP
|
||||
return (cidCompNP np)
|
||||
`mplus`
|
||||
do ap <- pAP
|
||||
return (cidCompAP ap)
|
||||
|
||||
pAdv =
|
||||
do lemma "RP" "Adv" "s"
|
||||
|
||||
data Voice = Active | Passive
|
||||
data Aspect = Simple | Progressive
|
||||
|
||||
pV cat =
|
||||
do inside "V" $
|
||||
do v <- lemma "VVDv" (mkCId cat) "s VPast"
|
||||
return (cidTTAnt cidTPast cidASimul,cidPPos,v)
|
||||
do v <- pVPres cat
|
||||
return (cidTTAnt cidTPres cidASimul,cidPPos,Active,Simple,v)
|
||||
`mplus`
|
||||
do v <- lemma "VVDt" (mkCId cat) "s VPast"
|
||||
return (cidTTAnt cidTPast cidASimul,cidPPos,v)
|
||||
do v <- do lemma "VVZi" cat "s VPres"
|
||||
`mplus`
|
||||
do lemma "VVZt" cat "s VPres"
|
||||
`mplus`
|
||||
do lemma "VVZv" cat "s VPres"
|
||||
return (cidTTAnt cidTPres cidASimul,cidPPos,Active,Simple,v)
|
||||
`mplus`
|
||||
do v <- lemma "VVZv" (mkCId cat) "s VPres"
|
||||
return (cidTTAnt cidTPres cidASimul,cidPPos,v)
|
||||
do v <- do lemma "VVDi" cat "s VPast"
|
||||
`mplus`
|
||||
do lemma "VVDt" cat "s VPast"
|
||||
`mplus`
|
||||
do lemma "VVDv" cat "s VPast"
|
||||
return (cidTTAnt cidTPast cidASimul,cidPPos,Active,Simple,v)
|
||||
`mplus`
|
||||
do match "VHD"
|
||||
match "VHD"
|
||||
v <- lemma "VVNv" (mkCId cat) "s VPPart"
|
||||
return (cidTTAnt cidTPres cidAAnter,cidPPos,v)
|
||||
do (match convert "VBM" `mplus` match convert "VBR" `mplus` match convert "VBZ") -- am,are,is
|
||||
pol <- pPol
|
||||
(voice,aspect,v) <- pVPart cat
|
||||
return (cidTTAnt cidTPres cidASimul,pol,voice,aspect,v)
|
||||
`mplus`
|
||||
do (match convert "VBDZ" `mplus` match convert "VBDR") -- was,were
|
||||
pol <- pPol
|
||||
(voice,aspect,v) <- pVPart cat
|
||||
return (cidTTAnt cidTPast cidASimul,pol,voice,aspect,v)
|
||||
`mplus`
|
||||
do match convert "VH0" -- have
|
||||
pol <- pPol
|
||||
(voice,aspect,v) <- do v <- pVPastPart cat
|
||||
return (Active,Simple,v)
|
||||
`mplus`
|
||||
do match convert "VBN" -- been
|
||||
pVPart cat
|
||||
return (cidTTAnt cidTPres cidAAnter,pol,voice,aspect,v)
|
||||
`mplus`
|
||||
do match convert "VH0" -- have
|
||||
return (cidTTAnt cidTPres cidAAnter,cidPPos,Active,Simple,cidhave_V2)
|
||||
`mplus`
|
||||
do match convert "VHZ" -- has
|
||||
pol <- pPol
|
||||
(voice,aspect,v) <- do v <- pVPastPart cat
|
||||
return (Active,Simple,v)
|
||||
`mplus`
|
||||
do match convert "VBN" -- been
|
||||
pVPart cat
|
||||
return (cidTTAnt cidTPres cidAAnter,pol,voice,aspect,v)
|
||||
`mplus`
|
||||
do match convert "VHZ" -- has
|
||||
return (cidTTAnt cidTPres cidAAnter,cidPPos,Active,Simple,cidhave_V2)
|
||||
`mplus`
|
||||
do match convert "VHD" -- had
|
||||
pol <- pPol
|
||||
(voice,aspect,v) <- do v <- pVPastPart cat
|
||||
return (Active,Simple,v)
|
||||
`mplus`
|
||||
do match convert "VBN" -- been
|
||||
pVPart cat
|
||||
return (cidTTAnt cidTPast cidAAnter,pol,voice,aspect,v)
|
||||
`mplus`
|
||||
do match convert "VHD" -- had
|
||||
return (cidTTAnt cidTPast cidASimul,cidPPos,Active,Simple,cidhave_V2)
|
||||
`mplus`
|
||||
do w <- word "VMo" -- will
|
||||
guard (w == "will")
|
||||
pol <- pPol
|
||||
(voice,apect,vp) <- pVInf cat
|
||||
return (cidTTAnt cidTFut cidASimul,pol,voice,apect,vp)
|
||||
`mplus`
|
||||
do mplus (match convert "VD0") (match convert "VDZ") -- do,does
|
||||
match convert "XX"
|
||||
vp <- pVPres cat
|
||||
return (cidTTAnt cidTPres cidASimul,cidPNeg,Active,Simple,vp)
|
||||
`mplus`
|
||||
do match convert "VDD" -- did
|
||||
match convert "XX"
|
||||
vp <- pVPres cat
|
||||
return (cidTTAnt cidTPast cidASimul,cidPNeg,Active,Simple,vp)
|
||||
|
||||
|
||||
pVPInf = do
|
||||
adVs <- many pAdV
|
||||
(pol,voice,apect,vp) <- do (pol,voice,apect,v2) <- inside "Vi" $
|
||||
do pol <- pPol
|
||||
match convert "TO"
|
||||
(voice,aspect,v) <- pVInf "V2"
|
||||
return (pol,voice,aspect,v)
|
||||
o <- pObject
|
||||
return (pol,voice,apect,cidComplSlash (cidSlashV2a v2) o)
|
||||
`mplus`
|
||||
do (pol,voice,apect,v2a) <- inside "Vi" $
|
||||
do pol <- pPol
|
||||
match convert "TO"
|
||||
(voice,aspect,v) <- pVInf "V2A"
|
||||
return (pol,voice,aspect,v)
|
||||
o <- pObject
|
||||
ap <- pAP
|
||||
return (pol,voice,apect,cidComplSlash (cidSlashV2A v2a ap) o)
|
||||
advs <- many pVPMods
|
||||
return (pol,voice,apect,
|
||||
foldr (\adv t -> cidAdVVP adv t)
|
||||
(foldl (\t adv -> cidAdvVP t adv)
|
||||
vp
|
||||
advs)
|
||||
adVs)
|
||||
|
||||
pVInf cat =
|
||||
do v <- pVPres cat
|
||||
return (Active,Simple,v)
|
||||
`mplus`
|
||||
do v <- match "V"
|
||||
return (App (mkCId "XXX") [],App (mkCId "XXX") [],v)
|
||||
do match convert "VB0" -- be
|
||||
pVPart cat
|
||||
`mplus`
|
||||
do v <- match convert "VH0" -- have
|
||||
return (Active,Simple,v)
|
||||
|
||||
pVPart cat =
|
||||
do v <- do lemma "VVGi" cat "s VPresPart"
|
||||
`mplus`
|
||||
do lemma "VVGt" cat "s VPresPart"
|
||||
`mplus`
|
||||
do lemma "VVGv" cat "s VPresPart"
|
||||
return (Active,Progressive,v)
|
||||
`mplus`
|
||||
do v <- pVPastPart cat
|
||||
return (Passive,Simple,v)
|
||||
|
||||
pVPres cat =
|
||||
do lemma "VV0i" cat "s VInf"
|
||||
`mplus`
|
||||
do lemma "VV0t" cat "s VInf"
|
||||
`mplus`
|
||||
do lemma "VV0v" cat "s VInf"
|
||||
|
||||
pVPastPart cat =
|
||||
do lemma "VVNi" cat "s VPPart"
|
||||
`mplus`
|
||||
do lemma "VVNt" cat "s VPPart"
|
||||
`mplus`
|
||||
do lemma "VVNv" cat "s VPPart"
|
||||
|
||||
pPol =
|
||||
do match convert "XX"
|
||||
return cidPNeg
|
||||
`mplus`
|
||||
do return cidPPos
|
||||
|
||||
pAdV =
|
||||
do insideOpt "R:c" $
|
||||
lemma "RRR" (mkCId "AdV") "s"
|
||||
do insideOpt convert "R:c" $
|
||||
lemma "RRR" "AdV" "s"
|
||||
`mplus`
|
||||
do match "R:m"
|
||||
do insideOpt convert "R:m" $
|
||||
lemma "RRR" "AdV" "s"
|
||||
|
||||
pObject =
|
||||
match "P:u"
|
||||
match convert "P:u"
|
||||
`mplus`
|
||||
insideOpt "N:o" pNP
|
||||
insideOpt convert "N:o" pNP
|
||||
`mplus`
|
||||
match "N:e"
|
||||
match convert "N:e"
|
||||
`mplus`
|
||||
match "M:e"
|
||||
match convert "M:e"
|
||||
`mplus`
|
||||
match "D:e"
|
||||
do insideOpt convert "Ds:e" $ do
|
||||
det <- pDet
|
||||
return (cidDetNP (det cidNumSg))
|
||||
`mplus`
|
||||
match "P:e"
|
||||
do insideOpt convert "Dp:e" $ do
|
||||
det <- pDet
|
||||
return (cidDetNP (det cidNumPl))
|
||||
`mplus`
|
||||
do insideOpt convert "P:e" $ pPP
|
||||
|
||||
pAdv =
|
||||
do match "N:t"
|
||||
pVPMods =
|
||||
do insideOpt convert "N:t" pTimeNPAdv
|
||||
`mplus`
|
||||
do match "N:h"
|
||||
do match convert "N:h"
|
||||
`mplus`
|
||||
do match "P:p"
|
||||
do insideOpt convert "P:p" $ pPP
|
||||
`mplus`
|
||||
do match "P:q"
|
||||
do insideOpt convert "P:q" $ pPP
|
||||
`mplus`
|
||||
do match "P:a"
|
||||
do insideOpt convert "Pb:a" $ do
|
||||
match convert "IIb"
|
||||
np <- insideOpt convert "N" pNP
|
||||
return (cidPrepNP cidby_Prep np)
|
||||
`mplus`
|
||||
do match "P:t"
|
||||
do insideOpt convert "P:t" $ pPP
|
||||
`mplus`
|
||||
do match "P:h"
|
||||
do insideOpt convert "P:h" $ pPP
|
||||
`mplus`
|
||||
do match "P:m"
|
||||
do insideOpt convert "P:m" $ pPP
|
||||
`mplus`
|
||||
do match "P:r"
|
||||
do insideOpt convert "P:r" $ pPP
|
||||
`mplus`
|
||||
do match "R:p"
|
||||
do insideOpt convert "R:p" $ pAdv
|
||||
`mplus`
|
||||
do match "R:q"
|
||||
do insideOpt convert "R:q" $ pAdv
|
||||
`mplus`
|
||||
do match "R:a"
|
||||
do insideOpt convert "R:a" $ pAdv
|
||||
`mplus`
|
||||
do match "R:t"
|
||||
do insideOpt convert "R:t" $ pAdv
|
||||
`mplus`
|
||||
do match "R:h"
|
||||
do insideOpt convert "R:h" $ pAdv
|
||||
`mplus`
|
||||
do match "R:m"
|
||||
do insideOpt convert "R:m" $ pAdv
|
||||
`mplus`
|
||||
do match "R:c"
|
||||
do insideOpt convert "R:c" $ pAdv
|
||||
`mplus`
|
||||
do match "R:r"
|
||||
do insideOpt convert "R:r" $ pAdv
|
||||
`mplus`
|
||||
do match "F:p"
|
||||
do match convert "F:p"
|
||||
`mplus`
|
||||
do match "F:q"
|
||||
do match convert "F:q"
|
||||
`mplus`
|
||||
do match "F:a"
|
||||
do match convert "F:a"
|
||||
`mplus`
|
||||
do match "F:t"
|
||||
do match convert "F:t"
|
||||
`mplus`
|
||||
do match "F:h"
|
||||
do match convert "F:h"
|
||||
`mplus`
|
||||
do match "F:m"
|
||||
do match convert "F:m"
|
||||
`mplus`
|
||||
do match "F:r"
|
||||
do match convert "F:r"
|
||||
`mplus`
|
||||
do match "W:b"
|
||||
do match convert "W:b"
|
||||
`mplus`
|
||||
do match "L:b"
|
||||
do match convert "L:b"
|
||||
|
||||
pNP = do
|
||||
q <- pQuant
|
||||
(n,cn) <- pCN
|
||||
return (cidDetCN (cidDetQuant q n) cn)
|
||||
pPP =
|
||||
do prep <- do lemma "ICS" "Prep" "s"
|
||||
`mplus`
|
||||
do lemma "ICSk" "Prep" "s"
|
||||
`mplus`
|
||||
do lemma "ICSt" "Prep" "s"
|
||||
`mplus`
|
||||
do lemma "ICSx" "Prep" "s"
|
||||
`mplus`
|
||||
do lemma "IF" "Prep" "s"
|
||||
`mplus`
|
||||
do lemma "II" "Prep" "s"
|
||||
`mplus`
|
||||
do lemma "IIa" "Prep" "s"
|
||||
`mplus`
|
||||
do lemma "IIb" "Prep" "s"
|
||||
`mplus`
|
||||
do lemma "IIg" "Prep" "s"
|
||||
`mplus`
|
||||
do lemma "IIp" "Prep" "s"
|
||||
`mplus`
|
||||
do lemma "IIt" "Prep" "s"
|
||||
`mplus`
|
||||
do lemma "IO" "Prep" "s"
|
||||
`mplus`
|
||||
do lemma "IW" "Prep" "s"
|
||||
`mplus`
|
||||
do insideOpt convert "II=" $ do
|
||||
w1 <- word "II21"
|
||||
w2 <- word "II22"
|
||||
lookupForm "Prep" "s" (unwords [w1,w2])
|
||||
np <- do insideOpt convert "N" pNP
|
||||
`mplus`
|
||||
do (mb_num,n) <- pN
|
||||
case mb_num of
|
||||
Just num | num == cidNumPl -> return (cidDetCN (cidDetQuant cidIndefArt num) (cidUseN n))
|
||||
_ -> return (cidMassNP (cidUseN n)) -- we don't know the number
|
||||
return (cidPrepNP prep np)
|
||||
|
||||
pNP =
|
||||
do np <- pBaseNP
|
||||
match convert "YC"
|
||||
fr <- insideOpt convert "Fr" pRCl
|
||||
return (cidRelNP np fr)
|
||||
`mplus`
|
||||
do pBaseNP
|
||||
|
||||
pBaseNP =
|
||||
do det <- pDet
|
||||
(mb_num,cn) <- pCN
|
||||
case mb_num of
|
||||
Just num -> return (cidDetCN (det num) cn)
|
||||
Nothing -> mzero -- we don't know the number
|
||||
`mplus`
|
||||
do pn <- pName
|
||||
return (cidUsePN pn)
|
||||
`mplus`
|
||||
do (mb_num,cn) <- pCN
|
||||
case mb_num of
|
||||
Just num | num == cidNumPl -> return (cidDetCN (cidDetQuant cidIndefArt num) cn)
|
||||
_ -> return (cidMassNP cn) -- we don't know the number
|
||||
`mplus`
|
||||
do match convert "PPIS1"
|
||||
return (cidUsePron cidi_Pron)
|
||||
`mplus`
|
||||
do match convert "PPY"
|
||||
return (cidUsePron cidyouSg_Pron)
|
||||
`mplus`
|
||||
do match convert "PPHS1m"
|
||||
return (cidUsePron cidhe_Pron)
|
||||
`mplus`
|
||||
do match convert "PPHS1f"
|
||||
return (cidUsePron cidshe_Pron)
|
||||
`mplus`
|
||||
do match convert "PPH1"
|
||||
return (cidUsePron cidit_Pron)
|
||||
`mplus`
|
||||
do match convert "PPIS2"
|
||||
return (cidUsePron cidwe_Pron)
|
||||
`mplus`
|
||||
do match convert "PPHS2"
|
||||
return (cidUsePron cidthey_Pron)
|
||||
`mplus`
|
||||
do match convert "Nn"
|
||||
|
||||
pDet =
|
||||
do match convert "DDy"
|
||||
return (\num -> if num == cidNumSg
|
||||
then cidanySg_Det
|
||||
else cidanyPl_Det)
|
||||
`mplus`
|
||||
do det <- lemma "DA2" "Det" "s"
|
||||
return (\num -> det)
|
||||
`mplus`
|
||||
do q <- pQuant
|
||||
ord <- pOrd
|
||||
return (\num -> cidDetQuantOrd q num ord)
|
||||
`mplus`
|
||||
do q <- pQuant
|
||||
return (\num -> cidDetQuant q num)
|
||||
|
||||
pQuant =
|
||||
do lemma "AT" (mkCId "Quant") "s False Sg"
|
||||
do match convert "AT"
|
||||
return cidDefArt
|
||||
`mplus`
|
||||
do match "AT1"
|
||||
do match convert "AT1"
|
||||
return cidIndefArt
|
||||
`mplus`
|
||||
do match convert "ATn"
|
||||
return cidno_Quant
|
||||
`mplus`
|
||||
do match convert "APPGi1"
|
||||
return (cidPossPron cidi_Pron)
|
||||
`mplus`
|
||||
do match convert "APPGy"
|
||||
return (cidPossPron cidyouSg_Pron)
|
||||
`mplus`
|
||||
do match convert "APPGm"
|
||||
return (cidPossPron cidhe_Pron)
|
||||
`mplus`
|
||||
do match convert "APPGf"
|
||||
return (cidPossPron cidshe_Pron)
|
||||
`mplus`
|
||||
do match convert "APPGh1"
|
||||
return (cidPossPron cidit_Pron)
|
||||
`mplus`
|
||||
do match convert "APPGi2"
|
||||
return (cidPossPron cidwe_Pron)
|
||||
`mplus`
|
||||
do match convert "APPGh2"
|
||||
return (cidPossPron cidthey_Pron)
|
||||
`mplus`
|
||||
do lemma "DD1a" "Quant" "s True Sg"
|
||||
`mplus`
|
||||
do lemma "DD1i" "Quant" "s True Sg"
|
||||
`mplus`
|
||||
do lemma "DD2a" "Quant" "s True Pl"
|
||||
`mplus`
|
||||
do lemma "DD2i" "Quant" "s True Pl"
|
||||
`mplus`
|
||||
do lemma "DDi" "Quant" "s True Pl"
|
||||
`mplus`
|
||||
insideOpt convert "G" pGenitive
|
||||
|
||||
pOrd =
|
||||
do a <- lemma "JJT" "A" "s (AAdj Superl Nom)"
|
||||
return (cidOrdSuperl a)
|
||||
|
||||
pGenitive =
|
||||
do np <- insideOpt convert "N" pNP
|
||||
match convert "GG"
|
||||
return (cidGenNP np)
|
||||
|
||||
pCN =
|
||||
do np <- insideOpt "N" pNP
|
||||
(n,cn) <- pCN
|
||||
return (n,App (mkCId "Appos") [np,cn])
|
||||
do pn <- mplus pName (insideOpt convert "Nn" pName)
|
||||
(mb_num,cn) <- pCN
|
||||
return (mb_num,cidNameCN pn cn)
|
||||
`mplus`
|
||||
do a <- lemma "JJ" (mkCId "A") "s (AAdj Posit Nom)"
|
||||
(n,cn) <- pCN
|
||||
return (n,cidAdjCN (cidPositA a) cn)
|
||||
do (mb_num_n,n) <- mplus pN (inside "N" pCN)
|
||||
(mb_num,cn) <- pCN
|
||||
case mb_num_n of
|
||||
Just num | num == cidNumPl -> return (mb_num,cidCompoundPlCN (cidUseN n) cn)
|
||||
_ -> return (mb_num,cidCompoundSgCN (cidUseN n) cn) -- here we don't really know the number
|
||||
`mplus`
|
||||
do (num,n) <- pN
|
||||
advs <- many pPo
|
||||
return (num,
|
||||
foldl (\t adv -> cidAdvCN t adv)
|
||||
do ap <- pAP
|
||||
(mb_num,cn) <- pCN
|
||||
return (mb_num,cidAdjCN ap cn)
|
||||
`mplus`
|
||||
do t <- match convert "NN1u&"
|
||||
mods <- many pCNMods
|
||||
return (Just cidNumSg
|
||||
,foldl (\t mod -> mod t)
|
||||
t
|
||||
mods)
|
||||
`mplus`
|
||||
do (mb_num,n) <- pN
|
||||
mods <- many pCNMods
|
||||
return (mb_num,
|
||||
foldl (\t mod -> mod t)
|
||||
(cidUseN n)
|
||||
advs)
|
||||
mods)
|
||||
|
||||
pN =
|
||||
do n <- lemma "NN1c" (mkCId "N") "s Sg Nom"
|
||||
return (cidNumSg, n)
|
||||
pAP =
|
||||
do a <- lemma "JJ" "A" "s (AAdj Posit Nom)"
|
||||
`mplus`
|
||||
lemma "JA" "A" "s (AAdj Posit Nom)"
|
||||
`mplus`
|
||||
lemma "JB" "A" "s (AAdj Posit Nom)"
|
||||
`mplus`
|
||||
lemma "JBo" "A" "s (AAdj Posit Nom)"
|
||||
`mplus`
|
||||
lemma "DAy" "A" "s (AAdj Posit Nom)"
|
||||
`mplus`
|
||||
lemma "DAz" "A" "s (AAdj Posit Nom)"
|
||||
return (cidPositA a)
|
||||
`mplus`
|
||||
do n <- lemma "NN1n" (mkCId "N") "s Sg Nom"
|
||||
return (cidNumSg, n)
|
||||
do a <- lemma "JJR" "A" "s (AAdj Compar Nom)"
|
||||
return (cidUseComparA a)
|
||||
`mplus`
|
||||
do vp <- match convert "Tn"
|
||||
return (cidPastPartAP vp)
|
||||
`mplus`
|
||||
do insideOpt convert "J" $ do
|
||||
adas <- many pAdA
|
||||
ap <- pAP
|
||||
mods <- many pAPMods
|
||||
return (foldl (\t ada -> cidAdAP ada t)
|
||||
(foldl (\t mod -> cidAdvAP t mod)
|
||||
ap
|
||||
mods)
|
||||
adas)
|
||||
|
||||
pPo =
|
||||
insideOpt "Po" $ do
|
||||
p <- match "IO"
|
||||
np <- insideOpt "N" pNP
|
||||
return (cidPrepNP p np)
|
||||
pAdA = do
|
||||
a <- lemma "RR" "A" "s AAdv"
|
||||
return (cidPositAdAAdj a)
|
||||
|
||||
pAPMods =
|
||||
do insideOpt convert "P" pPP
|
||||
|
||||
pN =
|
||||
do n <- lemma "NNn" "N" "s Sg Nom"
|
||||
return (Nothing, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNu" "N" "s Sg Nom"
|
||||
return (Just cidNumPl, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNux" "N" "s Sg Nom"
|
||||
return (Nothing, n)
|
||||
`mplus`
|
||||
do n <- lemma "NN1c" "N" "s Sg Nom"
|
||||
return (Just cidNumSg, n)
|
||||
`mplus`
|
||||
do n <- lemma "NN1m" "N" "s Sg Nom"
|
||||
return (Just cidNumSg, n)
|
||||
`mplus`
|
||||
do n <- lemma "NN1n" "N" "s Sg Nom"
|
||||
return (Just cidNumSg, n)
|
||||
`mplus`
|
||||
do n <- lemma "NN1u" "N" "s Sg Nom"
|
||||
return (Just cidNumSg, n)
|
||||
`mplus`
|
||||
do n <- lemma "NN1ux" "N" "s Sg Nom"
|
||||
return (Just cidNumSg, n)
|
||||
`mplus`
|
||||
do n <- lemma "NN2" "N" "s Pl Nom"
|
||||
return (Just cidNumPl, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNJ1c" "N" "s Sg Nom"
|
||||
return (Just cidNumSg, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNJ1n" "N" "s Sg Nom"
|
||||
return (Just cidNumSg, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNJ2" "N" "s Pl Nom"
|
||||
return (Just cidNumPl, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNLc" "N" "s Sg Nom"
|
||||
return (Nothing, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNL1c" "N" "s Sg Nom"
|
||||
return (Just cidNumSg, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNL1cb" "N" "s Sg Nom"
|
||||
return (Just cidNumSg, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNL1n" "N" "s Sg Nom"
|
||||
return (Just cidNumSg, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNL2" "N" "s Pl Nom"
|
||||
return (Just cidNumPl, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNS1c" "N" "s Sg Nom"
|
||||
return (Just cidNumSg, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNS1n" "N" "s Sg Nom"
|
||||
return (Just cidNumSg, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNS2" "N" "s Pl Nom"
|
||||
return (Just cidNumPl, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNT1h" "N" "s Sg Nom"
|
||||
return (Just cidNumSg, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNT1m" "N" "s Sg Nom"
|
||||
return (Just cidNumSg, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNT1c" "N" "s Sg Nom"
|
||||
return (Just cidNumSg, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNT2" "N" "s Pl Nom"
|
||||
return (Just cidNumPl, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNUc" "N" "s Sg Nom"
|
||||
return (Nothing, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNUn" "N" "s Sg Nom"
|
||||
return (Nothing, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNU1c" "N" "s Sg Nom"
|
||||
return (Just cidNumSg, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNU1n" "N" "s Sg Nom"
|
||||
return (Just cidNumSg, n)
|
||||
`mplus`
|
||||
do n <- lemma "NNU2" "N" "s Pl Nom"
|
||||
return (Just cidNumPl, n)
|
||||
`mplus`
|
||||
do inside "Ns" $ do
|
||||
(mb_num1,n1) <- pN
|
||||
match convert "YH"
|
||||
(mb_num2,n2) <- pN
|
||||
case mb_num1 of
|
||||
Just num | num == cidNumPl -> return (mb_num2,cidDashPlN n1 n2)
|
||||
_ -> return (mb_num2,cidDashSgN n1 n2) -- here we don't really know the number
|
||||
|
||||
pCNMods =
|
||||
do adv <- insideOpt convert "Po" $ pPP
|
||||
return (\t -> cidAdvCN t adv)
|
||||
`mplus`
|
||||
do adv <- insideOpt convert "P" $ pPP
|
||||
return (\t -> cidAdvCN t adv)
|
||||
`mplus`
|
||||
do adv <- insideOpt convert "Fn" $ do
|
||||
match convert "CST"
|
||||
s <- pS
|
||||
return (cidSubjS cidthat_Subj s)
|
||||
return (\t -> cidAdvCN t adv)
|
||||
`mplus`
|
||||
do fr <- insideOpt convert "Fr" pRCl
|
||||
return (\t -> cidRelCN t fr)
|
||||
|
||||
pName =
|
||||
do w1 <- word "NP1s"
|
||||
w2 <- word "NNL1cb"
|
||||
return (cidSymbPN (cidMkSymb (Lit (unwords [w1,w2]))))
|
||||
`mplus`
|
||||
do w1 <- word "NPM1"
|
||||
match convert "YH"
|
||||
w2 <- word "NPM1"
|
||||
return (cidSymbPN (cidMkSymb (Lit (unwords [w1,"-",w2]))))
|
||||
`mplus`
|
||||
do w1 <- msum [word "NP1c", word "NP1f", word "NP1g"
|
||||
,word "NP1j", word "NP1m", word "NP1p"
|
||||
,word "NP1s", word "NP1t", word "NP1x"
|
||||
,word "NP1z", word "NP2c", word "NP2f"
|
||||
,word "NP2g", word "NP2j", word "NP2m"
|
||||
,word "NP2p", word "NP2s", word "NP2t"
|
||||
,word "NP2x", word "NP2z"]
|
||||
return (cidSymbPN (cidMkSymb (Lit w1)))
|
||||
|
||||
pRCl =
|
||||
do rp <- pRP
|
||||
(t,p,vp) <- pVP
|
||||
opt (match convert "YC")
|
||||
return (cidUseRCl t p (cidRelVP rp vp))
|
||||
`mplus`
|
||||
do (prep,rp) <- inside "Pq" $ do
|
||||
prep <- lemma "II" "Prep" "s"
|
||||
rp <- pRP
|
||||
return (prep,rp)
|
||||
np <- pSubject
|
||||
(t,p,vp) <- pVP
|
||||
opt (match convert "YC")
|
||||
return (cidUseRCl t p (cidRelSlash rp (cidSlashVP np (cidVPSlashPrep vp prep))))
|
||||
|
||||
pRP =
|
||||
do inside "Dq" (match convert "DDQr")
|
||||
return cidIdRP
|
||||
|
||||
pTimeNPAdv = do
|
||||
day <- lemma "NPD1" "Weekday" "s Sg Nom"
|
||||
return (cidweekdayPunctualAdv day)
|
||||
|
||||
Reference in New Issue
Block a user