mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
added the GF version of Talbanken which was imported by Malin
This commit is contained in:
106
treebanks/talbanken/Format.hs
Normal file
106
treebanks/talbanken/Format.hs
Normal file
@@ -0,0 +1,106 @@
|
||||
{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-}
|
||||
module Format where
|
||||
import Prelude hiding (words,id)
|
||||
import Text.XML.HXT.Core
|
||||
import Data.Ord
|
||||
import Data.List hiding (words)
|
||||
import Data.Maybe
|
||||
import qualified Data.Map as M
|
||||
import System.Environment
|
||||
import Debug.Trace as Debug
|
||||
import Data.Char
|
||||
import qualified Data.Tree as T
|
||||
import Control.Monad.State
|
||||
|
||||
-- Functions for parsing XML to a format read by the other Haskell files
|
||||
|
||||
data Word = W {id :: Id, word :: String, pos :: Tag}
|
||||
data PhrTag = Ph {idPh :: Id, cat :: Tag, tags :: [(Tag,Id)]}
|
||||
data Sentence = Sent {rootS :: Id, words :: [Word], info :: [PhrTag], ws :: Int}
|
||||
type Tag = String
|
||||
type Id = String
|
||||
|
||||
|
||||
instance XmlPickler [Word] where
|
||||
xpickle = xpWords
|
||||
|
||||
instance XmlPickler Sentence where
|
||||
xpickle = xpSentence
|
||||
|
||||
xpSentences :: PU [Sentence]
|
||||
xpSentences = xpElem "corpus"
|
||||
$ xpWrap (snd, \a -> ((),a))
|
||||
$ xpPair (xpElem "head" $ xpUnit) (xpElem "body" $ xpList $ xpSentence)
|
||||
xpTags :: PU [PhrTag]
|
||||
xpTags = xpList $ xpElem "nt"
|
||||
$ xpWrap (uncurry3 Ph,\p -> (idPh p,cat p,tags p))
|
||||
$ xpTriple (xpAttr "id" xpText) (xpAttr "cat" xpText)
|
||||
(xpList $ xpTagMap)
|
||||
|
||||
xpTagMap :: PU (Tag,String)
|
||||
xpTagMap = xpElem "edge"
|
||||
$ xpPair (xpAttr "label" xpText)
|
||||
(xpAttr "idref" xpText)
|
||||
|
||||
xpSentence :: PU Sentence
|
||||
xpSentence = xpElem "s"
|
||||
$ xpWrap (makeSentence,\s -> (rootS s,words s, info s))
|
||||
$ xpElem "graph"
|
||||
$ xpTriple (xpAttr "root" xpText)
|
||||
( xpElem "terminals" xpWords)
|
||||
( xpElem "nonterminals" xpTags)
|
||||
where makeSentence (r,ws,tgs) = Sent r ws tgs (length ws)
|
||||
|
||||
|
||||
xpWords :: PU [Word]
|
||||
xpWords = xpList $ xpElem "t"
|
||||
$ xpWrap (uncurry3 W,\t -> (id t, word t,pos t))
|
||||
$ xpTriple (xpAttr "id" xpText)
|
||||
(xpAttr "word" xpText)
|
||||
(xpAttr "pos" xpText)
|
||||
|
||||
mainF src =
|
||||
runX (xunpickleDocument xpSentences [withInputEncoding utf8
|
||||
, withRemoveWS yes] src
|
||||
>>> arrIO (putStrLn . unlines . map (show . toTree)))
|
||||
|
||||
runPickle f src =
|
||||
runX (xunpickleDocument xpSentences [withInputEncoding utf8
|
||||
, withRemoveWS yes] src
|
||||
>>> arrIO (return . map f))
|
||||
|
||||
parse = runPickle toStringTree
|
||||
parseIdTree = runPickle toTree
|
||||
|
||||
toStringTree :: Sentence -> (String,T.Tree String)
|
||||
toStringTree = second (fmap snd) . toTree
|
||||
|
||||
toTree :: Sentence -> (String,T.Tree (Id,String))
|
||||
toTree s@(Sent root ws inf _) = (root,toTree' root s)
|
||||
|
||||
toTree' :: String -> Sentence -> T.Tree (Id,String)
|
||||
toTree' nr s@(Sent root ws inf _) =
|
||||
case (lookup' nr ws,lookup'' nr inf) of
|
||||
(Just w,_) -> putWord w
|
||||
(_,Just p) -> putPhrase p
|
||||
_ -> error $ "Error in toTree' "++show nr++" could not be found"
|
||||
where putWord (W i w p) = T.Node (i,p) [T.Node (i,w) []]
|
||||
putPhrase (Ph i c t) = T.Node (i,c)
|
||||
$ map (\(tag,next) -> T.Node (next,tag) [toTree' next s]) t
|
||||
|
||||
lookup' y (w@(W x _ _):xs) | y ==x = Just w
|
||||
| otherwise = lookup' y xs
|
||||
lookup' y [] = Nothing
|
||||
|
||||
lookup'' y (w@(Ph x _ _):xs) | y ==x = Just w
|
||||
| otherwise = lookup'' y xs
|
||||
lookup'' y [] = Nothing
|
||||
|
||||
|
||||
treeToSentence :: [T.Tree String] -> String
|
||||
treeToSentence ts = unwords $ map extractS ts
|
||||
where extractS (T.Node ws []) = ws
|
||||
extractS (T.Node c ts) = unwords $ map extractS ts
|
||||
|
||||
showa :: T.Tree String -> String
|
||||
showa (T.Node root ts) = "("++root++" "++concatMap showa ts++" )"
|
||||
182
treebanks/talbanken/Idents.hs
Normal file
182
treebanks/talbanken/Idents.hs
Normal file
@@ -0,0 +1,182 @@
|
||||
module Idents where
|
||||
|
||||
import PGF
|
||||
|
||||
cidASimul = mkCId "ASimul"
|
||||
cidAAnter = mkCId "AAnter"
|
||||
cidPositAdvAdj = mkCId "PositAdvAdj"
|
||||
cidPositAdVAdj = mkCId "PositAdVAdj"
|
||||
cidUseCl = mkCId "UseCl"
|
||||
cidUseQCl = mkCId "UseQCl"
|
||||
cidPredVP = mkCId "PredVP"
|
||||
cidAdjCN = mkCId "AdjCN"
|
||||
cidUseN = mkCId "UseN"
|
||||
cidDetQuant = mkCId "DetQuant"
|
||||
cidNumSg = mkCId "NumSg"
|
||||
cidNumPl = mkCId "NumPl"
|
||||
cidDetCN = mkCId "DetCN"
|
||||
cidIndefArt = mkCId "IndefArt"
|
||||
cidDefArt = mkCId "DefArt"
|
||||
cidUsePN = mkCId "UsePN"
|
||||
cidUseQuantPN = mkCId "UseQuantPN"
|
||||
cidSymbPN = mkCId "SymbPN"
|
||||
cidMkSymb = mkCId "MkSymb"
|
||||
cidUsePron = mkCId "UsePron"
|
||||
cidConjNP = mkCId "ConjNP"
|
||||
cidBaseNP = mkCId "BaseNP"
|
||||
cidConsNP = mkCId "ConsNP"
|
||||
cidConjCN = mkCId "ConjCN"
|
||||
cidBaseCN = mkCId "BaseCN"
|
||||
cidConsCN = mkCId "ConsCN"
|
||||
cidMassNP = mkCId "MassNP"
|
||||
cidAdvNP = mkCId "AdvNP"
|
||||
cidTPres = mkCId "TPres"
|
||||
cidTPast = mkCId "TPast"
|
||||
cidTFut = mkCId "TFut"
|
||||
cidTFutKommer = mkCId "TFutKommer"
|
||||
cidTCond = mkCId "TCond"
|
||||
cidTTAnt = mkCId "TTAnt"
|
||||
cidPPos = mkCId "PPos"
|
||||
cidPNeg = mkCId "PNeg"
|
||||
cidComplSlash = mkCId "ComplSlash"
|
||||
cidSlashV2a = mkCId "SlashV2a"
|
||||
cidSlashV2A = mkCId "SlashV2A"
|
||||
cidComplVS = mkCId "ComplVS"
|
||||
cidUseV = mkCId "UseV"
|
||||
cidAdVVP = mkCId "AdVVP"
|
||||
cidAdvVP = mkCId "AdvVP"
|
||||
cidAdvVPSlash = mkCId "AdvVPSlash"
|
||||
cidPrepNP = mkCId "PrepNP"
|
||||
cidto_Prep = mkCId "to_Prep"
|
||||
cidsuch_as_Prep= mkCId "such_as_Prep"
|
||||
cidPastPartAP = mkCId "PastPartAP"
|
||||
cidPassV2 = mkCId "PassV2"
|
||||
cidAdvS = mkCId "AdvS"
|
||||
cidPositA = mkCId "PositA"
|
||||
cidIDig = mkCId "IDig"
|
||||
cidIIDig = mkCId "IIDig"
|
||||
cidNumCard = mkCId "NumCard"
|
||||
cidNumDigits = mkCId "NumDigits"
|
||||
cidNumNumeral = mkCId "NumNumeral"
|
||||
cidnum = mkCId "num"
|
||||
cidpot2as3 = mkCId "pot2as3"
|
||||
cidpot1as2 = mkCId "pot1as2"
|
||||
cidpot0as1 = mkCId "pot0as1"
|
||||
cidpot01 = mkCId "pot01"
|
||||
cidpot0 = mkCId "pot0"
|
||||
cidn7 = mkCId "n7"
|
||||
cidPossPron = mkCId "PossPron"
|
||||
cidCompAP = mkCId "CompAP"
|
||||
cidCompNP = mkCId "CompNP"
|
||||
cidCompAdv = mkCId "CompAdv"
|
||||
cidUseComp = mkCId "UseComp"
|
||||
cidCompoundCN = mkCId "CompoundCN"
|
||||
cidDashCN = mkCId "DashCN"
|
||||
cidProgrVP = mkCId "ProgrVP"
|
||||
cidGerundN = mkCId "GerundN"
|
||||
cidGenNP = mkCId "GenNP"
|
||||
cidPredetNP = mkCId "PredetNP"
|
||||
cidDetNP = mkCId "DetNP"
|
||||
cidAdAP = mkCId "AdAP"
|
||||
cidPositAdAAdj = mkCId "PositAdAAdj"
|
||||
|
||||
cidBaseAP = mkCId "BaseAP"
|
||||
cidConjAP = mkCId "ConjAP"
|
||||
cidAndConj = mkCId "and_Conj"
|
||||
cidOrConj = mkCId "or_Conj"
|
||||
cidConsAP = mkCId "ConsAP"
|
||||
cidQuestVP = mkCId "QuestVP"
|
||||
cidComplVV = mkCId "ComplVV"
|
||||
cidComplVA = mkCId "ComplVA"
|
||||
cidUseCopula = mkCId "UseCopula"
|
||||
cidPhrUtt = mkCId "PhrUtt"
|
||||
cidNoPConj = mkCId "NoPConj"
|
||||
cidNoVoc = mkCId "NoVoc"
|
||||
cidUttS = mkCId "UttS"
|
||||
cidUttQS = mkCId "UttQS"
|
||||
cidUseComparA = mkCId "UseComparA"
|
||||
cidOrdSuperl = mkCId "OrdSuperl"
|
||||
cidUttImpPol = mkCId "UttImpPol"
|
||||
cidImpVP = mkCId "ImpVP"
|
||||
cidPConjConj = mkCId "PConjConj"
|
||||
cidUttNP = mkCId "UttNP"
|
||||
cidGenericCl = mkCId "GenericCl"
|
||||
cidAdAdv = mkCId "AdAdv"
|
||||
cidConsAdv = mkCId "ConsAdv"
|
||||
cidBaseAdv = mkCId "BaseAdv"
|
||||
cidConjAdv = mkCId "ConjAdv"
|
||||
cidConsVPS = mkCId "ConsVPS"
|
||||
cidBaseVPS = mkCId "BaseVPS"
|
||||
cidConjVPS = mkCId "ConjVPS"
|
||||
cidConsS = mkCId "ConsS"
|
||||
cidBaseS = mkCId "BaseS"
|
||||
cidConjS = mkCId "ConjS"
|
||||
cidSubjS = mkCId "SubjS"
|
||||
cidUttAdv = mkCId "UttAdv"
|
||||
cidApposCN = mkCId "ApposCN"
|
||||
cidUseRCl = mkCId "UseRCl"
|
||||
cidImpersCl = mkCId "ImpersCl"
|
||||
cidReflVP = mkCId "ReflVP"
|
||||
cidExistNP = mkCId "ExistNP"
|
||||
cidUseA2 = mkCId "UseA2"
|
||||
cidComplN2 = mkCId "ComplN2"
|
||||
cidAdvIAdv = mkCId "AdvIAdv"
|
||||
cidQuestIAdv = mkCId "QuestIAdv"
|
||||
cidQuestIComp = mkCId "QuestIComp"
|
||||
cidQuestSlash = mkCId "QuestSlash"
|
||||
cidCompIAdv = mkCId "CompIAdv"
|
||||
cidCompIP = mkCId "CompIP"
|
||||
cidIdetCN = mkCId "IdetCN"
|
||||
cidIdetQuant = mkCId "IdetQuant"
|
||||
cidPrepIP = mkCId "PrepIP"
|
||||
cidFocObj = mkCId "FocObj"
|
||||
cidSlashVP = mkCId "SlashVP"
|
||||
cidAdvSlash = mkCId "AdvSlash"
|
||||
cidSSubjS = mkCId "SSubjS"
|
||||
cidRelVP = mkCId "RelVP"
|
||||
cidIdRP = mkCId "IdRP"
|
||||
cidRelSlash = mkCId "RelSlash"
|
||||
cidTopAdv = mkCId "TopAdv"
|
||||
cidTopAP = mkCId "TopAP"
|
||||
cidTopObj = mkCId "TopObj"
|
||||
cidUseTop = mkCId "UseTop"
|
||||
|
||||
|
||||
-- added to Extra
|
||||
cidDropAttVV = mkCId "ComplBareVV"
|
||||
cidRelCN = mkCId "RelCN"
|
||||
cidPassV2' = mkCId "PassV2"
|
||||
cidVPSlashAP = mkCId "PPartAP"
|
||||
cidReflCN = mkCId "ReflIdPron"
|
||||
cidReflIdPron = mkCId "ReflIdPron"
|
||||
cidPredetAdvF = mkCId "PredetAdvF"
|
||||
|
||||
cidCompPronAQ = mkCId "CompPronAQ"
|
||||
cidQuantPronAQ = mkCId "QuantPronAQ"
|
||||
|
||||
--not present in grammar
|
||||
cidCNNumNP = mkCId "CNNumNP"
|
||||
|
||||
-- words
|
||||
cidhow8much_IAdv = mkCId "how8much_IAdv"
|
||||
cidBy8agent_Prep = mkCId "by8agent_Prep"
|
||||
cidD_1 = mkCId "D_1"
|
||||
cidName = mkCId "john_PN"
|
||||
cidCan_VV = mkCId "can_VV"
|
||||
cidMust_VV = mkCId "must_VV"
|
||||
cidWant_VV = mkCId "want_VV"
|
||||
cidHave_V2 = mkCId "have_V2"
|
||||
cidGet_V2 = mkCId "faa_vb_1_1_V2" -- in lexicon, not grammar
|
||||
cidGet_VV = mkCId "faa_vb_1_1_VV" -- in lexicon, not grammar
|
||||
cidDo_V2 = mkCId "do_V2"
|
||||
cidDo_VV = mkCId "do_VV"
|
||||
cidBecome_V2 = mkCId "become_V2"
|
||||
cidBecome_VA = mkCId "become_VA"
|
||||
|
||||
-- for old translation
|
||||
cidReflSlash = mkCId "ReflSlash"
|
||||
cidSlash3V3 = mkCId "Slash3V3"
|
||||
cidSlash2V3 = mkCId "Slash2V3"
|
||||
cidSlashV2V = mkCId "Slash2V2"
|
||||
cidComplVQ = mkCId "ComplVQ"
|
||||
cidRelNP' = mkCId "RelNP'"
|
||||
193
treebanks/talbanken/MonadSP.hs
Normal file
193
treebanks/talbanken/MonadSP.hs
Normal file
@@ -0,0 +1,193 @@
|
||||
{-# LANGUAGE FlexibleInstances,
|
||||
MultiParamTypeClasses,
|
||||
ScopedTypeVariables,
|
||||
FlexibleContexts,
|
||||
UndecidableInstances #-}
|
||||
module MonadSP ( Rule(..), Grammar, grammar
|
||||
, P, parse
|
||||
, cat, word, word2, lemma, inside, insideSuff, transform
|
||||
, many, many1, opt
|
||||
, optEat, consume, wordlookup,write
|
||||
) where
|
||||
import Data.Tree
|
||||
import Data.Char
|
||||
import Data.List
|
||||
import qualified Data.Map as Map
|
||||
import Control.Monad
|
||||
import Control.Monad.State
|
||||
import Control.Monad.Writer
|
||||
import PGF hiding (Tree,parse)
|
||||
|
||||
|
||||
infix 1 :->
|
||||
|
||||
|
||||
data Rule m t e = t :-> P t e m e
|
||||
type Grammar m t e = t -> PGF -> Morpho -> [Tree t] -> m e
|
||||
|
||||
instance Show t => Show (Rule m t e) where
|
||||
show (t :-> x) = show t
|
||||
|
||||
|
||||
grammar :: (MonadWriter [String] m,MonadState s m,Ord t,Show t,Show e)
|
||||
=> ([e] -> e) -> [Rule m t e] -> Grammar m t e
|
||||
grammar def rules = gr
|
||||
where
|
||||
gr = \tag -> do
|
||||
|
||||
let retry = \pgf m ts -> case ts of
|
||||
[Node w []] -> return (def [])
|
||||
trs -> def `liftM` sequence [ gr tag pgf m trs'
|
||||
| Node tag trs' <- trs]
|
||||
|
||||
case Map.lookup tag pmap of
|
||||
Just f -> \pgf m ts -> do
|
||||
stored <- get
|
||||
r <- unP f gr pgf m ts
|
||||
case r of
|
||||
Just (e,[]) -> return e
|
||||
Just (e,xs) -> tell ["Rest parse"] >> retry pgf m ts -- use xs here?
|
||||
Nothing -> put stored >> retry pgf m ts
|
||||
Nothing -> retry
|
||||
|
||||
-- If many rules match, try all of them (mplus)
|
||||
pmap = Map.fromListWith mplus (map (\(t :-> r) -> (t,r)) rules)
|
||||
|
||||
newtype P t e m a = P {unP :: Grammar m t e -> PGF -> Morpho -> [Tree t] -> m (Maybe (a,[Tree t]))}
|
||||
|
||||
instance Monad m => Monad (P t e m) where
|
||||
return x = P $ \gr pgf m ts -> return (Just (x,ts))
|
||||
f >>= g = P $ \gr pgf m ts -> unP f gr pgf m ts >>= \r -> case r of
|
||||
Just (x,ts') -> unP (g x) gr pgf m ts'
|
||||
Nothing -> return Nothing
|
||||
|
||||
instance MonadState s m => MonadPlus (P t e m) where
|
||||
mzero = P $ \gr pgf m ts -> return Nothing
|
||||
mplus f g = P $ \gr pgf m ts -> do
|
||||
store <- get
|
||||
res <- unP f gr pgf m ts
|
||||
case res of
|
||||
Just x -> return (Just x)
|
||||
Nothing -> put store >> unP g gr pgf m ts
|
||||
|
||||
instance MonadState s m => MonadState s (P t e m) where
|
||||
put s = P $ \gr pgf m ts -> put s >> return (Just ((),ts))
|
||||
get = P $ \gr pgf m ts -> get >>= \s -> return (Just (s,ts))
|
||||
|
||||
instance MonadWriter w m => MonadWriter w (P t e m) where
|
||||
tell w = P $ \gr pgf m ts -> tell w >> return (Just ((),ts))
|
||||
listen = error "listen not implemented for P"
|
||||
pass = error "pass not implemented for P"
|
||||
|
||||
-- write x = tell [x]
|
||||
write :: MonadWriter [w] m => w -> P t e m ()
|
||||
write = tell . return
|
||||
|
||||
instance MonadTrans (P t e) where
|
||||
lift m = P $ \gr p morpho ts -> m >>= \r -> return (Just (r,ts))
|
||||
|
||||
parse :: Monad m => Grammar m t e -> PGF -> Morpho -> Tree t -> m e
|
||||
parse gr pgf morpho (Node tag ts) = gr tag pgf morpho ts
|
||||
|
||||
silent m = (m,[])
|
||||
speak s (m,w) = (m,s:w)
|
||||
speaks s (m,w) = (m,s++w)
|
||||
addS s m = (m,s)
|
||||
add s m = (m,[s])
|
||||
|
||||
|
||||
cat :: (Monad m,Eq t,Show t) => [t] -> P [t] e m e
|
||||
cat tag = P $ \gr pgf morpho ts ->
|
||||
case ts of
|
||||
Node tag1 ts1 : ts | tag `isPrefixOf` tag1
|
||||
-> gr tag1 pgf morpho ts1 >>= \r -> return (Just (r,ts))
|
||||
_ -> return Nothing
|
||||
|
||||
word :: (Monad m,Show t,Eq t) => [t] -> P [t] e m [t]
|
||||
word tag = P $ \gr pgf morpho ts -> return $
|
||||
case ts of
|
||||
(Node tag1 [Node w []] : ts) | tag `isPrefixOf` tag1
|
||||
-> Just (w,ts)
|
||||
_ -> Nothing
|
||||
|
||||
|
||||
word2 :: (Monad m,Eq t) => t -> P t e m t
|
||||
word2 tag = P $ \gr pgf morpho ts -> return $
|
||||
case ts of
|
||||
(Node tag1 [Node tag2 [Node w []]] : ts) | tag == tag1 -> Just (w,ts)
|
||||
_ -> Nothing
|
||||
|
||||
|
||||
inside, insideSuff :: (MonadWriter [String] m,Eq t,Show t)=> [t] -> P [t] e m a -> P [t] e m a
|
||||
insideSuff = inside' isSuffixOf
|
||||
inside = inside' isPrefixOf
|
||||
|
||||
inside' :: (MonadWriter [String] m,Eq t,Show t)=>
|
||||
([t] -> [t] -> Bool) -> [t] -> P [t] e m a -> P [t] e m a
|
||||
inside' isEq tag f = P $ \gr pgf morpho ts ->
|
||||
case ts of
|
||||
Node tag1 ts1 : ts | tag `isEq` tag1 -> do
|
||||
tell [show tag++" "++show tag1]
|
||||
unP f gr pgf morpho ts1 >>= \r -> case r of
|
||||
Just (x,[]) -> return (Just (x,ts))
|
||||
Just (x,xs) -> tell ["inside fail "++show xs] >> return Nothing
|
||||
Nothing -> return Nothing
|
||||
_ -> return Nothing
|
||||
|
||||
|
||||
magicLookup :: String -> String -> String -> Morpho -> PGF -> [Lemma]
|
||||
magicLookup w cat0 an0 morpho pgf = [ lem
|
||||
| (lem, an1) <- lookupMorpho morpho (map toLower w)
|
||||
, let cat1 = maybe "" (showType []) (functionType pgf lem)
|
||||
, cat0 == cat1 && an0 == an1
|
||||
]
|
||||
|
||||
wordlookup :: MonadWriter [String] m => String -> String -> String -> P String e m CId
|
||||
wordlookup w cat0 an0 = P $ \gr pgf morpho ts -> do
|
||||
tell ["wordlookup: " ++ w ++ show ts ++ show cat0]
|
||||
let wds = magicLookup w cat0 an0 morpho pgf
|
||||
tell [show wds]
|
||||
case wds of
|
||||
(wd:_) -> return $ Just (wd,ts)
|
||||
[] -> return Nothing
|
||||
|
||||
|
||||
lemma :: MonadWriter [String] m => String -> String -> P String e m CId
|
||||
lemma cat = liftM head . lemmas cat
|
||||
|
||||
lemmas :: MonadWriter [String] m => String -> String -> P String e m [CId]
|
||||
lemmas cat0 an0 = P $ \gr pgf morpho ts -> do
|
||||
tell ["lemma: "++show ts++show cat0]
|
||||
case ts of
|
||||
Node w [] : ts -> case magicLookup w cat0 an0 morpho pgf of
|
||||
(id:ids) -> tell ["lemma ok"] >> return (Just (id:ids,ts))
|
||||
_ -> tell ["no word "++w++cat0++an0] >> return Nothing
|
||||
_ -> tell ["tried to lemma a tag"] >> return Nothing
|
||||
|
||||
|
||||
transform :: Monad m => ([Tree t] -> [Tree t]) -> P t e m ()
|
||||
transform f = P $ \gr pgf morpho ts -> return (Just ((),f ts))
|
||||
|
||||
many :: MonadState s m => P t e m a -> P t e m [a]
|
||||
many f = many1 f
|
||||
`mplus`
|
||||
return []
|
||||
|
||||
many1 :: MonadState s m => P t e m a -> P t e m [a]
|
||||
many1 f = do x <- f
|
||||
xs <- many f
|
||||
return (x:xs)
|
||||
|
||||
opt :: MonadState s m => P t e m a -> a -> P t e m a
|
||||
opt f x = mplus f (return x)
|
||||
|
||||
optEat :: MonadState s m => P t e m a -> a -> P t e m a
|
||||
optEat f x = mplus f (consume >> return x)
|
||||
|
||||
|
||||
consume :: Monad m => P t e m ()
|
||||
consume = P $ \gr pgf morpho ts ->
|
||||
case ts of
|
||||
Node x w:ws -> return (Just ((),ws))
|
||||
[] -> return (Just ((),[]))
|
||||
|
||||
40
treebanks/talbanken/State.hs
Normal file
40
treebanks/talbanken/State.hs
Normal file
@@ -0,0 +1,40 @@
|
||||
{-# LANGUAGE TemplateHaskell #-}
|
||||
module State where
|
||||
import Structure
|
||||
import PGF
|
||||
import Data.Label
|
||||
|
||||
data State = State {
|
||||
_isExist :: Bool
|
||||
, _iquant :: Bool
|
||||
, _passive :: Bool
|
||||
, _sentenceType :: SentenceType
|
||||
, _vform :: [VPForm]
|
||||
, _complement :: (VPForm,[Maybe Expr],[Bool])
|
||||
, _object :: Maybe Expr -- for objects not within the VP 'vilka äpplen äter han'
|
||||
, _tmp :: Maybe (VForm CId)
|
||||
, _anter :: Bool
|
||||
, _pol :: Maybe Bool
|
||||
, _subj :: Maybe Expr
|
||||
, _nptype :: NPType
|
||||
}
|
||||
|
||||
$(mkLabels [''State])
|
||||
|
||||
startState :: State
|
||||
startState = State {
|
||||
_isExist = False
|
||||
,_passive = False
|
||||
,_iquant = False
|
||||
,_vform = []
|
||||
,_complement = (V,[],[])
|
||||
,_sentenceType = Dir
|
||||
,_object = Nothing
|
||||
,_tmp = Nothing
|
||||
,_anter = False
|
||||
,_pol = Nothing
|
||||
,_subj = Nothing
|
||||
,_nptype = Normal}
|
||||
|
||||
|
||||
|
||||
27
treebanks/talbanken/Structure.hs
Normal file
27
treebanks/talbanken/Structure.hs
Normal file
@@ -0,0 +1,27 @@
|
||||
module Structure where
|
||||
|
||||
data SentenceType = Q | Dir | Top
|
||||
deriving (Show,Eq)
|
||||
|
||||
data NPType = Generic | Impers | Normal | Exist
|
||||
deriving (Show,Eq)
|
||||
|
||||
data VPForm = Cop | Sup | VV | VA
|
||||
| V | V2 | V2A | V2Pass
|
||||
| Fut | FutKommer
|
||||
| VS
|
||||
|
||||
deriving (Eq,Show)
|
||||
|
||||
data VForm a
|
||||
= VInf | VPart | VSupin | VImp | VTense a
|
||||
deriving (Show,Eq)
|
||||
|
||||
instance Functor VForm where
|
||||
fmap f VInf = VInf
|
||||
fmap f VPart = VPart
|
||||
fmap f VSupin = VSupin
|
||||
fmap f VImp = VImp
|
||||
fmap f (VTense t) = VTense (f t)
|
||||
|
||||
|
||||
6100
treebanks/talbanken/TBExtracted
Normal file
6100
treebanks/talbanken/TBExtracted
Normal file
File diff suppressed because it is too large
Load Diff
1798
treebanks/talbanken/Translate.hs
Normal file
1798
treebanks/talbanken/Translate.hs
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user