mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 19:42:50 -06:00
added the GF version of Talbanken which was imported by Malin
This commit is contained in:
106
treebanks/talbanken/Format.hs
Normal file
106
treebanks/talbanken/Format.hs
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-}
|
||||||
|
module Format where
|
||||||
|
import Prelude hiding (words,id)
|
||||||
|
import Text.XML.HXT.Core
|
||||||
|
import Data.Ord
|
||||||
|
import Data.List hiding (words)
|
||||||
|
import Data.Maybe
|
||||||
|
import qualified Data.Map as M
|
||||||
|
import System.Environment
|
||||||
|
import Debug.Trace as Debug
|
||||||
|
import Data.Char
|
||||||
|
import qualified Data.Tree as T
|
||||||
|
import Control.Monad.State
|
||||||
|
|
||||||
|
-- Functions for parsing XML to a format read by the other Haskell files
|
||||||
|
|
||||||
|
data Word = W {id :: Id, word :: String, pos :: Tag}
|
||||||
|
data PhrTag = Ph {idPh :: Id, cat :: Tag, tags :: [(Tag,Id)]}
|
||||||
|
data Sentence = Sent {rootS :: Id, words :: [Word], info :: [PhrTag], ws :: Int}
|
||||||
|
type Tag = String
|
||||||
|
type Id = String
|
||||||
|
|
||||||
|
|
||||||
|
instance XmlPickler [Word] where
|
||||||
|
xpickle = xpWords
|
||||||
|
|
||||||
|
instance XmlPickler Sentence where
|
||||||
|
xpickle = xpSentence
|
||||||
|
|
||||||
|
xpSentences :: PU [Sentence]
|
||||||
|
xpSentences = xpElem "corpus"
|
||||||
|
$ xpWrap (snd, \a -> ((),a))
|
||||||
|
$ xpPair (xpElem "head" $ xpUnit) (xpElem "body" $ xpList $ xpSentence)
|
||||||
|
xpTags :: PU [PhrTag]
|
||||||
|
xpTags = xpList $ xpElem "nt"
|
||||||
|
$ xpWrap (uncurry3 Ph,\p -> (idPh p,cat p,tags p))
|
||||||
|
$ xpTriple (xpAttr "id" xpText) (xpAttr "cat" xpText)
|
||||||
|
(xpList $ xpTagMap)
|
||||||
|
|
||||||
|
xpTagMap :: PU (Tag,String)
|
||||||
|
xpTagMap = xpElem "edge"
|
||||||
|
$ xpPair (xpAttr "label" xpText)
|
||||||
|
(xpAttr "idref" xpText)
|
||||||
|
|
||||||
|
xpSentence :: PU Sentence
|
||||||
|
xpSentence = xpElem "s"
|
||||||
|
$ xpWrap (makeSentence,\s -> (rootS s,words s, info s))
|
||||||
|
$ xpElem "graph"
|
||||||
|
$ xpTriple (xpAttr "root" xpText)
|
||||||
|
( xpElem "terminals" xpWords)
|
||||||
|
( xpElem "nonterminals" xpTags)
|
||||||
|
where makeSentence (r,ws,tgs) = Sent r ws tgs (length ws)
|
||||||
|
|
||||||
|
|
||||||
|
xpWords :: PU [Word]
|
||||||
|
xpWords = xpList $ xpElem "t"
|
||||||
|
$ xpWrap (uncurry3 W,\t -> (id t, word t,pos t))
|
||||||
|
$ xpTriple (xpAttr "id" xpText)
|
||||||
|
(xpAttr "word" xpText)
|
||||||
|
(xpAttr "pos" xpText)
|
||||||
|
|
||||||
|
mainF src =
|
||||||
|
runX (xunpickleDocument xpSentences [withInputEncoding utf8
|
||||||
|
, withRemoveWS yes] src
|
||||||
|
>>> arrIO (putStrLn . unlines . map (show . toTree)))
|
||||||
|
|
||||||
|
runPickle f src =
|
||||||
|
runX (xunpickleDocument xpSentences [withInputEncoding utf8
|
||||||
|
, withRemoveWS yes] src
|
||||||
|
>>> arrIO (return . map f))
|
||||||
|
|
||||||
|
parse = runPickle toStringTree
|
||||||
|
parseIdTree = runPickle toTree
|
||||||
|
|
||||||
|
toStringTree :: Sentence -> (String,T.Tree String)
|
||||||
|
toStringTree = second (fmap snd) . toTree
|
||||||
|
|
||||||
|
toTree :: Sentence -> (String,T.Tree (Id,String))
|
||||||
|
toTree s@(Sent root ws inf _) = (root,toTree' root s)
|
||||||
|
|
||||||
|
toTree' :: String -> Sentence -> T.Tree (Id,String)
|
||||||
|
toTree' nr s@(Sent root ws inf _) =
|
||||||
|
case (lookup' nr ws,lookup'' nr inf) of
|
||||||
|
(Just w,_) -> putWord w
|
||||||
|
(_,Just p) -> putPhrase p
|
||||||
|
_ -> error $ "Error in toTree' "++show nr++" could not be found"
|
||||||
|
where putWord (W i w p) = T.Node (i,p) [T.Node (i,w) []]
|
||||||
|
putPhrase (Ph i c t) = T.Node (i,c)
|
||||||
|
$ map (\(tag,next) -> T.Node (next,tag) [toTree' next s]) t
|
||||||
|
|
||||||
|
lookup' y (w@(W x _ _):xs) | y ==x = Just w
|
||||||
|
| otherwise = lookup' y xs
|
||||||
|
lookup' y [] = Nothing
|
||||||
|
|
||||||
|
lookup'' y (w@(Ph x _ _):xs) | y ==x = Just w
|
||||||
|
| otherwise = lookup'' y xs
|
||||||
|
lookup'' y [] = Nothing
|
||||||
|
|
||||||
|
|
||||||
|
treeToSentence :: [T.Tree String] -> String
|
||||||
|
treeToSentence ts = unwords $ map extractS ts
|
||||||
|
where extractS (T.Node ws []) = ws
|
||||||
|
extractS (T.Node c ts) = unwords $ map extractS ts
|
||||||
|
|
||||||
|
showa :: T.Tree String -> String
|
||||||
|
showa (T.Node root ts) = "("++root++" "++concatMap showa ts++" )"
|
||||||
182
treebanks/talbanken/Idents.hs
Normal file
182
treebanks/talbanken/Idents.hs
Normal file
@@ -0,0 +1,182 @@
|
|||||||
|
module Idents where
|
||||||
|
|
||||||
|
import PGF
|
||||||
|
|
||||||
|
cidASimul = mkCId "ASimul"
|
||||||
|
cidAAnter = mkCId "AAnter"
|
||||||
|
cidPositAdvAdj = mkCId "PositAdvAdj"
|
||||||
|
cidPositAdVAdj = mkCId "PositAdVAdj"
|
||||||
|
cidUseCl = mkCId "UseCl"
|
||||||
|
cidUseQCl = mkCId "UseQCl"
|
||||||
|
cidPredVP = mkCId "PredVP"
|
||||||
|
cidAdjCN = mkCId "AdjCN"
|
||||||
|
cidUseN = mkCId "UseN"
|
||||||
|
cidDetQuant = mkCId "DetQuant"
|
||||||
|
cidNumSg = mkCId "NumSg"
|
||||||
|
cidNumPl = mkCId "NumPl"
|
||||||
|
cidDetCN = mkCId "DetCN"
|
||||||
|
cidIndefArt = mkCId "IndefArt"
|
||||||
|
cidDefArt = mkCId "DefArt"
|
||||||
|
cidUsePN = mkCId "UsePN"
|
||||||
|
cidUseQuantPN = mkCId "UseQuantPN"
|
||||||
|
cidSymbPN = mkCId "SymbPN"
|
||||||
|
cidMkSymb = mkCId "MkSymb"
|
||||||
|
cidUsePron = mkCId "UsePron"
|
||||||
|
cidConjNP = mkCId "ConjNP"
|
||||||
|
cidBaseNP = mkCId "BaseNP"
|
||||||
|
cidConsNP = mkCId "ConsNP"
|
||||||
|
cidConjCN = mkCId "ConjCN"
|
||||||
|
cidBaseCN = mkCId "BaseCN"
|
||||||
|
cidConsCN = mkCId "ConsCN"
|
||||||
|
cidMassNP = mkCId "MassNP"
|
||||||
|
cidAdvNP = mkCId "AdvNP"
|
||||||
|
cidTPres = mkCId "TPres"
|
||||||
|
cidTPast = mkCId "TPast"
|
||||||
|
cidTFut = mkCId "TFut"
|
||||||
|
cidTFutKommer = mkCId "TFutKommer"
|
||||||
|
cidTCond = mkCId "TCond"
|
||||||
|
cidTTAnt = mkCId "TTAnt"
|
||||||
|
cidPPos = mkCId "PPos"
|
||||||
|
cidPNeg = mkCId "PNeg"
|
||||||
|
cidComplSlash = mkCId "ComplSlash"
|
||||||
|
cidSlashV2a = mkCId "SlashV2a"
|
||||||
|
cidSlashV2A = mkCId "SlashV2A"
|
||||||
|
cidComplVS = mkCId "ComplVS"
|
||||||
|
cidUseV = mkCId "UseV"
|
||||||
|
cidAdVVP = mkCId "AdVVP"
|
||||||
|
cidAdvVP = mkCId "AdvVP"
|
||||||
|
cidAdvVPSlash = mkCId "AdvVPSlash"
|
||||||
|
cidPrepNP = mkCId "PrepNP"
|
||||||
|
cidto_Prep = mkCId "to_Prep"
|
||||||
|
cidsuch_as_Prep= mkCId "such_as_Prep"
|
||||||
|
cidPastPartAP = mkCId "PastPartAP"
|
||||||
|
cidPassV2 = mkCId "PassV2"
|
||||||
|
cidAdvS = mkCId "AdvS"
|
||||||
|
cidPositA = mkCId "PositA"
|
||||||
|
cidIDig = mkCId "IDig"
|
||||||
|
cidIIDig = mkCId "IIDig"
|
||||||
|
cidNumCard = mkCId "NumCard"
|
||||||
|
cidNumDigits = mkCId "NumDigits"
|
||||||
|
cidNumNumeral = mkCId "NumNumeral"
|
||||||
|
cidnum = mkCId "num"
|
||||||
|
cidpot2as3 = mkCId "pot2as3"
|
||||||
|
cidpot1as2 = mkCId "pot1as2"
|
||||||
|
cidpot0as1 = mkCId "pot0as1"
|
||||||
|
cidpot01 = mkCId "pot01"
|
||||||
|
cidpot0 = mkCId "pot0"
|
||||||
|
cidn7 = mkCId "n7"
|
||||||
|
cidPossPron = mkCId "PossPron"
|
||||||
|
cidCompAP = mkCId "CompAP"
|
||||||
|
cidCompNP = mkCId "CompNP"
|
||||||
|
cidCompAdv = mkCId "CompAdv"
|
||||||
|
cidUseComp = mkCId "UseComp"
|
||||||
|
cidCompoundCN = mkCId "CompoundCN"
|
||||||
|
cidDashCN = mkCId "DashCN"
|
||||||
|
cidProgrVP = mkCId "ProgrVP"
|
||||||
|
cidGerundN = mkCId "GerundN"
|
||||||
|
cidGenNP = mkCId "GenNP"
|
||||||
|
cidPredetNP = mkCId "PredetNP"
|
||||||
|
cidDetNP = mkCId "DetNP"
|
||||||
|
cidAdAP = mkCId "AdAP"
|
||||||
|
cidPositAdAAdj = mkCId "PositAdAAdj"
|
||||||
|
|
||||||
|
cidBaseAP = mkCId "BaseAP"
|
||||||
|
cidConjAP = mkCId "ConjAP"
|
||||||
|
cidAndConj = mkCId "and_Conj"
|
||||||
|
cidOrConj = mkCId "or_Conj"
|
||||||
|
cidConsAP = mkCId "ConsAP"
|
||||||
|
cidQuestVP = mkCId "QuestVP"
|
||||||
|
cidComplVV = mkCId "ComplVV"
|
||||||
|
cidComplVA = mkCId "ComplVA"
|
||||||
|
cidUseCopula = mkCId "UseCopula"
|
||||||
|
cidPhrUtt = mkCId "PhrUtt"
|
||||||
|
cidNoPConj = mkCId "NoPConj"
|
||||||
|
cidNoVoc = mkCId "NoVoc"
|
||||||
|
cidUttS = mkCId "UttS"
|
||||||
|
cidUttQS = mkCId "UttQS"
|
||||||
|
cidUseComparA = mkCId "UseComparA"
|
||||||
|
cidOrdSuperl = mkCId "OrdSuperl"
|
||||||
|
cidUttImpPol = mkCId "UttImpPol"
|
||||||
|
cidImpVP = mkCId "ImpVP"
|
||||||
|
cidPConjConj = mkCId "PConjConj"
|
||||||
|
cidUttNP = mkCId "UttNP"
|
||||||
|
cidGenericCl = mkCId "GenericCl"
|
||||||
|
cidAdAdv = mkCId "AdAdv"
|
||||||
|
cidConsAdv = mkCId "ConsAdv"
|
||||||
|
cidBaseAdv = mkCId "BaseAdv"
|
||||||
|
cidConjAdv = mkCId "ConjAdv"
|
||||||
|
cidConsVPS = mkCId "ConsVPS"
|
||||||
|
cidBaseVPS = mkCId "BaseVPS"
|
||||||
|
cidConjVPS = mkCId "ConjVPS"
|
||||||
|
cidConsS = mkCId "ConsS"
|
||||||
|
cidBaseS = mkCId "BaseS"
|
||||||
|
cidConjS = mkCId "ConjS"
|
||||||
|
cidSubjS = mkCId "SubjS"
|
||||||
|
cidUttAdv = mkCId "UttAdv"
|
||||||
|
cidApposCN = mkCId "ApposCN"
|
||||||
|
cidUseRCl = mkCId "UseRCl"
|
||||||
|
cidImpersCl = mkCId "ImpersCl"
|
||||||
|
cidReflVP = mkCId "ReflVP"
|
||||||
|
cidExistNP = mkCId "ExistNP"
|
||||||
|
cidUseA2 = mkCId "UseA2"
|
||||||
|
cidComplN2 = mkCId "ComplN2"
|
||||||
|
cidAdvIAdv = mkCId "AdvIAdv"
|
||||||
|
cidQuestIAdv = mkCId "QuestIAdv"
|
||||||
|
cidQuestIComp = mkCId "QuestIComp"
|
||||||
|
cidQuestSlash = mkCId "QuestSlash"
|
||||||
|
cidCompIAdv = mkCId "CompIAdv"
|
||||||
|
cidCompIP = mkCId "CompIP"
|
||||||
|
cidIdetCN = mkCId "IdetCN"
|
||||||
|
cidIdetQuant = mkCId "IdetQuant"
|
||||||
|
cidPrepIP = mkCId "PrepIP"
|
||||||
|
cidFocObj = mkCId "FocObj"
|
||||||
|
cidSlashVP = mkCId "SlashVP"
|
||||||
|
cidAdvSlash = mkCId "AdvSlash"
|
||||||
|
cidSSubjS = mkCId "SSubjS"
|
||||||
|
cidRelVP = mkCId "RelVP"
|
||||||
|
cidIdRP = mkCId "IdRP"
|
||||||
|
cidRelSlash = mkCId "RelSlash"
|
||||||
|
cidTopAdv = mkCId "TopAdv"
|
||||||
|
cidTopAP = mkCId "TopAP"
|
||||||
|
cidTopObj = mkCId "TopObj"
|
||||||
|
cidUseTop = mkCId "UseTop"
|
||||||
|
|
||||||
|
|
||||||
|
-- added to Extra
|
||||||
|
cidDropAttVV = mkCId "ComplBareVV"
|
||||||
|
cidRelCN = mkCId "RelCN"
|
||||||
|
cidPassV2' = mkCId "PassV2"
|
||||||
|
cidVPSlashAP = mkCId "PPartAP"
|
||||||
|
cidReflCN = mkCId "ReflIdPron"
|
||||||
|
cidReflIdPron = mkCId "ReflIdPron"
|
||||||
|
cidPredetAdvF = mkCId "PredetAdvF"
|
||||||
|
|
||||||
|
cidCompPronAQ = mkCId "CompPronAQ"
|
||||||
|
cidQuantPronAQ = mkCId "QuantPronAQ"
|
||||||
|
|
||||||
|
--not present in grammar
|
||||||
|
cidCNNumNP = mkCId "CNNumNP"
|
||||||
|
|
||||||
|
-- words
|
||||||
|
cidhow8much_IAdv = mkCId "how8much_IAdv"
|
||||||
|
cidBy8agent_Prep = mkCId "by8agent_Prep"
|
||||||
|
cidD_1 = mkCId "D_1"
|
||||||
|
cidName = mkCId "john_PN"
|
||||||
|
cidCan_VV = mkCId "can_VV"
|
||||||
|
cidMust_VV = mkCId "must_VV"
|
||||||
|
cidWant_VV = mkCId "want_VV"
|
||||||
|
cidHave_V2 = mkCId "have_V2"
|
||||||
|
cidGet_V2 = mkCId "faa_vb_1_1_V2" -- in lexicon, not grammar
|
||||||
|
cidGet_VV = mkCId "faa_vb_1_1_VV" -- in lexicon, not grammar
|
||||||
|
cidDo_V2 = mkCId "do_V2"
|
||||||
|
cidDo_VV = mkCId "do_VV"
|
||||||
|
cidBecome_V2 = mkCId "become_V2"
|
||||||
|
cidBecome_VA = mkCId "become_VA"
|
||||||
|
|
||||||
|
-- for old translation
|
||||||
|
cidReflSlash = mkCId "ReflSlash"
|
||||||
|
cidSlash3V3 = mkCId "Slash3V3"
|
||||||
|
cidSlash2V3 = mkCId "Slash2V3"
|
||||||
|
cidSlashV2V = mkCId "Slash2V2"
|
||||||
|
cidComplVQ = mkCId "ComplVQ"
|
||||||
|
cidRelNP' = mkCId "RelNP'"
|
||||||
193
treebanks/talbanken/MonadSP.hs
Normal file
193
treebanks/talbanken/MonadSP.hs
Normal file
@@ -0,0 +1,193 @@
|
|||||||
|
{-# LANGUAGE FlexibleInstances,
|
||||||
|
MultiParamTypeClasses,
|
||||||
|
ScopedTypeVariables,
|
||||||
|
FlexibleContexts,
|
||||||
|
UndecidableInstances #-}
|
||||||
|
module MonadSP ( Rule(..), Grammar, grammar
|
||||||
|
, P, parse
|
||||||
|
, cat, word, word2, lemma, inside, insideSuff, transform
|
||||||
|
, many, many1, opt
|
||||||
|
, optEat, consume, wordlookup,write
|
||||||
|
) where
|
||||||
|
import Data.Tree
|
||||||
|
import Data.Char
|
||||||
|
import Data.List
|
||||||
|
import qualified Data.Map as Map
|
||||||
|
import Control.Monad
|
||||||
|
import Control.Monad.State
|
||||||
|
import Control.Monad.Writer
|
||||||
|
import PGF hiding (Tree,parse)
|
||||||
|
|
||||||
|
|
||||||
|
infix 1 :->
|
||||||
|
|
||||||
|
|
||||||
|
data Rule m t e = t :-> P t e m e
|
||||||
|
type Grammar m t e = t -> PGF -> Morpho -> [Tree t] -> m e
|
||||||
|
|
||||||
|
instance Show t => Show (Rule m t e) where
|
||||||
|
show (t :-> x) = show t
|
||||||
|
|
||||||
|
|
||||||
|
grammar :: (MonadWriter [String] m,MonadState s m,Ord t,Show t,Show e)
|
||||||
|
=> ([e] -> e) -> [Rule m t e] -> Grammar m t e
|
||||||
|
grammar def rules = gr
|
||||||
|
where
|
||||||
|
gr = \tag -> do
|
||||||
|
|
||||||
|
let retry = \pgf m ts -> case ts of
|
||||||
|
[Node w []] -> return (def [])
|
||||||
|
trs -> def `liftM` sequence [ gr tag pgf m trs'
|
||||||
|
| Node tag trs' <- trs]
|
||||||
|
|
||||||
|
case Map.lookup tag pmap of
|
||||||
|
Just f -> \pgf m ts -> do
|
||||||
|
stored <- get
|
||||||
|
r <- unP f gr pgf m ts
|
||||||
|
case r of
|
||||||
|
Just (e,[]) -> return e
|
||||||
|
Just (e,xs) -> tell ["Rest parse"] >> retry pgf m ts -- use xs here?
|
||||||
|
Nothing -> put stored >> retry pgf m ts
|
||||||
|
Nothing -> retry
|
||||||
|
|
||||||
|
-- If many rules match, try all of them (mplus)
|
||||||
|
pmap = Map.fromListWith mplus (map (\(t :-> r) -> (t,r)) rules)
|
||||||
|
|
||||||
|
newtype P t e m a = P {unP :: Grammar m t e -> PGF -> Morpho -> [Tree t] -> m (Maybe (a,[Tree t]))}
|
||||||
|
|
||||||
|
instance Monad m => Monad (P t e m) where
|
||||||
|
return x = P $ \gr pgf m ts -> return (Just (x,ts))
|
||||||
|
f >>= g = P $ \gr pgf m ts -> unP f gr pgf m ts >>= \r -> case r of
|
||||||
|
Just (x,ts') -> unP (g x) gr pgf m ts'
|
||||||
|
Nothing -> return Nothing
|
||||||
|
|
||||||
|
instance MonadState s m => MonadPlus (P t e m) where
|
||||||
|
mzero = P $ \gr pgf m ts -> return Nothing
|
||||||
|
mplus f g = P $ \gr pgf m ts -> do
|
||||||
|
store <- get
|
||||||
|
res <- unP f gr pgf m ts
|
||||||
|
case res of
|
||||||
|
Just x -> return (Just x)
|
||||||
|
Nothing -> put store >> unP g gr pgf m ts
|
||||||
|
|
||||||
|
instance MonadState s m => MonadState s (P t e m) where
|
||||||
|
put s = P $ \gr pgf m ts -> put s >> return (Just ((),ts))
|
||||||
|
get = P $ \gr pgf m ts -> get >>= \s -> return (Just (s,ts))
|
||||||
|
|
||||||
|
instance MonadWriter w m => MonadWriter w (P t e m) where
|
||||||
|
tell w = P $ \gr pgf m ts -> tell w >> return (Just ((),ts))
|
||||||
|
listen = error "listen not implemented for P"
|
||||||
|
pass = error "pass not implemented for P"
|
||||||
|
|
||||||
|
-- write x = tell [x]
|
||||||
|
write :: MonadWriter [w] m => w -> P t e m ()
|
||||||
|
write = tell . return
|
||||||
|
|
||||||
|
instance MonadTrans (P t e) where
|
||||||
|
lift m = P $ \gr p morpho ts -> m >>= \r -> return (Just (r,ts))
|
||||||
|
|
||||||
|
parse :: Monad m => Grammar m t e -> PGF -> Morpho -> Tree t -> m e
|
||||||
|
parse gr pgf morpho (Node tag ts) = gr tag pgf morpho ts
|
||||||
|
|
||||||
|
silent m = (m,[])
|
||||||
|
speak s (m,w) = (m,s:w)
|
||||||
|
speaks s (m,w) = (m,s++w)
|
||||||
|
addS s m = (m,s)
|
||||||
|
add s m = (m,[s])
|
||||||
|
|
||||||
|
|
||||||
|
cat :: (Monad m,Eq t,Show t) => [t] -> P [t] e m e
|
||||||
|
cat tag = P $ \gr pgf morpho ts ->
|
||||||
|
case ts of
|
||||||
|
Node tag1 ts1 : ts | tag `isPrefixOf` tag1
|
||||||
|
-> gr tag1 pgf morpho ts1 >>= \r -> return (Just (r,ts))
|
||||||
|
_ -> return Nothing
|
||||||
|
|
||||||
|
word :: (Monad m,Show t,Eq t) => [t] -> P [t] e m [t]
|
||||||
|
word tag = P $ \gr pgf morpho ts -> return $
|
||||||
|
case ts of
|
||||||
|
(Node tag1 [Node w []] : ts) | tag `isPrefixOf` tag1
|
||||||
|
-> Just (w,ts)
|
||||||
|
_ -> Nothing
|
||||||
|
|
||||||
|
|
||||||
|
word2 :: (Monad m,Eq t) => t -> P t e m t
|
||||||
|
word2 tag = P $ \gr pgf morpho ts -> return $
|
||||||
|
case ts of
|
||||||
|
(Node tag1 [Node tag2 [Node w []]] : ts) | tag == tag1 -> Just (w,ts)
|
||||||
|
_ -> Nothing
|
||||||
|
|
||||||
|
|
||||||
|
inside, insideSuff :: (MonadWriter [String] m,Eq t,Show t)=> [t] -> P [t] e m a -> P [t] e m a
|
||||||
|
insideSuff = inside' isSuffixOf
|
||||||
|
inside = inside' isPrefixOf
|
||||||
|
|
||||||
|
inside' :: (MonadWriter [String] m,Eq t,Show t)=>
|
||||||
|
([t] -> [t] -> Bool) -> [t] -> P [t] e m a -> P [t] e m a
|
||||||
|
inside' isEq tag f = P $ \gr pgf morpho ts ->
|
||||||
|
case ts of
|
||||||
|
Node tag1 ts1 : ts | tag `isEq` tag1 -> do
|
||||||
|
tell [show tag++" "++show tag1]
|
||||||
|
unP f gr pgf morpho ts1 >>= \r -> case r of
|
||||||
|
Just (x,[]) -> return (Just (x,ts))
|
||||||
|
Just (x,xs) -> tell ["inside fail "++show xs] >> return Nothing
|
||||||
|
Nothing -> return Nothing
|
||||||
|
_ -> return Nothing
|
||||||
|
|
||||||
|
|
||||||
|
magicLookup :: String -> String -> String -> Morpho -> PGF -> [Lemma]
|
||||||
|
magicLookup w cat0 an0 morpho pgf = [ lem
|
||||||
|
| (lem, an1) <- lookupMorpho morpho (map toLower w)
|
||||||
|
, let cat1 = maybe "" (showType []) (functionType pgf lem)
|
||||||
|
, cat0 == cat1 && an0 == an1
|
||||||
|
]
|
||||||
|
|
||||||
|
wordlookup :: MonadWriter [String] m => String -> String -> String -> P String e m CId
|
||||||
|
wordlookup w cat0 an0 = P $ \gr pgf morpho ts -> do
|
||||||
|
tell ["wordlookup: " ++ w ++ show ts ++ show cat0]
|
||||||
|
let wds = magicLookup w cat0 an0 morpho pgf
|
||||||
|
tell [show wds]
|
||||||
|
case wds of
|
||||||
|
(wd:_) -> return $ Just (wd,ts)
|
||||||
|
[] -> return Nothing
|
||||||
|
|
||||||
|
|
||||||
|
lemma :: MonadWriter [String] m => String -> String -> P String e m CId
|
||||||
|
lemma cat = liftM head . lemmas cat
|
||||||
|
|
||||||
|
lemmas :: MonadWriter [String] m => String -> String -> P String e m [CId]
|
||||||
|
lemmas cat0 an0 = P $ \gr pgf morpho ts -> do
|
||||||
|
tell ["lemma: "++show ts++show cat0]
|
||||||
|
case ts of
|
||||||
|
Node w [] : ts -> case magicLookup w cat0 an0 morpho pgf of
|
||||||
|
(id:ids) -> tell ["lemma ok"] >> return (Just (id:ids,ts))
|
||||||
|
_ -> tell ["no word "++w++cat0++an0] >> return Nothing
|
||||||
|
_ -> tell ["tried to lemma a tag"] >> return Nothing
|
||||||
|
|
||||||
|
|
||||||
|
transform :: Monad m => ([Tree t] -> [Tree t]) -> P t e m ()
|
||||||
|
transform f = P $ \gr pgf morpho ts -> return (Just ((),f ts))
|
||||||
|
|
||||||
|
many :: MonadState s m => P t e m a -> P t e m [a]
|
||||||
|
many f = many1 f
|
||||||
|
`mplus`
|
||||||
|
return []
|
||||||
|
|
||||||
|
many1 :: MonadState s m => P t e m a -> P t e m [a]
|
||||||
|
many1 f = do x <- f
|
||||||
|
xs <- many f
|
||||||
|
return (x:xs)
|
||||||
|
|
||||||
|
opt :: MonadState s m => P t e m a -> a -> P t e m a
|
||||||
|
opt f x = mplus f (return x)
|
||||||
|
|
||||||
|
optEat :: MonadState s m => P t e m a -> a -> P t e m a
|
||||||
|
optEat f x = mplus f (consume >> return x)
|
||||||
|
|
||||||
|
|
||||||
|
consume :: Monad m => P t e m ()
|
||||||
|
consume = P $ \gr pgf morpho ts ->
|
||||||
|
case ts of
|
||||||
|
Node x w:ws -> return (Just ((),ws))
|
||||||
|
[] -> return (Just ((),[]))
|
||||||
|
|
||||||
40
treebanks/talbanken/State.hs
Normal file
40
treebanks/talbanken/State.hs
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
{-# LANGUAGE TemplateHaskell #-}
|
||||||
|
module State where
|
||||||
|
import Structure
|
||||||
|
import PGF
|
||||||
|
import Data.Label
|
||||||
|
|
||||||
|
data State = State {
|
||||||
|
_isExist :: Bool
|
||||||
|
, _iquant :: Bool
|
||||||
|
, _passive :: Bool
|
||||||
|
, _sentenceType :: SentenceType
|
||||||
|
, _vform :: [VPForm]
|
||||||
|
, _complement :: (VPForm,[Maybe Expr],[Bool])
|
||||||
|
, _object :: Maybe Expr -- for objects not within the VP 'vilka äpplen äter han'
|
||||||
|
, _tmp :: Maybe (VForm CId)
|
||||||
|
, _anter :: Bool
|
||||||
|
, _pol :: Maybe Bool
|
||||||
|
, _subj :: Maybe Expr
|
||||||
|
, _nptype :: NPType
|
||||||
|
}
|
||||||
|
|
||||||
|
$(mkLabels [''State])
|
||||||
|
|
||||||
|
startState :: State
|
||||||
|
startState = State {
|
||||||
|
_isExist = False
|
||||||
|
,_passive = False
|
||||||
|
,_iquant = False
|
||||||
|
,_vform = []
|
||||||
|
,_complement = (V,[],[])
|
||||||
|
,_sentenceType = Dir
|
||||||
|
,_object = Nothing
|
||||||
|
,_tmp = Nothing
|
||||||
|
,_anter = False
|
||||||
|
,_pol = Nothing
|
||||||
|
,_subj = Nothing
|
||||||
|
,_nptype = Normal}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
27
treebanks/talbanken/Structure.hs
Normal file
27
treebanks/talbanken/Structure.hs
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
module Structure where
|
||||||
|
|
||||||
|
data SentenceType = Q | Dir | Top
|
||||||
|
deriving (Show,Eq)
|
||||||
|
|
||||||
|
data NPType = Generic | Impers | Normal | Exist
|
||||||
|
deriving (Show,Eq)
|
||||||
|
|
||||||
|
data VPForm = Cop | Sup | VV | VA
|
||||||
|
| V | V2 | V2A | V2Pass
|
||||||
|
| Fut | FutKommer
|
||||||
|
| VS
|
||||||
|
|
||||||
|
deriving (Eq,Show)
|
||||||
|
|
||||||
|
data VForm a
|
||||||
|
= VInf | VPart | VSupin | VImp | VTense a
|
||||||
|
deriving (Show,Eq)
|
||||||
|
|
||||||
|
instance Functor VForm where
|
||||||
|
fmap f VInf = VInf
|
||||||
|
fmap f VPart = VPart
|
||||||
|
fmap f VSupin = VSupin
|
||||||
|
fmap f VImp = VImp
|
||||||
|
fmap f (VTense t) = VTense (f t)
|
||||||
|
|
||||||
|
|
||||||
6100
treebanks/talbanken/TBExtracted
Normal file
6100
treebanks/talbanken/TBExtracted
Normal file
File diff suppressed because it is too large
Load Diff
1798
treebanks/talbanken/Translate.hs
Normal file
1798
treebanks/talbanken/Translate.hs
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user