now the linearization is completely based on PMCFG

This commit is contained in:
krasimir
2010-01-17 17:05:21 +00:00
parent 61867b2b72
commit 362f333ebd
17 changed files with 250 additions and 346 deletions

View File

@@ -2,11 +2,13 @@ module PGF.Morphology(Lemma,Analysis,Morpho,
buildMorpho,
lookupMorpho,fullFormLexicon) where
import PGF.ShowLinearize (collectWords)
import PGF.Data
import PGF.CId
import PGF.Data
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.Array.IArray
import Data.List (intersperse)
-- these 4 definitions depend on the datastructure used
@@ -17,7 +19,23 @@ type Analysis = String
newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)])
buildMorpho :: PGF -> Language -> Morpho
buildMorpho pgf lang = Morpho (Map.fromListWith (++) (collectWords pgf lang))
buildMorpho pgf lang = Morpho $
case Map.lookup lang (concretes pgf) >>= parser of
Just pinfo -> collectWords pinfo
Nothing -> Map.empty
collectWords pinfo = Map.fromListWith (++)
[(t, [(fun,lbls ! l)]) | (s,e,lbls) <- Map.elems (startCats pinfo)
, fid <- [s..e]
, FApply funid _ <- maybe [] Set.toList (IntMap.lookup fid (pproductions pinfo))
, let FFun fun lins = functions pinfo ! funid
, (l,seqid) <- assocs lins
, sym <- elems (sequences pinfo ! seqid)
, t <- sym2tokns sym]
where
sym2tokns (FSymKS ts) = ts
sym2tokns (FSymKP ts alts) = ts ++ [t | Alt ts ps <- alts, t <- ts]
sym2tokns _ = []
lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)]
lookupMorpho (Morpho mo) s = maybe [] id $ Map.lookup s mo