export the morphology API from PGF

This commit is contained in:
krasimir
2009-09-28 12:06:20 +00:00
parent 97d25fa6c8
commit bb0be1e964
5 changed files with 34 additions and 25 deletions

View File

@@ -50,12 +50,16 @@ library
PGF.Paraphrase PGF.Paraphrase
PGF.TypeCheck PGF.TypeCheck
PGF.Binary PGF.Binary
PGF.Morphology
PGF.ShowLinearize
GF.Data.MultiMap GF.Data.MultiMap
GF.Data.TrieMap GF.Data.TrieMap
GF.Data.Utilities GF.Data.Utilities
GF.Data.SortedList GF.Data.SortedList
GF.Data.Assoc GF.Data.Assoc
GF.Data.ErrM GF.Data.ErrM
GF.Data.Relation
GF.Data.Operations
-- needed only for the on demand generation of PMCFG -- needed only for the on demand generation of PMCFG
GF.Infra.GetOpt GF.Infra.GetOpt
GF.Infra.Option GF.Infra.Option

View File

@@ -730,16 +730,16 @@ allCommands cod env@(pgf, mos) = Map.fromList [
prGrammar opts prGrammar opts
| isOpt "cats" opts = return $ fromString $ unwords $ map (showType []) $ categories pgf | isOpt "cats" opts = return $ fromString $ unwords $ map (showType []) $ categories pgf
| isOpt "fullform" opts = return $ fromString $ concatMap (prFullFormLexicon . morpho) $ optLangs opts | isOpt "fullform" opts = return $ fromString $ concatMap (morpho "" prFullFormLexicon) $ optLangs opts
| isOpt "missing" opts = return $ fromString $ unlines $ [unwords (showCId la:":": map showCId cs) | | isOpt "missing" opts = return $ fromString $ unlines $ [unwords (showCId la:":": map showCId cs) |
la <- optLangs opts, let cs = missingLins pgf la] la <- optLangs opts, let cs = missingLins pgf la]
| otherwise = do fmt <- readOutputFormat (valStrOpts "printer" "pgf_pretty" opts) | otherwise = do fmt <- readOutputFormat (valStrOpts "printer" "pgf_pretty" opts)
return $ fromString $ concatMap snd $ exportPGF noOptions fmt pgf return $ fromString $ concatMap snd $ exportPGF noOptions fmt pgf
morphos opts s = morphos opts s =
[lookupMorpho (morpho la) s | la <- optLangs opts] [morpho [] (\mo -> lookupMorpho mo s) la | la <- optLangs opts]
morpho la = maybe Map.empty id $ Map.lookup la mos morpho z f la = maybe z f $ Map.lookup la mos
-- ps -f -g s returns g (f s) -- ps -f -g s returns g (f s)
stringOps menv opts s = foldr (menvop . app) s (reverse opts) where stringOps menv opts s = foldr (menvop . app) s (reverse opts) where
@@ -802,4 +802,10 @@ infinity = 256
lookFlag :: PGF -> String -> String -> Maybe String lookFlag :: PGF -> String -> String -> Maybe String
lookFlag pgf lang flag = lookConcrFlag pgf (mkCId lang) (mkCId flag) lookFlag pgf lang flag = lookConcrFlag pgf (mkCId lang) (mkCId flag)
prFullFormLexicon :: Morpho -> String
prFullFormLexicon mo =
unlines [w ++ " : " ++ prMorphoAnalysis ts | (w,ts) <- fullFormLexicon mo]
prMorphoAnalysis :: [(Lemma,Analysis)] -> String
prMorphoAnalysis lps = unlines [showCId l ++ " " ++ p | (l,p) <- lps]

View File

@@ -77,7 +77,11 @@ module PGF(
Incremental.initState, Incremental.nextState, Incremental.getCompletions, Incremental.extractTrees, Incremental.initState, Incremental.nextState, Incremental.getCompletions, Incremental.extractTrees,
-- ** Generation -- ** Generation
generateRandom, generateAll, generateAllDepth generateRandom, generateAll, generateAllDepth,
-- ** Morphological Analysis
Lemma, Analysis, Morpho,
lookupMorpho, buildMorpho
) where ) where
import PGF.CId import PGF.CId
@@ -87,6 +91,7 @@ import PGF.TypeCheck
import PGF.Paraphrase import PGF.Paraphrase
import PGF.Macros import PGF.Macros
import PGF.Expr (Tree) import PGF.Expr (Tree)
import PGF.Morphology
import PGF.Data hiding (functions) import PGF.Data hiding (functions)
import PGF.Binary import PGF.Binary
import qualified PGF.Parsing.FCFG.Active as Active import qualified PGF.Parsing.FCFG.Active as Active

View File

@@ -1,4 +1,6 @@
module PGF.Morphology where module PGF.Morphology(Lemma,Analysis,Morpho,
buildMorpho,
lookupMorpho,fullFormLexicon) where
import PGF.ShowLinearize (collectWords) import PGF.ShowLinearize (collectWords)
import PGF.Data import PGF.Data
@@ -9,24 +11,16 @@ import Data.List (intersperse)
-- these 4 definitions depend on the datastructure used -- these 4 definitions depend on the datastructure used
type Morpho = Map.Map String [(Lemma,Analysis)] type Lemma = CId
lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)]
lookupMorpho mo s = maybe noAnalysis id $ Map.lookup s mo
buildMorpho :: PGF -> CId -> Morpho
buildMorpho pgf = Map.fromListWith (++) . collectWords pgf
prFullFormLexicon :: Morpho -> String
prFullFormLexicon mo =
unlines [w ++ " : " ++ prMorphoAnalysis ts | (w,ts) <- Map.assocs mo]
prMorphoAnalysis :: [(Lemma,Analysis)] -> String
prMorphoAnalysis lps = unlines [l ++ " " ++ p | (l,p) <- lps]
type Lemma = String
type Analysis = String type Analysis = String
noAnalysis :: [(Lemma,Analysis)] newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)])
noAnalysis = []
buildMorpho :: PGF -> Language -> Morpho
buildMorpho pgf lang = Morpho (Map.fromListWith (++) (collectWords pgf lang))
lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)]
lookupMorpho (Morpho mo) s = maybe [] id $ Map.lookup s mo
fullFormLexicon :: Morpho -> [(String,[(Lemma,Analysis)])]
fullFormLexicon (Morpho mo) = Map.toList mo

View File

@@ -97,7 +97,7 @@ markLinearize pgf lang = concat . take 1 . linearizesMark pgf lang
-- for Morphology: word, lemma, tags -- for Morphology: word, lemma, tags
collectWords :: PGF -> CId -> [(String, [(String,String)])] collectWords :: PGF -> Language -> [(String, [(CId,String)])]
collectWords pgf lang = collectWords pgf lang =
concatMap collOne concatMap collOne
[(f,c,0) | (f,(DTyp [] c _,_,_)) <- Map.toList $ funs $ abstract pgf] [(f,c,0) | (f,(DTyp [] c _,_,_)) <- Map.toList $ funs $ abstract pgf]
@@ -108,6 +108,6 @@ collectWords pgf lang =
RR rs -> concat [fromRec f v t | (_,t) <- rs] RR rs -> concat [fromRec f v t | (_,t) <- rs]
RT rs -> concat [fromRec f (p:v) t | (p,t) <- rs] RT rs -> concat [fromRec f (p:v) t | (p,t) <- rs]
RFV rs -> concatMap (fromRec f v) rs RFV rs -> concatMap (fromRec f v) rs
RS s -> [(s,[(showCId f,unwords (reverse v))])] RS s -> [(s,[(f,unwords (reverse v))])]
RCon c -> [] ---- inherent RCon c -> [] ---- inherent