From 59aff851a7b6aca4ba9fac8b6fc528e3b4f8fe15 Mon Sep 17 00:00:00 2001
From: aarne
Date: Tue, 10 Jun 2008 16:00:42 +0000
Subject: [PATCH] preliminary version of morpho analysis

---
Reviewer notes (this section is ignored by git am):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of the patch; check the context lines against the tree before applying.
- The blob ids on the index lines are those of the original patch.
- morphos now builds each analyser once per command invocation instead of
  once per word; lookupMorpho uses Map.findWithDefault; the new module is
  documented. Hunk counts and the diffstat are updated accordingly.

 src-3.0/GF/Command/Commands.hs | 36 ++++++++++++++++++---
 src-3.0/PGF/Morphology.hs      | 59 ++++++++++++++++++++++++++++++++++
 src-3.0/PGF/ShowLinearize.hs   |  1 +
 3 files changed, 93 insertions(+), 3 deletions(-)
 create mode 100644 src-3.0/PGF/Morphology.hs

diff --git a/src-3.0/GF/Command/Commands.hs b/src-3.0/GF/Command/Commands.hs
index 6aa19dfba..292e802b9 100644
--- a/src-3.0/GF/Command/Commands.hs
+++ b/src-3.0/GF/Command/Commands.hs
@@ -14,6 +14,7 @@ import PGF.CId
 import PGF.ShowLinearize
 import PGF.Macros
 import PGF.Data ----
+import PGF.Morphology
 import GF.Compile.Export
 import GF.Infra.UseIO
 import GF.Data.ErrM ----
@@ -60,9 +61,9 @@ commandHelp full (co,info) = unlines $ [
   "flags: " ++ unwords (flags info)
   ] else []
 
--- this list must be kept sorted by the command name!
+-- this list no longer needs to be kept sorted by the command name
 allCommands :: PGF -> Map.Map String CommandInfo
-allCommands pgf = Map.fromAscList [
+allCommands pgf = Map.fromList [
   ("cc", emptyCommandInfo {
      longname = "compute_concrete",
      synopsis = "computes concrete syntax term using the source grammar",
@@ -144,15 +145,39 @@ allCommands pgf = Map.fromAscList [
      options = ["all","record","table","term"],
      flags = ["lang"]
      }),
+
+  ("ma", emptyCommandInfo {
+     longname = "morpho_analyse",
+     synopsis = "print the morphological analyses of all words in the string",
+     explanation = unlines [
+       "Prints all the analyses of space-separated words in the input string,",
+       "using the morphological analyser of the actual grammar (see command pf)"
+       ],
+     exec = \opts ->
+       return . fromString . unlines .
+       map prMorphoAnalysis . concatMap (morphos opts) .
+       concatMap words . toStrings
+     }),
+
   ("p", emptyCommandInfo {
      longname = "parse",
      synopsis = "parse a string to abstract syntax expression",
      explanation = "Shows all trees (expressions) returned for String by the actual\n"++
                    "grammar (overridden by the -lang flag), in the category S (overridden\n"++
                    "by the -cat flag).",
-     exec = \opts -> return . fromTrees . concatMap (par opts). toStrings,
+     exec = \opts -> return . fromTrees . concatMap (par opts) . toStrings,
      flags = ["cat","lang"]
      }),
+  ("pf", emptyCommandInfo {
+     longname = "print_fullform",
+     synopsis = "print the full-form lexicon of the actual grammar",
+     explanation = unlines [
+       "Prints all the strings in the actual grammar with their possible analyses"
+       ],
+     exec = \opts _ ->
+       return $ fromString $ concatMap
+          (prFullFormLexicon . buildMorpho pgf . mkCId) $ optLangs opts
+     }),
   ("pg", emptyCommandInfo {
      longname = "print_grammar",
      synopsis = "print the actual grammar with the given printer",
@@ -206,3 +231,8 @@ allCommands pgf = Map.fromAscList [
    prGrammar opts = case valIdOpts "printer" "" opts of
      "cats" -> unwords $ categories pgf
      v -> prPGF (read v) pgf (prCId (absname pgf))
+
+   -- one analyser per language in optLangs opts; they are bound outside the
+   -- lambda so that a single (morphos opts) shares them across all words
+   morphos opts = \s -> [lookupMorpho mo s | mo <- mos]
+     where mos = [buildMorpho pgf (mkCId la) | la <- optLangs opts]
diff --git a/src-3.0/PGF/Morphology.hs b/src-3.0/PGF/Morphology.hs
new file mode 100644
index 000000000..97def6b9a
--- /dev/null
+++ b/src-3.0/PGF/Morphology.hs
@@ -0,0 +1,59 @@
+module PGF.Morphology where
+
+import PGF.ShowLinearize
+import PGF.Data
+import PGF.CId
+
+import qualified Data.Map as Map
+import Data.List (intersperse)
+
+-- these 4 definitions depend on the datastructure used
+
+-- | Full-form lexicon: maps a string to its (lemma, analysis) pairs.
+type Morpho = Map.Map String [(Lemma,Analysis)]
+
+-- | The analyses stored for a string, or 'noAnalysis' if it is not a key.
+lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)]
+lookupMorpho mo s = Map.findWithDefault noAnalysis s mo
+
+-- | Build the lexicon of one language from 'collectWords'; the analysis
+-- lists of a string that occurs several times are appended.
+buildMorpho :: PGF -> CId -> Morpho
+buildMorpho pgf = Map.fromListWith (++) . collectWords pgf
+
+-- | One entry per string: the string, " : ", and its analyses.
+prFullFormLexicon :: Morpho -> String
+prFullFormLexicon mo =
+  unlines [w ++ " : " ++ prMorphoAnalysis ts | (w,ts) <- Map.assocs mo]
+
+-- | One line per analysis: the lemma, a space, the analysis.
+prMorphoAnalysis :: [(Lemma,Analysis)] -> String
+prMorphoAnalysis lps = unlines [l ++ " " ++ p | (l,p) <- lps]
+
+type Lemma = String
+type Analysis = String
+
+-- | The result for a string that has no entry.
+noAnalysis :: [(Lemma,Analysis)]
+noAnalysis = []
+
+-- | The strings in the record linearizations of the abstract functions
+-- whose type matches @DTyp [] c _@. Each string is paired with the
+-- function name as lemma and, as analysis, the category name followed by
+-- the first components of the @RT@ entries passed on the way to the string.
+collectWords :: PGF -> CId -> [(String, [(Lemma,Analysis)])]
+collectWords pgf lang =
+    concatMap collOne
+      [(f,c,0) | (f,(DTyp [] c _,_)) <- Map.toList $ funs $ abstract pgf]
+  where
+    -- i is always 0 here, so f is applied to an empty argument list
+    collOne (f,c,i) =
+      fromRec f [prCId c] (recLinearize pgf lang (EApp f (replicate i (EMeta 888))))
+    -- v holds the category name and the RT labels seen so far, newest first
+    fromRec f v r = case r of
+      RR  rs -> concat [fromRec f v t | (_,t) <- rs]
+      RT  rs -> concat [fromRec f (p:v) t | (p,t) <- rs]
+      RFV rs -> concatMap (fromRec f v) rs
+      RS  s  -> [(s,[(prCId f,unwords (reverse v))])]
+      RCon c -> [] ---- inherent
+
diff --git a/src-3.0/PGF/ShowLinearize.hs b/src-3.0/PGF/ShowLinearize.hs
index a1c1e476a..98a0806ba 100644
--- a/src-3.0/PGF/ShowLinearize.hs
+++ b/src-3.0/PGF/ShowLinearize.hs
@@ -1,4 +1,5 @@
 module PGF.ShowLinearize (
+  Record (..), recLinearize, --- used in PGF.Morphology
   tableLinearize,
   recordLinearize,
   termLinearize,