diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs index 22485e94b..d4e8e406b 100644 --- a/src/compiler/GF/Command/Commands.hs +++ b/src/compiler/GF/Command/Commands.hs @@ -33,6 +33,7 @@ import GF.Data.ErrM ---- import GF.Command.Abstract import GF.Command.Messages import GF.Text.Lexing +import GF.Text.Clitics import GF.Text.Transliterations import GF.Quiz @@ -200,6 +201,38 @@ allCommands env@(pgf, mos) = Map.fromList [ ] }), + ("ca", emptyCommandInfo { + longname = "clitic_analyse", + synopsis = "print the analyses of all words into stems and clitics", + explanation = unlines [ + "Analyses all words into all possible combinations of stem + clitics.", + "The analysis is returned in the format stem &+ clitic1 &+ clitic2 ...", + "which is hence the inverse of 'pt -bind'. The list of clitics is give", + "by the flag '-clitics'. The list of stems is given as the list of words", + "of the language given by the '-lang' flag." + ], + exec = \opts -> case opts of + _ | isOpt "raw" opts -> + return . fromString . + unlines . map (unwords . map (concat . intersperse "+")) . + map (getClitics (isInMorpho (optMorpho opts)) (optClitics opts)) . + concatMap words . toStrings + _ -> + return . fromStrings . + getCliticsText (isInMorpho (optMorpho opts)) (optClitics opts) . + concatMap words . toStrings, + flags = [ + ("clitics","the list of possible clitics (comma-separated, no spaces)"), + ("lang", "the language of analysis") + ], + options = [ + ("raw", "analyse each word separately (not suitable input for parser)") + ], + examples = [ + "ca -lang=Fin -clitics=ko,ni \"nukkuuko minun vaimoni\" | p -- to parse Finnish" + ] + }), + ("cc", emptyCommandInfo { longname = "compute_concrete", syntax = "cc (-all | -table | -unqual)? TERM", @@ -1113,6 +1146,10 @@ allCommands env@(pgf, mos) = Map.fromList [ optMorpho opts = morpho (error "no morpho") id (head (optLangs opts)) + optClitics opts = case valStrOpts "clitics" "" opts of + "" -> [] + cs -> map reverse $ chunks ',' cs + mexp xs = case xs of t:_ -> Just t _ -> Nothing diff --git a/src/compiler/GF/Text/Clitics.hs b/src/compiler/GF/Text/Clitics.hs new file mode 100644 index 000000000..849deb94e --- /dev/null +++ b/src/compiler/GF/Text/Clitics.hs @@ -0,0 +1,40 @@ +module GF.Text.Clitics (getClitics,getCliticsText) where + +import Data.List + +-- AR 6/2/2011 +-- Analyse word as stem+clitic whenever +-- (1) clitic is in clitic list +-- (2) either +-- (a) stem is in Lexicon +-- (b) stem can be analysed as stem0+clitic0 +-- +-- Examples: +-- Italian amarmi = amar+mi +-- Finnish autossanikohan = autossa+ni+kohan +-- +-- The analysis gives all results, including the case where the whole word is in Lexicon. +-- +-- The clitics in the list are expected to be reversed. + +getClitics :: (String -> Bool) -> [String] -> String -> [[String]] +getClitics isLex rclitics = map (reverse . map reverse) . clits . reverse where + clits rword = ifLex rword [rclit:more | + rclit <- rclitics, stem <- splits rclit rword, more <- clits stem] + splits c = maybe [] return . stripPrefix c + + ifLex w ws = if isLex (reverse w) then [w] : ws else ws + + +getCliticsText :: (String -> Bool) -> [String] -> [String] -> [String] +getCliticsText isLex rclitics = + map unwords . sequence . map (map render . getClitics isLex rclitics) + where + render = unwords . intersperse "&+" + + +-- example + +getClitics1 = getClitics exlex1 exclits1 +exlex1 = flip elem ["auto", "naise", "rahan","maa","maahan","maahankaan"] +exclits1 = map reverse ["ni","ko","han","pas","nsa","kin","kaan"] diff --git a/src/runtime/haskell/PGF/Morphology.hs b/src/runtime/haskell/PGF/Morphology.hs index d5a2d28bc..70fa70458 100644 --- a/src/runtime/haskell/PGF/Morphology.hs +++ b/src/runtime/haskell/PGF/Morphology.hs @@ -1,5 +1,5 @@ module PGF.Morphology(Lemma,Analysis,Morpho, - buildMorpho, + buildMorpho,isInMorpho, lookupMorpho,fullFormLexicon, morphoMissing,missingWordMsg) where @@ -42,6 +42,9 @@ collectWords pinfo = Map.fromListWith (++) lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)] lookupMorpho (Morpho mo) s = maybe [] id $ Map.lookup s mo +isInMorpho :: Morpho -> String -> Bool +isInMorpho (Morpho mo) s = maybe False (const True) $ Map.lookup s mo + fullFormLexicon :: Morpho -> [(String,[(Lemma,Analysis)])] fullFormLexicon (Morpho mo) = Map.toList mo