From ab0eaa7b62115c0cd045a4043f0529177688ebe6 Mon Sep 17 00:00:00 2001 From: aarne Date: Thu, 31 Dec 2009 11:02:26 +0000 Subject: [PATCH] morpho analysis with -missing flag, shows words outside lexicon; also invoked if parsing fails; also added pg -words to show the list of words --- src/compiler/GF/Command/Commands.hs | 51 +++++++++++++++++++----- src/runtime/haskell/PGF/Linearize.hs | 2 +- src/runtime/haskell/PGF/ShowLinearize.hs | 5 ++- 3 files changed, 44 insertions(+), 14 deletions(-) diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs index d8e2a3023..3647d2e14 100644 --- a/src/compiler/GF/Command/Commands.hs +++ b/src/compiler/GF/Command/Commands.hs @@ -359,12 +359,22 @@ allCommands cod env@(pgf, mos) = Map.fromList [ synopsis = "print the morphological analyses of all words in the string", explanation = unlines [ "Prints all the analyses of space-separated words in the input string,", - "using the morphological analyser of the actual grammar (see command pf)" + "using the morphological analyser of the actual grammar (see command pg)" ], - exec = \opts -> - return . fromString . unlines . - map prMorphoAnalysis . concatMap (morphos opts) . - concatMap words . toStrings + exec = \opts -> case opts of + _ | isOpt "missing" opts -> + return . fromString . unwords . + morphoMissing (theMorpho opts) . + concatMap words . toStrings + _ -> return . fromString . unlines . + map prMorphoAnalysis . concatMap (morphos opts) . + concatMap words . toStrings , + flags = [ + ("lang","the languages of analysis (comma-separated, no spaces)") + ], + options = [ + ("missing","show the list of unknown words in the input") + ] }), ("mq", emptyCommandInfo { @@ -395,7 +405,8 @@ allCommands cod env@(pgf, mos) = Map.fromList [ "the parser. For example if -openclass=\"A,N,V\" is given, the parser", "will accept unknown adjectives, nouns and verbs with the resource grammar." ], - exec = \opts -> returnFromExprs . concatMap (par opts) . toStrings, + exec = \opts ts -> + returnFromExprsPar opts ts $ concatMap (par opts) $ toStrings ts, flags = [ ("cat","target category of parsing"), ("lang","the languages of parsing (comma-separated, no spaces)"), @@ -424,7 +435,8 @@ allCommands cod env@(pgf, mos) = Map.fromList [ options = [ ("cats", "show just the names of abstract syntax categories"), ("fullform", "print the fullform lexicon"), - ("missing","show just the names of functions that have no linearization") + ("missing","show just the names of functions that have no linearization"), + ("words", "print the list of words") ] }), ("ph", emptyCommandInfo { @@ -770,6 +782,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [ ] where enc = encodeUnicode cod + par opts s = case optOpenTypes opts of [] -> concat [parse pgf lang (optType opts) s | lang <- optLangs opts, canParse pgf lang] open_typs -> concat [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts, canParse pgf lang] @@ -847,20 +860,28 @@ allCommands cod env@(pgf, mos) = Map.fromList [ returnFromExprs es = return $ case es of [] -> ([], "no trees found") _ -> fromExprs es + returnFromExprsPar opts ts es = return $ case es of + [] -> ([], "no trees found; unknown words:" +++ + unwords (morphoMissing (theMorpho opts) + (concatMap words (toStrings ts)))) + _ -> fromExprs es prGrammar opts | isOpt "cats" opts = return $ fromString $ unwords $ map showCId $ categories pgf | isOpt "fullform" opts = return $ fromString $ concatMap (morpho "" prFullFormLexicon) $ optLangs opts | isOpt "missing" opts = return $ fromString $ unlines $ [unwords (showCId la:":": map showCId cs) | la <- optLangs opts, let cs = missingLins pgf la] + | isOpt "words" opts = return $ fromString $ concatMap (morpho "" prAllWords) $ optLangs opts | otherwise = do fmt <- readOutputFormat (valStrOpts "printer" "pgf_pretty" opts) return $ fromString $ concatMap snd $ exportPGF noOptions fmt pgf morphos opts s = - [morpho [] (\mo -> lookupMorpho mo s) la | la <- optLangs opts] + [(s,morpho [] (\mo -> lookupMorpho mo s) la) | la <- optLangs opts] morpho z f la = maybe z f $ Map.lookup la mos + theMorpho opts = morpho (error "no morpho") id (head (optLangs opts)) + -- ps -f -g s returns g (f s) stringOps menv opts s = foldr (menvop . app) s (reverse opts) where app f = maybe id id (stringOp f) @@ -924,8 +945,16 @@ lookFlag pgf lang flag = lookConcrFlag pgf (mkCId lang) (mkCId flag) prFullFormLexicon :: Morpho -> String prFullFormLexicon mo = - unlines [w ++ " : " ++ prMorphoAnalysis ts | (w,ts) <- fullFormLexicon mo] + unlines (map prMorphoAnalysis (fullFormLexicon mo)) -prMorphoAnalysis :: [(Lemma,Analysis)] -> String -prMorphoAnalysis lps = unlines [showCId l ++ " " ++ p | (l,p) <- lps] +prAllWords :: Morpho -> String +prAllWords mo = + unwords [w | (w,_) <- fullFormLexicon mo] + +prMorphoAnalysis :: (String,[(Lemma,Analysis)]) -> String +prMorphoAnalysis (w,lps) = + unlines (w:[showCId l ++ " : " ++ p | (l,p) <- lps]) + +morphoMissing :: Morpho -> [String] -> [String] +morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w)] diff --git a/src/runtime/haskell/PGF/Linearize.hs b/src/runtime/haskell/PGF/Linearize.hs index fdd4cecb5..80d1f1acf 100644 --- a/src/runtime/haskell/PGF/Linearize.hs +++ b/src/runtime/haskell/PGF/Linearize.hs @@ -75,7 +75,7 @@ linTree pgf lang e = lin (expr2tree e) Nothing Just (DTyp _ cat _) -> compute pgf lang [K (KS (showCId x))] (lookMap tm0 cat (lindefs cnc)) Nothing -> TM (showCId x) lin (Meta i) mty = case mty of - Just (DTyp _ cat _) -> compute pgf lang [K (KS (show i))] (lookMap tm0 cat (lindefs cnc)) + Just (DTyp _ cat _) -> compute pgf lang [K (KS ("?" ++ show i))] (lookMap tm0 cat (lindefs cnc)) Nothing -> TM (show i) variants :: [Term] -> Term diff --git a/src/runtime/haskell/PGF/ShowLinearize.hs b/src/runtime/haskell/PGF/ShowLinearize.hs index dd3b997a6..fa4de86c8 100644 --- a/src/runtime/haskell/PGF/ShowLinearize.hs +++ b/src/runtime/haskell/PGF/ShowLinearize.hs @@ -100,7 +100,7 @@ markLinearize pgf lang = concat . take 1 . linearizesMark pgf lang collectWords :: PGF -> Language -> [(String, [(CId,String)])] collectWords pgf lang = concatMap collOne - [(f,c,0) | (f,(DTyp [] c _,_,_)) <- Map.toList $ funs $ abstract pgf] + [(f,c,length xs) | (f,(DTyp xs c _,_,_)) <- Map.toList $ funs $ abstract pgf] where collOne (f,c,i) = fromRec f [showCId c] (recLinearize pgf lang (foldl EApp (EFun f) (replicate i (EMeta 888)))) @@ -108,6 +108,7 @@ collectWords pgf lang = RR rs -> concat [fromRec f v t | (_,t) <- rs] RT rs -> concat [fromRec f (p:v) t | (p,t) <- rs] RFV rs -> concatMap (fromRec f v) rs - RS s -> [(s,[(f,unwords (reverse v))])] + RS s -> [(w,[(f,unwords (reverse v))]) | w <- words s, w /= "?888"] --- +-- RS s -> [(s,[(f,unwords (reverse v))])] RCon c -> [] ---- inherent