forked from GitHub/gf-core
morpho analysis with -missing flag, shows words outside lexicon; also invoked if parsing fails; also added pg -words to show the list of words
This commit is contained in:
@@ -359,12 +359,22 @@ allCommands cod env@(pgf, mos) = Map.fromList [
|
|||||||
synopsis = "print the morphological analyses of all words in the string",
|
synopsis = "print the morphological analyses of all words in the string",
|
||||||
explanation = unlines [
|
explanation = unlines [
|
||||||
"Prints all the analyses of space-separated words in the input string,",
|
"Prints all the analyses of space-separated words in the input string,",
|
||||||
"using the morphological analyser of the actual grammar (see command pf)"
|
"using the morphological analyser of the actual grammar (see command pg)"
|
||||||
],
|
],
|
||||||
exec = \opts ->
|
exec = \opts -> case opts of
|
||||||
return . fromString . unlines .
|
_ | isOpt "missing" opts ->
|
||||||
map prMorphoAnalysis . concatMap (morphos opts) .
|
return . fromString . unwords .
|
||||||
concatMap words . toStrings
|
morphoMissing (theMorpho opts) .
|
||||||
|
concatMap words . toStrings
|
||||||
|
_ -> return . fromString . unlines .
|
||||||
|
map prMorphoAnalysis . concatMap (morphos opts) .
|
||||||
|
concatMap words . toStrings ,
|
||||||
|
flags = [
|
||||||
|
("lang","the languages of analysis (comma-separated, no spaces)")
|
||||||
|
],
|
||||||
|
options = [
|
||||||
|
("missing","show the list of unknown words in the input")
|
||||||
|
]
|
||||||
}),
|
}),
|
||||||
|
|
||||||
("mq", emptyCommandInfo {
|
("mq", emptyCommandInfo {
|
||||||
@@ -395,7 +405,8 @@ allCommands cod env@(pgf, mos) = Map.fromList [
|
|||||||
"the parser. For example if -openclass=\"A,N,V\" is given, the parser",
|
"the parser. For example if -openclass=\"A,N,V\" is given, the parser",
|
||||||
"will accept unknown adjectives, nouns and verbs with the resource grammar."
|
"will accept unknown adjectives, nouns and verbs with the resource grammar."
|
||||||
],
|
],
|
||||||
exec = \opts -> returnFromExprs . concatMap (par opts) . toStrings,
|
exec = \opts ts ->
|
||||||
|
returnFromExprsPar opts ts $ concatMap (par opts) $ toStrings ts,
|
||||||
flags = [
|
flags = [
|
||||||
("cat","target category of parsing"),
|
("cat","target category of parsing"),
|
||||||
("lang","the languages of parsing (comma-separated, no spaces)"),
|
("lang","the languages of parsing (comma-separated, no spaces)"),
|
||||||
@@ -424,7 +435,8 @@ allCommands cod env@(pgf, mos) = Map.fromList [
|
|||||||
options = [
|
options = [
|
||||||
("cats", "show just the names of abstract syntax categories"),
|
("cats", "show just the names of abstract syntax categories"),
|
||||||
("fullform", "print the fullform lexicon"),
|
("fullform", "print the fullform lexicon"),
|
||||||
("missing","show just the names of functions that have no linearization")
|
("missing","show just the names of functions that have no linearization"),
|
||||||
|
("words", "print the list of words")
|
||||||
]
|
]
|
||||||
}),
|
}),
|
||||||
("ph", emptyCommandInfo {
|
("ph", emptyCommandInfo {
|
||||||
@@ -770,6 +782,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
|
|||||||
]
|
]
|
||||||
where
|
where
|
||||||
enc = encodeUnicode cod
|
enc = encodeUnicode cod
|
||||||
|
|
||||||
par opts s = case optOpenTypes opts of
|
par opts s = case optOpenTypes opts of
|
||||||
[] -> concat [parse pgf lang (optType opts) s | lang <- optLangs opts, canParse pgf lang]
|
[] -> concat [parse pgf lang (optType opts) s | lang <- optLangs opts, canParse pgf lang]
|
||||||
open_typs -> concat [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts, canParse pgf lang]
|
open_typs -> concat [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts, canParse pgf lang]
|
||||||
@@ -847,20 +860,28 @@ allCommands cod env@(pgf, mos) = Map.fromList [
|
|||||||
returnFromExprs es = return $ case es of
|
returnFromExprs es = return $ case es of
|
||||||
[] -> ([], "no trees found")
|
[] -> ([], "no trees found")
|
||||||
_ -> fromExprs es
|
_ -> fromExprs es
|
||||||
|
returnFromExprsPar opts ts es = return $ case es of
|
||||||
|
[] -> ([], "no trees found; unknown words:" +++
|
||||||
|
unwords (morphoMissing (theMorpho opts)
|
||||||
|
(concatMap words (toStrings ts))))
|
||||||
|
_ -> fromExprs es
|
||||||
|
|
||||||
prGrammar opts
|
prGrammar opts
|
||||||
| isOpt "cats" opts = return $ fromString $ unwords $ map showCId $ categories pgf
|
| isOpt "cats" opts = return $ fromString $ unwords $ map showCId $ categories pgf
|
||||||
| isOpt "fullform" opts = return $ fromString $ concatMap (morpho "" prFullFormLexicon) $ optLangs opts
|
| isOpt "fullform" opts = return $ fromString $ concatMap (morpho "" prFullFormLexicon) $ optLangs opts
|
||||||
| isOpt "missing" opts = return $ fromString $ unlines $ [unwords (showCId la:":": map showCId cs) |
|
| isOpt "missing" opts = return $ fromString $ unlines $ [unwords (showCId la:":": map showCId cs) |
|
||||||
la <- optLangs opts, let cs = missingLins pgf la]
|
la <- optLangs opts, let cs = missingLins pgf la]
|
||||||
|
| isOpt "words" opts = return $ fromString $ concatMap (morpho "" prAllWords) $ optLangs opts
|
||||||
| otherwise = do fmt <- readOutputFormat (valStrOpts "printer" "pgf_pretty" opts)
|
| otherwise = do fmt <- readOutputFormat (valStrOpts "printer" "pgf_pretty" opts)
|
||||||
return $ fromString $ concatMap snd $ exportPGF noOptions fmt pgf
|
return $ fromString $ concatMap snd $ exportPGF noOptions fmt pgf
|
||||||
|
|
||||||
morphos opts s =
|
morphos opts s =
|
||||||
[morpho [] (\mo -> lookupMorpho mo s) la | la <- optLangs opts]
|
[(s,morpho [] (\mo -> lookupMorpho mo s) la) | la <- optLangs opts]
|
||||||
|
|
||||||
morpho z f la = maybe z f $ Map.lookup la mos
|
morpho z f la = maybe z f $ Map.lookup la mos
|
||||||
|
|
||||||
|
theMorpho opts = morpho (error "no morpho") id (head (optLangs opts))
|
||||||
|
|
||||||
-- ps -f -g s returns g (f s)
|
-- ps -f -g s returns g (f s)
|
||||||
stringOps menv opts s = foldr (menvop . app) s (reverse opts) where
|
stringOps menv opts s = foldr (menvop . app) s (reverse opts) where
|
||||||
app f = maybe id id (stringOp f)
|
app f = maybe id id (stringOp f)
|
||||||
@@ -924,8 +945,16 @@ lookFlag pgf lang flag = lookConcrFlag pgf (mkCId lang) (mkCId flag)
|
|||||||
|
|
||||||
prFullFormLexicon :: Morpho -> String
|
prFullFormLexicon :: Morpho -> String
|
||||||
prFullFormLexicon mo =
|
prFullFormLexicon mo =
|
||||||
unlines [w ++ " : " ++ prMorphoAnalysis ts | (w,ts) <- fullFormLexicon mo]
|
unlines (map prMorphoAnalysis (fullFormLexicon mo))
|
||||||
|
|
||||||
prMorphoAnalysis :: [(Lemma,Analysis)] -> String
|
prAllWords :: Morpho -> String
|
||||||
prMorphoAnalysis lps = unlines [showCId l ++ " " ++ p | (l,p) <- lps]
|
prAllWords mo =
|
||||||
|
unwords [w | (w,_) <- fullFormLexicon mo]
|
||||||
|
|
||||||
|
prMorphoAnalysis :: (String,[(Lemma,Analysis)]) -> String
|
||||||
|
prMorphoAnalysis (w,lps) =
|
||||||
|
unlines (w:[showCId l ++ " : " ++ p | (l,p) <- lps])
|
||||||
|
|
||||||
|
morphoMissing :: Morpho -> [String] -> [String]
|
||||||
|
morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w)]
|
||||||
|
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ linTree pgf lang e = lin (expr2tree e) Nothing
|
|||||||
Just (DTyp _ cat _) -> compute pgf lang [K (KS (showCId x))] (lookMap tm0 cat (lindefs cnc))
|
Just (DTyp _ cat _) -> compute pgf lang [K (KS (showCId x))] (lookMap tm0 cat (lindefs cnc))
|
||||||
Nothing -> TM (showCId x)
|
Nothing -> TM (showCId x)
|
||||||
lin (Meta i) mty = case mty of
|
lin (Meta i) mty = case mty of
|
||||||
Just (DTyp _ cat _) -> compute pgf lang [K (KS (show i))] (lookMap tm0 cat (lindefs cnc))
|
Just (DTyp _ cat _) -> compute pgf lang [K (KS ("?" ++ show i))] (lookMap tm0 cat (lindefs cnc))
|
||||||
Nothing -> TM (show i)
|
Nothing -> TM (show i)
|
||||||
|
|
||||||
variants :: [Term] -> Term
|
variants :: [Term] -> Term
|
||||||
|
|||||||
@@ -100,7 +100,7 @@ markLinearize pgf lang = concat . take 1 . linearizesMark pgf lang
|
|||||||
collectWords :: PGF -> Language -> [(String, [(CId,String)])]
|
collectWords :: PGF -> Language -> [(String, [(CId,String)])]
|
||||||
collectWords pgf lang =
|
collectWords pgf lang =
|
||||||
concatMap collOne
|
concatMap collOne
|
||||||
[(f,c,0) | (f,(DTyp [] c _,_,_)) <- Map.toList $ funs $ abstract pgf]
|
[(f,c,length xs) | (f,(DTyp xs c _,_,_)) <- Map.toList $ funs $ abstract pgf]
|
||||||
where
|
where
|
||||||
collOne (f,c,i) =
|
collOne (f,c,i) =
|
||||||
fromRec f [showCId c] (recLinearize pgf lang (foldl EApp (EFun f) (replicate i (EMeta 888))))
|
fromRec f [showCId c] (recLinearize pgf lang (foldl EApp (EFun f) (replicate i (EMeta 888))))
|
||||||
@@ -108,6 +108,7 @@ collectWords pgf lang =
|
|||||||
RR rs -> concat [fromRec f v t | (_,t) <- rs]
|
RR rs -> concat [fromRec f v t | (_,t) <- rs]
|
||||||
RT rs -> concat [fromRec f (p:v) t | (p,t) <- rs]
|
RT rs -> concat [fromRec f (p:v) t | (p,t) <- rs]
|
||||||
RFV rs -> concatMap (fromRec f v) rs
|
RFV rs -> concatMap (fromRec f v) rs
|
||||||
RS s -> [(s,[(f,unwords (reverse v))])]
|
RS s -> [(w,[(f,unwords (reverse v))]) | w <- words s, w /= "?888"] ---
|
||||||
|
-- RS s -> [(s,[(f,unwords (reverse v))])]
|
||||||
RCon c -> [] ---- inherent
|
RCon c -> [] ---- inherent
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user