Adds a "chars" lexer and an "unchars" unlexer to the string operations, and
routes the output of `linear` through `stringOps opts`.

Review notes:
  * The unlexer key in GF/Text/Lexing.hs is spelled "unchars" so that it matches
    the option name advertised in GF/Command/Commands.hs.
  * Line breaks were restored from the hunk line counts; leading whitespace on
    code lines is reconstructed and must be confirmed against the repository
    before applying (TODO confirm).
  * The post-image blob id on the Lexing.hs index line was presumably computed
    before the spelling fix; regenerate the patch if exact ids matter.
  * NOTE(review): the unchanged context maps "lexcode" to lexText; confirm that
    this is intended (it is outside the scope of this patch).

diff --git a/src-3.0/GF/Command/Commands.hs b/src-3.0/GF/Command/Commands.hs
index 6cdd82d7e..650b19693 100644
--- a/src-3.0/GF/Command/Commands.hs
+++ b/src-3.0/GF/Command/Commands.hs
@@ -343,6 +343,7 @@ allCommands pgf = Map.fromList [
      exec = \opts -> return . fromString . stringOps opts . toString,
      options = [
        ("bind","bind tokens separated by Prelude.BIND, i.e. &+"),
+       ("chars","lexer that makes every non-space character a token"),
        ("from_devanagari","from unicode to GF Devanagari transliteration"),
        ("from_thai","from unicode to GF Thai transliteration"),
        ("from_utf8","decode from utf8"),
@@ -355,6 +356,7 @@ allCommands pgf = Map.fromList [
        ("unlextext","text-like unlexer"),
        ("unlexcode","code-like unlexer"),
        ("unlexmixed","mixture of text and code (code between $...$)"),
+       ("unchars","unlexer that puts no spaces between tokens"),
        ("unwords","unlexer that puts a single space between tokens (default)"),
        ("words","lexer that assumes tokens separated by spaces (default)")
        ]
@@ -499,7 +501,7 @@ allCommands pgf = Map.fromList [
      _ | isOpt "treebank" opts -> treebank opts t
      _ -> unlines [linear opts lang t | lang <- optLangs opts]
 
-   linear opts lang = case opts of
+   linear opts lang = unlex opts lang . case opts of
      _ | isOpt "all" opts -> allLinearize pgf (mkCId lang)
      _ | isOpt "table" opts -> tableLinearize pgf (mkCId lang)
      _ | isOpt "term" opts -> termLinearize pgf (mkCId lang)
@@ -510,6 +512,8 @@ allCommands pgf = Map.fromList [
      (abstractName pgf ++ ": " ++ showTree t) :
      [lang ++ ": " ++ linear opts lang t | lang <- optLangs opts]
 
+   unlex opts lang = stringOps opts
+
    optRestricted opts = restrictPGF (hasLin pgf (mkCId (optLang opts))) pgf
 
    optLangs opts = case valIdOpts "lang" "" opts of
diff --git a/src-3.0/GF/Text/Lexing.hs b/src-3.0/GF/Text/Lexing.hs
index 16391d183..beabf217b 100644
--- a/src-3.0/GF/Text/Lexing.hs
+++ b/src-3.0/GF/Text/Lexing.hs
@@ -9,11 +9,13 @@ import Data.Char
 
 stringOp :: String -> Maybe (String -> String)
 stringOp name = case name of
+  "chars" -> Just $ appLexer (filter (not . all isSpace) . map return)
   "lextext" -> Just $ appLexer lexText
   "lexcode" -> Just $ appLexer lexText
   "lexmixed" -> Just $ appLexer lexMixed
   "words" -> Just $ appLexer words
   "bind" -> Just $ appUnlexer bindTok
+  "unchars" -> Just $ appUnlexer concat
   "unlextext" -> Just $ appUnlexer unlexText
   "unlexcode" -> Just $ appUnlexer unlexCode
   "unlexmixed" -> Just $ appUnlexer unlexMixed