1
0
forked from GitHub/gf-core

UTF8 coding as ps command options, also -bind

This commit is contained in:
aarne
2008-06-15 13:25:22 +00:00
parent 486d21cd7a
commit 8c3111e36a
3 changed files with 34 additions and 8 deletions

View File

@@ -83,11 +83,10 @@ is available for
GF was born in 1998 at Xerox Research Centre Europe, Grenoble in the project GF was born in 1998 at Xerox Research Centre Europe, Grenoble in the project
Multilingual Document Authoring. At Xerox, it was used for prototypes including Multilingual Document Authoring. At Xerox, it was used for prototypes including
- restaurant phrase book in 6 languages a restaurant phrase book in 6 languages,
- database queries in 7 languages a database query system in 7 languages,
- alarm system instructions in 5 languages a formalization of alarm system instructions with translations to 5 languages, and
- medical drug descriptions in 2 languages an authoring system for medical drug descriptions in 2 languages.
Later projects using GF and involving third parties include, in chronological order, Later projects using GF and involving third parties include, in chronological order,
- GF-Alfa: natural language interface to formal proofs - GF-Alfa: natural language interface to formal proofs

View File

@@ -65,7 +65,9 @@ commandHelp :: Bool -> (String,CommandInfo) -> String
commandHelp full (co,info) = unlines $ [ commandHelp full (co,info) = unlines $ [
co ++ ", " ++ longname info, co ++ ", " ++ longname info,
synopsis info] ++ if full then [ synopsis info] ++ if full then [
"",
"syntax:" ++++ " " ++ syntax info, "syntax:" ++++ " " ++ syntax info,
"",
explanation info, explanation info,
"options:" ++++ unlines [" -" ++ o ++ "\t" ++ e | (o,e) <- options info], "options:" ++++ unlines [" -" ++ o ++ "\t" ++ e | (o,e) <- options info],
"flags:" ++++ unlines [" -" ++ o ++ "\t" ++ e | (o,e) <- flags info], "flags:" ++++ unlines [" -" ++ o ++ "\t" ++ e | (o,e) <- flags info],
@@ -292,18 +294,29 @@ allCommands pgf = Map.fromList [
}), }),
("ps", emptyCommandInfo { ("ps", emptyCommandInfo {
longname = "put_string", longname = "put_string",
syntax = "ps OPT? STRING",
synopsis = "return a string, possibly processed with a function", synopsis = "return a string, possibly processed with a function",
explanation = unlines [ explanation = unlines [
"Returns a string obtained by its argument string by applying", "Returns a string obtained from its argument string by applying",
"string processing functions in the order given in the command line", "string processing functions in the order given in the command line",
"option list. Thus 'ps -f -g s' returns g (f s). Typical string processors", "option list. Thus 'ps -f -g s' returns g (f s). Typical string processors",
"are lexers and unlexers." "are lexers and unlexers, but also character encoding conversions are possible.",
"The unlexers preserve the division of their input to lines."
], ],
examples = [
"l (EAdd 3 4) | ps -code -- linearize code-like output",
"ps -lexer=code | p -cat=Exp -- parse code-like input",
"gr -cat=QCl | l | ps -bind -to_utf8 -- linearization output from LangFin",
"ps -from_utf8 \"jag ?r h?r\" | p -- parser in LangSwe in UTF8 terminal"
],
exec = \opts -> return . fromString . stringOps opts . toString, exec = \opts -> return . fromString . stringOps opts . toString,
options = [ options = [
("bind","bind tokens separated by Prelude.BIND, i.e. &+"),
("from_utf8","decode from utf8"),
("lextext","text-like lexer"), ("lextext","text-like lexer"),
("lexcode","code-like lexer"), ("lexcode","code-like lexer"),
("lexmixed","mixture of text and code (code between $...$)"), ("lexmixed","mixture of text and code (code between $...$)"),
("to_utf8","encode to utf8"),
("unlextext","text-like unlexer"), ("unlextext","text-like unlexer"),
("unlexcode","code-like unlexer"), ("unlexcode","code-like unlexer"),
("unlexmixed","mixture of text and code (code between $...$)"), ("unlexmixed","mixture of text and code (code between $...$)"),

View File

@@ -1,5 +1,7 @@
module GF.Text.Lexing (stringOp) where module GF.Text.Lexing (stringOp) where
import GF.Text.UTF8
import Data.Char import Data.Char
-- lexers and unlexers - they work on space-separated word strings -- lexers and unlexers - they work on space-separated word strings
@@ -9,16 +11,21 @@ stringOp name = case name of
"lextext" -> Just $ appLexer lexText "lextext" -> Just $ appLexer lexText
"lexcode" -> Just $ appLexer lexText "lexcode" -> Just $ appLexer lexText
"lexmixed" -> Just $ appLexer lexMixed "lexmixed" -> Just $ appLexer lexMixed
"words" -> Just $ appLexer words
"bind" -> Just $ appUnlexer bindTok
"unlextext" -> Just $ appUnlexer unlexText "unlextext" -> Just $ appUnlexer unlexText
"unlexcode" -> Just $ appUnlexer unlexCode "unlexcode" -> Just $ appUnlexer unlexCode
"unlexmixed" -> Just $ appUnlexer unlexMixed "unlexmixed" -> Just $ appUnlexer unlexMixed
"unwords" -> Just $ appUnlexer unwords
"to_utf8" -> Just encodeUTF8
"from_utf8" -> Just decodeUTF8
_ -> Nothing _ -> Nothing
appLexer :: (String -> [String]) -> String -> String appLexer :: (String -> [String]) -> String -> String
appLexer f = unwords . filter (not . null) . f appLexer f = unwords . filter (not . null) . f
appUnlexer :: ([String] -> String) -> String -> String appUnlexer :: ([String] -> String) -> String -> String
appUnlexer f = f . words appUnlexer f = unlines . map (f . words) . lines
lexText :: String -> [String] lexText :: String -> [String]
lexText s = case s of lexText s = case s of
@@ -43,6 +50,13 @@ lexMixed = concat . alternate False where
_ -> [] _ -> []
lex env = if env then lexCode else lexText lex env = if env then lexCode else lexText
bindTok :: [String] -> String
bindTok ws = case ws of
w:"&+":ws2 -> w ++ bindTok ws2
w:[] -> w
w:ws2 -> w ++ " " ++ bindTok ws2
[] -> ""
unlexText :: [String] -> String unlexText :: [String] -> String
unlexText s = case s of unlexText s = case s of
w:[] -> w w:[] -> w