forked from GitHub/gf-core
UTF8 coding as ps command options, also -bind
This commit is contained in:
@@ -83,11 +83,10 @@ is available for
|
||||
|
||||
GF was born in 1998 at Xerox Research Centre Europe, Grenoble in the project
|
||||
Multilingual Document Authoring. At Xerox, it was used for prototypes including
|
||||
- restaurant phrase book in 6 languages
|
||||
- database queries in 7 languages
|
||||
- alarm system instructions in 5 languages
|
||||
- medical drug descriptions in 2 languages
|
||||
|
||||
a restaurant phrase book in 6 languages,
|
||||
a database query system in 7 languages,
|
||||
a formalization of alarm system instructions with translations into 5 languages, and
|
||||
an authoring system for medical drug descriptions in 2 languages.
|
||||
|
||||
Later projects using GF and involving third parties include, in chronological order,
|
||||
- GF-Alfa: natural language interface to formal proofs
|
||||
|
||||
@@ -65,7 +65,9 @@ commandHelp :: Bool -> (String,CommandInfo) -> String
|
||||
commandHelp full (co,info) = unlines $ [
|
||||
co ++ ", " ++ longname info,
|
||||
synopsis info] ++ if full then [
|
||||
"",
|
||||
"syntax:" ++++ " " ++ syntax info,
|
||||
"",
|
||||
explanation info,
|
||||
"options:" ++++ unlines [" -" ++ o ++ "\t" ++ e | (o,e) <- options info],
|
||||
"flags:" ++++ unlines [" -" ++ o ++ "\t" ++ e | (o,e) <- flags info],
|
||||
@@ -292,18 +294,29 @@ allCommands pgf = Map.fromList [
|
||||
}),
|
||||
("ps", emptyCommandInfo {
|
||||
longname = "put_string",
|
||||
syntax = "ps OPT? STRING",
|
||||
synopsis = "return a string, possibly processed with a function",
|
||||
-- Long description of the ps command; the individual lines are joined
-- with 'unlines'. Only the current wording is kept: the superseded
-- variants of the first and fourth line have been dropped, since the
-- second of them also lacked a separating comma.
explanation = unlines [
  "Returns a string obtained from its argument string by applying",
  "string processing functions in the order given in the command line",
  "option list. Thus 'ps -f -g s' returns g (f s). Typical string processors",
  "are lexers and unlexers, but also character encoding conversions are possible.",
  "The unlexers preserve the division of their input to lines."
  ],
|
||||
-- Usage examples for the ps command. The lexer/unlexer examples use the
-- option names that appear in this command's 'options' list
-- (lexcode, unlexcode) rather than the unlisted '-code' / '-lexer=code'.
examples = [
  "l (EAdd 3 4) | ps -unlexcode -- linearize code-like output",
  "ps -lexcode | p -cat=Exp -- parse code-like input",
  "gr -cat=QCl | l | ps -bind -to_utf8 -- linearization output from LangFin",
  "ps -from_utf8 \"jag ?r h?r\" | p -- parser in LangSwe in UTF8 terminal"
  ],
|
||||
exec = \opts -> return . fromString . stringOps opts . toString,
|
||||
options = [
|
||||
("bind","bind tokens separated by Prelude.BIND, i.e. &+"),
|
||||
("from_utf8","decode from utf8"),
|
||||
("lextext","text-like lexer"),
|
||||
("lexcode","code-like lexer"),
|
||||
("lexmixed","mixture of text and code (code between $...$)"),
|
||||
("to_utf8","encode to utf8"),
|
||||
("unlextext","text-like unlexer"),
|
||||
("unlexcode","code-like unlexer"),
|
||||
("unlexmixed","mixture of text and code (code between $...$)"),
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
module GF.Text.Lexing (stringOp) where
|
||||
|
||||
import GF.Text.UTF8
|
||||
|
||||
import Data.Char
|
||||
|
||||
-- lexers and unlexers - they work on space-separated word strings
|
||||
@@ -9,16 +11,21 @@ stringOp name = case name of
|
||||
-- Lexers: the named tokenizer is lifted with appLexer.
"lextext" -> Just $ appLexer lexText
-- Fixed: the code-like lexer previously reused lexText, which made
-- "lexcode" indistinguishable from "lextext".
"lexcode" -> Just $ appLexer lexCode
"lexmixed" -> Just $ appLexer lexMixed
"words" -> Just $ appLexer words
-- Unlexers: the named token joiner is lifted with appUnlexer.
"bind" -> Just $ appUnlexer bindTok
"unlextext" -> Just $ appUnlexer unlexText
"unlexcode" -> Just $ appUnlexer unlexCode
"unlexmixed" -> Just $ appUnlexer unlexMixed
"unwords" -> Just $ appUnlexer unwords
-- Character encoding conversions operate on the whole string directly.
"to_utf8" -> Just encodeUTF8
"from_utf8" -> Just decodeUTF8
-- Any other name is not a known string operation.
_ -> Nothing
|
||||
|
||||
-- Turn a lexer into a string-to-string function: run the lexer on the
-- input, drop every empty token, and join the remaining tokens with
-- single spaces.
appLexer :: (String -> [String]) -> String -> String
appLexer lexer input = unwords [tok | tok <- lexer input, not (null tok)]
|
||||
|
||||
-- Turn an unlexer into a string-to-string function. The input is split
-- into lines; each line is split into whitespace-separated words, passed
-- to the unlexer, and the results are reassembled with 'unlines', so the
-- line division of the input is preserved (every output line ends with
-- a newline).
--
-- The earlier single-pass equation (f . words) has been removed: it came
-- first and therefore shadowed this definition, collapsing all lines
-- into one.
appUnlexer :: ([String] -> String) -> String -> String
appUnlexer f = unlines . map (f . words) . lines
|
||||
|
||||
lexText :: String -> [String]
|
||||
lexText s = case s of
|
||||
@@ -43,6 +50,13 @@ lexMixed = concat . alternate False where
|
||||
_ -> []
|
||||
lex env = if env then lexCode else lexText
|
||||
|
||||
-- Join a token list into one string. A token followed by the bind
-- marker "&+" is glued directly to whatever comes after the marker;
-- all other neighbouring tokens are separated by a single space.
bindTok :: [String] -> String
bindTok []                   = ""
bindTok [tok]                = tok
bindTok (tok : "&+" : rest)  = tok ++ bindTok rest
bindTok (tok : rest)         = tok ++ " " ++ bindTok rest
|
||||
|
||||
unlexText :: [String] -> String
|
||||
unlexText s = case s of
|
||||
w:[] -> w
|
||||
|
||||
Reference in New Issue
Block a user