forked from GitHub/gf-core
UTF8 coding as ps command options, also -bind
This commit is contained in:
@@ -83,11 +83,10 @@ is available for
|
|||||||
|
|
||||||
GF was born in 1998 at Xerox Research Centre Europe, Grenoble in the project
|
GF was born in 1998 at Xerox Research Centre Europe, Grenoble in the project
|
||||||
Multilingual Document Authoring. At Xerox, it was used for prototypes including
|
Multilingual Document Authoring. At Xerox, it was used for prototypes including
|
||||||
- restaurant phrase book in 6 languages
|
a restaurant phrase book in 6 languages,
|
||||||
- database queries in 7 languages
|
a database query system in 7 languages,
|
||||||
- alarm system instructions in 5 languages
|
a formalization of alarm system instructions with translations to 5 languages, and
|
||||||
- medical drug descriptions in 2 languages
|
an authoring system for medical drug descriptions in 2 languages.
|
||||||
|
|
||||||
|
|
||||||
Later projects using GF and involving third parties include, in chronological order,
|
Later projects using GF and involving third parties include, in chronological order,
|
||||||
- GF-Alfa: natural language interface to formal proofs
|
- GF-Alfa: natural language interface to formal proofs
|
||||||
|
|||||||
@@ -65,7 +65,9 @@ commandHelp :: Bool -> (String,CommandInfo) -> String
|
|||||||
commandHelp full (co,info) = unlines $ [
|
commandHelp full (co,info) = unlines $ [
|
||||||
co ++ ", " ++ longname info,
|
co ++ ", " ++ longname info,
|
||||||
synopsis info] ++ if full then [
|
synopsis info] ++ if full then [
|
||||||
|
"",
|
||||||
"syntax:" ++++ " " ++ syntax info,
|
"syntax:" ++++ " " ++ syntax info,
|
||||||
|
"",
|
||||||
explanation info,
|
explanation info,
|
||||||
"options:" ++++ unlines [" -" ++ o ++ "\t" ++ e | (o,e) <- options info],
|
"options:" ++++ unlines [" -" ++ o ++ "\t" ++ e | (o,e) <- options info],
|
||||||
"flags:" ++++ unlines [" -" ++ o ++ "\t" ++ e | (o,e) <- flags info],
|
"flags:" ++++ unlines [" -" ++ o ++ "\t" ++ e | (o,e) <- flags info],
|
||||||
@@ -292,18 +294,29 @@ allCommands pgf = Map.fromList [
|
|||||||
}),
|
}),
|
||||||
("ps", emptyCommandInfo {
|
("ps", emptyCommandInfo {
|
||||||
longname = "put_string",
|
longname = "put_string",
|
||||||
|
syntax = "ps OPT? STRING",
|
||||||
synopsis = "return a string, possibly processed with a function",
|
synopsis = "return a string, possibly processed with a function",
|
||||||
explanation = unlines [
|
explanation = unlines [
|
||||||
"Returns a string obtained by its argument string by applying",
|
"Returns a string obtained from its argument string by applying",
|
||||||
"string processing functions in the order given in the command line",
|
"string processing functions in the order given in the command line",
|
||||||
"option list. Thus 'ps -f -g s' returns g (f s). Typical string processors",
|
"option list. Thus 'ps -f -g s' returns g (f s). Typical string processors",
|
||||||
"are lexers and unlexers."
|
"are lexers and unlexers, but also character encoding conversions are possible.",
|
||||||
|
"The unlexers preserve the division of their input to lines."
|
||||||
],
|
],
|
||||||
|
examples = [
|
||||||
|
"l (EAdd 3 4) | ps -code -- linearize code-like output",
|
||||||
|
"ps -lexer=code | p -cat=Exp -- parse code-like input",
|
||||||
|
"gr -cat=QCl | l | ps -bind -to_utf8 -- linearization output from LangFin",
|
||||||
|
"ps -from_utf8 \"jag är här\" | p -- parser in LangSwe in UTF8 terminal"
|
||||||
|
],
|
||||||
exec = \opts -> return . fromString . stringOps opts . toString,
|
exec = \opts -> return . fromString . stringOps opts . toString,
|
||||||
options = [
|
options = [
|
||||||
|
("bind","bind tokens separated by Prelude.BIND, i.e. &+"),
|
||||||
|
("from_utf8","decode from utf8"),
|
||||||
("lextext","text-like lexer"),
|
("lextext","text-like lexer"),
|
||||||
("lexcode","code-like lexer"),
|
("lexcode","code-like lexer"),
|
||||||
("lexmixed","mixture of text and code (code between $...$)"),
|
("lexmixed","mixture of text and code (code between $...$)"),
|
||||||
|
("to_utf8","encode to utf8"),
|
||||||
("unlextext","text-like unlexer"),
|
("unlextext","text-like unlexer"),
|
||||||
("unlexcode","code-like unlexer"),
|
("unlexcode","code-like unlexer"),
|
||||||
("unlexmixed","mixture of text and code (code between $...$)"),
|
("unlexmixed","mixture of text and code (code between $...$)"),
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
module GF.Text.Lexing (stringOp) where
|
module GF.Text.Lexing (stringOp) where
|
||||||
|
|
||||||
|
import GF.Text.UTF8
|
||||||
|
|
||||||
import Data.Char
|
import Data.Char
|
||||||
|
|
||||||
-- lexers and unlexers - they work on space-separated word strings
|
-- lexers and unlexers - they work on space-separated word strings
|
||||||
@@ -9,16 +11,21 @@ stringOp name = case name of
|
|||||||
"lextext" -> Just $ appLexer lexText
|
"lextext" -> Just $ appLexer lexText
|
||||||
"lexcode" -> Just $ appLexer lexText
|
"lexcode" -> Just $ appLexer lexText
|
||||||
"lexmixed" -> Just $ appLexer lexMixed
|
"lexmixed" -> Just $ appLexer lexMixed
|
||||||
|
"words" -> Just $ appLexer words
|
||||||
|
"bind" -> Just $ appUnlexer bindTok
|
||||||
"unlextext" -> Just $ appUnlexer unlexText
|
"unlextext" -> Just $ appUnlexer unlexText
|
||||||
"unlexcode" -> Just $ appUnlexer unlexCode
|
"unlexcode" -> Just $ appUnlexer unlexCode
|
||||||
"unlexmixed" -> Just $ appUnlexer unlexMixed
|
"unlexmixed" -> Just $ appUnlexer unlexMixed
|
||||||
|
"unwords" -> Just $ appUnlexer unwords
|
||||||
|
"to_utf8" -> Just encodeUTF8
|
||||||
|
"from_utf8" -> Just decodeUTF8
|
||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
|
|
||||||
appLexer :: (String -> [String]) -> String -> String
|
appLexer :: (String -> [String]) -> String -> String
|
||||||
appLexer f = unwords . filter (not . null) . f
|
appLexer f = unwords . filter (not . null) . f
|
||||||
|
|
||||||
appUnlexer :: ([String] -> String) -> String -> String
|
appUnlexer :: ([String] -> String) -> String -> String
|
||||||
appUnlexer f = f . words
|
appUnlexer f = unlines . map (f . words) . lines
|
||||||
|
|
||||||
lexText :: String -> [String]
|
lexText :: String -> [String]
|
||||||
lexText s = case s of
|
lexText s = case s of
|
||||||
@@ -43,6 +50,13 @@ lexMixed = concat . alternate False where
|
|||||||
_ -> []
|
_ -> []
|
||||||
lex env = if env then lexCode else lexText
|
lex env = if env then lexCode else lexText
|
||||||
|
|
||||||
|
bindTok :: [String] -> String
|
||||||
|
bindTok ws = case ws of
|
||||||
|
w:"&+":ws2 -> w ++ bindTok ws2
|
||||||
|
w:[] -> w
|
||||||
|
w:ws2 -> w ++ " " ++ bindTok ws2
|
||||||
|
[] -> ""
|
||||||
|
|
||||||
unlexText :: [String] -> String
|
unlexText :: [String] -> String
|
||||||
unlexText s = case s of
|
unlexText s = case s of
|
||||||
w:[] -> w
|
w:[] -> w
|
||||||
|
|||||||
Reference in New Issue
Block a user