1
0
forked from GitHub/gf-core

UTF8 coding as ps command options, also -bind

This commit is contained in:
aarne
2008-06-15 13:25:22 +00:00
parent 49eefbb2ad
commit 15c9fada0a
3 changed files with 34 additions and 8 deletions

View File

@@ -83,11 +83,10 @@ is available for
GF was born in 1998 at Xerox Research Centre Europe, Grenoble in the project
Multilingual Document Authoring. At Xerox, it was used for prototypes including
- restaurant phrase book in 6 languages
- database queries in 7 languages
- alarm system instructions in 5 languages
- medical drug descriptions in 2 languages
a restaurant phrase book in 6 languages,
a database query system in 7 languages,
a formalization of alarm system instructions with translations to 5 languages, and
an authoring system for medical drug descriptions in 2 languages.
Later projects using GF and involving third parties include, in chronological order,
- GF-Alfa: natural language interface to formal proofs

View File

@@ -65,7 +65,9 @@ commandHelp :: Bool -> (String,CommandInfo) -> String
commandHelp full (co,info) = unlines $ [
co ++ ", " ++ longname info,
synopsis info] ++ if full then [
"",
"syntax:" ++++ " " ++ syntax info,
"",
explanation info,
"options:" ++++ unlines [" -" ++ o ++ "\t" ++ e | (o,e) <- options info],
"flags:" ++++ unlines [" -" ++ o ++ "\t" ++ e | (o,e) <- flags info],
@@ -292,18 +294,29 @@ allCommands pgf = Map.fromList [
}),
("ps", emptyCommandInfo {
longname = "put_string",
syntax = "ps OPT? STRING",
synopsis = "return a string, possibly processed with a function",
explanation = unlines [
"Returns a string obtained by its argument string by applying",
"Returns a string obtained from its argument string by applying",
"string processing functions in the order given in the command line",
"option list. Thus 'ps -f -g s' returns g (f s). Typical string processors",
"are lexers and unlexers."
"are lexers and unlexers, but also character encoding conversions are possible.",
"The unlexers preserve the division of their input to lines."
],
examples = [
"l (EAdd 3 4) | ps -code -- linearize code-like output",
"ps -lexer=code | p -cat=Exp -- parse code-like input",
"gr -cat=QCl | l | ps -bind -to_utf8 -- linearization output from LangFin",
"ps -from_utf8 \"jag ?r h?r\" | p -- parser in LangSwe in UTF8 terminal"
],
exec = \opts -> return . fromString . stringOps opts . toString,
options = [
("bind","bind tokens separated by Prelude.BIND, i.e. &+"),
("from_utf8","decode from utf8"),
("lextext","text-like lexer"),
("lexcode","code-like lexer"),
("lexmixed","mixture of text and code (code between $...$)"),
("to_utf8","encode to utf8"),
("unlextext","text-like unlexer"),
("unlexcode","code-like unlexer"),
("unlexmixed","mixture of text and code (code between $...$)"),

View File

@@ -1,5 +1,7 @@
module GF.Text.Lexing (stringOp) where
import GF.Text.UTF8
import Data.Char
-- lexers and unlexers - they work on space-separated word strings
@@ -9,16 +11,21 @@ stringOp name = case name of
-- Maps an option name to its string transformation; unknown names give Nothing.
-- Lexers: tokenize the input and rejoin the non-empty tokens with spaces.
"lextext" -> Just $ appLexer lexText
-- "lexcode" must use the code lexer; it previously reused lexText, which made
-- it indistinguishable from "lextext".
"lexcode" -> Just $ appLexer lexCode
"lexmixed" -> Just $ appLexer lexMixed
"words" -> Just $ appLexer words
-- Unlexers: take the space-separated words of the input and glue them back.
"bind" -> Just $ appUnlexer bindTok
"unlextext" -> Just $ appUnlexer unlexText
"unlexcode" -> Just $ appUnlexer unlexCode
"unlexmixed" -> Just $ appUnlexer unlexMixed
"unwords" -> Just $ appUnlexer unwords
-- Character encoding conversions, applied to the string as a whole.
"to_utf8" -> Just encodeUTF8
"from_utf8" -> Just decodeUTF8
_ -> Nothing
-- Turn a tokenizer into a string-to-string operation: run it on the input,
-- discard empty tokens, and join what is left with single spaces.
appLexer :: (String -> [String]) -> String -> String
appLexer tokenize input = unwords [tok | tok <- tokenize input, not (null tok)]
-- Lift a word-list unlexer f to an operation on whole strings.
-- NOTE(review): two equations with the same pattern follow. This listing looks
-- like a diff whose +/- markers were stripped, so the first is presumably the
-- removed version and the second its replacement -- confirm which one is live.
-- As written, only the first equation would ever be selected.
appUnlexer :: ([String] -> String) -> String -> String
-- Treats the entire input as one list of whitespace-separated words.
appUnlexer f = f . words
-- Applies f to the words of each input line separately and rejoins the
-- results with unlines, so the line division of the input is kept.
appUnlexer f = unlines . map (f . words) . lines
lexText :: String -> [String]
lexText s = case s of
@@ -43,6 +50,13 @@ lexMixed = concat . alternate False where
_ -> []
lex env = if env then lexCode else lexText
-- Join tokens with single spaces, except that a "&+" token standing between
-- two tokens is dropped and its neighbours are glued together without a space.
-- A "&+" in first position is not special and is emitted like any other token.
bindTok :: [String] -> String
bindTok [] = ""
bindTok [only] = only
bindTok (tok : "&+" : rest) = tok ++ bindTok rest
bindTok (tok : rest) = tok ++ " " ++ bindTok rest
unlexText :: [String] -> String
unlexText s = case s of
w:[] -> w