From d9ff5aa48cbbc7bc4388cd743aa354f60ab125a5 Mon Sep 17 00:00:00 2001 From: aarne Date: Mon, 10 Nov 2008 15:53:38 +0000 Subject: [PATCH] lexer documentation in help and tutorial updated --- doc/gf-tutorial.txt | 24 ++++++------------------ src/GF/Command/Commands.hs | 13 ++++++------- 2 files changed, 12 insertions(+), 25 deletions(-) diff --git a/doc/gf-tutorial.txt b/doc/gf-tutorial.txt index cf9518cff..79d924a16 100644 --- a/doc/gf-tutorial.txt +++ b/doc/gf-tutorial.txt @@ -4277,24 +4277,12 @@ In linearization, we use a corresponding **unlexer**: ===Most common lexers and unlexers=== - || lexer | description || - | ``words`` | (default) tokens are separated by spaces or newlines - | ``literals`` | like words, but integer and string literals recognized - | ``chars`` | each character is a token - | ``code`` | program code conventions (uses Haskell's lex) - | ``text`` | with conventions on punctuation and capital letters - | ``codelit`` | like code, but recognize literals (unknown words as strings) - | ``textlit`` | like text, but recognize literals (unknown words as strings) - - || unlexer | description || - | ``unwords`` | (default) space-separated token list - | ``text`` | format as text: punctuation, capitals, paragraph

- | ``code`` | format as code (spacing, indentation) - | ``textlit`` | like text, but remove string literal quotes - | ``codelit`` | like code, but remove string literal quotes - | ``concat`` | remove all spaces - -%TODO: update the names + || lexer | unlexer | description || + | ``chars`` | ``unchars`` | each character is a token + | ``lexcode`` | ``unlexcode`` | program code conventions (uses Haskell's lex) + | ``lexmixed`` | ``unlexmixed`` | like text, but between $ signs like code + | ``lextext`` | ``unlextext`` | with conventions on punctuation and capitals + | ``words`` | ``unwords`` | (default) tokens separated by space characters %TODO: also on alphabet encodings - although somewhere else diff --git a/src/GF/Command/Commands.hs b/src/GF/Command/Commands.hs index 4d6a29ce7..63d7ff06a 100644 --- a/src/GF/Command/Commands.hs +++ b/src/GF/Command/Commands.hs @@ -360,11 +360,10 @@ allCommands cod pgf = Map.fromList [ "To see transliteration tables, use command ut." ], examples = [ - "l (EAdd 3 4) | ps -code -- linearize code-like output", - "ps -lexer=code | p -cat=Exp -- parse code-like input", - "gr -cat=QCl | l | ps -bind -to_utf8 -- linearization output from LangFin", - "ps -from_utf8 \"jag ?r h?r\" | p -- parser in LangSwe in UTF8 terminal", - "ps -to_devanagari -to_utf8 \"A-p\" -- show Devanagari in UTF8 terminal" + "l (EAdd 3 4) | ps -code -- linearize code-like output", + "ps -lexer=code | p -cat=Exp -- parse code-like input", + "gr -cat=QCl | l | ps -bind -- linearization output from LangFin", + "ps -to_devanagari \"A-p\" -- show Devanagari in UTF8 terminal" ], exec = \opts -> return . fromString . stringOps (map prOpt opts) . toString, options = stringOpOptions @@ -626,7 +625,7 @@ stringOpOptions = [ ("from_devanagari","from unicode to GF Devanagari transliteration"), ("from_thai","from unicode to GF Telugu transliteration"), ("from_thai","from unicode to GF Thai transliteration"), - ("from_utf8","decode from utf8"), + ("from_utf8","decode from utf8 (default)"), ("lextext","text-like lexer"), ("lexcode","code-like lexer"), ("lexmixed","mixture of text and code (code between $...$)"), @@ -636,7 +635,7 @@ stringOpOptions = [ ("to_html","wrap in a html file with linebreaks"), ("to_telugu","from GF Telugu transliteration to unicode"), ("to_thai","from GF Thai transliteration to unicode"), - ("to_utf8","encode to utf8"), + ("to_utf8","encode to utf8 (default)"), ("unlextext","text-like unlexer"), ("unlexcode","code-like unlexer"), ("unlexmixed","mixture of text and code (code between $...$)"),