From d9ff5aa48cbbc7bc4388cd743aa354f60ab125a5 Mon Sep 17 00:00:00 2001
From: aarne <aarne@cs.chalmers.se>
Date: Mon, 10 Nov 2008 15:53:38 +0000
Subject: [PATCH] lexer documentation in help and tutorial updated

---
 doc/gf-tutorial.txt        | 24 ++++++------------------
 src/GF/Command/Commands.hs | 13 ++++++-------
 2 files changed, 12 insertions(+), 25 deletions(-)
diff --git a/doc/gf-tutorial.txt b/doc/gf-tutorial.txt
index cf9518cff..79d924a16 100644
--- a/doc/gf-tutorial.txt
+++ b/doc/gf-tutorial.txt
@@ -4277,24 +4277,12 @@ In linearization, we use a corresponding **unlexer**:
 
 ===Most common lexers and unlexers===
 
-  || lexer       | description ||
-  | ``words``    | (default) tokens are separated by spaces or newlines
-  | ``literals`` | like words, but integer and string literals recognized
-  | ``chars``    | each character is a token
-  | ``code``     | program code conventions (uses Haskell's lex)
-  | ``text``     | with conventions on punctuation and capital letters
-  | ``codelit``  | like code, but recognize literals (unknown words as strings)
-  | ``textlit``  | like text, but recognize literals (unknown words as strings)
-
-  || unlexer     | description ||
-  | ``unwords``  | (default) space-separated token list 
-  | ``text``     | format as text: punctuation, capitals, paragraph <p>
-  | ``code``     | format as code (spacing, indentation)
-  | ``textlit``  | like text, but remove string literal quotes
-  | ``codelit``  | like code, but remove string literal quotes
-  | ``concat``   | remove all spaces
-
-%TODO: update the names
+  || lexer       | unlexer        | description ||
+  | ``chars``    | ``unchars``    | each character is a token
+  | ``lexcode``  | ``unlexcode``  | program code conventions (uses Haskell's lex)
+  | ``lexmixed`` | ``unlexmixed`` | like text, but with code conventions between $ signs
+  | ``lextext``  | ``unlextext``  | with conventions on punctuation and capitals
+  | ``words``    | ``unwords``    | (default) tokens separated by space characters
 
 %TODO: also on alphabet encodings - although somewhere else
 
diff --git a/src/GF/Command/Commands.hs b/src/GF/Command/Commands.hs
index 4d6a29ce7..63d7ff06a 100644
--- a/src/GF/Command/Commands.hs
+++ b/src/GF/Command/Commands.hs
@@ -360,11 +360,10 @@ allCommands cod pgf = Map.fromList [
        "To see transliteration tables, use command ut." 
        ], 
      examples = [
-       "l (EAdd 3 4) | ps -code              -- linearize code-like output",
-       "ps -lexer=code | p -cat=Exp          -- parse code-like input",
-       "gr -cat=QCl | l | ps -bind -to_utf8  -- linearization output from LangFin", 
-       "ps -from_utf8 \"jag ?r h?r\" | p       -- parser in LangSwe in UTF8 terminal",
-       "ps -to_devanagari -to_utf8 \"A-p\"     -- show Devanagari in UTF8 terminal"
+       "l (EAdd 3 4) | ps -code         -- linearize code-like output",
+       "ps -lexer=code | p -cat=Exp     -- parse code-like input",
+       "gr -cat=QCl | l | ps -bind      -- linearization output from LangFin", 
+       "ps -to_devanagari \"A-p\"     -- show Devanagari in UTF8 terminal"
        ],
      exec = \opts -> return . fromString . stringOps (map prOpt opts) . toString,
      options = stringOpOptions
@@ -626,7 +625,7 @@ stringOpOptions = [
        ("from_devanagari","from unicode to GF Devanagari transliteration"),
        ("from_thai","from unicode to GF Telugu transliteration"),
        ("from_thai","from unicode to GF Thai transliteration"),
-       ("from_utf8","decode from utf8"),
+       ("from_utf8","decode from utf8 (default)"),
        ("lextext","text-like lexer"),
        ("lexcode","code-like lexer"),
        ("lexmixed","mixture of text and code (code between $...$)"), 
@@ -636,7 +635,7 @@ stringOpOptions = [
        ("to_html","wrap in a html file with linebreaks"),
        ("to_telugu","from GF Telugu transliteration to unicode"),
        ("to_thai","from GF Thai transliteration to unicode"),
-       ("to_utf8","encode to utf8"),
+       ("to_utf8","encode to utf8 (default)"),
        ("unlextext","text-like unlexer"),
        ("unlexcode","code-like unlexer"),
        ("unlexmixed","mixture of text and code (code between $...$)"),