mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-13 23:09:31 -06:00
lexer documentation in help and tutorial updated
This commit is contained in:
@@ -4277,24 +4277,12 @@ In linearization, we use a corresponding **unlexer**:
|
||||
|
||||
===Most common lexers and unlexers===
|
||||
|
||||
|| lexer | description ||
|
||||
| ``words`` | (default) tokens are separated by spaces or newlines
|
||||
| ``literals`` | like words, but integer and string literals recognized
|
||||
| ``chars`` | each character is a token
|
||||
| ``code`` | program code conventions (uses Haskell's lex)
|
||||
| ``text`` | with conventions on punctuation and capital letters
|
||||
| ``codelit`` | like code, but recognize literals (unknown words as strings)
|
||||
| ``textlit`` | like text, but recognize literals (unknown words as strings)
|
||||
|
||||
|| unlexer | description ||
|
||||
| ``unwords`` | (default) space-separated token list
|
||||
| ``text`` | format as text: punctuation, capitals, paragraph <p>
|
||||
| ``code`` | format as code (spacing, indentation)
|
||||
| ``textlit`` | like text, but remove string literal quotes
|
||||
| ``codelit`` | like code, but remove string literal quotes
|
||||
| ``concat`` | remove all spaces
|
||||
|
||||
%TODO: update the names
|
||||
|| lexer | unlexer | description ||
|
||||
| ``chars`` | ``unchars`` | each character is a token
|
||||
| ``lexcode`` | ``unlexcode`` | program code conventions (uses Haskell's lex)
|
||||
| ``lexmixed`` | ``unlexmixed`` | like text, but anything between $ signs is treated as code
|
||||
| ``lextext`` | ``unlextext`` | with conventions on punctuation and capitals
|
||||
| ``words`` | ``unwords`` | (default) tokens separated by space characters
|
||||
|
||||
%TODO: also document alphabet encodings - although somewhere else
|
||||
|
||||
|
||||
@@ -360,11 +360,10 @@ allCommands cod pgf = Map.fromList [
|
||||
"To see transliteration tables, use command ut."
|
||||
],
|
||||
examples = [
|
||||
"l (EAdd 3 4) | ps -code -- linearize code-like output",
|
||||
"ps -lexer=code | p -cat=Exp -- parse code-like input",
|
||||
"gr -cat=QCl | l | ps -bind -to_utf8 -- linearization output from LangFin",
|
||||
"ps -from_utf8 \"jag ?r h?r\" | p -- parser in LangSwe in UTF8 terminal",
|
||||
"ps -to_devanagari -to_utf8 \"A-p\" -- show Devanagari in UTF8 terminal"
|
||||
"l (EAdd 3 4) | ps -code -- linearize code-like output",
|
||||
"ps -lexer=code | p -cat=Exp -- parse code-like input",
|
||||
"gr -cat=QCl | l | ps -bind -- linearization output from LangFin",
|
||||
"ps -to_devanagari \"A-p\" -- show Devanagari in UTF8 terminal"
|
||||
],
|
||||
exec = \opts -> return . fromString . stringOps (map prOpt opts) . toString,
|
||||
options = stringOpOptions
|
||||
@@ -626,7 +625,7 @@ stringOpOptions = [
|
||||
("from_devanagari","from unicode to GF Devanagari transliteration"),
|
||||
("from_thai","from unicode to GF Telugu transliteration"),
|
||||
("from_thai","from unicode to GF Thai transliteration"),
|
||||
("from_utf8","decode from utf8"),
|
||||
("from_utf8","decode from utf8 (default)"),
|
||||
("lextext","text-like lexer"),
|
||||
("lexcode","code-like lexer"),
|
||||
("lexmixed","mixture of text and code (code between $...$)"),
|
||||
@@ -636,7 +635,7 @@ stringOpOptions = [
|
||||
("to_html","wrap in a html file with linebreaks"),
|
||||
("to_telugu","from GF Telugu transliteration to unicode"),
|
||||
("to_thai","from GF Thai transliteration to unicode"),
|
||||
("to_utf8","encode to utf8"),
|
||||
("to_utf8","encode to utf8 (default)"),
|
||||
("unlextext","text-like unlexer"),
|
||||
("unlexcode","code-like unlexer"),
|
||||
("unlexmixed","mixture of text and code (code between $...$)"),
|
||||
|
||||
Reference in New Issue
Block a user