Added CFGM format (pm -printer=cfgm) and utf8 conversion for pm.

This commit is contained in:
bringert
2004-08-23 07:51:36 +00:00
parent 20215c7a49
commit 2af06fd3ab
22 changed files with 1829 additions and 20 deletions

View File

@@ -47,6 +47,10 @@ import qualified ParseCF as PCF
import qualified ConvertGrammar as Cnv
import qualified PrintParser as Prt
import GFC
import qualified MkGFC as MC
import PrintCFGrammar (prCanonAsCFGM)
import MyParser
import MoreCustom -- either small/ or big/. The one in Small is empty.
@@ -55,6 +59,23 @@ import UseIO
import Monad
-- character codings
import Unicode
import UTF8 (decodeUTF8)
import Greek (mkGreek)
import Arabic (mkArabic)
import Hebrew (mkHebrew)
import Russian (mkRussian, mkRusKOI8)
import Ethiopic (mkEthiopic)
import Tamil (mkTamil)
import OCSCyrillic (mkOCSCyrillic)
import LatinASupplement (mkLatinASupplement)
import Devanagari (mkDevanagari)
import Hiragana (mkJapanese)
import ExtendedArabic (mkArabic0600)
import ExtendedArabic (mkExtendedArabic)
import ExtraDiacritics (mkExtraDiacritics)
-- minimal version also used in Hugs. AR 2/12/2002.
-- databases for customizable commands. AR 21/11/2001
@@ -76,6 +97,9 @@ customGrammarParser :: CustomData (FilePath -> IOE C.CanonGrammar)
-- grammarPrinter, "-printer=x"
customGrammarPrinter :: CustomData (StateGrammar -> String)
-- multiGrammarPrinter, "-printer=x"
-- Table of printers that turn a CanonGrammar into a String; the entry to
-- use is selected by the -printer=x option.
customMultiGrammarPrinter :: CustomData (CanonGrammar -> String)
-- syntaxPrinter, "-printer=x"
customSyntaxPrinter :: CustomData (GF.Grammar -> String)
@@ -100,6 +124,10 @@ customTokenizer :: CustomData (StateGrammar -> String -> [CFTok])
-- useUntokenizer, "-unlexer=x" --- should be from token list to string
customUntokenizer :: CustomData (StateGrammar -> String -> String)
-- uniCoding, "-coding=x"
-- Table of String -> String conversions from different character codings
-- to the internal Unicode coding; the entry to use is selected by the
-- -coding=x option.
customUniCoding :: CustomData (String -> String)
-- this is the way of selecting an item
customOrDefault :: Options -> OptFun -> CustomData a -> a
@@ -185,6 +213,15 @@ customGrammarPrinter =
]
++ moreCustomGrammarPrinter
-- Printers for multiple grammars, chosen with -printer=x.
-- The built-in entries come first; anything supplied by MoreCustom is
-- appended after them.
customMultiGrammarPrinter =
  customData
    "Printers for multiple grammars, selected by option -printer=x"
    (builtinPrinters ++ moreCustomMultiGrammarPrinter)
  where
    builtinPrinters =
      [ (strCI "gfcm", MC.prCanon)
      , (strCI "cfgm", prCanonAsCFGM)
      ]
customSyntaxPrinter =
customData "Syntax printers, selected by option -printer=x" $
[
@@ -308,3 +345,25 @@ customUntokenizer =
-- add your own untokenizers here
]
++ moreCustomUntokenizer
-- Alphabet codings, chosen with -coding=x.
-- Each entry pairs an option name with a String -> String conversion.
-- The built-in entries come first; anything supplied by MoreCustom is
-- appended after them.
customUniCoding =
  customData
    "Alphabet codings, selected by option -coding=x"
    (builtinCodings ++ moreCustomUniCoding)
  where
    builtinCodings =
      [ (strCI "latin1",           id) -- DEFAULT
      , (strCI "utf8",             decodeUTF8)
      , (strCI "greek",            treat [] mkGreek)
      , (strCI "hebrew",           mkHebrew)
      , (strCI "arabic",           mkArabic)
      , (strCI "russian",          treat [] mkRussian)
      , (strCI "russianKOI8",      mkRusKOI8)
      , (strCI "ethiopic",         mkEthiopic)
      , (strCI "tamil",            mkTamil)
      , (strCI "OCScyrillic",      mkOCSCyrillic)
      , (strCI "devanagari",       mkDevanagari)
      , (strCI "latinasupplement", mkLatinASupplement)
      , (strCI "japanese",         mkJapanese)
      , (strCI "arabic0600",       mkArabic0600)
      , (strCI "extendedarabic",   mkExtendedArabic)
      , (strCI "extradiacritics",  mkExtraDiacritics)
      ]

View File

@@ -5,6 +5,7 @@ module MoreCustom where
-- Extension tables, all empty in this variant of MoreCustom: no extra
-- grammar parsers, grammar printers, multi-grammar printers, syntax
-- printers, term printers or term commands are defined here.
moreCustomGrammarParser = []
moreCustomGrammarPrinter = []
moreCustomMultiGrammarPrinter = []
moreCustomSyntaxPrinter = []
moreCustomTermPrinter = []
moreCustomTermCommand = []
@@ -13,3 +14,4 @@ moreCustomStringCommand = []
-- Extension tables, all empty in this variant of MoreCustom: no extra
-- parsers, tokenizers, untokenizers or character codings are defined here.
moreCustomParser = []
moreCustomTokenizer = []
moreCustomUntokenizer = []
moreCustomUniCoding = []

View File

@@ -70,6 +70,8 @@ moreCustomGrammarPrinter =
--- also include printing via grammar2syntax!
]
-- No additional multi-grammar printers are defined here; the list is empty.
moreCustomMultiGrammarPrinter = []
moreCustomSyntaxPrinter =
[
(strCIm "gf", S.prSyntax) -- DEFAULT
@@ -118,5 +120,9 @@ moreCustomUntokenizer =
-- add your own untokenizers here
]
-- Additional character codings; currently an empty list.
moreCustomUniCoding =
[
-- add your own codings here, as further elements of this list
]
-- Identity function, applied to the option names in this module's tables
-- (e.g. strCIm "gf").
-- NOTE(review): presumably kept as a hook so the names can later be
-- wrapped the way strCI wraps them in the main tables -- confirm.
strCIm = id