Added CFGM format (pm -printer=cfgm) and utf8 conversion for pm.

2026-06-08 17:46:32 -06:00 · 2004-08-23 07:51:36 +00:00
parent 20215c7a49
commit 2af06fd3ab
22 changed files with 1829 additions and 20 deletions
--- a/src/GF/UseGrammar/Custom.hs
+++ b/src/GF/UseGrammar/Custom.hs
@@ -47,6 +47,10 @@ import qualified ParseCF as PCF
 import qualified ConvertGrammar as Cnv
 import qualified PrintParser as Prt

+import GFC
+import qualified MkGFC as MC
+import PrintCFGrammar (prCanonAsCFGM)
+
 import MyParser

 import MoreCustom -- either small/ or big/. The one in Small is empty.
@@ -55,6 +59,23 @@ import UseIO

 import Monad

+-- character codings
+import Unicode
+import UTF8 (decodeUTF8)
+import Greek (mkGreek)
+import Arabic (mkArabic)
+import Hebrew (mkHebrew)
+import Russian (mkRussian, mkRusKOI8)
+import Ethiopic (mkEthiopic)
+import Tamil (mkTamil)
+import OCSCyrillic (mkOCSCyrillic)
+import LatinASupplement (mkLatinASupplement)
+import Devanagari (mkDevanagari)
+import Hiragana (mkJapanese)
+import ExtendedArabic (mkArabic0600)
+import ExtendedArabic (mkExtendedArabic)
+import ExtraDiacritics (mkExtraDiacritics)
+
 -- minimal version also used in Hugs. AR 2/12/2002. 

 -- databases for customizable commands. AR 21/11/2001
@@ -76,6 +97,9 @@ customGrammarParser :: CustomData (FilePath -> IOE C.CanonGrammar)
 -- grammarPrinter, "-printer=x"
 customGrammarPrinter :: CustomData (StateGrammar -> String)             

+-- multiGrammarPrinter, "-printer=x"
+customMultiGrammarPrinter :: CustomData (CanonGrammar -> String)    
+
 -- syntaxPrinter, "-printer=x"
 customSyntaxPrinter  :: CustomData (GF.Grammar -> String)        

@@ -100,6 +124,10 @@ customTokenizer      :: CustomData (StateGrammar -> String -> [CFTok])
 -- useUntokenizer, "-unlexer=x" --- should be from token list to string
 customUntokenizer    :: CustomData (StateGrammar -> String -> String)  

+-- uniCoding, "-coding=x"
+-- contains conversions from different codings to the internal
+-- unicode coding
+customUniCoding :: CustomData (String -> String)

 -- this is the way of selecting an item
 customOrDefault :: Options -> OptFun -> CustomData a -> a
@@ -185,6 +213,15 @@ customGrammarPrinter =
  ] 
  ++ moreCustomGrammarPrinter

+customMultiGrammarPrinter = -- printers over a whole CanonGrammar; declared as CustomData (CanonGrammar -> String)
+  customData "Printers for multiple grammars, selected by option -printer=x" $
+  [ -- NOTE(review): no entry is marked DEFAULT here, unlike the other printer tables; confirm which one customOrDefault picks
+   (strCI "gfcm", MC.prCanon) -- prCanon comes from MkGFC (imported qualified as MC)
+  ,(strCI "cfgm", prCanonAsCFGM) -- prCanonAsCFGM comes from PrintCFGrammar; this is the CFGM printer this commit adds
+  ]
+  ++ moreCustomMultiGrammarPrinter -- extension list from MoreCustom; it is [] in both variants touched by this commit
+
+
 customSyntaxPrinter = 
  customData "Syntax printers, selected by option -printer=x" $
  [ 
@@ -308,3 +345,25 @@ customUntokenizer =
 -- add your own untokenizers here
  ]
  ++ moreCustomUntokenizer
+
+customUniCoding = -- table of String -> String converters from a named coding to the internal unicode coding
+  customData "Alphabet codings, selected by option -coding=x" $
+  [
+   (strCI "latin1",           id) -- DEFAULT; id, so the input string is passed through unchanged
+  ,(strCI "utf8",             decodeUTF8) -- decodeUTF8 comes from the UTF8 module
+  ,(strCI "greek",            treat [] mkGreek) -- NOTE(review): 'treat' is in no explicit import list shown; presumably exported by Unicode, confirm
+  ,(strCI "hebrew",           mkHebrew)
+  ,(strCI "arabic",           mkArabic)
+  ,(strCI "russian",          treat [] mkRussian) -- wrapped with the same 'treat []' as the greek entry
+  ,(strCI "russianKOI8",      mkRusKOI8) -- mkRusKOI8 is imported from the same Russian module as mkRussian
+  ,(strCI "ethiopic",         mkEthiopic)
+  ,(strCI "tamil",            mkTamil)
+  ,(strCI "OCScyrillic",      mkOCSCyrillic) -- NOTE(review): mixed-case key; strCI presumably matches case-insensitively, confirm
+  ,(strCI "devanagari",       mkDevanagari)
+  ,(strCI "latinasupplement", mkLatinASupplement)
+  ,(strCI "japanese",         mkJapanese) -- mkJapanese is imported from the Hiragana module
+  ,(strCI "arabic0600",       mkArabic0600) -- this and the next entry both come from ExtendedArabic
+  ,(strCI "extendedarabic",   mkExtendedArabic)
+  ,(strCI "extradiacritics",  mkExtraDiacritics)
+  ]
+  ++ moreCustomUniCoding -- extension list from MoreCustom, appended after the built-in codings
--- a/src/GF/UseGrammar/MoreCustom.hs
+++ b/src/GF/UseGrammar/MoreCustom.hs
@@ -5,6 +5,7 @@ module MoreCustom where

 moreCustomGrammarParser = []
 moreCustomGrammarPrinter = []
+moreCustomMultiGrammarPrinter = [] -- no additional multi-grammar printers in this variant of MoreCustom
 moreCustomSyntaxPrinter = []
 moreCustomTermPrinter = []
 moreCustomTermCommand = []
@@ -13,3 +14,4 @@ moreCustomStringCommand = []
 moreCustomParser = []
 moreCustomTokenizer = []
 moreCustomUntokenizer = []
+moreCustomUniCoding = [] -- no additional codings in this variant of MoreCustom
--- a/src/GF/UseGrammar/RealMoreCustom.hs
+++ b/src/GF/UseGrammar/RealMoreCustom.hs
@@ -70,6 +70,8 @@ moreCustomGrammarPrinter =
 --- also include printing via grammar2syntax!
  ]

+moreCustomMultiGrammarPrinter = [] -- no additional multi-grammar printers in this variant either
+
 moreCustomSyntaxPrinter = 
  [ 
    (strCIm "gf",    S.prSyntax) -- DEFAULT
@@ -118,5 +120,9 @@ moreCustomUntokenizer =
 -- add your own untokenizers here
  ]

+moreCustomUniCoding = -- extra "-coding=x" entries that Custom.hs appends to customUniCoding; currently none
+  [
+-- add your own codings here
+  ]

 strCIm = id