Finished UTF8 identifier conversion. Will probably redo it at string level, i.e. after pretty-printing instead

This commit is contained in:
bringert
2005-03-08 14:31:22 +00:00
parent d460319528
commit c8763a80e2
7 changed files with 195 additions and 59 deletions

View File

@@ -1,6 +1,7 @@
concrete NumeralsSwe of Numerals = open MorphoSwe, Prelude in { concrete NumeralsSwe of Numerals = open MorphoSwe, Prelude in {
lincat lincat
Numeral = { s : Str } ;
Digit = {s : DForm => Str} ; Digit = {s : DForm => Str} ;
Sub10 = {s : DForm => Str} ; Sub10 = {s : DForm => Str} ;

View File

@@ -5,9 +5,9 @@
-- Stability : (stable) -- Stability : (stable)
-- Portability : (portable) -- Portability : (portable)
-- --
-- > CVS $Date: 2005/02/18 19:21:06 $ -- > CVS $Date: 2005/03/08 15:31:22 $
-- > CVS $Author: peb $ -- > CVS $Author: bringert $
-- > CVS $Revision: 1.27 $ -- > CVS $Revision: 1.28 $
-- --
-- Application Programmer's Interface to GF; also used by Shell. AR 10/11/2001 -- Application Programmer's Interface to GF; also used by Shell. AR 10/11/2001
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
@@ -298,17 +298,20 @@ optPrintGrammar :: Options -> StateGrammar -> String
optPrintGrammar opts = customOrDefault opts grammarPrinter customGrammarPrinter optPrintGrammar opts = customOrDefault opts grammarPrinter customGrammarPrinter
optPrintMultiGrammar :: Options -> CanonGrammar -> String optPrintMultiGrammar :: Options -> CanonGrammar -> String
optPrintMultiGrammar opts = pmg . encode optPrintMultiGrammar opts = pmg . encodeId . encode
where where
pmg = customOrDefault opts grammarPrinter customMultiGrammarPrinter pmg = customOrDefault opts grammarPrinter customMultiGrammarPrinter
-- if -utf8 was given, convert from language specific codings -- if -utf8 was given, convert from language specific codings
encode = if oElem useUTF8 opts then mapModules moduleToUTF8 else id encode = if oElem useUTF8 opts then mapModules moduleToUTF8 else id
-- if -utf8id was given, convert identifiers to UTF8
encodeId = if oElem useUTF8id opts then grammarIdentsToUTF8 else id
moduleToUTF8 m = moduleToUTF8 m =
m{ jments = mapTree (onSnd (mapInfoTerms code)) (jments m), m{ jments = mapTree (onSnd (mapInfoTerms code)) (jments m),
flags = setFlag "coding" "utf8" (flags m) } flags = setFlag "coding" "utf8" (flags m) }
where code = onTokens (anyCodingToUTF8 (moduleOpts m)) where code = onTokens (anyCodingToUTF8 (moduleOpts m))
moduleOpts = Opts . okError . mapM CG.redFlag . flags moduleOpts = Opts . okError . mapM CG.redFlag . flags
grammarIdentsToUTF8 mgr
= MGrammar [ (identToUTF8 i, mapIdents identToUTF8 mi) | (i,mi) <- modules mgr]
optPrintSyntax :: Options -> GF.Grammar -> String optPrintSyntax :: Options -> GF.Grammar -> String
optPrintSyntax opts = customOrDefault opts grammarPrinter customSyntaxPrinter optPrintSyntax opts = customOrDefault opts grammarPrinter customSyntaxPrinter
@@ -366,3 +369,26 @@ optDecodeUTF8 gr = case getOptVal (stateOptions gr) uniCoding of
anyCodingToUTF8 :: Options -> String -> String anyCodingToUTF8 :: Options -> String -> String
anyCodingToUTF8 opts = anyCodingToUTF8 opts =
encodeUTF8 . customOrDefault opts uniCoding customUniCoding encodeUTF8 . customOrDefault opts uniCoding customUniCoding
{-
-- | Convert all text not inside double quotes to UTF8
nonLiteralsToUTF8 :: String -> String
nonLiteralsToUTF8 "" = ""
nonLiteralsToUTF8 ('"':cs) = '"' : l ++ nonLiteralsToUTF8 rs
where (l,rs) = takeStringLit cs
nonLiteralsToUTF8 (c:cs) = encodeUTF8 [c] : nonLiteralsToUTF8 cs
where
-- | Split off an initial string ended by double quotes
takeStringLit :: String -> (String,String)
takeStringLit "" = ("","")
takeStringLit
-}
-- | Convert an identifier in unicode to UTF8 encoding
identToUTF8 :: I.Ident -> I.Ident
identToUTF8 i = case i of
I.IC s -> I.IC (encodeUTF8 s)
I.IW -> I.IW
I.IV (i,s) -> I.IV (i, encodeUTF8 s)
I.IA (s,i) -> I.IA (encodeUTF8 s, i)
I.IAV (s,i1,i2) -> I.IAV (encodeUTF8 s, i2, i2)

View File

@@ -5,9 +5,9 @@
-- Stability : (stable) -- Stability : (stable)
-- Portability : (portable) -- Portability : (portable)
-- --
-- > CVS $Date: 2005/03/07 17:50:00 $ -- > CVS $Date: 2005/03/08 15:31:22 $
-- > CVS $Author: bringert $ -- > CVS $Author: bringert $
-- > CVS $Revision: 1.22 $ -- > CVS $Revision: 1.23 $
-- --
-- Macros for building and analysing terms in GFC concrete syntax. -- Macros for building and analysing terms in GFC concrete syntax.
-- --
@@ -246,44 +246,6 @@ onTokens f t = case t of
K (KP ss vs) -> K (KP (map f ss) [Var (map f x) (map f y) | Var x y <- vs]) K (KP ss vs) -> K (KP (map f ss) [Var (map f x) (map f y) | Var x y <- vs])
_ -> composSafeOp (onTokens f) t _ -> composSafeOp (onTokens f) t
-- | Apply some function to all identifiers in a GFC term
onTermIdents :: (Ident -> Ident) -> Term -> Term
onTermIdents f t = case t of
Arg av -> Arg $ case av of
A i x -> A (f i) x
AB i x y -> AB (f i) x y
I ci -> I (fc ci)
Con ci ts -> Con (fc ci) (map (onTermIdents f) ts)
LI i -> LI (f i)
R as -> R [Ass (fl l) (onTermIdents f t) | Ass l t <- as]
P t l -> P (onTermIdents f t) (fl l)
T ct cs -> T (fct ct) [Cas (map fp ps) (onTermIdents f t) | Cas ps t <- cs]
V ct ts -> V (fct ct) (map (onTermIdents f) ts)
S t1 t2 -> S (onTermIdents f t1) (onTermIdents f t2)
C t1 t2 -> C (onTermIdents f t1) (onTermIdents f t2)
FV ts -> FV (map (onTermIdents f) ts)
_ -> t
where
fc :: CIdent -> CIdent
fc (CIQ i1 i2) = CIQ (f i1) (f i2)
fl :: Label -> Label
fl l = case l of
L i -> L (f i)
_ -> l
fct :: CType -> CType
fct ct = case ct of
RecType ls -> RecType [ Lbg (fl l) (fct ct) | Lbg l ct <- ls ]
Table t1 t2 -> Table (fct t1) (fct t2)
Cn ci -> Cn (fc ci)
_ -> ct
fp :: Patt -> Patt
fp p = case p of
PC ci ps -> PC (fc ci) (map fp ps)
PV i -> PV (f i)
PR ps -> PR [PAss (fl l) (fp p) | PAss l p <- ps]
_ -> p
-- | to define compositional term functions -- | to define compositional term functions
composSafeOp :: (Term -> Term) -> Term -> Term composSafeOp :: (Term -> Term) -> Term -> Term
composSafeOp op trm = case composOp (mkMonadic op) trm of composSafeOp op trm = case composOp (mkMonadic op) trm of

View File

@@ -5,9 +5,9 @@
-- Stability : (stable) -- Stability : (stable)
-- Portability : (portable) -- Portability : (portable)
-- --
-- > CVS $Date: 2005/03/04 14:08:36 $ -- > CVS $Date: 2005/03/08 15:31:22 $
-- > CVS $Author: bringert $ -- > CVS $Author: bringert $
-- > CVS $Revision: 1.7 $ -- > CVS $Revision: 1.8 $
-- --
-- canonical GF. AR 10\/9\/2002 -- 9\/5\/2003 -- 21\/9 -- canonical GF. AR 10\/9\/2002 -- 9\/5\/2003 -- 21\/9
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
@@ -20,7 +20,8 @@ module GFC (Context,
Info(..), Info(..),
Printname, Printname,
mapInfoTerms, mapInfoTerms,
setFlag setFlag,
mapIdents
) where ) where
import AbsGFC import AbsGFC
@@ -34,6 +35,7 @@ import Operations
import qualified Modules as M import qualified Modules as M
import Char import Char
import Control.Arrow (first)
type Context = [(Ident,Exp)] type Context = [(Ident,Exp)]
@@ -45,7 +47,7 @@ type CanonModule = (Ident, CanonModInfo)
type CanonAbs = M.Module Ident Option Info type CanonAbs = M.Module Ident Option Info
data Info = data Info =
AbsCat A.Context [A.Fun] AbsCat A.Context [A.Fun]
| AbsFun A.Type A.Term | AbsFun A.Type A.Term
| AbsTrans A.Term | AbsTrans A.Term
@@ -67,4 +69,147 @@ mapInfoTerms f i = case i of
_ -> i _ -> i
setFlag :: String -> String -> [Flag] -> [Flag] setFlag :: String -> String -> [Flag] -> [Flag]
setFlag n v fs = Flg (IC n) (IC v):[f | f@(Flg (IC n') _) <- fs, n' /= n] setFlag n v fs = Flg (IC n) (IC v):[f | f@(Flg (IC n') _) <- fs, n' /= n]
-- | Apply a function to all identifiers in a module
mapIdents :: (Ident -> Ident) -> M.ModInfo Ident Flag Info -> M.ModInfo Ident Flag Info
mapIdents f mi = case mi of
M.ModMainGrammar mg -> M.ModMainGrammar (fmg mg)
M.ModMod m -> M.ModMod (fm m)
M.ModWith mt s i is oss -> M.ModWith (fmt mt) s (f i) (map f is) (map fos oss)
where
fmg :: M.MainGrammar Ident -> M.MainGrammar Ident
fmg (M.MainGrammar i mcs) = M.MainGrammar (f i) (map fmc mcs)
fmc :: M.MainConcreteSpec Ident -> M.MainConcreteSpec Ident
fmc (M.MainConcreteSpec i1 i2 mos1 mos2)
= M.MainConcreteSpec (f i1) (f i2) (fmap fos mos1) (fmap fos mos2)
fos :: M.OpenSpec Ident -> M.OpenSpec Ident
fos os = case os of
M.OSimple q i -> M.OSimple q (f i)
M.OQualif q i1 i2 -> M.OQualif q (f i1) (f i2)
fm :: M.Module Ident Flag Info -> M.Module Ident Flag Info
fm m@(M.Module{ M.mtype = mt, M.flags = fl, M.extends = ex,
M.opens = os, M.jments = js}) =
m { M.mtype = fmt mt, M.flags = map ffl fl, M.extends = map f ex,
M.opens = map fos os,
M.jments = mapTree (\(i,t) -> (f i, fi t)) js }
fmt :: M.ModuleType Ident -> M.ModuleType Ident
fmt t = case t of
M.MTTransfer os1 os2 -> M.MTTransfer (fos os1) (fos os2)
M.MTConcrete i -> M.MTConcrete (f i)
M.MTInstance i -> M.MTInstance (f i)
M.MTReuse rt -> M.MTReuse (frt rt)
M.MTUnion mt ms -> M.MTUnion (fmt mt) [(f i, map f is) | (i,is) <- ms]
_ -> t
frt :: M.MReuseType Ident -> M.MReuseType Ident
frt rt = case rt of
M.MRInterface i -> M.MRInterface (f i)
M.MRInstance i1 i2 -> M.MRInstance (f i1) (f i2)
M.MRResource i -> M.MRResource (f i)
ffl :: Flag -> Flag
ffl (Flg i1 i2) = Flg (f i1) (f i2)
fi :: Info -> Info
fi info = case info of
AbsCat ds fs -> AbsCat ds fs -- FIXME: convert idents here too
AbsFun ty te -> AbsFun ty te -- FIXME: convert idents here too
AbsTrans te -> AbsTrans te -- FIXME: convert idents here too
ResPar ps -> ResPar [ParD (f i) (map fct cts) | ParD i cts <- ps]
ResOper ct t -> ResOper (fct ct) (ft t)
CncCat ct t pn -> CncCat (fct ct) (ft t) (ft pn)
CncFun ci avs t pn -> CncFun (fc ci) (map fav avs) (ft t) (ft pn)
AnyInd b i -> AnyInd b (f i)
fqi :: A.QIdent -> A.QIdent
fqi (i1,i2) = (f i1, f i2)
fc :: CIdent -> CIdent
fc (CIQ i1 i2) = CIQ (f i1) (f i2)
fl :: Label -> Label
fl l = case l of
L i -> L (f i)
_ -> l
fct :: CType -> CType
fct ct = case ct of
RecType ls -> RecType [ Lbg (fl l) (fct ct) | Lbg l ct <- ls ]
Table t1 t2 -> Table (fct t1) (fct t2)
Cn ci -> Cn (fc ci)
_ -> ct
fp :: Patt -> Patt
fp p = case p of
PC ci ps -> PC (fc ci) (map fp ps)
PV i -> PV (f i)
PR ps -> PR [PAss (fl l) (fp p) | PAss l p <- ps]
_ -> p
ft :: Term -> Term
ft t = case t of
Arg av -> Arg (fav av)
I ci -> I (fc ci)
Con ci ts -> Con (fc ci) (map ft ts)
LI i -> LI (f i)
R as -> R [Ass (fl l) (ft t) | Ass l t <- as]
P t l -> P (ft t) (fl l)
T ct cs -> T (fct ct) [Cas (map fp ps) (ft t) | Cas ps t <- cs]
V ct ts -> V (fct ct) (map ft ts)
S t1 t2 -> S (ft t1) (ft t2)
C t1 t2 -> C (ft t1) (ft t2)
FV ts -> FV (map ft ts)
_ -> t
fav :: ArgVar -> ArgVar
fav av = case av of
A i x -> A (f i) x
AB i x y -> AB (f i) x y
{-
fat :: A.Term -> A.Term
fat t = case t of
A.Vr i -> A.Vr (f i)
A.Cn i -> A.Cn (f i)
A.Con i -> A.Con (f i)
A.App t1 t2 -> A.App (fat t1) (fat t2)
A.Abs i t' -> A.Abs (f i) (fat t')
A.Prod i t1 t2 -> A.Prod (f i) (fat t1) (fat t2)
A.Eqs eqs -> A.Eqs [(, fat t) | (ps,t) <- eqs ]
| Eqs [([Patt],Term)]
-- only used in internal representation
| Typed Term Term -- ^ type-annotated term
--
-- /below this, the constructors are only for concrete syntax/
| RecType [Labelling] -- ^ record type: @{ p : A ; ...}@
| R [Assign] -- ^ record: @{ p = a ; ...}@
| P Term Label -- ^ projection: @r.p@
| ExtR Term Term -- ^ extension: @R ** {x : A}@ (both types and terms)
| Table Term Term -- ^ table type: @P => A@
| T TInfo [Case] -- ^ table: @table {p => c ; ...}@
| TSh TInfo [Cases] -- ^ table with discjunctive patters (only back end opt)
| V Type [Term] -- ^ table given as course of values: @table T [c1 ; ... ; cn]@
| S Term Term -- ^ selection: @t ! p@
| Let LocalDef Term -- ^ local definition: @let {t : T = a} in b@
| Alias Ident Type Term -- ^ constant and its definition, used in inlining
| Q Ident Ident -- ^ qualified constant from a package
| QC Ident Ident -- ^ qualified constructor from a package
| C Term Term -- ^ concatenation: @s ++ t@
| Glue Term Term -- ^ agglutination: @s + t@
| FV [Term] -- ^ alternatives in free variation: @variants { s ; ... }@
| Alts (Term, [(Term, Term)]) -- ^ alternatives by prefix: @pre {t ; s\/c ; ...}@
| Strs [Term] -- ^ conditioning prefix strings: @strs {s ; ...}@
--
-- /below this, the last three constructors are obsolete/
| LiT Ident -- ^ linearization type
| Ready Str -- ^ result of compiling; not to be parsed ...
| Computed Term -- ^ result of computing: not to be reopened nor parsed
_ -> t
fp :: A.Patt -> A.Patt
fp p = case p of
A.PC Ident [Patt]
A.PP Ident Ident [Patt]
A.PV Ident
A.PR [(Label,Patt)]
A.PT Type Patt
-}

View File

@@ -5,9 +5,9 @@
-- Stability : (stable) -- Stability : (stable)
-- Portability : (portable) -- Portability : (portable)
-- --
-- > CVS $Date: 2005/02/25 15:35:48 $ -- > CVS $Date: 2005/03/08 15:31:22 $
-- > CVS $Author: aarne $ -- > CVS $Author: bringert $
-- > CVS $Revision: 1.21 $ -- > CVS $Revision: 1.22 $
-- --
-- Options and flags used in GF shell commands and files. -- Options and flags used in GF shell commands and files.
-- --
@@ -151,7 +151,7 @@ dontParse = iOpt "read"
showAbstr, showXML, showOld, showLatex, showFullForm, showAbstr, showXML, showOld, showLatex, showFullForm,
showEBNF, showCF, showWords, showOpts, showEBNF, showCF, showWords, showOpts,
isCompiled, isHaskell, noCompOpers, retainOpers, isCompiled, isHaskell, noCompOpers, retainOpers,
newParser, noCF, checkCirc, noCheckCirc, lexerByNeed :: Option newParser, noCF, checkCirc, noCheckCirc, lexerByNeed, useUTF8id :: Option
defaultGrOpts :: [Option] defaultGrOpts :: [Option]
showAbstr = iOpt "abs" showAbstr = iOpt "abs"
@@ -174,6 +174,7 @@ noCF = iOpt "nocf"
checkCirc = iOpt "nocirc" checkCirc = iOpt "nocirc"
noCheckCirc = iOpt "nocheckcirc" noCheckCirc = iOpt "nocheckcirc"
lexerByNeed = iOpt "cflexer" lexerByNeed = iOpt "cflexer"
useUTF8id = iOpt "utf8id"
-- ** linearization -- ** linearization

View File

@@ -5,9 +5,9 @@
-- Stability : (stable) -- Stability : (stable)
-- Portability : (portable) -- Portability : (portable)
-- --
-- > CVS $Date: 2005/02/25 15:35:48 $ -- > CVS $Date: 2005/03/08 15:31:22 $
-- > CVS $Author: aarne $ -- > CVS $Author: bringert $
-- > CVS $Revision: 1.24 $ -- > CVS $Revision: 1.25 $
-- --
-- The datatype of shell commands and the list of their options. -- The datatype of shell commands and the list of their options.
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
@@ -183,7 +183,7 @@ optionsOfCommand co = case co of
CSystemCommand _ -> none CSystemCommand _ -> none
CPrintGrammar -> both "utf8" "printer lang" CPrintGrammar -> both "utf8" "printer lang"
CPrintMultiGrammar -> both "utf8" "printer" CPrintMultiGrammar -> both "utf8 utf8id" "printer"
CPrintSourceGrammar -> both "utf8" "printer" CPrintSourceGrammar -> both "utf8" "printer"
CHelp _ -> opts "all filter length lexer unlexer printer transform depth number" CHelp _ -> opts "all filter length lexer unlexer printer transform depth number"

View File

@@ -81,7 +81,8 @@ pm, print_multigrammar: pm
Prints the current multilingual grammar in .gfcm form. Prints the current multilingual grammar in .gfcm form.
(Automatically executes the strip command (s) before doing this.) (Automatically executes the strip command (s) before doing this.)
options: options:
-utf8 apply UTF8-encoding to the grammar -utf8 apply UTF8 encoding to the tokens in the grammar
-utf8id apply UTF8 encoding to the identifiers in the grammar
-graph print module dependency graph in 'dot' format -graph print module dependency graph in 'dot' format
examples: examples:
pm | wf Letter.gfcm -- print the grammar into the file Letter.gfcm pm | wf Letter.gfcm -- print the grammar into the file Letter.gfcm