Finished UTF8 identifier conversion. Will probably redo it at string level, i.e. after pretty-printing instead

This commit is contained in:
bringert
2005-03-08 14:31:22 +00:00
parent d460319528
commit c8763a80e2
7 changed files with 195 additions and 59 deletions

View File

@@ -1,6 +1,7 @@
concrete NumeralsSwe of Numerals = open MorphoSwe, Prelude in {
lincat
Numeral = { s : Str } ;
Digit = {s : DForm => Str} ;
Sub10 = {s : DForm => Str} ;

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/02/18 19:21:06 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.27 $
-- > CVS $Date: 2005/03/08 15:31:22 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.28 $
--
-- Application Programmer's Interface to GF; also used by Shell. AR 10/11/2001
-----------------------------------------------------------------------------
@@ -298,17 +298,20 @@ optPrintGrammar :: Options -> StateGrammar -> String
optPrintGrammar opts = customOrDefault opts grammarPrinter customGrammarPrinter
optPrintMultiGrammar :: Options -> CanonGrammar -> String
optPrintMultiGrammar opts = pmg . encode
optPrintMultiGrammar opts = pmg . encodeId . encode
where
pmg = customOrDefault opts grammarPrinter customMultiGrammarPrinter
-- if -utf8 was given, convert from language specific codings
encode = if oElem useUTF8 opts then mapModules moduleToUTF8 else id
-- if -utf8id was given, convert identifiers to UTF8
encodeId = if oElem useUTF8id opts then grammarIdentsToUTF8 else id
moduleToUTF8 m =
m{ jments = mapTree (onSnd (mapInfoTerms code)) (jments m),
flags = setFlag "coding" "utf8" (flags m) }
where code = onTokens (anyCodingToUTF8 (moduleOpts m))
moduleOpts = Opts . okError . mapM CG.redFlag . flags
grammarIdentsToUTF8 mgr
= MGrammar [ (identToUTF8 i, mapIdents identToUTF8 mi) | (i,mi) <- modules mgr]
optPrintSyntax :: Options -> GF.Grammar -> String
optPrintSyntax opts = customOrDefault opts grammarPrinter customSyntaxPrinter
@@ -366,3 +369,26 @@ optDecodeUTF8 gr = case getOptVal (stateOptions gr) uniCoding of
anyCodingToUTF8 :: Options -> String -> String
anyCodingToUTF8 opts =
encodeUTF8 . customOrDefault opts uniCoding customUniCoding
{-
-- | Convert all text not inside double quotes to UTF8
nonLiteralsToUTF8 :: String -> String
nonLiteralsToUTF8 "" = ""
nonLiteralsToUTF8 ('"':cs) = '"' : l ++ nonLiteralsToUTF8 rs
where (l,rs) = takeStringLit cs
nonLiteralsToUTF8 (c:cs) = encodeUTF8 [c] : nonLiteralsToUTF8 cs
where
-- | Split off an initial string ended by double quotes
takeStringLit :: String -> (String,String)
takeStringLit "" = ("","")
takeStringLit
-}
-- | Convert an identifier in unicode to UTF8 encoding
identToUTF8 :: I.Ident -> I.Ident
identToUTF8 i = case i of
I.IC s -> I.IC (encodeUTF8 s)
I.IW -> I.IW
I.IV (i,s) -> I.IV (i, encodeUTF8 s)
I.IA (s,i) -> I.IA (encodeUTF8 s, i)
I.IAV (s,i1,i2) -> I.IAV (encodeUTF8 s, i2, i2)

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/03/07 17:50:00 $
-- > CVS $Date: 2005/03/08 15:31:22 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.22 $
-- > CVS $Revision: 1.23 $
--
-- Macros for building and analysing terms in GFC concrete syntax.
--
@@ -246,44 +246,6 @@ onTokens f t = case t of
K (KP ss vs) -> K (KP (map f ss) [Var (map f x) (map f y) | Var x y <- vs])
_ -> composSafeOp (onTokens f) t
-- | Apply some function to all identifiers in a GFC term
onTermIdents :: (Ident -> Ident) -> Term -> Term
onTermIdents f t = case t of
Arg av -> Arg $ case av of
A i x -> A (f i) x
AB i x y -> AB (f i) x y
I ci -> I (fc ci)
Con ci ts -> Con (fc ci) (map (onTermIdents f) ts)
LI i -> LI (f i)
R as -> R [Ass (fl l) (onTermIdents f t) | Ass l t <- as]
P t l -> P (onTermIdents f t) (fl l)
T ct cs -> T (fct ct) [Cas (map fp ps) (onTermIdents f t) | Cas ps t <- cs]
V ct ts -> V (fct ct) (map (onTermIdents f) ts)
S t1 t2 -> S (onTermIdents f t1) (onTermIdents f t2)
C t1 t2 -> C (onTermIdents f t1) (onTermIdents f t2)
FV ts -> FV (map (onTermIdents f) ts)
_ -> t
where
fc :: CIdent -> CIdent
fc (CIQ i1 i2) = CIQ (f i1) (f i2)
fl :: Label -> Label
fl l = case l of
L i -> L (f i)
_ -> l
fct :: CType -> CType
fct ct = case ct of
RecType ls -> RecType [ Lbg (fl l) (fct ct) | Lbg l ct <- ls ]
Table t1 t2 -> Table (fct t1) (fct t2)
Cn ci -> Cn (fc ci)
_ -> ct
fp :: Patt -> Patt
fp p = case p of
PC ci ps -> PC (fc ci) (map fp ps)
PV i -> PV (f i)
PR ps -> PR [PAss (fl l) (fp p) | PAss l p <- ps]
_ -> p
-- | to define compositional term functions
composSafeOp :: (Term -> Term) -> Term -> Term
composSafeOp op trm = case composOp (mkMonadic op) trm of

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/03/04 14:08:36 $
-- > CVS $Date: 2005/03/08 15:31:22 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.7 $
-- > CVS $Revision: 1.8 $
--
-- canonical GF. AR 10\/9\/2002 -- 9\/5\/2003 -- 21\/9
-----------------------------------------------------------------------------
@@ -20,7 +20,8 @@ module GFC (Context,
Info(..),
Printname,
mapInfoTerms,
setFlag
setFlag,
mapIdents
) where
import AbsGFC
@@ -34,6 +35,7 @@ import Operations
import qualified Modules as M
import Char
import Control.Arrow (first)
type Context = [(Ident,Exp)]
@@ -45,7 +47,7 @@ type CanonModule = (Ident, CanonModInfo)
type CanonAbs = M.Module Ident Option Info
data Info =
data Info =
AbsCat A.Context [A.Fun]
| AbsFun A.Type A.Term
| AbsTrans A.Term
@@ -67,4 +69,147 @@ mapInfoTerms f i = case i of
_ -> i
setFlag :: String -> String -> [Flag] -> [Flag]
setFlag n v fs = Flg (IC n) (IC v):[f | f@(Flg (IC n') _) <- fs, n' /= n]
setFlag n v fs = Flg (IC n) (IC v):[f | f@(Flg (IC n') _) <- fs, n' /= n]
-- | Apply a function to all identifiers in a module
mapIdents :: (Ident -> Ident) -> M.ModInfo Ident Flag Info -> M.ModInfo Ident Flag Info
mapIdents f mi = case mi of
M.ModMainGrammar mg -> M.ModMainGrammar (fmg mg)
M.ModMod m -> M.ModMod (fm m)
M.ModWith mt s i is oss -> M.ModWith (fmt mt) s (f i) (map f is) (map fos oss)
where
fmg :: M.MainGrammar Ident -> M.MainGrammar Ident
fmg (M.MainGrammar i mcs) = M.MainGrammar (f i) (map fmc mcs)
fmc :: M.MainConcreteSpec Ident -> M.MainConcreteSpec Ident
fmc (M.MainConcreteSpec i1 i2 mos1 mos2)
= M.MainConcreteSpec (f i1) (f i2) (fmap fos mos1) (fmap fos mos2)
fos :: M.OpenSpec Ident -> M.OpenSpec Ident
fos os = case os of
M.OSimple q i -> M.OSimple q (f i)
M.OQualif q i1 i2 -> M.OQualif q (f i1) (f i2)
fm :: M.Module Ident Flag Info -> M.Module Ident Flag Info
fm m@(M.Module{ M.mtype = mt, M.flags = fl, M.extends = ex,
M.opens = os, M.jments = js}) =
m { M.mtype = fmt mt, M.flags = map ffl fl, M.extends = map f ex,
M.opens = map fos os,
M.jments = mapTree (\(i,t) -> (f i, fi t)) js }
fmt :: M.ModuleType Ident -> M.ModuleType Ident
fmt t = case t of
M.MTTransfer os1 os2 -> M.MTTransfer (fos os1) (fos os2)
M.MTConcrete i -> M.MTConcrete (f i)
M.MTInstance i -> M.MTInstance (f i)
M.MTReuse rt -> M.MTReuse (frt rt)
M.MTUnion mt ms -> M.MTUnion (fmt mt) [(f i, map f is) | (i,is) <- ms]
_ -> t
frt :: M.MReuseType Ident -> M.MReuseType Ident
frt rt = case rt of
M.MRInterface i -> M.MRInterface (f i)
M.MRInstance i1 i2 -> M.MRInstance (f i1) (f i2)
M.MRResource i -> M.MRResource (f i)
ffl :: Flag -> Flag
ffl (Flg i1 i2) = Flg (f i1) (f i2)
fi :: Info -> Info
fi info = case info of
AbsCat ds fs -> AbsCat ds fs -- FIXME: convert idents here too
AbsFun ty te -> AbsFun ty te -- FIXME: convert idents here too
AbsTrans te -> AbsTrans te -- FIXME: convert idents here too
ResPar ps -> ResPar [ParD (f i) (map fct cts) | ParD i cts <- ps]
ResOper ct t -> ResOper (fct ct) (ft t)
CncCat ct t pn -> CncCat (fct ct) (ft t) (ft pn)
CncFun ci avs t pn -> CncFun (fc ci) (map fav avs) (ft t) (ft pn)
AnyInd b i -> AnyInd b (f i)
fqi :: A.QIdent -> A.QIdent
fqi (i1,i2) = (f i1, f i2)
fc :: CIdent -> CIdent
fc (CIQ i1 i2) = CIQ (f i1) (f i2)
fl :: Label -> Label
fl l = case l of
L i -> L (f i)
_ -> l
fct :: CType -> CType
fct ct = case ct of
RecType ls -> RecType [ Lbg (fl l) (fct ct) | Lbg l ct <- ls ]
Table t1 t2 -> Table (fct t1) (fct t2)
Cn ci -> Cn (fc ci)
_ -> ct
fp :: Patt -> Patt
fp p = case p of
PC ci ps -> PC (fc ci) (map fp ps)
PV i -> PV (f i)
PR ps -> PR [PAss (fl l) (fp p) | PAss l p <- ps]
_ -> p
ft :: Term -> Term
ft t = case t of
Arg av -> Arg (fav av)
I ci -> I (fc ci)
Con ci ts -> Con (fc ci) (map ft ts)
LI i -> LI (f i)
R as -> R [Ass (fl l) (ft t) | Ass l t <- as]
P t l -> P (ft t) (fl l)
T ct cs -> T (fct ct) [Cas (map fp ps) (ft t) | Cas ps t <- cs]
V ct ts -> V (fct ct) (map ft ts)
S t1 t2 -> S (ft t1) (ft t2)
C t1 t2 -> C (ft t1) (ft t2)
FV ts -> FV (map ft ts)
_ -> t
fav :: ArgVar -> ArgVar
fav av = case av of
A i x -> A (f i) x
AB i x y -> AB (f i) x y
{-
fat :: A.Term -> A.Term
fat t = case t of
A.Vr i -> A.Vr (f i)
A.Cn i -> A.Cn (f i)
A.Con i -> A.Con (f i)
A.App t1 t2 -> A.App (fat t1) (fat t2)
A.Abs i t' -> A.Abs (f i) (fat t')
A.Prod i t1 t2 -> A.Prod (f i) (fat t1) (fat t2)
A.Eqs eqs -> A.Eqs [(, fat t) | (ps,t) <- eqs ]
| Eqs [([Patt],Term)]
-- only used in internal representation
| Typed Term Term -- ^ type-annotated term
--
-- /below this, the constructors are only for concrete syntax/
| RecType [Labelling] -- ^ record type: @{ p : A ; ...}@
| R [Assign] -- ^ record: @{ p = a ; ...}@
| P Term Label -- ^ projection: @r.p@
| ExtR Term Term -- ^ extension: @R ** {x : A}@ (both types and terms)
| Table Term Term -- ^ table type: @P => A@
| T TInfo [Case] -- ^ table: @table {p => c ; ...}@
| TSh TInfo [Cases] -- ^ table with discjunctive patters (only back end opt)
| V Type [Term] -- ^ table given as course of values: @table T [c1 ; ... ; cn]@
| S Term Term -- ^ selection: @t ! p@
| Let LocalDef Term -- ^ local definition: @let {t : T = a} in b@
| Alias Ident Type Term -- ^ constant and its definition, used in inlining
| Q Ident Ident -- ^ qualified constant from a package
| QC Ident Ident -- ^ qualified constructor from a package
| C Term Term -- ^ concatenation: @s ++ t@
| Glue Term Term -- ^ agglutination: @s + t@
| FV [Term] -- ^ alternatives in free variation: @variants { s ; ... }@
| Alts (Term, [(Term, Term)]) -- ^ alternatives by prefix: @pre {t ; s\/c ; ...}@
| Strs [Term] -- ^ conditioning prefix strings: @strs {s ; ...}@
--
-- /below this, the last three constructors are obsolete/
| LiT Ident -- ^ linearization type
| Ready Str -- ^ result of compiling; not to be parsed ...
| Computed Term -- ^ result of computing: not to be reopened nor parsed
_ -> t
fp :: A.Patt -> A.Patt
fp p = case p of
A.PC Ident [Patt]
A.PP Ident Ident [Patt]
A.PV Ident
A.PR [(Label,Patt)]
A.PT Type Patt
-}

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/02/25 15:35:48 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.21 $
-- > CVS $Date: 2005/03/08 15:31:22 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.22 $
--
-- Options and flags used in GF shell commands and files.
--
@@ -151,7 +151,7 @@ dontParse = iOpt "read"
showAbstr, showXML, showOld, showLatex, showFullForm,
showEBNF, showCF, showWords, showOpts,
isCompiled, isHaskell, noCompOpers, retainOpers,
newParser, noCF, checkCirc, noCheckCirc, lexerByNeed :: Option
newParser, noCF, checkCirc, noCheckCirc, lexerByNeed, useUTF8id :: Option
defaultGrOpts :: [Option]
showAbstr = iOpt "abs"
@@ -174,6 +174,7 @@ noCF = iOpt "nocf"
checkCirc = iOpt "nocirc"
noCheckCirc = iOpt "nocheckcirc"
lexerByNeed = iOpt "cflexer"
useUTF8id = iOpt "utf8id"
-- ** linearization

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/02/25 15:35:48 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.24 $
-- > CVS $Date: 2005/03/08 15:31:22 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.25 $
--
-- The datatype of shell commands and the list of their options.
-----------------------------------------------------------------------------
@@ -183,7 +183,7 @@ optionsOfCommand co = case co of
CSystemCommand _ -> none
CPrintGrammar -> both "utf8" "printer lang"
CPrintMultiGrammar -> both "utf8" "printer"
CPrintMultiGrammar -> both "utf8 utf8id" "printer"
CPrintSourceGrammar -> both "utf8" "printer"
CHelp _ -> opts "all filter length lexer unlexer printer transform depth number"

View File

@@ -81,7 +81,8 @@ pm, print_multigrammar: pm
Prints the current multilingual grammar in .gfcm form.
(Automatically executes the strip command (s) before doing this.)
options:
-utf8 apply UTF8-encoding to the grammar
-utf8 apply UTF8 encoding to the tokens in the grammar
-utf8id apply UTF8 encoding to the identifiers in the grammar
-graph print module dependency graph in 'dot' format
examples:
pm | wf Letter.gfcm -- print the grammar into the file Letter.gfcm