mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
Finished UTF8 identifier conversion. Will probably redo it at string level, i.e. after pretty-printing instead
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
concrete NumeralsSwe of Numerals = open MorphoSwe, Prelude in {
|
||||
|
||||
lincat
|
||||
Numeral = { s : Str } ;
|
||||
Digit = {s : DForm => Str} ;
|
||||
Sub10 = {s : DForm => Str} ;
|
||||
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/02/18 19:21:06 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.27 $
|
||||
-- > CVS $Date: 2005/03/08 15:31:22 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.28 $
|
||||
--
|
||||
-- Application Programmer's Interface to GF; also used by Shell. AR 10/11/2001
|
||||
-----------------------------------------------------------------------------
|
||||
@@ -298,17 +298,20 @@ optPrintGrammar :: Options -> StateGrammar -> String
|
||||
optPrintGrammar opts = customOrDefault opts grammarPrinter customGrammarPrinter
|
||||
|
||||
optPrintMultiGrammar :: Options -> CanonGrammar -> String
|
||||
optPrintMultiGrammar opts = pmg . encode
|
||||
optPrintMultiGrammar opts = pmg . encodeId . encode
|
||||
where
|
||||
pmg = customOrDefault opts grammarPrinter customMultiGrammarPrinter
|
||||
-- if -utf8 was given, convert from language specific codings
|
||||
encode = if oElem useUTF8 opts then mapModules moduleToUTF8 else id
|
||||
-- if -utf8id was given, convert identifiers to UTF8
|
||||
encodeId = if oElem useUTF8id opts then grammarIdentsToUTF8 else id
|
||||
moduleToUTF8 m =
|
||||
m{ jments = mapTree (onSnd (mapInfoTerms code)) (jments m),
|
||||
flags = setFlag "coding" "utf8" (flags m) }
|
||||
where code = onTokens (anyCodingToUTF8 (moduleOpts m))
|
||||
moduleOpts = Opts . okError . mapM CG.redFlag . flags
|
||||
|
||||
grammarIdentsToUTF8 mgr
|
||||
= MGrammar [ (identToUTF8 i, mapIdents identToUTF8 mi) | (i,mi) <- modules mgr]
|
||||
|
||||
optPrintSyntax :: Options -> GF.Grammar -> String
|
||||
optPrintSyntax opts = customOrDefault opts grammarPrinter customSyntaxPrinter
|
||||
@@ -366,3 +369,26 @@ optDecodeUTF8 gr = case getOptVal (stateOptions gr) uniCoding of
|
||||
anyCodingToUTF8 :: Options -> String -> String
|
||||
anyCodingToUTF8 opts =
|
||||
encodeUTF8 . customOrDefault opts uniCoding customUniCoding
|
||||
|
||||
{-
|
||||
-- | Convert all text not inside double quotes to UTF8
|
||||
nonLiteralsToUTF8 :: String -> String
|
||||
nonLiteralsToUTF8 "" = ""
|
||||
nonLiteralsToUTF8 ('"':cs) = '"' : l ++ nonLiteralsToUTF8 rs
|
||||
where (l,rs) = takeStringLit cs
|
||||
nonLiteralsToUTF8 (c:cs) = encodeUTF8 [c] : nonLiteralsToUTF8 cs
|
||||
where
|
||||
-- | Split off an initial string ended by double quotes
|
||||
takeStringLit :: String -> (String,String)
|
||||
takeStringLit "" = ("","")
|
||||
takeStringLit
|
||||
-}
|
||||
|
||||
-- | Convert an identifier in unicode to UTF8 encoding
|
||||
identToUTF8 :: I.Ident -> I.Ident
|
||||
identToUTF8 i = case i of
|
||||
I.IC s -> I.IC (encodeUTF8 s)
|
||||
I.IW -> I.IW
|
||||
I.IV (i,s) -> I.IV (i, encodeUTF8 s)
|
||||
I.IA (s,i) -> I.IA (encodeUTF8 s, i)
|
||||
I.IAV (s,i1,i2) -> I.IAV (encodeUTF8 s, i2, i2)
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/03/07 17:50:00 $
|
||||
-- > CVS $Date: 2005/03/08 15:31:22 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.22 $
|
||||
-- > CVS $Revision: 1.23 $
|
||||
--
|
||||
-- Macros for building and analysing terms in GFC concrete syntax.
|
||||
--
|
||||
@@ -246,44 +246,6 @@ onTokens f t = case t of
|
||||
K (KP ss vs) -> K (KP (map f ss) [Var (map f x) (map f y) | Var x y <- vs])
|
||||
_ -> composSafeOp (onTokens f) t
|
||||
|
||||
-- | Apply some function to all identifiers in a GFC term
|
||||
onTermIdents :: (Ident -> Ident) -> Term -> Term
|
||||
onTermIdents f t = case t of
|
||||
Arg av -> Arg $ case av of
|
||||
A i x -> A (f i) x
|
||||
AB i x y -> AB (f i) x y
|
||||
I ci -> I (fc ci)
|
||||
Con ci ts -> Con (fc ci) (map (onTermIdents f) ts)
|
||||
LI i -> LI (f i)
|
||||
R as -> R [Ass (fl l) (onTermIdents f t) | Ass l t <- as]
|
||||
P t l -> P (onTermIdents f t) (fl l)
|
||||
T ct cs -> T (fct ct) [Cas (map fp ps) (onTermIdents f t) | Cas ps t <- cs]
|
||||
V ct ts -> V (fct ct) (map (onTermIdents f) ts)
|
||||
S t1 t2 -> S (onTermIdents f t1) (onTermIdents f t2)
|
||||
C t1 t2 -> C (onTermIdents f t1) (onTermIdents f t2)
|
||||
FV ts -> FV (map (onTermIdents f) ts)
|
||||
_ -> t
|
||||
where
|
||||
fc :: CIdent -> CIdent
|
||||
fc (CIQ i1 i2) = CIQ (f i1) (f i2)
|
||||
fl :: Label -> Label
|
||||
fl l = case l of
|
||||
L i -> L (f i)
|
||||
_ -> l
|
||||
fct :: CType -> CType
|
||||
fct ct = case ct of
|
||||
RecType ls -> RecType [ Lbg (fl l) (fct ct) | Lbg l ct <- ls ]
|
||||
Table t1 t2 -> Table (fct t1) (fct t2)
|
||||
Cn ci -> Cn (fc ci)
|
||||
_ -> ct
|
||||
fp :: Patt -> Patt
|
||||
fp p = case p of
|
||||
PC ci ps -> PC (fc ci) (map fp ps)
|
||||
PV i -> PV (f i)
|
||||
PR ps -> PR [PAss (fl l) (fp p) | PAss l p <- ps]
|
||||
_ -> p
|
||||
|
||||
|
||||
-- | to define compositional term functions
|
||||
composSafeOp :: (Term -> Term) -> Term -> Term
|
||||
composSafeOp op trm = case composOp (mkMonadic op) trm of
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/03/04 14:08:36 $
|
||||
-- > CVS $Date: 2005/03/08 15:31:22 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.7 $
|
||||
-- > CVS $Revision: 1.8 $
|
||||
--
|
||||
-- canonical GF. AR 10\/9\/2002 -- 9\/5\/2003 -- 21\/9
|
||||
-----------------------------------------------------------------------------
|
||||
@@ -20,7 +20,8 @@ module GFC (Context,
|
||||
Info(..),
|
||||
Printname,
|
||||
mapInfoTerms,
|
||||
setFlag
|
||||
setFlag,
|
||||
mapIdents
|
||||
) where
|
||||
|
||||
import AbsGFC
|
||||
@@ -34,6 +35,7 @@ import Operations
|
||||
import qualified Modules as M
|
||||
|
||||
import Char
|
||||
import Control.Arrow (first)
|
||||
|
||||
type Context = [(Ident,Exp)]
|
||||
|
||||
@@ -45,7 +47,7 @@ type CanonModule = (Ident, CanonModInfo)
|
||||
|
||||
type CanonAbs = M.Module Ident Option Info
|
||||
|
||||
data Info =
|
||||
data Info =
|
||||
AbsCat A.Context [A.Fun]
|
||||
| AbsFun A.Type A.Term
|
||||
| AbsTrans A.Term
|
||||
@@ -67,4 +69,147 @@ mapInfoTerms f i = case i of
|
||||
_ -> i
|
||||
|
||||
setFlag :: String -> String -> [Flag] -> [Flag]
|
||||
setFlag n v fs = Flg (IC n) (IC v):[f | f@(Flg (IC n') _) <- fs, n' /= n]
|
||||
setFlag n v fs = Flg (IC n) (IC v):[f | f@(Flg (IC n') _) <- fs, n' /= n]
|
||||
|
||||
-- | Apply a function to all identifiers in a module
|
||||
mapIdents :: (Ident -> Ident) -> M.ModInfo Ident Flag Info -> M.ModInfo Ident Flag Info
|
||||
mapIdents f mi = case mi of
|
||||
M.ModMainGrammar mg -> M.ModMainGrammar (fmg mg)
|
||||
M.ModMod m -> M.ModMod (fm m)
|
||||
M.ModWith mt s i is oss -> M.ModWith (fmt mt) s (f i) (map f is) (map fos oss)
|
||||
where
|
||||
fmg :: M.MainGrammar Ident -> M.MainGrammar Ident
|
||||
fmg (M.MainGrammar i mcs) = M.MainGrammar (f i) (map fmc mcs)
|
||||
fmc :: M.MainConcreteSpec Ident -> M.MainConcreteSpec Ident
|
||||
fmc (M.MainConcreteSpec i1 i2 mos1 mos2)
|
||||
= M.MainConcreteSpec (f i1) (f i2) (fmap fos mos1) (fmap fos mos2)
|
||||
fos :: M.OpenSpec Ident -> M.OpenSpec Ident
|
||||
fos os = case os of
|
||||
M.OSimple q i -> M.OSimple q (f i)
|
||||
M.OQualif q i1 i2 -> M.OQualif q (f i1) (f i2)
|
||||
fm :: M.Module Ident Flag Info -> M.Module Ident Flag Info
|
||||
fm m@(M.Module{ M.mtype = mt, M.flags = fl, M.extends = ex,
|
||||
M.opens = os, M.jments = js}) =
|
||||
m { M.mtype = fmt mt, M.flags = map ffl fl, M.extends = map f ex,
|
||||
M.opens = map fos os,
|
||||
M.jments = mapTree (\(i,t) -> (f i, fi t)) js }
|
||||
fmt :: M.ModuleType Ident -> M.ModuleType Ident
|
||||
fmt t = case t of
|
||||
M.MTTransfer os1 os2 -> M.MTTransfer (fos os1) (fos os2)
|
||||
M.MTConcrete i -> M.MTConcrete (f i)
|
||||
M.MTInstance i -> M.MTInstance (f i)
|
||||
M.MTReuse rt -> M.MTReuse (frt rt)
|
||||
M.MTUnion mt ms -> M.MTUnion (fmt mt) [(f i, map f is) | (i,is) <- ms]
|
||||
_ -> t
|
||||
frt :: M.MReuseType Ident -> M.MReuseType Ident
|
||||
frt rt = case rt of
|
||||
M.MRInterface i -> M.MRInterface (f i)
|
||||
M.MRInstance i1 i2 -> M.MRInstance (f i1) (f i2)
|
||||
M.MRResource i -> M.MRResource (f i)
|
||||
ffl :: Flag -> Flag
|
||||
ffl (Flg i1 i2) = Flg (f i1) (f i2)
|
||||
fi :: Info -> Info
|
||||
fi info = case info of
|
||||
AbsCat ds fs -> AbsCat ds fs -- FIXME: convert idents here too
|
||||
AbsFun ty te -> AbsFun ty te -- FIXME: convert idents here too
|
||||
AbsTrans te -> AbsTrans te -- FIXME: convert idents here too
|
||||
ResPar ps -> ResPar [ParD (f i) (map fct cts) | ParD i cts <- ps]
|
||||
ResOper ct t -> ResOper (fct ct) (ft t)
|
||||
CncCat ct t pn -> CncCat (fct ct) (ft t) (ft pn)
|
||||
CncFun ci avs t pn -> CncFun (fc ci) (map fav avs) (ft t) (ft pn)
|
||||
AnyInd b i -> AnyInd b (f i)
|
||||
fqi :: A.QIdent -> A.QIdent
|
||||
fqi (i1,i2) = (f i1, f i2)
|
||||
fc :: CIdent -> CIdent
|
||||
fc (CIQ i1 i2) = CIQ (f i1) (f i2)
|
||||
fl :: Label -> Label
|
||||
fl l = case l of
|
||||
L i -> L (f i)
|
||||
_ -> l
|
||||
fct :: CType -> CType
|
||||
fct ct = case ct of
|
||||
RecType ls -> RecType [ Lbg (fl l) (fct ct) | Lbg l ct <- ls ]
|
||||
Table t1 t2 -> Table (fct t1) (fct t2)
|
||||
Cn ci -> Cn (fc ci)
|
||||
_ -> ct
|
||||
fp :: Patt -> Patt
|
||||
fp p = case p of
|
||||
PC ci ps -> PC (fc ci) (map fp ps)
|
||||
PV i -> PV (f i)
|
||||
PR ps -> PR [PAss (fl l) (fp p) | PAss l p <- ps]
|
||||
_ -> p
|
||||
ft :: Term -> Term
|
||||
ft t = case t of
|
||||
Arg av -> Arg (fav av)
|
||||
I ci -> I (fc ci)
|
||||
Con ci ts -> Con (fc ci) (map ft ts)
|
||||
LI i -> LI (f i)
|
||||
R as -> R [Ass (fl l) (ft t) | Ass l t <- as]
|
||||
P t l -> P (ft t) (fl l)
|
||||
T ct cs -> T (fct ct) [Cas (map fp ps) (ft t) | Cas ps t <- cs]
|
||||
V ct ts -> V (fct ct) (map ft ts)
|
||||
S t1 t2 -> S (ft t1) (ft t2)
|
||||
C t1 t2 -> C (ft t1) (ft t2)
|
||||
FV ts -> FV (map ft ts)
|
||||
_ -> t
|
||||
fav :: ArgVar -> ArgVar
|
||||
fav av = case av of
|
||||
A i x -> A (f i) x
|
||||
AB i x y -> AB (f i) x y
|
||||
{-
|
||||
fat :: A.Term -> A.Term
|
||||
fat t = case t of
|
||||
A.Vr i -> A.Vr (f i)
|
||||
A.Cn i -> A.Cn (f i)
|
||||
A.Con i -> A.Con (f i)
|
||||
A.App t1 t2 -> A.App (fat t1) (fat t2)
|
||||
A.Abs i t' -> A.Abs (f i) (fat t')
|
||||
A.Prod i t1 t2 -> A.Prod (f i) (fat t1) (fat t2)
|
||||
A.Eqs eqs -> A.Eqs [(, fat t) | (ps,t) <- eqs ]
|
||||
| Eqs [([Patt],Term)]
|
||||
|
||||
-- only used in internal representation
|
||||
| Typed Term Term -- ^ type-annotated term
|
||||
--
|
||||
-- /below this, the constructors are only for concrete syntax/
|
||||
| RecType [Labelling] -- ^ record type: @{ p : A ; ...}@
|
||||
| R [Assign] -- ^ record: @{ p = a ; ...}@
|
||||
| P Term Label -- ^ projection: @r.p@
|
||||
| ExtR Term Term -- ^ extension: @R ** {x : A}@ (both types and terms)
|
||||
|
||||
| Table Term Term -- ^ table type: @P => A@
|
||||
| T TInfo [Case] -- ^ table: @table {p => c ; ...}@
|
||||
| TSh TInfo [Cases] -- ^ table with discjunctive patters (only back end opt)
|
||||
| V Type [Term] -- ^ table given as course of values: @table T [c1 ; ... ; cn]@
|
||||
| S Term Term -- ^ selection: @t ! p@
|
||||
|
||||
| Let LocalDef Term -- ^ local definition: @let {t : T = a} in b@
|
||||
|
||||
| Alias Ident Type Term -- ^ constant and its definition, used in inlining
|
||||
|
||||
| Q Ident Ident -- ^ qualified constant from a package
|
||||
| QC Ident Ident -- ^ qualified constructor from a package
|
||||
|
||||
| C Term Term -- ^ concatenation: @s ++ t@
|
||||
| Glue Term Term -- ^ agglutination: @s + t@
|
||||
|
||||
| FV [Term] -- ^ alternatives in free variation: @variants { s ; ... }@
|
||||
|
||||
| Alts (Term, [(Term, Term)]) -- ^ alternatives by prefix: @pre {t ; s\/c ; ...}@
|
||||
| Strs [Term] -- ^ conditioning prefix strings: @strs {s ; ...}@
|
||||
--
|
||||
-- /below this, the last three constructors are obsolete/
|
||||
| LiT Ident -- ^ linearization type
|
||||
| Ready Str -- ^ result of compiling; not to be parsed ...
|
||||
| Computed Term -- ^ result of computing: not to be reopened nor parsed
|
||||
|
||||
_ -> t
|
||||
|
||||
fp :: A.Patt -> A.Patt
|
||||
fp p = case p of
|
||||
A.PC Ident [Patt]
|
||||
A.PP Ident Ident [Patt]
|
||||
A.PV Ident
|
||||
A.PR [(Label,Patt)]
|
||||
A.PT Type Patt
|
||||
-}
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/02/25 15:35:48 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.21 $
|
||||
-- > CVS $Date: 2005/03/08 15:31:22 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.22 $
|
||||
--
|
||||
-- Options and flags used in GF shell commands and files.
|
||||
--
|
||||
@@ -151,7 +151,7 @@ dontParse = iOpt "read"
|
||||
showAbstr, showXML, showOld, showLatex, showFullForm,
|
||||
showEBNF, showCF, showWords, showOpts,
|
||||
isCompiled, isHaskell, noCompOpers, retainOpers,
|
||||
newParser, noCF, checkCirc, noCheckCirc, lexerByNeed :: Option
|
||||
newParser, noCF, checkCirc, noCheckCirc, lexerByNeed, useUTF8id :: Option
|
||||
defaultGrOpts :: [Option]
|
||||
|
||||
showAbstr = iOpt "abs"
|
||||
@@ -174,6 +174,7 @@ noCF = iOpt "nocf"
|
||||
checkCirc = iOpt "nocirc"
|
||||
noCheckCirc = iOpt "nocheckcirc"
|
||||
lexerByNeed = iOpt "cflexer"
|
||||
useUTF8id = iOpt "utf8id"
|
||||
|
||||
-- ** linearization
|
||||
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/02/25 15:35:48 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.24 $
|
||||
-- > CVS $Date: 2005/03/08 15:31:22 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.25 $
|
||||
--
|
||||
-- The datatype of shell commands and the list of their options.
|
||||
-----------------------------------------------------------------------------
|
||||
@@ -183,7 +183,7 @@ optionsOfCommand co = case co of
|
||||
CSystemCommand _ -> none
|
||||
|
||||
CPrintGrammar -> both "utf8" "printer lang"
|
||||
CPrintMultiGrammar -> both "utf8" "printer"
|
||||
CPrintMultiGrammar -> both "utf8 utf8id" "printer"
|
||||
CPrintSourceGrammar -> both "utf8" "printer"
|
||||
|
||||
CHelp _ -> opts "all filter length lexer unlexer printer transform depth number"
|
||||
|
||||
@@ -81,7 +81,8 @@ pm, print_multigrammar: pm
|
||||
Prints the current multilingual grammar in .gfcm form.
|
||||
(Automatically executes the strip command (s) before doing this.)
|
||||
options:
|
||||
-utf8 apply UTF8-encoding to the grammar
|
||||
-utf8 apply UTF8 encoding to the tokens in the grammar
|
||||
-utf8id apply UTF8 encoding to the identifiers in the grammar
|
||||
-graph print module dependency graph in 'dot' format
|
||||
examples:
|
||||
pm | wf Letter.gfcm -- print the grammar into the file Letter.gfcm
|
||||
|
||||
Reference in New Issue
Block a user