diff --git a/lib/resource/swedish/NumeralsSwe.gf b/lib/resource/swedish/NumeralsSwe.gf index 83d146022..0303f4205 100644 --- a/lib/resource/swedish/NumeralsSwe.gf +++ b/lib/resource/swedish/NumeralsSwe.gf @@ -1,6 +1,7 @@ concrete NumeralsSwe of Numerals = open MorphoSwe, Prelude in { lincat + Numeral = { s : Str } ; Digit = {s : DForm => Str} ; Sub10 = {s : DForm => Str} ; diff --git a/src/GF/API.hs b/src/GF/API.hs index 4ab40b180..2fc562c1c 100644 --- a/src/GF/API.hs +++ b/src/GF/API.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/02/18 19:21:06 $ --- > CVS $Author: peb $ --- > CVS $Revision: 1.27 $ +-- > CVS $Date: 2005/03/08 15:31:22 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.28 $ -- -- Application Programmer's Interface to GF; also used by Shell. AR 10/11/2001 ----------------------------------------------------------------------------- @@ -298,17 +298,20 @@ optPrintGrammar :: Options -> StateGrammar -> String optPrintGrammar opts = customOrDefault opts grammarPrinter customGrammarPrinter optPrintMultiGrammar :: Options -> CanonGrammar -> String -optPrintMultiGrammar opts = pmg . encode +optPrintMultiGrammar opts = pmg . encodeId . encode where pmg = customOrDefault opts grammarPrinter customMultiGrammarPrinter -- if -utf8 was given, convert from language specific codings encode = if oElem useUTF8 opts then mapModules moduleToUTF8 else id + -- if -utf8id was given, convert identifiers to UTF8 + encodeId = if oElem useUTF8id opts then grammarIdentsToUTF8 else id moduleToUTF8 m = m{ jments = mapTree (onSnd (mapInfoTerms code)) (jments m), flags = setFlag "coding" "utf8" (flags m) } where code = onTokens (anyCodingToUTF8 (moduleOpts m)) moduleOpts = Opts . okError . mapM CG.redFlag . flags - + grammarIdentsToUTF8 mgr + = MGrammar [ (identToUTF8 i, mapIdents identToUTF8 mi) | (i,mi) <- modules mgr] optPrintSyntax :: Options -> GF.Grammar -> String optPrintSyntax opts = customOrDefault opts grammarPrinter customSyntaxPrinter @@ -366,3 +369,26 @@ optDecodeUTF8 gr = case getOptVal (stateOptions gr) uniCoding of anyCodingToUTF8 :: Options -> String -> String anyCodingToUTF8 opts = encodeUTF8 . customOrDefault opts uniCoding customUniCoding + +{- +-- | Convert all text not inside double quotes to UTF8 +nonLiteralsToUTF8 :: String -> String +nonLiteralsToUTF8 "" = "" +nonLiteralsToUTF8 ('"':cs) = '"' : l ++ nonLiteralsToUTF8 rs + where (l,rs) = takeStringLit cs +nonLiteralsToUTF8 (c:cs) = encodeUTF8 [c] : nonLiteralsToUTF8 cs + where + -- | Split off an initial string ended by double quotes + takeStringLit :: String -> (String,String) + takeStringLit "" = ("","") + takeStringLit +-} + +-- | Convert an identifier in unicode to UTF8 encoding +identToUTF8 :: I.Ident -> I.Ident +identToUTF8 i = case i of + I.IC s -> I.IC (encodeUTF8 s) + I.IW -> I.IW + I.IV (i,s) -> I.IV (i, encodeUTF8 s) + I.IA (s,i) -> I.IA (encodeUTF8 s, i) + I.IAV (s,i1,i2) -> I.IAV (encodeUTF8 s, i2, i2) diff --git a/src/GF/Canon/CMacros.hs b/src/GF/Canon/CMacros.hs index ea4513a02..a097c4926 100644 --- a/src/GF/Canon/CMacros.hs +++ b/src/GF/Canon/CMacros.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/07 17:50:00 $ +-- > CVS $Date: 2005/03/08 15:31:22 $ -- > CVS $Author: bringert $ --- > CVS $Revision: 1.22 $ +-- > CVS $Revision: 1.23 $ -- -- Macros for building and analysing terms in GFC concrete syntax. -- @@ -246,44 +246,6 @@ onTokens f t = case t of K (KP ss vs) -> K (KP (map f ss) [Var (map f x) (map f y) | Var x y <- vs]) _ -> composSafeOp (onTokens f) t --- | Apply some function to all identifiers in a GFC term -onTermIdents :: (Ident -> Ident) -> Term -> Term -onTermIdents f t = case t of - Arg av -> Arg $ case av of - A i x -> A (f i) x - AB i x y -> AB (f i) x y - I ci -> I (fc ci) - Con ci ts -> Con (fc ci) (map (onTermIdents f) ts) - LI i -> LI (f i) - R as -> R [Ass (fl l) (onTermIdents f t) | Ass l t <- as] - P t l -> P (onTermIdents f t) (fl l) - T ct cs -> T (fct ct) [Cas (map fp ps) (onTermIdents f t) | Cas ps t <- cs] - V ct ts -> V (fct ct) (map (onTermIdents f) ts) - S t1 t2 -> S (onTermIdents f t1) (onTermIdents f t2) - C t1 t2 -> C (onTermIdents f t1) (onTermIdents f t2) - FV ts -> FV (map (onTermIdents f) ts) - _ -> t - where - fc :: CIdent -> CIdent - fc (CIQ i1 i2) = CIQ (f i1) (f i2) - fl :: Label -> Label - fl l = case l of - L i -> L (f i) - _ -> l - fct :: CType -> CType - fct ct = case ct of - RecType ls -> RecType [ Lbg (fl l) (fct ct) | Lbg l ct <- ls ] - Table t1 t2 -> Table (fct t1) (fct t2) - Cn ci -> Cn (fc ci) - _ -> ct - fp :: Patt -> Patt - fp p = case p of - PC ci ps -> PC (fc ci) (map fp ps) - PV i -> PV (f i) - PR ps -> PR [PAss (fl l) (fp p) | PAss l p <- ps] - _ -> p - - -- | to define compositional term functions composSafeOp :: (Term -> Term) -> Term -> Term composSafeOp op trm = case composOp (mkMonadic op) trm of diff --git a/src/GF/Canon/GFC.hs b/src/GF/Canon/GFC.hs index a777f4b76..c29e77c73 100644 --- a/src/GF/Canon/GFC.hs +++ b/src/GF/Canon/GFC.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/04 14:08:36 $ +-- > CVS $Date: 2005/03/08 15:31:22 $ -- > CVS $Author: bringert $ --- > CVS $Revision: 1.7 $ +-- > CVS $Revision: 1.8 $ -- -- canonical GF. AR 10\/9\/2002 -- 9\/5\/2003 -- 21\/9 ----------------------------------------------------------------------------- @@ -20,7 +20,8 @@ module GFC (Context, Info(..), Printname, mapInfoTerms, - setFlag + setFlag, + mapIdents ) where import AbsGFC @@ -34,6 +35,7 @@ import Operations import qualified Modules as M import Char +import Control.Arrow (first) type Context = [(Ident,Exp)] @@ -45,7 +47,7 @@ type CanonModule = (Ident, CanonModInfo) type CanonAbs = M.Module Ident Option Info -data Info = +data Info = AbsCat A.Context [A.Fun] | AbsFun A.Type A.Term | AbsTrans A.Term @@ -67,4 +69,147 @@ mapInfoTerms f i = case i of _ -> i setFlag :: String -> String -> [Flag] -> [Flag] -setFlag n v fs = Flg (IC n) (IC v):[f | f@(Flg (IC n') _) <- fs, n' /= n] \ No newline at end of file +setFlag n v fs = Flg (IC n) (IC v):[f | f@(Flg (IC n') _) <- fs, n' /= n] + +-- | Apply a function to all identifiers in a module +mapIdents :: (Ident -> Ident) -> M.ModInfo Ident Flag Info -> M.ModInfo Ident Flag Info +mapIdents f mi = case mi of + M.ModMainGrammar mg -> M.ModMainGrammar (fmg mg) + M.ModMod m -> M.ModMod (fm m) + M.ModWith mt s i is oss -> M.ModWith (fmt mt) s (f i) (map f is) (map fos oss) + where + fmg :: M.MainGrammar Ident -> M.MainGrammar Ident + fmg (M.MainGrammar i mcs) = M.MainGrammar (f i) (map fmc mcs) + fmc :: M.MainConcreteSpec Ident -> M.MainConcreteSpec Ident + fmc (M.MainConcreteSpec i1 i2 mos1 mos2) + = M.MainConcreteSpec (f i1) (f i2) (fmap fos mos1) (fmap fos mos2) + fos :: M.OpenSpec Ident -> M.OpenSpec Ident + fos os = case os of + M.OSimple q i -> M.OSimple q (f i) + M.OQualif q i1 i2 -> M.OQualif q (f i1) (f i2) + fm :: M.Module Ident Flag Info -> M.Module Ident Flag Info + fm m@(M.Module{ M.mtype = mt, M.flags = fl, M.extends = ex, + M.opens = os, M.jments = js}) = + m { M.mtype = fmt mt, M.flags = map ffl fl, M.extends = map f ex, + M.opens = map fos os, + M.jments = mapTree (\(i,t) -> (f i, fi t)) js } + fmt :: M.ModuleType Ident -> M.ModuleType Ident + fmt t = case t of + M.MTTransfer os1 os2 -> M.MTTransfer (fos os1) (fos os2) + M.MTConcrete i -> M.MTConcrete (f i) + M.MTInstance i -> M.MTInstance (f i) + M.MTReuse rt -> M.MTReuse (frt rt) + M.MTUnion mt ms -> M.MTUnion (fmt mt) [(f i, map f is) | (i,is) <- ms] + _ -> t + frt :: M.MReuseType Ident -> M.MReuseType Ident + frt rt = case rt of + M.MRInterface i -> M.MRInterface (f i) + M.MRInstance i1 i2 -> M.MRInstance (f i1) (f i2) + M.MRResource i -> M.MRResource (f i) + ffl :: Flag -> Flag + ffl (Flg i1 i2) = Flg (f i1) (f i2) + fi :: Info -> Info + fi info = case info of + AbsCat ds fs -> AbsCat ds fs -- FIXME: convert idents here too + AbsFun ty te -> AbsFun ty te -- FIXME: convert idents here too + AbsTrans te -> AbsTrans te -- FIXME: convert idents here too + ResPar ps -> ResPar [ParD (f i) (map fct cts) | ParD i cts <- ps] + ResOper ct t -> ResOper (fct ct) (ft t) + CncCat ct t pn -> CncCat (fct ct) (ft t) (ft pn) + CncFun ci avs t pn -> CncFun (fc ci) (map fav avs) (ft t) (ft pn) + AnyInd b i -> AnyInd b (f i) + fqi :: A.QIdent -> A.QIdent + fqi (i1,i2) = (f i1, f i2) + fc :: CIdent -> CIdent + fc (CIQ i1 i2) = CIQ (f i1) (f i2) + fl :: Label -> Label + fl l = case l of + L i -> L (f i) + _ -> l + fct :: CType -> CType + fct ct = case ct of + RecType ls -> RecType [ Lbg (fl l) (fct ct) | Lbg l ct <- ls ] + Table t1 t2 -> Table (fct t1) (fct t2) + Cn ci -> Cn (fc ci) + _ -> ct + fp :: Patt -> Patt + fp p = case p of + PC ci ps -> PC (fc ci) (map fp ps) + PV i -> PV (f i) + PR ps -> PR [PAss (fl l) (fp p) | PAss l p <- ps] + _ -> p + ft :: Term -> Term + ft t = case t of + Arg av -> Arg (fav av) + I ci -> I (fc ci) + Con ci ts -> Con (fc ci) (map ft ts) + LI i -> LI (f i) + R as -> R [Ass (fl l) (ft t) | Ass l t <- as] + P t l -> P (ft t) (fl l) + T ct cs -> T (fct ct) [Cas (map fp ps) (ft t) | Cas ps t <- cs] + V ct ts -> V (fct ct) (map ft ts) + S t1 t2 -> S (ft t1) (ft t2) + C t1 t2 -> C (ft t1) (ft t2) + FV ts -> FV (map ft ts) + _ -> t + fav :: ArgVar -> ArgVar + fav av = case av of + A i x -> A (f i) x + AB i x y -> AB (f i) x y +{- + fat :: A.Term -> A.Term + fat t = case t of + A.Vr i -> A.Vr (f i) + A.Cn i -> A.Cn (f i) + A.Con i -> A.Con (f i) + A.App t1 t2 -> A.App (fat t1) (fat t2) + A.Abs i t' -> A.Abs (f i) (fat t') + A.Prod i t1 t2 -> A.Prod (f i) (fat t1) (fat t2) + A.Eqs eqs -> A.Eqs [(, fat t) | (ps,t) <- eqs ] + | Eqs [([Patt],Term)] + + -- only used in internal representation + | Typed Term Term -- ^ type-annotated term +-- +-- /below this, the constructors are only for concrete syntax/ + | RecType [Labelling] -- ^ record type: @{ p : A ; ...}@ + | R [Assign] -- ^ record: @{ p = a ; ...}@ + | P Term Label -- ^ projection: @r.p@ + | ExtR Term Term -- ^ extension: @R ** {x : A}@ (both types and terms) + + | Table Term Term -- ^ table type: @P => A@ + | T TInfo [Case] -- ^ table: @table {p => c ; ...}@ + | TSh TInfo [Cases] -- ^ table with discjunctive patters (only back end opt) + | V Type [Term] -- ^ table given as course of values: @table T [c1 ; ... ; cn]@ + | S Term Term -- ^ selection: @t ! p@ + + | Let LocalDef Term -- ^ local definition: @let {t : T = a} in b@ + + | Alias Ident Type Term -- ^ constant and its definition, used in inlining + + | Q Ident Ident -- ^ qualified constant from a package + | QC Ident Ident -- ^ qualified constructor from a package + + | C Term Term -- ^ concatenation: @s ++ t@ + | Glue Term Term -- ^ agglutination: @s + t@ + + | FV [Term] -- ^ alternatives in free variation: @variants { s ; ... }@ + + | Alts (Term, [(Term, Term)]) -- ^ alternatives by prefix: @pre {t ; s\/c ; ...}@ + | Strs [Term] -- ^ conditioning prefix strings: @strs {s ; ...}@ +-- +-- /below this, the last three constructors are obsolete/ + | LiT Ident -- ^ linearization type + | Ready Str -- ^ result of compiling; not to be parsed ... + | Computed Term -- ^ result of computing: not to be reopened nor parsed + + _ -> t + + fp :: A.Patt -> A.Patt + fp p = case p of + A.PC Ident [Patt] + A.PP Ident Ident [Patt] + A.PV Ident + A.PR [(Label,Patt)] + A.PT Type Patt +-} \ No newline at end of file diff --git a/src/GF/Infra/Option.hs b/src/GF/Infra/Option.hs index 8ee49b68d..a1a4e3468 100644 --- a/src/GF/Infra/Option.hs +++ b/src/GF/Infra/Option.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/02/25 15:35:48 $ --- > CVS $Author: aarne $ --- > CVS $Revision: 1.21 $ +-- > CVS $Date: 2005/03/08 15:31:22 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.22 $ -- -- Options and flags used in GF shell commands and files. -- @@ -151,7 +151,7 @@ dontParse = iOpt "read" showAbstr, showXML, showOld, showLatex, showFullForm, showEBNF, showCF, showWords, showOpts, isCompiled, isHaskell, noCompOpers, retainOpers, - newParser, noCF, checkCirc, noCheckCirc, lexerByNeed :: Option + newParser, noCF, checkCirc, noCheckCirc, lexerByNeed, useUTF8id :: Option defaultGrOpts :: [Option] showAbstr = iOpt "abs" @@ -174,6 +174,7 @@ noCF = iOpt "nocf" checkCirc = iOpt "nocirc" noCheckCirc = iOpt "nocheckcirc" lexerByNeed = iOpt "cflexer" +useUTF8id = iOpt "utf8id" -- ** linearization diff --git a/src/GF/Shell/ShellCommands.hs b/src/GF/Shell/ShellCommands.hs index 89aae2d06..d60849e90 100644 --- a/src/GF/Shell/ShellCommands.hs +++ b/src/GF/Shell/ShellCommands.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/02/25 15:35:48 $ --- > CVS $Author: aarne $ --- > CVS $Revision: 1.24 $ +-- > CVS $Date: 2005/03/08 15:31:22 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.25 $ -- -- The datatype of shell commands and the list of their options. ----------------------------------------------------------------------------- @@ -183,7 +183,7 @@ optionsOfCommand co = case co of CSystemCommand _ -> none CPrintGrammar -> both "utf8" "printer lang" - CPrintMultiGrammar -> both "utf8" "printer" + CPrintMultiGrammar -> both "utf8 utf8id" "printer" CPrintSourceGrammar -> both "utf8" "printer" CHelp _ -> opts "all filter length lexer unlexer printer transform depth number" diff --git a/src/HelpFile b/src/HelpFile index 4283164df..22e697da6 100644 --- a/src/HelpFile +++ b/src/HelpFile @@ -81,7 +81,8 @@ pm, print_multigrammar: pm Prints the current multilingual grammar in .gfcm form. (Automatically executes the strip command (s) before doing this.) options: - -utf8 apply UTF8-encoding to the grammar + -utf8 apply UTF8 encoding to the tokens in the grammar + -utf8id apply UTF8 encoding to the identifiers in the grammar -graph print module dependency graph in 'dot' format examples: pm | wf Letter.gfcm -- print the grammar into the file Letter.gfcm