diff --git a/devel/koe/KoeFre.gf b/devel/koe/KoeFre.gf index 7b36ae67c..cf0b31f2d 100644 --- a/devel/koe/KoeFre.gf +++ b/devel/koe/KoeFre.gf @@ -1,5 +1,7 @@ concrete KoeFre of Koe = { +flags coding=utf8 ; + param Gen = Masc | Fem ; Num = Sg | Pl ; diff --git a/src/GF/Devel/GrammarToGFCC.hs b/src/GF/Devel/GrammarToGFCC.hs index d939f06ab..4fe2e6e0d 100644 --- a/src/GF/Devel/GrammarToGFCC.hs +++ b/src/GF/Devel/GrammarToGFCC.hs @@ -32,7 +32,7 @@ prGrammar2gfcc opts cnc gr = (abs, D.printGFCC gc) where mkCanon2gfcc :: Options -> String -> SourceGrammar -> (String,D.GFCC) mkCanon2gfcc opts cnc gr = - (prIdent abs, (canon2gfcc opts . reorder abs . utf8Conv . canon2canon abs) gr) + (prIdent abs, (canon2gfcc opts . reorder abs . canon2canon abs) gr) where abs = err error id $ M.abstractOfConcrete gr (identC cnc) @@ -69,12 +69,14 @@ canon2gfcc opts cgr@(M.MGrammar ((a,M.ModMod abm):cms)) = js = tree2list (M.jments mo) flags = Map.fromList [(C.CId f,x) | Opt (f,[x]) <- M.flags mo] opers = Map.fromAscList [] -- opers will be created as optimization + utf = if elem (Opt ("coding",["utf8"])) (M.flags mo) + then D.convertStringsInTerm decodeUTF8 else id lins = Map.fromAscList - [(i2i f, mkTerm tr) | (f,CncFun _ (Yes tr) _) <- js] + [(i2i f, utf (mkTerm tr)) | (f,CncFun _ (Yes tr) _) <- js] lincats = Map.fromAscList [(i2i c, mkCType ty) | (c,CncCat (Yes ty) _ _) <- js] lindefs = Map.fromAscList - [(i2i c, mkTerm tr) | (c,CncCat _ (Yes tr) _) <- js] + [(i2i c, mkTerm tr) | (c,CncCat _ (Yes tr) _) <- js] printnames = Map.union (Map.fromAscList [(i2i f, mkTerm tr) | (f,CncFun _ _ (Yes tr)) <- js]) (Map.fromAscList [(i2i f, mkTerm tr) | (f,CncCat _ _ (Yes tr)) <- js]) @@ -95,7 +97,11 @@ mkExp t = case t of mkAt c = case c of Q _ c -> C.AC $ i2i c QC _ c -> C.AC $ i2i c + Vr x -> C.AV $ i2i x EInt i -> C.AI i + EFloat f -> C.AF f + K s -> C.AS s + Meta (MetaSymb i) -> C.AM $ toInteger i _ -> C.AM 0 mkPatt p = uncurry CM.tree $ case p of A.PP _ c ps -> (C.AC (i2i c), map mkPatt ps) @@ -182,20 +188,6 @@ repartition abs cg = [M.partOfGrammar cg (lang,mo) | let mo = errVal (error ("no module found for " ++ A.prt lang)) $ M.lookupModule cg lang ] - --- convert to UTF8 if not yet converted -utf8Conv :: SourceGrammar -> SourceGrammar -utf8Conv = M.MGrammar . map toUTF8 . M.modules where - toUTF8 mo = case mo of - (i, M.ModMod m) - ----- | hasFlagCanon (flagCanon "coding" "utf8") mo -> mo - | otherwise -> (i, M.ModMod $ - m{ M.jments = M.jments m ----- ------ mapTree (onSnd (mapInfoTerms (onTokens encodeUTF8))) (M.jments m), - ----- M.flags = setFlag "coding" "utf8" (M.flags m) - } - ) - _ -> mo -- translate tables and records to arrays, parameters and labels to indices diff --git a/src/GF/GFCC/DataGFCC.hs b/src/GF/GFCC/DataGFCC.hs index e2e5a4efe..3d6cca3cc 100644 --- a/src/GF/GFCC/DataGFCC.hs +++ b/src/GF/GFCC/DataGFCC.hs @@ -3,6 +3,8 @@ module GF.GFCC.DataGFCC where import GF.GFCC.AbsGFCC import GF.GFCC.PrintGFCC import GF.Infra.CompactPrint +import GF.Text.UTF8 + import Data.Map import Data.List @@ -71,7 +73,7 @@ mkGFCC (Grm a cs ab@(Abs afls fs cts) ccs) = GFCC { -- convert internal GFCC and pretty-print it printGFCC :: GFCC -> String -printGFCC gfcc = compactPrintGFCC $ printTree $ Grm +printGFCC gfcc0 = compactPrintGFCC $ printTree $ Grm (absname gfcc) (cncnames gfcc) (Abs @@ -88,9 +90,36 @@ printGFCC gfcc = compactPrintGFCC $ printTree $ Grm [Lin f v | (f,v) <- assocs (lincats cnc)] [Lin f v | (f,v) <- assocs (lindefs cnc)] [Lin f v | (f,v) <- assocs (printnames cnc)] + gfcc = utf8GFCC gfcc0 -- default map and filter are for Map here lmap = Prelude.map lfilter = Prelude.filter +mmap = Data.Map.map +-- encode idenfifiers and strings in UTF8 + +utf8GFCC :: GFCC -> GFCC +utf8GFCC gfcc = gfcc { + concretes = mmap u8concr (concretes gfcc) + } + where + u8concr cnc = cnc { + lins = mmap u8term (lins cnc), + opers = mmap u8term (opers cnc) + } + u8term = convertStringsInTerm encodeUTF8 + +---- TODO: convert identifiers and flags + +convertStringsInTerm conv t = case t of + K (KS s) -> K (KS (conv s)) + W s r -> W (conv s) (convs r) + R ts -> R $ lmap convs ts + S ts -> S $ lmap convs ts + FV ts -> FV $ lmap convs ts + P u v -> P (convs u) (convs v) + _ -> t + where + convs = convertStringsInTerm conv diff --git a/src/GF/GFCC/Macros.hs b/src/GF/GFCC/Macros.hs index 2fe6770f1..a44250e98 100644 --- a/src/GF/GFCC/Macros.hs +++ b/src/GF/GFCC/Macros.hs @@ -24,6 +24,14 @@ lookType :: GFCC -> CId -> Type lookType gfcc f = fst $ lookMap (error $ "lookType " ++ show f) f (funs (abstract gfcc)) +lookAbsFlag :: GFCC -> CId -> String +lookAbsFlag gfcc f = + lookMap (error $ "lookAbsFlag " ++ show f) f (aflags (abstract gfcc)) + +lookFlag :: GFCC -> CId -> CId -> String +lookFlag gfcc lang fun = + lookMap "?" fun $ flags $ lookMap (error "no lang") lang $ concretes gfcc + functionsToCat :: GFCC -> CId -> [(CId,Type)] functionsToCat gfcc cat = [(f,ty) | f <- fs, Just (ty,_) <- [Data.Map.lookup f $ funs $ abstract gfcc]]