forked from GitHub/gf-core
UTF8 encoding of strings in terms in GFCC in gfc
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
concrete KoeFre of Koe = {
|
||||
|
||||
flags coding=utf8 ;
|
||||
|
||||
param
|
||||
Gen = Masc | Fem ;
|
||||
Num = Sg | Pl ;
|
||||
|
||||
@@ -32,7 +32,7 @@ prGrammar2gfcc opts cnc gr = (abs, D.printGFCC gc) where
|
||||
|
||||
mkCanon2gfcc :: Options -> String -> SourceGrammar -> (String,D.GFCC)
|
||||
mkCanon2gfcc opts cnc gr =
|
||||
(prIdent abs, (canon2gfcc opts . reorder abs . utf8Conv . canon2canon abs) gr)
|
||||
(prIdent abs, (canon2gfcc opts . reorder abs . canon2canon abs) gr)
|
||||
where
|
||||
abs = err error id $ M.abstractOfConcrete gr (identC cnc)
|
||||
|
||||
@@ -69,12 +69,14 @@ canon2gfcc opts cgr@(M.MGrammar ((a,M.ModMod abm):cms)) =
|
||||
js = tree2list (M.jments mo)
|
||||
flags = Map.fromList [(C.CId f,x) | Opt (f,[x]) <- M.flags mo]
|
||||
opers = Map.fromAscList [] -- opers will be created as optimization
|
||||
utf = if elem (Opt ("coding",["utf8"])) (M.flags mo)
|
||||
then D.convertStringsInTerm decodeUTF8 else id
|
||||
lins = Map.fromAscList
|
||||
[(i2i f, mkTerm tr) | (f,CncFun _ (Yes tr) _) <- js]
|
||||
[(i2i f, utf (mkTerm tr)) | (f,CncFun _ (Yes tr) _) <- js]
|
||||
lincats = Map.fromAscList
|
||||
[(i2i c, mkCType ty) | (c,CncCat (Yes ty) _ _) <- js]
|
||||
lindefs = Map.fromAscList
|
||||
[(i2i c, mkTerm tr) | (c,CncCat _ (Yes tr) _) <- js]
|
||||
[(i2i c, mkTerm tr) | (c,CncCat _ (Yes tr) _) <- js]
|
||||
printnames = Map.union
|
||||
(Map.fromAscList [(i2i f, mkTerm tr) | (f,CncFun _ _ (Yes tr)) <- js])
|
||||
(Map.fromAscList [(i2i f, mkTerm tr) | (f,CncCat _ _ (Yes tr)) <- js])
|
||||
@@ -95,7 +97,11 @@ mkExp t = case t of
|
||||
mkAt c = case c of
|
||||
Q _ c -> C.AC $ i2i c
|
||||
QC _ c -> C.AC $ i2i c
|
||||
Vr x -> C.AV $ i2i x
|
||||
EInt i -> C.AI i
|
||||
EFloat f -> C.AF f
|
||||
K s -> C.AS s
|
||||
Meta (MetaSymb i) -> C.AM $ toInteger i
|
||||
_ -> C.AM 0
|
||||
mkPatt p = uncurry CM.tree $ case p of
|
||||
A.PP _ c ps -> (C.AC (i2i c), map mkPatt ps)
|
||||
@@ -182,20 +188,6 @@ repartition abs cg = [M.partOfGrammar cg (lang,mo) |
|
||||
let mo = errVal
|
||||
(error ("no module found for " ++ A.prt lang)) $ M.lookupModule cg lang
|
||||
]
|
||||
|
||||
-- convert to UTF8 if not yet converted
|
||||
utf8Conv :: SourceGrammar -> SourceGrammar
|
||||
utf8Conv = M.MGrammar . map toUTF8 . M.modules where
|
||||
toUTF8 mo = case mo of
|
||||
(i, M.ModMod m)
|
||||
----- | hasFlagCanon (flagCanon "coding" "utf8") mo -> mo
|
||||
| otherwise -> (i, M.ModMod $
|
||||
m{ M.jments = M.jments m -----
|
||||
----- mapTree (onSnd (mapInfoTerms (onTokens encodeUTF8))) (M.jments m),
|
||||
----- M.flags = setFlag "coding" "utf8" (M.flags m)
|
||||
}
|
||||
)
|
||||
_ -> mo
|
||||
|
||||
|
||||
-- translate tables and records to arrays, parameters and labels to indices
|
||||
|
||||
@@ -3,6 +3,8 @@ module GF.GFCC.DataGFCC where
|
||||
import GF.GFCC.AbsGFCC
|
||||
import GF.GFCC.PrintGFCC
|
||||
import GF.Infra.CompactPrint
|
||||
import GF.Text.UTF8
|
||||
|
||||
import Data.Map
|
||||
import Data.List
|
||||
|
||||
@@ -71,7 +73,7 @@ mkGFCC (Grm a cs ab@(Abs afls fs cts) ccs) = GFCC {
|
||||
-- convert internal GFCC and pretty-print it
|
||||
|
||||
printGFCC :: GFCC -> String
|
||||
printGFCC gfcc = compactPrintGFCC $ printTree $ Grm
|
||||
printGFCC gfcc0 = compactPrintGFCC $ printTree $ Grm
|
||||
(absname gfcc)
|
||||
(cncnames gfcc)
|
||||
(Abs
|
||||
@@ -88,9 +90,36 @@ printGFCC gfcc = compactPrintGFCC $ printTree $ Grm
|
||||
[Lin f v | (f,v) <- assocs (lincats cnc)]
|
||||
[Lin f v | (f,v) <- assocs (lindefs cnc)]
|
||||
[Lin f v | (f,v) <- assocs (printnames cnc)]
|
||||
gfcc = utf8GFCC gfcc0
|
||||
|
||||
-- default map and filter are for Map here
|
||||
lmap = Prelude.map
|
||||
lfilter = Prelude.filter
|
||||
mmap = Data.Map.map
|
||||
|
||||
-- encode idenfifiers and strings in UTF8
|
||||
|
||||
utf8GFCC :: GFCC -> GFCC
|
||||
utf8GFCC gfcc = gfcc {
|
||||
concretes = mmap u8concr (concretes gfcc)
|
||||
}
|
||||
where
|
||||
u8concr cnc = cnc {
|
||||
lins = mmap u8term (lins cnc),
|
||||
opers = mmap u8term (opers cnc)
|
||||
}
|
||||
u8term = convertStringsInTerm encodeUTF8
|
||||
|
||||
---- TODO: convert identifiers and flags
|
||||
|
||||
convertStringsInTerm conv t = case t of
|
||||
K (KS s) -> K (KS (conv s))
|
||||
W s r -> W (conv s) (convs r)
|
||||
R ts -> R $ lmap convs ts
|
||||
S ts -> S $ lmap convs ts
|
||||
FV ts -> FV $ lmap convs ts
|
||||
P u v -> P (convs u) (convs v)
|
||||
_ -> t
|
||||
where
|
||||
convs = convertStringsInTerm conv
|
||||
|
||||
|
||||
@@ -24,6 +24,14 @@ lookType :: GFCC -> CId -> Type
|
||||
lookType gfcc f =
|
||||
fst $ lookMap (error $ "lookType " ++ show f) f (funs (abstract gfcc))
|
||||
|
||||
lookAbsFlag :: GFCC -> CId -> String
|
||||
lookAbsFlag gfcc f =
|
||||
lookMap (error $ "lookAbsFlag " ++ show f) f (aflags (abstract gfcc))
|
||||
|
||||
lookFlag :: GFCC -> CId -> CId -> String
|
||||
lookFlag gfcc lang fun =
|
||||
lookMap "?" fun $ flags $ lookMap (error "no lang") lang $ concretes gfcc
|
||||
|
||||
functionsToCat :: GFCC -> CId -> [(CId,Type)]
|
||||
functionsToCat gfcc cat =
|
||||
[(f,ty) | f <- fs, Just (ty,_) <- [Data.Map.lookup f $ funs $ abstract gfcc]]
|
||||
|
||||
Reference in New Issue
Block a user