1
0
forked from GitHub/gf-core

UTF8 encoding of strings in terms in GFCC in gfc

This commit is contained in:
aarne
2007-10-08 14:20:37 +00:00
parent 91a1d04def
commit 67420e3c9d
4 changed files with 49 additions and 18 deletions

View File

@@ -1,5 +1,7 @@
concrete KoeFre of Koe = {
flags coding=utf8 ;
param
Gen = Masc | Fem ;
Num = Sg | Pl ;

View File

@@ -32,7 +32,7 @@ prGrammar2gfcc opts cnc gr = (abs, D.printGFCC gc) where
mkCanon2gfcc :: Options -> String -> SourceGrammar -> (String,D.GFCC)
mkCanon2gfcc opts cnc gr =
(prIdent abs, (canon2gfcc opts . reorder abs . utf8Conv . canon2canon abs) gr)
(prIdent abs, (canon2gfcc opts . reorder abs . canon2canon abs) gr)
where
abs = err error id $ M.abstractOfConcrete gr (identC cnc)
@@ -69,12 +69,14 @@ canon2gfcc opts cgr@(M.MGrammar ((a,M.ModMod abm):cms)) =
js = tree2list (M.jments mo)
flags = Map.fromList [(C.CId f,x) | Opt (f,[x]) <- M.flags mo]
opers = Map.fromAscList [] -- opers will be created as optimization
utf = if elem (Opt ("coding",["utf8"])) (M.flags mo)
then D.convertStringsInTerm decodeUTF8 else id
lins = Map.fromAscList
[(i2i f, mkTerm tr) | (f,CncFun _ (Yes tr) _) <- js]
[(i2i f, utf (mkTerm tr)) | (f,CncFun _ (Yes tr) _) <- js]
lincats = Map.fromAscList
[(i2i c, mkCType ty) | (c,CncCat (Yes ty) _ _) <- js]
lindefs = Map.fromAscList
[(i2i c, mkTerm tr) | (c,CncCat _ (Yes tr) _) <- js]
[(i2i c, mkTerm tr) | (c,CncCat _ (Yes tr) _) <- js]
printnames = Map.union
(Map.fromAscList [(i2i f, mkTerm tr) | (f,CncFun _ _ (Yes tr)) <- js])
(Map.fromAscList [(i2i f, mkTerm tr) | (f,CncCat _ _ (Yes tr)) <- js])
@@ -95,7 +97,11 @@ mkExp t = case t of
mkAt c = case c of
Q _ c -> C.AC $ i2i c
QC _ c -> C.AC $ i2i c
Vr x -> C.AV $ i2i x
EInt i -> C.AI i
EFloat f -> C.AF f
K s -> C.AS s
Meta (MetaSymb i) -> C.AM $ toInteger i
_ -> C.AM 0
mkPatt p = uncurry CM.tree $ case p of
A.PP _ c ps -> (C.AC (i2i c), map mkPatt ps)
@@ -182,20 +188,6 @@ repartition abs cg = [M.partOfGrammar cg (lang,mo) |
let mo = errVal
(error ("no module found for " ++ A.prt lang)) $ M.lookupModule cg lang
]
-- convert to UTF8 if not yet converted
utf8Conv :: SourceGrammar -> SourceGrammar
utf8Conv = M.MGrammar . map toUTF8 . M.modules where
toUTF8 mo = case mo of
(i, M.ModMod m)
----- | hasFlagCanon (flagCanon "coding" "utf8") mo -> mo
| otherwise -> (i, M.ModMod $
m{ M.jments = M.jments m -----
----- mapTree (onSnd (mapInfoTerms (onTokens encodeUTF8))) (M.jments m),
----- M.flags = setFlag "coding" "utf8" (M.flags m)
}
)
_ -> mo
-- translate tables and records to arrays, parameters and labels to indices

View File

@@ -3,6 +3,8 @@ module GF.GFCC.DataGFCC where
import GF.GFCC.AbsGFCC
import GF.GFCC.PrintGFCC
import GF.Infra.CompactPrint
import GF.Text.UTF8
import Data.Map
import Data.List
@@ -71,7 +73,7 @@ mkGFCC (Grm a cs ab@(Abs afls fs cts) ccs) = GFCC {
-- convert internal GFCC and pretty-print it
printGFCC :: GFCC -> String
printGFCC gfcc = compactPrintGFCC $ printTree $ Grm
printGFCC gfcc0 = compactPrintGFCC $ printTree $ Grm
(absname gfcc)
(cncnames gfcc)
(Abs
@@ -88,9 +90,36 @@ printGFCC gfcc = compactPrintGFCC $ printTree $ Grm
[Lin f v | (f,v) <- assocs (lincats cnc)]
[Lin f v | (f,v) <- assocs (lindefs cnc)]
[Lin f v | (f,v) <- assocs (printnames cnc)]
gfcc = utf8GFCC gfcc0
-- default map and filter are for Map here
lmap = Prelude.map
lfilter = Prelude.filter
mmap = Data.Map.map
-- encode idenfifiers and strings in UTF8
utf8GFCC :: GFCC -> GFCC
utf8GFCC gfcc = gfcc {
concretes = mmap u8concr (concretes gfcc)
}
where
u8concr cnc = cnc {
lins = mmap u8term (lins cnc),
opers = mmap u8term (opers cnc)
}
u8term = convertStringsInTerm encodeUTF8
---- TODO: convert identifiers and flags
convertStringsInTerm conv t = case t of
K (KS s) -> K (KS (conv s))
W s r -> W (conv s) (convs r)
R ts -> R $ lmap convs ts
S ts -> S $ lmap convs ts
FV ts -> FV $ lmap convs ts
P u v -> P (convs u) (convs v)
_ -> t
where
convs = convertStringsInTerm conv

View File

@@ -24,6 +24,14 @@ lookType :: GFCC -> CId -> Type
lookType gfcc f =
fst $ lookMap (error $ "lookType " ++ show f) f (funs (abstract gfcc))
lookAbsFlag :: GFCC -> CId -> String
lookAbsFlag gfcc f =
lookMap (error $ "lookAbsFlag " ++ show f) f (aflags (abstract gfcc))
lookFlag :: GFCC -> CId -> CId -> String
lookFlag gfcc lang fun =
lookMap "?" fun $ flags $ lookMap (error "no lang") lang $ concretes gfcc
functionsToCat :: GFCC -> CId -> [(CId,Type)]
functionsToCat gfcc cat =
[(f,ty) | f <- fs, Just (ty,_) <- [Data.Map.lookup f $ funs $ abstract gfcc]]