adapted GFCC2FCFG to other uses of GFCC, made it to default parser

This commit is contained in:
aarne
2007-09-19 13:49:12 +00:00
parent 6f58675729
commit 7fd4e067fa
8 changed files with 97 additions and 70 deletions

View File

@@ -12,7 +12,8 @@
-- GFC to GFCC compiler. AR Aug-Oct 2006
-----------------------------------------------------------------------------
module GF.Canon.CanonToGFCC (prCanon2gfcc, mkCanon2gfcc) where
module GF.Canon.CanonToGFCC (
prCanon2gfcc, mkCanon2gfcc, mkCanon2gfccNoUTF8) where
import GF.Canon.AbsGFC
import qualified GF.Canon.GFC as GFC
@@ -43,9 +44,14 @@ import Debug.Trace ----
prCanon2gfcc :: CanonGrammar -> String
prCanon2gfcc = Pr.printTree . mkCanon2gfcc
-- this variant makes utf8 conversion; used in back ends
mkCanon2gfcc :: CanonGrammar -> C.Grammar
mkCanon2gfcc = canon2gfcc . reorder . utf8Conv . canon2canon . normalize
-- this variant makes no utf8 conversion; used in ShellState
mkCanon2gfccNoUTF8 :: CanonGrammar -> C.Grammar
mkCanon2gfccNoUTF8 = canon2gfcc . reorder . canon2canon . normalize
-- This is needed to reorganize the grammar. GFCC has its own back-end optimization.
-- But we need to have the canonical order in tables, created by valOpt
normalize :: CanonGrammar -> CanonGrammar
@@ -82,7 +88,7 @@ mkType t = case GM.catSkeleton t of
mkCType :: CType -> C.Term
mkCType t = case t of
TInts i -> C.C (fromInteger i)
TInts i -> C.C $ fromInteger i
-- record parameter alias - created in gfc preprocessing
RecType [Lbg (L (IC "_")) i, Lbg (L (IC "__")) t] -> C.RP (mkCType i) (mkCType t)
RecType rs -> C.R [mkCType t | Lbg _ t <- rs]
@@ -90,13 +96,13 @@ mkCType t = case t of
TStr -> C.S []
where
getI pt = case pt of
C.C i -> i
C.C i -> i
C.RP i _ -> getI i
mkTerm :: Term -> C.Term
mkTerm tr = case tr of
Arg (A _ i) -> C.V (fromInteger i)
EInt i -> C.C (fromInteger i)
Arg (A _ i) -> C.V $ fromInteger i
EInt i -> C.C $ fromInteger i
-- record parameter alias - created in gfc preprocessing
R [Ass (L (IC "_")) i, Ass (L (IC "__")) t] -> C.RP (mkTerm i) (mkTerm t)
-- ordinary record
@@ -111,11 +117,11 @@ mkTerm tr = case tr of
S t p -> C.P (mkTerm t) (mkTerm p)
C s t -> C.S [mkTerm x | x <- [s,t]]
FV ts -> C.FV [mkTerm t | t <- ts]
K (KS s) -> C.KS s
K (KP ss _) -> C.KP ss [] ---- TODO: prefix variants
K (KS s) -> C.K (C.KS s)
K (KP ss _) -> C.K (C.KP ss []) ---- TODO: prefix variants
E -> C.S []
Par _ _ -> prtTrace tr $ C.C 66661 ---- for debugging
_ -> C.S [C.KS (A.prt tr +++ "66662")] ---- for debugging
_ -> C.S [C.K (C.KS (A.prt tr +++ "66662"))] ---- for debugging
where
mkLab (L (IC l)) = case l of
'_':ds -> (read ds) :: Int
@@ -175,7 +181,7 @@ canon2canon :: CanonGrammar -> CanonGrammar
canon2canon = recollect . map cl2cl . repartition where
recollect =
M.MGrammar . nubBy (\ (i,_) (j,_) -> i==j) . concatMap M.modules
cl2cl cg = tr $ M.MGrammar $ map c2c $ M.modules cg where
cl2cl cg = {-tr $-} M.MGrammar $ map c2c $ M.modules cg where
c2c (c,m) = case m of
M.ModMod mo@(M.Module _ _ _ _ _ js) ->
(c, M.ModMod $ M.replaceJudgements mo $ mapTree j2j js)
@@ -406,30 +412,24 @@ optConcrete defs = subex
-- suffix sets can later be shared by subex elim
optTerm :: C.Term -> C.Term
optTerm tr =
case tr of
C.R ts -> mkSuff ts
C.P t v -> C.P (optTerm t) v
optTerm tr = case tr of
C.R ts@(_:_:_) | all isK ts -> mkSuff $ optToks [s | C.K (C.KS s) <- ts]
C.R ts -> C.R $ map optTerm ts
C.P t v -> C.P (optTerm t) v
C.L x t -> C.L x (optTerm t)
tr -> tr
where
mkSuff ts@(C.KS s : ts1@(_:_)) =
case pref s ts1 of
Nothing -> C.R (map optTerm ts)
Just "" -> C.R ts
Just prf -> let len = length prf
in C.W prf [drop len s | C.KS s <- ts]
where
pref cand [] = Just cand
pref cand (t:ts) =
case t of
C.KS s -> pref (getPrefix cand s) ts
_ -> Nothing
where
getPrefix cand s
| isPrefixOf cand s = cand
| otherwise = getPrefix (init cand) s
mkSuff ts = C.R (map optTerm ts)
_ -> tr
where
optToks ss = prf : suffs where
prf = pref (head ss) (tail ss)
suffs = map (drop (length prf)) ss
pref cand ss = case ss of
s1:ss2 -> if isPrefixOf cand s1 then pref cand ss2 else pref (init cand) ss
_ -> cand
isK t = case t of
C.K (C.KS _) -> True
_ -> False
mkSuff ("":ws) = C.R (map (C.K . C.KS) ws)
mkSuff (p:ws) = C.W p (C.R (map (C.K . C.KS) ws))
-- common subexpression elimination; see ./Subexpression.hs for the idea
@@ -454,7 +454,7 @@ addSubexpConsts tree lins =
_ -> case t of
C.R ts -> C.R $ map (recomp f) ts
C.S ts -> C.S $ map (recomp f) ts
C.W s ss -> C.W s ss
C.W s t -> C.W s (recomp f t)
C.P t p -> C.P (recomp f t) (recomp f p)
C.RP t p -> C.RP (recomp f t) (recomp f p)
C.L x t -> C.L x (recomp f t)
@@ -483,7 +483,8 @@ collectSubterms t = case t of
C.S ts -> do
mapM collectSubterms ts
add t
C.W s ts -> do
C.W s u -> do
collectSubterms u
add t
C.P p u -> do
collectSubterms p

View File

@@ -53,13 +53,14 @@ term2js l t = f t
C.R xs -> new "Arr" (map f xs)
C.P x y -> JS.ECall (JS.EMember (f x) (JS.Ident "sel")) [f y]
C.S xs -> new "Seq" (map f xs)
C.KS s -> new "Str" [JS.EStr s]
C.KP ss vs -> new "Seq" (map JS.EStr ss) -- FIXME
C.K (C.KS s) -> new "Str" [JS.EStr s]
C.K (C.KP ss vs) -> new "Seq" (map JS.EStr ss) -- FIXME
C.V i -> JS.EIndex (JS.EVar children) (JS.EInt i)
C.C i -> new "Int" [JS.EInt i]
C.F (C.CId f) -> JS.ECall (JS.EMember (JS.EVar l) (JS.Ident "rule")) [JS.EStr f, JS.EVar children]
C.FV xs -> new "Variants" (map f xs)
C.W str ss -> new "Suffix" (JS.EStr str : map JS.EStr ss)
C.W str (C.R r) ->
new "Suffix" (JS.EStr str : [JS.EStr s | C.K (C.KS s) <- r])
C.RP x y -> new "Rp" [f x, f y]
C.TM -> new "Meta" []

View File

@@ -47,19 +47,24 @@ data Term =
R [Term]
| P Term Term
| S [Term]
| KS String
| KP [String] [Variant]
| K Tokn
| V Int
| C Int
| F CId
| FV [Term]
| W String [String]
| W String Term
| RP Term Term
| TM
| L CId Term
| BV CId
deriving (Eq,Ord,Show)
data Tokn =
KS String
| KP [String] [Variant]
deriving (Eq,Ord,Show)
data Variant =
Var [String] [String]
deriving (Eq,Ord,Show)

View File

@@ -46,9 +46,10 @@ realize :: Term -> String
realize trm = case trm of
R ts -> realize (ts !! 0)
S ss -> unwords $ Prelude.map realize ss
KS s -> s
KP s _ -> unwords s ---- prefix choice TODO
W s ss -> s ++ (ss !! 0)
K t -> case t of
KS s -> s
KP s _ -> unwords s ---- prefix choice TODO
W s t -> s ++ realize t
FV ts -> realize (ts !! 0) ---- other variants TODO
RP _ r -> realize r
TM -> "?"
@@ -58,9 +59,9 @@ linExp :: GFCC -> CId -> Exp -> Term
linExp mcfg lang tree@(Tr at trees) =
case at of
AC fun -> comp (Prelude.map lin trees) $ look fun
AS s -> R [KS (show s)] -- quoted
AI i -> R [KS (show i)]
AF d -> R [KS (show d)]
AS s -> R [kks (show s)] -- quoted
AI i -> R [kks (show i)]
AF d -> R [kks (show d)]
AM -> TM
where
lin = linExp mcfg lang
@@ -71,17 +72,20 @@ exp0 :: Exp
exp0 = Tr (AS "NO_PARSE") []
term0 :: CId -> Term
term0 (CId s) = R [KS ("#" ++ s ++ "#")]
term0 (CId s) = R [kks ("#" ++ s ++ "#")]
kks :: String -> Term
kks = K . KS
compute :: GFCC -> CId -> [Term] -> Term -> Term
compute mcfg lang args = comp where
comp trm = case trm of
P r p -> proj (comp r) (comp p)
RP i t -> RP (comp i) (comp t)
W s ss -> W s ss
W s t -> W s (comp t)
R ts -> R $ Prelude.map comp ts
V i -> idx args i -- already computed
F c -> comp $ look c -- not computed (if contains argvar)
V i -> idx args i -- already computed
F c -> comp $ look c -- not computed (if contains argvar)
FV ts -> FV $ Prelude.map comp ts
S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
_ -> trm
@@ -90,9 +94,14 @@ compute mcfg lang args = comp where
idx xs i = if i > length xs - 1 then trace "overrun !!\n" (last xs) else xs !! i
proj r p = case p of
FV ts -> FV $ Prelude.map (proj r) ts
_ -> comp $ getField r (getIndex p)
proj r p = case (r,p) of
(_, FV ts) -> FV $ Prelude.map (proj r) ts
(W s t, _) -> kks (s ++ getString (proj t p))
_ -> comp $ getField r (getIndex p)
getString t = case t of
K (KS s) -> s
_ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
getIndex t = case t of
C i -> i
@@ -102,7 +111,6 @@ compute mcfg lang args = comp where
getField t i = case t of
R rs -> idx rs i
W s ss -> KS (s ++ idx ss i)
RP _ r -> getField r i
TM -> TM
_ -> trace ("ERROR in grammar compiler: field from " ++ show t) t

View File

@@ -69,11 +69,10 @@ prPrec :: Int -> Int -> Doc -> Doc
prPrec i j = if j<i then parenth else id
instance Print Int where
instance Print Integer where
prt _ x = doc (shows x)
instance Print Integer where
instance Print Int where
prt _ x = doc (shows x)
@@ -157,8 +156,7 @@ instance Print Term where
R terms -> prPrec i 0 (concatD [doc (showString "[") , prt 0 terms , doc (showString "]")])
P term0 term -> prPrec i 0 (concatD [doc (showString "(") , prt 0 term0 , doc (showString "!") , prt 0 term , doc (showString ")")])
S terms -> prPrec i 0 (concatD [doc (showString "(") , prt 0 terms , doc (showString ")")])
KS str -> prPrec i 0 (concatD [prt 0 str])
KP strs variants -> prPrec i 0 (concatD [doc (showString "[") , doc (showString "pre") , prt 0 strs , doc (showString "[") , prt 0 variants , doc (showString "]") , doc (showString "]")])
K tokn -> prPrec i 0 (concatD [prt 0 tokn])
V n -> prPrec i 0 (concatD [doc (showString "$") , prt 0 n])
C n -> prPrec i 0 (concatD [prt 0 n])
F cid -> prPrec i 0 (concatD [prt 0 cid])
@@ -174,6 +172,11 @@ instance Print Term where
[x] -> (concatD [prt 0 x])
x:xs -> (concatD [prt 0 x , doc (showString ",") , prt 0 xs])
instance Print Tokn where
prt i e = case e of
KS str -> prPrec i 0 (concatD [prt 0 str])
KP strs variants -> prPrec i 0 (concatD [doc (showString "[") , doc (showString "pre") , prt 0 strs , doc (showString "[") , prt 0 variants , doc (showString "]") , doc (showString "]")])
instance Print Variant where
prt i e = case e of

View File

@@ -74,7 +74,7 @@ mkType t = case GM.catSkeleton t of
mkCType :: Type -> C.Term
mkCType t = case t of
EInt i -> C.C i
EInt i -> C.C $ fromInteger i
-- record parameter alias - created in gfc preprocessing
RecType [(LIdent "_", i), (LIdent "__", t)] -> C.RP (mkCType i) (mkCType t)
RecType rs -> C.R [mkCType t | (_, t) <- rs]
@@ -82,14 +82,14 @@ mkCType t = case t of
_ -> C.S [] ----- TStr
where
getI pt = case pt of
C.C i -> fromInteger i
C.C i -> i
C.RP i _ -> getI i
_ -> 1 -----
mkTerm :: Term -> C.Term
mkTerm tr = case tr of
Vr (IA (_,i)) -> C.V $ toInteger i
EInt i -> C.C i
Vr (IA (_,i)) -> C.V i
EInt i -> C.C $ fromInteger i
-- record parameter alias - created in gfc preprocessing
R [(LIdent "_", (_,i)), (LIdent "__", (_,t))] -> C.RP (mkTerm i) (mkTerm t)
-- ordinary record
@@ -112,7 +112,7 @@ mkTerm tr = case tr of
_ -> C.S [C.K (C.KS (A.prt tr +++ "66662"))] ---- for debugging
where
mkLab (LIdent l) = case l of
'_':ds -> (read ds) :: Integer
'_':ds -> (read ds) :: Int
_ -> prtTrace tr $ 66663
-- return just one module per language

View File

@@ -260,7 +260,7 @@ updateShellState opts ign mcnc sh ((_,sgr,gr,eenv),rts) = do
let fromGFC = snd . snd . Cnv.convertGFC opts
(mcfgs, cfgs) = unzip $ map (curry fromGFC cgr) concrs
fcfgs = FCnv.convertGrammar (C2GFCC.mkCanon2gfcc cgr)
fcfgs = FCnv.convertGrammar (C2GFCC.mkCanon2gfccNoUTF8 cgr)
pInfos = zipWith3 Prs.buildPInfo mcfgs (map snd fcfgs) cfgs

View File

@@ -133,14 +133,23 @@ convertTerm cnc_defs selector (FV vars) lins = do term <-
convertTerm cnc_defs selector term lins
convertTerm cnc_defs selector (S ts) ((lbl_path,lin) : lins) = do projectHead lbl_path
foldM (\lins t -> convertTerm cnc_defs selector t lins) ((lbl_path,lin) : lins) (reverse ts)
convertTerm cnc_defs selector (KS str) ((lbl_path,lin) : lins) = do projectHead lbl_path
return ((lbl_path,Tok str : lin) : lins)
convertTerm cnc_defs selector (KP (str:_)_)((lbl_path,lin) : lins) = do projectHead lbl_path
return ((lbl_path,Tok str : lin) : lins)
convertTerm cnc_defs selector (K (KS str)) ((lbl_path,lin) : lins) =
do projectHead lbl_path
return ((lbl_path,Tok str : lin) : lins)
convertTerm cnc_defs selector (K (KP (str:_)_))((lbl_path,lin) : lins) =
do projectHead lbl_path
return ((lbl_path,Tok str : lin) : lins)
convertTerm cnc_defs selector (RP _ term) lins = convertTerm cnc_defs selector term lins
convertTerm cnc_defs selector (F id) lins = do term <- Map.lookup id cnc_defs
convertTerm cnc_defs selector term lins
convertTerm cnc_defs selector (W s ss) ((lbl_path,lin) : lins) = convertRec cnc_defs selector 0 [KS (s ++ s1) | s1 <- ss] lbl_path lin lins
convertTerm cnc_defs selector (W s t) ((lbl_path,lin) : lins) = do
ss <- case t of
R ss -> return ss
F f -> do
t <- Map.lookup f cnc_defs
case t of
R ss -> return ss
convertRec cnc_defs selector 0 [K (KS (s ++ s1)) | K (KS s1) <- ss] lbl_path lin lins
convertTerm cnc_defs selector x lins = error ("convertTerm ("++show x++")")