From 7fd4e067fabe4224317f7326edf1e3cbc99eb836 Mon Sep 17 00:00:00 2001 From: aarne Date: Wed, 19 Sep 2007 13:49:12 +0000 Subject: [PATCH] adapted GFCC2FCFG to other uses of GFCC, made it to default parser --- src/GF/Canon/CanonToGFCC.hs | 69 ++++++++++++++++--------------- src/GF/Canon/CanonToJS.hs | 7 ++-- src/GF/Canon/GFCC/AbsGFCC.hs | 11 +++-- src/GF/Canon/GFCC/DataGFCC.hs | 36 +++++++++------- src/GF/Canon/GFCC/PrintGFCC.hs | 13 +++--- src/GF/Compile/GrammarToGFCC.hs | 10 ++--- src/GF/Compile/ShellState.hs | 2 +- src/GF/Conversion/SimpleToFCFG.hs | 19 ++++++--- 8 files changed, 97 insertions(+), 70 deletions(-) diff --git a/src/GF/Canon/CanonToGFCC.hs b/src/GF/Canon/CanonToGFCC.hs index 65b591acd..3898180c1 100644 --- a/src/GF/Canon/CanonToGFCC.hs +++ b/src/GF/Canon/CanonToGFCC.hs @@ -12,7 +12,8 @@ -- GFC to GFCC compiler. AR Aug-Oct 2006 ----------------------------------------------------------------------------- -module GF.Canon.CanonToGFCC (prCanon2gfcc, mkCanon2gfcc) where +module GF.Canon.CanonToGFCC ( + prCanon2gfcc, mkCanon2gfcc, mkCanon2gfccNoUTF8) where import GF.Canon.AbsGFC import qualified GF.Canon.GFC as GFC @@ -43,9 +44,14 @@ import Debug.Trace ---- prCanon2gfcc :: CanonGrammar -> String prCanon2gfcc = Pr.printTree . mkCanon2gfcc +-- this variant makes utf8 conversion; used in back ends mkCanon2gfcc :: CanonGrammar -> C.Grammar mkCanon2gfcc = canon2gfcc . reorder . utf8Conv . canon2canon . normalize +-- this variant makes no utf8 conversion; used in ShellState +mkCanon2gfccNoUTF8 :: CanonGrammar -> C.Grammar +mkCanon2gfccNoUTF8 = canon2gfcc . reorder . canon2canon . normalize + -- This is needed to reorganize the grammar. GFCC has its own back-end optimization. -- But we need to have the canonical order in tables, created by valOpt normalize :: CanonGrammar -> CanonGrammar @@ -82,7 +88,7 @@ mkType t = case GM.catSkeleton t of mkCType :: CType -> C.Term mkCType t = case t of - TInts i -> C.C (fromInteger i) + TInts i -> C.C $ fromInteger i -- record parameter alias - created in gfc preprocessing RecType [Lbg (L (IC "_")) i, Lbg (L (IC "__")) t] -> C.RP (mkCType i) (mkCType t) RecType rs -> C.R [mkCType t | Lbg _ t <- rs] @@ -90,13 +96,13 @@ mkCType t = case t of TStr -> C.S [] where getI pt = case pt of - C.C i -> i + C.C i -> i C.RP i _ -> getI i mkTerm :: Term -> C.Term mkTerm tr = case tr of - Arg (A _ i) -> C.V (fromInteger i) - EInt i -> C.C (fromInteger i) + Arg (A _ i) -> C.V $ fromInteger i + EInt i -> C.C $ fromInteger i -- record parameter alias - created in gfc preprocessing R [Ass (L (IC "_")) i, Ass (L (IC "__")) t] -> C.RP (mkTerm i) (mkTerm t) -- ordinary record @@ -111,11 +117,11 @@ mkTerm tr = case tr of S t p -> C.P (mkTerm t) (mkTerm p) C s t -> C.S [mkTerm x | x <- [s,t]] FV ts -> C.FV [mkTerm t | t <- ts] - K (KS s) -> C.KS s - K (KP ss _) -> C.KP ss [] ---- TODO: prefix variants + K (KS s) -> C.K (C.KS s) + K (KP ss _) -> C.K (C.KP ss []) ---- TODO: prefix variants E -> C.S [] Par _ _ -> prtTrace tr $ C.C 66661 ---- for debugging - _ -> C.S [C.KS (A.prt tr +++ "66662")] ---- for debugging + _ -> C.S [C.K (C.KS (A.prt tr +++ "66662"))] ---- for debugging where mkLab (L (IC l)) = case l of '_':ds -> (read ds) :: Int @@ -175,7 +181,7 @@ canon2canon :: CanonGrammar -> CanonGrammar canon2canon = recollect . map cl2cl . repartition where recollect = M.MGrammar . nubBy (\ (i,_) (j,_) -> i==j) . concatMap M.modules - cl2cl cg = tr $ M.MGrammar $ map c2c $ M.modules cg where + cl2cl cg = {-tr $-} M.MGrammar $ map c2c $ M.modules cg where c2c (c,m) = case m of M.ModMod mo@(M.Module _ _ _ _ _ js) -> (c, M.ModMod $ M.replaceJudgements mo $ mapTree j2j js) @@ -406,30 +412,24 @@ optConcrete defs = subex -- suffix sets can later be shared by subex elim optTerm :: C.Term -> C.Term -optTerm tr = - case tr of - C.R ts -> mkSuff ts - C.P t v -> C.P (optTerm t) v +optTerm tr = case tr of + C.R ts@(_:_:_) | all isK ts -> mkSuff $ optToks [s | C.K (C.KS s) <- ts] + C.R ts -> C.R $ map optTerm ts + C.P t v -> C.P (optTerm t) v C.L x t -> C.L x (optTerm t) - tr -> tr - where - mkSuff ts@(C.KS s : ts1@(_:_)) = - case pref s ts1 of - Nothing -> C.R (map optTerm ts) - Just "" -> C.R ts - Just prf -> let len = length prf - in C.W prf [drop len s | C.KS s <- ts] - where - pref cand [] = Just cand - pref cand (t:ts) = - case t of - C.KS s -> pref (getPrefix cand s) ts - _ -> Nothing - where - getPrefix cand s - | isPrefixOf cand s = cand - | otherwise = getPrefix (init cand) s - mkSuff ts = C.R (map optTerm ts) + _ -> tr + where + optToks ss = prf : suffs where + prf = pref (head ss) (tail ss) + suffs = map (drop (length prf)) ss + pref cand ss = case ss of + s1:ss2 -> if isPrefixOf cand s1 then pref cand ss2 else pref (init cand) ss + _ -> cand + isK t = case t of + C.K (C.KS _) -> True + _ -> False + mkSuff ("":ws) = C.R (map (C.K . C.KS) ws) + mkSuff (p:ws) = C.W p (C.R (map (C.K . C.KS) ws)) -- common subexpression elimination; see ./Subexpression.hs for the idea @@ -454,7 +454,7 @@ addSubexpConsts tree lins = _ -> case t of C.R ts -> C.R $ map (recomp f) ts C.S ts -> C.S $ map (recomp f) ts - C.W s ss -> C.W s ss + C.W s t -> C.W s (recomp f t) C.P t p -> C.P (recomp f t) (recomp f p) C.RP t p -> C.RP (recomp f t) (recomp f p) C.L x t -> C.L x (recomp f t) @@ -483,7 +483,8 @@ collectSubterms t = case t of C.S ts -> do mapM collectSubterms ts add t - C.W s ts -> do + C.W s u -> do + collectSubterms u add t C.P p u -> do collectSubterms p diff --git a/src/GF/Canon/CanonToJS.hs b/src/GF/Canon/CanonToJS.hs index 7e88a5ef2..beefefa04 100644 --- a/src/GF/Canon/CanonToJS.hs +++ b/src/GF/Canon/CanonToJS.hs @@ -53,13 +53,14 @@ term2js l t = f t C.R xs -> new "Arr" (map f xs) C.P x y -> JS.ECall (JS.EMember (f x) (JS.Ident "sel")) [f y] C.S xs -> new "Seq" (map f xs) - C.KS s -> new "Str" [JS.EStr s] - C.KP ss vs -> new "Seq" (map JS.EStr ss) -- FIXME + C.K (C.KS s) -> new "Str" [JS.EStr s] + C.K (C.KP ss vs) -> new "Seq" (map JS.EStr ss) -- FIXME C.V i -> JS.EIndex (JS.EVar children) (JS.EInt i) C.C i -> new "Int" [JS.EInt i] C.F (C.CId f) -> JS.ECall (JS.EMember (JS.EVar l) (JS.Ident "rule")) [JS.EStr f, JS.EVar children] C.FV xs -> new "Variants" (map f xs) - C.W str ss -> new "Suffix" (JS.EStr str : map JS.EStr ss) + C.W str (C.R r) -> + new "Suffix" (JS.EStr str : [JS.EStr s | C.K (C.KS s) <- r]) C.RP x y -> new "Rp" [f x, f y] C.TM -> new "Meta" [] diff --git a/src/GF/Canon/GFCC/AbsGFCC.hs b/src/GF/Canon/GFCC/AbsGFCC.hs index ccb964689..aab74f7fb 100644 --- a/src/GF/Canon/GFCC/AbsGFCC.hs +++ b/src/GF/Canon/GFCC/AbsGFCC.hs @@ -47,19 +47,24 @@ data Term = R [Term] | P Term Term | S [Term] - | KS String - | KP [String] [Variant] + | K Tokn | V Int | C Int | F CId | FV [Term] - | W String [String] + | W String Term | RP Term Term | TM | L CId Term | BV CId deriving (Eq,Ord,Show) +data Tokn = + KS String + | KP [String] [Variant] + deriving (Eq,Ord,Show) + data Variant = Var [String] [String] deriving (Eq,Ord,Show) + diff --git a/src/GF/Canon/GFCC/DataGFCC.hs b/src/GF/Canon/GFCC/DataGFCC.hs index 746175e29..eabd8b3a3 100644 --- a/src/GF/Canon/GFCC/DataGFCC.hs +++ b/src/GF/Canon/GFCC/DataGFCC.hs @@ -46,9 +46,10 @@ realize :: Term -> String realize trm = case trm of R ts -> realize (ts !! 0) S ss -> unwords $ Prelude.map realize ss - KS s -> s - KP s _ -> unwords s ---- prefix choice TODO - W s ss -> s ++ (ss !! 0) + K t -> case t of + KS s -> s + KP s _ -> unwords s ---- prefix choice TODO + W s t -> s ++ realize t FV ts -> realize (ts !! 0) ---- other variants TODO RP _ r -> realize r TM -> "?" @@ -58,9 +59,9 @@ linExp :: GFCC -> CId -> Exp -> Term linExp mcfg lang tree@(Tr at trees) = case at of AC fun -> comp (Prelude.map lin trees) $ look fun - AS s -> R [KS (show s)] -- quoted - AI i -> R [KS (show i)] - AF d -> R [KS (show d)] + AS s -> R [kks (show s)] -- quoted + AI i -> R [kks (show i)] + AF d -> R [kks (show d)] AM -> TM where lin = linExp mcfg lang @@ -71,17 +72,20 @@ exp0 :: Exp exp0 = Tr (AS "NO_PARSE") [] term0 :: CId -> Term -term0 (CId s) = R [KS ("#" ++ s ++ "#")] +term0 (CId s) = R [kks ("#" ++ s ++ "#")] + +kks :: String -> Term +kks = K . KS compute :: GFCC -> CId -> [Term] -> Term -> Term compute mcfg lang args = comp where comp trm = case trm of P r p -> proj (comp r) (comp p) RP i t -> RP (comp i) (comp t) - W s ss -> W s ss + W s t -> W s (comp t) R ts -> R $ Prelude.map comp ts - V i -> idx args i -- already computed - F c -> comp $ look c -- not computed (if contains argvar) + V i -> idx args i -- already computed + F c -> comp $ look c -- not computed (if contains argvar) FV ts -> FV $ Prelude.map comp ts S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts _ -> trm @@ -90,9 +94,14 @@ compute mcfg lang args = comp where idx xs i = if i > length xs - 1 then trace "overrun !!\n" (last xs) else xs !! i - proj r p = case p of - FV ts -> FV $ Prelude.map (proj r) ts - _ -> comp $ getField r (getIndex p) + proj r p = case (r,p) of + (_, FV ts) -> FV $ Prelude.map (proj r) ts + (W s t, _) -> kks (s ++ getString (proj t p)) + _ -> comp $ getField r (getIndex p) + + getString t = case t of + K (KS s) -> s + _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR" getIndex t = case t of C i -> i @@ -102,7 +111,6 @@ compute mcfg lang args = comp where getField t i = case t of R rs -> idx rs i - W s ss -> KS (s ++ idx ss i) RP _ r -> getField r i TM -> TM _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t diff --git a/src/GF/Canon/GFCC/PrintGFCC.hs b/src/GF/Canon/GFCC/PrintGFCC.hs index 1ef7cfbe3..05a9246cd 100644 --- a/src/GF/Canon/GFCC/PrintGFCC.hs +++ b/src/GF/Canon/GFCC/PrintGFCC.hs @@ -69,11 +69,10 @@ prPrec :: Int -> Int -> Doc -> Doc prPrec i j = if j prPrec i 0 (concatD [doc (showString "[") , prt 0 terms , doc (showString "]")]) P term0 term -> prPrec i 0 (concatD [doc (showString "(") , prt 0 term0 , doc (showString "!") , prt 0 term , doc (showString ")")]) S terms -> prPrec i 0 (concatD [doc (showString "(") , prt 0 terms , doc (showString ")")]) - KS str -> prPrec i 0 (concatD [prt 0 str]) - KP strs variants -> prPrec i 0 (concatD [doc (showString "[") , doc (showString "pre") , prt 0 strs , doc (showString "[") , prt 0 variants , doc (showString "]") , doc (showString "]")]) + K tokn -> prPrec i 0 (concatD [prt 0 tokn]) V n -> prPrec i 0 (concatD [doc (showString "$") , prt 0 n]) C n -> prPrec i 0 (concatD [prt 0 n]) F cid -> prPrec i 0 (concatD [prt 0 cid]) @@ -174,6 +172,11 @@ instance Print Term where [x] -> (concatD [prt 0 x]) x:xs -> (concatD [prt 0 x , doc (showString ",") , prt 0 xs]) +instance Print Tokn where + prt i e = case e of + KS str -> prPrec i 0 (concatD [prt 0 str]) + KP strs variants -> prPrec i 0 (concatD [doc (showString "[") , doc (showString "pre") , prt 0 strs , doc (showString "[") , prt 0 variants , doc (showString "]") , doc (showString "]")]) + instance Print Variant where prt i e = case e of diff --git a/src/GF/Compile/GrammarToGFCC.hs b/src/GF/Compile/GrammarToGFCC.hs index 6d4401e4f..e1374bad1 100644 --- a/src/GF/Compile/GrammarToGFCC.hs +++ b/src/GF/Compile/GrammarToGFCC.hs @@ -74,7 +74,7 @@ mkType t = case GM.catSkeleton t of mkCType :: Type -> C.Term mkCType t = case t of - EInt i -> C.C i + EInt i -> C.C $ fromInteger i -- record parameter alias - created in gfc preprocessing RecType [(LIdent "_", i), (LIdent "__", t)] -> C.RP (mkCType i) (mkCType t) RecType rs -> C.R [mkCType t | (_, t) <- rs] @@ -82,14 +82,14 @@ mkCType t = case t of _ -> C.S [] ----- TStr where getI pt = case pt of - C.C i -> fromInteger i + C.C i -> i C.RP i _ -> getI i _ -> 1 ----- mkTerm :: Term -> C.Term mkTerm tr = case tr of - Vr (IA (_,i)) -> C.V $ toInteger i - EInt i -> C.C i + Vr (IA (_,i)) -> C.V i + EInt i -> C.C $ fromInteger i -- record parameter alias - created in gfc preprocessing R [(LIdent "_", (_,i)), (LIdent "__", (_,t))] -> C.RP (mkTerm i) (mkTerm t) -- ordinary record @@ -112,7 +112,7 @@ mkTerm tr = case tr of _ -> C.S [C.K (C.KS (A.prt tr +++ "66662"))] ---- for debugging where mkLab (LIdent l) = case l of - '_':ds -> (read ds) :: Integer + '_':ds -> (read ds) :: Int _ -> prtTrace tr $ 66663 -- return just one module per language diff --git a/src/GF/Compile/ShellState.hs b/src/GF/Compile/ShellState.hs index 80956d8ff..57c0d1e88 100644 --- a/src/GF/Compile/ShellState.hs +++ b/src/GF/Compile/ShellState.hs @@ -260,7 +260,7 @@ updateShellState opts ign mcnc sh ((_,sgr,gr,eenv),rts) = do let fromGFC = snd . snd . Cnv.convertGFC opts (mcfgs, cfgs) = unzip $ map (curry fromGFC cgr) concrs - fcfgs = FCnv.convertGrammar (C2GFCC.mkCanon2gfcc cgr) + fcfgs = FCnv.convertGrammar (C2GFCC.mkCanon2gfccNoUTF8 cgr) pInfos = zipWith3 Prs.buildPInfo mcfgs (map snd fcfgs) cfgs diff --git a/src/GF/Conversion/SimpleToFCFG.hs b/src/GF/Conversion/SimpleToFCFG.hs index 402729341..e0e639800 100644 --- a/src/GF/Conversion/SimpleToFCFG.hs +++ b/src/GF/Conversion/SimpleToFCFG.hs @@ -133,14 +133,23 @@ convertTerm cnc_defs selector (FV vars) lins = do term <- convertTerm cnc_defs selector term lins convertTerm cnc_defs selector (S ts) ((lbl_path,lin) : lins) = do projectHead lbl_path foldM (\lins t -> convertTerm cnc_defs selector t lins) ((lbl_path,lin) : lins) (reverse ts) -convertTerm cnc_defs selector (KS str) ((lbl_path,lin) : lins) = do projectHead lbl_path - return ((lbl_path,Tok str : lin) : lins) -convertTerm cnc_defs selector (KP (str:_)_)((lbl_path,lin) : lins) = do projectHead lbl_path - return ((lbl_path,Tok str : lin) : lins) +convertTerm cnc_defs selector (K (KS str)) ((lbl_path,lin) : lins) = + do projectHead lbl_path + return ((lbl_path,Tok str : lin) : lins) +convertTerm cnc_defs selector (K (KP (str:_)_))((lbl_path,lin) : lins) = + do projectHead lbl_path + return ((lbl_path,Tok str : lin) : lins) convertTerm cnc_defs selector (RP _ term) lins = convertTerm cnc_defs selector term lins convertTerm cnc_defs selector (F id) lins = do term <- Map.lookup id cnc_defs convertTerm cnc_defs selector term lins -convertTerm cnc_defs selector (W s ss) ((lbl_path,lin) : lins) = convertRec cnc_defs selector 0 [KS (s ++ s1) | s1 <- ss] lbl_path lin lins +convertTerm cnc_defs selector (W s t) ((lbl_path,lin) : lins) = do + ss <- case t of + R ss -> return ss + F f -> do + t <- Map.lookup f cnc_defs + case t of + R ss -> return ss + convertRec cnc_defs selector 0 [K (KS (s ++ s1)) | K (KS s1) <- ss] lbl_path lin lins convertTerm cnc_defs selector x lins = error ("convertTerm ("++show x++")")