Yay!! Direct generation of PMCFG from GF grammar

This commit is contained in:
krasimir
2010-06-18 12:55:58 +00:00
parent 5dfc9bbc0b
commit 992a7ffb38
17 changed files with 500 additions and 996 deletions

View File

@@ -1,4 +1,4 @@
{-# LANGUAGE BangPatterns, RankNTypes, FlexibleInstances, MultiParamTypeClasses #-}
{-# LANGUAGE BangPatterns, RankNTypes, FlexibleInstances, MultiParamTypeClasses, PatternGuards #-}
----------------------------------------------------------------------
-- |
-- Maintainer : Krasimir Angelov
@@ -13,11 +13,15 @@ module GF.Compile.GeneratePMCFG
(convertConcrete) where
import PGF.CId
import PGF.Data
import PGF.Macros
import PGF.Data hiding (Type)
import GF.Infra.Option
import GF.Grammar hiding (Env, mkRecord, mkTable)
import qualified GF.Infra.Modules as M
import GF.Grammar.Lookup
import GF.Grammar.Predef
import GF.Data.BacktrackM
import GF.Data.Operations
import GF.Data.Utilities (updateNthM, updateNth, sortNub)
import System.IO
@@ -26,36 +30,52 @@ import qualified Data.Set as Set
import qualified Data.List as List
import qualified Data.IntMap as IntMap
import qualified Data.ByteString.Char8 as BS
import Text.PrettyPrint hiding (Str)
import Data.Array.IArray
import Data.Maybe
import Data.Char (isDigit)
import Control.Monad
import Control.Monad.Identity
import Control.Exception
----------------------------------------------------------------------
-- main conversion function
--convertConcrete :: Options -> Abstr -> CId -> Concr -> IO Concr
convertConcrete opts lang flags printnames abs_defs cnc_defs lincats params lin_defs = do
let env0 = emptyGrammarEnv cat_defs params
convertConcrete :: Options -> SourceGrammar -> SourceModule -> SourceModule -> IO Concr
convertConcrete opts gr am cm = do
let env0 = emptyGrammarEnv gr cm
when (flag optProf opts) $ do
profileGrammar lang env0 pfrules
env1 <- expandHOAS opts abs_defs cat_defs lin_defs env0
env2 <- foldM (convertRule opts) env1 pfrules
return $ getParserInfo flags printnames env2
profileGrammar cm env0 pfrules
env1 <- expandHOAS opts cm env0
env2 <- foldM (convertRule gr opts) env1 pfrules
return $ getConcr flags printnames env2
where
cat_defs = Map.insert cidVar (S []) lincats
(m,mo) = cm
pfrules = [
(PFRule id args (0,res) (map findLinType args) (findLinType (0,res)) term) |
(id, (ty,_,_)) <- Map.toList abs_defs, let (args,res) = typeSkeleton ty,
term <- maybeToList (Map.lookup id cnc_defs)]
findLinType (_,id) = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs)
(PFRule id args (0,res) (map (\(_,_,ty) -> ty) cont) val term) |
(id,GF.Grammar.CncFun (Just (cat,cont,val)) (Just (L _ term)) _) <- Map.toList (M.jments mo),
let (args,res) = err error typeSkeleton (lookupFunType gr (fst am) id)]
profileGrammar lang (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) pfrules = do
flags = Map.fromList [(mkCId f,LStr x) | (f,x) <- optionsPGF (M.flags mo)]
printnames = Map.fromAscList [(i2i id, name) | (id,info) <- Map.toList (M.jments mo), name <- prn info]
where
prn (GF.Grammar.CncFun _ _ (Just (L _ tr))) = [flatten tr]
prn (GF.Grammar.CncCat _ _ (Just (L _ tr))) = [flatten tr]
prn _ = []
flatten (K s) = s
flatten (Alts x _) = flatten x
flatten (C x y) = flatten x +++ flatten y
i2i :: Ident -> CId
i2i = CId . ident2bs
profileGrammar (m,mo) env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) pfrules = do
hPutStrLn stderr ""
hPutStrLn stderr ("Language: " ++ show lang)
hPutStrLn stderr ("Language: " ++ showIdent m)
hPutStrLn stderr ""
hPutStrLn stderr "Categories Count"
hPutStrLn stderr "--------------------------------"
@@ -69,22 +89,52 @@ profileGrammar lang (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) pfr
mapM_ profileRule pfrules
hPutStrLn stderr "--------------------------------"
where
profileCat (cid,(fcat1,fcat2,_,_)) = do
hPutStrLn stderr (lformat 23 cid ++ rformat 9 (fcat2-fcat1+1))
profileCat (cid,(fcat1,fcat2,_)) = do
hPutStrLn stderr (lformat 23 (showIdent cid) ++ rformat 9 (show (fcat2-fcat1+1)))
profileRule (PFRule fun args res ctypes ctype term) = do
let pargs = zipWith protoFCat args ctypes
hPutStrLn stderr (lformat 23 fun ++ rformat 9 (product [length xs | PFCat _ _ _ tcs <- pargs, (_,xs) <- tcs]))
lformat :: Show a => Int -> a -> String
lformat n x = s ++ replicate (n-length s) ' '
let pargs = map (protoFCat env) args
hPutStrLn stderr (lformat 23 (showIdent fun) ++ rformat 9 (show (product (map (catFactor env) args))))
where
s = show x
catFactor (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,(_,cat)) =
case IntMap.lookup n catSet >>= Map.lookup cat of
Just (s,e,_) -> e-s+1
Nothing -> 0
rformat :: Show a => Int -> a -> String
rformat n x = replicate (n-length s) ' ' ++ s
where
s = show x
lformat :: Int -> String -> String
lformat n s = s ++ replicate (n-length s) ' '
rformat :: Int -> String -> String
rformat n s = replicate (n-length s) ' ' ++ s
data ProtoFRule = PFRule Ident {- function -}
[(Int,Cat)] {- argument types: context size and category -}
(Int,Cat) {- result type : context size (always 0) and category -}
[Type] {- argument lin-types representation -}
Type {- result lin-type representation -}
Term {- body -}
convertRule :: SourceGrammar -> Options -> GrammarEnv -> ProtoFRule -> IO GrammarEnv
convertRule gr opts grammarEnv (PFRule fun args res ctypes ctype term) = do
let pres = protoFCat grammarEnv res
pargs = map (protoFCat grammarEnv) args
b = runCnvMonad gr (unfactor term >>= convertTerm CNil ctype) (pargs,[])
(grammarEnv1,b1) = addSequencesB grammarEnv b
grammarEnv2 = brk (\grammarEnv -> foldBM addRule
grammarEnv
(goB b1 CNil [])
(pres,pargs) ) grammarEnv1
when (verbAtLeast opts Verbose) $ hPutStrLn stderr ("+ "++showIdent fun)
return $! grammarEnv2
where
addRule lins (newCat', newArgs') env0 =
let [newCat] = getFCatsX env0 newCat'
(env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs'
(env2,funid) = addCncFun env1 (PGF.Data.CncFun (i2i fun) (mkArray lins))
in addProduction env2 newCat (PApply funid newArgs)
brk :: (GrammarEnv -> GrammarEnv) -> (GrammarEnv -> GrammarEnv)
brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
@@ -103,141 +153,245 @@ brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
count = length xs
ys = foldr (zipWith Set.insert) (repeat Set.empty) xs
convertRule :: Options -> GrammarEnv -> ProtoFRule -> IO GrammarEnv
convertRule opts grammarEnv (PFRule fun args res ctypes ctype term) = do
let pres = protoFCat res ctype
pargs = zipWith protoFCat args ctypes
b = runBranchM (convertTerm [] ctype term) (pargs,[])
(grammarEnv1,b1) = addSequences' grammarEnv b
grammarEnv2 = brk (\grammarEnv -> foldBM addRule
grammarEnv
(go' b1 [] [])
(pres,pargs) ) grammarEnv1
when (verbAtLeast opts Verbose) $ hPutStrLn stderr ("+ "++showCId fun)
return $! grammarEnv2
unfactor :: Term -> CnvMonad Term
unfactor t = CM (\gr c -> c (unfac gr t))
where
addRule lins (newCat', newArgs') env0 =
let [newCat] = getFCats env0 newCat'
(env1, newArgs) = List.mapAccumL (\env -> addFCoercion env . getFCats env) env0 newArgs'
(env2,funid) = addCncFun env1 (CncFun fun (mkArray lins))
in addProduction env2 newCat (PApply funid newArgs)
unfac gr t =
case t of
T (TTyped ty) [(PV x,u)] -> V ty [restore x v (unfac gr u) | v <- err error id (allParamValues gr ty)]
_ -> composSafeOp (unfac gr) t
where
restore x u t = case t of
Vr y | y == x -> u
_ -> composSafeOp (restore x u) t
----------------------------------------------------------------------
-- Branch monad
-- CnvMonad monad
--
-- The branching monad provides backtracking together with
-- recording of the choices made. We have two cases
-- when we have alternative choices:
--
-- * when we have parameter type, then
-- we have to try all possible values
-- * when we have variants we have to try all alternatives
--
-- The conversion monad keeps track of the choices and they are
-- returned as 'Branch' data type.
newtype BranchM a = BM (forall b . (a -> ([ProtoFCat],[Symbol]) -> Branch b) -> ([ProtoFCat],[Symbol]) -> Branch b)
data Branch a
= Case Int Path [(Term,Branch a)]
| Variant [Branch a]
| Return a
instance Monad BranchM where
return a = BM (\c s -> c a s)
BM m >>= k = BM (\c s -> m (\a s -> unBM (k a) c s) s)
where unBM (BM m) = m
newtype CnvMonad a = CM {unCM :: SourceGrammar
-> forall b . (a -> ([ProtoFCat],[Symbol]) -> Branch b)
-> ([ProtoFCat],[Symbol])
-> Branch b}
instance MonadState ([ProtoFCat],[Symbol]) BranchM where
get = BM (\c s -> c s s)
put s = BM (\c _ -> c () s)
instance Monad CnvMonad where
return a = CM (\gr c s -> c a s)
CM m >>= k = CM (\gr c s -> m gr (\a s -> unCM (k a) gr c s) s)
instance Functor BranchM where
fmap f (BM m) = BM (\c s -> m (c . f) s)
instance MonadState ([ProtoFCat],[Symbol]) CnvMonad where
get = CM (\gr c s -> c s s)
put s = CM (\gr c _ -> c () s)
runBranchM :: BranchM (Value a) -> ([ProtoFCat],[Symbol]) -> Branch a
runBranchM (BM m) s = m (\v s -> Return v) s
instance Functor CnvMonad where
fmap f (CM m) = CM (\gr c s -> m gr (c . f) s)
variants :: [a] -> BranchM a
variants xs = BM (\c s -> Variant [c x s | x <- xs])
runCnvMonad :: SourceGrammar -> CnvMonad a -> ([ProtoFCat],[Symbol]) -> Branch a
runCnvMonad gr (CM m) s = m gr (\v s -> Return v) s
choices :: Int -> FPath -> BranchM LIndex
choices nr path = BM (\c s -> let (args,_) = s
PFCat _ _ _ tcs = args !! nr
in case fromMaybe (error "evalTerm: wrong path") (lookup path tcs) of
[index] -> c index s
indices -> Case nr path [c i (updateEnv i s) | i <- indices])
where
updateEnv index (args,seq) = (updateNth (restrictArg path index) nr args,seq)
-- | backtracking for all variants
variants :: [a] -> CnvMonad a
variants xs = CM (\gr c s -> Variant [c x s | x <- xs])
restrictArg path index (PFCat n cat rcs tcs) = PFCat n cat rcs (addConstraint path index tcs)
-- | backtracking for all parameter values that a variable could take
choices :: Int -> Path -> CnvMonad Term
choices nr path = do (args,_) <- get
let PFCat _ _ schema = args !! nr
descend schema path CNil
where
descend (CRec rs) (CProj lbl path) rpath = case lookup lbl rs of
Just (Identity t) -> descend t path (CProj lbl rpath)
descend (CRec rs) CNil rpath = do rs <- mapM (\(lbl,Identity t) -> fmap (assign lbl) (descend t CNil (CProj lbl rpath))) rs
return (R rs)
descend (CTbl pt cs) (CSel trm path) rpath = case lookup trm cs of
Just (Identity t) -> descend t path (CSel trm rpath)
descend (CTbl pt cs) CNil rpath = do cs <- mapM (\(trm,Identity t) -> descend t CNil (CSel trm rpath)) cs
return (V pt cs)
descend (CPar (m,vs)) CNil rpath = case vs of
[(value,index)] -> return value
values -> let path = reversePath rpath
in CM (\gr c s -> Case nr path [(value, updateEnv path value gr c s)
| (value,index) <- values])
addConstraint path0 index0 [] = error "restrictProtoFCat: unknown path"
addConstraint path0 index0 (c@(path,indices) : tcs)
| path0 == path = ((path,[index0]) : tcs)
| otherwise = c : addConstraint path0 index0 tcs
updateEnv path value gr c (args,seq) =
case updateNthM (restrictProtoFCat path value) nr args of
Just args -> c value (args,seq)
Nothing -> error "conflict in updateEnv"
mkRecord :: [BranchM (Value a)] -> BranchM (Value a)
mkRecord xs = BM (\c -> foldl (\c (BM m) bs s -> c (m (\v s -> Return v) s : bs) s) (c . Rec) xs [])
-- | the argument should be a parameter type and then
-- the function returns all possible values.
getAllParamValues :: Type -> CnvMonad [Term]
getAllParamValues ty = CM (\gr c -> c (err error id (allParamValues gr ty)))
mkRecord :: [(Label,CnvMonad (Schema Branch s c))] -> CnvMonad (Schema Branch s c)
mkRecord xs = CM (\gr c -> foldl (\c (lbl,CM m) bs s -> c ((lbl,m gr (\v s -> Return v) s) : bs) s) (c . CRec) xs [])
mkTable :: Type -> [(Term ,CnvMonad (Schema Branch s c))] -> CnvMonad (Schema Branch s c)
mkTable pt xs = CM (\gr c -> foldl (\c (trm,CM m) bs s -> c ((trm,m gr (\v s -> Return v) s) : bs) s) (c . CTbl pt) xs [])
----------------------------------------------------------------------
-- Term Schema
--
-- The term schema is a term-like structure, with records, tables,
-- strings and parameters values, but in addition we could add
-- annotations of arbitrary types
-- | Term schema
data Schema b s c
= CRec [(Label,b (Schema b s c))]
| CTbl Type [(Term, b (Schema b s c))]
| CStr s
| CPar c
-- | Path into a term or term schema
data Path
= CProj Label Path
| CSel Term Path
| CNil
deriving (Eq,Show)
-- | The ProtoFCat represents a linearization type as term schema.
-- The annotations are as follows: the strings are annotated with
-- their index in the PMCFG tuple, the parameters are annotated
-- with their value both as term and as index.
data ProtoFCat = PFCat Int Ident (Schema Identity Int (Int,[(Term,Int)]))
type Env = (ProtoFCat, [ProtoFCat])
protoFCat :: GrammarEnv -> (Int,Cat) -> ProtoFCat
protoFCat (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,(_,cat)) =
case IntMap.lookup n catSet >>= Map.lookup cat of
Just (_,_,pfcat) -> pfcat
Nothing -> error "unknown category"
ppPath (CProj lbl path) = ppLabel lbl <+> ppPath path
ppPath (CSel trm path) = ppTerm Unqualified 5 trm <+> ppPath path
ppPath CNil = empty
reversePath path = rev CNil path
where
rev path0 CNil = path0
rev path0 (CProj lbl path) = rev (CProj lbl path0) path
rev path0 (CSel trm path) = rev (CSel trm path0) path
----------------------------------------------------------------------
-- term conversion
type CnvMonad a = BranchM a
type Value a = Schema Branch a Term
type FPath = [LIndex]
data ProtoFCat = PFCat Int CId [FPath] [(FPath,[LIndex])]
type Env = (ProtoFCat, [ProtoFCat])
data ProtoFRule = PFRule CId {- function -}
[(Int,CId)] {- argument types: context size and category -}
(Int,CId) {- result type : context size (always 0) and category -}
[Term] {- argument lin-types representation -}
Term {- result lin-type representation -}
Term {- body -}
type TermMap = Map.Map CId Term
convertTerm :: Path -> Type -> Term -> CnvMonad (Value [Symbol])
convertTerm sel ctype (Vr x) = convertArg ctype (getVarIndex x) (reversePath sel)
convertTerm sel ctype (Abs _ _ t) = convertTerm sel ctype t -- there are only top-level abstractions and we ignore them !!!
convertTerm sel ctype (R record) = convertRec sel ctype record
convertTerm sel ctype (P term l) = convertTerm (CProj l sel) ctype term
convertTerm sel ctype (V pt ts) = convertTbl sel ctype pt ts
convertTerm sel ctype (S term p) = do v <- evalTerm CNil p
convertTerm (CSel v sel) ctype term
convertTerm sel ctype (FV vars) = do term <- variants vars
convertTerm sel ctype term
convertTerm sel ctype (C t1 t2) = do v1 <- convertTerm sel ctype t1
v2 <- convertTerm sel ctype t2
return (CStr (concat [s | CStr s <- [v1,v2]]))
convertTerm sel ctype (K t) = return (CStr [SymKS [t]])
convertTerm sel ctype Empty = return (CStr [])
convertTerm sel ctype (Alts s alts)
= return (CStr [SymKP (strings s) [Alt (strings u) (strings v) | (u,v) <- alts]])
where
strings (K s) = [s]
strings (C u v) = strings u ++ strings v
strings (Strs ss) = concatMap strings ss
convertTerm CNil ctype t = do v <- evalTerm CNil t
return (CPar v)
convertTerm _ _ t = error (render (text "convertTerm" <+> parens (ppTerm Unqualified 0 t)))
protoFCat :: (Int,CId) -> Term -> ProtoFCat
protoFCat (n,cat) ctype =
let (rcs,tcs) = loop [] [] [] ctype'
in PFCat n cat rcs tcs
convertArg :: Term -> Int -> Path -> CnvMonad (Value [Symbol])
convertArg (RecType rs) nr path =
mkRecord (map (\(lbl,ctype) -> (lbl,convertArg ctype nr (CProj lbl path))) rs)
convertArg (Table pt vt) nr path = do
vs <- getAllParamValues pt
mkTable pt (map (\v -> (v,convertArg vt nr (CSel v path))) vs)
convertArg (Sort _) nr path = do
(args,_) <- get
let PFCat _ cat schema = args !! nr
l = index (reversePath path) schema
sym | isLiteralCat cat = SymLit nr l
| otherwise = SymCat nr l
return (CStr [sym])
where
ctype' -- extend the high-order linearization type
| n > 0 = case ctype of
R xs -> R (xs ++ replicate n (S []))
_ -> error $ "Not a record: " ++ show ctype
| otherwise = ctype
loop path rcs tcs (R record) = List.foldr (\(index,term) (rcs,tcs) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record)
loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs)
loop path rcs tcs (S _) = (path:rcs, tcs)
index (CProj lbl path) (CRec rs) = case lookup lbl rs of
Just (Identity t) -> index path t
index (CSel trm path) (CTbl _ rs) = case lookup trm rs of
Just (Identity t) -> index path t
index CNil (CStr idx) = idx
convertArg ty nr path = do
value <- choices nr (reversePath path)
return (CPar value)
data Branch a
= Case Int FPath [Branch a]
| Variant [Branch a]
| Return (Value a)
convertRec CNil (RecType rs) record =
mkRecord (map (\(lbl,ctype) -> (lbl,convertTerm CNil ctype (projectRec lbl record))) rs)
convertRec (CProj lbl path) ctype record =
convertTerm path ctype (projectRec lbl record)
convertRec _ ctype _ = error ("convertRec: "++show ctype)
data Value a
= Rec [Branch a]
| Str a
| Con LIndex
convertTbl CNil (Table _ vt) pt ts = do
vs <- getAllParamValues pt
mkTable pt (zipWith (\v t -> (v,convertTerm CNil vt t)) vs ts)
convertTbl (CSel v sub_sel) ctype pt ts = do
vs <- getAllParamValues pt
case lookup v (zip vs ts) of
Just t -> convertTerm sub_sel ctype t
Nothing -> error (render (text "convertTbl:" <+> (text "missing value" <+> ppTerm Unqualified 0 v $$
text "among" <+> vcat (map (ppTerm Unqualified 0) vs))))
convertTbl _ ctype _ _ = error ("convertTbl: "++show ctype)
go' :: Branch SeqId -> FPath -> [SeqId] -> BacktrackM Env [SeqId]
go' (Case nr path_ bs) path ss = do (index,b) <- member (zip [0..] bs)
restrictArg nr path_ index
go' b path ss
go' (Variant bs) path ss = do b <- member bs
go' b path ss
go' (Return v) path ss = go v path ss
goB :: Branch (Value SeqId) -> Path -> [SeqId] -> BacktrackM Env [SeqId]
goB (Case nr path bs) rpath ss = do (value,b) <- member bs
restrictArg nr path value
goB b rpath ss
goB (Variant bs) rpath ss = do b <- member bs
goB b rpath ss
goB (Return v) rpath ss = goV v rpath ss
go :: Value SeqId -> FPath -> [SeqId] -> BacktrackM Env [SeqId]
go (Rec xs) path ss = foldM (\ss (lbl,b) -> go' b (lbl:path) ss) ss (reverse (zip [0..] xs))
go (Str seqid) path ss = return (seqid : ss)
go (Con i) path ss = restrictHead path i >> return ss
goV :: Value SeqId -> Path -> [SeqId] -> BacktrackM Env [SeqId]
goV (CRec xs) rpath ss = foldM (\ss (lbl,b) -> goB b (CProj lbl rpath) ss) ss (reverse xs)
goV (CTbl _ xs) rpath ss = foldM (\ss (trm,b) -> goB b (CSel trm rpath) ss) ss (reverse xs)
goV (CStr seqid) rpath ss = return (seqid : ss)
goV (CPar t) rpath ss = restrictHead (reversePath rpath) t >> return ss
addSequences' :: GrammarEnv -> Branch [Symbol] -> (GrammarEnv, Branch SeqId)
addSequences' env (Case nr path bs) = let (env1,bs1) = List.mapAccumL addSequences' env bs
addSequencesB :: GrammarEnv -> Branch (Value [Symbol]) -> (GrammarEnv, Branch (Value SeqId))
addSequencesB env (Case nr path bs) = let (env1,bs1) = List.mapAccumL (\env (trm,b) -> let (env',b') = addSequencesB env b
in (env',(trm,b'))) env bs
in (env1,Case nr path bs1)
addSequences' env (Variant bs) = let (env1,bs1) = List.mapAccumL addSequences' env bs
addSequencesB env (Variant bs) = let (env1,bs1) = List.mapAccumL addSequencesB env bs
in (env1,Variant bs1)
addSequences' env (Return v) = let (env1,v1) = addSequences env v
addSequencesB env (Return v) = let (env1,v1) = addSequencesV env v
in (env1,Return v1)
addSequences :: GrammarEnv -> Value [Symbol] -> (GrammarEnv, Value SeqId)
addSequences env (Rec vs) = let (env1,vs1) = List.mapAccumL addSequences' env vs
in (env1,Rec vs1)
addSequences env (Str lin) = let (env1,seqid) = addFSeq env (optimizeLin lin)
in (env1,Str seqid)
addSequences env (Con i) = (env,Con i)
addSequencesV :: GrammarEnv -> Value [Symbol] -> (GrammarEnv, Value SeqId)
addSequencesV env (CRec vs) = let (env1,vs1) = List.mapAccumL (\env (lbl,b) -> let (env',b') = addSequencesB env b
in (env',(lbl,b'))) env vs
in (env1,CRec vs1)
addSequencesV env (CTbl pt vs)=let (env1,vs1) = List.mapAccumL (\env (trm,b) -> let (env',b') = addSequencesB env b
in (env',(trm,b'))) env vs
in (env1,CTbl pt vs1)
addSequencesV env (CStr lin) = let (env1,seqid) = addFSeq env (optimizeLin lin)
in (env1,CStr seqid)
addSequencesV env (CPar i) = (env,CPar i)
optimizeLin [] = []
@@ -251,98 +405,76 @@ optimizeLin lin@(SymKS _ : _) =
optimizeLin (sym : lin) = sym : optimizeLin lin
convertTerm :: FPath -> Term -> Term -> CnvMonad (Value [Symbol])
convertTerm sel ctype (V nr) = convertArg ctype nr (reverse sel)
convertTerm sel ctype (C nr) = convertCon ctype nr (reverse sel)
convertTerm sel ctype (R record) = convertRec sel ctype record
convertTerm sel ctype (P term p) = do nr <- evalTerm [] p
convertTerm (nr:sel) ctype term
convertTerm sel ctype (FV vars) = do term <- variants vars
convertTerm sel ctype term
convertTerm sel ctype (S ts) = do vs <- mapM (convertTerm sel ctype) ts
return (Str (concat [s | Str s <- vs]))
convertTerm sel ctype (K (KS t)) = return (Str [SymKS [t]])
convertTerm sel ctype (K (KP s v))=return (Str [SymKP s v])
convertTerm sel ctype (W s t) = do
ss <- case t of
R ss -> return ss
convertRec sel ctype [K (KS (s ++ s1)) | K (KS s1) <- ss]
convertTerm sel ctype x = error ("convertTerm ("++show x++")")
convertArg :: Term -> Int -> FPath -> CnvMonad (Value [Symbol])
convertArg (R ctypes) nr path = do
mkRecord (zipWith (\lbl ctype -> convertArg ctype nr (lbl:path)) [0..] ctypes)
convertArg (C max) nr path = do
index <- choices nr path
return (Con index)
convertArg (S _) nr path = do
(args,_) <- get
let PFCat _ cat rcs tcs = args !! nr
l = index path rcs 0
sym | isLiteralCat cat = SymLit nr l
| otherwise = SymCat nr l
return (Str [sym])
where
index lbl' (lbl:lbls) idx
| lbl' == lbl = idx
| otherwise = index lbl' lbls $! (idx+1)
convertCon (C max) index [] = return (Con index)
convertCon x _ _ = fail $ "SimpleToFCFG.convertCon: " ++ show x
convertRec [] (R ctypes) record = do
mkRecord (zipWith (convertTerm []) ctypes record)
convertRec (index:sub_sel) ctype record =
convertTerm sub_sel ctype (record !! index)
------------------------------------------------------------
-- eval a term to ground terms
evalTerm :: FPath -> Term -> CnvMonad LIndex
evalTerm path (V nr) = choices nr (reverse path)
evalTerm path (C nr) = return nr
evalTerm path (R record) = case path of
(index:path) -> evalTerm path (record !! index)
evalTerm path (P term sel) = do index <- evalTerm [] sel
evalTerm (index:path) term
evalTerm :: Path -> Term -> CnvMonad Term
evalTerm CNil (QC f) = return (QC f)
evalTerm CNil (App x y) = do x <- evalTerm CNil x
y <- evalTerm CNil y
return (App x y)
evalTerm path (Vr x) = choices (getVarIndex x) path
evalTerm path (R rs) = case path of
(CProj lbl path) -> evalTerm path (projectRec lbl rs)
CNil -> do rs <- mapM (\(lbl,(_,t)) -> do t <- evalTerm path t
return (assign lbl t)) rs
return (R rs)
evalTerm path (P term lbl) = evalTerm (CProj lbl path) term
evalTerm path (V pt ts) = case path of
(CSel trm path) -> do vs <- getAllParamValues pt
case lookup trm (zip vs ts) of
Just t -> evalTerm path t
Nothing -> error "evalTerm: missing value"
CNil -> do ts <- mapM (evalTerm path) ts
return (V pt ts)
evalTerm path (S term sel) = do v <- evalTerm CNil sel
evalTerm (CSel v path) term
evalTerm path (FV terms) = variants terms >>= evalTerm path
evalTerm path x = error ("evalTerm ("++show x++")")
evalTerm path t = error (render (text "evalTerm" <+> parens (ppTerm Unqualified 0 t)))
getVarIndex (IA _ i) = i
getVarIndex (IAV _ _ i) = i
getVarIndex (IC s) | isDigit (BS.last s) = (read . BS.unpack . snd . BS.spanEnd isDigit) s
----------------------------------------------------------------------
-- GrammarEnv
data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int CatSet SeqSet FunSet CoerceSet (IntMap.IntMap (Set.Set Production))
type CatSet = IntMap.IntMap (Map.Map CId (FId,FId,[Int],Array LIndex String))
type CatSet = IntMap.IntMap (Map.Map Ident (FId,FId,ProtoFCat))
type SeqSet = Map.Map Sequence SeqId
type FunSet = Map.Map CncFun FunId
type CoerceSet= Map.Map [FId] FId
emptyGrammarEnv lincats params =
emptyGrammarEnv gr (m,mo) =
let (last_id,catSet) = Map.mapAccumWithKey computeCatRange 0 lincats
in GrammarEnv last_id (IntMap.singleton 0 catSet) Map.empty Map.empty Map.empty IntMap.empty
where
computeCatRange index cat ctype
| cat == cidString = (index, (fcatString,fcatString,[],listArray (0,0) ["s"]))
| cat == cidInt = (index, (fcatInt, fcatInt, [],listArray (0,0) ["s"]))
| cat == cidFloat = (index, (fcatFloat, fcatFloat, [],listArray (0,0) ["s"]))
| cat == cidVar = (index, (fcatVar, fcatVar, [],listArray (0,0) ["s"]))
| otherwise = (index+size,(index,index+size-1, poly,maybe (error "missing params") (mkArray . getLabels []) (Map.lookup cat params)))
computeCatRange index cat ctype =
(index+size,(index,index+size-1,PFCat 0 cat schema))
where
(size,poly) = getMultipliers 1 [] ctype
((_,size),schema) = compute (0,1) ctype
getMultipliers m ms (R record) = foldr (\t (m,ms) -> getMultipliers m ms t) (m,ms) record
getMultipliers m ms (S _) = (m,ms)
getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms)
compute st (RecType rs) = let (st',rs') = List.mapAccumL (\st (lbl,t) -> let (st',t') = compute st t
in (st',(lbl,Identity t'))) st rs
in (st',CRec rs')
compute st (Table pt vt) = let vs = err error id (allParamValues gr pt)
(st',cs') = List.mapAccumL (\st v -> let (st',vt') = compute st vt
in (st',(v,Identity vt'))) st vs
in (st',CTbl pt cs')
compute st (Sort s)
| s == cStr = let (index,m) = st
in ((index+1,m),CStr index)
compute st t = let vs = err error id (allParamValues gr t)
(index,m) = st
in ((index,m*length vs),CPar (m,zip vs [0..]))
getLabels ls (R record) = concat [getLabels (l:ls) t | P (K (KS l)) t <- record]
getLabels ls (S [FV ps,t]) = concat [getLabels (l:ls) t | K (KS l) <- ps]
getLabels ls (S []) = [unwords (reverse ls)]
getLabels ls (FV _) = []
getLabels _ t = error (show t)
lincats =
Map.insert cVar (Sort cStr) $
Map.fromAscList
[(c, ty) | (c,GF.Grammar.CncCat (Just (L _ ty)) _ _) <- Map.toList (M.jments mo)]
expandHOAS opts abs_defs lincats lindefs env =
expandHOAS opts (m,mo) env = return env {-
foldM add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) (Map.keys lincats)
where
hoTypes :: [(Int,CId)]
@@ -379,10 +511,10 @@ expandHOAS opts abs_defs lincats lindefs env =
add_varFun env cat =
case Map.lookup cat lindefs of
Nothing -> return env
Just lindef -> convertRule opts env (PFRule _V [(0,cidVar)] (0,cat) [arg] res lindef)
Just lindef -> convertRule opts env (PFRule _V [(0,cVar)] (0,cat) [arg] res lindef)
where
arg =
case Map.lookup cidVar lincats of
case Map.lookup cVar lincats of
Nothing -> error $ "No lincat for " ++ showCId cat
Just ctype -> ctype
@@ -390,7 +522,7 @@ expandHOAS opts abs_defs lincats lindefs env =
case Map.lookup cat lincats of
Nothing -> error $ "No lincat for " ++ showCId cat
Just ctype -> ctype
-}
addProduction :: GrammarEnv -> FId -> Production -> GrammarEnv
addProduction (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) cat p =
GrammarEnv last_id catSet seqSet funSet crcSet (IntMap.insertWith Set.union cat (Set.singleton p) prodSet)
@@ -420,57 +552,87 @@ addFCoercion env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) sub_fc
Nothing -> let !fcat = last_id+1
in (GrammarEnv fcat catSet seqSet funSet (Map.insert sub_fcats fcat crcSet) prodSet,fcat)
getParserInfo :: Map.Map CId Literal -> Map.Map CId String -> GrammarEnv -> Concr
getParserInfo flags printnames (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
getConcr :: Map.Map CId Literal -> Map.Map CId String -> GrammarEnv -> Concr
getConcr flags printnames (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
Concr { cflags = flags
, printnames = printnames
, cncfuns = mkArray funSet
, sequences = mkArray seqSet
, cncfuns = mkSetArray funSet
, sequences = mkSetArray seqSet
, productions = IntMap.union prodSet coercions
, pproductions = IntMap.empty
, lproductions = Map.empty
, cnccats = maybe Map.empty (Map.map (\(start,end,_,lbls) -> (CncCat start end lbls))) (IntMap.lookup 0 catSet)
, cnccats = Map.fromList [(i2i cat,PGF.Data.CncCat start end (mkArray (map (renderStyle style{mode=OneLineMode} . ppPath) (getStrPaths schema))))
| (cat,(start,end,PFCat _ _ schema)) <- maybe [] Map.toList (IntMap.lookup 0 catSet)]
, totalCats = last_id+1
}
where
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
mkSetArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
coercions = IntMap.fromList [(fcat,Set.fromList (map PCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet]
getFCats :: GrammarEnv -> ProtoFCat -> [FId]
getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat n cat rcs tcs) =
case IntMap.lookup n catSet >>= Map.lookup cat of
Just (start,end,ms,_) -> reverse (solutions (variants ms tcs start) ())
where
variants _ [] fcat = return fcat
variants (m:ms) ((_,indices) : tcs) fcat = do index <- member indices
variants ms tcs ((m*index) + fcat)
getStrPaths :: Schema Identity s c -> [Path]
getStrPaths = collect CNil []
where
collect path paths (CRec rs) = foldr (\(lbl,Identity t) paths -> collect (CProj lbl path) paths t) paths rs
collect path paths (CTbl _ cs) = foldr (\(trm,Identity t) paths -> collect (CSel trm path) paths t) paths cs
collect path paths (CStr _) = reversePath path : paths
collect path paths (CPar _) = paths
getFCats :: GrammarEnv -> ProtoFCat -> [FId]
getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat n cat schema) =
case IntMap.lookup n catSet >>= Map.lookup cat of
Just (start,end,_) -> reverse (solutions (fmap (start +) $ variants schema) ())
where
variants (CRec rs) = fmap sum $ mapM (\(lbl,Identity t) -> variants t) rs
variants (CTbl _ cs) = fmap sum $ mapM (\(trm,Identity t) -> variants t) cs
variants (CStr _) = return 0
variants (CPar (m,values)) = do (value,index) <- member values
return (m*index)
getFCatsX :: GrammarEnv -> ProtoFCat -> [FId]
getFCatsX (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat n cat schema) =
case IntMap.lookup n catSet >>= Map.lookup cat of
Just (start,end,_) -> reverse (solutions (fmap (start +) $ variants schema) ())
where
variants (CRec rs) = fmap sum $ mapM (\(lbl,Identity t) -> variants t) rs
variants (CTbl _ cs) = fmap sum $ mapM (\(trm,Identity t) -> variants t) cs
variants (CStr _) = return 0
variants (CPar (m,values)) = do (value,index) <- member values
return (m*index)
------------------------------------------------------------
-- updating the MCF rule
restrictArg :: LIndex -> FPath -> LIndex -> BacktrackM Env ()
restrictArg :: LIndex -> Path -> Term -> BacktrackM Env ()
restrictArg nr path index = do
(head, args) <- get
args' <- updateNthM (restrictProtoFCat path index) nr args
put (head, args')
args <- updateNthM (restrictProtoFCat path index) nr args
put (head, args)
restrictHead :: FPath -> LIndex -> BacktrackM Env ()
restrictHead path term
= do (head, args) <- get
head' <- restrictProtoFCat path term head
put (head', args)
restrictHead :: Path -> Term -> BacktrackM Env ()
restrictHead path term = do
(head, args) <- get
head <- restrictProtoFCat path term head
put (head, args)
restrictProtoFCat :: FPath -> LIndex -> ProtoFCat -> BacktrackM Env ProtoFCat
restrictProtoFCat path0 index0 (PFCat n cat rcs tcs) = do
tcs <- addConstraint tcs
return (PFCat n cat rcs tcs)
restrictProtoFCat :: (Functor m, MonadPlus m) => Path -> Term -> ProtoFCat -> m ProtoFCat
restrictProtoFCat path v (PFCat n cat schema) = do
schema <- addConstraint path v schema
return (PFCat n cat schema)
where
addConstraint [] = error "restrictProtoFCat: unknown path"
addConstraint (c@(path,indices) : tcs)
| path0 == path = guard (index0 `elem` indices) >>
return ((path,[index0]) : tcs)
| otherwise = liftM (c:) (addConstraint tcs)
addConstraint (CProj lbl path) v (CRec rs) = fmap CRec $ update lbl (addConstraint path v) rs
addConstraint (CSel trm path) v (CTbl pt cs) = fmap (CTbl pt) $ update trm (addConstraint path v) cs
addConstraint CNil v (CPar (m,vs)) = case lookup v vs of
Just index -> return (CPar (m,[(v,index)]))
Nothing -> mzero
addConstraint CNil v (CStr _) = error "restrictProtoFCat: string path"
update k0 f [] = return []
update k0 f (x@(k,Identity v):xs)
| k0 == k = do v <- f v
return ((k,Identity v):xs)
| otherwise = do xs <- update k0 f xs
return (x:xs)
mkArray lst = listArray (0,length lst-1) lst