divided DataGFCC

This commit is contained in:
aarne
2007-10-05 07:33:33 +00:00
parent a0f3aecc51
commit 07d2910df1
5 changed files with 160 additions and 126 deletions

View File

@@ -15,12 +15,15 @@
module GF.GFCC.API where module GF.GFCC.API where
import GF.GFCC.Linearize
import GF.GFCC.Generate
import GF.GFCC.Macros
import GF.GFCC.DataGFCC import GF.GFCC.DataGFCC
import GF.GFCC.AbsGFCC import GF.GFCC.AbsGFCC
import GF.GFCC.ParGFCC import GF.GFCC.ParGFCC
import GF.GFCC.PrintGFCC
import GF.GFCC.ErrM import GF.GFCC.ErrM
import GF.GFCC.Generate
----import GF.Parsing.FCFG ----import GF.Parsing.FCFG
----import GF.Conversion.SimpleToFCFG (convertGrammar,FCat(..)) ----import GF.Conversion.SimpleToFCFG (convertGrammar,FCat(..))
@@ -80,7 +83,7 @@ file2grammar f = do
file2gfcc f = file2gfcc f =
readFileIf f >>= err (error) (return . mkGFCC) . pGrammar . myLexer readFileIf f >>= err (error) (return . mkGFCC) . pGrammar . myLexer
linearize mgr lang = GF.GFCC.DataGFCC.linearize (gfcc mgr) (CId lang) linearize mgr lang = GF.GFCC.Linearize.linearize (gfcc mgr) (CId lang)
parse mgr lang cat s = error "no parser" parse mgr lang cat s = error "no parser"
----parse mgr lang cat s = ----parse mgr lang cat s =
@@ -107,7 +110,7 @@ generateAll mgr cat = generate (gfcc mgr) (CId cat)
readTree _ = err (const exp0) id . (pExp . myLexer) readTree _ = err (const exp0) id . (pExp . myLexer)
showTree t = printTree t showTree = prt
languages mgr = [l | CId l <- cncnames (gfcc mgr)] languages mgr = [l | CId l <- cncnames (gfcc mgr)]

View File

@@ -38,102 +38,6 @@ statGFCC gfcc = unlines [
] ]
where pr (CId s) = s where pr (CId s) = s
lookLin :: GFCC -> CId -> CId -> Term
lookLin gfcc lang fun =
lookMap TM fun $ lins $ lookMap (error "no lang") lang $ concretes gfcc
lookOper :: GFCC -> CId -> CId -> Term
lookOper gfcc lang fun =
lookMap TM fun $ opers $ lookMap (error "no lang") lang $ concretes gfcc
lookLincat :: GFCC -> CId -> CId -> Term
lookLincat gfcc lang fun =
lookMap TM fun $ lincats $ lookMap (error "no lang") lang $ concretes gfcc
-- | Look up the type of a function.
lookType :: GFCC -> CId -> Type
lookType gfcc f =
fst $ lookMap (error $ "lookType " ++ show f) f (funs (abstract gfcc))
linearize :: GFCC -> CId -> Exp -> String
linearize mcfg lang = realize . linExp mcfg lang
realize :: Term -> String
realize trm = case trm of
R ts -> realize (ts !! 0)
S ss -> unwords $ lmap realize ss
K t -> case t of
KS s -> s
KP s _ -> unwords s ---- prefix choice TODO
W s t -> s ++ realize t
FV ts -> realize (ts !! 0) ---- other variants TODO
TM -> "?"
_ -> "ERROR " ++ show trm ---- debug
linExp :: GFCC -> CId -> Exp -> Term
linExp mcfg lang tree@(Tr at trees) =
case at of
AC fun -> comp (lmap lin trees) $ look fun
AS s -> R [kks (show s)] -- quoted
AI i -> R [kks (show i)]
AF d -> R [kks (show d)]
AM _ -> TM
where
lin = linExp mcfg lang
comp = compute mcfg lang
look = lookLin mcfg lang
exp0 :: Exp
exp0 = Tr (AM 0) []
term0 :: CId -> Term
term0 _ = TM
kks :: String -> Term
kks = K . KS
compute :: GFCC -> CId -> [Term] -> Term -> Term
compute mcfg lang args = comp where
comp trm = case trm of
P r p -> proj (comp r) (comp p)
W s t -> W s (comp t)
R ts -> R $ lmap comp ts
V i -> idx args i -- already computed
F c -> comp $ look c -- not computed (if contains argvar)
FV ts -> FV $ lmap comp ts
S ts -> S $ lfilter (/= S []) $ lmap comp ts
_ -> trm
look = lookOper mcfg lang
idx xs i = if i > length xs - 1
then error
("too large " ++ show i ++ " for\n" ++ unlines (lmap prt xs) ++ "\n") TM
else xs !! i
proj r p = case (r,p) of
(_, FV ts) -> FV $ lmap (proj r) ts
(FV ts, _ ) -> FV $ lmap (\t -> proj t r) ts
(W s t, _) -> kks (s ++ getString (proj t p))
_ -> comp $ getField r (getIndex p)
getString t = case t of
K (KS s) -> s
_ -> error ("ERROR in grammar compiler: string from "++ show t) "ERR"
getIndex t = case t of
C i -> i
TM -> 0 -- default value for parameter
_ -> error ("ERROR in grammar compiler: index from " ++ show t) 0
getField t i = case t of
R rs -> idx rs i
TM -> TM
_ -> error ("ERROR in grammar compiler: field from " ++ show t) t
prt = printTree
-- convert parsed grammar to internal GFCC -- convert parsed grammar to internal GFCC
mkGFCC :: Grammar -> GFCC mkGFCC :: Grammar -> GFCC
@@ -184,10 +88,6 @@ printGFCC gfcc = printTree $ Grm
[Lin f v | (f,v) <- assocs (lindefs cnc)] [Lin f v | (f,v) <- assocs (lindefs cnc)]
[Lin f v | (f,v) <- assocs (printnames cnc)] [Lin f v | (f,v) <- assocs (printnames cnc)]
-- lookup with default value
lookMap :: (Show i, Ord i) => a -> i -> Map i a -> a
lookMap d c m = maybe d id $ Data.Map.lookup c m
-- default map and filter are for Map here -- default map and filter are for Map here
lmap = Prelude.map lmap = Prelude.map
lfilter = Prelude.filter lfilter = Prelude.filter

View File

@@ -1,5 +1,6 @@
module GF.GFCC.Generate where module GF.GFCC.Generate where
import GF.GFCC.Macros
import GF.GFCC.DataGFCC import GF.GFCC.DataGFCC
import GF.GFCC.AbsGFCC import GF.GFCC.AbsGFCC
@@ -10,27 +11,17 @@ import System.Random
generate :: GFCC -> CId -> [Exp] generate :: GFCC -> CId -> [Exp]
generate gfcc cat = concatMap (\i -> gener i cat) [0..] generate gfcc cat = concatMap (\i -> gener i cat) [0..]
where where
gener 0 c = [Tr (AC f) [] | (f, Typ [] _) <- fns c] gener 0 c = [tree (AC f) [] | (f, Typ [] _) <- fns c]
gener i c = [ gener i c = [
tr | tr |
(f, Typ cs _) <- fns c, (f, Typ cs _) <- fns c,
let alts = map (gener (i-1)) cs, let alts = map (gener (i-1)) cs,
ts <- combinations alts, ts <- combinations alts,
let tr = Tr (AC f) ts, let tr = tree (AC f) ts,
depth tr >= i depth tr >= i
] ]
fns cat = fns = functionsToCat gfcc
let fs = lookMap [] cat $ catfuns $ abstract gfcc
in [(f,ty) | f <- fs, Just (ty,_) <- [M.lookup f $ funs $ abstract gfcc]]
depth tr = case tr of
Tr _ [] -> 1
Tr _ ts -> maximum (map depth ts) + 1
--- from Operations
combinations :: [[a]] -> [[a]]
combinations t = case t of
[] -> [[]]
aa:uu -> [a:u | a <- aa, u <- combinations uu]
-- generate an infinite list of trees randomly -- generate an infinite list of trees randomly
genRandom :: StdGen -> GFCC -> CId -> [Exp] genRandom :: StdGen -> GFCC -> CId -> [Exp]
@@ -45,16 +36,16 @@ genRandom gen gfcc cat = genTrees (randomRs (0.0, 1.0) gen) cat where
(genTrees ds2 cat) -- else (drop k ds) (genTrees ds2 cat) -- else (drop k ds)
genTree rs = gett rs where genTree rs = gett rs where
gett ds (CId "String") = (Tr (AS "foo") [], 1) gett ds (CId "String") = (tree (AS "foo") [], 1)
gett ds (CId "Int") = (Tr (AI 12345) [], 1) gett ds (CId "Int") = (tree (AI 12345) [], 1)
gett [] _ = (Tr (AS "TIMEOUT") [], 1) ---- gett [] _ = (tree (AS "TIMEOUT") [], 1) ----
gett ds cat = case fns cat of gett ds cat = case fns cat of
[] -> (Tr (AM 0) [],1) [] -> (tree (AM 0) [],1)
fs -> let fs -> let
d:ds2 = ds d:ds2 = ds
(f,args) = getf d fs (f,args) = getf d fs
(ts,k) = getts ds2 args (ts,k) = getts ds2 args
in (Tr (AC f) ts, k+1) in (tree (AC f) ts, k+1)
getf d fs = let lg = (length fs) in getf d fs = let lg = (length fs) in
fs !! (floor (d * fromIntegral lg)) fs !! (floor (d * fromIntegral lg))
getts ds cats = case cats of getts ds cats = case cats of
@@ -64,11 +55,10 @@ genRandom gen gfcc cat = genTrees (randomRs (0.0, 1.0) gen) cat where
in (t:ts, k + ks) in (t:ts, k + ks)
_ -> ([],0) _ -> ([],0)
fns cat = fns cat = [(f,cs) | (f, Typ cs _) <- functionsToCat gfcc cat]
let fs = maybe [] id $ M.lookup cat $ catfuns $ abstract gfcc
in [(f,cs) | f <- fs,
Just (Typ cs _,_) <- [M.lookup f $ funs $ abstract gfcc]]
{-
-- brute-force parsing method; only returns the first result -- brute-force parsing method; only returns the first result
-- note: you cannot throw away rules with unknown words from the grammar -- note: you cannot throw away rules with unknown words from the grammar
-- because it is not known which field in each rule may match the input -- because it is not known which field in each rule may match the input
@@ -77,3 +67,4 @@ searchParse :: Int -> GFCC -> CId -> [String] -> [Exp]
searchParse i gfcc cat ws = [t | t <- gen, s <- lins t, words s == ws] where searchParse i gfcc cat ws = [t | t <- gen, s <- lins t, words s == ws] where
gen = take i $ generate gfcc cat gen = take i $ generate gfcc cat
lins t = [linearize gfcc lang t | lang <- cncnames gfcc] lins t = [linearize gfcc lang t | lang <- cncnames gfcc]
-}

77
src/GF/GFCC/Linearize.hs Normal file
View File

@@ -0,0 +1,77 @@
module GF.GFCC.Linearize where
import GF.GFCC.Macros
import GF.GFCC.DataGFCC
import GF.GFCC.AbsGFCC
import Data.Map
import Data.List
-- linearization and computation of concrete GFCC Terms
linearize :: GFCC -> CId -> Exp -> String
linearize mcfg lang = realize . linExp mcfg lang
realize :: Term -> String
realize trm = case trm of
R ts -> realize (ts !! 0)
S ss -> unwords $ lmap realize ss
K t -> case t of
KS s -> s
KP s _ -> unwords s ---- prefix choice TODO
W s t -> s ++ realize t
FV ts -> realize (ts !! 0) ---- other variants TODO
TM -> "?"
_ -> "ERROR " ++ show trm ---- debug
linExp :: GFCC -> CId -> Exp -> Term
linExp mcfg lang tree@(DTr _ at trees) = ---- bindings TODO
case at of
AC fun -> comp (lmap lin trees) $ look fun
AS s -> R [kks (show s)] -- quoted
AI i -> R [kks (show i)]
AF d -> R [kks (show d)]
AM _ -> TM
where
lin = linExp mcfg lang
comp = compute mcfg lang
look = lookLin mcfg lang
compute :: GFCC -> CId -> [Term] -> Term -> Term
compute mcfg lang args = comp where
comp trm = case trm of
P r p -> proj (comp r) (comp p)
W s t -> W s (comp t)
R ts -> R $ lmap comp ts
V i -> idx args i -- already computed
F c -> comp $ look c -- not computed (if contains argvar)
FV ts -> FV $ lmap comp ts
S ts -> S $ lfilter (/= S []) $ lmap comp ts
_ -> trm
look = lookOper mcfg lang
idx xs i = if i > length xs - 1
then error
("too large " ++ show i ++ " for\n" ++ unlines (lmap prt xs) ++ "\n") TM
else xs !! i
proj r p = case (r,p) of
(_, FV ts) -> FV $ lmap (proj r) ts
(FV ts, _ ) -> FV $ lmap (\t -> proj t r) ts
(W s t, _) -> kks (s ++ getString (proj t p))
_ -> comp $ getField r (getIndex p)
getString t = case t of
K (KS s) -> s
_ -> error ("ERROR in grammar compiler: string from "++ show t) "ERR"
getIndex t = case t of
C i -> i
TM -> 0 -- default value for parameter
_ -> error ("ERROR in grammar compiler: index from " ++ show t) 0
getField t i = case t of
R rs -> idx rs i
TM -> TM
_ -> error ("ERROR in grammar compiler: field from " ++ show t) t

63
src/GF/GFCC/Macros.hs Normal file
View File

@@ -0,0 +1,63 @@
module GF.GFCC.Macros where
import GF.GFCC.AbsGFCC
import GF.GFCC.DataGFCC
import GF.GFCC.PrintGFCC
import Data.Map
import Data.List
-- operations for manipulating GFCC grammars and objects
lookLin :: GFCC -> CId -> CId -> Term
lookLin gfcc lang fun =
lookMap TM fun $ lins $ lookMap (error "no lang") lang $ concretes gfcc
lookOper :: GFCC -> CId -> CId -> Term
lookOper gfcc lang fun =
lookMap TM fun $ opers $ lookMap (error "no lang") lang $ concretes gfcc
lookLincat :: GFCC -> CId -> CId -> Term
lookLincat gfcc lang fun =
lookMap TM fun $ lincats $ lookMap (error "no lang") lang $ concretes gfcc
lookType :: GFCC -> CId -> Type
lookType gfcc f =
fst $ lookMap (error $ "lookType " ++ show f) f (funs (abstract gfcc))
functionsToCat :: GFCC -> CId -> [(CId,Type)]
functionsToCat gfcc cat =
[(f,ty) | f <- fs, Just (ty,_) <- [Data.Map.lookup f $ funs $ abstract gfcc]]
where
fs = lookMap [] cat $ catfuns $ abstract gfcc
depth :: Exp -> Int
depth tr = case tr of
DTr _ _ [] -> 1
DTr _ _ ts -> maximum (lmap depth ts) + 1
tree :: Atom -> [Exp] -> Exp
tree = DTr []
exp0 :: Exp
exp0 = Tr (AM 0) []
term0 :: CId -> Term
term0 _ = TM
kks :: String -> Term
kks = K . KS
prt :: Print a => a -> String
prt = printTree
-- lookup with default value
lookMap :: (Show i, Ord i) => a -> i -> Map i a -> a
lookMap d c m = maybe d id $ Data.Map.lookup c m
--- from Operations
combinations :: [[a]] -> [[a]]
combinations t = case t of
[] -> [[]]
aa:uu -> [a:u | a <- aa, u <- combinations uu]