now the linearization is completely based on PMCFG

This commit is contained in:
krasimir
2010-01-17 17:05:21 +00:00
parent 9e3d4c74dc
commit af13bae2df
17 changed files with 250 additions and 346 deletions

View File

@@ -48,7 +48,6 @@ library
PGF.TypeCheck
PGF.Binary
PGF.Morphology
PGF.ShowLinearize
PGF.VisualizeTree
GF.Data.TrieMap
GF.Data.Utilities

View File

@@ -14,7 +14,7 @@ module GF.Command.Commands (
import PGF
import PGF.CId
import PGF.ShowLinearize
import PGF.Linearize
import PGF.VisualizeTree
import PGF.Macros
import PGF.Data ----
@@ -344,9 +344,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
("all","show all forms and variants"),
("bracket","show tree structure with brackets and paths to nodes"),
("multi","linearize to all languages (default)"),
("record","show source-code-like record"),
("table","show all forms labelled by parameters"),
("term", "show PGF term"),
("treebank","show the tree and tag linearizations with language names")
] ++ stringOpOptions,
flags = [
@@ -797,11 +795,9 @@ allCommands cod env@(pgf, mos) = Map.fromList [
linear :: [Option] -> CId -> Expr -> String
linear opts lang = let unl = unlex opts lang in case opts of
_ | isOpt "all" opts -> allLinearize unl pgf lang
_ | isOpt "table" opts -> tableLinearize unl pgf lang
_ | isOpt "term" opts -> termLinearize pgf lang
_ | isOpt "record" opts -> recordLinearize pgf lang
_ | isOpt "bracket" opts -> markLinearize pgf lang
_ | isOpt "all" opts -> unlines . concat . intersperse [[]] . map (map (unl . snd)) . tabularLinearizes pgf lang
_ | isOpt "table" opts -> unlines . concat . intersperse [[]] . map (map (\(p,v) -> p+++":"+++unl v)) . tabularLinearizes pgf lang
_ | isOpt "bracket" opts -> unlines . markLinearizes pgf lang
_ -> unl . linearize pgf lang
unlex opts lang = stringOps Nothing (getUnlex opts lang ++ map prOpt opts) ----
@@ -957,4 +953,3 @@ prMorphoAnalysis (w,lps) =
morphoMissing :: Morpho -> [String] -> [String]
morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w)]

View File

@@ -168,10 +168,7 @@ choices nr path = BM (\c s -> let (args,_) = s
| otherwise = c : addConstraint path0 index0 tcs
mkRecord :: [BranchM (Value a)] -> BranchM (Value a)
mkRecord xs = BM (\c -> go xs (c . Rec))
where
go [] c s = c [] s
go (BM m:fs) c s = go fs (\bs s -> c (m (\v s -> Return v) s : bs) s) s
mkRecord xs = BM (\c -> foldl (\c (BM m) bs s -> c (m (\v s -> Return v) s : bs) s) (c . Rec) xs [])
----------------------------------------------------------------------
@@ -202,7 +199,7 @@ protoFCat cnc_defs (n,cat) ctype =
_ -> error $ "Not a record: " ++ show ctype
| otherwise = ctype
loop path rcs tcs (R record) = List.foldl' (\(rcs,tcs) (index,term) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record)
loop path rcs tcs (R record) = List.foldr (\(index,term) (rcs,tcs) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record)
loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs)
loop path rcs tcs (S _) = (path:rcs, tcs)
loop path rcs tcs (F id) = case Map.lookup id cnc_defs of
@@ -229,7 +226,7 @@ go' (Variant bs) path ss = do b <- member bs
go' (Return v) path ss = go v path ss
go :: Value SeqId -> FPath -> [SeqId] -> BacktrackM Env [SeqId]
go (Rec xs) path ss = foldM (\ss (lbl,b) -> go' b (lbl:path) ss) ss (zip [0..] xs)
go (Rec xs) path ss = foldM (\ss (lbl,b) -> go' b (lbl:path) ss) ss (reverse (zip [0..] xs))
go (Str seqid) path ss = return (seqid : ss)
go (Con i) path ss = restrictHead path i >> return ss
@@ -350,7 +347,7 @@ emptyGrammarEnv cnc_defs lincats params =
where
(size,poly) = getMultipliers 1 [] ctype
getMultipliers m ms (R record) = foldl (\(m,ms) t -> getMultipliers m ms t) (m,ms) record
getMultipliers m ms (R record) = foldr (\t (m,ms) -> getMultipliers m ms t) (m,ms) record
getMultipliers m ms (S _) = (m,ms)
getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms)
getMultipliers m ms (F id) = case Map.lookup id cnc_defs of
@@ -364,17 +361,11 @@ emptyGrammarEnv cnc_defs lincats params =
getLabels _ t = error (show t)
expandHOAS abs_defs cnc_defs lincats lindefs env =
foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) hoCats
foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) (Map.keys lincats)
where
hoTypes :: [(Int,CId)]
hoTypes = sortNub [(n,c) | (_,(ty,_,_)) <- abs_defs
, (n,c) <- fst (typeSkeleton ty), n > 0]
hoCats :: [CId]
hoCats = sortNub [c | (_,(ty,_,_)) <- abs_defs
, h <- case ty of {DTyp hyps val _ -> hyps}
, let ty = typeOfHypo h
, c <- fst (catSkeleton ty)]
-- add a range of PMCFG categories for each GF high-order category
add_hoCat env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,cat) =
@@ -386,8 +377,7 @@ expandHOAS abs_defs cnc_defs lincats lindefs env =
-- add one PMCFG function for each high-order type: _B : Cat -> Var -> ... -> Var -> HoCat
add_hoFun env (n,cat) =
let linRec = reverse $
[[FSymCat 0 i] | (l,i) <- case arg of {PFCat _ _ rcs _ -> zip rcs [0..]}] ++
let linRec = [[FSymCat 0 i] | i <- case arg of {PFCat _ _ rcs _ -> [0..length rcs-1]}] ++
[[FSymLit i 0] | i <- [1..n]]
(env1,lins) = List.mapAccumL addFSeq env linRec
newLinRec = mkArray lins
@@ -405,13 +395,10 @@ expandHOAS abs_defs cnc_defs lincats lindefs env =
-- add one PMCFG function for each high-order category: _V : Var -> Cat
add_varFun env cat =
convertRule cnc_defs env (PFRule _V [(0,cidVar)] (0,cat) [arg] res lindef)
case Map.lookup cat lindefs of
Nothing -> env
Just lindef -> convertRule cnc_defs env (PFRule _V [(0,cidVar)] (0,cat) [arg] res lindef)
where
lindef =
case Map.lookup cat lindefs of
Nothing -> error $ "No lindef for " ++ showCId cat
Just def -> def
arg =
case Map.lookup cidVar lincats of
Nothing -> error $ "No lincat for " ++ showCId cat
@@ -455,15 +442,15 @@ getParserInfo :: GrammarEnv -> ParserInfo
getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
ParserInfo { functions = mkArray funSet
, sequences = mkArray seqSet
, productions0= productions0
, productions = filterProductions productions0
, productions = IntMap.union prodSet coercions
, pproductions = IntMap.empty
, lproductions = Map.empty
, startCats = maybe Map.empty (Map.map (\(start,end,_,lbls) -> (start,end,lbls))) (IntMap.lookup 0 catSet)
, totalCats = last_id+1
}
where
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
productions0 = IntMap.union prodSet coercions
coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet]
getFCats :: GrammarEnv -> ProtoFCat -> [FCat]

View File

@@ -5,7 +5,7 @@ import GF.Compile.Export
import GF.Compile.GeneratePMCFG
import PGF.CId
import PGF.Linearize(realize)
import PGF.Macros(updateProductionIndices)
import qualified PGF.Macros as CM
import qualified PGF.Data as C
import qualified PGF.Data as D
@@ -46,7 +46,7 @@ mkCanon2gfcc opts cnc gr =
-- Adds parsers for all concretes
addParsers :: Options -> D.PGF -> IO D.PGF
addParsers opts pgf = do cncs <- sequence [conv lang cnc | (lang,cnc) <- Map.toList (D.concretes pgf)]
return pgf { D.concretes = Map.fromList cncs }
return $ updateProductionIndices $ pgf { D.concretes = Map.fromList cncs }
where
conv lang cnc = do pinfo <- convertConcrete opts (D.abstract pgf) lang cnc
return (lang,cnc { D.parser = Just pinfo })
@@ -586,3 +586,31 @@ requiredCanModules isSingle gr c = nub $ filter notReuse ops ++ exts where
notReuse i = errVal True $ do
m <- M.lookupModule gr i
return $ M.isModRes m -- to exclude reused Cnc and Abs from required
realize :: C.Term -> String
realize = concat . take 1 . realizes
realizes :: C.Term -> [String]
realizes = map (unwords . untokn) . realizest
realizest :: C.Term -> [[C.Tokn]]
realizest trm = case trm of
C.R ts -> realizest (ts !! 0)
C.S ss -> map concat $ combinations $ map realizest ss
C.K t -> [[t]]
C.W s t -> [[C.KS (s ++ r)] | [C.KS r] <- realizest t]
C.FV ts -> concatMap realizest ts
C.TM s -> [[C.KS s]]
_ -> [[C.KS $ "REALIZE_ERROR " ++ show trm]] ---- debug
untokn :: [C.Tokn] -> [String]
untokn ts = case ts of
C.KP d _ : [] -> d
C.KP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss
C.KS s : ws -> s : untokn ws
[] -> []
where
sel d vs w = case [v | C.Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of
v:_ -> v
_ -> d

View File

@@ -60,6 +60,7 @@ prCnc abstr name c = prAll prLinCat (lincats c) $$ prAll prLin (lins (expand c))
pr p (P t1 t2) = prec p 3 (pr 3 t1 <> text "!" <> pr 3 t2)
pr p (S ts) = prec p 2 (hsep (punctuate (text " ++") (map (pr 2) ts)))
pr p (K (KS t)) = doubleQuotes (text t)
pr p (K _) = empty
pr p (V i) = text ("argv_" ++ show (i+1))
pr p (C i) = text (show (i+1))
pr p (FV ts) = prec p 1 (hsep (punctuate (text " |") (map (pr 1) ts)))

View File

@@ -90,7 +90,7 @@ children = JS.Ident "cs"
-- Parser
parser2js :: ParserInfo -> [JS.Expr]
parser2js p = [new "Parser" [JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray (map frule2js (Set.toList set))) | (cat,set) <- IntMap.toList (productions0 p)],
parser2js p = [new "Parser" [JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray (map frule2js (Set.toList set))) | (cat,set) <- IntMap.toList (productions p)],
JS.EArray $ (map ffun2js (Array.elems (functions p))),
JS.EArray $ (map seq2js (Array.elems (sequences p))),
JS.EObj $ map cats (Map.assocs (startCats p)),

View File

@@ -19,7 +19,7 @@ module GF.Quiz (
) where
import PGF
import PGF.ShowLinearize
import PGF.Linearize
import GF.Data.Operations
import GF.Infra.UseIO
import GF.Infra.Option
@@ -51,11 +51,11 @@ morphologyList :: PGF -> Language -> Type -> Int -> IO [(String,[String])]
morphologyList pgf ig typ number = do
ts <- generateRandom pgf typ >>= return . take (max 1 number)
gen <- newStdGen
let ss = map (tabularLinearize pgf ig) ts
let ss = map (tabularLinearizes pgf ig) ts
let size = length (head ss)
let forms = take number $ randomRs (0,size-1) gen
return [(head (snd (head pws)) +++ par, ws) |
(pws,i) <- zip ss forms, let (par,ws) = pws !! i]
return [(snd (head pws0) +++ fst (pws0 !! i), ws) |
(pwss@(pws0:_),i) <- zip ss forms, let ws = map (\pws -> snd (pws !! i)) pwss]
-- | compare answer to the list of right answers, increase score and give feedback
mkAnswer :: Encoding -> [String] -> String -> (Integer, String)

View File

@@ -37,7 +37,7 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang)
rules :: [(FCat,Production)]
rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.productions pinfo)
rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.pproductions pinfo)
, prod <- Set.toList set]
fcatCats :: Map FCat Cat
@@ -58,7 +58,7 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
topdownRules cat = f cat []
where
f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo))
f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (pproductions pinfo))
g (FApply funid args) rules = (functions pinfo ! funid,args) : rules
g (FCoerce cat) rules = f cat rules

View File

@@ -16,7 +16,6 @@ import GF.Speech.SRG (getSpeechLanguage)
import PGF.CId
import PGF.Data
import PGF.Macros
import PGF.Linearize (showPrintName)
import Control.Monad (liftM)
import Data.List (isPrefixOf, find, intersperse)

View File

@@ -2,6 +2,7 @@ module PGF.Binary where
import PGF.CId
import PGF.Data
import PGF.Macros
import Data.Binary
import Data.Binary.Put
import Data.Binary.Get
@@ -28,10 +29,11 @@ instance Binary PGF where
gflags <- get
abstract <- get
concretes <- get
return (PGF{ absname=absname, cncnames=cncnames
, gflags=gflags
, abstract=abstract, concretes=concretes
})
return $ updateProductionIndices $
(PGF{ absname=absname, cncnames=cncnames
, gflags=gflags
, abstract=abstract, concretes=concretes
})
instance Binary CId where
put (CId bs) = put bs
@@ -185,15 +187,16 @@ instance Binary Production where
_ -> decodingError
instance Binary ParserInfo where
put p = put (functions p, sequences p, productions0 p, totalCats p, startCats p)
put p = put (functions p, sequences p, productions p, totalCats p, startCats p)
get = do functions <- get
sequences <- get
productions0<- get
productions <- get
totalCats <- get
startCats <- get
return (ParserInfo{functions=functions,sequences=sequences
,productions0=productions0
,productions =filterProductions productions0
,productions = productions
,pproductions = IntMap.empty
,lproductions = Map.empty
,totalCats=totalCats,startCats=startCats})
decodingError = fail "This PGF file was compiled with different version of GF"

View File

@@ -1,38 +1,81 @@
{-# LANGUAGE ParallelListComp #-}
module PGF.Linearize
(linearizes,showPrintName,realize,realizes,linTree, linTreeMark,linearizesMark) where
module PGF.Linearize(linearizes,markLinearizes,tabularLinearizes) where
import PGF.CId
import PGF.Data
import PGF.Macros
import PGF.Tree
import Data.Maybe (fromJust)
import Data.Array.IArray
import Data.List
import Control.Monad
import qualified Data.Map as Map
import Data.List
import Debug.Trace
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
-- linearization and computation of concrete PGF Terms
type LinTable = Array FIndex [Tokn]
linearizes :: PGF -> CId -> Expr -> [String]
linearizes pgf lang = realizes . linTree pgf lang
linearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang (\_ _ lint -> lint)
realize :: Term -> String
realize = concat . take 1 . realizes
linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Expr -> [LinTable]
linTree pgf lang mark e = lin0 [] [] [] Nothing e
where
cnc = lookMap (error "no lang") lang (concretes pgf)
pinfo = fromJust (parser cnc)
lp = lproductions pinfo
realizes :: Term -> [String]
realizes = map (unwords . untokn) . realizest
lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e
lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e
lin0 path xs ys mb_fid e | null xs = lin path ys mb_fid e []
| otherwise = apply path (xs ++ ys) mb_fid _B (e:[ELit (LStr x) | x <- xs])
realizest :: Term -> [[Tokn]]
realizest trm = case trm of
R ts -> realizest (ts !! 0)
S ss -> map concat $ combinations $ map realizest ss
K t -> [[t]]
W s t -> [[KS (s ++ r)] | [KS r] <- realizest t]
FV ts -> concatMap realizest ts
TM s -> [[KS s]]
_ -> [[KS $ "REALIZE_ERROR " ++ show trm]] ---- debug
lin path xs mb_fid (EApp e1 e2) es = lin path xs mb_fid e1 (e2:es)
lin path xs mb_fid (ELit l) [] = case l of
LStr s -> return (mark Nothing path (ss s))
LInt n -> return (mark Nothing path (ss (show n)))
LFlt f -> return (mark Nothing path (ss (show f)))
lin path xs mb_fid (EMeta i) es = apply path xs mb_fid _V (ELit (LStr ('?':show i)):es)
lin path xs mb_fid (EFun f) es = map (mark (Just f) path) (apply path xs mb_fid f es)
lin path xs mb_fid (EVar i) es = apply path xs mb_fid _V (ELit (LStr (xs !! i)) :es)
lin path xs mb_fid (ETyped e _) es = lin path xs mb_fid e es
lin path xs mb_fid (EImplArg e) es = lin path xs mb_fid e es
ss s = listArray (0,0) [[KS s]]
apply path xs mb_fid f es =
case Map.lookup f lp of
Just prods -> case lookupProds mb_fid prods of
Just set -> do prod <- Set.toList set
case prod of
FApply funid fids -> do guard (length fids == length es)
args <- sequence (zipWith3 (\i fid e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es)
let (FFun _ lins) = functions pinfo ! funid
return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])
FCoerce fid -> apply path xs (Just fid) f es
Nothing -> mzero
Nothing -> apply path xs mb_fid _V [ELit (LStr "?")] -- function without linearization
where
lookupProds (Just fid) prods = IntMap.lookup fid prods
lookupProds Nothing prods
| f == _B || f == _V = Nothing
| otherwise = Just (Set.filter isApp (Set.unions (IntMap.elems prods)))
sub i path
| f == _B || f == _V = path
| otherwise = i:path
isApp (FApply _ _) = True
isApp _ = False
computeSeq seqid args = concatMap compute (elems seq)
where
seq = sequences pinfo ! seqid
compute (FSymCat d r) = (args !! d) ! r
compute (FSymLit d r) = (args !! d) ! r
compute (FSymKS ts) = map KS ts
compute (FSymKP ts alts) = [KP ts alts]
untokn :: [Tokn] -> [String]
untokn ts = case ts of
@@ -45,126 +88,23 @@ untokn ts = case ts of
v:_ -> v
_ -> d
-- Lifts all variants to the top level (except those in macros).
liftVariants :: Term -> [Term]
liftVariants = f
-- create a table from labels+params to variants
tabularLinearizes :: PGF -> CId -> Expr -> [[(String,String)]]
tabularLinearizes pgf lang e = map (zip lbls . map (unwords . untokn) . elems) (linTree pgf lang (\_ _ lint -> lint) e)
where
f (R ts) = liftM R $ mapM f ts
f (P t1 t2) = liftM2 P (f t1) (f t2)
f (S ts) = liftM S $ mapM f ts
f (FV ts) = ts >>= f
f (W s t) = liftM (W s) $ f t
f t = return t
lbls = case unApp e of
Just (f,_) -> let cat = valCat (lookType pgf f)
in case parser (lookConcr pgf lang) >>= Map.lookup cat . startCats of
Just (_,_,lbls) -> elems lbls
Nothing -> error "No labels"
Nothing -> error "Not function application"
linTree :: PGF -> CId -> Expr -> Term
linTree pgf lang e = lin (expr2tree e) Nothing
-- show bracketed markup with references to tree structure
markLinearizes :: PGF -> CId -> Expr -> [String]
markLinearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang mark
where
cnc = lookMap (error "no lang") lang (concretes pgf)
mark mb_f path lint = amap (bracket mb_f path) lint
lin (Abs xs e ) mty = case lin e Nothing of
R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs)
TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs)
lin (Fun fun es) mty = case Map.lookup fun (funs (abstract pgf)) of
Just (DTyp hyps _ _,_,_) -> let argVariants = sequence [liftVariants (lin e (Just ty)) | e <- es | (_,_,ty) <- hyps]
in variants [compute pgf lang args $ lookMap tm0 fun (lins cnc) | args <- argVariants]
Nothing -> tm0
lin (Lit (LStr s)) mty = R [kks (show s)] -- quoted
lin (Lit (LInt i)) mty = R [kks (show i)]
lin (Lit (LFlt d)) mty = R [kks (show d)]
lin (Var x) mty = case mty of
Just (DTyp _ cat _) -> compute pgf lang [K (KS (showCId x))] (lookMap tm0 cat (lindefs cnc))
Nothing -> TM (showCId x)
lin (Meta i) mty = case mty of
Just (DTyp _ cat _) -> compute pgf lang [K (KS ("?" ++ show i))] (lookMap tm0 cat (lindefs cnc))
Nothing -> TM (show i)
variants :: [Term] -> Term
variants ts = case ts of
[t] -> t
_ -> FV ts
unvariants :: Term -> [Term]
unvariants t = case t of
FV ts -> ts
_ -> [t]
compute :: PGF -> CId -> [Term] -> Term -> Term
compute pgf lang args = comp where
comp trm = case trm of
P r p -> proj (comp r) (comp p)
W s t -> W s (comp t)
R ts -> R $ map comp ts
V i -> idx args i -- already computed
F c -> comp $ look c -- not computed (if contains argvar)
FV ts -> FV $ map comp ts
S ts -> S $ filter (/= S []) $ map comp ts
_ -> trm
look = lookOper pgf lang
idx xs i = if i > length xs - 1
then trace
("too large " ++ show i ++ " for\n" ++ unlines (map show xs) ++ "\n") tm0
else xs !! i
proj r p = case (r,p) of
(_, FV ts) -> FV $ map (proj r) ts
(FV ts, _ ) -> FV $ map (\t -> proj t p) ts
(W s t, _) -> kks (s ++ getString (proj t p))
_ -> comp $ getField r (getIndex p)
getString t = case t of
K (KS s) -> s
_ -> error ("ERROR in grammar compiler: string from "++ show t) "ERR"
getIndex t = case t of
C i -> i
TM _ -> 0 -- default value for parameter
_ -> trace ("ERROR in grammar compiler: index from " ++ show t) 666
getField t i = case t of
R rs -> idx rs i
TM s -> TM s
_ -> error ("ERROR in grammar compiler: field from " ++ show t) t
---------
-- markup with tree positions
linearizesMark :: PGF -> CId -> Expr -> [String]
linearizesMark pgf lang = realizes . linTreeMark pgf lang
linTreeMark :: PGF -> CId -> Expr -> Term
linTreeMark pgf lang = lin [] . expr2tree
where
lin p (Abs xs e ) = case lin p e of
R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs)
TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs)
lin p (Fun fun es) =
let argVariants =
mapM (\ (i,e) -> liftVariants $ lin (sub p i) e) (zip [0..] es)
in variants [mark (fun,p) $ compute pgf lang args $ look fun |
args <- argVariants]
lin p (Lit (LStr s)) = mark p $ R [kks (show s)] -- quoted
lin p (Lit (LInt i)) = mark p $ R [kks (show i)]
lin p (Lit (LFlt d)) = mark p $ R [kks (show d)]
lin p (Var x) = mark p $ TM (showCId x)
lin p (Meta i) = mark p $ TM (show i)
look = lookLin pgf lang
mark :: Show a => a -> Term -> Term
mark p t = case t of
R ts -> R $ map (mark p) ts
FV ts -> R $ map (mark p) ts
S ts -> S $ bracket p ts
K s -> S $ bracket p [t]
W s (R ts) -> R [mark p $ kks (s ++ u) | K (KS u) <- ts]
_ -> t
-- otherwise in normal form
bracket p ts = [kks ("("++show p)] ++ ts ++ [kks ")"]
sub p i = p ++ [i]
-- | Show the printname of function or category
showPrintName :: PGF -> Language -> CId -> String
showPrintName pgf lang id = lookMap "?" id $ printnames $ lookMap (error "no lang") lang $ concretes pgf
bracket Nothing path ts = [KS ("("++show (reverse path))] ++ ts ++ [KS ")"]
bracket (Just f) path ts = [KS ("(("++showCId f++","++show (reverse path)++")")] ++ ts ++ [KS ")"]

View File

@@ -3,10 +3,14 @@ module PGF.Macros where
import PGF.CId
import PGF.Data
import Control.Monad
import qualified Data.Map as Map
import qualified Data.Array as Array
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import qualified Data.IntSet as IntSet
import qualified Data.Array as Array
import Data.Maybe
import Data.List
import GF.Data.Utilities(sortNub)
-- operations for manipulating PGF grammars and objects
@@ -122,6 +126,10 @@ contextLength :: Type -> Int
contextLength ty = case ty of
DTyp hyps _ _ -> length hyps
-- | Show the printname of function or category
showPrintName :: PGF -> Language -> CId -> String
showPrintName pgf lang id = lookMap "?" id $ printnames $ lookMap (error "no lang") lang $ concretes pgf
term0 :: CId -> Term
term0 = TM . showCId
@@ -151,3 +159,63 @@ cidVar = mkCId "__gfVar"
_B = mkCId "__gfB"
_V = mkCId "__gfV"
updateProductionIndices :: PGF -> PGF
updateProductionIndices pgf = pgf{concretes = fmap updateConcrete (concretes pgf)}
where
updateConcrete cnc =
case parser cnc of
Nothing -> cnc
Just pinfo -> let prods0 = filterProductions (productions pinfo)
p_prods = parseIndex pinfo prods0
l_prods = linIndex pinfo prods0
in cnc{parser = Just pinfo{pproductions = p_prods, lproductions = l_prods}}
filterProductions prods0
| IntMap.size prods == IntMap.size prods0 = prods
| otherwise = filterProductions prods
where
prods = IntMap.mapMaybe (filterProdSet prods0) prods0
filterProdSet prods set0
| Set.null set = Nothing
| otherwise = Just set
where
set = Set.filter (filterRule prods) set0
filterRule prods (FApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods) args
filterRule prods (FCoerce fcat) = isLiteralFCat fcat || IntMap.member fcat prods
filterRule prods _ = True
parseIndex pinfo = IntMap.mapMaybeWithKey filterProdSet
where
filterProdSet fid prods
| fid `IntSet.member` ho_fids = Just prods
| otherwise = let prods' = Set.filter (not . is_ho_prod) prods
in if Set.null prods'
then Nothing
else Just prods'
is_ho_prod (FApply _ [fid]) | fid == fcatVar = True
is_ho_prod _ = False
ho_fids :: IntSet.IntSet
ho_fids = IntSet.fromList [fid | cat <- ho_cats
, fid <- maybe [] (\(s,e,_) -> [s..e]) (Map.lookup cat (startCats pinfo))]
ho_cats :: [CId]
ho_cats = sortNub [c | (ty,_,_) <- Map.elems (funs (abstract pgf))
, h <- case ty of {DTyp hyps val _ -> hyps}
, let ty = typeOfHypo h
, c <- fst (catSkeleton ty)]
linIndex pinfo productions =
Map.fromListWith (IntMap.unionWith Set.union)
[(fun,IntMap.singleton res (Set.singleton prod)) | (res,prods) <- IntMap.toList productions
, prod <- Set.toList prods
, fun <- getFunctions prod]
where
getFunctions (FApply funid args) = let FFun fun _ = functions pinfo Array.! funid in [fun]
getFunctions (FCoerce fid) = case IntMap.lookup fid productions of
Nothing -> []
Just prods -> [fun | prod <- Set.toList prods, fun <- getFunctions prod]

View File

@@ -2,11 +2,13 @@ module PGF.Morphology(Lemma,Analysis,Morpho,
buildMorpho,
lookupMorpho,fullFormLexicon) where
import PGF.ShowLinearize (collectWords)
import PGF.Data
import PGF.CId
import PGF.Data
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.Array.IArray
import Data.List (intersperse)
-- these 4 definitions depend on the datastructure used
@@ -17,7 +19,23 @@ type Analysis = String
newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)])
buildMorpho :: PGF -> Language -> Morpho
buildMorpho pgf lang = Morpho (Map.fromListWith (++) (collectWords pgf lang))
buildMorpho pgf lang = Morpho $
case Map.lookup lang (concretes pgf) >>= parser of
Just pinfo -> collectWords pinfo
Nothing -> Map.empty
collectWords pinfo = Map.fromListWith (++)
[(t, [(fun,lbls ! l)]) | (s,e,lbls) <- Map.elems (startCats pinfo)
, fid <- [s..e]
, FApply funid _ <- maybe [] Set.toList (IntMap.lookup fid (pproductions pinfo))
, let FFun fun lins = functions pinfo ! funid
, (l,seqid) <- assocs lins
, sym <- elems (sequences pinfo ! seqid)
, t <- sym2tokns sym]
where
sym2tokns (FSymKS ts) = ts
sym2tokns (FSymKP ts alts) = ts ++ [t | Alt ts ps <- alts, t <- ts]
sym2tokns _ = []
lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)]
lookupMorpho (Morpho mo) s = maybe [] id $ Map.lookup s mo

View File

@@ -34,12 +34,13 @@ data Alternative =
deriving (Eq,Ord,Show)
data ParserInfo
= ParserInfo { functions :: Array FunId FFun
, sequences :: Array SeqId FSeq
, productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file
, productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions
, startCats :: Map.Map CId (FCat,FCat,Array FIndex String) -- for every category - start/end FCat and a list of label names
, totalCats :: {-# UNPACK #-} !FCat
= ParserInfo { functions :: Array FunId FFun
, sequences :: Array SeqId FSeq
, productions :: IntMap.IntMap (Set.Set Production) -- the original productions loaded from the PGF file
, pproductions :: IntMap.IntMap (Set.Set Production) -- productions needed for parsing
, lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)) -- productions needed for linearization
, startCats :: Map.Map CId (FCat,FCat,Array FIndex String) -- for every category - start/end FCat and a list of label names
, totalCats :: {-# UNPACK #-} !FCat
}
@@ -98,22 +99,3 @@ ppFCat fcat
ppFunId funid = char 'F' <> int funid
ppSeqId seqid = char 'S' <> int seqid
filterProductions = closure
where
closure prods0
| IntMap.size prods == IntMap.size prods0 = prods
| otherwise = closure prods
where
prods = IntMap.mapMaybe (filterProdSet prods0) prods0
filterProdSet prods set0
| Set.null set = Nothing
| otherwise = Just set
where
set = Set.filter (filterRule prods) set0
filterRule prods (FApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods) args
filterRule prods (FCoerce fcat) = isLiteralFCat fcat || IntMap.member fcat prods
filterRule prods _ = True

View File

@@ -59,7 +59,7 @@ initState pgf lang (DTyp _ start _) =
let items = case Map.lookup start (startCats pinfo) of
Just (s,e,labels) -> do cat <- range (s,e)
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
[] cat (productions pinfo)
[] cat (pproductions pinfo)
let FFun fn lins = functions pinfo ! funid
(lbl,seqid) <- assocs lins
return (Active 0 0 funid seqid args (AK cat lbl))
@@ -72,7 +72,7 @@ initState pgf lang (DTyp _ start _) =
in PState pgf
pinfo
(Chart emptyAC [] emptyPC (productions pinfo) (totalCats pinfo) 0)
(Chart emptyAC [] emptyPC (pproductions pinfo) (totalCats pinfo) 0)
(TMap.singleton [] (Set.fromList items))
-- | From the current state and the next token

View File

@@ -1,114 +0,0 @@
module PGF.ShowLinearize (
collectWords,
tableLinearize,
recordLinearize,
termLinearize,
tabularLinearize,
allLinearize,
markLinearize
) where
import PGF.CId
import PGF.Data
import PGF.Tree
import PGF.Macros
import PGF.Linearize
import GF.Data.Operations
import Data.List
import qualified Data.Map as Map
-- printing linearizations in different ways with source parameters
-- internal representation, only used internally in this module
data Record =
RR [(String,Record)]
| RT [(String,Record)]
| RFV [Record]
| RS String
| RCon String
prRecord :: Record -> String
prRecord = prr where
prr t = case t of
RR fs -> concat $
"{" :
(intersperse ";" (map (\ (l,v) -> unwords [l,"=", prr v]) fs)) ++ ["}"]
RT fs -> concat $
"table {" :
(intersperse ";" (map (\ (l,v) -> unwords [l,"=>",prr v]) fs)) ++ ["}"]
RFV ts -> concat $
"variants {" : (intersperse ";" (map prr ts)) ++ ["}"]
RS s -> prQuotedString s
RCon s -> s
-- uses the encoding of record types in PGF.paramlincat
mkRecord :: Term -> Term -> Record
mkRecord typ trm = case (typ,trm) of
(_, FV ts) -> RFV $ map (mkRecord typ) ts
(R rs, R ts) -> RR [(str lab, mkRecord ty t) | (P lab ty, t) <- zip rs ts]
(S [FV ps,ty],R ts) -> RT [(str par, mkRecord ty t) | (par, t) <- zip ps ts]
(_,W s (R ts)) -> mkRecord typ (R [K (KS (s ++ u)) | K (KS u) <- ts])
(FV ps, C i) -> RCon $ str $ ps !! i
(S [], _) -> case realizes trm of
[s] -> RS s
ss -> RFV $ map RS ss
_ -> RS $ show trm ---- printTree trm
where
str = realize
-- show all branches, without labels and params
allLinearize :: (String -> String) -> PGF -> CId -> Expr -> String
allLinearize unlex pgf lang = concat . map (unlex . pr) . tabularLinearize pgf lang where
pr (p,vs) = unlines vs
-- show all branches, with labels and params
tableLinearize :: (String -> String) -> PGF -> CId -> Expr -> String
tableLinearize unlex pgf lang = unlines . map pr . tabularLinearize pgf lang where
pr (p,vs) = p +++ ":" +++ unwords (intersperse "|" (map unlex vs))
-- create a table from labels+params to variants
tabularLinearize :: PGF -> CId -> Expr -> [(String,[String])]
tabularLinearize pgf lang = branches . recLinearize pgf lang where
branches r = case r of
RR fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t]
RT fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t]
RFV rs -> concatMap branches rs
RS s -> [([], [s])]
RCon _ -> []
-- show record in GF-source-like syntax
recordLinearize :: PGF -> CId -> Expr -> String
recordLinearize pgf lang = prRecord . recLinearize pgf lang
-- create a GF-like record, forming the basis of all functions above
recLinearize :: PGF -> CId -> Expr -> Record
recLinearize pgf lang tree = mkRecord typ $ linTree pgf lang tree where
typ = case expr2tree tree of
Fun f _ -> lookParamLincat pgf lang $ valCat $ lookType pgf f
-- show PGF term
termLinearize :: PGF -> CId -> Expr -> String
termLinearize pgf lang = show . linTree pgf lang
-- show bracketed markup with references to tree structure
markLinearize :: PGF -> CId -> Expr -> String
markLinearize pgf lang = concat . take 1 . linearizesMark pgf lang
-- for Morphology: word, lemma, tags
collectWords :: PGF -> Language -> [(String, [(CId,String)])]
collectWords pgf lang =
concatMap collOne
[(f,c,length xs) | (f,(DTyp xs c _,_,_)) <- Map.toList $ funs $ abstract pgf]
where
collOne (f,c,i) =
fromRec f [showCId c] (recLinearize pgf lang (foldl EApp (EFun f) (replicate i (EMeta 888))))
fromRec f v r = case r of
RR rs -> concat [fromRec f v t | (_,t) <- rs]
RT rs -> concat [fromRec f (p:v) t | (p,t) <- rs]
RFV rs -> concatMap (fromRec f v) rs
RS s -> [(w,[(f,unwords (reverse v))]) | w <- words s, w /= "?888"] ---
-- RS s -> [(s,[(f,unwords (reverse v))])]
RCon c -> [] ---- inherent

View File

@@ -102,7 +102,7 @@ graphvizDependencyTree format debug mlab ms pgf lang exp = case format of
ifd s = if debug then s else []
pot = readPosText $ head $ linearizesMark pgf lang exp
pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang exp
---- use Just str if you have str to match against
prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"]
@@ -188,9 +188,7 @@ getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
---- nubrec and domins are quadratic, but could be (n log n)
graphvizParseTree :: PGF -> CId -> Expr -> String
graphvizParseTree pgf lang = prGraph False . lin2tree pgf . linMark where
linMark = head . linearizesMark pgf lang
---- use Just str if you have str to match against
graphvizParseTree pgf lang = prGraph False . lin2tree pgf . concat . take 1 . markLinearizes pgf lang where
lin2tree pgf s = trace s $ prelude ++ nodes ++ links where
@@ -235,12 +233,12 @@ tag s = "<" ++ s ++ ">"
showp = init . tail . show
mtag = tag . ('n':) . uncommas
-- word alignments from Linearize.linearizesMark
-- word alignments from Linearize.markLinearize
-- words are chunks like {[0,1,1,0] old}
graphvizAlignment :: PGF -> Expr -> String
graphvizAlignment pgf = prGraph True . lin2graph . linsMark where
linsMark t = [s | la <- cncnames pgf, s <- take 1 (linearizesMark pgf la t)]
linsMark t = [concat (take 1 (markLinearizes pgf la t)) | la <- cncnames pgf]
lin2graph :: [String] -> [String]
lin2graph ss = trace (show ss) $ prelude ++ nodes ++ links