now the linearization is completely based on PMCFG

This commit is contained in:
krasimir
2010-01-17 17:05:21 +00:00
parent 9e3d4c74dc
commit af13bae2df
17 changed files with 250 additions and 346 deletions

View File

@@ -48,7 +48,6 @@ library
PGF.TypeCheck PGF.TypeCheck
PGF.Binary PGF.Binary
PGF.Morphology PGF.Morphology
PGF.ShowLinearize
PGF.VisualizeTree PGF.VisualizeTree
GF.Data.TrieMap GF.Data.TrieMap
GF.Data.Utilities GF.Data.Utilities

View File

@@ -14,7 +14,7 @@ module GF.Command.Commands (
import PGF import PGF
import PGF.CId import PGF.CId
import PGF.ShowLinearize import PGF.Linearize
import PGF.VisualizeTree import PGF.VisualizeTree
import PGF.Macros import PGF.Macros
import PGF.Data ---- import PGF.Data ----
@@ -344,9 +344,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
("all","show all forms and variants"), ("all","show all forms and variants"),
("bracket","show tree structure with brackets and paths to nodes"), ("bracket","show tree structure with brackets and paths to nodes"),
("multi","linearize to all languages (default)"), ("multi","linearize to all languages (default)"),
("record","show source-code-like record"),
("table","show all forms labelled by parameters"), ("table","show all forms labelled by parameters"),
("term", "show PGF term"),
("treebank","show the tree and tag linearizations with language names") ("treebank","show the tree and tag linearizations with language names")
] ++ stringOpOptions, ] ++ stringOpOptions,
flags = [ flags = [
@@ -797,11 +795,9 @@ allCommands cod env@(pgf, mos) = Map.fromList [
linear :: [Option] -> CId -> Expr -> String linear :: [Option] -> CId -> Expr -> String
linear opts lang = let unl = unlex opts lang in case opts of linear opts lang = let unl = unlex opts lang in case opts of
_ | isOpt "all" opts -> allLinearize unl pgf lang _ | isOpt "all" opts -> unlines . concat . intersperse [[]] . map (map (unl . snd)) . tabularLinearizes pgf lang
_ | isOpt "table" opts -> tableLinearize unl pgf lang _ | isOpt "table" opts -> unlines . concat . intersperse [[]] . map (map (\(p,v) -> p+++":"+++unl v)) . tabularLinearizes pgf lang
_ | isOpt "term" opts -> termLinearize pgf lang _ | isOpt "bracket" opts -> unlines . markLinearizes pgf lang
_ | isOpt "record" opts -> recordLinearize pgf lang
_ | isOpt "bracket" opts -> markLinearize pgf lang
_ -> unl . linearize pgf lang _ -> unl . linearize pgf lang
unlex opts lang = stringOps Nothing (getUnlex opts lang ++ map prOpt opts) ---- unlex opts lang = stringOps Nothing (getUnlex opts lang ++ map prOpt opts) ----
@@ -957,4 +953,3 @@ prMorphoAnalysis (w,lps) =
morphoMissing :: Morpho -> [String] -> [String] morphoMissing :: Morpho -> [String] -> [String]
morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w)] morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w)]

View File

@@ -168,10 +168,7 @@ choices nr path = BM (\c s -> let (args,_) = s
| otherwise = c : addConstraint path0 index0 tcs | otherwise = c : addConstraint path0 index0 tcs
mkRecord :: [BranchM (Value a)] -> BranchM (Value a) mkRecord :: [BranchM (Value a)] -> BranchM (Value a)
mkRecord xs = BM (\c -> go xs (c . Rec)) mkRecord xs = BM (\c -> foldl (\c (BM m) bs s -> c (m (\v s -> Return v) s : bs) s) (c . Rec) xs [])
where
go [] c s = c [] s
go (BM m:fs) c s = go fs (\bs s -> c (m (\v s -> Return v) s : bs) s) s
---------------------------------------------------------------------- ----------------------------------------------------------------------
@@ -202,7 +199,7 @@ protoFCat cnc_defs (n,cat) ctype =
_ -> error $ "Not a record: " ++ show ctype _ -> error $ "Not a record: " ++ show ctype
| otherwise = ctype | otherwise = ctype
loop path rcs tcs (R record) = List.foldl' (\(rcs,tcs) (index,term) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record) loop path rcs tcs (R record) = List.foldr (\(index,term) (rcs,tcs) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record)
loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs) loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs)
loop path rcs tcs (S _) = (path:rcs, tcs) loop path rcs tcs (S _) = (path:rcs, tcs)
loop path rcs tcs (F id) = case Map.lookup id cnc_defs of loop path rcs tcs (F id) = case Map.lookup id cnc_defs of
@@ -229,7 +226,7 @@ go' (Variant bs) path ss = do b <- member bs
go' (Return v) path ss = go v path ss go' (Return v) path ss = go v path ss
go :: Value SeqId -> FPath -> [SeqId] -> BacktrackM Env [SeqId] go :: Value SeqId -> FPath -> [SeqId] -> BacktrackM Env [SeqId]
go (Rec xs) path ss = foldM (\ss (lbl,b) -> go' b (lbl:path) ss) ss (zip [0..] xs) go (Rec xs) path ss = foldM (\ss (lbl,b) -> go' b (lbl:path) ss) ss (reverse (zip [0..] xs))
go (Str seqid) path ss = return (seqid : ss) go (Str seqid) path ss = return (seqid : ss)
go (Con i) path ss = restrictHead path i >> return ss go (Con i) path ss = restrictHead path i >> return ss
@@ -350,7 +347,7 @@ emptyGrammarEnv cnc_defs lincats params =
where where
(size,poly) = getMultipliers 1 [] ctype (size,poly) = getMultipliers 1 [] ctype
getMultipliers m ms (R record) = foldl (\(m,ms) t -> getMultipliers m ms t) (m,ms) record getMultipliers m ms (R record) = foldr (\t (m,ms) -> getMultipliers m ms t) (m,ms) record
getMultipliers m ms (S _) = (m,ms) getMultipliers m ms (S _) = (m,ms)
getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms) getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms)
getMultipliers m ms (F id) = case Map.lookup id cnc_defs of getMultipliers m ms (F id) = case Map.lookup id cnc_defs of
@@ -364,18 +361,12 @@ emptyGrammarEnv cnc_defs lincats params =
getLabels _ t = error (show t) getLabels _ t = error (show t)
expandHOAS abs_defs cnc_defs lincats lindefs env = expandHOAS abs_defs cnc_defs lincats lindefs env =
foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) hoCats foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) (Map.keys lincats)
where where
hoTypes :: [(Int,CId)] hoTypes :: [(Int,CId)]
hoTypes = sortNub [(n,c) | (_,(ty,_,_)) <- abs_defs hoTypes = sortNub [(n,c) | (_,(ty,_,_)) <- abs_defs
, (n,c) <- fst (typeSkeleton ty), n > 0] , (n,c) <- fst (typeSkeleton ty), n > 0]
hoCats :: [CId]
hoCats = sortNub [c | (_,(ty,_,_)) <- abs_defs
, h <- case ty of {DTyp hyps val _ -> hyps}
, let ty = typeOfHypo h
, c <- fst (catSkeleton ty)]
-- add a range of PMCFG categories for each GF high-order category -- add a range of PMCFG categories for each GF high-order category
add_hoCat env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,cat) = add_hoCat env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,cat) =
case IntMap.lookup 0 catSet >>= Map.lookup cat of case IntMap.lookup 0 catSet >>= Map.lookup cat of
@@ -386,8 +377,7 @@ expandHOAS abs_defs cnc_defs lincats lindefs env =
-- add one PMCFG function for each high-order type: _B : Cat -> Var -> ... -> Var -> HoCat -- add one PMCFG function for each high-order type: _B : Cat -> Var -> ... -> Var -> HoCat
add_hoFun env (n,cat) = add_hoFun env (n,cat) =
let linRec = reverse $ let linRec = [[FSymCat 0 i] | i <- case arg of {PFCat _ _ rcs _ -> [0..length rcs-1]}] ++
[[FSymCat 0 i] | (l,i) <- case arg of {PFCat _ _ rcs _ -> zip rcs [0..]}] ++
[[FSymLit i 0] | i <- [1..n]] [[FSymLit i 0] | i <- [1..n]]
(env1,lins) = List.mapAccumL addFSeq env linRec (env1,lins) = List.mapAccumL addFSeq env linRec
newLinRec = mkArray lins newLinRec = mkArray lins
@@ -405,13 +395,10 @@ expandHOAS abs_defs cnc_defs lincats lindefs env =
-- add one PMCFG function for each high-order category: _V : Var -> Cat -- add one PMCFG function for each high-order category: _V : Var -> Cat
add_varFun env cat = add_varFun env cat =
convertRule cnc_defs env (PFRule _V [(0,cidVar)] (0,cat) [arg] res lindef) case Map.lookup cat lindefs of
Nothing -> env
Just lindef -> convertRule cnc_defs env (PFRule _V [(0,cidVar)] (0,cat) [arg] res lindef)
where where
lindef =
case Map.lookup cat lindefs of
Nothing -> error $ "No lindef for " ++ showCId cat
Just def -> def
arg = arg =
case Map.lookup cidVar lincats of case Map.lookup cidVar lincats of
Nothing -> error $ "No lincat for " ++ showCId cat Nothing -> error $ "No lincat for " ++ showCId cat
@@ -455,15 +442,15 @@ getParserInfo :: GrammarEnv -> ParserInfo
getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
ParserInfo { functions = mkArray funSet ParserInfo { functions = mkArray funSet
, sequences = mkArray seqSet , sequences = mkArray seqSet
, productions0= productions0 , productions = IntMap.union prodSet coercions
, productions = filterProductions productions0 , pproductions = IntMap.empty
, lproductions = Map.empty
, startCats = maybe Map.empty (Map.map (\(start,end,_,lbls) -> (start,end,lbls))) (IntMap.lookup 0 catSet) , startCats = maybe Map.empty (Map.map (\(start,end,_,lbls) -> (start,end,lbls))) (IntMap.lookup 0 catSet)
, totalCats = last_id+1 , totalCats = last_id+1
} }
where where
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
productions0 = IntMap.union prodSet coercions
coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet] coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet]
getFCats :: GrammarEnv -> ProtoFCat -> [FCat] getFCats :: GrammarEnv -> ProtoFCat -> [FCat]

View File

@@ -5,7 +5,7 @@ import GF.Compile.Export
import GF.Compile.GeneratePMCFG import GF.Compile.GeneratePMCFG
import PGF.CId import PGF.CId
import PGF.Linearize(realize) import PGF.Macros(updateProductionIndices)
import qualified PGF.Macros as CM import qualified PGF.Macros as CM
import qualified PGF.Data as C import qualified PGF.Data as C
import qualified PGF.Data as D import qualified PGF.Data as D
@@ -46,7 +46,7 @@ mkCanon2gfcc opts cnc gr =
-- Adds parsers for all concretes -- Adds parsers for all concretes
addParsers :: Options -> D.PGF -> IO D.PGF addParsers :: Options -> D.PGF -> IO D.PGF
addParsers opts pgf = do cncs <- sequence [conv lang cnc | (lang,cnc) <- Map.toList (D.concretes pgf)] addParsers opts pgf = do cncs <- sequence [conv lang cnc | (lang,cnc) <- Map.toList (D.concretes pgf)]
return pgf { D.concretes = Map.fromList cncs } return $ updateProductionIndices $ pgf { D.concretes = Map.fromList cncs }
where where
conv lang cnc = do pinfo <- convertConcrete opts (D.abstract pgf) lang cnc conv lang cnc = do pinfo <- convertConcrete opts (D.abstract pgf) lang cnc
return (lang,cnc { D.parser = Just pinfo }) return (lang,cnc { D.parser = Just pinfo })
@@ -586,3 +586,31 @@ requiredCanModules isSingle gr c = nub $ filter notReuse ops ++ exts where
notReuse i = errVal True $ do notReuse i = errVal True $ do
m <- M.lookupModule gr i m <- M.lookupModule gr i
return $ M.isModRes m -- to exclude reused Cnc and Abs from required return $ M.isModRes m -- to exclude reused Cnc and Abs from required
realize :: C.Term -> String
realize = concat . take 1 . realizes
realizes :: C.Term -> [String]
realizes = map (unwords . untokn) . realizest
realizest :: C.Term -> [[C.Tokn]]
realizest trm = case trm of
C.R ts -> realizest (ts !! 0)
C.S ss -> map concat $ combinations $ map realizest ss
C.K t -> [[t]]
C.W s t -> [[C.KS (s ++ r)] | [C.KS r] <- realizest t]
C.FV ts -> concatMap realizest ts
C.TM s -> [[C.KS s]]
_ -> [[C.KS $ "REALIZE_ERROR " ++ show trm]] ---- debug
untokn :: [C.Tokn] -> [String]
untokn ts = case ts of
C.KP d _ : [] -> d
C.KP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss
C.KS s : ws -> s : untokn ws
[] -> []
where
sel d vs w = case [v | C.Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of
v:_ -> v
_ -> d

View File

@@ -60,6 +60,7 @@ prCnc abstr name c = prAll prLinCat (lincats c) $$ prAll prLin (lins (expand c))
pr p (P t1 t2) = prec p 3 (pr 3 t1 <> text "!" <> pr 3 t2) pr p (P t1 t2) = prec p 3 (pr 3 t1 <> text "!" <> pr 3 t2)
pr p (S ts) = prec p 2 (hsep (punctuate (text " ++") (map (pr 2) ts))) pr p (S ts) = prec p 2 (hsep (punctuate (text " ++") (map (pr 2) ts)))
pr p (K (KS t)) = doubleQuotes (text t) pr p (K (KS t)) = doubleQuotes (text t)
pr p (K _) = empty
pr p (V i) = text ("argv_" ++ show (i+1)) pr p (V i) = text ("argv_" ++ show (i+1))
pr p (C i) = text (show (i+1)) pr p (C i) = text (show (i+1))
pr p (FV ts) = prec p 1 (hsep (punctuate (text " |") (map (pr 1) ts))) pr p (FV ts) = prec p 1 (hsep (punctuate (text " |") (map (pr 1) ts)))

View File

@@ -90,7 +90,7 @@ children = JS.Ident "cs"
-- Parser -- Parser
parser2js :: ParserInfo -> [JS.Expr] parser2js :: ParserInfo -> [JS.Expr]
parser2js p = [new "Parser" [JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray (map frule2js (Set.toList set))) | (cat,set) <- IntMap.toList (productions0 p)], parser2js p = [new "Parser" [JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray (map frule2js (Set.toList set))) | (cat,set) <- IntMap.toList (productions p)],
JS.EArray $ (map ffun2js (Array.elems (functions p))), JS.EArray $ (map ffun2js (Array.elems (functions p))),
JS.EArray $ (map seq2js (Array.elems (sequences p))), JS.EArray $ (map seq2js (Array.elems (sequences p))),
JS.EObj $ map cats (Map.assocs (startCats p)), JS.EObj $ map cats (Map.assocs (startCats p)),

View File

@@ -19,7 +19,7 @@ module GF.Quiz (
) where ) where
import PGF import PGF
import PGF.ShowLinearize import PGF.Linearize
import GF.Data.Operations import GF.Data.Operations
import GF.Infra.UseIO import GF.Infra.UseIO
import GF.Infra.Option import GF.Infra.Option
@@ -51,11 +51,11 @@ morphologyList :: PGF -> Language -> Type -> Int -> IO [(String,[String])]
morphologyList pgf ig typ number = do morphologyList pgf ig typ number = do
ts <- generateRandom pgf typ >>= return . take (max 1 number) ts <- generateRandom pgf typ >>= return . take (max 1 number)
gen <- newStdGen gen <- newStdGen
let ss = map (tabularLinearize pgf ig) ts let ss = map (tabularLinearizes pgf ig) ts
let size = length (head ss) let size = length (head ss)
let forms = take number $ randomRs (0,size-1) gen let forms = take number $ randomRs (0,size-1) gen
return [(head (snd (head pws)) +++ par, ws) | return [(snd (head pws0) +++ fst (pws0 !! i), ws) |
(pws,i) <- zip ss forms, let (par,ws) = pws !! i] (pwss@(pws0:_),i) <- zip ss forms, let ws = map (\pws -> snd (pws !! i)) pwss]
-- | compare answer to the list of right answers, increase score and give feedback -- | compare answer to the list of right answers, increase score and give feedback
mkAnswer :: Encoding -> [String] -> String -> (Integer, String) mkAnswer :: Encoding -> [String] -> String -> (Integer, String)

View File

@@ -37,7 +37,7 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang) pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang)
rules :: [(FCat,Production)] rules :: [(FCat,Production)]
rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.productions pinfo) rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.pproductions pinfo)
, prod <- Set.toList set] , prod <- Set.toList set]
fcatCats :: Map FCat Cat fcatCats :: Map FCat Cat
@@ -58,7 +58,7 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
topdownRules cat = f cat [] topdownRules cat = f cat []
where where
f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo)) f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (pproductions pinfo))
g (FApply funid args) rules = (functions pinfo ! funid,args) : rules g (FApply funid args) rules = (functions pinfo ! funid,args) : rules
g (FCoerce cat) rules = f cat rules g (FCoerce cat) rules = f cat rules

View File

@@ -16,7 +16,6 @@ import GF.Speech.SRG (getSpeechLanguage)
import PGF.CId import PGF.CId
import PGF.Data import PGF.Data
import PGF.Macros import PGF.Macros
import PGF.Linearize (showPrintName)
import Control.Monad (liftM) import Control.Monad (liftM)
import Data.List (isPrefixOf, find, intersperse) import Data.List (isPrefixOf, find, intersperse)

View File

@@ -2,6 +2,7 @@ module PGF.Binary where
import PGF.CId import PGF.CId
import PGF.Data import PGF.Data
import PGF.Macros
import Data.Binary import Data.Binary
import Data.Binary.Put import Data.Binary.Put
import Data.Binary.Get import Data.Binary.Get
@@ -28,10 +29,11 @@ instance Binary PGF where
gflags <- get gflags <- get
abstract <- get abstract <- get
concretes <- get concretes <- get
return (PGF{ absname=absname, cncnames=cncnames return $ updateProductionIndices $
, gflags=gflags (PGF{ absname=absname, cncnames=cncnames
, abstract=abstract, concretes=concretes , gflags=gflags
}) , abstract=abstract, concretes=concretes
})
instance Binary CId where instance Binary CId where
put (CId bs) = put bs put (CId bs) = put bs
@@ -185,15 +187,16 @@ instance Binary Production where
_ -> decodingError _ -> decodingError
instance Binary ParserInfo where instance Binary ParserInfo where
put p = put (functions p, sequences p, productions0 p, totalCats p, startCats p) put p = put (functions p, sequences p, productions p, totalCats p, startCats p)
get = do functions <- get get = do functions <- get
sequences <- get sequences <- get
productions0<- get productions <- get
totalCats <- get totalCats <- get
startCats <- get startCats <- get
return (ParserInfo{functions=functions,sequences=sequences return (ParserInfo{functions=functions,sequences=sequences
,productions0=productions0 ,productions = productions
,productions =filterProductions productions0 ,pproductions = IntMap.empty
,lproductions = Map.empty
,totalCats=totalCats,startCats=startCats}) ,totalCats=totalCats,startCats=startCats})
decodingError = fail "This PGF file was compiled with different version of GF" decodingError = fail "This PGF file was compiled with different version of GF"

View File

@@ -1,38 +1,81 @@
{-# LANGUAGE ParallelListComp #-} module PGF.Linearize(linearizes,markLinearizes,tabularLinearizes) where
module PGF.Linearize
(linearizes,showPrintName,realize,realizes,linTree, linTreeMark,linearizesMark) where
import PGF.CId import PGF.CId
import PGF.Data import PGF.Data
import PGF.Macros import PGF.Macros
import PGF.Tree import Data.Maybe (fromJust)
import Data.Array.IArray
import Data.List
import Control.Monad import Control.Monad
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.List import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import Debug.Trace
-- linearization and computation of concrete PGF Terms -- linearization and computation of concrete PGF Terms
type LinTable = Array FIndex [Tokn]
linearizes :: PGF -> CId -> Expr -> [String] linearizes :: PGF -> CId -> Expr -> [String]
linearizes pgf lang = realizes . linTree pgf lang linearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang (\_ _ lint -> lint)
realize :: Term -> String linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Expr -> [LinTable]
realize = concat . take 1 . realizes linTree pgf lang mark e = lin0 [] [] [] Nothing e
where
cnc = lookMap (error "no lang") lang (concretes pgf)
pinfo = fromJust (parser cnc)
lp = lproductions pinfo
realizes :: Term -> [String] lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e
realizes = map (unwords . untokn) . realizest lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e
lin0 path xs ys mb_fid e | null xs = lin path ys mb_fid e []
| otherwise = apply path (xs ++ ys) mb_fid _B (e:[ELit (LStr x) | x <- xs])
realizest :: Term -> [[Tokn]] lin path xs mb_fid (EApp e1 e2) es = lin path xs mb_fid e1 (e2:es)
realizest trm = case trm of lin path xs mb_fid (ELit l) [] = case l of
R ts -> realizest (ts !! 0) LStr s -> return (mark Nothing path (ss s))
S ss -> map concat $ combinations $ map realizest ss LInt n -> return (mark Nothing path (ss (show n)))
K t -> [[t]] LFlt f -> return (mark Nothing path (ss (show f)))
W s t -> [[KS (s ++ r)] | [KS r] <- realizest t] lin path xs mb_fid (EMeta i) es = apply path xs mb_fid _V (ELit (LStr ('?':show i)):es)
FV ts -> concatMap realizest ts lin path xs mb_fid (EFun f) es = map (mark (Just f) path) (apply path xs mb_fid f es)
TM s -> [[KS s]] lin path xs mb_fid (EVar i) es = apply path xs mb_fid _V (ELit (LStr (xs !! i)) :es)
_ -> [[KS $ "REALIZE_ERROR " ++ show trm]] ---- debug lin path xs mb_fid (ETyped e _) es = lin path xs mb_fid e es
lin path xs mb_fid (EImplArg e) es = lin path xs mb_fid e es
ss s = listArray (0,0) [[KS s]]
apply path xs mb_fid f es =
case Map.lookup f lp of
Just prods -> case lookupProds mb_fid prods of
Just set -> do prod <- Set.toList set
case prod of
FApply funid fids -> do guard (length fids == length es)
args <- sequence (zipWith3 (\i fid e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es)
let (FFun _ lins) = functions pinfo ! funid
return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])
FCoerce fid -> apply path xs (Just fid) f es
Nothing -> mzero
Nothing -> apply path xs mb_fid _V [ELit (LStr "?")] -- function without linearization
where
lookupProds (Just fid) prods = IntMap.lookup fid prods
lookupProds Nothing prods
| f == _B || f == _V = Nothing
| otherwise = Just (Set.filter isApp (Set.unions (IntMap.elems prods)))
sub i path
| f == _B || f == _V = path
| otherwise = i:path
isApp (FApply _ _) = True
isApp _ = False
computeSeq seqid args = concatMap compute (elems seq)
where
seq = sequences pinfo ! seqid
compute (FSymCat d r) = (args !! d) ! r
compute (FSymLit d r) = (args !! d) ! r
compute (FSymKS ts) = map KS ts
compute (FSymKP ts alts) = [KP ts alts]
untokn :: [Tokn] -> [String] untokn :: [Tokn] -> [String]
untokn ts = case ts of untokn ts = case ts of
@@ -45,126 +88,23 @@ untokn ts = case ts of
v:_ -> v v:_ -> v
_ -> d _ -> d
-- Lifts all variants to the top level (except those in macros). -- create a table from labels+params to variants
liftVariants :: Term -> [Term] tabularLinearizes :: PGF -> CId -> Expr -> [[(String,String)]]
liftVariants = f tabularLinearizes pgf lang e = map (zip lbls . map (unwords . untokn) . elems) (linTree pgf lang (\_ _ lint -> lint) e)
where where
f (R ts) = liftM R $ mapM f ts lbls = case unApp e of
f (P t1 t2) = liftM2 P (f t1) (f t2) Just (f,_) -> let cat = valCat (lookType pgf f)
f (S ts) = liftM S $ mapM f ts in case parser (lookConcr pgf lang) >>= Map.lookup cat . startCats of
f (FV ts) = ts >>= f Just (_,_,lbls) -> elems lbls
f (W s t) = liftM (W s) $ f t Nothing -> error "No labels"
f t = return t Nothing -> error "Not function application"
linTree :: PGF -> CId -> Expr -> Term
linTree pgf lang e = lin (expr2tree e) Nothing -- show bracketed markup with references to tree structure
markLinearizes :: PGF -> CId -> Expr -> [String]
markLinearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang mark
where where
cnc = lookMap (error "no lang") lang (concretes pgf) mark mb_f path lint = amap (bracket mb_f path) lint
lin (Abs xs e ) mty = case lin e Nothing of bracket Nothing path ts = [KS ("("++show (reverse path))] ++ ts ++ [KS ")"]
R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs) bracket (Just f) path ts = [KS ("(("++showCId f++","++show (reverse path)++")")] ++ ts ++ [KS ")"]
TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs)
lin (Fun fun es) mty = case Map.lookup fun (funs (abstract pgf)) of
Just (DTyp hyps _ _,_,_) -> let argVariants = sequence [liftVariants (lin e (Just ty)) | e <- es | (_,_,ty) <- hyps]
in variants [compute pgf lang args $ lookMap tm0 fun (lins cnc) | args <- argVariants]
Nothing -> tm0
lin (Lit (LStr s)) mty = R [kks (show s)] -- quoted
lin (Lit (LInt i)) mty = R [kks (show i)]
lin (Lit (LFlt d)) mty = R [kks (show d)]
lin (Var x) mty = case mty of
Just (DTyp _ cat _) -> compute pgf lang [K (KS (showCId x))] (lookMap tm0 cat (lindefs cnc))
Nothing -> TM (showCId x)
lin (Meta i) mty = case mty of
Just (DTyp _ cat _) -> compute pgf lang [K (KS ("?" ++ show i))] (lookMap tm0 cat (lindefs cnc))
Nothing -> TM (show i)
variants :: [Term] -> Term
variants ts = case ts of
[t] -> t
_ -> FV ts
unvariants :: Term -> [Term]
unvariants t = case t of
FV ts -> ts
_ -> [t]
compute :: PGF -> CId -> [Term] -> Term -> Term
compute pgf lang args = comp where
comp trm = case trm of
P r p -> proj (comp r) (comp p)
W s t -> W s (comp t)
R ts -> R $ map comp ts
V i -> idx args i -- already computed
F c -> comp $ look c -- not computed (if contains argvar)
FV ts -> FV $ map comp ts
S ts -> S $ filter (/= S []) $ map comp ts
_ -> trm
look = lookOper pgf lang
idx xs i = if i > length xs - 1
then trace
("too large " ++ show i ++ " for\n" ++ unlines (map show xs) ++ "\n") tm0
else xs !! i
proj r p = case (r,p) of
(_, FV ts) -> FV $ map (proj r) ts
(FV ts, _ ) -> FV $ map (\t -> proj t p) ts
(W s t, _) -> kks (s ++ getString (proj t p))
_ -> comp $ getField r (getIndex p)
getString t = case t of
K (KS s) -> s
_ -> error ("ERROR in grammar compiler: string from "++ show t) "ERR"
getIndex t = case t of
C i -> i
TM _ -> 0 -- default value for parameter
_ -> trace ("ERROR in grammar compiler: index from " ++ show t) 666
getField t i = case t of
R rs -> idx rs i
TM s -> TM s
_ -> error ("ERROR in grammar compiler: field from " ++ show t) t
---------
-- markup with tree positions
linearizesMark :: PGF -> CId -> Expr -> [String]
linearizesMark pgf lang = realizes . linTreeMark pgf lang
linTreeMark :: PGF -> CId -> Expr -> Term
linTreeMark pgf lang = lin [] . expr2tree
where
lin p (Abs xs e ) = case lin p e of
R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs)
TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs)
lin p (Fun fun es) =
let argVariants =
mapM (\ (i,e) -> liftVariants $ lin (sub p i) e) (zip [0..] es)
in variants [mark (fun,p) $ compute pgf lang args $ look fun |
args <- argVariants]
lin p (Lit (LStr s)) = mark p $ R [kks (show s)] -- quoted
lin p (Lit (LInt i)) = mark p $ R [kks (show i)]
lin p (Lit (LFlt d)) = mark p $ R [kks (show d)]
lin p (Var x) = mark p $ TM (showCId x)
lin p (Meta i) = mark p $ TM (show i)
look = lookLin pgf lang
mark :: Show a => a -> Term -> Term
mark p t = case t of
R ts -> R $ map (mark p) ts
FV ts -> R $ map (mark p) ts
S ts -> S $ bracket p ts
K s -> S $ bracket p [t]
W s (R ts) -> R [mark p $ kks (s ++ u) | K (KS u) <- ts]
_ -> t
-- otherwise in normal form
bracket p ts = [kks ("("++show p)] ++ ts ++ [kks ")"]
sub p i = p ++ [i]
-- | Show the printname of function or category
showPrintName :: PGF -> Language -> CId -> String
showPrintName pgf lang id = lookMap "?" id $ printnames $ lookMap (error "no lang") lang $ concretes pgf

View File

@@ -3,10 +3,14 @@ module PGF.Macros where
import PGF.CId import PGF.CId
import PGF.Data import PGF.Data
import Control.Monad import Control.Monad
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Array as Array import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import qualified Data.IntSet as IntSet
import qualified Data.Array as Array
import Data.Maybe import Data.Maybe
import Data.List import Data.List
import GF.Data.Utilities(sortNub)
-- operations for manipulating PGF grammars and objects -- operations for manipulating PGF grammars and objects
@@ -122,6 +126,10 @@ contextLength :: Type -> Int
contextLength ty = case ty of contextLength ty = case ty of
DTyp hyps _ _ -> length hyps DTyp hyps _ _ -> length hyps
-- | Show the printname of function or category
showPrintName :: PGF -> Language -> CId -> String
showPrintName pgf lang id = lookMap "?" id $ printnames $ lookMap (error "no lang") lang $ concretes pgf
term0 :: CId -> Term term0 :: CId -> Term
term0 = TM . showCId term0 = TM . showCId
@@ -151,3 +159,63 @@ cidVar = mkCId "__gfVar"
_B = mkCId "__gfB" _B = mkCId "__gfB"
_V = mkCId "__gfV" _V = mkCId "__gfV"
updateProductionIndices :: PGF -> PGF
updateProductionIndices pgf = pgf{concretes = fmap updateConcrete (concretes pgf)}
where
updateConcrete cnc =
case parser cnc of
Nothing -> cnc
Just pinfo -> let prods0 = filterProductions (productions pinfo)
p_prods = parseIndex pinfo prods0
l_prods = linIndex pinfo prods0
in cnc{parser = Just pinfo{pproductions = p_prods, lproductions = l_prods}}
filterProductions prods0
| IntMap.size prods == IntMap.size prods0 = prods
| otherwise = filterProductions prods
where
prods = IntMap.mapMaybe (filterProdSet prods0) prods0
filterProdSet prods set0
| Set.null set = Nothing
| otherwise = Just set
where
set = Set.filter (filterRule prods) set0
filterRule prods (FApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods) args
filterRule prods (FCoerce fcat) = isLiteralFCat fcat || IntMap.member fcat prods
filterRule prods _ = True
parseIndex pinfo = IntMap.mapMaybeWithKey filterProdSet
where
filterProdSet fid prods
| fid `IntSet.member` ho_fids = Just prods
| otherwise = let prods' = Set.filter (not . is_ho_prod) prods
in if Set.null prods'
then Nothing
else Just prods'
is_ho_prod (FApply _ [fid]) | fid == fcatVar = True
is_ho_prod _ = False
ho_fids :: IntSet.IntSet
ho_fids = IntSet.fromList [fid | cat <- ho_cats
, fid <- maybe [] (\(s,e,_) -> [s..e]) (Map.lookup cat (startCats pinfo))]
ho_cats :: [CId]
ho_cats = sortNub [c | (ty,_,_) <- Map.elems (funs (abstract pgf))
, h <- case ty of {DTyp hyps val _ -> hyps}
, let ty = typeOfHypo h
, c <- fst (catSkeleton ty)]
linIndex pinfo productions =
Map.fromListWith (IntMap.unionWith Set.union)
[(fun,IntMap.singleton res (Set.singleton prod)) | (res,prods) <- IntMap.toList productions
, prod <- Set.toList prods
, fun <- getFunctions prod]
where
getFunctions (FApply funid args) = let FFun fun _ = functions pinfo Array.! funid in [fun]
getFunctions (FCoerce fid) = case IntMap.lookup fid productions of
Nothing -> []
Just prods -> [fun | prod <- Set.toList prods, fun <- getFunctions prod]

View File

@@ -2,11 +2,13 @@ module PGF.Morphology(Lemma,Analysis,Morpho,
buildMorpho, buildMorpho,
lookupMorpho,fullFormLexicon) where lookupMorpho,fullFormLexicon) where
import PGF.ShowLinearize (collectWords)
import PGF.Data
import PGF.CId import PGF.CId
import PGF.Data
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.Array.IArray
import Data.List (intersperse) import Data.List (intersperse)
-- these 4 definitions depend on the datastructure used -- these 4 definitions depend on the datastructure used
@@ -17,7 +19,23 @@ type Analysis = String
newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)]) newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)])
buildMorpho :: PGF -> Language -> Morpho buildMorpho :: PGF -> Language -> Morpho
buildMorpho pgf lang = Morpho (Map.fromListWith (++) (collectWords pgf lang)) buildMorpho pgf lang = Morpho $
case Map.lookup lang (concretes pgf) >>= parser of
Just pinfo -> collectWords pinfo
Nothing -> Map.empty
collectWords pinfo = Map.fromListWith (++)
[(t, [(fun,lbls ! l)]) | (s,e,lbls) <- Map.elems (startCats pinfo)
, fid <- [s..e]
, FApply funid _ <- maybe [] Set.toList (IntMap.lookup fid (pproductions pinfo))
, let FFun fun lins = functions pinfo ! funid
, (l,seqid) <- assocs lins
, sym <- elems (sequences pinfo ! seqid)
, t <- sym2tokns sym]
where
sym2tokns (FSymKS ts) = ts
sym2tokns (FSymKP ts alts) = ts ++ [t | Alt ts ps <- alts, t <- ts]
sym2tokns _ = []
lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)] lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)]
lookupMorpho (Morpho mo) s = maybe [] id $ Map.lookup s mo lookupMorpho (Morpho mo) s = maybe [] id $ Map.lookup s mo

View File

@@ -34,12 +34,13 @@ data Alternative =
deriving (Eq,Ord,Show) deriving (Eq,Ord,Show)
data ParserInfo data ParserInfo
= ParserInfo { functions :: Array FunId FFun = ParserInfo { functions :: Array FunId FFun
, sequences :: Array SeqId FSeq , sequences :: Array SeqId FSeq
, productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file , productions :: IntMap.IntMap (Set.Set Production) -- the original productions loaded from the PGF file
, productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions , pproductions :: IntMap.IntMap (Set.Set Production) -- productions needed for parsing
, startCats :: Map.Map CId (FCat,FCat,Array FIndex String) -- for every category - start/end FCat and a list of label names , lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)) -- productions needed for linearization
, totalCats :: {-# UNPACK #-} !FCat , startCats :: Map.Map CId (FCat,FCat,Array FIndex String) -- for every category - start/end FCat and a list of label names
, totalCats :: {-# UNPACK #-} !FCat
} }
@@ -98,22 +99,3 @@ ppFCat fcat
ppFunId funid = char 'F' <> int funid ppFunId funid = char 'F' <> int funid
ppSeqId seqid = char 'S' <> int seqid ppSeqId seqid = char 'S' <> int seqid
filterProductions = closure
where
closure prods0
| IntMap.size prods == IntMap.size prods0 = prods
| otherwise = closure prods
where
prods = IntMap.mapMaybe (filterProdSet prods0) prods0
filterProdSet prods set0
| Set.null set = Nothing
| otherwise = Just set
where
set = Set.filter (filterRule prods) set0
filterRule prods (FApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods) args
filterRule prods (FCoerce fcat) = isLiteralFCat fcat || IntMap.member fcat prods
filterRule prods _ = True

View File

@@ -59,7 +59,7 @@ initState pgf lang (DTyp _ start _) =
let items = case Map.lookup start (startCats pinfo) of let items = case Map.lookup start (startCats pinfo) of
Just (s,e,labels) -> do cat <- range (s,e) Just (s,e,labels) -> do cat <- range (s,e)
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args) (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
[] cat (productions pinfo) [] cat (pproductions pinfo)
let FFun fn lins = functions pinfo ! funid let FFun fn lins = functions pinfo ! funid
(lbl,seqid) <- assocs lins (lbl,seqid) <- assocs lins
return (Active 0 0 funid seqid args (AK cat lbl)) return (Active 0 0 funid seqid args (AK cat lbl))
@@ -72,7 +72,7 @@ initState pgf lang (DTyp _ start _) =
in PState pgf in PState pgf
pinfo pinfo
(Chart emptyAC [] emptyPC (productions pinfo) (totalCats pinfo) 0) (Chart emptyAC [] emptyPC (pproductions pinfo) (totalCats pinfo) 0)
(TMap.singleton [] (Set.fromList items)) (TMap.singleton [] (Set.fromList items))
-- | From the current state and the next token -- | From the current state and the next token

View File

@@ -1,114 +0,0 @@
module PGF.ShowLinearize (
collectWords,
tableLinearize,
recordLinearize,
termLinearize,
tabularLinearize,
allLinearize,
markLinearize
) where
import PGF.CId
import PGF.Data
import PGF.Tree
import PGF.Macros
import PGF.Linearize
import GF.Data.Operations
import Data.List
import qualified Data.Map as Map
-- printing linearizations in different ways with source parameters
-- internal representation, only used internally in this module
data Record =
RR [(String,Record)]
| RT [(String,Record)]
| RFV [Record]
| RS String
| RCon String
prRecord :: Record -> String
prRecord = prr where
prr t = case t of
RR fs -> concat $
"{" :
(intersperse ";" (map (\ (l,v) -> unwords [l,"=", prr v]) fs)) ++ ["}"]
RT fs -> concat $
"table {" :
(intersperse ";" (map (\ (l,v) -> unwords [l,"=>",prr v]) fs)) ++ ["}"]
RFV ts -> concat $
"variants {" : (intersperse ";" (map prr ts)) ++ ["}"]
RS s -> prQuotedString s
RCon s -> s
-- uses the encoding of record types in PGF.paramlincat
mkRecord :: Term -> Term -> Record
mkRecord typ trm = case (typ,trm) of
(_, FV ts) -> RFV $ map (mkRecord typ) ts
(R rs, R ts) -> RR [(str lab, mkRecord ty t) | (P lab ty, t) <- zip rs ts]
(S [FV ps,ty],R ts) -> RT [(str par, mkRecord ty t) | (par, t) <- zip ps ts]
(_,W s (R ts)) -> mkRecord typ (R [K (KS (s ++ u)) | K (KS u) <- ts])
(FV ps, C i) -> RCon $ str $ ps !! i
(S [], _) -> case realizes trm of
[s] -> RS s
ss -> RFV $ map RS ss
_ -> RS $ show trm ---- printTree trm
where
str = realize
-- show all branches, without labels and params
allLinearize :: (String -> String) -> PGF -> CId -> Expr -> String
allLinearize unlex pgf lang = concat . map (unlex . pr) . tabularLinearize pgf lang where
pr (p,vs) = unlines vs
-- show all branches, with labels and params
tableLinearize :: (String -> String) -> PGF -> CId -> Expr -> String
tableLinearize unlex pgf lang = unlines . map pr . tabularLinearize pgf lang where
pr (p,vs) = p +++ ":" +++ unwords (intersperse "|" (map unlex vs))
-- create a table from labels+params to variants
tabularLinearize :: PGF -> CId -> Expr -> [(String,[String])]
tabularLinearize pgf lang = branches . recLinearize pgf lang where
branches r = case r of
RR fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t]
RT fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t]
RFV rs -> concatMap branches rs
RS s -> [([], [s])]
RCon _ -> []
-- show record in GF-source-like syntax
recordLinearize :: PGF -> CId -> Expr -> String
recordLinearize pgf lang = prRecord . recLinearize pgf lang
-- create a GF-like record, forming the basis of all functions above
recLinearize :: PGF -> CId -> Expr -> Record
recLinearize pgf lang tree = mkRecord typ $ linTree pgf lang tree where
typ = case expr2tree tree of
Fun f _ -> lookParamLincat pgf lang $ valCat $ lookType pgf f
-- show PGF term
termLinearize :: PGF -> CId -> Expr -> String
termLinearize pgf lang = show . linTree pgf lang
-- show bracketed markup with references to tree structure
markLinearize :: PGF -> CId -> Expr -> String
markLinearize pgf lang = concat . take 1 . linearizesMark pgf lang
-- for Morphology: word, lemma, tags
collectWords :: PGF -> Language -> [(String, [(CId,String)])]
collectWords pgf lang =
concatMap collOne
[(f,c,length xs) | (f,(DTyp xs c _,_,_)) <- Map.toList $ funs $ abstract pgf]
where
collOne (f,c,i) =
fromRec f [showCId c] (recLinearize pgf lang (foldl EApp (EFun f) (replicate i (EMeta 888))))
fromRec f v r = case r of
RR rs -> concat [fromRec f v t | (_,t) <- rs]
RT rs -> concat [fromRec f (p:v) t | (p,t) <- rs]
RFV rs -> concatMap (fromRec f v) rs
RS s -> [(w,[(f,unwords (reverse v))]) | w <- words s, w /= "?888"] ---
-- RS s -> [(s,[(f,unwords (reverse v))])]
RCon c -> [] ---- inherent

View File

@@ -102,7 +102,7 @@ graphvizDependencyTree format debug mlab ms pgf lang exp = case format of
ifd s = if debug then s else [] ifd s = if debug then s else []
pot = readPosText $ head $ linearizesMark pgf lang exp pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang exp
---- use Just str if you have str to match against ---- use Just str if you have str to match against
prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"] prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"]
@@ -188,9 +188,7 @@ getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
---- nubrec and domins are quadratic, but could be (n log n) ---- nubrec and domins are quadratic, but could be (n log n)
graphvizParseTree :: PGF -> CId -> Expr -> String graphvizParseTree :: PGF -> CId -> Expr -> String
graphvizParseTree pgf lang = prGraph False . lin2tree pgf . linMark where graphvizParseTree pgf lang = prGraph False . lin2tree pgf . concat . take 1 . markLinearizes pgf lang where
linMark = head . linearizesMark pgf lang
---- use Just str if you have str to match against
lin2tree pgf s = trace s $ prelude ++ nodes ++ links where lin2tree pgf s = trace s $ prelude ++ nodes ++ links where
@@ -235,12 +233,12 @@ tag s = "<" ++ s ++ ">"
showp = init . tail . show showp = init . tail . show
mtag = tag . ('n':) . uncommas mtag = tag . ('n':) . uncommas
-- word alignments from Linearize.linearizesMark -- word alignments from Linearize.markLinearize
-- words are chunks like {[0,1,1,0] old} -- words are chunks like {[0,1,1,0] old}
graphvizAlignment :: PGF -> Expr -> String graphvizAlignment :: PGF -> Expr -> String
graphvizAlignment pgf = prGraph True . lin2graph . linsMark where graphvizAlignment pgf = prGraph True . lin2graph . linsMark where
linsMark t = [s | la <- cncnames pgf, s <- take 1 (linearizesMark pgf la t)] linsMark t = [concat (take 1 (markLinearizes pgf la t)) | la <- cncnames pgf]
lin2graph :: [String] -> [String] lin2graph :: [String] -> [String]
lin2graph ss = trace (show ss) $ prelude ++ nodes ++ links lin2graph ss = trace (show ss) $ prelude ++ nodes ++ links