From c72ee23d854b01e56aa42a65985d1e19e077e173 Mon Sep 17 00:00:00 2001 From: krasimir Date: Tue, 21 Oct 2008 14:30:36 +0000 Subject: [PATCH] efficient and nicer implementation for literal categories --- src/GF/Compile/GFCCtoJS.hs | 1 + src/GF/Compile/GeneratePMCFG.hs | 9 ++- src/GF/Speech/PGFToCFG.hs | 13 ++++- src/PGF/Data.hs | 1 + src/PGF/Parsing/FCFG/Incremental.hs | 90 +++++++++++++---------------- src/PGF/Raw/Convert.hs | 6 +- 6 files changed, 62 insertions(+), 58 deletions(-) diff --git a/src/GF/Compile/GFCCtoJS.hs b/src/GF/Compile/GFCCtoJS.hs index 2c3b762da..c8e4e0e4b 100644 --- a/src/GF/Compile/GFCCtoJS.hs +++ b/src/GF/Compile/GFCCtoJS.hs @@ -128,6 +128,7 @@ lins2js p ls = JS.EArray [JS.EArray [sym2js s | s <- Array.elems (sequences p Ar sym2js :: FSymbol -> JS.Expr sym2js (FSymCat n l) = new "ArgProj" [JS.EInt n, JS.EInt l] +sym2js (FSymLit n l) = new "ArgProj" [JS.EInt n, JS.EInt l] sym2js (FSymTok (KS t)) = new "Terminal" [JS.EStr t] new :: String -> [JS.Expr] -> JS.Expr diff --git a/src/GF/Compile/GeneratePMCFG.hs b/src/GF/Compile/GeneratePMCFG.hs index a20496d70..6a5f9ebdf 100644 --- a/src/GF/Compile/GeneratePMCFG.hs +++ b/src/GF/Compile/GeneratePMCFG.hs @@ -162,7 +162,10 @@ convertArg (C max) nr path lbl_path lin lins = do convertArg (S _) nr path lbl_path lin lins = do (_, args) <- readState let PFCat _ cat rcs tcs = args !! nr - return ((lbl_path, FSymCat nr (index path rcs 0) : lin) : lins) + l = index path rcs 0 + sym | isLiteralCat cat = FSymLit nr l + | otherwise = FSymCat nr l + return ((lbl_path, sym : lin) : lins) where index lbl' (lbl:lbls) idx | lbl' == lbl = idx @@ -257,7 +260,7 @@ expandHOAS abs_defs cnc_defs lincats env = add_hoFun env (n,cat) = let linRec = reverse $ [(l ,[FSymCat 0 i]) | (l,i) <- case arg of {PFCat _ _ rcs _ -> zip rcs [0..]}] ++ - [([],[FSymCat i 0]) | i <- [1..n]] + [([],[FSymLit i 0]) | i <- [1..n]] (env1,lins) = List.mapAccumL addFSeq env linRec newLinRec = mkArray lins @@ -274,7 +277,7 @@ expandHOAS abs_defs cnc_defs lincats env = -- add one PMCFG function for each high-order category: _V : Var -> Cat add_varFun env cat = - let (env1,seqid) = addFSeq env ([],[FSymCat 0 0]) + let (env1,seqid) = addFSeq env ([],[FSymLit 0 0]) lins = replicate (case res of {PFCat _ _ rcs _ -> length rcs}) seqid (env2,funid) = addFFun env1 (FFun _V [[0]] (mkArray lins)) env3 = foldl (\env res -> addProduction env2 res (FApply funid [fcatVar])) diff --git a/src/GF/Speech/PGFToCFG.hs b/src/GF/Speech/PGFToCFG.hs index 37bc9c0e5..ef7f1f868 100644 --- a/src/GF/Speech/PGFToCFG.hs +++ b/src/GF/Speech/PGFToCFG.hs @@ -85,17 +85,24 @@ pgfToCFG pgf lang = mkCFG (prCId (lookStartCat pgf)) extCats (startRules ++ conc mkRhs = map fsymbolToSymbol . Array.elems containsLiterals :: Array FPointPos FSymbol -> Bool - containsLiterals row = any isLiteralFCat [args!!n | FSymCat n _ <- Array.elems row] + containsLiterals row = any isLiteralFCat [args!!n | FSymCat n _ <- Array.elems row] || + not (null [n | FSymLit n _ <- Array.elems row]) -- only this is needed for PMCFG. + -- The first line is for backward compat. fsymbolToSymbol :: FSymbol -> CFSymbol fsymbolToSymbol (FSymCat n l) = NonTerminal (fcatToCat (args!!n) l) + fsymbolToSymbol (FSymLit n l) = NonTerminal (fcatToCat (args!!n) l) fsymbolToSymbol (FSymTok (KS t)) = Terminal t fixProfile :: Array FPointPos FSymbol -> Profile -> Profile fixProfile row = concatMap positions where - nts = zip [0..] [nt | nt@(FSymCat _ _) <- Array.elems row] - positions i = [k | (k,FSymCat j _) <- nts, j == i] + nts = zip [0..] [j | nt <- Array.elems row, j <- getPos nt] + positions i = [k | (k,j) <- nts, j == i] + + getPos (FSymCat j _) = [j] + getPos (FSymLit j _) = [j] + getPos _ = [] profilesToTerm :: [Profile] -> CFTerm profilesToTerm ps = CFObj f (zipWith profileToTerm argTypes ps) diff --git a/src/PGF/Data.hs b/src/PGF/Data.hs index 8ee95c579..31b267a17 100644 --- a/src/PGF/Data.hs +++ b/src/PGF/Data.hs @@ -70,6 +70,7 @@ type FIndex = Int type FPointPos = Int data FSymbol = FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex + | FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex | FSymTok Tokn deriving (Eq,Ord,Show) type Profile = [Int] diff --git a/src/PGF/Parsing/FCFG/Incremental.hs b/src/PGF/Parsing/FCFG/Incremental.hs index 99d734f40..bd95ec34e 100644 --- a/src/PGF/Parsing/FCFG/Incremental.hs +++ b/src/PGF/Parsing/FCFG/Incremental.hs @@ -44,48 +44,27 @@ initState pinfo (DTyp _ start _) = -- is consumed and the current position shifted by one. nextState :: ParseState -> String -> Maybe ParseState nextState (State pinfo chart items) t = - let (items1,chart1) = process add (sequences pinfo) (functions pinfo) (Set.toList items) Set.empty chart - (items2,chart2) = addConst pinfo (AK fcatString 0) (Lit (LStr t)) t items1 chart1 - (items3,chart3) = case reads t of {[(n,"")] -> addConst pinfo (AK fcatInt 0) (Lit (LInt n)) t items2 chart2; - _ -> (items2,chart2)} - (items4,chart4) = case reads t of {[(d,"")] -> addConst pinfo (AK fcatFloat 0) (Lit (LFlt d)) t items3 chart3; - _ -> (items3,chart3)} - (items5,chart5) = addConst pinfo (AK fcatVar 0) (Var (mkCId t)) t items4 chart4 - chart6 = chart5{ active =emptyAC - , actives=active chart5 : actives chart5 + let (items1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) (Set.toList items) Set.empty chart + chart2 = chart1{ active =emptyAC + , actives=active chart1 : actives chart1 , passive=emptyPC - , offset =offset chart5+1 + , offset =offset chart1+1 } - in if Set.null items5 + in if Set.null items1 then Nothing - else Just (State pinfo chart6 items5) + else Just (State pinfo chart2 items1) where add (KS tok) item set | tok == t = Set.insert item set | otherwise = set -addConst :: ParserInfo -> ActiveKey -> Tree -> String -> Set.Set Active -> Chart -> (Set.Set Active,Chart) -addConst pinfo key const s items chart = - case lookupAC key (active chart) of - Nothing -> (items,chart) - Just set -> let fid = nextId chart - - items1 = Set.fold (\(Active j ppos funid seqid args key) -> - let FSymCat d _ = unsafeAt (unsafeAt (sequences pinfo) seqid) ppos - in Set.insert (Active j (ppos+1) funid seqid (updateAt d fid args) key)) items set - - chart1 = chart{forest =IntMap.insert fid (Set.singleton (FConst const s)) (forest chart) - ,nextId =nextId chart+1 - } - in (items1,chart1) - -- | If the next token is not known but only its prefix (possible empty prefix) -- then the 'getCompletions' function can be used to calculate the possible -- next words and the consequent states. This is used for word completions in -- the GF interpreter. getCompletions :: ParseState -> String -> Map.Map String ParseState getCompletions (State pinfo chart items) w = - let (map',chart1) = process add (sequences pinfo) (functions pinfo) (Set.toList items) Map.empty chart + let (map',chart1) = process Nothing add (sequences pinfo) (functions pinfo) (Set.toList items) Map.empty chart chart2 = chart1{ active =emptyAC , actives=active chart1 : actives chart1 , passive=emptyPC @@ -100,7 +79,7 @@ getCompletions (State pinfo chart items) w = extractExps :: ParseState -> Type -> [Tree] extractExps (State pinfo chart items) (DTyp _ start _) = exps where - (_,st) = process (\_ _ -> id) (sequences pinfo) (functions pinfo) (Set.toList items) () chart + (_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) (Set.toList items) () chart exps = nubsort $ do cat <- fromMaybe [] (Map.lookup start (startCats pinfo)) @@ -142,8 +121,8 @@ extractExps (State pinfo chart items) (DTyp _ start _) = exps _B = mkCId "_B" _V = mkCId "_V" -process fn !seqs !funs [] acc chart = (acc,chart) -process fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart +process mbt fn !seqs !funs [] acc chart = (acc,chart) +process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart | inRange (bounds lin) ppos = case unsafeAt lin ppos of FSymCat d r -> let !fid = args !! d @@ -155,17 +134,23 @@ process fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc ch items3 = foldForest (\funid args items -> Active k 0 funid (rhs funid r) args key : items) (\_ _ items -> items) items2 fid (forest chart) - acc2 = if fid < 0 -- literal category - then foldForest (\funid args acc -> acc) - (\lit s acc -> fn (KS s) (Active j (ppos+1) funid seqid args key0) acc) - acc fid (forest chart) - else acc in case lookupAC key (active chart) of - Nothing -> process fn seqs funs items3 acc2 chart{active=insertAC key (Set.singleton item) (active chart)} - Just set | Set.member item set -> process fn seqs funs items acc chart - | otherwise -> process fn seqs funs items2 acc2 chart{active=insertAC key (Set.insert item set) (active chart)} + Nothing -> process mbt fn seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)} + Just set | Set.member item set -> process mbt fn seqs funs items acc chart + | otherwise -> process mbt fn seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)} FSymTok tok -> let !acc' = fn tok (Active j (ppos+1) funid seqid args key0) acc - in process fn seqs funs items acc' chart + in process mbt fn seqs funs items acc' chart + FSymLit d r -> let !fid = args !! d + in case [t | set <- IntMap.lookup fid (forest chart), FConst _ t <- Set.toList set] of + (tok:_) -> let !acc' = fn (KS tok) (Active j (ppos+1) funid seqid args key0) acc + in process mbt fn seqs funs items acc' chart + [] -> case litCatMatch fid mbt of + Just (t,lit) -> let fid' = nextId chart + !acc' = fn (KS t) (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc + in process mbt fn seqs funs items acc' chart{forest=IntMap.insert fid' (Set.singleton (FConst lit t)) (forest chart) + ,nextId=nextId chart+1 + } + Nothing -> process mbt fn seqs funs items acc chart | otherwise = case lookupPC (mkPK key0 j) (passive chart) of Nothing -> let fid = nextId chart @@ -175,12 +160,12 @@ process fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc ch Just set -> Set.fold (\(Active j' ppos funid seqid args keyc) -> let FSymCat d _ = unsafeAt (unsafeAt seqs seqid) ppos in (:) (Active j' (ppos+1) funid seqid (updateAt d fid args) keyc)) items set - in process fn seqs funs items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart) - ,forest =IntMap.insert fid (Set.singleton (FApply funid args)) (forest chart) - ,nextId =nextId chart+1 - } + in process mbt fn seqs funs items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart) + ,forest =IntMap.insert fid (Set.singleton (FApply funid args)) (forest chart) + ,nextId =nextId chart+1 + } Just id -> let items2 = [Active k 0 funid (rhs funid r) args (AK id r) | r <- labelsAC id (active chart)] ++ items - in process fn seqs funs items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (FApply funid args)) (forest chart)} + in process mbt fn seqs funs items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (FApply funid args)) (forest chart)} where !lin = unsafeAt seqs seqid !k = offset chart @@ -190,15 +175,20 @@ process fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc ch rhs funid lbl = unsafeAt lins lbl where FFun _ _ lins = unsafeAt funs funid - - lit2tok (LStr t) = KS t - lit2tok (LInt n) = KS (show n) - lit2tok (LFlt d) = KS (show d) - + updateAt :: Int -> a -> [a] -> [a] updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs] +litCatMatch fcat (Just t) + | fcat == fcatString = Just (t,Lit (LStr t)) + | fcat == fcatInt = case reads t of {[(n,"")] -> Just (t,Lit (LInt n)); + _ -> Nothing } + | fcat == fcatFloat = case reads t of {[(d,"")] -> Just (t,Lit (LFlt d)); + _ -> Nothing } + | fcat == fcatVar = Just (t,Var (mkCId t)) +litCatMatch _ _ = Nothing + ---------------------------------------------------------------- -- Active Chart diff --git a/src/PGF/Raw/Convert.hs b/src/PGF/Raw/Convert.hs index d202ff8dd..85799a3a2 100644 --- a/src/PGF/Raw/Convert.hs +++ b/src/PGF/Raw/Convert.hs @@ -102,7 +102,8 @@ toPInfo [App "functions" fs, App "sequences" ss, App "productions" ps,App "categ toProduction (App "C" [fcat]) = FCoerce (expToInt fcat) toSymbol :: RExp -> FSymbol -toSymbol (App "P" [n,l]) = FSymCat (expToInt n) (expToInt l) +toSymbol (App "P" [n,l]) = FSymCat (expToInt n) (expToInt l) +toSymbol (App "PL" [n,l]) = FSymLit (expToInt n) (expToInt l) toSymbol (App "KP" (d:alts)) = FSymTok (toKP d alts) toSymbol (AStr t) = FSymTok (KS t) @@ -239,7 +240,8 @@ fromFFun (FFun fun prof lins) = App (prCId fun) [App "P" (map fromProfile prof), daughter n = App "_A" [intToExp n] fromSymbol :: FSymbol -> RExp -fromSymbol (FSymCat n l) = App "P" [intToExp n, intToExp l] +fromSymbol (FSymCat n l) = App "P" [intToExp n, intToExp l] +fromSymbol (FSymLit n l) = App "PL" [intToExp n, intToExp l] fromSymbol (FSymTok t) = fromTokn t fromFSeq :: FSeq -> RExp