From e92151caf83936471e63e05206db5d9b3b3d9189 Mon Sep 17 00:00:00 2001 From: krasimir Date: Thu, 1 Jul 2010 08:51:59 +0000 Subject: [PATCH] redesign the open-literals API --- src/compiler/GF/Command/Commands.hs | 2 +- src/compiler/GF/Compile/Abstract/TC.hs | 2 +- src/compiler/GF/Compile/ExampleBased.hs | 2 +- src/compiler/GF/Compile/GeneratePMCFG.hs | 92 ++++++----- src/compiler/GF/Compile/Rename.hs | 4 +- src/compiler/GF/Data/TrieMap.hs | 10 +- src/compiler/GF/Grammar/Lookup.hs | 6 +- src/compiler/GF/Grammar/Predef.hs | 6 +- src/compiler/GF/Grammar/Values.hs | 2 +- src/compiler/GF/Infra/Option.hs | 14 +- src/compiler/GF/Speech/VoiceXML.hs | 3 +- src/compiler/GFI.hs | 2 +- src/runtime/haskell/PGF.hs | 22 +-- src/runtime/haskell/PGF/Macros.hs | 5 +- src/runtime/haskell/PGF/Parse.hs | 202 ++++++++++++----------- 15 files changed, 198 insertions(+), 176 deletions(-) diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs index 2ea3e169c..6f3700032 100644 --- a/src/compiler/GF/Command/Commands.hs +++ b/src/compiler/GF/Command/Commands.hs @@ -1012,7 +1012,7 @@ allCommands env@(pgf, mos) = Map.fromList [ _ -> fromExprs ts where (prs,bss) = unzip parses - ts = [t | ParseResult ts <- prs, t <- ts] + ts = [t | ParseOk ts <- prs, t <- ts] returnFromExprs es = return $ case es of [] -> ([], "no trees found") diff --git a/src/compiler/GF/Compile/Abstract/TC.hs b/src/compiler/GF/Compile/Abstract/TC.hs index 9c28d88e9..68b1691ec 100644 --- a/src/compiler/GF/Compile/Abstract/TC.hs +++ b/src/compiler/GF/Compile/Abstract/TC.hs @@ -161,7 +161,7 @@ checkInferExp th tenv@(k,_,_) e typ = do inferExp :: Theory -> TCEnv -> Exp -> Err (AExp, Val, [(Val,Val)]) inferExp th tenv@(k,rho,gamma) e = case e of Vr x -> mkAnnot (AVr x) $ noConstr $ lookupVar gamma x - Q (m,c) | m == cPredefAbs && isLiteralCat c + Q (m,c) | m == cPredefAbs && isPredefCat c -> return (ACn (m,c) vType, vType, []) | otherwise -> mkAnnot (ACn (m,c)) $ noConstr $ lookupConst th (m,c) QC c -> mkAnnot (ACn c) $ noConstr $ lookupConst th c ---- diff --git a/src/compiler/GF/Compile/ExampleBased.hs b/src/compiler/GF/Compile/ExampleBased.hs index 5c56c0ce5..199d1e375 100644 --- a/src/compiler/GF/Compile/ExampleBased.hs +++ b/src/compiler/GF/Compile/ExampleBased.hs @@ -51,7 +51,7 @@ convertFile conf src file = do return ws TypeError _ _ -> return [] - ParseResult ts -> + ParseOk ts -> case rank ts of (t:tt) -> appv ("WARNING: ambiguous example " ++ ex) >> appn t >> mapM_ (appn . (" --- " ++)) tt >> return [] diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs index aeed3947a..c245c3595 100644 --- a/src/compiler/GF/Compile/GeneratePMCFG.hs +++ b/src/compiler/GF/Compile/GeneratePMCFG.hs @@ -43,7 +43,7 @@ import Control.Exception convertConcrete :: Options -> SourceGrammar -> SourceModule -> SourceModule -> IO Concr -convertConcrete opts gr am cm = do +convertConcrete opts0 gr am cm = do let env0 = emptyGrammarEnv gr cm when (flag optProf opts) $ do profileGrammar cm env0 pfrules @@ -52,6 +52,8 @@ convertConcrete opts gr am cm = do return $ getConcr flags printnames env2 where (m,mo) = cm + + opts = addOptions (M.flags (snd am)) opts0 pfrules = [ (PFRule id args (0,res) (map (\(_,_,ty) -> ty) cont) val term) | @@ -119,7 +121,7 @@ convertRule gr opts grammarEnv (PFRule fun args res ctypes ctype term) = do let pres = protoFCat grammarEnv res pargs = map (protoFCat grammarEnv) args - b = runCnvMonad gr (unfactor term >>= convertTerm CNil ctype) (pargs,[]) + b = runCnvMonad gr (unfactor term >>= convertTerm opts CNil ctype) (pargs,[]) (grammarEnv1,b1) = addSequencesB grammarEnv b grammarEnv2 = brk (\grammarEnv -> foldBM addRule grammarEnv @@ -293,43 +295,43 @@ reversePath path = rev CNil path type Value a = Schema Branch a Term -convertTerm :: Path -> Type -> Term -> CnvMonad (Value [Symbol]) -convertTerm sel ctype (Vr x) = convertArg ctype (getVarIndex x) (reversePath sel) -convertTerm sel ctype (Abs _ _ t) = convertTerm sel ctype t -- there are only top-level abstractions and we ignore them !!! -convertTerm sel ctype (R record) = convertRec sel ctype record -convertTerm sel ctype (P term l) = convertTerm (CProj l sel) ctype term -convertTerm sel ctype (V pt ts) = convertTbl sel ctype pt ts -convertTerm sel ctype (S term p) = do v <- evalTerm CNil p - convertTerm (CSel v sel) ctype term -convertTerm sel ctype (FV vars) = do term <- variants vars - convertTerm sel ctype term -convertTerm sel ctype (C t1 t2) = do v1 <- convertTerm sel ctype t1 - v2 <- convertTerm sel ctype t2 - return (CStr (concat [s | CStr s <- [v1,v2]])) -convertTerm sel ctype (K t) = return (CStr [SymKS [t]]) -convertTerm sel ctype Empty = return (CStr []) -convertTerm sel ctype (Alts s alts) - = return (CStr [SymKP (strings s) [Alt (strings u) (strings v) | (u,v) <- alts]]) - where - strings (K s) = [s] - strings (C u v) = strings u ++ strings v - strings (Strs ss) = concatMap strings ss -convertTerm CNil ctype t = do v <- evalTerm CNil t - return (CPar v) -convertTerm _ _ t = error (render (text "convertTerm" <+> parens (ppTerm Unqualified 0 t))) +convertTerm :: Options -> Path -> Type -> Term -> CnvMonad (Value [Symbol]) +convertTerm opts sel ctype (Vr x) = convertArg opts ctype (getVarIndex x) (reversePath sel) +convertTerm opts sel ctype (Abs _ _ t) = convertTerm opts sel ctype t -- there are only top-level abstractions and we ignore them !!! +convertTerm opts sel ctype (R record) = convertRec opts sel ctype record +convertTerm opts sel ctype (P term l) = convertTerm opts (CProj l sel) ctype term +convertTerm opts sel ctype (V pt ts) = convertTbl opts sel ctype pt ts +convertTerm opts sel ctype (S term p) = do v <- evalTerm CNil p + convertTerm opts (CSel v sel) ctype term +convertTerm opts sel ctype (FV vars) = do term <- variants vars + convertTerm opts sel ctype term +convertTerm opts sel ctype (C t1 t2) = do v1 <- convertTerm opts sel ctype t1 + v2 <- convertTerm opts sel ctype t2 + return (CStr (concat [s | CStr s <- [v1,v2]])) +convertTerm opts sel ctype (K t) = return (CStr [SymKS [t]]) +convertTerm opts sel ctype Empty = return (CStr []) +convertTerm opts sel ctype (Alts s alts) + = return (CStr [SymKP (strings s) [Alt (strings u) (strings v) | (u,v) <- alts]]) + where + strings (K s) = [s] + strings (C u v) = strings u ++ strings v + strings (Strs ss) = concatMap strings ss +convertTerm opts CNil ctype t = do v <- evalTerm CNil t + return (CPar v) +convertTerm _ _ _ t = error (render (text "convertTerm" <+> parens (ppTerm Unqualified 0 t))) -convertArg :: Term -> Int -> Path -> CnvMonad (Value [Symbol]) -convertArg (RecType rs) nr path = - mkRecord (map (\(lbl,ctype) -> (lbl,convertArg ctype nr (CProj lbl path))) rs) -convertArg (Table pt vt) nr path = do +convertArg :: Options -> Term -> Int -> Path -> CnvMonad (Value [Symbol]) +convertArg opts (RecType rs) nr path = + mkRecord (map (\(lbl,ctype) -> (lbl,convertArg opts ctype nr (CProj lbl path))) rs) +convertArg opts (Table pt vt) nr path = do vs <- getAllParamValues pt - mkTable pt (map (\v -> (v,convertArg vt nr (CSel v path))) vs) -convertArg (Sort _) nr path = do + mkTable pt (map (\v -> (v,convertArg opts vt nr (CSel v path))) vs) +convertArg opts (Sort _) nr path = do (args,_) <- get let PFCat _ cat schema = args !! nr l = index (reversePath path) schema - sym | isLiteralCat cat = SymLit nr l - | otherwise = SymCat nr l + sym | isLiteralCat opts cat = SymLit nr l + | otherwise = SymCat nr l return (CStr [sym]) where index (CProj lbl path) (CRec rs) = case lookup lbl rs of @@ -337,26 +339,26 @@ convertArg (Sort _) nr path = do index (CSel trm path) (CTbl _ rs) = case lookup trm rs of Just (Identity t) -> index path t index CNil (CStr idx) = idx -convertArg ty nr path = do +convertArg opts ty nr path = do value <- choices nr (reversePath path) return (CPar value) -convertRec CNil (RecType rs) record = - mkRecord (map (\(lbl,ctype) -> (lbl,convertTerm CNil ctype (projectRec lbl record))) rs) -convertRec (CProj lbl path) ctype record = - convertTerm path ctype (projectRec lbl record) -convertRec _ ctype _ = error ("convertRec: "++show ctype) +convertRec opts CNil (RecType rs) record = + mkRecord (map (\(lbl,ctype) -> (lbl,convertTerm opts CNil ctype (projectRec lbl record))) rs) +convertRec opts (CProj lbl path) ctype record = + convertTerm opts path ctype (projectRec lbl record) +convertRec opts _ ctype _ = error ("convertRec: "++show ctype) -convertTbl CNil (Table _ vt) pt ts = do +convertTbl opts CNil (Table _ vt) pt ts = do vs <- getAllParamValues pt - mkTable pt (zipWith (\v t -> (v,convertTerm CNil vt t)) vs ts) -convertTbl (CSel v sub_sel) ctype pt ts = do + mkTable pt (zipWith (\v t -> (v,convertTerm opts CNil vt t)) vs ts) +convertTbl opts (CSel v sub_sel) ctype pt ts = do vs <- getAllParamValues pt case lookup v (zip vs ts) of - Just t -> convertTerm sub_sel ctype t + Just t -> convertTerm opts sub_sel ctype t Nothing -> error (render (text "convertTbl:" <+> (text "missing value" <+> ppTerm Unqualified 0 v $$ text "among" <+> vcat (map (ppTerm Unqualified 0) vs)))) -convertTbl _ ctype _ _ = error ("convertTbl: "++show ctype) +convertTbl opts _ ctype _ _ = error ("convertTbl: "++show ctype) goB :: Branch (Value SeqId) -> Path -> [SeqId] -> BacktrackM Env [SeqId] diff --git a/src/compiler/GF/Compile/Rename.hs b/src/compiler/GF/Compile/Rename.hs index a5645c26e..a0ccdae12 100644 --- a/src/compiler/GF/Compile/Rename.hs +++ b/src/compiler/GF/Compile/Rename.hs @@ -87,8 +87,8 @@ renameIdentTerm env@(act,imps) t = -- this facility is mainly for BWC with GF1: you need not import PredefAbs predefAbs c s - | isLiteralCat c = return $ Q (cPredefAbs,c) - | otherwise = checkError s + | isPredefCat c = return $ Q (cPredefAbs,c) + | otherwise = checkError s ident alt c = case lookupTree showIdent c act of Ok f -> return $ f c diff --git a/src/compiler/GF/Data/TrieMap.hs b/src/compiler/GF/Data/TrieMap.hs index a6749d641..a15c780ab 100644 --- a/src/compiler/GF/Data/TrieMap.hs +++ b/src/compiler/GF/Data/TrieMap.hs @@ -11,8 +11,8 @@ module GF.Data.TrieMap , insertWith - , unionWith - , unionsWith + , union, unionWith + , unions, unionsWith , elems ) where @@ -47,6 +47,9 @@ insertWith f (k:ks) v0 (Tr mb_v m) = case Map.lookup k m of Nothing -> Tr mb_v (Map.insert k (singleton ks v0) m) Just tr -> Tr mb_v (Map.insert k (insertWith f ks v0 tr) m) +union :: Ord k => TrieMap k v -> TrieMap k v -> TrieMap k v +union = unionWith (\a b -> a) + unionWith :: Ord k => (v -> v -> v) -> TrieMap k v -> TrieMap k v -> TrieMap k v unionWith f (Tr mb_v1 m1) (Tr mb_v2 m2) = let mb_v = case (mb_v1,mb_v2) of @@ -57,6 +60,9 @@ unionWith f (Tr mb_v1 m1) (Tr mb_v2 m2) = m = Map.unionWith (unionWith f) m1 m2 in Tr mb_v m +unions :: Ord k => [TrieMap k v] -> TrieMap k v +unions = foldl union empty + unionsWith :: Ord k => (v -> v -> v) -> [TrieMap k v] -> TrieMap k v unionsWith f = foldl (unionWith f) empty diff --git a/src/compiler/GF/Grammar/Lookup.hs b/src/compiler/GF/Grammar/Lookup.hs index f942bdcaf..d1473bbcd 100644 --- a/src/compiler/GF/Grammar/Lookup.hs +++ b/src/compiler/GF/Grammar/Lookup.hs @@ -60,8 +60,8 @@ lookupIdentInfo mo i = lookupIdent i (jments mo) lookupResDef :: SourceGrammar -> QIdent -> Err Term lookupResDef gr (m,c) - | isLiteralCat c = lock c defLinType - | otherwise = look m c + | isPredefCat c = lock c defLinType + | otherwise = look m c where look m c = do mo <- lookupModule gr m @@ -161,7 +161,7 @@ lookupAbsDef gr m c = errIn (render (text "looking up absdef of" <+> ppIdent c)) _ -> return (Nothing,Nothing) lookupLincat :: SourceGrammar -> Ident -> Ident -> Err Type -lookupLincat gr m c | isLiteralCat c = return defLinType --- ad hoc; not needed? +lookupLincat gr m c | isPredefCat c = return defLinType --- ad hoc; not needed? lookupLincat gr m c = do mo <- lookupModule gr m info <- lookupIdentInfo mo c diff --git a/src/compiler/GF/Grammar/Predef.hs b/src/compiler/GF/Grammar/Predef.hs index f16765433..f9c2c5d18 100644 --- a/src/compiler/GF/Grammar/Predef.hs +++ b/src/compiler/GF/Grammar/Predef.hs @@ -25,7 +25,7 @@ module GF.Grammar.Predef , cErrorType , cOverload , cUndefinedType - , isLiteralCat + , isPredefCat , cPTrue, cPFalse @@ -92,8 +92,8 @@ cOverload = identC (BS.pack "overload") cUndefinedType :: Ident cUndefinedType = identC (BS.pack "UndefinedType") -isLiteralCat :: Ident -> Bool -isLiteralCat c = elem c [cInt,cString,cFloat,cVar] +isPredefCat :: Ident -> Bool +isPredefCat c = elem c [cInt,cString,cFloat] cPTrue :: Ident cPTrue = identC (BS.pack "PTrue") diff --git a/src/compiler/GF/Grammar/Values.hs b/src/compiler/GF/Grammar/Values.hs index c5646f5b4..1a68ddc89 100644 --- a/src/compiler/GF/Grammar/Values.hs +++ b/src/compiler/GF/Grammar/Values.hs @@ -19,7 +19,7 @@ module GF.Grammar.Values (-- * values used in TC type checking Binds, Constraints, MetaSubst, -- * for TC valAbsInt, valAbsFloat, valAbsString, vType, - isLiteralCat, + isPredefCat, eType, --Z tree2exp, loc2treeFocus ) where diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs index 6c00336de..d76302827 100644 --- a/src/compiler/GF/Infra/Option.hs +++ b/src/compiler/GF/Infra/Option.hs @@ -17,7 +17,7 @@ module GF.Infra.Option helpMessage, -- * Checking specific options flag, cfgTransform, haskellOption, readOutputFormat, - isLexicalCat, renameEncoding, + isLexicalCat, isLiteralCat, renameEncoding, -- * Setting specific options setOptimization, setCFGTransform, -- * Convenience methods for checking options @@ -28,7 +28,9 @@ import Control.Monad import Data.Char (toLower, isDigit) import Data.List import Data.Maybe +import GF.Infra.Ident import GF.Infra.GetOpt +import GF.Grammar.Predef --import System.Console.GetOpt import System.FilePath import System.IO @@ -37,7 +39,7 @@ import GF.Data.ErrM import Data.Set (Set) import qualified Data.Set as Set - +import qualified Data.ByteString.Char8 as BS @@ -146,6 +148,7 @@ data Flags = Flags { optSISR :: Maybe SISRFormat, optHaskellOptions :: Set HaskellOption, optLexicalCats :: Set String, + optLiteralCats :: Set Ident, optGFODir :: Maybe FilePath, optOutputFile :: Maybe FilePath, optOutputDir :: Maybe FilePath, @@ -244,6 +247,7 @@ defaultFlags = Flags { optOutputFormats = [], optSISR = Nothing, optHaskellOptions = Set.empty, + optLiteralCats = Set.fromList [cString,cInt,cFloat], optLexicalCats = Set.empty, optGFODir = Nothing, optOutputFile = Nothing, @@ -308,6 +312,8 @@ optDescr = ++ concat (intersperse " | " (map fst haskellOptionNames))), Option [] ["lexical"] (ReqArg lexicalCat "CAT[,CAT[...]]") "Treat CAT as a lexical category.", + Option [] ["literal"] (ReqArg literalCat "CAT[,CAT[...]]") + "Treat CAT as a literal category.", Option ['o'] ["output-file"] (ReqArg outFile "FILE") "Save output in FILE (default is out.X, where X depends on output format.", Option ['D'] ["output-dir"] (ReqArg outDir "DIR") @@ -386,6 +392,7 @@ optDescr = Just p -> set $ \o -> o { optHaskellOptions = Set.insert p (optHaskellOptions o) } Nothing -> fail $ "Unknown Haskell option: " ++ x ++ " Known: " ++ show (map fst haskellOptionNames) + literalCat x = set $ \o -> o { optLiteralCats = foldr Set.insert (optLiteralCats o) ((map (identC . BS.pack) . splitBy (==',')) x) } lexicalCat x = set $ \o -> o { optLexicalCats = foldr Set.insert (optLexicalCats o) (splitBy (==',') x) } outFile x = set $ \o -> o { optOutputFile = Just x } outDir x = set $ \o -> o { optOutputDir = Just x } @@ -536,6 +543,9 @@ cfgTransform opts t = Set.member t (flag optCFGTransforms opts) haskellOption :: Options -> HaskellOption -> Bool haskellOption opts o = Set.member o (flag optHaskellOptions opts) +isLiteralCat :: Options -> Ident -> Bool +isLiteralCat opts c = Set.member c (flag optLiteralCats opts) + isLexicalCat :: Options -> String -> Bool isLexicalCat opts c = Set.member c (flag optLexicalCats opts) diff --git a/src/compiler/GF/Speech/VoiceXML.hs b/src/compiler/GF/Speech/VoiceXML.hs index 9511dde23..f3f05d3d7 100644 --- a/src/compiler/GF/Speech/VoiceXML.hs +++ b/src/compiler/GF/Speech/VoiceXML.hs @@ -40,8 +40,7 @@ type Skeleton = [(CId, [(CId, [CId])])] pgfSkeleton :: PGF -> Skeleton pgfSkeleton pgf = [(c,[(f,fst (catSkeleton (lookType pgf f))) | f <- fs]) - | (c,(_,fs)) <- Map.toList (cats (abstract pgf)), - not (isLiteralCat c)] + | (c,(_,fs)) <- Map.toList (cats (abstract pgf))] -- -- * Questions to ask diff --git a/src/compiler/GFI.hs b/src/compiler/GFI.hs index a98f1cd39..b844c4654 100644 --- a/src/compiler/GFI.hs +++ b/src/compiler/GFI.hs @@ -314,7 +314,7 @@ wordCompletion gfenv (left,right) = do Nothing -> error ("Can't parse '"++str++"' as type") loop ps [] = Just ps - loop ps (t:ts) = case nextState ps t of + loop ps (t:ts) = case nextState ps (simpleParseInput t) of Left es -> Nothing Right ps -> loop ps ts diff --git a/src/runtime/haskell/PGF.hs b/src/runtime/haskell/PGF.hs index a4d9f4aa1..3b8eced42 100644 --- a/src/runtime/haskell/PGF.hs +++ b/src/runtime/haskell/PGF.hs @@ -80,8 +80,8 @@ module PGF( complete, Parse.ParseState, Parse.initState, Parse.nextState, Parse.getCompletions, Parse.recoveryStates, - Parse.acceptsLiteral, Parse.feedLiteral, - Parse.ParseResult(..), Parse.getParseResult, + Parse.ParseInput(..), Parse.simpleParseInput, Parse.mkParseInput, + Parse.ParseOutput(..), Parse.getParseOutput, -- ** Generation generateRandom, generateAll, generateAllDepth, @@ -155,10 +155,10 @@ parseAll :: PGF -> Type -> String -> [[Tree]] parseAllLang :: PGF -> Type -> String -> [(Language,[Tree])] -- | The same as 'parse' but returns more detailed information -parse_ :: PGF -> Language -> Type -> String -> (Parse.ParseResult,BracketedString) +parse_ :: PGF -> Language -> Type -> String -> (Parse.ParseOutput,BracketedString) -- | This is an experimental function. Use it on your own risk -parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> (Parse.ParseResult,BracketedString) +parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> (Parse.ParseOutput,BracketedString) -- | The same as 'generateAllDepth' but does not limit -- the depth in the generation, and doesn't give an initial expression. @@ -223,13 +223,13 @@ readPGF f = decodeFile f parse pgf lang typ s = case parse_ pgf lang typ s of - (Parse.ParseResult ts,_) -> ts - _ -> [] + (Parse.ParseOk ts,_) -> ts + _ -> [] parseAll mgr typ = map snd . parseAllLang mgr typ parseAllLang mgr typ s = - [(lang,ts) | lang <- languages mgr, (Parse.ParseResult ts,_) <- [parse_ mgr lang typ s]] + [(lang,ts) | lang <- languages mgr, (Parse.ParseOk ts,_) <- [parse_ mgr lang typ s]] parse_ pgf lang typ s = case Map.lookup lang (concretes pgf) of @@ -281,9 +281,9 @@ complete pgf from typ input = ++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Parse.getCompletions state prefix)] where isSuccessful state = - case Parse.getParseResult state typ of - (Parse.ParseResult ts, _) -> not (null ts) - _ -> False + case Parse.getParseOutput state typ of + (Parse.ParseOk ts, _) -> not (null ts) + _ -> False tokensAndPrefix :: String -> ([String],String) tokensAndPrefix s | not (null s) && isSpace (last s) = (ws, "") @@ -292,7 +292,7 @@ complete pgf from typ input = where ws = words s loop ps [] = Just ps - loop ps (t:ts) = case Parse.nextState ps t of + loop ps (t:ts) = case Parse.nextState ps (Parse.simpleParseInput t) of Left es -> Nothing Right ps -> loop ps ts diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs index f0d9b92a8..95bc82aef 100644 --- a/src/runtime/haskell/PGF/Macros.hs +++ b/src/runtime/haskell/PGF/Macros.hs @@ -48,7 +48,7 @@ lookGlobalFlag pgf f = Map.lookup f (gflags pgf) lookAbsFlag :: PGF -> CId -> Maybe Literal lookAbsFlag pgf f = Map.lookup f (aflags (abstract pgf)) -lookConcr :: PGF -> CId -> Concr +lookConcr :: PGF -> Language -> Concr lookConcr pgf cnc = lookMap (error $ "Missing concrete syntax: " ++ showCId cnc) cnc $ concretes pgf @@ -127,9 +127,6 @@ combinations t = case t of [] -> [[]] aa:uu -> [a:u | a <- aa, u <- combinations uu] -isLiteralCat :: CId -> Bool -isLiteralCat = (`elem` [cidString, cidFloat, cidInt, cidVar]) - cidString = mkCId "String" cidInt = mkCId "Int" cidFloat = mkCId "Float" diff --git a/src/runtime/haskell/PGF/Parse.hs b/src/runtime/haskell/PGF/Parse.hs index 7876e9749..1d09359ed 100644 --- a/src/runtime/haskell/PGF/Parse.hs +++ b/src/runtime/haskell/PGF/Parse.hs @@ -5,10 +5,9 @@ module PGF.Parse , initState , nextState , getCompletions - , acceptsLiteral - , feedLiteral , recoveryStates - , ParseResult(..), getParseResult + , ParseInput(..), simpleParseInput, mkParseInput + , ParseOutput(..), getParseOutput , parse , parseWithRecovery ) where @@ -31,34 +30,45 @@ import PGF.Macros import PGF.TypeCheck import PGF.Forest(Forest(Forest), linearizeWithBrackets, foldForest) +-- | The input to the parser is a pair of predicates. The first one +-- 'piToken' checks that a given token, suggested by the grammar, +-- actually appears at the current position in the input string. +-- The second one 'piLiteral' recognizes whether a literal with forest id 'FId' +-- could be matched at the current position. +data ParseInput + = ParseInput + { piToken :: Token -> Bool + , piLiteral :: FId -> Maybe (CId,Tree,[Token]) + } + -- | This data type encodes the different outcomes which you could get from the parser. -data ParseResult +data ParseOutput = ParseFailed Int -- ^ The integer is the position in number of tokens where the parser failed. | TypeError FId [TcError] -- ^ The parsing was successful but none of the trees is type correct. -- The forest id ('FId') points to the bracketed string from the parser -- where the type checking failed. More than one error is returned -- if there are many analizes for some phrase but they all are not type correct. - | ParseResult [Tree] -- ^ If the parsing was successful we get a list of abstract syntax trees. The list should be non-empty. + | ParseOk [Tree] -- ^ If the parsing was successful we get a list of abstract syntax trees. The list should be non-empty. -parse :: PGF -> Language -> Type -> [String] -> (ParseResult,BracketedString) +parse :: PGF -> Language -> Type -> [Token] -> (ParseOutput,BracketedString) parse pgf lang typ toks = loop (initState pgf lang typ) toks where - loop ps [] = getParseResult ps typ - loop ps (t:ts) = case nextState ps t of + loop ps [] = getParseOutput ps typ + loop ps (t:ts) = case nextState ps (simpleParseInput t) of Left es -> case es of - EState _ _ chart -> (ParseFailed (offset chart),snd (getParseResult ps typ)) + EState _ _ chart -> (ParseFailed (offset chart),snd (getParseOutput ps typ)) Right ps -> loop ps ts -parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> (ParseResult,BracketedString) +parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> (ParseOutput,BracketedString) parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ) toks where - accept ps [] = getParseResult ps typ + accept ps [] = getParseOutput ps typ accept ps (t:ts) = - case nextState ps t of + case nextState ps (simpleParseInput t) of Right ps -> accept ps ts Left es -> skip (recoveryStates open_typs es) ts - skip ps_map [] = getParseResult (fst ps_map) typ + skip ps_map [] = getParseOutput (fst ps_map) typ skip ps_map (t:ts) = case Map.lookup t (snd ps_map) of Just ps -> accept ps ts @@ -84,17 +94,52 @@ initState pgf lang (DTyp _ start _) = (Chart emptyAC [] emptyPC (pproductions cnc) (totalCats cnc) 0) (TMap.singleton [] (Set.fromList items)) +-- | This function constructs the simplest possible parser input. +-- It checks the tokens for exact matching and recognizes only @String@, @Int@ and @Float@ literals. +-- The @Int@ and @Float@ literals matche only if the token passed is some number. +-- The @String@ literal always match but the length of the literal could be only one token. +simpleParseInput :: Token -> ParseInput +simpleParseInput t = ParseInput (==t) (matchLit t) + where + matchLit t fid + | fid == fidString = Just (cidString,ELit (LStr t),[t]) + | fid == fidInt = case reads t of {[(n,"")] -> Just (cidInt,ELit (LInt n),[t]); + _ -> Nothing } + | fid == fidFloat = case reads t of {[(d,"")] -> Just (cidFloat,ELit (LFlt d),[t]); + _ -> Nothing } + | fid == fidVar = Just (cidVar,EFun (mkCId t),[t]) + | otherwise = Nothing + +mkParseInput :: PGF -> Language -> (a -> Token -> Bool) -> [(CId,a -> Maybe (Tree,[Token]))] -> a -> ParseInput +mkParseInput pgf lang ftok flits = \x -> ParseInput (ftok x) (flit x) + where + flit = mk flits + + cnc = lookConcr pgf lang + + mk [] = \x fid -> Nothing + mk ((c,flit):flits) = \x fid -> if match fid + then fmap (\(tree,toks) -> (c,tree,toks)) (flit x) + else flit' x fid + where + flit' = mk flits + + match fid = + case Map.lookup c (cnccats cnc) of + Just (CncCat s e _) -> inRange (s,e) fid + Nothing -> False + -- | From the current state and the next token -- 'nextState' computes a new state, where the token -- is consumed and the current position is shifted by one. -- If the new token cannot be accepted then an error state -- is returned. -nextState :: ParseState -> Token -> Either ErrorState ParseState -nextState (PState pgf cnc chart items) t = +nextState :: ParseState -> ParseInput -> Either ErrorState ParseState +nextState (PState pgf cnc chart items) input = let (mb_agenda,map_items) = TMap.decompose items agenda = maybe [] Set.toList mb_agenda - acc = fromMaybe TMap.empty (Map.lookup t map_items) - (acc1,chart1) = process (litCatMatch (Just t)) add (sequences cnc) (cncfuns cnc) agenda acc chart + acc = TMap.unions [tmap | (t,tmap) <- Map.toList map_items, piToken input t] + (acc1,chart1) = process flit ftok (sequences cnc) (cncfuns cnc) agenda acc chart chart2 = chart1{ active =emptyAC , actives=active chart1 : actives chart1 , passive=emptyPC @@ -104,44 +149,12 @@ nextState (PState pgf cnc chart items) t = then Left (EState pgf cnc chart2) else Right (PState pgf cnc chart2 acc1) where - add (tok:toks) item acc - | tok == t = TMap.insertWith Set.union toks (Set.singleton item) acc - add _ item acc = acc + flit = piLiteral input -acceptsLiteral :: ParseState -> Type -> Bool -acceptsLiteral (PState pgf cnc chart items) (DTyp _ cat _) = - case Map.lookup cat (cnccats cnc) of - Just (CncCat s e _) -> or [IntMap.member fid (active chart1) | fid <- [s..e]] - Nothing -> False - where - (mb_agenda,map_items) = TMap.decompose items - agenda = maybe [] Set.toList mb_agenda - (acc1,chart1) = process (litCatMatch Nothing) add (sequences cnc) (cncfuns cnc) agenda TMap.empty chart + ftok (tok:toks) item acc + | piToken input tok = TMap.insertWith Set.union toks (Set.singleton item) acc + ftok _ item acc = acc - add (tok:toks) item acc = acc - -feedLiteral :: ParseState -> Expr -> Either ErrorState ParseState -feedLiteral (PState pgf cnc chart items) (ELit lit) = - let (mb_agenda,map_items) = TMap.decompose items - agenda = maybe [] Set.toList mb_agenda - (acc1,chart1) = process (magic lit) add (sequences cnc) (cncfuns cnc) agenda TMap.empty chart - chart2 = chart1{ active =emptyAC - , actives=active chart1 : actives chart1 - , passive=emptyPC - , offset =offset chart1+1 - } - in if TMap.null acc1 - then Left (EState pgf cnc chart2) - else Right (PState pgf cnc chart2 acc1) - where - add toks item acc = TMap.insertWith Set.union toks (Set.singleton item) acc - - magic lit fid = - case lit of - LStr s | fid == fidString -> Just (cidString, ELit lit, words s) - LInt n | fid == fidInt -> Just (cidInt, ELit lit, [show n]) - LFlt d | fid == fidFloat -> Just (cidFloat, ELit lit, [show d]) - _ -> Nothing -- | If the next token is not known but only its prefix (possible empty prefix) -- then the 'getCompletions' function can be used to calculate the possible @@ -152,7 +165,7 @@ getCompletions (PState pgf cnc chart items) w = let (mb_agenda,map_items) = TMap.decompose items agenda = maybe [] Set.toList mb_agenda acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items - (acc',chart1) = process (litCatMatch Nothing) add (sequences cnc) (cncfuns cnc) agenda acc chart + (acc',chart1) = process flit ftok (sequences cnc) (cncfuns cnc) agenda acc chart chart2 = chart1{ active =emptyAC , actives=active chart1 : actives chart1 , passive=emptyPC @@ -160,15 +173,17 @@ getCompletions (PState pgf cnc chart items) w = } in fmap (PState pgf cnc chart2) acc' where - add (tok:toks) item acc - | isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc - add _ item acc = acc + flit _ = Nothing + + ftok (tok:toks) item acc + | isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc + ftok _ item acc = acc recoveryStates :: [Type] -> ErrorState -> (ParseState, Map.Map Token ParseState) recoveryStates open_types (EState pgf cnc chart) = let open_fcats = concatMap type2fcats open_types agenda = foldl (complete open_fcats) [] (actives chart) - (acc,chart1) = process (litCatMatch Nothing) add (sequences cnc) (cncfuns cnc) agenda Map.empty chart + (acc,chart1) = process flit ftok (sequences cnc) (cncfuns cnc) agenda Map.empty chart chart2 = chart1{ active =emptyAC , actives=active chart1 : actives chart1 , passive=emptyPC @@ -186,14 +201,15 @@ recoveryStates open_types (EState pgf cnc chart) = items [set | fcat <- open_fcats, set <- lookupACByFCat fcat ac] - add (tok:toks) item acc = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc + flit _ = Nothing + ftok (tok:toks) item acc = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc -- | This function extracts the list of all completed parse trees -- that spans the whole input consumed so far. The trees are also -- limited by the category specified, which is usually -- the same as the startup category. -getParseResult :: ParseState -> Type -> (ParseResult,BracketedString) -getParseResult (PState pgf cnc chart items) ty@(DTyp _ start _) = +getParseOutput :: ParseState -> Type -> (ParseOutput,BracketedString) +getParseOutput (PState pgf cnc chart items) ty@(DTyp _ start _) = let froots | null roots = getPartialSeq (sequences cnc) (reverse (active st : actives st)) acc1 | otherwise = [([SymCat 0 lbl],[fid]) | AK fid lbl <- roots] @@ -209,15 +225,16 @@ getParseResult (PState pgf cnc chart items) ty@(DTyp _ start _) = res = if null exps then ParseFailed (offset chart) - else ParseResult exps + else ParseOk exps in (res,bs) where (mb_agenda,acc) = TMap.decompose items agenda = maybe [] Set.toList mb_agenda - (acc1,st) = process (litCatMatch Nothing) add (sequences cnc) (cncfuns cnc) agenda [] chart + (acc1,st) = process flit ftok (sequences cnc) (cncfuns cnc) agenda [] chart - add _ (Active j ppos funid seqid args key) items = (j,lin,args,key) : items + flit _ = Nothing + ftok _ (Active j ppos funid seqid args key) items = (j,lin,args,key) : items where lin = take (ppos-1) (elems (unsafeAt (sequences cnc) seqid)) @@ -274,8 +291,8 @@ getPartialSeq seqs actives = expand Set.empty inc n (SymLit d r) = SymLit (n+d) r inc n s = s -process mbt fn !seqs !funs [] acc chart = (acc,chart) -process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart +process flit ftok !seqs !funs [] acc chart = (acc,chart) +process flit ftok !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart | inRange (bounds lin) ppos = case unsafeAt lin ppos of SymCat d r -> let !fid = args !! d @@ -288,15 +305,15 @@ process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) ac (\_ _ items -> items) items2 fid (forest chart) in case lookupAC key (active chart) of - Nothing -> process mbt fn seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)} - Just set | Set.member item set -> process mbt fn seqs funs items acc chart - | otherwise -> process mbt fn seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)} - SymKS toks -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc - in process mbt fn seqs funs items acc' chart + Nothing -> process flit ftok seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)} + Just set | Set.member item set -> process flit ftok seqs funs items acc chart + | otherwise -> process flit ftok seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)} + SymKS toks -> let !acc' = ftok toks (Active j (ppos+1) funid seqid args key0) acc + in process flit ftok seqs funs items acc' chart SymKP strs vars - -> let !acc' = foldl (\acc toks -> fn toks (Active j (ppos+1) funid seqid args key0) acc) acc - (strs:[strs' | Alt strs' _ <- vars]) - in process mbt fn seqs funs items acc' chart + -> let !acc' = foldl (\acc toks -> ftok toks (Active j (ppos+1) funid seqid args key0) acc) acc + (strs:[strs' | Alt strs' _ <- vars]) + in process flit ftok seqs funs items acc' chart SymLit d r -> let fid = args !! d key = AK fid r !fid' = case lookupPC (mkPK key k) (passive chart) of @@ -304,17 +321,17 @@ process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) ac Just fid -> fid in case [ts | PConst _ _ ts <- maybe [] Set.toList (IntMap.lookup fid' (forest chart))] of - (toks:_) -> let !acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc - in process mbt fn seqs funs items acc' chart - [] -> case mbt fid of + (toks:_) -> let !acc' = ftok toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc + in process flit ftok seqs funs items acc' chart + [] -> case flit fid of Just (cat,lit,toks) -> let fid' = nextId chart - !acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc - in process mbt fn seqs funs items acc' chart{passive=insertPC (mkPK key k) fid' (passive chart) - ,forest =IntMap.insert fid' (Set.singleton (PConst cat lit toks)) (forest chart) - ,nextId =nextId chart+1 - } - Nothing -> process mbt fn seqs funs items acc chart{active=insertAC key (Set.singleton item) (active chart)} + !acc' = ftok toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc + in process flit ftok seqs funs items acc' chart{passive=insertPC (mkPK key k) fid' (passive chart) + ,forest =IntMap.insert fid' (Set.singleton (PConst cat lit toks)) (forest chart) + ,nextId =nextId chart+1 + } + Nothing -> process flit ftok seqs funs items acc chart{active=insertAC key (Set.singleton item) (active chart)} | otherwise = case lookupPC (mkPK key0 j) (passive chart) of Nothing -> let fid = nextId chart @@ -324,12 +341,12 @@ process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) ac Just set -> Set.fold (\(Active j' ppos funid seqid args keyc) -> let SymCat d _ = unsafeAt (unsafeAt seqs seqid) ppos in (:) (Active j' (ppos+1) funid seqid (updateAt d fid args) keyc)) items set - in process mbt fn seqs funs items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart) - ,forest =IntMap.insert fid (Set.singleton (PApply funid args)) (forest chart) - ,nextId =nextId chart+1 - } + in process flit ftok seqs funs items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart) + ,forest =IntMap.insert fid (Set.singleton (PApply funid args)) (forest chart) + ,nextId =nextId chart+1 + } Just id -> let items2 = [Active k 0 funid (rhs funid r) args (AK id r) | r <- labelsAC id (active chart)] ++ items - in process mbt fn seqs funs items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (PApply funid args)) (forest chart)} + in process flit ftok seqs funs items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (PApply funid args)) (forest chart)} where !lin = unsafeAt seqs seqid !k = offset chart @@ -344,15 +361,6 @@ process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) ac updateAt :: Int -> a -> [a] -> [a] updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs] -litCatMatch (Just t) fid - | fid == fidString = Just (cidString,ELit (LStr t),[t]) - | fid == fidInt = case reads t of {[(n,"")] -> Just (cidInt,ELit (LInt n),[t]); - _ -> Nothing } - | fid == fidFloat = case reads t of {[(d,"")] -> Just (cidFloat,ELit (LFlt d),[t]); - _ -> Nothing } - | fid == fidVar = Just (cidVar,EFun (mkCId t),[t]) -litCatMatch _ _ = Nothing - ---------------------------------------------------------------- -- Active Chart