diff --git a/src/compiler/GF/Compile/Compute/Concrete.hs b/src/compiler/GF/Compile/Compute/Concrete.hs index 2d865e802..e6766000d 100644 --- a/src/compiler/GF/Compile/Compute/Concrete.hs +++ b/src/compiler/GF/Compile/Compute/Concrete.hs @@ -25,6 +25,7 @@ import Control.Applicative import qualified Control.Monad.Fail as Fail import qualified Data.Map as Map import GF.Text.Pretty +import Debug.Trace -- * Main entry points @@ -151,13 +152,15 @@ patternMatch v0 ((env0,ps,args0,t):eqs) = match env0 ps eqs args0 | s1 == s2 -> match env ps eqs args (PString s1, VC []) | null s1 -> match env ps eqs args - (PSeq p1 p2,VStr s) - -> do eqs <- matchStr env (p1:p2:ps) eqs [] [] s [] args - patternMatch v0 eqs - (PSeq p1 p2,VC vs)-> do mb_eqs <- matchSeq env (p1:p2:ps) eqs [] vs args - case mb_eqs of - Just eqs -> patternMatch v0 eqs - Nothing -> return v0 + (PSeq min1 max1 p1 min2 max2 p2,v) + -> case value2string v of + Just s -> do let n = length s + lo = min1 `max` (n-max2) + hi = (n-min2) `min` max1 + (ds,cs) = splitAt lo s + eqs <- matchStr env (p1:p2:ps) eqs (hi-lo) (reverse ds) cs args + patternMatch v0 eqs + Nothing -> return v0 (PChar, VStr [_]) -> match env ps eqs args (PChars cs, VStr [c]) | elem c cs -> match env ps eqs args @@ -173,29 +176,29 @@ patternMatch v0 ((env0,ps,args0,t):eqs) = match env0 ps eqs args0 Just tnk -> matchRec env pas as (p:ps) eqs (tnk:args) Nothing -> evalError ("Missing value for label" <+> pp lbl) - matchSeq env ps eqs ws [] args = return (Just eqs) - matchSeq env ps eqs ws (v:vs) args = do - mb_eqs <- matchSeq env ps eqs (v:ws) vs args - case v of - VStr [] -> return mb_eqs - VStr (c:cs) -> case mb_eqs of - Just eqs -> do eqs <- matchStr env ps eqs ws [c] cs vs args - return (Just eqs) - Nothing -> return Nothing - _ -> return Nothing + value2string (VStr s) = Just s + value2string (VC vs) = fmap unwords (mapM value2string vs) + value2string _ = Nothing - matchStr env ps eqs ws ds [] vs args = do - arg1 <- newEvaluatedThunk (vc (reverse (VStr (reverse ds):ws))) - arg2 <- newEvaluatedThunk (vc vs) + matchStr env ps eqs i ds [] args = do + arg1 <- newEvaluatedThunk (vc (reverse ds)) + arg2 <- newEvaluatedThunk (vc []) return ((env,ps,arg1:arg2:args,t) : eqs) - matchStr env ps eqs ws ds (c:cs) vs args = do - arg1 <- newEvaluatedThunk (vc (reverse (if null ds then ws else VStr (reverse ds):ws))) - arg2 <- newEvaluatedThunk (vc (VStr (c:cs):vs)) - eqs <- matchStr env ps eqs ws (c:ds) cs vs args + matchStr env ps eqs 0 ds cs args = do + arg1 <- newEvaluatedThunk (vc (reverse ds)) + arg2 <- newEvaluatedThunk (vc cs) + return ((env,ps,arg1:arg2:args,t) : eqs) + matchStr env ps eqs i ds (c:cs) args = do + arg1 <- newEvaluatedThunk (vc (reverse ds)) + arg2 <- newEvaluatedThunk (vc (c:cs)) + eqs <- matchStr env ps eqs (i-1 :: Int) (c:ds) cs args return ((env,ps,arg1:arg2:args,t) : eqs) - vc [x] = x - vc xs = VC xs + vc s = + case words s of + [] -> VC [] + [w] -> VStr w + ws -> VC (map VStr ws) value2term i (VApp q tnks) = foldM (\e1 tnk -> fmap (App e1) (force tnk [] >>= value2term i)) (QC q) tnks diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs index 8c5e73305..106c9097a 100644 --- a/src/compiler/GF/Compile/GeneratePMCFG.hs +++ b/src/compiler/GF/Compile/GeneratePMCFG.hs @@ -385,7 +385,7 @@ convertTerm opts sel ctype (Alts s alts)= do CStr s <- convertTerm opts CNil cty getPatts p = case p of PAlt a b -> liftM2 (++) (getPatts a) (getPatts b) PString s -> return [K s] - PSeq a b -> do + PSeq _ _ a _ _ b -> do as <- getPatts a bs <- getPatts b return [K (s ++ t) | K s <- as, K t <- bs] diff --git a/src/compiler/GF/Compile/GrammarToCanonical.hs b/src/compiler/GF/Compile/GrammarToCanonical.hs index 72cca9000..9debb63a2 100644 --- a/src/compiler/GF/Compile/GrammarToCanonical.hs +++ b/src/compiler/GF/Compile/GrammarToCanonical.hs @@ -247,7 +247,7 @@ convert' gr vs = ppT pat (PString s) = [s] pat (PAlt p1 p2) = pat p1++pat p2 - pat (PSeq p1 p2) = [s1++s2 | s1<-pat p1, s2<-pat p2] + pat (PSeq _ _ p1 _ _ p2) = [s1++s2 | s1<-pat p1, s2<-pat p2] pat p = error $ "convert' alts pat: "++show p fields = map field . filter (not.isLockLabel.fst) diff --git a/src/compiler/GF/Compile/Rename.hs b/src/compiler/GF/Compile/Rename.hs index 41b2cdc67..0a9e6ea3e 100644 --- a/src/compiler/GF/Compile/Rename.hs +++ b/src/compiler/GF/Compile/Rename.hs @@ -306,10 +306,10 @@ renamePattern env patt = (q',ws) <- renp q return (PAlt p' q', vs ++ ws) - PSeq p q -> do + PSeq minp maxp p minq maxq q -> do (p',vs) <- renp p (q',ws) <- renp q - return (PSeq p' q', vs ++ ws) + return (PSeq minp maxp p' minq maxq q', vs ++ ws) PRep p -> do (p',vs) <- renp p diff --git a/src/compiler/GF/Compile/TypeCheck/Concrete.hs b/src/compiler/GF/Compile/TypeCheck/Concrete.hs index e9420290a..973c3d410 100644 --- a/src/compiler/GF/Compile/TypeCheck/Concrete.hs +++ b/src/compiler/GF/Compile/TypeCheck/Concrete.hs @@ -13,6 +13,7 @@ import GF.Grammar.Lockfield (isLockLabel, lockRecType, unlockRecord) import GF.Compile.TypeCheck.Primitives import Data.List +import Data.Maybe(fromMaybe) import Control.Monad import GF.Text.Pretty @@ -290,7 +291,7 @@ inferLType gr g trm = case trm of inferCase mty (patt,term) = do arg <- maybe (inferPatt patt) return mty cont <- pattContext gr g arg patt - (_,val) <- inferLType gr (reverse cont ++ g) term + (term',val) <- inferLType gr (reverse cont ++ g) term return (arg,val) isConstPatt p = case p of PC _ ps -> True --- all isConstPatt ps @@ -302,7 +303,7 @@ inferLType gr g trm = case trm of PFloat _ -> True PChar -> True PChars _ -> True - PSeq p q -> isConstPatt p && isConstPatt q + PSeq _ _ p _ _ q -> isConstPatt p && isConstPatt q PAlt p q -> isConstPatt p && isConstPatt q PRep p -> isConstPatt p PNeg p -> isConstPatt p @@ -314,12 +315,39 @@ inferLType gr g trm = case trm of PAs _ p -> inferPatt p PNeg p -> inferPatt p PAlt p q -> checks [inferPatt p, inferPatt q] - PSeq _ _ -> return $ typeStr + PSeq _ _ _ _ _ _ -> return $ typeStr PRep _ -> return $ typeStr PChar -> return $ typeStr PChars _ -> return $ typeStr _ -> inferLType gr g (patt2term p) >>= return . snd +measurePatt p = + case p of + PR ass -> let p' = PR (map (\(lbl,p) -> let (_,_,p') = measurePatt p in (lbl,p')) ass) + in (0,Nothing,p') + PString s -> let len=length s + in (len,Just len,p) + PT t p -> let (min,max,p') = measurePatt p + in (min,max,PT t p') + PAs x p -> let (min,max,p') = measurePatt p + in (min,max,PAs x p') + PImplArg p -> let (min,max,p') = measurePatt p + in (min,max,PImplArg p') + PNeg p -> let (_,_,p') = measurePatt p + in (0,Nothing,PNeg p') + PAlt p1 p2 -> let (min1,max1,p1') = measurePatt p1 + (min2,max2,p2') = measurePatt p2 + in (min min1 min2,liftM2 max max1 max2,PAlt p1' p2') + PSeq _ _ p1 _ _ p2 + -> let (min1,max1,p1') = measurePatt p1 + (min2,max2,p2') = measurePatt p2 + in (min1+min2,liftM2 (+) max1 max2,PSeq min1 (fromMaybe maxBound max1) p1' min2 (fromMaybe maxBound max2) p2') + PRep p -> let (_,_,p') = measurePatt p + in (0,Nothing,PRep p') + PChar -> (1,Just 1,p) + PChars _ -> (1,Just 1,p) + _ -> (0,Nothing,p) + -- type inference: Nothing, type checking: Just t -- the latter permits matching with value type getOverload :: SourceGrammar -> Context -> Maybe Type -> Term -> Check (Maybe (Term,Type)) @@ -596,7 +624,8 @@ checkLType gr g trm typ0 = do checkCase arg val (p,t) = do cont <- pattContext gr g arg p t' <- justCheck (reverse cont ++ g) t val - return (p,t') + let (_,_,p') = measurePatt p + return (p',t') pattContext :: SourceGrammar -> Context -> Type -> Patt -> Check Context pattContext env g typ p = case p of @@ -633,7 +662,7 @@ pattContext env g typ p = case p of fsep pts <+> "in pattern alterantives" <+> ppPatt Unqualified 0 p) (null pts) return g1 -- must be g1 == g2 - PSeq p q -> do + PSeq _ _ p _ _ q -> do g1 <- pattContext env g typ p g2 <- pattContext env g typ q return $ g1 ++ g2 diff --git a/src/compiler/GF/Compile/TypeCheck/ConcreteNew.hs b/src/compiler/GF/Compile/TypeCheck/ConcreteNew.hs index 586c879e8..26808a2a8 100644 --- a/src/compiler/GF/Compile/TypeCheck/ConcreteNew.hs +++ b/src/compiler/GF/Compile/TypeCheck/ConcreteNew.hs @@ -318,7 +318,7 @@ tcPatt ge scope (PString s) ty0 = do tcPatt ge scope PChar ty0 = do unify ge scope ty0 vtypeStr return scope -tcPatt ge scope (PSeq p1 p2) ty0 = do +tcPatt ge scope (PSeq _ _ p1 _ _ p2) ty0 = do unify ge scope ty0 vtypeStr scope <- tcPatt ge scope p1 vtypeStr scope <- tcPatt ge scope p2 vtypeStr diff --git a/src/compiler/GF/Grammar/Binary.hs b/src/compiler/GF/Grammar/Binary.hs index 06e941674..673b6f179 100644 --- a/src/compiler/GF/Grammar/Binary.hs +++ b/src/compiler/GF/Grammar/Binary.hs @@ -244,7 +244,7 @@ instance Binary Patt where put (PAs x y) = putWord8 10 >> put (x,y) put (PNeg x) = putWord8 11 >> put x put (PAlt x y) = putWord8 12 >> put (x,y) - put (PSeq x y) = putWord8 13 >> put (x,y) + put (PSeq minx maxx x miny maxy y) = putWord8 13 >> put (minx,maxx,x,miny,maxy,y) put (PRep x) = putWord8 14 >> put x put (PChar) = putWord8 15 put (PChars x) = putWord8 16 >> put x @@ -266,7 +266,7 @@ instance Binary Patt where 10 -> get >>= \(x,y) -> return (PAs x y) 11 -> get >>= \x -> return (PNeg x) 12 -> get >>= \(x,y) -> return (PAlt x y) - 13 -> get >>= \(x,y) -> return (PSeq x y) + 13 -> get >>= \(minx,maxx,x,miny,maxy,y) -> return (PSeq minx maxx x miny maxy y) 14 -> get >>= \x -> return (PRep x) 15 -> return (PChar) 16 -> get >>= \x -> return (PChars x) diff --git a/src/compiler/GF/Grammar/Grammar.hs b/src/compiler/GF/Grammar/Grammar.hs index 758cdd270..6d7a41b10 100644 --- a/src/compiler/GF/Grammar/Grammar.hs +++ b/src/compiler/GF/Grammar/Grammar.hs @@ -421,8 +421,7 @@ data Patt = -- regular expression patterns | PNeg Patt -- ^ negated pattern: -p | PAlt Patt Patt -- ^ disjunctive pattern: p1 | p2 - | PSeq Patt Patt -- ^ sequence of token parts: p + q - | PMSeq MPatt MPatt -- ^ sequence of token parts: p + q + | PSeq Int Int Patt Int Int Patt -- ^ sequence of token parts: p + q | PRep Patt -- ^ repetition of token part: p* | PChar -- ^ string of length one: ? | PChars [Char] -- ^ character list: ["aeiou"] @@ -430,9 +429,6 @@ data Patt = | PM QIdent -- #m.p deriving (Show, Eq, Ord) --- | Measured pattern (paired with the min & max matching length) -type MPatt = ((Int,Int),Patt) - -- | to guide computation and type checking of tables data TInfo = TRaw -- ^ received from parser; can be anything diff --git a/src/compiler/GF/Grammar/Macros.hs b/src/compiler/GF/Grammar/Macros.hs index d7869f507..1f905b025 100644 --- a/src/compiler/GF/Grammar/Macros.hs +++ b/src/compiler/GF/Grammar/Macros.hs @@ -392,7 +392,7 @@ term2patt trm = case termForm trm of Ok ([], Cn id, [a,b]) | id == cSeq -> do a' <- term2patt a b' <- term2patt b - return (PSeq a' b') + return (PSeq 0 maxBound a' 0 maxBound b') Ok ([], Cn id, [a,b]) | id == cAlt -> do a' <- term2patt a b' <- term2patt b @@ -422,7 +422,7 @@ patt2term pt = case pt of PAs x p -> appCons cAs [Vr x, patt2term p] --- an encoding PChar -> appCons cChar [] --- an encoding PChars s -> appCons cChars [K s] --- an encoding - PSeq a b -> appCons cSeq [(patt2term a), (patt2term b)] --- an encoding + PSeq _ _ a _ _ b -> appCons cSeq [(patt2term a), (patt2term b)] --- an encoding PAlt a b -> appCons cAlt [(patt2term a), (patt2term b)] --- an encoding PRep a -> appCons cRep [(patt2term a)] --- an encoding PNeg a -> appCons cNeg [(patt2term a)] --- an encoding @@ -475,8 +475,7 @@ composPattOp op patt = PImplArg p -> liftM PImplArg (op p) PNeg p -> liftM PNeg (op p) PAlt p1 p2 -> liftM2 PAlt (op p1) (op p2) - PSeq p1 p2 -> liftM2 PSeq (op p1) (op p2) - PMSeq (_,p1) (_,p2) -> liftM2 PSeq (op p1) (op p2) -- information loss + PSeq _ _ p1 _ _ p2 -> liftM2 (\p1 p2 -> PSeq 0 maxBound p1 0 maxBound p2) (op p1) (op p2) PRep p -> liftM PRep (op p) _ -> return patt -- covers cases without subpatterns @@ -514,8 +513,7 @@ collectPattOp op patt = PImplArg p -> op p PNeg p -> op p PAlt p1 p2 -> op p1++op p2 - PSeq p1 p2 -> op p1++op p2 - PMSeq (_,p1) (_,p2) -> op p1++op p2 + PSeq _ _ p1 _ _ p2 -> op p1++op p2 PRep p -> op p _ -> [] -- covers cases without subpatterns diff --git a/src/compiler/GF/Grammar/Parser.y b/src/compiler/GF/Grammar/Parser.y index faccbdb4e..018430ae6 100644 --- a/src/compiler/GF/Grammar/Parser.y +++ b/src/compiler/GF/Grammar/Parser.y @@ -485,7 +485,7 @@ Exps Patt :: { Patt } Patt : Patt '|' Patt1 { PAlt $1 $3 } - | Patt '+' Patt1 { PSeq $1 $3 } + | Patt '+' Patt1 { PSeq 0 maxBound $1 0 maxBound $3 } | Patt1 { $1 } Patt1 :: { Patt } diff --git a/src/compiler/GF/Grammar/PatternMatch.hs b/src/compiler/GF/Grammar/PatternMatch.hs index dc0a5d3a5..b7a85660c 100644 --- a/src/compiler/GF/Grammar/PatternMatch.hs +++ b/src/compiler/GF/Grammar/PatternMatch.hs @@ -15,8 +15,7 @@ module GF.Grammar.PatternMatch ( matchPattern, testOvershadow, - findMatch, - measurePatt + findMatch ) where import GF.Data.Operations @@ -122,11 +121,10 @@ tryMatch (p,t) = do Bad _ -> return [] _ -> raise (render ("no match with negative pattern" <+> p)) - (PSeq p1 p2, ([],K s, [])) -> matchPSeq p1 p2 s - (PMSeq mp1 mp2, ([],K s, [])) -> matchPMSeq mp1 mp2 s + (PSeq min1 max1 p1 min2 max2 p2, ([],K s, [])) -> matchPSeq min1 max1 p1 min2 max2 p2 s (PRep p1, ([],K s, [])) -> checks [ - trym (foldr (const (PSeq p1)) (PString "") + trym (foldr (const (PSeq 0 maxBound p1 0 maxBound)) (PString "") [1..n]) t' | n <- [0 .. length s] ] >> return [] @@ -140,12 +138,7 @@ tryMatch (p,t) = do words2term [w] = K w words2term (w:ws) = C (K w) (words2term ws) - -matchPMSeq (m1,p1) (m2,p2) s = matchPSeq' m1 p1 m2 p2 s ---matchPSeq p1 p2 s = matchPSeq' (0,maxBound::Int) p1 (0,maxBound::Int) p2 s -matchPSeq p1 p2 s = matchPSeq' (lengthBounds p1) p1 (lengthBounds p2) p2 s - -matchPSeq' b1@(min1,max1) p1 b2@(min2,max2) p2 s = +matchPSeq min1 max1 p1 min2 max2 p2 s = do let n = length s lo = min1 `max` (n-max2) hi = (n-min2) `min` max1 @@ -153,37 +146,6 @@ matchPSeq' b1@(min1,max1) p1 b2@(min2,max2) p2 s = matches <- checks [mapM tryMatch [(p1,K s1),(p2,K s2)] | (s1,s2) <- cuts] return (concat matches) --- | Estimate the minimal length of the string that a pattern will match -minLength = matchLength 0 id (+) min -- safe underestimate - --- | Estimate the maximal length of the string that a pattern will match -maxLength = - maybe maxBound id . matchLength Nothing Just (liftM2 (+)) (liftM2 max) - -- safe overestimate - -matchLength unknown known seq alt = len - where - len p = - case p of - PString s -> known (length s) - PSeq p1 p2 -> seq (len p1) (len p2) - PAlt p1 p2 -> alt (len p1) (len p2) - PChar -> known 1 - PChars _ -> known 1 - PAs x p' -> len p' - PT t p' -> len p' - _ -> unknown - -lengthBounds p = (minLength p,maxLength p) - -mPatt p = (lengthBounds p,measurePatt p) - -measurePatt p = - case p of - PSeq p1 p2 -> PMSeq (mPatt p1) (mPatt p2) - _ -> composSafePattOp measurePatt p - - isInConstantForm :: Term -> Bool isInConstantForm trm = case trm of Cn _ -> True diff --git a/src/compiler/GF/Grammar/Printer.hs b/src/compiler/GF/Grammar/Printer.hs index 969e6961f..67dde7fd6 100644 --- a/src/compiler/GF/Grammar/Printer.hs +++ b/src/compiler/GF/Grammar/Printer.hs @@ -250,8 +250,7 @@ ppCase q (p,e) = ppPatt q 0 p <+> "=>" <+> ppTerm q 0 e instance Pretty Patt where pp = ppPatt Unqualified 0 ppPatt q d (PAlt p1 p2) = prec d 0 (ppPatt q 0 p1 <+> '|' <+> ppPatt q 1 p2) -ppPatt q d (PSeq p1 p2) = prec d 0 (ppPatt q 0 p1 <+> '+' <+> ppPatt q 1 p2) -ppPatt q d (PMSeq (_,p1) (_,p2)) = prec d 0 (ppPatt q 0 p1 <+> '+' <+> ppPatt q 1 p2) +ppPatt q d (PSeq _ _ p1 _ _ p2) = prec d 0 (ppPatt q 0 p1 <+> '+' <+> ppPatt q 1 p2) ppPatt q d (PC f ps) = if null ps then pp f else prec d 1 (f <+> hsep (map (ppPatt q 3) ps))