diff --git a/src/compiler/GF/Compile/Compute/ConcreteNew.hs b/src/compiler/GF/Compile/Compute/ConcreteNew.hs index 6be113d4b..c853458f8 100644 --- a/src/compiler/GF/Compile/Compute/ConcreteNew.hs +++ b/src/compiler/GF/Compile/Compute/ConcreteNew.hs @@ -8,7 +8,7 @@ module GF.Compile.Compute.ConcreteNew import GF.Grammar hiding (Env, VGen, VApp, VRecType) import GF.Grammar.Lookup(lookupResDefLoc,allParamValues) import GF.Grammar.Predef(cPredef,cErrorType,cTok,cStr) -import GF.Grammar.PatternMatch(matchPattern) +import GF.Grammar.PatternMatch(matchPattern,measurePatt) import GF.Grammar.Lockfield(unlockRecord,lockLabel,isLockLabel,lockRecType) import GF.Compile.Compute.Value hiding (Predefined(..)) import GF.Compile.Compute.Predef(predef,predefName,delta) @@ -320,7 +320,7 @@ valueTable env i cs = TWild _ -> True _ -> False - valueCase (p,t) = do p' <- inlinePattMacro p + valueCase (p,t) = do p' <- measurePatt # inlinePattMacro p let pvs = pattVars p' vt <- value (extend pvs env) t return (p', \ vs -> Bind $ \ bs -> vt (push' p' bs pvs vs)) diff --git a/src/compiler/GF/Grammar/Grammar.hs b/src/compiler/GF/Grammar/Grammar.hs index 218a2bd0b..c59cd809e 100644 --- a/src/compiler/GF/Grammar/Grammar.hs +++ b/src/compiler/GF/Grammar/Grammar.hs @@ -430,6 +430,7 @@ data Term = | Error String -- ^ error values returned by Predef.error deriving (Show, Eq, Ord) +-- | Patterns data Patt = PC Ident [Patt] -- ^ constructor pattern: @C p1 ... pn@ @C@ | PP QIdent [Patt] -- ^ package constructor pattern: @P.C p1 ... pn@ @P.C@ @@ -450,14 +451,17 @@ data Patt = | PNeg Patt -- ^ negated pattern: -p | PAlt Patt Patt -- ^ disjunctive pattern: p1 | p2 | PSeq Patt Patt -- ^ sequence of token parts: p + q + | PMSeq MPatt MPatt -- ^ sequence of token parts: p + q | PRep Patt -- ^ repetition of token part: p* | PChar -- ^ string of length one: ? | PChars [Char] -- ^ character list: ["aeiou"] | PMacro Ident -- #p | PM QIdent -- #m.p - deriving (Show, Eq, Ord) +-- | Measured pattern (paired with the min & max matching length) +type MPatt = ((Int,Int),Patt) + -- | to guide computation and type checking of tables data TInfo = TRaw -- ^ received from parser; can be anything diff --git a/src/compiler/GF/Grammar/Macros.hs b/src/compiler/GF/Grammar/Macros.hs index 97146b197..bd7de5db4 100644 --- a/src/compiler/GF/Grammar/Macros.hs +++ b/src/compiler/GF/Grammar/Macros.hs @@ -483,6 +483,8 @@ composOp co trm = ImplArg t -> liftM ImplArg (co t) _ -> return trm -- covers K, Vr, Cn, Sort, EPatt +composSafePattOp op = runIdentity . composPattOp (return . op) + composPattOp :: Monad m => (Patt -> m Patt) -> Patt -> m Patt composPattOp op patt = case patt of @@ -495,6 +497,7 @@ composPattOp op patt = PNeg p -> liftM PNeg (op p) PAlt p1 p2 -> liftM2 PAlt (op p1) (op p2) PSeq p1 p2 -> liftM2 PSeq (op p1) (op p2) + PMSeq (_,p1) (_,p2) -> liftM2 PSeq (op p1) (op p2) -- information loss PRep p -> liftM PRep (op p) _ -> return patt -- covers cases without subpatterns @@ -545,6 +548,7 @@ collectPattOp op patt = PNeg p -> op p PAlt p1 p2 -> op p1++op p2 PSeq p1 p2 -> op p1++op p2 + PMSeq (_,p1) (_,p2) -> op p1++op p2 PRep p -> op p _ -> [] -- covers cases without subpatterns diff --git a/src/compiler/GF/Grammar/PatternMatch.hs b/src/compiler/GF/Grammar/PatternMatch.hs index 12bd29c8c..8ea388f76 100644 --- a/src/compiler/GF/Grammar/PatternMatch.hs +++ b/src/compiler/GF/Grammar/PatternMatch.hs @@ -14,7 +14,8 @@ module GF.Grammar.PatternMatch (matchPattern, testOvershadow, - findMatch + findMatch, + measurePatt ) where import GF.Data.Operations @@ -117,6 +118,7 @@ tryMatch (p,t) = do _ -> Bad (render (text "no match with negative pattern" <+> ppPatt Unqualified 0 p)) (PSeq p1 p2, ([],K s, [])) -> matchPSeq p1 p2 s + (PMSeq mp1 mp2, ([],K s, [])) -> matchPMSeq mp1 mp2 s (PRep p1, ([],K s, [])) -> checks [ trym (foldr (const (PSeq p1)) (PString "") @@ -129,13 +131,18 @@ tryMatch (p,t) = do _ -> Bad (render (text "no match in case expr for" <+> ppTerm Unqualified 0 t)) -matchPSeq p1 p2 s = - do let min1 = 0 --minLength p1 - min2 = length s -- -minLength p2 - cuts = [splitAt n s | n <- [min1 .. min2]] +matchPMSeq (m1,p1) (m2,p2) s = matchPSeq' m1 p1 m2 p2 s +--matchPSeq p1 p2 s = matchPSeq' (0,maxBound::Int) p1 (0,maxBound::Int) p2 s +matchPSeq p1 p2 s = matchPSeq' (lengthBounds p1) p1 (lengthBounds p2) p2 s + +matchPSeq' b1@(min1,max1) p1 b2@(min2,max2) p2 s = + do let n = length s + lo = min1 `max` (n-max2) + hi = (n-min2) `min` max1 + cuts = [splitAt i s | i <- [lo..hi]] matches <- checks [mapM tryMatch [(p1,K s1),(p2,K s2)] | (s1,s2) <- cuts] return (concat matches) -{- + -- | Estimate the minimal length of the string that a pattern will match minLength p = case p of @@ -147,7 +154,31 @@ minLength p = PAs x p' -> minLength p' PT t p' -> minLength p' _ -> 0 -- safe underestimate --} + +-- | Estimate the maximal length of the string that a pattern will match +maxLength = maybe maxBound id . maxl -- safe overestimate + where + maxl p = + case p of + PString s -> Just (length s) + PSeq p1 p2 -> liftM2 (+) (maxl p1) (maxl p2) + PAlt p1 p2 -> liftM2 max (maxl p1) (maxl p2) + PChar -> Just 1 + PChars _ -> Just 1 + PAs x p' -> maxl p' + PT t p' -> maxl p' + _ -> Nothing -- unknown length + +lengthBounds p = (minLength p,maxLength p) + +mPatt p = (lengthBounds p,measurePatt p) + +measurePatt p = + case p of + PSeq p1 p2 -> PMSeq (mPatt p1) (mPatt p2) + _ -> composSafePattOp measurePatt p + + isInConstantForm :: Term -> Bool isInConstantForm trm = case trm of Cn _ -> True diff --git a/src/compiler/GF/Grammar/Printer.hs b/src/compiler/GF/Grammar/Printer.hs index 276f2c9c2..0d9d41b7b 100644 --- a/src/compiler/GF/Grammar/Printer.hs +++ b/src/compiler/GF/Grammar/Printer.hs @@ -238,6 +238,7 @@ ppCase q (p,e) = ppPatt q 0 p <+> text "=>" <+> ppTerm q 0 e ppPatt q d (PAlt p1 p2) = prec d 0 (ppPatt q 0 p1 <+> char '|' <+> ppPatt q 1 p2) ppPatt q d (PSeq p1 p2) = prec d 0 (ppPatt q 0 p1 <+> char '+' <+> ppPatt q 1 p2) +ppPatt q d (PMSeq (_,p1) (_,p2)) = prec d 0 (ppPatt q 0 p1 <+> char '+' <+> ppPatt q 1 p2) ppPatt q d (PC f ps) = if null ps then ppIdent f else prec d 1 (ppIdent f <+> hsep (map (ppPatt q 3) ps))