From 6efb878c436a687b687808326946a1ae81c84966 Mon Sep 17 00:00:00 2001 From: krangelov Date: Wed, 29 Sep 2021 14:57:18 +0200 Subject: [PATCH] pattern matching for "x"* --- src/compiler/GF/Compile/Compute/Concrete.hs | 14 +++++++++++++- src/compiler/GF/Compile/Rename.hs | 4 ++-- src/compiler/GF/Compile/TypeCheck/Concrete.hs | 10 +++++----- src/compiler/GF/Grammar/Binary.hs | 4 ++-- src/compiler/GF/Grammar/Grammar.hs | 13 +++++++++++-- src/compiler/GF/Grammar/Macros.hs | 8 ++++---- src/compiler/GF/Grammar/Parser.y | 2 +- src/compiler/GF/Grammar/PatternMatch.hs | 2 +- src/compiler/GF/Grammar/Printer.hs | 2 +- testsuite/compiler/compute/string_matching.gfs | 2 ++ .../compiler/compute/string_matching.gfs.gold | 2 ++ 11 files changed, 44 insertions(+), 19 deletions(-) diff --git a/src/compiler/GF/Compile/Compute/Concrete.hs b/src/compiler/GF/Compile/Compute/Concrete.hs index e6766000d..a96a154d3 100644 --- a/src/compiler/GF/Compile/Compute/Concrete.hs +++ b/src/compiler/GF/Compile/Compute/Concrete.hs @@ -25,7 +25,6 @@ import Control.Applicative import qualified Control.Monad.Fail as Fail import qualified Data.Map as Map import GF.Text.Pretty -import Debug.Trace -- * Main entry points @@ -161,6 +160,14 @@ patternMatch v0 ((env0,ps,args0,t):eqs) = match env0 ps eqs args0 eqs <- matchStr env (p1:p2:ps) eqs (hi-lo) (reverse ds) cs args patternMatch v0 eqs Nothing -> return v0 + (PRep minp maxp p, v) + | minp == 0 -> match env ps eqs args + | otherwise -> case value2string v of + Just s -> do let n = length s `div` minp + eqs0 = eqs + eqs <- matchRep env n minp maxp p minp maxp p ps eqs (arg:args) + patternMatch v0 eqs + Nothing -> return v0 (PChar, VStr [_]) -> match env ps eqs args (PChars cs, VStr [c]) | elem c cs -> match env ps eqs args @@ -194,6 +201,11 @@ patternMatch v0 ((env0,ps,args0,t):eqs) = match env0 ps eqs args0 eqs <- matchStr env ps eqs (i-1 :: Int) (c:ds) cs args return ((env,ps,arg1:arg2:args,t) : eqs) + matchRep env 0 minp maxp p minq maxq q ps eqs args = do + return ((env,PString []:ps,args,t) : eqs) + matchRep env n minp maxp p minq maxq q ps eqs args = do + matchRep env (n-1) minp maxp p (minp+minq) (maxp+maxq) (PSeq minp maxp p minq maxq q) ps ((env,q:ps,args,t) : eqs) args + vc s = case words s of [] -> VC [] diff --git a/src/compiler/GF/Compile/Rename.hs b/src/compiler/GF/Compile/Rename.hs index 0a9e6ea3e..75e757ffc 100644 --- a/src/compiler/GF/Compile/Rename.hs +++ b/src/compiler/GF/Compile/Rename.hs @@ -311,9 +311,9 @@ renamePattern env patt = (q',ws) <- renp q return (PSeq minp maxp p' minq maxq q', vs ++ ws) - PRep p -> do + PRep minp maxp p -> do (p',vs) <- renp p - return (PRep p', vs) + return (PRep minp maxp p', vs) PNeg p -> do (p',vs) <- renp p diff --git a/src/compiler/GF/Compile/TypeCheck/Concrete.hs b/src/compiler/GF/Compile/TypeCheck/Concrete.hs index 973c3d410..1def7e65c 100644 --- a/src/compiler/GF/Compile/TypeCheck/Concrete.hs +++ b/src/compiler/GF/Compile/TypeCheck/Concrete.hs @@ -305,7 +305,7 @@ inferLType gr g trm = case trm of PChars _ -> True PSeq _ _ p _ _ q -> isConstPatt p && isConstPatt q PAlt p q -> isConstPatt p && isConstPatt q - PRep p -> isConstPatt p + PRep _ _ p -> isConstPatt p PNeg p -> isConstPatt p PAs _ p -> isConstPatt p _ -> False @@ -316,7 +316,7 @@ inferLType gr g trm = case trm of PNeg p -> inferPatt p PAlt p q -> checks [inferPatt p, inferPatt q] PSeq _ _ _ _ _ _ -> return $ typeStr - PRep _ -> return $ typeStr + PRep _ _ _ -> return $ typeStr PChar -> return $ typeStr PChars _ -> return $ typeStr _ -> inferLType gr g (patt2term p) >>= return . snd @@ -342,8 +342,8 @@ measurePatt p = -> let (min1,max1,p1') = measurePatt p1 (min2,max2,p2') = measurePatt p2 in (min1+min2,liftM2 (+) max1 max2,PSeq min1 (fromMaybe maxBound max1) p1' min2 (fromMaybe maxBound max2) p2') - PRep p -> let (_,_,p') = measurePatt p - in (0,Nothing,PRep p') + PRep _ _ p -> let (minp,maxp,p') = measurePatt p + in (0,Nothing,PRep minp (fromMaybe maxBound maxp) p') PChar -> (1,Just 1,p) PChars _ -> (1,Just 1,p) _ -> (0,Nothing,p) @@ -666,7 +666,7 @@ pattContext env g typ p = case p of g1 <- pattContext env g typ p g2 <- pattContext env g typ q return $ g1 ++ g2 - PRep p' -> noBind typeStr p' + PRep _ _ p' -> noBind typeStr p' PNeg p' -> noBind typ p' _ -> return [] ---- check types! diff --git a/src/compiler/GF/Grammar/Binary.hs b/src/compiler/GF/Grammar/Binary.hs index 673b6f179..998c8bb9e 100644 --- a/src/compiler/GF/Grammar/Binary.hs +++ b/src/compiler/GF/Grammar/Binary.hs @@ -245,7 +245,7 @@ instance Binary Patt where put (PNeg x) = putWord8 11 >> put x put (PAlt x y) = putWord8 12 >> put (x,y) put (PSeq minx maxx x miny maxy y) = putWord8 13 >> put (minx,maxx,x,miny,maxy,y) - put (PRep x) = putWord8 14 >> put x + put (PRep minx maxx x)= putWord8 14 >> put (minx,maxx,x) put (PChar) = putWord8 15 put (PChars x) = putWord8 16 >> put x put (PMacro x) = putWord8 17 >> put x @@ -267,7 +267,7 @@ instance Binary Patt where 11 -> get >>= \x -> return (PNeg x) 12 -> get >>= \(x,y) -> return (PAlt x y) 13 -> get >>= \(minx,maxx,x,miny,maxy,y) -> return (PSeq minx maxx x miny maxy y) - 14 -> get >>= \x -> return (PRep x) + 14 -> get >>= \(minx,maxx,x)-> return (PRep minx maxx x) 15 -> return (PChar) 16 -> get >>= \x -> return (PChars x) 17 -> get >>= \x -> return (PMacro x) diff --git a/src/compiler/GF/Grammar/Grammar.hs b/src/compiler/GF/Grammar/Grammar.hs index 6d7a41b10..3575b782c 100644 --- a/src/compiler/GF/Grammar/Grammar.hs +++ b/src/compiler/GF/Grammar/Grammar.hs @@ -421,8 +421,17 @@ data Patt = -- regular expression patterns | PNeg Patt -- ^ negated pattern: -p | PAlt Patt Patt -- ^ disjunctive pattern: p1 | p2 - | PSeq Int Int Patt Int Int Patt -- ^ sequence of token parts: p + q - | PRep Patt -- ^ repetition of token part: p* + | PSeq {-# UNPACK #-} !Int {-# UNPACK #-} !Int Patt {-# UNPACK #-} !Int {-# UNPACK #-} !Int Patt + -- ^ sequence of token parts: p + q + -- In the constructor PSeq minp maxp p minq maxq q, + -- minp/maxp and minq/maxq are the minimal/maximal + -- length of a matching string for p/q. + | PRep {-# UNPACK #-} !Int {-# UNPACK #-} !Int Patt + -- ^ repetition of token part: p* + -- In the constructor PRep minp maxp p, + -- minp/maxp is the minimal/maximal length of + -- a matching string for p. + | PChar -- ^ string of length one: ? | PChars [Char] -- ^ character list: ["aeiou"] | PMacro Ident -- #p diff --git a/src/compiler/GF/Grammar/Macros.hs b/src/compiler/GF/Grammar/Macros.hs index 1f905b025..9dcc83f0b 100644 --- a/src/compiler/GF/Grammar/Macros.hs +++ b/src/compiler/GF/Grammar/Macros.hs @@ -384,7 +384,7 @@ term2patt trm = case termForm trm of return (PNeg a') Ok ([], Cn id, [a]) | id == cRep -> do a' <- term2patt a - return (PRep a') + return (PRep 0 maxBound a') Ok ([], Cn id, []) | id == cRep -> do return PChar Ok ([], Cn id,[K s]) | id == cChars -> do @@ -424,7 +424,7 @@ patt2term pt = case pt of PChars s -> appCons cChars [K s] --- an encoding PSeq _ _ a _ _ b -> appCons cSeq [(patt2term a), (patt2term b)] --- an encoding PAlt a b -> appCons cAlt [(patt2term a), (patt2term b)] --- an encoding - PRep a -> appCons cRep [(patt2term a)] --- an encoding + PRep _ _ a-> appCons cRep [(patt2term a)] --- an encoding PNeg a -> appCons cNeg [(patt2term a)] --- an encoding @@ -476,7 +476,7 @@ composPattOp op patt = PNeg p -> liftM PNeg (op p) PAlt p1 p2 -> liftM2 PAlt (op p1) (op p2) PSeq _ _ p1 _ _ p2 -> liftM2 (\p1 p2 -> PSeq 0 maxBound p1 0 maxBound p2) (op p1) (op p2) - PRep p -> liftM PRep (op p) + PRep _ _ p -> liftM (PRep 0 maxBound) (op p) _ -> return patt -- covers cases without subpatterns collectOp :: Monoid m => (Term -> m) -> Term -> m @@ -514,7 +514,7 @@ collectPattOp op patt = PNeg p -> op p PAlt p1 p2 -> op p1++op p2 PSeq _ _ p1 _ _ p2 -> op p1++op p2 - PRep p -> op p + PRep _ _ p -> op p _ -> [] -- covers cases without subpatterns diff --git a/src/compiler/GF/Grammar/Parser.y b/src/compiler/GF/Grammar/Parser.y index 018430ae6..58742416d 100644 --- a/src/compiler/GF/Grammar/Parser.y +++ b/src/compiler/GF/Grammar/Parser.y @@ -492,7 +492,7 @@ Patt1 :: { Patt } Patt1 : Ident ListPatt { PC $1 $2 } | ModuleName '.' Ident ListPatt { PP ($1,$3) $4 } - | Patt3 '*' { PRep $1 } + | Patt3 '*' { PRep 0 maxBound $1 } | Patt2 { $1 } Patt2 :: { Patt } diff --git a/src/compiler/GF/Grammar/PatternMatch.hs b/src/compiler/GF/Grammar/PatternMatch.hs index b7a85660c..ae10c37c3 100644 --- a/src/compiler/GF/Grammar/PatternMatch.hs +++ b/src/compiler/GF/Grammar/PatternMatch.hs @@ -123,7 +123,7 @@ tryMatch (p,t) = do (PSeq min1 max1 p1 min2 max2 p2, ([],K s, [])) -> matchPSeq min1 max1 p1 min2 max2 p2 s - (PRep p1, ([],K s, [])) -> checks [ + (PRep _ _ p1, ([],K s, [])) -> checks [ trym (foldr (const (PSeq 0 maxBound p1 0 maxBound)) (PString "") [1..n]) t' | n <- [0 .. length s] ] >> diff --git a/src/compiler/GF/Grammar/Printer.hs b/src/compiler/GF/Grammar/Printer.hs index 67dde7fd6..a34c665f2 100644 --- a/src/compiler/GF/Grammar/Printer.hs +++ b/src/compiler/GF/Grammar/Printer.hs @@ -257,7 +257,7 @@ ppPatt q d (PC f ps) = if null ps ppPatt q d (PP f ps) = if null ps then ppQIdent q f else prec d 1 (ppQIdent q f <+> hsep (map (ppPatt q 3) ps)) -ppPatt q d (PRep p) = prec d 1 (ppPatt q 3 p <> '*') +ppPatt q d (PRep _ _ p) = prec d 1 (ppPatt q 3 p <> '*') ppPatt q d (PAs f p) = prec d 2 (f <> '@' <> ppPatt q 3 p) ppPatt q d (PNeg p) = prec d 2 ('-' <> ppPatt q 3 p) ppPatt q d (PChar) = pp '?' diff --git a/testsuite/compiler/compute/string_matching.gfs b/testsuite/compiler/compute/string_matching.gfs index 8701834de..0da0b70e8 100644 --- a/testsuite/compiler/compute/string_matching.gfs +++ b/testsuite/compiler/compute/string_matching.gfs @@ -21,3 +21,5 @@ cc x; _ => "?"} : Str> cc x; _ => "?"} : Str> cc x; _ => "?"} : Str> cc x; _ => "?"} : Str> +cc x; _ => "?"} : Str> +cc x; _ => "?"} : Str> diff --git a/testsuite/compiler/compute/string_matching.gfs.gold b/testsuite/compiler/compute/string_matching.gfs.gold index d24659547..c8f218e57 100644 --- a/testsuite/compiler/compute/string_matching.gfs.gold +++ b/testsuite/compiler/compute/string_matching.gfs.gold @@ -20,3 +20,5 @@ param_table.Q2 "abc" ++ "def" ++ "g" "abcdefghi" "abcdefghi" +"aaaaxy" +"xy"