mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-19 16:12:52 -06:00
implement measured patterns
This commit is contained in:
@@ -244,7 +244,7 @@ instance Binary Patt where
|
||||
put (PAs x y) = putWord8 10 >> put (x,y)
|
||||
put (PNeg x) = putWord8 11 >> put x
|
||||
put (PAlt x y) = putWord8 12 >> put (x,y)
|
||||
put (PSeq x y) = putWord8 13 >> put (x,y)
|
||||
put (PSeq minx maxx x miny maxy y) = putWord8 13 >> put (minx,maxx,x,miny,maxy,y)
|
||||
put (PRep x) = putWord8 14 >> put x
|
||||
put (PChar) = putWord8 15
|
||||
put (PChars x) = putWord8 16 >> put x
|
||||
@@ -266,7 +266,7 @@ instance Binary Patt where
|
||||
10 -> get >>= \(x,y) -> return (PAs x y)
|
||||
11 -> get >>= \x -> return (PNeg x)
|
||||
12 -> get >>= \(x,y) -> return (PAlt x y)
|
||||
13 -> get >>= \(x,y) -> return (PSeq x y)
|
||||
13 -> get >>= \(minx,maxx,x,miny,maxy,y) -> return (PSeq minx maxx x miny maxy y)
|
||||
14 -> get >>= \x -> return (PRep x)
|
||||
15 -> return (PChar)
|
||||
16 -> get >>= \x -> return (PChars x)
|
||||
|
||||
@@ -421,8 +421,7 @@ data Patt =
|
||||
-- regular expression patterns
|
||||
| PNeg Patt -- ^ negated pattern: -p
|
||||
| PAlt Patt Patt -- ^ disjunctive pattern: p1 | p2
|
||||
| PSeq Patt Patt -- ^ sequence of token parts: p + q
|
||||
| PMSeq MPatt MPatt -- ^ sequence of token parts: p + q
|
||||
| PSeq Int Int Patt Int Int Patt -- ^ sequence of token parts: p + q
|
||||
| PRep Patt -- ^ repetition of token part: p*
|
||||
| PChar -- ^ string of length one: ?
|
||||
| PChars [Char] -- ^ character list: ["aeiou"]
|
||||
@@ -430,9 +429,6 @@ data Patt =
|
||||
| PM QIdent -- #m.p
|
||||
deriving (Show, Eq, Ord)
|
||||
|
||||
-- | Measured pattern (paired with the min & max matching length)
|
||||
type MPatt = ((Int,Int),Patt)
|
||||
|
||||
-- | to guide computation and type checking of tables
|
||||
data TInfo =
|
||||
TRaw -- ^ received from parser; can be anything
|
||||
|
||||
@@ -392,7 +392,7 @@ term2patt trm = case termForm trm of
|
||||
Ok ([], Cn id, [a,b]) | id == cSeq -> do
|
||||
a' <- term2patt a
|
||||
b' <- term2patt b
|
||||
return (PSeq a' b')
|
||||
return (PSeq 0 maxBound a' 0 maxBound b')
|
||||
Ok ([], Cn id, [a,b]) | id == cAlt -> do
|
||||
a' <- term2patt a
|
||||
b' <- term2patt b
|
||||
@@ -422,7 +422,7 @@ patt2term pt = case pt of
|
||||
PAs x p -> appCons cAs [Vr x, patt2term p] --- an encoding
|
||||
PChar -> appCons cChar [] --- an encoding
|
||||
PChars s -> appCons cChars [K s] --- an encoding
|
||||
PSeq a b -> appCons cSeq [(patt2term a), (patt2term b)] --- an encoding
|
||||
PSeq _ _ a _ _ b -> appCons cSeq [(patt2term a), (patt2term b)] --- an encoding
|
||||
PAlt a b -> appCons cAlt [(patt2term a), (patt2term b)] --- an encoding
|
||||
PRep a -> appCons cRep [(patt2term a)] --- an encoding
|
||||
PNeg a -> appCons cNeg [(patt2term a)] --- an encoding
|
||||
@@ -475,8 +475,7 @@ composPattOp op patt =
|
||||
PImplArg p -> liftM PImplArg (op p)
|
||||
PNeg p -> liftM PNeg (op p)
|
||||
PAlt p1 p2 -> liftM2 PAlt (op p1) (op p2)
|
||||
PSeq p1 p2 -> liftM2 PSeq (op p1) (op p2)
|
||||
PMSeq (_,p1) (_,p2) -> liftM2 PSeq (op p1) (op p2) -- information loss
|
||||
PSeq _ _ p1 _ _ p2 -> liftM2 (\p1 p2 -> PSeq 0 maxBound p1 0 maxBound p2) (op p1) (op p2)
|
||||
PRep p -> liftM PRep (op p)
|
||||
_ -> return patt -- covers cases without subpatterns
|
||||
|
||||
@@ -514,8 +513,7 @@ collectPattOp op patt =
|
||||
PImplArg p -> op p
|
||||
PNeg p -> op p
|
||||
PAlt p1 p2 -> op p1++op p2
|
||||
PSeq p1 p2 -> op p1++op p2
|
||||
PMSeq (_,p1) (_,p2) -> op p1++op p2
|
||||
PSeq _ _ p1 _ _ p2 -> op p1++op p2
|
||||
PRep p -> op p
|
||||
_ -> [] -- covers cases without subpatterns
|
||||
|
||||
|
||||
@@ -485,7 +485,7 @@ Exps
|
||||
Patt :: { Patt }
|
||||
Patt
|
||||
: Patt '|' Patt1 { PAlt $1 $3 }
|
||||
| Patt '+' Patt1 { PSeq $1 $3 }
|
||||
| Patt '+' Patt1 { PSeq 0 maxBound $1 0 maxBound $3 }
|
||||
| Patt1 { $1 }
|
||||
|
||||
Patt1 :: { Patt }
|
||||
|
||||
@@ -15,8 +15,7 @@
|
||||
module GF.Grammar.PatternMatch (
|
||||
matchPattern,
|
||||
testOvershadow,
|
||||
findMatch,
|
||||
measurePatt
|
||||
findMatch
|
||||
) where
|
||||
|
||||
import GF.Data.Operations
|
||||
@@ -122,11 +121,10 @@ tryMatch (p,t) = do
|
||||
Bad _ -> return []
|
||||
_ -> raise (render ("no match with negative pattern" <+> p))
|
||||
|
||||
(PSeq p1 p2, ([],K s, [])) -> matchPSeq p1 p2 s
|
||||
(PMSeq mp1 mp2, ([],K s, [])) -> matchPMSeq mp1 mp2 s
|
||||
(PSeq min1 max1 p1 min2 max2 p2, ([],K s, [])) -> matchPSeq min1 max1 p1 min2 max2 p2 s
|
||||
|
||||
(PRep p1, ([],K s, [])) -> checks [
|
||||
trym (foldr (const (PSeq p1)) (PString "")
|
||||
trym (foldr (const (PSeq 0 maxBound p1 0 maxBound)) (PString "")
|
||||
[1..n]) t' | n <- [0 .. length s]
|
||||
] >>
|
||||
return []
|
||||
@@ -140,12 +138,7 @@ tryMatch (p,t) = do
|
||||
words2term [w] = K w
|
||||
words2term (w:ws) = C (K w) (words2term ws)
|
||||
|
||||
|
||||
matchPMSeq (m1,p1) (m2,p2) s = matchPSeq' m1 p1 m2 p2 s
|
||||
--matchPSeq p1 p2 s = matchPSeq' (0,maxBound::Int) p1 (0,maxBound::Int) p2 s
|
||||
matchPSeq p1 p2 s = matchPSeq' (lengthBounds p1) p1 (lengthBounds p2) p2 s
|
||||
|
||||
matchPSeq' b1@(min1,max1) p1 b2@(min2,max2) p2 s =
|
||||
matchPSeq min1 max1 p1 min2 max2 p2 s =
|
||||
do let n = length s
|
||||
lo = min1 `max` (n-max2)
|
||||
hi = (n-min2) `min` max1
|
||||
@@ -153,37 +146,6 @@ matchPSeq' b1@(min1,max1) p1 b2@(min2,max2) p2 s =
|
||||
matches <- checks [mapM tryMatch [(p1,K s1),(p2,K s2)] | (s1,s2) <- cuts]
|
||||
return (concat matches)
|
||||
|
||||
-- | Estimate the minimal length of the string that a pattern will match
|
||||
minLength = matchLength 0 id (+) min -- safe underestimate
|
||||
|
||||
-- | Estimate the maximal length of the string that a pattern will match
|
||||
maxLength =
|
||||
maybe maxBound id . matchLength Nothing Just (liftM2 (+)) (liftM2 max)
|
||||
-- safe overestimate
|
||||
|
||||
matchLength unknown known seq alt = len
|
||||
where
|
||||
len p =
|
||||
case p of
|
||||
PString s -> known (length s)
|
||||
PSeq p1 p2 -> seq (len p1) (len p2)
|
||||
PAlt p1 p2 -> alt (len p1) (len p2)
|
||||
PChar -> known 1
|
||||
PChars _ -> known 1
|
||||
PAs x p' -> len p'
|
||||
PT t p' -> len p'
|
||||
_ -> unknown
|
||||
|
||||
lengthBounds p = (minLength p,maxLength p)
|
||||
|
||||
mPatt p = (lengthBounds p,measurePatt p)
|
||||
|
||||
measurePatt p =
|
||||
case p of
|
||||
PSeq p1 p2 -> PMSeq (mPatt p1) (mPatt p2)
|
||||
_ -> composSafePattOp measurePatt p
|
||||
|
||||
|
||||
isInConstantForm :: Term -> Bool
|
||||
isInConstantForm trm = case trm of
|
||||
Cn _ -> True
|
||||
|
||||
@@ -250,8 +250,7 @@ ppCase q (p,e) = ppPatt q 0 p <+> "=>" <+> ppTerm q 0 e
|
||||
instance Pretty Patt where pp = ppPatt Unqualified 0
|
||||
|
||||
ppPatt q d (PAlt p1 p2) = prec d 0 (ppPatt q 0 p1 <+> '|' <+> ppPatt q 1 p2)
|
||||
ppPatt q d (PSeq p1 p2) = prec d 0 (ppPatt q 0 p1 <+> '+' <+> ppPatt q 1 p2)
|
||||
ppPatt q d (PMSeq (_,p1) (_,p2)) = prec d 0 (ppPatt q 0 p1 <+> '+' <+> ppPatt q 1 p2)
|
||||
ppPatt q d (PSeq _ _ p1 _ _ p2) = prec d 0 (ppPatt q 0 p1 <+> '+' <+> ppPatt q 1 p2)
|
||||
ppPatt q d (PC f ps) = if null ps
|
||||
then pp f
|
||||
else prec d 1 (f <+> hsep (map (ppPatt q 3) ps))
|
||||
|
||||
Reference in New Issue
Block a user