fix the disambiguation of html tags

This commit is contained in:
Krasimir Angelov
2024-03-06 19:27:16 +01:00
parent b0f71ce0ac
commit 52c56feaf2
4 changed files with 88 additions and 40 deletions

View File

@@ -399,6 +399,8 @@ data Term =
| Strs [Term] -- ^ conditioning prefix strings: @strs {s ; ...}@ | Strs [Term] -- ^ conditioning prefix strings: @strs {s ; ...}@
| TSymCat Int LIndex [(LIndex,(Ident,Type))] | TSymCat Int LIndex [(LIndex,(Ident,Type))]
| TSymVar Int Int | TSymVar Int Int
| OpenTag Ident [Assign] -- used internally in the parser
| CloseTag Ident -- used internally in the parser
deriving (Show, Eq, Ord) deriving (Show, Eq, Ord)
-- | Patterns -- | Patterns

View File

@@ -8,7 +8,7 @@ module GF.Grammar.Lexer
) where ) where
import Control.Applicative import Control.Applicative
import Control.Monad(ap) import Control.Monad(ap,mplus)
import GF.Infra.Ident import GF.Infra.Ident
--import GF.Data.Operations --import GF.Data.Operations
import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Char8 as BS
@@ -17,8 +17,8 @@ import qualified Data.ByteString.Internal as BS(w2c)
import qualified Data.ByteString.UTF8 as UTF8 import qualified Data.ByteString.UTF8 as UTF8
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.Word(Word8) import Data.Word(Word8)
import Data.Char(readLitChar) import Data.Char(readLitChar,isSpace)
--import Debug.Trace(trace) import Data.Maybe(isJust)
import qualified Control.Monad.Fail as Fail import qualified Control.Monad.Fail as Fail
} }
@@ -31,7 +31,7 @@ $i = [$l $d _ '] -- identifier character
$u = [.\n] -- universal: any character $u = [.\n] -- universal: any character
@rsyms = -- symbols and non-identifier-like reserved words @rsyms = -- symbols and non-identifier-like reserved words
\; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \<\/ | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/ | \: \= | \: \: \= \; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/ | \: \= | \: \: \=
:- :-
"--" [.]* ; -- Toss single line comments "--" [.]* ; -- Toss single line comments
@@ -39,6 +39,9 @@ $u = [.\n] -- universal: any character
$white+ ; $white+ ;
@rsyms { tok ident } @rsyms { tok ident }
\< { \_ _ s -> if start_of_tag (BS.tail s)
then T_less_tag
else T_less }
\' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (T_Ident . identS . unescapeInitTail . unpack) } \' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (T_Ident . identS . unescapeInitTail . unpack) }
(\_ | $l)($l | $d | \_ | \')* { tok ident } (\_ | $l)($l | $d | \_ | \')* { tok ident }
@@ -47,16 +50,14 @@ $white+ ;
(\-)? $d+ { tok (T_Integer . read . unpack) } (\-)? $d+ { tok (T_Integer . read . unpack) }
(\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . unpack) } (\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . unpack) }
\< (\_ | $l)($l | $d | \_ | \')* { tok tag }
{ {
unpack = UTF8.toString unpack = UTF8.toString
--unpack = id --unpack = id
ident = res T_Ident . identC . rawIdentC ident = res T_Ident . identC . rawIdentC
tag = res T_less_tag . identC . rawIdentC . BS.tail
tok f p s = f s tok f p len s = f (UTF8.take len s)
data Token data Token
= T_exclmark = T_exclmark
@@ -77,7 +78,7 @@ data Token
| T_colon | T_colon
| T_semicolon | T_semicolon
| T_less | T_less
| T_less_close | T_less_tag
| T_equal | T_equal
| T_big_rarrow | T_big_rarrow
| T_great | T_great
@@ -133,7 +134,6 @@ data Token
| T_terminator | T_terminator
| T_separator | T_separator
| T_nonempty | T_nonempty
| T_less_tag Ident
| T_String String -- string literals | T_String String -- string literals
| T_Integer Integer -- integer literals | T_Integer Integer -- integer literals
| T_Double Double -- double precision float literals | T_Double Double -- double precision float literals
@@ -169,8 +169,6 @@ resWords = Map.fromList
, b "/" T_alt , b "/" T_alt
, b ":" T_colon , b ":" T_colon
, b ";" T_semicolon , b ";" T_semicolon
, b "</" T_less_close
, b "<" T_less
, b "=" T_equal , b "=" T_equal
, b "=>" T_big_rarrow , b "=>" T_big_rarrow
, b ">" T_great , b ">" T_great
@@ -322,13 +320,51 @@ lexer cont = cont=<<token
token :: P Token token :: P Token
token = P go token = P go
where where
--cont' t = trace (show t) (cont t)
go ai2@(_,inp@(AI pos _ str)) = go ai2@(_,inp@(AI pos _ str)) =
case alexScan inp 0 of case alexScan inp 0 of
AlexEOF -> POk (inp,inp) T_EOF AlexEOF -> POk (inp,inp) T_EOF
AlexError (AI pos _ _) -> PFailed pos "lexical error" AlexError (AI pos _ _) -> PFailed pos "lexical error"
AlexSkip inp' len -> {-trace (show len) $-} go (inp,inp') AlexSkip inp' len -> {-trace (show len) $-} go (inp,inp')
AlexToken inp' len act -> POk (inp,inp') (act pos ({-UTF8.toString-} (UTF8.take len str))) AlexToken inp' len act -> POk (inp,inp') (act pos len str)
start_of_tag s = isJust (match s)
where
match s = do
s <- matchSpace s
(char s '/'
`mplus`
do s <- matchIdent s
s <- matchSpace s
(char s '/'
`mplus`
do s <- matchIdent s
s <- matchSpace s
char s '='))
matchSpace s
| BS.null s = Just s
| isSpace (BS.head s) = matchSpace (BS.tail s)
| otherwise = Just s
init =
BS.pack "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
rest =
BS.append init (BS.pack "0123456789'")
char s c
| BS.null s = Nothing
| BS.head s == c = Just (BS.tail s)
| otherwise = Nothing
matchIdent s
| BS.null s = Nothing
| BS.elem (BS.head s) init = matchRest (BS.tail s)
| otherwise = Nothing
matchRest s
| BS.null s = Just s
| BS.elem (BS.head s) rest = matchRest (BS.tail s)
| otherwise = Just s
getPosn :: P Posn getPosn :: P Posn
getPosn = P $ \ai2@(_,inp@(AI pos _ _)) -> POk ai2 pos getPosn = P $ \ai2@(_,inp@(AI pos _ _)) -> POk ai2 pos

View File

@@ -260,9 +260,6 @@ tuple2recordType ts = [(tupleLabel i, t) | (i,t) <- zip [1..] ts]
tuple2recordPatt :: [Patt] -> [(Label,Patt)] tuple2recordPatt :: [Patt] -> [(Label,Patt)]
tuple2recordPatt ts = [(tupleLabel i, t) | (i,t) <- zip [1..] ts] tuple2recordPatt ts = [(tupleLabel i, t) | (i,t) <- zip [1..] ts]
attrs2record :: [(Ident, Term)] -> [Assign]
attrs2record ts = [assign (ident2label i) ((App (Q (cPredef, (identS "linearize"))) t)) | (i,t) <- ts]
mkCases :: Ident -> Term -> Term mkCases :: Ident -> Term -> Term
mkCases x t = T TRaw [(PV x, t)] mkCases x t = T TRaw [(PV x, t)]

View File

@@ -59,8 +59,7 @@ import qualified Data.Map as Map
':' { T_colon } ':' { T_colon }
';' { T_semicolon } ';' { T_semicolon }
'<' { T_less } '<' { T_less }
'</' { T_less_close } '<tag' { T_less_tag }
'<tag' { (T_less_tag $$) }
'=' { T_equal } '=' { T_equal }
'=>' { T_big_rarrow} '=>' { T_big_rarrow}
'>' { T_great } '>' { T_great }
@@ -433,7 +432,9 @@ Exp3
Exp4 :: { Term } Exp4 :: { Term }
Exp4 Exp4
: Exp4 Exp5 { App $1 $2 } : Exp4 Exp5 {% case $2 of
CloseTag id -> mkTag id $1 Empty
_ -> return (App $1 $2) }
| Exp4 '{' Exp '}' { App $1 (ImplArg $3) } | Exp4 '{' Exp '}' { App $1 (ImplArg $3) }
| 'case' Exp 'of' '{' ListCase '}' { let annot = case $2 of | 'case' Exp 'of' '{' ListCase '}' { let annot = case $2 of
Typed _ t -> TTyped t Typed _ t -> TTyped t
@@ -470,17 +471,18 @@ Exp6
| '<' ListTupleComp '>' { R (tuple2record $2) } | '<' ListTupleComp '>' { R (tuple2record $2) }
| '<' Exp ':' Exp '>' { Typed $2 $4 } | '<' Exp ':' Exp '>' { Typed $2 $4 }
| '(' Exp ')' { $2 } | '(' Exp ')' { $2 }
| '<tag' Attributes '>' Exps '</' Ident '>' {% mkTag $1 $6 $2 $4 } | '<tag' Ident Attributes '>' { OpenTag $2 $3 }
| '<tag' Attributes '/' '>' {% mkTag $1 $1 $2 [Empty] } | '<tag' '/' Ident '>' { CloseTag $3 }
| '<tag' Ident Attributes '/' '>' { markup $2 $3 Empty }
Attributes :: { [(Ident,Term)] } Attributes :: { [Assign] }
Attributes Attributes
: { [] } : { [] }
| Attribute Attributes { $1:$2 } | Attribute Attributes { $1:$2 }
Attribute :: { (Ident,Term) } Attribute :: { Assign }
Attribute Attribute
: Ident '=' Exp6 { ($1,$3) } : Ident '=' Exp6 { assign (ident2label $1) $3 }
ListExp :: { [Term] } ListExp :: { [Term] }
ListExp ListExp
@@ -827,23 +829,34 @@ mkL :: Posn -> Posn -> x -> L x
mkL (Pn l1 _) (Pn l2 _) x = L (Local l1 l2) x mkL (Pn l1 _) (Pn l2 _) x = L (Local l1 l2) x
mkTag ident ident' attrs t mkTag ident (App t1 t2) conc =
| showIdent ident == showIdent ident' = return (App case match ident t2 of
(App Just attrs -> fmap (App t1) (mkTag ident t2 conc)
(App Nothing -> let t = App (Q (cPredef, (identS "linearize"))) t2
(Q (cPredef, (identS "markup"))) in mkTag ident t1 $ case conc of
(R (attrs2record attrs)) Empty -> t
) _ -> C t conc
(K (showIdent ident)) mkTag ident t conc =
) case match ident t of
(mkConcatenation t) Just attrs -> return (markup ident attrs conc)
) Nothing -> fail ("Unmatched closing tag " ++ showIdent ident)
| otherwise = fail "Tags don't match" match ident (OpenTag ident' attrs)
| ident == ident' = Just attrs
mkConcatenation [] = Empty match ident (R [(lbl,(Nothing,Vr ident'))])
mkConcatenation [t] = (App (Q (cPredef, (identS "linearize"))) t) | lbl == ident2label (identS "p1") && ident == ident'
mkConcatenation (t:ts) = C t (mkConcatenation ts) = Just []
match ident _ = Nothing
markup ident attrs content =
App
(App
(App
(Q (cPredef, (identS "markup")))
(R attrs)
)
(K (showIdent ident))
)
content
} }