mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 03:32:51 -06:00
fix the disambiguation of html tags
This commit is contained in:
@@ -399,6 +399,8 @@ data Term =
|
|||||||
| Strs [Term] -- ^ conditioning prefix strings: @strs {s ; ...}@
|
| Strs [Term] -- ^ conditioning prefix strings: @strs {s ; ...}@
|
||||||
| TSymCat Int LIndex [(LIndex,(Ident,Type))]
|
| TSymCat Int LIndex [(LIndex,(Ident,Type))]
|
||||||
| TSymVar Int Int
|
| TSymVar Int Int
|
||||||
|
| OpenTag Ident [Assign] -- used internally in the parser
|
||||||
|
| CloseTag Ident -- used internally in the parser
|
||||||
deriving (Show, Eq, Ord)
|
deriving (Show, Eq, Ord)
|
||||||
|
|
||||||
-- | Patterns
|
-- | Patterns
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ module GF.Grammar.Lexer
|
|||||||
) where
|
) where
|
||||||
|
|
||||||
import Control.Applicative
|
import Control.Applicative
|
||||||
import Control.Monad(ap)
|
import Control.Monad(ap,mplus)
|
||||||
import GF.Infra.Ident
|
import GF.Infra.Ident
|
||||||
--import GF.Data.Operations
|
--import GF.Data.Operations
|
||||||
import qualified Data.ByteString.Char8 as BS
|
import qualified Data.ByteString.Char8 as BS
|
||||||
@@ -17,8 +17,8 @@ import qualified Data.ByteString.Internal as BS(w2c)
|
|||||||
import qualified Data.ByteString.UTF8 as UTF8
|
import qualified Data.ByteString.UTF8 as UTF8
|
||||||
import qualified Data.Map as Map
|
import qualified Data.Map as Map
|
||||||
import Data.Word(Word8)
|
import Data.Word(Word8)
|
||||||
import Data.Char(readLitChar)
|
import Data.Char(readLitChar,isSpace)
|
||||||
--import Debug.Trace(trace)
|
import Data.Maybe(isJust)
|
||||||
import qualified Control.Monad.Fail as Fail
|
import qualified Control.Monad.Fail as Fail
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -31,7 +31,7 @@ $i = [$l $d _ '] -- identifier character
|
|||||||
$u = [.\n] -- universal: any character
|
$u = [.\n] -- universal: any character
|
||||||
|
|
||||||
@rsyms = -- symbols and non-identifier-like reserved words
|
@rsyms = -- symbols and non-identifier-like reserved words
|
||||||
\; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \<\/ | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/ | \: \= | \: \: \=
|
\; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/ | \: \= | \: \: \=
|
||||||
|
|
||||||
:-
|
:-
|
||||||
"--" [.]* ; -- Toss single line comments
|
"--" [.]* ; -- Toss single line comments
|
||||||
@@ -39,6 +39,9 @@ $u = [.\n] -- universal: any character
|
|||||||
|
|
||||||
$white+ ;
|
$white+ ;
|
||||||
@rsyms { tok ident }
|
@rsyms { tok ident }
|
||||||
|
\< { \_ _ s -> if start_of_tag (BS.tail s)
|
||||||
|
then T_less_tag
|
||||||
|
else T_less }
|
||||||
\' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (T_Ident . identS . unescapeInitTail . unpack) }
|
\' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (T_Ident . identS . unescapeInitTail . unpack) }
|
||||||
(\_ | $l)($l | $d | \_ | \')* { tok ident }
|
(\_ | $l)($l | $d | \_ | \')* { tok ident }
|
||||||
|
|
||||||
@@ -47,16 +50,14 @@ $white+ ;
|
|||||||
(\-)? $d+ { tok (T_Integer . read . unpack) }
|
(\-)? $d+ { tok (T_Integer . read . unpack) }
|
||||||
(\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . unpack) }
|
(\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . unpack) }
|
||||||
|
|
||||||
\< (\_ | $l)($l | $d | \_ | \')* { tok tag }
|
|
||||||
|
|
||||||
{
|
{
|
||||||
unpack = UTF8.toString
|
unpack = UTF8.toString
|
||||||
--unpack = id
|
--unpack = id
|
||||||
|
|
||||||
ident = res T_Ident . identC . rawIdentC
|
ident = res T_Ident . identC . rawIdentC
|
||||||
tag = res T_less_tag . identC . rawIdentC . BS.tail
|
|
||||||
|
|
||||||
tok f p s = f s
|
tok f p len s = f (UTF8.take len s)
|
||||||
|
|
||||||
data Token
|
data Token
|
||||||
= T_exclmark
|
= T_exclmark
|
||||||
@@ -77,7 +78,7 @@ data Token
|
|||||||
| T_colon
|
| T_colon
|
||||||
| T_semicolon
|
| T_semicolon
|
||||||
| T_less
|
| T_less
|
||||||
| T_less_close
|
| T_less_tag
|
||||||
| T_equal
|
| T_equal
|
||||||
| T_big_rarrow
|
| T_big_rarrow
|
||||||
| T_great
|
| T_great
|
||||||
@@ -133,7 +134,6 @@ data Token
|
|||||||
| T_terminator
|
| T_terminator
|
||||||
| T_separator
|
| T_separator
|
||||||
| T_nonempty
|
| T_nonempty
|
||||||
| T_less_tag Ident
|
|
||||||
| T_String String -- string literals
|
| T_String String -- string literals
|
||||||
| T_Integer Integer -- integer literals
|
| T_Integer Integer -- integer literals
|
||||||
| T_Double Double -- double precision float literals
|
| T_Double Double -- double precision float literals
|
||||||
@@ -169,8 +169,6 @@ resWords = Map.fromList
|
|||||||
, b "/" T_alt
|
, b "/" T_alt
|
||||||
, b ":" T_colon
|
, b ":" T_colon
|
||||||
, b ";" T_semicolon
|
, b ";" T_semicolon
|
||||||
, b "</" T_less_close
|
|
||||||
, b "<" T_less
|
|
||||||
, b "=" T_equal
|
, b "=" T_equal
|
||||||
, b "=>" T_big_rarrow
|
, b "=>" T_big_rarrow
|
||||||
, b ">" T_great
|
, b ">" T_great
|
||||||
@@ -322,13 +320,51 @@ lexer cont = cont=<<token
|
|||||||
token :: P Token
|
token :: P Token
|
||||||
token = P go
|
token = P go
|
||||||
where
|
where
|
||||||
--cont' t = trace (show t) (cont t)
|
|
||||||
go ai2@(_,inp@(AI pos _ str)) =
|
go ai2@(_,inp@(AI pos _ str)) =
|
||||||
case alexScan inp 0 of
|
case alexScan inp 0 of
|
||||||
AlexEOF -> POk (inp,inp) T_EOF
|
AlexEOF -> POk (inp,inp) T_EOF
|
||||||
AlexError (AI pos _ _) -> PFailed pos "lexical error"
|
AlexError (AI pos _ _) -> PFailed pos "lexical error"
|
||||||
AlexSkip inp' len -> {-trace (show len) $-} go (inp,inp')
|
AlexSkip inp' len -> {-trace (show len) $-} go (inp,inp')
|
||||||
AlexToken inp' len act -> POk (inp,inp') (act pos ({-UTF8.toString-} (UTF8.take len str)))
|
AlexToken inp' len act -> POk (inp,inp') (act pos len str)
|
||||||
|
|
||||||
|
start_of_tag s = isJust (match s)
|
||||||
|
where
|
||||||
|
match s = do
|
||||||
|
s <- matchSpace s
|
||||||
|
(char s '/'
|
||||||
|
`mplus`
|
||||||
|
do s <- matchIdent s
|
||||||
|
s <- matchSpace s
|
||||||
|
(char s '/'
|
||||||
|
`mplus`
|
||||||
|
do s <- matchIdent s
|
||||||
|
s <- matchSpace s
|
||||||
|
char s '='))
|
||||||
|
|
||||||
|
matchSpace s
|
||||||
|
| BS.null s = Just s
|
||||||
|
| isSpace (BS.head s) = matchSpace (BS.tail s)
|
||||||
|
| otherwise = Just s
|
||||||
|
|
||||||
|
init =
|
||||||
|
BS.pack "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
|
||||||
|
rest =
|
||||||
|
BS.append init (BS.pack "0123456789'")
|
||||||
|
|
||||||
|
char s c
|
||||||
|
| BS.null s = Nothing
|
||||||
|
| BS.head s == c = Just (BS.tail s)
|
||||||
|
| otherwise = Nothing
|
||||||
|
|
||||||
|
matchIdent s
|
||||||
|
| BS.null s = Nothing
|
||||||
|
| BS.elem (BS.head s) init = matchRest (BS.tail s)
|
||||||
|
| otherwise = Nothing
|
||||||
|
|
||||||
|
matchRest s
|
||||||
|
| BS.null s = Just s
|
||||||
|
| BS.elem (BS.head s) rest = matchRest (BS.tail s)
|
||||||
|
| otherwise = Just s
|
||||||
|
|
||||||
getPosn :: P Posn
|
getPosn :: P Posn
|
||||||
getPosn = P $ \ai2@(_,inp@(AI pos _ _)) -> POk ai2 pos
|
getPosn = P $ \ai2@(_,inp@(AI pos _ _)) -> POk ai2 pos
|
||||||
|
|||||||
@@ -260,9 +260,6 @@ tuple2recordType ts = [(tupleLabel i, t) | (i,t) <- zip [1..] ts]
|
|||||||
tuple2recordPatt :: [Patt] -> [(Label,Patt)]
|
tuple2recordPatt :: [Patt] -> [(Label,Patt)]
|
||||||
tuple2recordPatt ts = [(tupleLabel i, t) | (i,t) <- zip [1..] ts]
|
tuple2recordPatt ts = [(tupleLabel i, t) | (i,t) <- zip [1..] ts]
|
||||||
|
|
||||||
attrs2record :: [(Ident, Term)] -> [Assign]
|
|
||||||
attrs2record ts = [assign (ident2label i) ((App (Q (cPredef, (identS "linearize"))) t)) | (i,t) <- ts]
|
|
||||||
|
|
||||||
mkCases :: Ident -> Term -> Term
|
mkCases :: Ident -> Term -> Term
|
||||||
mkCases x t = T TRaw [(PV x, t)]
|
mkCases x t = T TRaw [(PV x, t)]
|
||||||
|
|
||||||
|
|||||||
@@ -59,8 +59,7 @@ import qualified Data.Map as Map
|
|||||||
':' { T_colon }
|
':' { T_colon }
|
||||||
';' { T_semicolon }
|
';' { T_semicolon }
|
||||||
'<' { T_less }
|
'<' { T_less }
|
||||||
'</' { T_less_close }
|
'<tag' { T_less_tag }
|
||||||
'<tag' { (T_less_tag $$) }
|
|
||||||
'=' { T_equal }
|
'=' { T_equal }
|
||||||
'=>' { T_big_rarrow}
|
'=>' { T_big_rarrow}
|
||||||
'>' { T_great }
|
'>' { T_great }
|
||||||
@@ -433,7 +432,9 @@ Exp3
|
|||||||
|
|
||||||
Exp4 :: { Term }
|
Exp4 :: { Term }
|
||||||
Exp4
|
Exp4
|
||||||
: Exp4 Exp5 { App $1 $2 }
|
: Exp4 Exp5 {% case $2 of
|
||||||
|
CloseTag id -> mkTag id $1 Empty
|
||||||
|
_ -> return (App $1 $2) }
|
||||||
| Exp4 '{' Exp '}' { App $1 (ImplArg $3) }
|
| Exp4 '{' Exp '}' { App $1 (ImplArg $3) }
|
||||||
| 'case' Exp 'of' '{' ListCase '}' { let annot = case $2 of
|
| 'case' Exp 'of' '{' ListCase '}' { let annot = case $2 of
|
||||||
Typed _ t -> TTyped t
|
Typed _ t -> TTyped t
|
||||||
@@ -470,17 +471,18 @@ Exp6
|
|||||||
| '<' ListTupleComp '>' { R (tuple2record $2) }
|
| '<' ListTupleComp '>' { R (tuple2record $2) }
|
||||||
| '<' Exp ':' Exp '>' { Typed $2 $4 }
|
| '<' Exp ':' Exp '>' { Typed $2 $4 }
|
||||||
| '(' Exp ')' { $2 }
|
| '(' Exp ')' { $2 }
|
||||||
| '<tag' Attributes '>' Exps '</' Ident '>' {% mkTag $1 $6 $2 $4 }
|
| '<tag' Ident Attributes '>' { OpenTag $2 $3 }
|
||||||
| '<tag' Attributes '/' '>' {% mkTag $1 $1 $2 [Empty] }
|
| '<tag' '/' Ident '>' { CloseTag $3 }
|
||||||
|
| '<tag' Ident Attributes '/' '>' { markup $2 $3 Empty }
|
||||||
|
|
||||||
Attributes :: { [(Ident,Term)] }
|
Attributes :: { [Assign] }
|
||||||
Attributes
|
Attributes
|
||||||
: { [] }
|
: { [] }
|
||||||
| Attribute Attributes { $1:$2 }
|
| Attribute Attributes { $1:$2 }
|
||||||
|
|
||||||
Attribute :: { (Ident,Term) }
|
Attribute :: { Assign }
|
||||||
Attribute
|
Attribute
|
||||||
: Ident '=' Exp6 { ($1,$3) }
|
: Ident '=' Exp6 { assign (ident2label $1) $3 }
|
||||||
|
|
||||||
ListExp :: { [Term] }
|
ListExp :: { [Term] }
|
||||||
ListExp
|
ListExp
|
||||||
@@ -827,23 +829,34 @@ mkL :: Posn -> Posn -> x -> L x
|
|||||||
mkL (Pn l1 _) (Pn l2 _) x = L (Local l1 l2) x
|
mkL (Pn l1 _) (Pn l2 _) x = L (Local l1 l2) x
|
||||||
|
|
||||||
|
|
||||||
mkTag ident ident' attrs t
|
mkTag ident (App t1 t2) conc =
|
||||||
| showIdent ident == showIdent ident' = return (App
|
case match ident t2 of
|
||||||
(App
|
Just attrs -> fmap (App t1) (mkTag ident t2 conc)
|
||||||
(App
|
Nothing -> let t = App (Q (cPredef, (identS "linearize"))) t2
|
||||||
(Q (cPredef, (identS "markup")))
|
in mkTag ident t1 $ case conc of
|
||||||
(R (attrs2record attrs))
|
Empty -> t
|
||||||
)
|
_ -> C t conc
|
||||||
(K (showIdent ident))
|
mkTag ident t conc =
|
||||||
)
|
case match ident t of
|
||||||
(mkConcatenation t)
|
Just attrs -> return (markup ident attrs conc)
|
||||||
)
|
Nothing -> fail ("Unmatched closing tag " ++ showIdent ident)
|
||||||
|
|
||||||
| otherwise = fail "Tags don't match"
|
match ident (OpenTag ident' attrs)
|
||||||
|
| ident == ident' = Just attrs
|
||||||
mkConcatenation [] = Empty
|
match ident (R [(lbl,(Nothing,Vr ident'))])
|
||||||
mkConcatenation [t] = (App (Q (cPredef, (identS "linearize"))) t)
|
| lbl == ident2label (identS "p1") && ident == ident'
|
||||||
mkConcatenation (t:ts) = C t (mkConcatenation ts)
|
= Just []
|
||||||
|
match ident _ = Nothing
|
||||||
|
|
||||||
|
markup ident attrs content =
|
||||||
|
App
|
||||||
|
(App
|
||||||
|
(App
|
||||||
|
(Q (cPredef, (identS "markup")))
|
||||||
|
(R attrs)
|
||||||
|
)
|
||||||
|
(K (showIdent ident))
|
||||||
|
)
|
||||||
|
content
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user