From 1e6e84c5a6a075f463be5a8e92d4ef6a6c512700 Mon Sep 17 00:00:00 2001 From: Matheus Bernardo Date: Fri, 1 Mar 2024 16:48:00 +0100 Subject: [PATCH] Adding html tags to the parser and lexer --- src/compiler/api/GF/Grammar/Lexer.x | 10 ++++++-- src/compiler/api/GF/Grammar/Macros.hs | 3 +++ src/compiler/api/GF/Grammar/Parser.y | 33 +++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/src/compiler/api/GF/Grammar/Lexer.x b/src/compiler/api/GF/Grammar/Lexer.x index b6b024d7b..5303932de 100644 --- a/src/compiler/api/GF/Grammar/Lexer.x +++ b/src/compiler/api/GF/Grammar/Lexer.x @@ -31,7 +31,7 @@ $i = [$l $d _ '] -- identifier character $u = [.\n] -- universal: any character @rsyms = -- symbols and non-identifier-like reserved words - \; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/ | \: \= | \: \: \= + \; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \<\/ | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/ | \: \= | \: \: \= :- "--" [.]* ; -- Toss single line comments @@ -47,11 +47,14 @@ $white+ ; (\-)? $d+ { tok (T_Integer . read . unpack) } (\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . unpack) } +\< (\_ | $l)($l | $d | \_ | \')* { tok tag } + { unpack = UTF8.toString --unpack = id ident = res T_Ident . identC . rawIdentC +tag = res T_less_tag . identC . rawIdentC . BS.tail tok f p s = f s @@ -74,6 +77,7 @@ data Token | T_colon | T_semicolon | T_less + | T_less_close | T_equal | T_big_rarrow | T_great @@ -129,12 +133,13 @@ data Token | T_terminator | T_separator | T_nonempty + | T_less_tag Ident | T_String String -- string literals | T_Integer Integer -- integer literals | T_Double Double -- double precision float literals | T_Ident Ident | T_EOF --- deriving Show -- debug + deriving Show -- debug res = eitherResIdent eitherResIdent :: (Ident -> Token) -> Ident -> Token @@ -164,6 +169,7 @@ resWords = Map.fromList , b "/" T_alt , b ":" T_colon , b ";" T_semicolon + , b "" T_big_rarrow diff --git a/src/compiler/api/GF/Grammar/Macros.hs b/src/compiler/api/GF/Grammar/Macros.hs index 7d083fbe3..cb097395c 100644 --- a/src/compiler/api/GF/Grammar/Macros.hs +++ b/src/compiler/api/GF/Grammar/Macros.hs @@ -260,6 +260,9 @@ tuple2recordType ts = [(tupleLabel i, t) | (i,t) <- zip [1..] ts] tuple2recordPatt :: [Patt] -> [(Label,Patt)] tuple2recordPatt ts = [(tupleLabel i, t) | (i,t) <- zip [1..] ts] +attrs2record :: [(Ident, Term)] -> [Assign] +attrs2record ts = [assign (ident2label i) ((App (Q (cPredef, (identS "linearize"))) t)) | (i,t) <- ts] + mkCases :: Ident -> Term -> Term mkCases x t = T TRaw [(PV x, t)] diff --git a/src/compiler/api/GF/Grammar/Parser.y b/src/compiler/api/GF/Grammar/Parser.y index 76e3a4c16..cad954d55 100644 --- a/src/compiler/api/GF/Grammar/Parser.y +++ b/src/compiler/api/GF/Grammar/Parser.y @@ -59,6 +59,8 @@ import qualified Data.Map as Map ':' { T_colon } ';' { T_semicolon } '<' { T_less } + '' { T_big_rarrow} '>' { T_great } @@ -468,6 +470,17 @@ Exp6 | '<' ListTupleComp '>' { R (tuple2record $2) } | '<' Exp ':' Exp '>' { Typed $2 $4 } | '(' Exp ')' { $2 } + | '' Exps '' {% mkTag $1 $6 $2 $4 } + | '' {% mkTag $1 $1 $2 [Empty] } + +Attributes :: { [(Ident,Term)] } +Attributes + : { [] } + | Attribute Attributes { $1:$2 } + +Attribute :: { (Ident,Term) } +Attribute + : Ident '=' Exp6 { ($1,$3) } ListExp :: { [Term] } ListExp @@ -813,4 +826,24 @@ mkAlts cs = case cs of mkL :: Posn -> Posn -> x -> L x mkL (Pn l1 _) (Pn l2 _) x = L (Local l1 l2) x + +mkTag ident ident' attrs t + | showIdent ident == showIdent ident' = return (App + (App + (App + (Q (cPredef, (identS "markup"))) + (R (attrs2record attrs)) + ) + (K (showIdent ident)) + ) + (mkConcatenation t) + ) + + | otherwise = fail "Tags don't match" + +mkConcatenation [] = Empty +mkConcatenation [t] = (App (Q (cPredef, (identS "linearize"))) t) +mkConcatenation (t:ts) = C t (mkConcatenation ts) + + }