diff --git a/src/compiler/api/GF/Grammar/Grammar.hs b/src/compiler/api/GF/Grammar/Grammar.hs index cd992810d..1353462b8 100644 --- a/src/compiler/api/GF/Grammar/Grammar.hs +++ b/src/compiler/api/GF/Grammar/Grammar.hs @@ -399,6 +399,8 @@ data Term = | Strs [Term] -- ^ conditioning prefix strings: @strs {s ; ...}@ | TSymCat Int LIndex [(LIndex,(Ident,Type))] | TSymVar Int Int + | OpenTag Ident [Assign] -- used internally in the parser + | CloseTag Ident -- used internally in the parser deriving (Show, Eq, Ord) -- | Patterns diff --git a/src/compiler/api/GF/Grammar/Lexer.x b/src/compiler/api/GF/Grammar/Lexer.x index b6b024d7b..98e3be332 100644 --- a/src/compiler/api/GF/Grammar/Lexer.x +++ b/src/compiler/api/GF/Grammar/Lexer.x @@ -8,7 +8,7 @@ module GF.Grammar.Lexer ) where import Control.Applicative -import Control.Monad(ap) +import Control.Monad(ap,mplus) import GF.Infra.Ident --import GF.Data.Operations import qualified Data.ByteString.Char8 as BS @@ -17,8 +17,8 @@ import qualified Data.ByteString.Internal as BS(w2c) import qualified Data.ByteString.UTF8 as UTF8 import qualified Data.Map as Map import Data.Word(Word8) -import Data.Char(readLitChar) ---import Debug.Trace(trace) +import Data.Char(readLitChar,isSpace) +import Data.Maybe(isJust) import qualified Control.Monad.Fail as Fail } @@ -31,7 +31,7 @@ $i = [$l $d _ '] -- identifier character $u = [.\n] -- universal: any character @rsyms = -- symbols and non-identifier-like reserved words - \; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/ | \: \= | \: \: \= + \; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/ | \: \= | \: \: \= :- "--" [.]* ; -- Toss single line comments @@ -39,6 +39,9 @@ $u = [.\n] -- universal: any character $white+ ; @rsyms { tok ident } +\< { \_ _ s -> if start_of_tag (BS.tail s) + then T_less_tag + else T_less } \' ([. # [\' \\ \n]] | (\\ (\' | \\)))+ \' { tok (T_Ident . identS . unescapeInitTail . unpack) } (\_ | $l)($l | $d | \_ | \')* { tok ident } @@ -47,13 +50,14 @@ $white+ ; (\-)? $d+ { tok (T_Integer . read . unpack) } (\-)? $d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . unpack) } + { unpack = UTF8.toString --unpack = id ident = res T_Ident . identC . rawIdentC -tok f p s = f s +tok f p len s = f (UTF8.take len s) data Token = T_exclmark @@ -74,6 +78,7 @@ data Token | T_colon | T_semicolon | T_less + | T_less_tag | T_equal | T_big_rarrow | T_great @@ -134,7 +139,7 @@ data Token | T_Double Double -- double precision float literals | T_Ident Ident | T_EOF --- deriving Show -- debug + deriving Show -- debug res = eitherResIdent eitherResIdent :: (Ident -> Token) -> Ident -> Token @@ -164,7 +169,6 @@ resWords = Map.fromList , b "/" T_alt , b ":" T_colon , b ";" T_semicolon - , b "<" T_less , b "=" T_equal , b "=>" T_big_rarrow , b ">" T_great @@ -316,13 +320,51 @@ lexer cont = cont=< POk (inp,inp) T_EOF AlexError (AI pos _ _) -> PFailed pos "lexical error" AlexSkip inp' len -> {-trace (show len) $-} go (inp,inp') - AlexToken inp' len act -> POk (inp,inp') (act pos ({-UTF8.toString-} (UTF8.take len str))) + AlexToken inp' len act -> POk (inp,inp') (act pos len str) + +start_of_tag s = isJust (match s) + where + match s = do + s <- matchSpace s + (char s '/' + `mplus` + do s <- matchIdent s + s <- matchSpace s + (char s '/' + `mplus` + do s <- matchIdent s + s <- matchSpace s + char s '=')) + + matchSpace s + | BS.null s = Just s + | isSpace (BS.head s) = matchSpace (BS.tail s) + | otherwise = Just s + + init = + BS.pack "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ" + rest = + BS.append init (BS.pack "0123456789'") + + char s c + | BS.null s = Nothing + | BS.head s == c = Just (BS.tail s) + | otherwise = Nothing + + matchIdent s + | BS.null s = Nothing + | BS.elem (BS.head s) init = matchRest (BS.tail s) + | otherwise = Nothing + + matchRest s + | BS.null s = Just s + | BS.elem (BS.head s) rest = matchRest (BS.tail s) + | otherwise = Just s getPosn :: P Posn getPosn = P $ \ai2@(_,inp@(AI pos _ _)) -> POk ai2 pos diff --git a/src/compiler/api/GF/Grammar/Parser.y b/src/compiler/api/GF/Grammar/Parser.y index 76e3a4c16..40ff012bd 100644 --- a/src/compiler/api/GF/Grammar/Parser.y +++ b/src/compiler/api/GF/Grammar/Parser.y @@ -59,6 +59,7 @@ import qualified Data.Map as Map ':' { T_colon } ';' { T_semicolon } '<' { T_less } + '' { T_big_rarrow} '>' { T_great } @@ -431,7 +432,9 @@ Exp3 Exp4 :: { Term } Exp4 - : Exp4 Exp5 { App $1 $2 } + : Exp4 Exp5 {% case $2 of + CloseTag id -> mkTag id $1 Empty + _ -> return (App $1 $2) } | Exp4 '{' Exp '}' { App $1 (ImplArg $3) } | 'case' Exp 'of' '{' ListCase '}' { let annot = case $2 of Typed _ t -> TTyped t @@ -468,6 +471,18 @@ Exp6 | '<' ListTupleComp '>' { R (tuple2record $2) } | '<' Exp ':' Exp '>' { Typed $2 $4 } | '(' Exp ')' { $2 } + | '' { OpenTag $2 $3 } + | '' { CloseTag $3 } + | '' { markup $2 $3 Empty } + +Attributes :: { [Assign] } +Attributes + : { [] } + | Attribute Attributes { $1:$2 } + +Attribute :: { Assign } +Attribute + : Ident '=' Exp6 { assign (ident2label $1) $3 } ListExp :: { [Term] } ListExp @@ -813,4 +828,35 @@ mkAlts cs = case cs of mkL :: Posn -> Posn -> x -> L x mkL (Pn l1 _) (Pn l2 _) x = L (Local l1 l2) x + +mkTag ident (App t1 t2) conc = + case match ident t2 of + Just attrs -> fmap (App t1) (mkTag ident t2 conc) + Nothing -> let t = App (Q (cPredef, (identS "linearize"))) t2 + in mkTag ident t1 $ case conc of + Empty -> t + _ -> C t conc +mkTag ident t conc = + case match ident t of + Just attrs -> return (markup ident attrs conc) + Nothing -> fail ("Unmatched closing tag " ++ showIdent ident) + +match ident (OpenTag ident' attrs) + | ident == ident' = Just attrs +match ident (R [(lbl,(Nothing,Vr ident'))]) + | lbl == ident2label (identS "p1") && ident == ident' + = Just [] +match ident _ = Nothing + +markup ident attrs content = + App + (App + (App + (Q (cPredef, (identS "markup"))) + (R attrs) + ) + (K (showIdent ident)) + ) + content + }