1
0
forked from GitHub/gf-core

Added CFGM format (pm -printer=cfgm) and utf8 conversion for pm.

This commit is contained in:
bringert
2004-08-23 07:51:36 +00:00
parent 25ffe15333
commit 65f012d155
22 changed files with 1829 additions and 20 deletions

129
src/GF/CFGM/LexCFG.x Normal file
View File

@@ -0,0 +1,129 @@
-- -*- haskell -*-
-- This Alex file was machine-generated by the BNF converter
{
module LexCFG where
import ErrM
}
$l = [a-zA-Z\192 - \255] # [\215 \247] -- isolatin1 letter FIXME
$c = [A-Z\192-\221] # [\215] -- capital isolatin1 letter FIXME
$s = [a-z\222-\255] # [\247] -- small isolatin1 letter FIXME
$d = [0-9] -- digit
$i = [$l $d _ '] -- identifier character
$u = [\0-\255] -- universal: any character
@rsyms = -- reserved words consisting of special symbols
\; | \: | \. | \- \> | \[ | \] | \, | \/ | \{ | \} | \!
:-
$white+ ;
@rsyms { tok (\p s -> PT p (TS s)) }
$l $i* { tok (\p s -> PT p (eitherResIdent TV s)) }
\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \"{ tok (\p s -> PT p (TL $ unescapeInitTail s)) }
$d+ { tok (\p s -> PT p (TI s)) }
{
tok f p s = f p s
data Tok =
TS String -- reserved words
| TL String -- string literals
| TI String -- integer literals
| TV String -- identifiers
| TD String -- double precision float literals
| TC String -- character literals
deriving (Eq,Show)
data Token =
PT Posn Tok
| Err Posn
deriving Show
tokenPos (PT (Pn _ l _) _ :_) = "line " ++ show l
tokenPos (Err (Pn _ l _) :_) = "line " ++ show l
tokenPos _ = "end of file"
posLineCol (Pn _ l c) = (l,c)
mkPosToken t@(PT p _) = (posLineCol p, prToken t)
prToken t = case t of
PT _ (TS s) -> s
PT _ (TI s) -> s
PT _ (TV s) -> s
PT _ (TD s) -> s
PT _ (TC s) -> s
eitherResIdent :: (String -> Tok) -> String -> Tok
eitherResIdent tv s = if isResWord s then (TS s) else (tv s) where
isResWord s = isInTree s $
B "grammar" (B "end" N N) (B "startcat" N N)
data BTree = N | B String BTree BTree deriving (Show)
isInTree :: String -> BTree -> Bool
isInTree x tree = case tree of
N -> False
B a left right
| x < a -> isInTree x left
| x > a -> isInTree x right
| x == a -> True
unescapeInitTail :: String -> String
unescapeInitTail = unesc . tail where
unesc s = case s of
'\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
'\\':'n':cs -> '\n' : unesc cs
'\\':'t':cs -> '\t' : unesc cs
'"':[] -> []
c:cs -> c : unesc cs
_ -> []
-------------------------------------------------------------------
-- Alex wrapper code.
-- A modified "posn" wrapper.
-------------------------------------------------------------------
data Posn = Pn !Int !Int !Int
deriving (Eq, Show)
alexStartPos :: Posn
alexStartPos = Pn 0 1 1
alexMove :: Posn -> Char -> Posn
alexMove (Pn a l c) '\t' = Pn (a+1) l (((c+7) `div` 8)*8+1)
alexMove (Pn a l c) '\n' = Pn (a+1) (l+1) 1
alexMove (Pn a l c) _ = Pn (a+1) l (c+1)
type AlexInput = (Posn, -- current position,
Char, -- previous char
String) -- current input string
tokens :: String -> [Token]
tokens str = go (alexStartPos, '\n', str)
where
go :: (Posn, Char, String) -> [Token]
go inp@(pos, _, str) =
case alexScan inp 0 of
AlexEOF -> []
AlexError (pos, _, _) -> fail $ show pos ++ ": lexical error"
AlexSkip inp' len -> go inp'
AlexToken inp' len act -> act pos (take len str) : (go inp')
alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
alexGetChar (p, c, []) = Nothing
alexGetChar (p, _, (c:s)) =
let p' = alexMove p c
in p' `seq` Just (c, (p', c, s))
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (p, c, s) = c
}