forked from GitHub/gf-core
Use tags in SLF generation to get out a parsable string.
This commit is contained in:
@@ -38,7 +38,7 @@ import Data.Maybe (fromMaybe)
|
|||||||
|
|
||||||
data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] }
|
data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] }
|
||||||
|
|
||||||
data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord }
|
data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord, nTag :: Maybe String }
|
||||||
|
|
||||||
-- | An SLF word is a word, or the empty string.
|
-- | An SLF word is a word, or the empty string.
|
||||||
type SLFWord = Maybe String
|
type SLFWord = Maybe String
|
||||||
@@ -60,12 +60,21 @@ mkSLFFA :: Ident -- ^ Grammar name
|
|||||||
mkSLFFA name opts cfg = oneFinalState Nothing () $ moveLabelsToNodes $ dfa2nfa $ cfgToFA name opts cfg
|
mkSLFFA name opts cfg = oneFinalState Nothing () $ moveLabelsToNodes $ dfa2nfa $ cfgToFA name opts cfg
|
||||||
|
|
||||||
automatonToSLF :: FA State (Maybe String) () -> SLF
|
automatonToSLF :: FA State (Maybe String) () -> SLF
|
||||||
automatonToSLF fa =
|
automatonToSLF fa = SLF { slfNodes = map mkSLFNode (states fa),
|
||||||
SLF { slfNodes = map mkSLFNode (states fa),
|
slfEdges = zipWith mkSLFEdge [0..] (transitions fa) }
|
||||||
slfEdges = zipWith mkSLFEdge [0..] (transitions fa) }
|
|
||||||
where mkSLFNode (i,w) = SLFNode { nId = i, nWord = w }
|
|
||||||
mkSLFEdge i (f,t,()) = SLFEdge { eId = i, eStart = f, eEnd = t }
|
|
||||||
|
|
||||||
|
mkSLFNode :: (Int, Maybe String) -> SLFNode
|
||||||
|
mkSLFNode (i, Nothing) = SLFNode { nId = i, nWord = Nothing, nTag = Nothing }
|
||||||
|
mkSLFNode (i, Just w)
|
||||||
|
| isNonWord w = SLFNode { nId = i,
|
||||||
|
nWord = Nothing,
|
||||||
|
nTag = Just w }
|
||||||
|
| otherwise = SLFNode { nId = i,
|
||||||
|
nWord = Just (map toUpper w),
|
||||||
|
nTag = Just w }
|
||||||
|
|
||||||
|
mkSLFEdge :: Int -> (Int,Int,()) -> SLFEdge
|
||||||
|
mkSLFEdge i (f,t,()) = SLFEdge { eId = i, eStart = f, eEnd = t }
|
||||||
|
|
||||||
prSLF :: SLF -> ShowS
|
prSLF :: SLF -> ShowS
|
||||||
prSLF (SLF { slfNodes = ns, slfEdges = es})
|
prSLF (SLF { slfNodes = ns, slfEdges = es})
|
||||||
@@ -73,14 +82,21 @@ prSLF (SLF { slfNodes = ns, slfEdges = es})
|
|||||||
where
|
where
|
||||||
header = showString "VERSION=1.0" . nl
|
header = showString "VERSION=1.0" . nl
|
||||||
. prFields [("N",show (length ns)),("L", show (length es))] . nl
|
. prFields [("N",show (length ns)),("L", show (length es))] . nl
|
||||||
prNode n = prFields [("I",show (nId n)),("W",showWord (nWord n))]
|
prNode n = prFields $ [("I",show (nId n)),("W",showWord (nWord n))]
|
||||||
|
++ maybe [] (\t -> [("s",t)]) (nTag n)
|
||||||
prEdge e = prFields [("J",show (eId e)),("S",show (eStart e)),("E",show (eEnd e))]
|
prEdge e = prFields [("J",show (eId e)),("S",show (eStart e)),("E",show (eEnd e))]
|
||||||
|
|
||||||
|
-- | Check if a word should not correspond to a word in the SLF file.
|
||||||
|
isNonWord :: String -> Bool
|
||||||
|
isNonWord = any isPunct
|
||||||
|
|
||||||
|
isPunct :: Char -> Bool
|
||||||
|
isPunct c = c `elem` "-_.;.,?!()[]{}"
|
||||||
|
|
||||||
showWord :: SLFWord -> String
|
showWord :: SLFWord -> String
|
||||||
showWord Nothing = "!NULL"
|
showWord Nothing = "!NULL"
|
||||||
showWord (Just w) | null w = "!NULL"
|
showWord (Just w) | null w = "!NULL"
|
||||||
| otherwise = map toUpper w
|
| otherwise = w
|
||||||
|
|
||||||
prFields :: [(String,String)] -> ShowS
|
prFields :: [(String,String)] -> ShowS
|
||||||
prFields fs = unwordsS [ showString l . showChar '=' . showString v | (l,v) <- fs ]
|
prFields fs = unwordsS [ showString l . showChar '=' . showString v | (l,v) <- fs ]
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user