From 8c697b72a486ec3fd80734e999ac465cd251372c Mon Sep 17 00:00:00 2001 From: krasimir Date: Mon, 20 Apr 2015 11:56:13 +0000 Subject: [PATCH] drop the dependency to FST --- src/compiler/GF/Command/Commands.hs | 33 -------- src/runtime/haskell/PGF.hs | 4 - src/runtime/haskell/PGF/Tokenizer.hs | 113 --------------------------- 3 files changed, 150 deletions(-) delete mode 100644 src/runtime/haskell/PGF/Tokenizer.hs diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs index 348a942d8..1255b3517 100644 --- a/src/compiler/GF/Command/Commands.hs +++ b/src/compiler/GF/Command/Commands.hs @@ -1120,13 +1120,6 @@ allCommands = Map.fromList [ ], flags = [("file","the output filename")] }), - ("t", emptyCommandInfo { - longname = "tokenize", - synopsis = "Tokenize string using the vocabulary", - exec = execToktok, - options = [], - flags = [("lang","The name of the concrete to use")] - }), ("ai", emptyCommandInfo { longname = "abstract_info", syntax = "ai IDENTIFIER or ai EXPR", @@ -1426,32 +1419,6 @@ prMorphoAnalysis (w,lps) = unlines (w:[showCId l ++ " : " ++ p | (l,p) <- lps]) --- This function is to be excuted when the command 'tok' is parsed -execToktok :: Monad m => PGFEnv -> [Option] -> [Expr] -> m CommandOutput -execToktok (pgf, _) opts exprs = do - let tokenizers = Map.fromList [ (l, mkTokenizer pgf l) | l <- languages pgf] - case getLang opts of - Nothing -> do - let output = concatMap toStringList [t input | (_,t) <- Map.toList tokenizers] - return (fromStrings output) - Just lang -> case Map.lookup lang tokenizers of - Just tok -> do - let output = toStringList $ tok input - return (fromStrings output) - Nothing -> return (pipeMessage ("Unknown language: " ++ show lang)) - where input = case exprs of - [ELit (LStr s)] -> s - _ -> "" - toStringList :: Maybe [String] -> [String] - toStringList Nothing = [] - toStringList (Just l) = l - getLang :: [Option] -> Maybe Language - getLang [] = Nothing - getLang (OFlag "lang" (VId l):_) = readLanguage l - getLang (_:os) = getLang os - - - trie = render . pptss . toTrie . map toATree where pptss [ts] = "*"<+>nest 2 (ppts ts) diff --git a/src/runtime/haskell/PGF.hs b/src/runtime/haskell/PGF.hs index d2e70166c..07c14324f 100644 --- a/src/runtime/haskell/PGF.hs +++ b/src/runtime/haskell/PGF.hs @@ -119,9 +119,6 @@ module PGF( -- extra: morphoKnown, isInMorpho, - -- ** Tokenizing - mkTokenizer, - -- ** Visualizations graphvizAbstractTree, graphvizParseTree, @@ -165,7 +162,6 @@ import PGF.Expr (Tree) import PGF.Morphology import PGF.Data import PGF.Binary() -import PGF.Tokenizer import qualified PGF.Forest as Forest import qualified PGF.Parse as Parse import PGF.Utilities(replace) diff --git a/src/runtime/haskell/PGF/Tokenizer.hs b/src/runtime/haskell/PGF/Tokenizer.hs deleted file mode 100644 index 6b8800c57..000000000 --- a/src/runtime/haskell/PGF/Tokenizer.hs +++ /dev/null @@ -1,113 +0,0 @@ -{- -GF Tokenizer. - -In this module are implemented function that build a fst-based tokenizer -from a Concrete grammar. - --} -{-# LANGUAGE CPP #-} -module PGF.Tokenizer - ( mkTokenizer - ) where - -#if MIN_VERSION_base(4,8,0) -import Prelude hiding ((<*>)) -#endif ---import Data.List (intercalate) ---import Test.QuickCheck -import FST.TransducerInterface -import PGF.Morphology (fullFormLexicon, buildMorpho) -import PGF.Data (PGF, Language) - - - -data LexSymbol = Tok String - deriving (Show, Read) - -type Lexicon = [LexSymbol] - --- | This is the construction function. Given a PGF and a Language, it --- extract the lexicon for this language and build a tokenization fst --- from it. -mkTokenizer :: PGF -> Language -> (String -> Maybe [String]) -mkTokenizer pgf lang = mkTrans lexicon - where lexicon = map (Tok . fst) lexicon' - lexicon' = fullFormLexicon $ buildMorpho pgf lang - -mkTrans :: Lexicon -> (String -> Maybe [String]) -mkTrans = applyDown . lexiconTrans - -lexiconTrans :: Lexicon -> Transducer Char -lexiconTrans lexicon = compile (words |> star ((spaces <|> glue) |> words)) "abcdefghijklmnopqrstuvwxyz " - where words = foldr (<|>) (empty) $ map tokToRR lexicon - glue = eps <*> stringReg " &+ " - -stringReg :: String -> Reg Char -stringReg str = foldr (\x y -> s x |> y) eps str - -tokToRR:: LexSymbol -> RReg Char -tokToRR (Tok str) = foldr ((|>) . idR . s) (idR eps) str - -spaces :: RReg Char -spaces = idR $ s ' ' - - - --- TESTING - --- verry small test lexicon --- testLexicon :: Lexicon --- testLexicon --- = [ Tok "car" --- , Tok "elf" --- ] - --- myTrans :: String -> Maybe [String] --- myTrans = mkTrans testLexicon - --- data TestCase = TestCase String String --- deriving (Show, Read) - --- instance Arbitrary TestCase where --- arbitrary = arbitraryTestCase --- --coarbitrary c = variant (ord c `rem` 4) - --- arbitraryTestCase:: Gen TestCase --- arbitraryTestCase = do --- words <- listOf1 $ elements [t | Tok t <- testLexicon] --- tokens <- intercalateSometime "+&+" words --- return $ TestCase (linearize tokens) (intercalate " " tokens) --- where intercalateSometime :: a -> [a] -> Gen [a] --- intercalateSometime x (x1:x2:xs) = do --- b <- arbitrary --- let pre = case b of --- True -> x1:x:[] --- False -> x1:[] --- suf <- intercalateSometime x (x2:xs) --- return (pre++suf) --- intercalateSometime _ xs = return xs - --- linearize :: [String] -> String --- linearize = linearize' False --- where linearize' _ [] = "" -- boolean indicate if the last token was a real word and not +&+ --- linearize' _ ("+&+":ss) = linearize' False ss --- linearize' True (s:ss) = ' ':s ++ linearize' True ss --- linearize' False (s:ss) = s ++ linearize' True ss - --- testTrans :: (String -> Maybe [String]) -> TestCase -> Bool --- testTrans t (TestCase s1 s2) = --- case t s1 of --- Nothing -> False --- Just l -> elem s2 l - --- main :: IO () --- main = do --- putStrLn "\n=== Transducer ===" --- print $ lexiconTrans lexicon --- putStrLn "\n=== example output ===" --- putStrLn $ "Input: " ++ show "car elfcar elf" --- putStrLn $ "Output: " ++ (show $ mkTrans lexicon "car elfcar elf") --- putStrLn "\n=== QuickCheck tests ===" --- quickCheck (testTrans myTrans) --- putStrLn "\n=== Examples of test cases ===" --- sample (arbitrary :: Gen TestCase)