drop the dependency to FST

2026-05-25 18:58:56 -06:00 · 2015-04-20 11:56:13 +00:00
parent 5d7633fdb4
commit 8c697b72a4
3 changed files with 0 additions and 150 deletions
--- a/src/compiler/GF/Command/Commands.hs
+++ b/src/compiler/GF/Command/Commands.hs
@@ -1120,13 +1120,6 @@ allCommands = Map.fromList [
       ],
     flags = [("file","the output filename")]
     }),
-  ("t", emptyCommandInfo {
-     longname = "tokenize",
-     synopsis = "Tokenize string using the vocabulary",
-     exec = execToktok,
-     options = [],
-     flags = [("lang","The name of the concrete to use")]
-     }),
  ("ai", emptyCommandInfo {
     longname = "abstract_info",
     syntax = "ai IDENTIFIER  or  ai EXPR",
@@ -1426,32 +1419,6 @@ prMorphoAnalysis (w,lps) =
  unlines (w:[showCId l ++ " : " ++ p | (l,p) <- lps])


-- This function is to be excuted when the command 'tok' is parsed
-execToktok :: Monad m => PGFEnv -> [Option] -> [Expr] -> m CommandOutput
-execToktok (pgf, _) opts exprs = do
-  let tokenizers = Map.fromList [ (l, mkTokenizer pgf l) | l <- languages pgf]
-  case getLang opts of
-    Nothing -> do
-      let output = concatMap toStringList [t input | (_,t) <- Map.toList tokenizers]
-      return (fromStrings output)
-    Just lang -> case Map.lookup lang tokenizers of
-      Just tok -> do
-        let output = toStringList $ tok input
-        return (fromStrings output)
-      Nothing -> return (pipeMessage ("Unknown language: " ++ show lang))
-  where input = case exprs of
-          [ELit (LStr s)] -> s
-          _ -> ""
-        toStringList :: Maybe [String] -> [String]
-        toStringList Nothing = []
-        toStringList (Just l) = l
-        getLang :: [Option] -> Maybe Language
-        getLang [] = Nothing
-        getLang (OFlag "lang" (VId l):_) = readLanguage l
-        getLang (_:os) = getLang os
-
-
-
 trie = render . pptss . toTrie . map toATree
  where
    pptss [ts] = "*"<+>nest 2 (ppts ts)
--- a/src/runtime/haskell/PGF.hs
+++ b/src/runtime/haskell/PGF.hs
@@ -119,9 +119,6 @@ module PGF(
           -- extra:
           morphoKnown, isInMorpho,

-           -- ** Tokenizing
-           mkTokenizer,
-
           -- ** Visualizations
           graphvizAbstractTree,
           graphvizParseTree,
@@ -165,7 +162,6 @@ import PGF.Expr (Tree)
 import PGF.Morphology
 import PGF.Data
 import PGF.Binary()
-import PGF.Tokenizer
 import qualified PGF.Forest as Forest
 import qualified PGF.Parse as Parse
 import PGF.Utilities(replace)
--- a/src/runtime/haskell/PGF/Tokenizer.hs
+++ b/src/runtime/haskell/PGF/Tokenizer.hs
@@ -1,113 +0,0 @@
-{-
-GF Tokenizer.
-
-In this module are implemented function that build a fst-based tokenizer
-from a Concrete grammar.
-
-}
-{-# LANGUAGE CPP #-}
-module PGF.Tokenizer 
-       ( mkTokenizer
-       ) where
-
-#if MIN_VERSION_base(4,8,0)
-import Prelude hiding ((<*>))
-#endif
--import Data.List (intercalate)
--import Test.QuickCheck
-import FST.TransducerInterface
-import PGF.Morphology (fullFormLexicon, buildMorpho)
-import PGF.Data (PGF, Language)
-
-
-
-data LexSymbol = Tok String
-    deriving (Show, Read)
-
-type Lexicon = [LexSymbol]
-
-- | This is the construction function. Given a PGF and a Language, it 
-- extract the lexicon for this language and build a tokenization fst
-- from it.
-mkTokenizer :: PGF -> Language -> (String -> Maybe [String])
-mkTokenizer pgf lang = mkTrans lexicon
-  where lexicon = map (Tok . fst) lexicon'
-        lexicon' = fullFormLexicon $ buildMorpho pgf lang
-
-mkTrans :: Lexicon -> (String -> Maybe [String])
-mkTrans = applyDown . lexiconTrans
-
-lexiconTrans :: Lexicon -> Transducer Char
-lexiconTrans lexicon = compile (words |> star ((spaces <|> glue) |> words)) "abcdefghijklmnopqrstuvwxyz "
-  where words = foldr (<|>) (empty) $ map tokToRR lexicon
-        glue = eps <*> stringReg " &+ "
-
-stringReg :: String -> Reg Char
-stringReg str = foldr (\x y -> s x |> y) eps str 
-
-tokToRR:: LexSymbol -> RReg Char
-tokToRR (Tok str) = foldr ((|>) . idR . s) (idR eps) str
-
-spaces :: RReg Char
-spaces = idR $ s ' '
-
-
-
-- TESTING
-
-- verry small test lexicon
-- testLexicon :: Lexicon
-- testLexicon
--   = [ Tok "car"
--     , Tok "elf"
--     ]
-
-- myTrans :: String -> Maybe [String]
-- myTrans = mkTrans testLexicon
-
-- data TestCase = TestCase String String
--     deriving (Show, Read)
-
-- instance Arbitrary TestCase where
--     arbitrary     = arbitraryTestCase
--     --coarbitrary c = variant (ord c `rem` 4)
-
-- arbitraryTestCase:: Gen TestCase
-- arbitraryTestCase = do
--   words <- listOf1 $ elements [t | Tok t <- testLexicon]
--   tokens <- intercalateSometime "+&+" words
--   return $ TestCase (linearize tokens) (intercalate " " tokens)
--   where intercalateSometime :: a -> [a] -> Gen [a]
--         intercalateSometime x (x1:x2:xs) = do 
--           b <- arbitrary
--           let pre = case b of 
--                 True -> x1:x:[]
--                 False -> x1:[]
--           suf <- intercalateSometime x (x2:xs)
--           return (pre++suf)
--         intercalateSometime _ xs = return xs
-
-- linearize :: [String] -> String
-- linearize = linearize' False
--   where linearize' _ [] = ""  -- boolean indicate if the last token was a real word and not +&+
--         linearize' _  ("+&+":ss) = linearize' False ss
--         linearize' True (s:ss) = ' ':s ++ linearize' True ss
--         linearize' False (s:ss) = s ++ linearize' True ss
-
-- testTrans :: (String -> Maybe [String]) -> TestCase -> Bool
-- testTrans t (TestCase s1 s2) =
--   case t s1 of
--     Nothing -> False
--     Just l -> elem s2 l 
-
-- main :: IO ()
-- main = do
--   putStrLn "\n=== Transducer ==="
--   print $ lexiconTrans lexicon
--   putStrLn "\n=== example output ==="
--   putStrLn $ "Input:  " ++ show "car elfcar elf"
--   putStrLn $ "Output: " ++ (show $ mkTrans lexicon "car elfcar elf")
--   putStrLn "\n=== QuickCheck tests ==="
--   quickCheck (testTrans myTrans)
--   putStrLn "\n=== Examples of test cases ==="
--   sample (arbitrary :: Gen TestCase)