drop the dependency to FST

2026-05-22 17:42:51 -06:00 · 2015-04-20 11:56:13 +00:00
parent 79b710a6b7
commit 61e37c9e5e
3 changed files with 0 additions and 150 deletions
--- a/src/compiler/GF/Command/Commands.hs
+++ b/src/compiler/GF/Command/Commands.hs
@@ -1120,13 +1120,6 @@ allCommands = Map.fromList [
       ],
     flags = [("file","the output filename")]
     }),
  ("t", emptyCommandInfo {
     longname = "tokenize",
     synopsis = "Tokenize string using the vocabulary",
     exec = execToktok,
     options = [],
     flags = [("lang","The name of the concrete to use")]
     }),
  ("ai", emptyCommandInfo {
     longname = "abstract_info",
     syntax = "ai IDENTIFIER  or  ai EXPR",
@@ -1426,32 +1419,6 @@ prMorphoAnalysis (w,lps) =
  unlines (w:[showCId l ++ " : " ++ p | (l,p) <- lps])
 -- This function is to be excuted when the command 'tok' is parsed
 execToktok :: Monad m => PGFEnv -> [Option] -> [Expr] -> m CommandOutput
 execToktok (pgf, _) opts exprs = do
  let tokenizers = Map.fromList [ (l, mkTokenizer pgf l) | l <- languages pgf]
  case getLang opts of
    Nothing -> do
      let output = concatMap toStringList [t input | (_,t) <- Map.toList tokenizers]
      return (fromStrings output)
    Just lang -> case Map.lookup lang tokenizers of
      Just tok -> do
        let output = toStringList $ tok input
        return (fromStrings output)
      Nothing -> return (pipeMessage ("Unknown language: " ++ show lang))
  where input = case exprs of
          [ELit (LStr s)] -> s
          _ -> ""
        toStringList :: Maybe [String] -> [String]
        toStringList Nothing = []
        toStringList (Just l) = l
        getLang :: [Option] -> Maybe Language
        getLang [] = Nothing
        getLang (OFlag "lang" (VId l):_) = readLanguage l
        getLang (_:os) = getLang os
 trie = render . pptss . toTrie . map toATree
  where
    pptss [ts] = "*"<+>nest 2 (ppts ts)
--- a/src/runtime/haskell/PGF.hs
+++ b/src/runtime/haskell/PGF.hs
@@ -119,9 +119,6 @@ module PGF(
           -- extra:
           morphoKnown, isInMorpho,
           -- ** Tokenizing
           mkTokenizer,
           -- ** Visualizations
           graphvizAbstractTree,
           graphvizParseTree,
@@ -165,7 +162,6 @@ import PGF.Expr (Tree)
 import PGF.Morphology
 import PGF.Data
 import PGF.Binary()
 import PGF.Tokenizer
 import qualified PGF.Forest as Forest
 import qualified PGF.Parse as Parse
 import PGF.Utilities(replace)
--- a/src/runtime/haskell/PGF/Tokenizer.hs
+++ b/src/runtime/haskell/PGF/Tokenizer.hs
@@ -1,113 +0,0 @@
 {-
 GF Tokenizer.
 In this module are implemented function that build a fst-based tokenizer
 from a Concrete grammar.
 -}
 {-# LANGUAGE CPP #-}
 module PGF.Tokenizer 
       ( mkTokenizer
       ) where
 #if MIN_VERSION_base(4,8,0)
 import Prelude hiding ((<*>))
 #endif
 --import Data.List (intercalate)
 --import Test.QuickCheck
 import FST.TransducerInterface
 import PGF.Morphology (fullFormLexicon, buildMorpho)
 import PGF.Data (PGF, Language)
 data LexSymbol = Tok String
    deriving (Show, Read)
 type Lexicon = [LexSymbol]
 -- | This is the construction function. Given a PGF and a Language, it 
 -- extract the lexicon for this language and build a tokenization fst
 -- from it.
 mkTokenizer :: PGF -> Language -> (String -> Maybe [String])
 mkTokenizer pgf lang = mkTrans lexicon
  where lexicon = map (Tok . fst) lexicon'
        lexicon' = fullFormLexicon $ buildMorpho pgf lang
 mkTrans :: Lexicon -> (String -> Maybe [String])
 mkTrans = applyDown . lexiconTrans
 lexiconTrans :: Lexicon -> Transducer Char
 lexiconTrans lexicon = compile (words |> star ((spaces <|> glue) |> words)) "abcdefghijklmnopqrstuvwxyz "
  where words = foldr (<|>) (empty) $ map tokToRR lexicon
        glue = eps <*> stringReg " &+ "
 stringReg :: String -> Reg Char
 stringReg str = foldr (\x y -> s x |> y) eps str 
 tokToRR:: LexSymbol -> RReg Char
 tokToRR (Tok str) = foldr ((|>) . idR . s) (idR eps) str
 spaces :: RReg Char
 spaces = idR $ s ' '
 -- TESTING
 -- verry small test lexicon
 -- testLexicon :: Lexicon
 -- testLexicon
 --   = [ Tok "car"
 --     , Tok "elf"
 --     ]
 -- myTrans :: String -> Maybe [String]
 -- myTrans = mkTrans testLexicon
 -- data TestCase = TestCase String String
 --     deriving (Show, Read)
 -- instance Arbitrary TestCase where
 --     arbitrary     = arbitraryTestCase
 --     --coarbitrary c = variant (ord c `rem` 4)
 -- arbitraryTestCase:: Gen TestCase
 -- arbitraryTestCase = do
 --   words <- listOf1 $ elements [t | Tok t <- testLexicon]
 --   tokens <- intercalateSometime "+&+" words
 --   return $ TestCase (linearize tokens) (intercalate " " tokens)
 --   where intercalateSometime :: a -> [a] -> Gen [a]
 --         intercalateSometime x (x1:x2:xs) = do 
 --           b <- arbitrary
 --           let pre = case b of 
 --                 True -> x1:x:[]
 --                 False -> x1:[]
 --           suf <- intercalateSometime x (x2:xs)
 --           return (pre++suf)
 --         intercalateSometime _ xs = return xs
 -- linearize :: [String] -> String
 -- linearize = linearize' False
 --   where linearize' _ [] = ""  -- boolean indicate if the last token was a real word and not +&+
 --         linearize' _  ("+&+":ss) = linearize' False ss
 --         linearize' True (s:ss) = ' ':s ++ linearize' True ss
 --         linearize' False (s:ss) = s ++ linearize' True ss
 -- testTrans :: (String -> Maybe [String]) -> TestCase -> Bool
 -- testTrans t (TestCase s1 s2) =
 --   case t s1 of
 --     Nothing -> False
 --     Just l -> elem s2 l 
 -- main :: IO ()
 -- main = do
 --   putStrLn "\n=== Transducer ==="
 --   print $ lexiconTrans lexicon
 --   putStrLn "\n=== example output ==="
 --   putStrLn $ "Input:  " ++ show "car elfcar elf"
 --   putStrLn $ "Output: " ++ (show $ mkTrans lexicon "car elfcar elf")
 --   putStrLn "\n=== QuickCheck tests ==="
 --   quickCheck (testTrans myTrans)
 --   putStrLn "\n=== Examples of test cases ==="
 --   sample (arbitrary :: Gen TestCase)