Files
gf-core/contrib/eaglesconv/CollectLemmas.hs
Nick Frolov 2ff1d34c86 A Russian dictionary
A Russian dictionary generated from a wordlist created by the FreeLing
project. The accompanying converter can be used to convert other wordlists in
EAGLES format to GF grammars.
2011-12-31 02:36:24 +00:00

29 lines
791 B
Haskell

-- Copyright (C) 2011 Nikita Frolov
import qualified Data.Text as T
import qualified Data.Text.IO as UTF8
import System.IO
import System.Environment
import Control.Monad
import Control.Monad.State
-- | For every wordlist file named on the command line, print one
-- @lemma form tag@ line to stdout for each (lemma, tag) pair of each entry.
--
-- Every input line is split on whitespace: the first token is taken as the
-- word form and the remaining tokens are read as alternating lemma/tag pairs.
-- Blank (or whitespace-only) lines produce no output instead of crashing.
--
-- NOTE(review): despite the @UTF8@ alias, "Data.Text.IO" decodes and encodes
-- using the handle's locale encoding -- confirm the runtime locale is UTF-8.
main :: IO ()
main = do
  args <- getArgs
  forM_ args $ \f -> do
    entries <- fmap T.lines (UTF8.readFile f)
    forM_ entries $ mapM_ UTF8.putStrLn . entryRecords

-- | Expand one wordlist line into its space-separated @lemma form tag@
-- output records. A line without any tokens yields no records.
entryRecords :: T.Text -> [T.Text]
entryRecords entry =
  case T.words entry of
    [] -> []
    form : rest ->
      [T.unwords [lemma, form, tag] | (lemma, tag) <- toPairs rest]

-- | Group a flat list into consecutive pairs; an unpaired trailing element
-- is dropped.
toPairs :: [a] -> [(a, a)]
toPairs (x : y : rest) = (x, y) : toPairs rest
toPairs _ = []