added script for reading the Susanne treebank

This commit is contained in:
kr.angelov
2013-12-04 12:11:41 +00:00
parent bf9bffdd69
commit 6fa7b1ed99
2 changed files with 57 additions and 0 deletions

View File

@@ -0,0 +1,14 @@
import System.Directory
import System.FilePath
import Data.List
import SusanneFormat
-- | Read every treebank file in the @data@ directory, parse the combined
-- contents with 'readTreebank', drop the entries recognised by 'isBreak',
-- and write the remaining trees (one 'show'n tree per line) to the file
-- @text@ in the current directory.
--
-- Files are processed in sorted file-name order. Directory entries whose
-- name starts with a dot are skipped.
main :: IO ()
main = do
  entries <- getDirectoryContents "data"
  -- getDirectoryContents also returns "." and ".."; dropping every
  -- dot-prefixed name removes those (and any hidden files) in one go.
  let files = [f | f <- sort entries, take 1 f /= "."]
  txts <- mapM (\f -> readFile ("data" </> f)) files
  -- Split each file into lines on its own rather than concatenating first:
  -- if a file does not end with a newline, concatenation would fuse its last
  -- line with the first line of the following file.
  let ts = filter (not . isBreak) (readTreebank (concatMap lines txts))
  writeFile "text" (unlines (map show ts))
-- | 'True' exactly when the tree is a @Phrase@ labelled @"Oh"@ whose only
-- child is a @Word@ with second field @"YB"@ and third field @"<minbrk>"@;
-- 'False' for every other tree.
-- NOTE(review): presumably this is how a minor break is encoded in the
-- treebank files -- confirm against the Susanne documentation.
isBreak tree =
  case tree of
    Phrase "Oh" [Word _ "YB" "<minbrk>" _] -> True
    _                                      -> False