mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-07 18:22:50 -06:00
added script for reading the Susanne treebank
This commit is contained in:
14
treebanks/susanne/convert.hs
Normal file
14
treebanks/susanne/convert.hs
Normal file
@@ -0,0 +1,14 @@
|
||||
import System.Directory
|
||||
import System.FilePath
|
||||
import Data.List
|
||||
|
||||
import SusanneFormat
|
||||
|
||||
-- | Convert the treebank files found under @data@ into a single file @text@.
--
-- Every directory entry whose name does not start with a dot is read, in
-- sorted name order. The lines of all files are handed to 'readTreebank',
-- entries recognised by 'isBreak' are dropped, and each remaining entry is
-- written on its own line using 'show'.
main :: IO ()
main = do
  entries <- getDirectoryContents "data"
  -- Names starting with a dot are skipped; this removes "." and ".." (and
  -- any hidden files) from the listing.
  let corpusFiles = filter ((/= ".") . take 1) (sort entries)
  txts <- mapM (\f -> readFile ("data" </> f)) corpusFiles
  -- Split every file into lines on its own: if a file lacks a trailing
  -- newline, its last line must not be fused with the first line of the
  -- next file (which is what @lines (concat txts)@ would do).
  let ts = filter (not . isBreak) (readTreebank (concatMap lines txts))
  writeFile "text" (unlines (map show ts))
|
||||
|
||||
-- | 'True' exactly for a @Phrase@ labelled @"Oh"@ whose only child is a
-- @Word@ with @"YB"@ as its second field and @"<minbrk>"@ as its third;
-- 'False' for everything else.
-- NOTE(review): presumably this is the corpus' minor-break marker — confirm
-- against the SusanneFormat module.
isBreak t =
  case t of
    Phrase "Oh" [Word _ "YB" "<minbrk>" _] -> True
    _                                      -> False
|
||||
Reference in New Issue
Block a user