mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-07 18:22:50 -06:00
more on the Susanne treebank
This commit is contained in:
@@ -1,14 +1,54 @@
|
||||
import System.Directory
|
||||
import System.FilePath
|
||||
import Data.List
|
||||
import Data.Char(toLower)
|
||||
|
||||
import PGF (readPGF, readLanguage, buildMorpho, lookupMorpho, mkCId, functionType, unType)
|
||||
import SusanneFormat
|
||||
|
||||
-- | The language obtained by passing the name "ParseEng" to 'readLanguage'.
--
-- 'readLanguage' returns a 'Maybe'; instead of a partial top-level pattern
-- binding (which fails with an opaque irrefutable-pattern error), the
-- 'Nothing' case is reported explicitly.
eng =
  case readLanguage "ParseEng" of
    Just lang -> lang
    Nothing   -> error "eng: readLanguage \"ParseEng\" returned Nothing"
|
||||
|
||||
-- | Read every file in the "data" directory, parse the concatenated text
-- as a treebank and write one shown tree per line to the file "text".
main :: IO ()
main = do
  -- The grammar and its morphology are only needed by the 'convert' pass,
  -- which is currently commented out below; the grammar file is still read.
  gr <- readPGF "../../ParseEngAbs.pgf"
  let morpho = buildMorpho gr eng
  fs <- getDirectoryContents "data"
  -- Entries whose name starts with a dot (".", ".." and hidden files) are
  -- skipped; the remaining files are read in sorted name order.
  txts <- (mapM (\f -> readFile ("data" </> f)) . filter ((/= ".") . take 1)) (sort fs)
  -- let ts = concatMap (convert gr morpho) (readTreebank (lines (concat txts)))
  -- The trees are written out exactly as parsed: no 'isBreak' filtering and
  -- no 'convert' pass is applied.
  let ts = readTreebank (lines (concat txts))
  writeFile "text" (unlines (map show ts))
|
||||
|
||||
-- | True exactly for a phrase tagged "Oh" whose only child is a word with
-- tag "YB" and third field "<minbrk>"; False for every other node.
--
-- NOTE(review): this pattern gives 'Phrase' two fields, whereas 'convert'
-- matches 'Phrase' with five. Confirm against the SusanneFormat definition.
isBreak node =
  case node of
    Phrase "Oh" [Word _ "YB" "<minbrk>" _] -> True
    _                                      -> False
|
||||
-- | Rewrite one treebank node into the list of nodes that replaces it.
--
-- Words:
--
--   * a word whose tag is in @droppedTags@ is removed (empty result);
--   * a word whose tag is in @lemmaTags@ is passed to 'convertLemma' with
--     the category name and analysis string listed for that tag;
--   * any other word is kept unchanged.
--
-- Phrases: the children are converted recursively.  A phrase tagged "O" is
-- replaced by its converted children; any other phrase is rebuilt with the
-- same tag, mods, fn and idx around the converted children.
--
-- Any other node (for example the 'App' nodes built by 'convertLemma') is
-- returned unchanged, so the function is total.
convert pgf morpho w@(Word _ tag _ _)
  | tag `elem` droppedTags = []
  | otherwise =
      case lookup tag lemmaTags of
        Just (cat, an) -> convertLemma pgf morpho (mkCId cat) an w
        Nothing        -> [w]
  where
    -- Tags whose words are discarded.
    droppedTags = ["YB","YBL","YBR","YF","YIL","YIR","YTL","YTR", "YO"]

    -- Tag -> (category name, analysis string) handed to 'convertLemma'.
    lemmaTags =
      [ ("NN1c",  ("N",     "s Sg Nom"))
      , ("NN1n",  ("N",     "s Sg Nom"))
      , ("NN2",   ("N",     "s Pl Nom"))
      , ("JJ",    ("A",     "s (AAdj Posit Nom)"))
      , ("JB",    ("A",     "s (AAdj Posit Nom)"))
      , ("JBo",   ("A",     "s (AAdj Posit Nom)"))
      , ("AT",    ("Quant", "s False Sg"))
      , ("VVDi",  ("V",     "s VPast"))
      , ("VVDt",  ("V2",    "s VPast"))
      , ("VVDv",  ("V",     "s VPast"))
      , ("VVZi",  ("V",     "s VPres"))
      , ("VVZt",  ("V2",    "s VPres"))
      , ("VVZv",  ("V",     "s VPres"))
      , ("PPHS2", ("Pron",  "s (NCase Nom)"))
      , ("PPHO2", ("Pron",  "s NPAcc"))
      , ("RR",    ("Adv",   "s"))
      , ("II",    ("Prep",  "s"))
      , ("IO",    ("Prep",  "s"))
      ]
convert pgf morpho (Phrase tag mods fn idx ts)
  | tag == "O" = children
  | otherwise  = [Phrase tag mods fn idx children]
  where
    children = concatMap (convert pgf morpho) ts
-- Fallback: nodes that are neither 'Word' nor 'Phrase' pass through as is.
convert _ _ node = [node]
|
||||
|
||||
-- | Try to replace a word by an application node for a grammar function.
--
-- The lower-cased third field of the word is looked up in @morpho@.  A
-- result is a candidate when its function has a type in @pgf@ whose
-- category (the middle component of 'unType') equals @cat@ and its analysis
-- equals @an0@.  With exactly one candidate @f@ the result is
-- @[App f []]@; with none or several the word is returned unchanged.
convertLemma pgf morpho cat an0 w@(Word _ _ form _) =
  case candidates of
    [fun] -> [App fun []]
    _     -> [w]
  where
    candidates =
      [ fun
      | (fun, an) <- lookupMorpho morpho (map toLower form)
      , inCategory fun
      , an == an0
      ]

    -- Functions without a type in the grammar never qualify.
    inCategory fun =
      maybe False ((== cat) . categoryOf) (functionType pgf fun)

    categoryOf ty = let (_, c, _) = unType ty in c
|
||||
|
||||
Reference in New Issue
Block a user