diff --git a/treebanks/PennTreebank/Dependencies.hs b/treebanks/PennTreebank/Dependencies.hs
new file mode 100644
index 000000000..d785507b3
--- /dev/null
+++ b/treebanks/PennTreebank/Dependencies.hs
@@ -0,0 +1,77 @@
+-- | Extract head/modifier dependency pairs from abstract syntax trees,
+-- using a table that labels every argument of every abstract function.
+module Dependencies where
+
+import PGF
+import qualified Data.Map as Map
+import Data.Maybe as Maybe
+
+-- | Maps a function name to the labels of its arguments, in argument order.
+-- The labels this module interprets are @head@ and @mod@; any other label
+-- (the table uses @_@) makes the argument invisible to the extraction.
+type HeadTable = Map.Map CId [CId]
+
+-- | Read a head table: one function per line, the function name followed by
+-- one label per argument, separated by whitespace. Blank lines are skipped.
+-- When a function is listed more than once, the last line wins.
+readHeadTable :: FilePath -> IO HeadTable
+readHeadTable fpath = do
+  ls <- fmap lines $ readFile fpath
+  -- A failed (f:labels) match simply drops the line, so no head/tail is needed.
+  return (Map.fromList [(f,labels) | l <- ls, (f:labels) <- [map mkCId (words l)]])
+
+-- | Compute the lexical head of a tree and the (head, modifier) pairs found
+-- in it.
+--
+-- A function without arguments is its own head. @MkSymb@, and any function
+-- whose table entry marks no argument as @head@ or @mod@ (e.g. @num _@),
+-- is treated the same way and its arguments are not inspected. Otherwise
+-- exactly one argument must be labelled @head@; its head becomes the head
+-- of the whole tree, and every @mod@ argument contributes one pair plus
+-- its own pairs. Arguments with any other label are ignored.
+--
+-- Calls 'error' when the expression is not a function application, when the
+-- function is missing from the table, or when the number of @head@
+-- arguments is not exactly one.
+getDependencies :: HeadTable -> Expr -> (CId,[(CId,CId)])
+getDependencies tbl e =
+  case unApp e of
+    Nothing -> error ("this is not a function application: "++showExpr [] e)
+    Just (f,es)
+      | null es             -> (f,[])
+      | f == mkCId "MkSymb" -> (f,[])
+      | otherwise           ->
+          case Map.lookup f tbl of
+            Nothing -> error ("there is no head defined for function "++showCId f)
+            Just cs
+              -- Entries such as "num _" or "IDig _" declare neither a head nor
+              -- a modifier; treat them as opaque words like MkSymb instead of
+              -- failing the "exactly one head" check on every numeral.
+              | all isIgnored cs -> (f,[])
+              | otherwise        ->
+                  -- zipWith stops at the shorter list, so surplus labels or
+                  -- surplus arguments are silently left out.
+                  let xs = zipWith (\c arg -> (c,getDependencies tbl arg)) cs es
+                  in case [he | (c,he) <- xs, c == c_head] of
+                       [(h,hdeps)] -> (h,hdeps ++ concat [(h,m):mdeps | (c,(m,mdeps)) <- xs, c == c_mod])
+                       _           -> error ("there must be exactly one head in "++showExpr [] e)
+  where
+    isIgnored c = c /= c_head && c /= c_mod
+
+-- | Label of the argument that supplies the head of a phrase.
+c_head :: CId
+c_head = mkCId "head"
+
+-- | Label of an argument that modifies the head.
+c_mod :: CId
+c_mod = mkCId "mod"
+
+-- | Count every dependency pair in the trees of @wsj.full@ (one tree per
+-- line; lines that 'readExpr' cannot parse are skipped) and write the
+-- counts to the file @deps@, one @((head,modifier),count)@ entry per line.
+test :: IO ()
+test = do
+  t  <- readHeadTable "ParseEngAbs.heads"
+  es <- fmap (mapMaybe readExpr . lines) $ readFile "wsj.full"
+  let deps = Map.fromListWith (+) [(d,1 :: Int) | e <- es, d <- snd (getDependencies t e)]
+  writeFile "deps" (unlines (map show (Map.toList deps)))
diff --git a/treebanks/PennTreebank/ParseEngAbs.heads b/treebanks/PennTreebank/ParseEngAbs.heads new file mode 100644 index 000000000..f77a60287 --- /dev/null +++ b/treebanks/PennTreebank/ParseEngAbs.heads @@ -0,0 +1,223 @@ +PositA head +ComparA head mod +ComplA2 head mod +ReflA2 head +UseA2 head +UseComparA head +CAdvAP mod head mod +AdjOrd head +SentAP head mod +AdAP mod head +AdvAP head mod +PositAdvAdj head +PrepNP head mod +AdAdv mod head +PositAdAAdj head +SubjS mod head +AdnCAdv head +ComplV2 head mod +ComplV3 head mod mod +ComplV2V head mod mod +ComplV2S head mod mod +ComplV2Q head mod mod +ComplV2A head mod mod +SlashV2 mod head +SlashVVV2 mod head mod +NumInt head +OrdInt head +AdvSC head +NumInt head +OrdInt head +ConjS head mod +ConjRS head mod +ConjAP head mod +ConjNP mod head +ConjAdv head mod +ConjIAdv head mod +ConjCN head mod +GenNP head +GenIP head +GenRP mod head +EmptyRelSlash head +MkVPI head +ConjVPI head mod +ComplVPIVV head mod +MkVPS _ _ head +ConjVPS head mod +PredVPS mod head +PartVP head +PassVPSlash head +ExistNP head +ExistIP head +ProgrVP head +ImpPl1 head +ImpP3 mod head +DetCN _ head +UsePN head +UsePron head +PredetNP mod head +PPartNP head mod +AdvNP head mod +RelNP head mod +DetNP head +DetQuant head _ _ +DetQuantOrd head _ _ +NumCard head +NumDigits head +NumNumeral head +AdNum mod head +OrdDigits head +OrdNumeral head +OrdSuperl head +MassNP head +PossPron head +UseN head +ComplN2 head mod +ComplN3 head mod +UseN2 head +Use2N3 head +Use3N3 head +AdjCN mod head +RelCN head mod +AdvCN head mod +SentCN head mod +ApposCN head mod +num _ +pot0 _ +pot1to19 _ +pot0as1 _ +pot1 _ +pot1plus _ _ +pot1as2 _ +pot2 _ +pot2plus _ _ +pot2as3 _ +pot3 _ +pot3plus _ _ +IDig _ +IIDig _ _ +num2digits head +PhrUtt mod head mod +UttS head +UttQS head +UttImpSg _ head +UttImpPl _ 
head +UttImpPol _ head +UttIP head +UttIAdv head +UttNP head +UttAdv head +UttVP head +UttCN head +UttCard head +UttAP head +UttInterj head +PConjConj head +VocNP head +QuestCl head +QuestVP mod head +QuestSlash mod head +QuestIAdv mod head +QuestIComp mod head +IdetCN mod head +IdetIP head +AdvIP head mod +IdetQuant head mod +PrepIP _ head +AdvIAdv head mod +CompIAdv head +CompIP head +ComplSlashIP head mod +AdvQVP head mod +AddAdvQVP head mod +QuestQVP mod head +RelCl head +RelVP mod head +RelSlash mod head +FunRP mod mod head +PredVP mod head +PredSCVP mod head +SlashVP mod head +AdvSlash head mod +SlashPrep head mod +SlashVS mod head mod +ImpVP head +EmbedS head +EmbedQS head +EmbedVP head +UseCl _ _ head +UseQCl _ _ head +UseRCl _ _ head +UseSlash _ _ head +AdvS mod head +ExtAdvS mod head +SSubjS mod mod head +RelS head mod +SymbPN head +CNNumNP head _ +MkSymb _ +TTAnt head mod +UseV head +ComplVV head mod +ComplVS head mod +ComplVQ head mod +ComplVA head mod +SlashV2a head +Slash2V3 head mod +Slash3V3 head mod +SlashV2V head _ _ mod +SlashV2S head mod +SlashV2Q head mod +SlashV2A head mod +ComplSlash head mod +SlashVV head mod +SlashV2VNP head mod mod +ReflVP head +UseComp head +AdvVP head mod +AdVVP mod head +AdvVPSlash head mod +AdVVPSlash mod head +VPSlashPrep head mod +CompAP head +CompNP head +CompAdv head +CompCN head +CompoundCN _ mod head +DashCN mod head +GerundN head +GerundAP head +PastPartAP head +OrdCompar head +PositAdVAdj head +UseQuantPN _ head +SlashVPIV2V head _ mod +SlashSlashV2V head _ _ mod +ComplVV head _ _ mod +PredVPosv mod head +PredVPovs mod head +CompS head +CompQS head +CompVP mod mod head +VPSlashVS mod head +PastPartRS _ _ head +PresPartRS _ _ head +ApposNP head mod +AdAdV mod head +UttAdV head +BaseNP head mod +ConsNP head mod +BaseVPS head mod +ConsVPS head mod +BaseVPI head mod +ConsVPI head mod +BaseAP head mod +ConsAP head mod +BaseS head mod +ConsS head mod +BaseCN head mod +ConsCN head mod +BaseAdv head mod +ConsAdv head 
mod +BaseRS head mod +ConsRS head mod