From 3a79588dd42ca56d638e4e0d1f094ef71df33e78 Mon Sep 17 00:00:00 2001 From: aarne Date: Thu, 21 Sep 2006 20:55:49 +0000 Subject: [PATCH] finished compact tb format (for a C++ interpreter now) --- src/GF/UseGrammar/Treebank.hs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/GF/UseGrammar/Treebank.hs b/src/GF/UseGrammar/Treebank.hs index 0072d88a5..952b71877 100644 --- a/src/GF/UseGrammar/Treebank.hs +++ b/src/GF/UseGrammar/Treebank.hs @@ -212,13 +212,16 @@ mkCompactTreebank opts sh = printCompactTreebank . mkJustMultiTreebank opts sh printCompactTreebank :: (MultiTreebank,[String]) -> [String] printCompactTreebank (tb,lgs) = (stat:langs:unwords ws : "\n" : linss) where ws = L.sort $ L.nub $ concat $ map (concatMap (words . snd) . snd) tb - linss = map lins tb - lins (_,ls) = unlines [unwords (map encode (words ws)) | (_,ws) <- ls] + + linss = map (unwords . pad) linss0 + linss0 = map (map (show . encode) . words) allExs + allExs = concat [[snd (ls !! i) | (_,ls) <- tb] | i <- [0..length lgs - 1]] encode w = maybe undefined id $ M.lookup w wmap - wmap = M.fromAscList $ zip ws (map show [0..]) + wmap = M.fromAscList $ zip ws [1..] stat = unwords $ map show [length ws, length lgs, length tb, smax] langs = unwords lgs - smax = maximum [length (words l) | l <- linss] + smax = maximum $ map length linss0 + pad ws = ws ++ replicate (smax - length ws) "0" -- [(String,[(String,String)])] -- tree,lang,lin mkJustMultiTreebank :: Options -> ShellState -> [A.Tree] -> (MultiTreebank,[String])