From e4f132204104d23b7d0b91d4acd9994d0943faf7 Mon Sep 17 00:00:00 2001 From: aarne Date: Tue, 24 Jun 2008 21:52:07 +0000 Subject: [PATCH] added default decodings to Make, to enable multilingual utf8 generation --- resource-1.4/Make.hs | 61 ++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 22 deletions(-) diff --git a/resource-1.4/Make.hs b/resource-1.4/Make.hs index a76bb59d7..f815540ff 100644 --- a/resource-1.4/Make.hs +++ b/resource-1.4/Make.hs @@ -13,25 +13,31 @@ import System -- With no argument, lang and api are done, in this order. -- See 'make' below for what is done by which command. -langs = [ - ("arabic", "Ara"), - ("bulgarian","Bul"), - ("catalan", "Cat"), - ("danish", "Dan"), - ("english", "Eng"), - ("finnish", "Fin"), - ("french", "Fre"), - ("hindi", "Hin"), - ("german", "Ger"), - ("interlingua","Ina"), - ("italian", "Ita"), - ("norwegian","Nor"), - ("russian", "Rus"), - ("spanish", "Spa"), - ("swedish", "Swe"), - ("thai", "Tha") +-- the languages have long directory names and short ISO codes (3 letters) +-- we also give the decodings for postprocessing linearizations, as long as grammars +-- don't support all flags needed; they are used in tests + +langsCoding = [ + (("arabic", "Ara"),""), + (("bulgarian","Bul"),"from_cp1251,to_utf8"), + (("catalan", "Cat"),"to_utf8"), + (("danish", "Dan"),"to_utf8"), + (("english", "Eng"),""), + (("finnish", "Fin"),"to_utf8"), + (("french", "Fre"),"to_utf8"), + (("hindi", "Hin"),"to_devanagari,to_utf8"), + (("german", "Ger"),"to_utf8"), + (("interlingua","Ina"),""), + (("italian", "Ita"),"to_utf8"), + (("norwegian","Nor"),"to_utf8"), + (("russian", "Rus"),""), + (("spanish", "Spa"),"to_utf8"), + (("swedish", "Swe"),"to_utf8"), + (("thai", "Tha"),"to_thai,to_utf8") ] +langs = map fst langsCoding + -- languagues for which to compile Lang langsLang = langs `except` ["Ara"] @@ -81,9 +87,11 @@ make xx = do unwords [dir ++ "/Lang" ++ la ++ ".gfo" | (_,la) <- optl langsPGF] ++ " +RTS -K100M" ifxx "test" $ do - gf treeb $ unwords [dir ++ "/Lang" ++ la ++ ".gfo" | (_,la) <- optl langsTest] + let ls = optl langsTest + gf (treeb "Lang" ls) $ unwords [dir ++ "/Lang" ++ la ++ ".gfo" | (_,la) <- ls] ifxx "demo" $ do - gf demos $ unwords ["demo/Demo" ++ la ++ ".gf" | (_,la) <- optl langsDemo] + let ls = optl langsDemo + gf (demos "Demo" ls) $ unwords ["demo/Demo" ++ la ++ ".gf" | (_,la) <- ls] ifxx "clean" $ do system "rm */*.gfo ../alltenses/*.gfo ../present/*.gfo" ifxx "clone" $ do @@ -104,10 +112,11 @@ gf comm file = do putStrLn $ "reading " ++ file system $ "echo \"" ++ comm ++ "\" | gf3 -s " ++ file -treeb = "rf -lines -tree -file=" ++ treebankExx ++ - " | l -treebank | wf -file=" ++ treebankResults +treeb abstr ls = "rf -lines -tree -file=" ++ treebankExx ++ + " | l -treebank " ++ unlexer abstr ls ++ " | wf -file=" ++ treebankResults -demos = "gr -number=100 | l -treebank | ps -to_utf8 -to_html | wf -file=resdemo.html" +demos abstr ls = "gr -number=100 | l -treebank " ++ unlexer abstr ls ++ + " | ps -to_html | wf -file=resdemo.html" lang (lla,la) = lla ++ "/Lang" ++ la ++ ".gf" try (lla,la) = "api/Try" ++ la ++ ".gf" @@ -140,3 +149,11 @@ replaceLang s1 s2 = repl where _ -> s lgs = 3 -- length s1 +unlexer abstr ls = + "-unlexer=\\\"" ++ unwords + [abstr ++ la ++ "=" ++ unl | + lla@(_,la) <- ls, let unl = unlex lla, not (null unl)] ++ + "\\\"" + where + unlex lla = maybe "" id $ lookup lla langsCoding +