1
0
forked from GitHub/gf-rgl

Add "Synopsis" column to languages.csv; use config everywhere

I tried to remove all language lists from Haskell and Makefiles
This commit is contained in:
John J. Camilleri
2018-11-06 10:32:25 +01:00
parent ff9a164884
commit ec9f74d56e
9 changed files with 182 additions and 250 deletions

68
Config.hs Normal file
View File

@@ -0,0 +1,68 @@
-- | Reading language config file
module Config (
LangInfo (..),
loadLangs, loadLangsFrom, configFile
) where
import Data.List (unfoldr)
import System.IO (hPutStrLn,stderr)
import System.Exit (exitFailure)
-- | Path to language config file.
-- This is the file that 'loadLangs' reads; being a relative path, it is
-- resolved against the directory the program is run from.
configFile :: FilePath
configFile = "languages.csv"
-- | Information about a language.
-- One value corresponds to one row of the language config file; the fields
-- are filled from the columns in order by 'loadLangsFrom'.
data LangInfo = LangInfo
  { langCode :: String -- ^ 3-letter ISO 639-2/B code
  , langDir :: String -- ^ directory name
  , langFunctor :: Maybe String -- ^ functor (not used)
  , langUnlexer :: Maybe String -- ^ decoding for postprocessing linearizations
  , langPresent :: Bool -- ^ @Present@ column; 'False' unless the column is @y@
  , langAll :: Bool -- ^ @All@ column; 'True' unless the column is @n@
  , langTry :: Bool -- ^ @Try@ column; 'True' unless the column is @n@
  , langSymbolic :: Bool -- ^ @Symbolic@ column; 'True' unless the column is @n@
  , langCompatibility :: Bool -- ^ @Compatibility@ column; 'False' unless the column is @y@
  , langSynopsis :: Bool -- ^ include in RGL synopsis; 'False' unless the column is @y@
  } deriving (Show,Eq)
-- | Load language information from default config file.
-- Equivalent to calling 'loadLangsFrom' with 'configFile'.
loadLangs :: IO [LangInfo]
loadLangs = loadLangsFrom configFile
-- | Load language information from the specified config file.
--
-- The first line is assumed to be the column header and is skipped. Every
-- other line is split on commas and must provide at least a language code and
-- a directory name; otherwise a message is printed to stderr and the program
-- exits with a failure code. Columns that are missing or empty take their
-- default value.
loadLangsFrom :: FilePath -> IO [LangInfo]
loadLangsFrom path = do
  lns <- fmap lines (readFile path)
  -- 'drop 1' instead of 'tail': an empty file yields an empty list rather
  -- than a runtime error.
  mapM mkLangInfo (drop 1 lns)
  where
    -- Contents of column @n@ (0-based); empty when the line has no such column.
    column :: [String] -> Int -> String
    column bits n = concat (take 1 (drop n bits))

    -- Optional string column: a missing or empty column gives 'Nothing'.
    maybeBit :: [String] -> Int -> Maybe String
    maybeBit bits n = case column bits n of
      ""  -> Nothing
      val -> Just val

    -- Boolean column with a default. When the default is 'True' only an
    -- explicit @n@ turns it off; when it is 'False' only an explicit @y@
    -- turns it on. A missing or empty column gives the default.
    boolBit :: [String] -> Int -> Bool -> Bool
    boolBit bits n def = case column bits n of
      "" -> def
      val
        | def       -> val /= "n"
        | otherwise -> val == "y"

    -- Build one record from one line of the file.
    mkLangInfo :: String -> IO LangInfo
    mkLangInfo s = case separateBy ',' s of
      bits@(code : dir : _) -> return $ LangInfo
        { langCode = code
        , langDir = dir
        , langFunctor = maybeBit bits 2
        , langUnlexer = maybeBit bits 3
        , langPresent = boolBit bits 4 False
        , langAll = boolBit bits 5 True
        , langTry = boolBit bits 6 True
        , langSymbolic = boolBit bits 7 True
        , langCompatibility = boolBit bits 8 False
        , langSynopsis = boolBit bits 9 False
        }
      -- Fewer than two columns: the line cannot name a language.
      _ -> die $ "Invalid entry in " ++ path ++ ": " ++ s
-- | Split a list into the pieces found between occurrences of a separator.
-- Consecutive separators give empty pieces; a single trailing separator does
-- not add a trailing empty piece, and the empty list gives no pieces at all.
-- Source: https://stackoverflow.com/a/4978733/98600
separateBy :: Eq a => a -> [a] -> [[a]]
separateBy chr = go
  where
    go [] = []
    go xs = case break (== chr) xs of
      -- 'drop 1' discards the separator itself, if there was one
      (piece, rest) -> piece : go (drop 1 rest)
-- | Print a message to stderr, then terminate the program with a failure
-- exit code.
die :: String -> IO a
die msg = hPutStrLn stderr msg >> exitFailure

54
Make.hs
View File

@@ -2,7 +2,7 @@
-- | Main build script for RGL
import Data.List (find,isPrefixOf,isSuffixOf,(\\),unfoldr)
import Data.List (find,isPrefixOf,isSuffixOf,(\\))
import Data.Maybe (catMaybes)
import System.IO (hPutStrLn,stderr)
import System.IO.Error (catchIOError)
@@ -15,6 +15,7 @@ import System.Directory (createDirectoryIfMissing,copyFile,getDirectoryContents,
import System.Directory (getModificationTime,setModificationTime)
#endif
import Control.Monad (when,unless)
import Config
main :: IO ()
main = do
@@ -347,57 +348,6 @@ verbose_switch_short = "-v"
getFlag :: String -> [String] -> Maybe String
getFlag flag args = fmap (drop (length flag)) $ find (isPrefixOf flag) args
-------------------------------------------------------------------------------
-- Languages of the RGL
-- | Path to language config file
configFile :: FilePath
configFile = "languages.csv"
-- | Information about a language
data LangInfo = LangInfo
{ langCode :: String -- ^ 3-letter ISO 639-2/B code
, langDir :: String -- ^ directory name
, langFunctor :: Maybe String -- ^ functor (not used)
, langUnlexer :: Maybe String -- ^ decoding for postprocessing linearizations
, langPresent :: Bool
, langAll :: Bool
, langTry :: Bool
, langSymbolic :: Bool
, langCompatibility :: Bool
} deriving (Show,Eq)
-- | Load language information from config file
loadLangs :: IO [LangInfo]
loadLangs = do
lns <- readFile configFile >>= return . lines
mapM mkLangInfo (tail lns)
where
maybeBit bits n = if length bits >= (n+1) && length (bits !! n) > 0 then Just (bits !! n) else Nothing
boolBit bits n def = if length bits >= (n+1) && length (bits !! n) > 0 then (if def then bits !! n /= "n" else bits !! n == "y") else def
mkLangInfo s =
let bits = separateBy ',' s in
if length bits < 2
then die $ "Invalid entry in " ++ configFile ++ ": " ++ s
else return $ LangInfo
{ langCode = bits !! 0
, langDir = bits !! 1
, langFunctor = maybeBit bits 2
, langUnlexer = maybeBit bits 3
, langPresent = boolBit bits 4 False
, langAll = boolBit bits 5 True
, langTry = boolBit bits 6 True
, langSymbolic = boolBit bits 7 True
, langCompatibility = boolBit bits 8 False
}
-- | Separate a string on a character
-- Source: https://stackoverflow.com/a/4978733/98600
separateBy :: Eq a => a -> [a] -> [[a]]
separateBy chr = unfoldr sep where
sep [] = Nothing
sep l = Just . fmap (drop 1) . break (== chr) $ l
-------------------------------------------------------------------------------
-- Executing GF

View File

@@ -41,8 +41,10 @@ Description of columns:
- Try: languages for which to compile `Try`
- Symbolic: languages for which to compile `Symbolic`
- Compatibility: languages for which to compile `Compatibility`
- Synopsis: languages to include in the RGL synopsis document
Columns can be a string, just `y`'s (where nothing means `n`) or just (`n`'s where nothing means `y`).
Columns can be a string, just `y`'s (where nothing means `n`), just `n`'s (where nothing means `y`),
or a mixture of both `y`'s and `n`'s.
## Haskell script: `Make.hs`
@@ -108,11 +110,9 @@ You can pass the following flags:
## Windows batch file: `Make.bat`
**This script is still untested.**
This method is provided as an alternative for Windows users who don't have Haskell or Bash installed.
This method is provided as an alternative for Windows users who don't have Haskell installed.
It is supposed to be a port of Make.sh and works in largely the same way.
It is supposed to be a port of `Make.sh` and works in largely the same way.
In particular, it accepts the same flags (in the same format) as described above.
However, it currently tries to build all modules for all languages and doesn't consider the details of which modules should be compiled for each language (specified in `languages.csv`).

View File

@@ -1,4 +1,4 @@
.PHONY: abstract synopsis index status
.PHONY: all index status synopsis abstract
all: synopsis
@@ -15,17 +15,17 @@ synopsis: synopsis.html
S=../src
# List of languages extracted from MkSynopsis.hs
LANGS=Afr Ara Bul Cat Chi Dan Dut Eng Est Eus Fin Fre Ger Gre Hin Ice Ita Jpn Lav Mlt Mon Nep Nor Nno Pes Pnb Pol Por Ron Rus Snd Spa Swe Tha Urd
# List of languages extracted from languages.csv, with 'Synopsis' column == y
LANGS=$(shell cat ../languages.csv | cut -d',' -f1,10 | grep ',y' | cut -d',' -f1)
# This list was constructed by observing what files MkSynopsis.hs reads
SRC_FILES=$S/abstract/Common.gf $S/abstract/Cat.gf $S/api/Constructors.gf $S/abstract/Structural.gf $(patsubst %,$S/*/Paradigms%.gf,$(LANGS))
SRC_FILES=$(S)/abstract/Common.gf $(S)/abstract/Cat.gf $(S)/api/Constructors.gf $(S)/abstract/Structural.gf $(patsubst %,$S/*/Paradigms%.gf,$(LANGS))
EXAMPLES_OUT=$(patsubst %,api-examples-%.txt,$(LANGS))
INCLUDES=synopsis-intro.txt categories-intro.txt categories-imagemap.html synopsis-additional.txt synopsis-browse.txt synopsis-example.txt
synopsis.html: MkSynopsis.hs MkExxTable.hs $(INCLUDES) $(EXAMPLES_OUT) $(SRC_FILES)
runghc MkSynopsis.hs
runghc -i.. MkSynopsis.hs
categories.png: categories.dot
dot -Tpng $^ > $@
@@ -37,11 +37,9 @@ abstract:
$(GFDOC) -txthtml $S/abstract/*.gf
mv $S/abstract/*.html abstract
api-examples.gfs: api-examples.txt MkExx.hs
runghc MkExx.hs < $< > $@
# Since .gfo files aren't self-contained, the dependencies given here are
# incomplete. But I am thinking that the Try%.gfo file will always be newer
# than any other files it depends on, so the rule will trigger when

View File

@@ -30,45 +30,3 @@ mkIdent = concatMap unspec where
')' -> ""
':' -> "-"
_ -> [c]
langsCoding = [
(("amharic", "Amh"),""),
(("arabic", "Ara"),""),
(("basque", "Eus"),""),
(("bulgarian","Bul"),""),
(("catalan", "Cat"),"Romance"),
(("danish", "Dan"),"Scand"),
(("dutch", "Dut"),""),
(("english", "Eng"),""),
(("finnish", "Fin"),""),
(("french", "Fre"),"Romance"),
(("hindi", "Hin"),"Hindustani"),
(("german", "Ger"),""),
(("interlingua","Ina"),""),
(("italian", "Ita"),"Romance"),
(("latin", "Lat"),""),
(("norwegian","Nor"),"Scand"),
(("polish", "Pol"),""),
(("punjabi", "Pnb"),""),
(("portuguese", "Por"), "Romance"),
(("romanian", "Ron"),""),
(("russian", "Rus"),""),
(("spanish", "Spa"),"Romance"),
(("swedish", "Swe"),"Scand"),
(("thai", "Tha"),""),
(("turkish", "Tur"),""),
(("urdu", "Urd"),"Hindustani")
]
langs = map fst langsCoding
-- languagues for which Try is normally compiled
langsLang = langs `except` langsIncomplete
-- languages for which Lang can be compiled but which are incomplete
langsIncomplete = ["Amh","Ara","Hin","Lat","Pnb","Rus","Tha","Tur","Urd"]
except ls es = filter (flip notElem es . snd) ls

View File

@@ -4,7 +4,6 @@ module MkExxTable (getApiExx, ApiExx, prApiEx, mkEx) where
import System.Environment(getArgs)
import Control.Monad(when)
import qualified Data.Map as M
import Data.Char
main = do
xx <- getArgs
@@ -101,6 +100,6 @@ bind ws = case ws of
"&+":ws2 -> bind ws2
"Predef.BIND":ws2 -> bind ws2
"Predef.SOFT_BIND":ws2 -> bind ws2
w : ws2 -> w : bind ws2
w : "++" : ws2 -> w : bind ws2
w : ws2 -> w : bind ws2
_ -> ws

View File

@@ -1,27 +1,35 @@
import MkExxTable
import System.Process(system)
import System.Environment(getArgs)
import System.FilePath((</>),(<.>))
import Data.Char
import Data.List
import qualified Data.ByteString.Char8 as BS
import qualified Data.Map as M
---import Debug.Trace ----
import Text.Printf
import Config
type Cats = [(String,String,String)]
type Rules = [(String,String,String)]
-- the file generated
synopsis :: FilePath
synopsis = "synopsis.txt"
-- the language in which revealed examples are shown
revealedLang :: String
revealedLang = "Eng"
-- all languages shown (a copy of this list appears in Makefile)
apiExxFiles = ["api-examples-" ++ lang ++ ".txt" | lang <- words
-- "Eng Chi"
"Afr Ara Bul Cat Chi Dan Dut Eng Est Eus Fin Fre Ger Gre Hin Ice Ita Jpn Lav Mlt Mon Nep Nor Nno Pes Pnb Pol Por Ron Rus Snd Spa Swe Tha Urd"
]
-- | Names of the API example files, one for every language in the config
-- file (read from the parent directory) that is marked for the synopsis.
apiExxFiles :: IO [FilePath]
apiExxFiles =
  fmap (map exampleFile . filter langSynopsis) (loadLangsFrom (".." </> configFile))
  where
    exampleFile lang = "api-examples-" ++ langCode lang ++ ".txt"
main :: IO ()
main = do
xx <- getArgs
let isLatex = case xx of
@@ -31,7 +39,7 @@ main = do
cs2 <- getCats catAPI
let cs = sortCats (cs1 ++ cs2)
writeFile synopsis "GF Resource Grammar Library: Synopsis"
append "B. Bringert, T. Hallgren, and A. Ranta"
-- append "B. Bringert, T. Hallgren, and A. Ranta"
space
append "%!Encoding:utf-8"
append "%!style(html): ./revealpopup.css"
@@ -66,7 +74,7 @@ main = do
space
link "Source 2:" structuralAPI
space
apiExx <- getApiExx apiExxFiles
apiExx <- apiExxFiles >>= getApiExx
rs <- getRules apiExx syntaxAPI
--- putStrLn $ unlines ["p -cat=" ++ last (words t) ++
--- " \"" ++ e ++ "\"" | (_,t,e) <- rs, not (null e)] ----
@@ -83,7 +91,7 @@ main = do
-- delimit rs
space
title "Lexical Paradigms"
mapM_ (putParadigms isLatex cs) paradigmFiles
paradigmFiles >>= mapM_ (putParadigms isLatex cs)
space
include "synopsis-additional.txt"
space
@@ -227,7 +235,6 @@ mkIdent = concatMap unspec where
':' -> "-"
_ -> [c]
mkCatTable :: Bool -> Cats -> [String]
mkCatTable isLatex cs = inChunks chsize (\rs -> header ++ map mk1 rs) cs
where
@@ -236,49 +243,36 @@ mkCatTable isLatex cs = inChunks chsize (\rs -> header ++ map mk1 rs) cs
mk1 (name,expl,ex) = unwords ["|", showCat cs name, "|", expl, "|", typo ex, "|"]
typo ex = if take 1 ex == "\"" then itf (init (tail ex)) else ex
srcPath = ("../src" ++)
srcPath = ((</>) "../src")
commonAPI = srcPath "/abstract/Common.gf"
catAPI = srcPath "/abstract/Cat.gf"
syntaxAPI = srcPath "/api/Constructors.gf"
structuralAPI = srcPath "/abstract/Structural.gf"
paradigmFiles = [
("Afrikaans", srcPath "/afrikaans/ParadigmsAfr.gf"),
("Arabic", srcPath "/arabic/ParadigmsAra.gf"),
("Basque", srcPath "/basque/ParadigmsEus.gf"),
("Bulgarian", srcPath "/bulgarian/ParadigmsBul.gf"),
("Catalan", srcPath "/catalan/ParadigmsCat.gf"),
("Chinese", srcPath "/chinese/ParadigmsChi.gf"),
("Danish", srcPath "/danish/ParadigmsDan.gf"),
("Dutch", srcPath "/dutch/ParadigmsDut.gf"),
("English", srcPath "/english/ParadigmsEng.gf"),
("Estonian", srcPath "/estonian/ParadigmsEst.gf"),
("Finnish", srcPath "/finnish/ParadigmsFin.gf"),
("French", srcPath "/french/ParadigmsFre.gf"),
("German", srcPath "/german/ParadigmsGer.gf"),
("Greek", srcPath "/greek/ParadigmsGre.gf"),
("Hindi", srcPath "/hindi/ParadigmsHin.gf"),
("Icelandic", srcPath "/icelandic/ParadigmsIce.gf"),
-- ("Interlingua", srcPath "/interlingua/ParadigmsIna.gf"),
("Italian", srcPath "/italian/ParadigmsIta.gf"),
("Japanese", srcPath "/japanese/ParadigmsJpn.gf"),
("Latvian", srcPath "/latvian/ParadigmsLav.gf"),
("Maltese", srcPath "/maltese/ParadigmsMlt.gf"),
("Mongolian", srcPath "/mongolian/ParadigmsMon.gf"),
("Nepali", srcPath "/nepali/ParadigmsNep.gf"),
("Norwegian", srcPath "/norwegian/ParadigmsNor.gf"),
("Nynorsk", srcPath "/nynorsk/ParadigmsNno.gf"),
("Polish", srcPath "/polish/ParadigmsPol.gf"),
("Punjabi", srcPath "/punjabi/ParadigmsPnb.gf"),
("Portuguese", srcPath "/portuguese/ParadigmsPor.gf"),
("Romanian", srcPath "/romanian/ParadigmsRon.gf"),
("Russian", srcPath "/russian/ParadigmsRus.gf"),
("Sindhi", srcPath "/sindhi/ParadigmsSnd.gf"),
("Spanish", srcPath "/spanish/ParadigmsSpa.gf"),
("Swedish", srcPath "/swedish/ParadigmsSwe.gf"),
("Thai", srcPath "/thai/ParadigmsTha.gf"),
("Urdu", srcPath "/urdu/ParadigmsUrd.gf")
]
commonAPI = srcPath "abstract/Common.gf"
catAPI = srcPath "abstract/Cat.gf"
syntaxAPI = srcPath "api/Constructors.gf"
structuralAPI = srcPath "abstract/Structural.gf"
-- | Display name and path of the Paradigms module for every language in the
-- config file (read from the parent directory) that is marked for the
-- synopsis.
paradigmFiles :: IO [(String,FilePath)]
paradigmFiles =
  fmap (map entry . filter langSynopsis) (loadLangsFrom (".." </> configFile))
  where
    entry lang =
      ( formatName (langDir lang)
      , srcPath (printf "%s/Paradigms%s.gf" (langDir lang) (langCode lang))
      )
-- | Format language name from directory name:
-- underscores separate words and each word gets an initial capital, e.g.
-- @"ancient_greek"@ becomes @"Ancient Greek"@.
--
-- Empty segments (from a leading or doubled underscore) are dropped, so the
-- function is total; the previous lambda @\\(s:ss) -> ...@ failed with a
-- pattern-match error on such input.
formatName :: String -> String
formatName = unwords . map capitalise . filter (not . null) . splitOn (=='_')
  where
    -- Upper-case the first character, leaving the rest untouched.
    capitalise :: String -> String
    capitalise []     = []
    capitalise (c:cs) = toUpper c : cs
-- | Split a string at every character matching the predicate, similar to
-- 'words' but with a caller-supplied separator test. Matching characters are
-- removed; adjacent or leading separators give empty pieces, and the empty
-- string gives no pieces.
splitOn :: (Char -> Bool) -> String -> [String]
splitOn isSep = go
  where
    go "" = []
    go str = case break isSep str of
      -- 'drop 1' discards the separator itself, if there was one
      (chunk, remainder) -> chunk : go (drop 1 remainder)
append s = appendFile synopsis ('\n':s)
title s = append $ "=" ++ s ++ "="
@@ -339,7 +333,7 @@ showTyp cs = unwords . map f . words
-- to work around GHC 6.12 file input
readFileC cod file = do
let tmp = file ++ ".tmp"
let tmp = file <.> "tmp"
case cod of
"utf8" -> readFile file
_ -> do

View File

@@ -1,35 +0,0 @@
Afrikaans
Amharic
Arabic
Bulgarian
Catalan
Chinese
Danish
Dutch
English
Finnish
French
German
Greek
Hebrew
Hindi
Interlingua
Japanese
Italian
Latin
Latvian
<li>Maltese
Nepali
Norwegian
Persian
Polish
Punjabi
Romanian
Russian
Sindhi
Spanish
Swahili
Swedish
Thai
Turkish
Urdu

View File

@@ -1,45 +1,45 @@
Code,Directory,Functor,Unlexer,Present,All,Try,Symbolic,Compatibility
Afr,afrikaans,,,,,,n,
Amh,amharic,,,,,n,n,
Ara,arabic,,,,,,y,
Eus,basque,,,,,,,
Bul,bulgarian,,,y,,,,
Cat,catalan,Romance,,y,,,,y
Chi,chinese,,,,,,,
Dan,danish,Scand,,y,,,,
Dut,dutch,,,y,,,,
Eng,english,,,y,,,,y
Est,estonian,,,,,,,
Fin,finnish,,,y,,,,y
Fre,french,Romance,,y,,,,y
Grc,ancient_greek,,,y,,n,n,
Gre,greek,,,,,,,
Heb,hebrew,,,,,n,n,
Hin,hindi,Hindustani,to_devanagari,y,,,,
Hun,hungarian,,,y,n,n,n,
Ger,german,,,,,,,
Ice,icelandic,,,,,,n,
Ina,interlingua,,,y,,n,n,
Ita,italian,Romance,,y,,,,y
Jpn,japanese,,,,,,,
Lat,latin,,,y,,n,n,
Lav,latvian,,,,,,,y
Mlt,maltese,,,,,,,
Mon,mongolian,,,,,,n,
Nep,nepali,,,,,,n,
Nor,norwegian,Scand,,y,,,,
Nno,nynorsk,,,y,,,,
Pes,persian,,,,,,,
Pol,polish,,,,,,,
Por,portuguese,Romance,,y,,,,y
Pnb,punjabi,,,y,,,,
Ron,romanian,,,y,,,,
Rus,russian,,,y,,,,
Snd,sindhi,,,,,,,
Spa,spanish,Romance,,y,,,,y
Swa,swahili,,,,n,n,n,y
Swe,swedish,Scand,,y,,,,y
Tel,telugu,,,y,n,n,n,
Tha,thai,,to_thai,,,,,
Tur,turkish,,,,,n,n,
Urd,urdu,Hindustani,,,,,,
Code,Directory,Functor,Unlexer,Present,All,Try,Symbolic,Compatibility,Synopsis
Afr,afrikaans,,,,,,n,,y
Amh,amharic,,,,,n,n,,n
Ara,arabic,,,,,,y,,y
Bul,bulgarian,,,y,,,,,y
Cat,catalan,Romance,,y,,,,y,y
Chi,chinese,,,,,,,,y
Dan,danish,Scand,,y,,,,,y
Dut,dutch,,,y,,,,,y
Eng,english,,,y,,,,y,y
Est,estonian,,,,,,,,y
Eus,basque,,,,,,,,y
Fin,finnish,,,y,,,,y,y
Fre,french,Romance,,y,,,,y,y
Ger,german,,,,,,,,y
Grc,ancient_greek,,,y,,n,n,,n
Gre,greek,,,,,,,,y
Heb,hebrew,,,,,n,n,,n
Hin,hindi,Hindustani,to_devanagari,y,,,,,y
Hun,hungarian,,,y,n,n,n,,n
Ice,icelandic,,,,,,n,,y
Ina,interlingua,,,y,,n,n,,n
Ita,italian,Romance,,y,,,,y,y
Jpn,japanese,,,,,,,,y
Lat,latin,,,y,,n,n,,n
Lav,latvian,,,,,,,y,y
Mlt,maltese,,,,,,,,y
Mon,mongolian,,,,,,n,,y
Nep,nepali,,,,,,n,,y
Nno,nynorsk,,,y,,,,,y
Nor,norwegian,Scand,,y,,,,,y
Pes,persian,,,,,,,,y
Pnb,punjabi,,,y,,,,,y
Pol,polish,,,,,,,,y
Por,portuguese,Romance,,y,,,,y,y
Ron,romanian,,,y,,,,,y
Rus,russian,,,y,,,,,y
Snd,sindhi,,,,,,,,y
Spa,spanish,Romance,,y,,,,y,y
Swa,swahili,,,,n,n,n,y,n
Swe,swedish,Scand,,y,,,,y,y
Tel,telugu,,,y,n,n,n,,n
Tha,thai,,to_thai,,,,,,y
Tur,turkish,,,,,n,n,,n
Urd,urdu,Hindustani,,,,,,,y
1 Code Directory Functor Unlexer Present All Try Symbolic Compatibility Synopsis
2 Afr afrikaans n y
3 Amh amharic n n n
4 Ara arabic y y
5 Eus Bul basque bulgarian y y
6 Bul Cat bulgarian catalan Romance y y y
7 Cat Chi catalan chinese Romance y y y
8 Chi Dan chinese danish Scand y y
9 Dan Dut danish dutch Scand y y
10 Dut Eng dutch english y y y
11 Eng Est english estonian y y y
12 Est Eus estonian basque y
13 Fin finnish y y y
14 Fre french Romance y y y
15 Grc Ger ancient_greek german y n n y
16 Gre Grc greek ancient_greek y n n n
17 Heb Gre hebrew greek n n y
18 Hin Heb hindi hebrew Hindustani to_devanagari y n n n
19 Hun Hin hungarian hindi Hindustani to_devanagari y n n n y
20 Ger Hun german hungarian y n n n n
21 Ice icelandic n y
22 Ina interlingua y n n n
23 Ita italian Romance y y y
24 Jpn japanese y
25 Lat latin y n n n
26 Lav latvian y y
27 Mlt maltese y
28 Mon mongolian n y
29 Nep nepali n y
30 Nor Nno norwegian nynorsk Scand y y
31 Nno Nor nynorsk norwegian Scand y y
32 Pes persian y
33 Pol Pnb polish punjabi y y
34 Por Pol portuguese polish Romance y y y
35 Pnb Por punjabi portuguese Romance y y y
36 Ron romanian y y
37 Rus russian y y
38 Snd sindhi y
39 Spa spanish Romance y y y
40 Swa swahili n n n y n
41 Swe swedish Scand y y y
42 Tel telugu y n n n n
43 Tha thai to_thai y
44 Tur turkish n n n
45 Urd urdu Hindustani y