diff --git a/Config.hs b/Config.hs new file mode 100644 index 000000000..d3b325fdc --- /dev/null +++ b/Config.hs @@ -0,0 +1,68 @@ +-- | Reading language config file +module Config ( + LangInfo (..), + loadLangs, loadLangsFrom, configFile + ) where + +import Data.List (unfoldr) +import System.IO (hPutStrLn,stderr) +import System.Exit (exitFailure) + +-- | Path to language config file +configFile :: FilePath +configFile = "languages.csv" + +-- | Information about a language +data LangInfo = LangInfo + { langCode :: String -- ^ 3-letter ISO 639-2/B code + , langDir :: String -- ^ directory name + , langFunctor :: Maybe String -- ^ functor (not used) + , langUnlexer :: Maybe String -- ^ decoding for postprocessing linearizations + , langPresent :: Bool + , langAll :: Bool + , langTry :: Bool + , langSymbolic :: Bool + , langCompatibility :: Bool + , langSynopsis :: Bool -- ^ include in RGL synopsis + } deriving (Show,Eq) + +-- | Load language information from default config file +loadLangs :: IO [LangInfo] +loadLangs = loadLangsFrom configFile + +-- | Load language information from specified config file +loadLangsFrom:: FilePath -> IO [LangInfo] +loadLangsFrom configFile = do + lns <- readFile configFile >>= return . lines + mapM mkLangInfo (tail lns) + where + maybeBit bits n = if length bits >= (n+1) && length (bits !! n) > 0 then Just (bits !! n) else Nothing + boolBit bits n def = if length bits >= (n+1) && length (bits !! n) > 0 then (if def then bits !! n /= "n" else bits !! n == "y") else def + mkLangInfo s = + let bits = separateBy ',' s in + if length bits < 2 + then die $ "Invalid entry in " ++ configFile ++ ": " ++ s + else return $ LangInfo + { langCode = bits !! 0 + , langDir = bits !! 1 + , langFunctor = maybeBit bits 2 + , langUnlexer = maybeBit bits 3 + , langPresent = boolBit bits 4 False + , langAll = boolBit bits 5 True + , langTry = boolBit bits 6 True + , langSymbolic = boolBit bits 7 True + , langCompatibility = boolBit bits 8 False + , langSynopsis = boolBit bits 9 False + } + +-- | Separate a string on a character +-- Source: https://stackoverflow.com/a/4978733/98600 +separateBy :: Eq a => a -> [a] -> [[a]] +separateBy chr = unfoldr sep where + sep [] = Nothing + sep l = Just . fmap (drop 1) . break (== chr) $ l + +die :: String -> IO a +die s = do + hPutStrLn stderr s + exitFailure diff --git a/Make.hs b/Make.hs index 2a40e3989..420d005c9 100644 --- a/Make.hs +++ b/Make.hs @@ -2,7 +2,7 @@ -- | Main build script for RGL -import Data.List (find,isPrefixOf,isSuffixOf,(\\),unfoldr) +import Data.List (find,isPrefixOf,isSuffixOf,(\\)) import Data.Maybe (catMaybes) import System.IO (hPutStrLn,stderr) import System.IO.Error (catchIOError) @@ -15,6 +15,7 @@ import System.Directory (createDirectoryIfMissing,copyFile,getDirectoryContents, import System.Directory (getModificationTime,setModificationTime) #endif import Control.Monad (when,unless) +import Config main :: IO () main = do @@ -347,57 +348,6 @@ verbose_switch_short = "-v" getFlag :: String -> [String] -> Maybe String getFlag flag args = fmap (drop (length flag)) $ find (isPrefixOf flag) args -------------------------------------------------------------------------------- --- Languages of the RGL - --- | Path to language config file -configFile :: FilePath -configFile = "languages.csv" - --- | Information about a language -data LangInfo = LangInfo - { langCode :: String -- ^ 3-letter ISO 639-2/B code - , langDir :: String -- ^ directory name - , langFunctor :: Maybe String -- ^ functor (not used) - , langUnlexer :: Maybe String -- ^ decoding for postprocessing linearizations - , langPresent :: Bool - , langAll :: Bool - , langTry :: Bool - , langSymbolic :: Bool - , langCompatibility :: Bool - } deriving (Show,Eq) - --- | Load language information from config file -loadLangs :: IO [LangInfo] -loadLangs = do - lns <- readFile configFile >>= return . lines - mapM mkLangInfo (tail lns) - where - maybeBit bits n = if length bits >= (n+1) && length (bits !! n) > 0 then Just (bits !! n) else Nothing - boolBit bits n def = if length bits >= (n+1) && length (bits !! n) > 0 then (if def then bits !! n /= "n" else bits !! n == "y") else def - mkLangInfo s = - let bits = separateBy ',' s in - if length bits < 2 - then die $ "Invalid entry in " ++ configFile ++ ": " ++ s - else return $ LangInfo - { langCode = bits !! 0 - , langDir = bits !! 1 - , langFunctor = maybeBit bits 2 - , langUnlexer = maybeBit bits 3 - , langPresent = boolBit bits 4 False - , langAll = boolBit bits 5 True - , langTry = boolBit bits 6 True - , langSymbolic = boolBit bits 7 True - , langCompatibility = boolBit bits 8 False - } - --- | Separate a string on a character --- Source: https://stackoverflow.com/a/4978733/98600 -separateBy :: Eq a => a -> [a] -> [[a]] -separateBy chr = unfoldr sep where - sep [] = Nothing - sep l = Just . fmap (drop 1) . break (== chr) $ l - ------------------------------------------------------------------------------- -- Executing GF diff --git a/README.md b/README.md index 4cd0b2449..dda79ac41 100644 --- a/README.md +++ b/README.md @@ -41,8 +41,10 @@ Description of columns: - Try: languages for which to compile `Try` - Symbolic: languages for which to compile `Symbolic` - Compatibility: languages for which to complile `Compatibility` +- Synopsis: languages to include in the RGL synopsis document -Columns can be a string, just `y`'s (where nothing means `n`) or just (`n`'s where nothing means `y`). +Columns can be a string, just `y`'s (where nothing means `n`) or just (`n`'s where nothing means `y`), +or a mixture of both `y`'s and `n`'s. ## Haskell script: `Make.hs` @@ -108,11 +110,9 @@ You can pass the following flags: ## Windows batch file: `Make.bat` -**This script is still untested.** +This method is provided as an alternative for Windows users who don't have Haskell or Bash installed. -This method is provided as an alternative for Windows users who don't have Haskell installed. - -It is supposed to be a port of Make.sh and works in largely the same way. +It is supposed to be a port of `Make.sh` and works in largely the same way. In particular, it accepts the same flags (in the same format) as described above. However it currently tries to build all modules for all languages and doesn't consider the details of which modules should be compiled for each language (specified in `languages.csv`) diff --git a/doc/Makefile b/doc/Makefile index 556973dd7..90133f78e 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -1,4 +1,4 @@ -.PHONY: abstract synopsis index status +.PHONY: all index status synopsis abstract all: synopsis @@ -15,17 +15,17 @@ synopsis: synopsis.html S=../src -# List of languages extracted from MkSynopsis.hs -LANGS=Afr Ara Bul Cat Chi Dan Dut Eng Est Eus Fin Fre Ger Gre Hin Ice Ita Jpn Lav Mlt Mon Nep Nor Nno Pes Pnb Pol Por Ron Rus Snd Spa Swe Tha Urd +# List of languages extracted from languages.csv, with 'Synopsis' column == y +LANGS=$(shell cat ../languages.csv | cut -d',' -f1,10 | grep ',y' | cut -d',' -f1) # This list was constructed by observing what files MkSynopsis.hs reads -SRC_FILES=$S/abstract/Common.gf $S/abstract/Cat.gf $S/api/Constructors.gf $S/abstract/Structural.gf $(patsubst %,$S/*/Paradigms%.gf,$(LANGS)) +SRC_FILES=$(S)/abstract/Common.gf $(S)/abstract/Cat.gf $(S)/api/Constructors.gf $(S)/abstract/Structural.gf $(patsubst %,$S/*/Paradigms%.gf,$(LANGS)) EXAMPLES_OUT=$(patsubst %,api-examples-%.txt,$(LANGS)) INCLUDES=synopsis-intro.txt categories-intro.txt categories-imagemap.html synopsis-additional.txt synopsis-browse.txt synopsis-example.txt synopsis.html: MkSynopsis.hs MkExxTable.hs $(INCLUDES) $(EXAMPLES_OUT) $(SRC_FILES) - runghc MkSynopsis.hs + runghc -i.. MkSynopsis.hs categories.png: categories.dot dot -Tpng $^ > $@ @@ -37,11 +37,9 @@ abstract: $(GFDOC) -txthtml $S/abstract/*.gf mv $S/abstract/*.html abstract - api-examples.gfs: api-examples.txt MkExx.hs runghc MkExx.hs < $< > $@ - # Since .gfo files aren't self-contained, the dependencies given here are # incomplete. But I am thinking that the Try%.gfo file will always be newer # than any other files it depends on, so the rule will trigger when diff --git a/doc/MkExx.hs b/doc/MkExx.hs index 16eb38bfd..0db35218e 100644 --- a/doc/MkExx.hs +++ b/doc/MkExx.hs @@ -6,7 +6,7 @@ main = interact (unlines . concatMap mkScript . takeWhile (/="--.") . lines) mkScript l = case l of - ' ':_ -> + ' ':_ -> let ident = mkIdent $ unwords $ takeWhile (/="--") $ words l in [add $ psq ident] '-':_ -> [] @@ -30,45 +30,3 @@ mkIdent = concatMap unspec where ')' -> "" ':' -> "-" _ -> [c] - - - -langsCoding = [ - (("amharic", "Amh"),""), - (("arabic", "Ara"),""), - (("basque", "Eus"),""), - (("bulgarian","Bul"),""), - (("catalan", "Cat"),"Romance"), - (("danish", "Dan"),"Scand"), - (("dutch", "Dut"),""), - (("english", "Eng"),""), - (("finnish", "Fin"),""), - (("french", "Fre"),"Romance"), - (("hindi", "Hin"),"Hindustani"), - (("german", "Ger"),""), - (("interlingua","Ina"),""), - (("italian", "Ita"),"Romance"), - (("latin", "Lat"),""), - (("norwegian","Nor"),"Scand"), - (("polish", "Pol"),""), - (("punjabi", "Pnb"),""), - (("portuguese", "Por"), "Romance"), - (("romanian", "Ron"),""), - (("russian", "Rus"),""), - (("spanish", "Spa"),"Romance"), - (("swedish", "Swe"),"Scand"), - (("thai", "Tha"),""), - (("turkish", "Tur"),""), - (("urdu", "Urd"),"Hindustani") - ] - - -langs = map fst langsCoding - --- languagues for which Try is normally compiled -langsLang = langs `except` langsIncomplete - --- languages for which Lang can be compiled but which are incomplete -langsIncomplete = ["Amh","Ara","Hin","Lat","Pnb","Rus","Tha","Tur","Urd"] - -except ls es = filter (flip notElem es . snd) ls diff --git a/doc/MkExxTable.hs b/doc/MkExxTable.hs index a45effabc..4cdab36c5 100644 --- a/doc/MkExxTable.hs +++ b/doc/MkExxTable.hs @@ -4,10 +4,9 @@ module MkExxTable (getApiExx, ApiExx, prApiEx, mkEx) where import System.Environment(getArgs) import Control.Monad(when) import qualified Data.Map as M -import Data.Char main = do - xx <- getArgs + xx <- getArgs aexx <- getApiExx' True xx return () -- putStrLn $ prApiExx aexx @@ -16,7 +15,7 @@ getApiExx = getApiExx' False getApiExx' verbose xx = do s <- readFile (head xx) - let aet = getApiExxTrees $ filter validOutput $ mergeOutput $ lines s + let aet = getApiExxTrees $ filter validOutput $ mergeOutput $ lines s aeos <- mapM (readApiExxOne verbose) xx let aexx = mkApiExx $ ("API",aet) : aeos -- putStrLn $ prApiExx aexx @@ -57,7 +56,7 @@ cleanUp = dropWhile (flip elem " >") --- this makes txt2tags loop... mergeOutput ls = ls mergeOutputt ls = case ls of - l@('>':_):ll -> let (ll1,ll2) = span ((/=">") . take 1) ll in unwords (l : map (unwords . words) ll1) : mergeOutput ll2 + l@('>':_):ll -> let (ll1,ll2) = span ((/=">") . take 1) ll in unwords (l : map (unwords . words) ll1) : mergeOutput ll2 _:ll -> mergeOutput ll _ -> [] @@ -65,15 +64,15 @@ mergeOutputt ls = case ls of validOutput = (==">") . take 1 mkApiExx :: [(String,ApiExxOne)] -> ApiExx -mkApiExx ltes = - M.fromList [(t, - M.fromList [(l,maybe "NONE" id (M.lookup t te)) | (l,te) <- ltes]) +mkApiExx ltes = + M.fromList [(t, + M.fromList [(l,maybe "NONE" id (M.lookup t te)) | (l,te) <- ltes]) | t <- M.keys firstL] where firstL = snd (head ltes) prApiExx :: ApiExx -> String -prApiExx aexx = unlines +prApiExx aexx = unlines [unlines (t:prApiEx lexx) | (t,lexx) <- M.toList aexx] prApiEx :: M.Map String String -> [String] @@ -81,7 +80,7 @@ prApiEx apexx = case M.toList apexx of (a,e):lexx -> (a ++ ": ``" ++ unwords (words e) ++ "``"): [l ++ ": //" ++ mkEx l e ++ "//" | (l,e) <- lexx] -mkEx l = unws . bind . mkE . words where +mkEx l = unws . bind . mkE . words where unws = if elem l ["Chi","Jpn","Tha"] then concat else unwords -- remove spaces mkE e = case e of "atomic":"term":_ -> ["*"] @@ -101,6 +100,6 @@ bind ws = case ws of "&+":ws2 -> bind ws2 "Predef.BIND":ws2 -> bind ws2 "Predef.SOFT_BIND":ws2 -> bind ws2 - w : ws2 -> w : bind ws2 w : "++" : ws2 -> w : bind ws2 + w : ws2 -> w : bind ws2 _ -> ws diff --git a/doc/MkSynopsis.hs b/doc/MkSynopsis.hs index e2040fe49..777c3aad2 100644 --- a/doc/MkSynopsis.hs +++ b/doc/MkSynopsis.hs @@ -1,27 +1,35 @@ import MkExxTable import System.Process(system) import System.Environment(getArgs) +import System.FilePath((>),(<.>)) import Data.Char import Data.List -import qualified Data.ByteString.Char8 as BS import qualified Data.Map as M ----import Debug.Trace ---- +import Text.Printf +import Config type Cats = [(String,String,String)] type Rules = [(String,String,String)] -- the file generated +synopsis :: FilePath synopsis = "synopsis.txt" -- the language in which revealed examples are shown +revealedLang :: String revealedLang = "Eng" -- all languages shown (a copy of this list appears in Makefile) -apiExxFiles = ["api-examples-" ++ lang ++ ".txt" | lang <- words --- "Eng Chi" - "Afr Ara Bul Cat Chi Dan Dut Eng Est Eus Fin Fre Ger Gre Hin Ice Ita Jpn Lav Mlt Mon Nep Nor Nno Pes Pnb Pol Por Ron Rus Snd Spa Swe Tha Urd" - ] +apiExxFiles :: IO [FilePath] +apiExxFiles = do + langs <- loadLangsFrom (".." > configFile) + return $ + [ "api-examples-" ++ (langCode lang) ++ ".txt" + | lang <- langs + , langSynopsis lang + ] +main :: IO () main = do xx <- getArgs let isLatex = case xx of @@ -31,7 +39,7 @@ main = do cs2 <- getCats catAPI let cs = sortCats (cs1 ++ cs2) writeFile synopsis "GF Resource Grammar Library: Synopsis" - append "B. Bringert, T. Hallgren, and A. Ranta" + -- append "B. Bringert, T. Hallgren, and A. Ranta" space append "%!Encoding:utf-8" append "%!style(html): ./revealpopup.css" @@ -66,7 +74,7 @@ main = do space link "Source 2:" structuralAPI space - apiExx <- getApiExx apiExxFiles + apiExx <- apiExxFiles >>= getApiExx rs <- getRules apiExx syntaxAPI --- putStrLn $ unlines ["p -cat=" ++ last (words t) ++ --- " \"" ++ e ++ "\"" | (_,t,e) <- rs, not (null e)] ---- @@ -83,7 +91,7 @@ main = do -- delimit rs space title "Lexical Paradigms" - mapM_ (putParadigms isLatex cs) paradigmFiles + paradigmFiles >>= mapM_ (putParadigms isLatex cs) space include "synopsis-additional.txt" space @@ -227,7 +235,6 @@ mkIdent = concatMap unspec where ':' -> "-" _ -> [c] - mkCatTable :: Bool -> Cats -> [String] mkCatTable isLatex cs = inChunks chsize (\rs -> header ++ map mk1 rs) cs where @@ -236,49 +243,36 @@ mkCatTable isLatex cs = inChunks chsize (\rs -> header ++ map mk1 rs) cs mk1 (name,expl,ex) = unwords ["|", showCat cs name, "|", expl, "|", typo ex, "|"] typo ex = if take 1 ex == "\"" then itf (init (tail ex)) else ex -srcPath = ("../src" ++) +srcPath = ((>) "../src") -commonAPI = srcPath "/abstract/Common.gf" -catAPI = srcPath "/abstract/Cat.gf" -syntaxAPI = srcPath "/api/Constructors.gf" -structuralAPI = srcPath "/abstract/Structural.gf" -paradigmFiles = [ - ("Afrikaans", srcPath "/afrikaans/ParadigmsAfr.gf"), - ("Arabic", srcPath "/arabic/ParadigmsAra.gf"), - ("Basque", srcPath "/basque/ParadigmsEus.gf"), - ("Bulgarian", srcPath "/bulgarian/ParadigmsBul.gf"), - ("Catalan", srcPath "/catalan/ParadigmsCat.gf"), - ("Chinese", srcPath "/chinese/ParadigmsChi.gf"), - ("Danish", srcPath "/danish/ParadigmsDan.gf"), - ("Dutch", srcPath "/dutch/ParadigmsDut.gf"), - ("English", srcPath "/english/ParadigmsEng.gf"), - ("Estonian", srcPath "/estonian/ParadigmsEst.gf"), - ("Finnish", srcPath "/finnish/ParadigmsFin.gf"), - ("French", srcPath "/french/ParadigmsFre.gf"), - ("German", srcPath "/german/ParadigmsGer.gf"), - ("Greek", srcPath "/greek/ParadigmsGre.gf"), - ("Hindi", srcPath "/hindi/ParadigmsHin.gf"), - ("Icelandic", srcPath "/icelandic/ParadigmsIce.gf"), --- ("Interlingua", srcPath "/interlingua/ParadigmsIna.gf"), - ("Italian", srcPath "/italian/ParadigmsIta.gf"), - ("Japanese", srcPath "/japanese/ParadigmsJpn.gf"), - ("Latvian", srcPath "/latvian/ParadigmsLav.gf"), - ("Maltese", srcPath "/maltese/ParadigmsMlt.gf"), - ("Mongolian", srcPath "/mongolian/ParadigmsMon.gf"), - ("Nepali", srcPath "/nepali/ParadigmsNep.gf"), - ("Norwegian", srcPath "/norwegian/ParadigmsNor.gf"), - ("Nynorsk", srcPath "/nynorsk/ParadigmsNno.gf"), - ("Polish", srcPath "/polish/ParadigmsPol.gf"), - ("Punjabi", srcPath "/punjabi/ParadigmsPnb.gf"), - ("Portuguese", srcPath "/portuguese/ParadigmsPor.gf"), - ("Romanian", srcPath "/romanian/ParadigmsRon.gf"), - ("Russian", srcPath "/russian/ParadigmsRus.gf"), - ("Sindhi", srcPath "/sindhi/ParadigmsSnd.gf"), - ("Spanish", srcPath "/spanish/ParadigmsSpa.gf"), - ("Swedish", srcPath "/swedish/ParadigmsSwe.gf"), - ("Thai", srcPath "/thai/ParadigmsTha.gf"), - ("Urdu", srcPath "/urdu/ParadigmsUrd.gf") - ] +commonAPI = srcPath "abstract/Common.gf" +catAPI = srcPath "abstract/Cat.gf" +syntaxAPI = srcPath "api/Constructors.gf" +structuralAPI = srcPath "abstract/Structural.gf" + +paradigmFiles :: IO [(String,FilePath)] +paradigmFiles = do + langs <- loadLangsFrom (".." > configFile) + return $ + [ (name, srcPath $ printf "%s/Paradigms%s.gf" (langDir lang) (langCode lang)) + | lang <- langs + , langSynopsis lang + , let name = formatName (langDir lang) + ] + +-- | Format language name from directory name +-- "ancient_greek -> Ancient Greek" +formatName :: String -> String +formatName = unwords . map (\(s:ss) -> toUpper s : ss) . splitOn (=='_') + +-- | Split a string at given character, similar to words +splitOn :: (Char -> Bool) -> String -> [String] +splitOn _ "" = [] +splitOn f s = takeWhile (not.f) s : splitOn f rest + where + rest = case dropWhile (not.f) s of + "" -> [] + _:xs -> xs append s = appendFile synopsis ('\n':s) title s = append $ "=" ++ s ++ "=" @@ -339,7 +333,7 @@ showTyp cs = unwords . map f . words -- to work around GHC 6.12 file input readFileC cod file = do - let tmp = file ++ ".tmp" + let tmp = file <.> "tmp" case cod of "utf8" -> readFile file _ -> do diff --git a/doc/language-list.txt b/doc/language-list.txt deleted file mode 100644 index 486353b69..000000000 --- a/doc/language-list.txt +++ /dev/null @@ -1,35 +0,0 @@ -Afrikaans -Amharic -Arabic -Bulgarian -Catalan -Chinese -Danish -Dutch -English -Finnish -French -German -Greek -Hebrew -Hindi -Interlingua -Japanese -Italian -Latin -Latvian -