mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-10 13:29:32 -06:00
349 lines
12 KiB
Haskell
349 lines
12 KiB
Haskell
import MkExxTable
|
|
import System.Cmd
|
|
import System.Environment
|
|
import Data.Char
|
|
import Data.List
|
|
import qualified Data.ByteString.Char8 as BS
|
|
import qualified Data.Map as M
|
|
---import Debug.Trace ----
|
|
|
|
type Cats = [(String,String,String)]
|
|
type Rules = [(String,String,String)]
|
|
|
|
-- the file generated
|
|
synopsis = "synopsis.txt"
|
|
|
|
-- the language in which revealed examples are shown
|
|
revealedLang = "Eng"
|
|
|
|
-- all languages shown
|
|
apiExxFiles = ["api-examples-" ++ lang ++ ".txt" | lang <- words
|
|
-- "Eng Chi"
|
|
"Afr Bul Cat Chi Dan Dut Eng Est Fin Fre Ger Gre Hin Ice Ita Jpn Lav Mlt Mon Nep Nor Nno Pes Pnb Pol Ron Rus Snd Spa Swe Tha Urd"
|
|
]
|
|
|
|
main = do
|
|
xx <- getArgs
|
|
let isLatex = case xx of
|
|
"-tex":_ -> True
|
|
_ -> False
|
|
cs1 <- getCats commonAPI
|
|
cs2 <- getCats catAPI
|
|
let cs = sortCats (cs1 ++ cs2)
|
|
writeFile synopsis "GF Resource Grammar Library: Synopsis"
|
|
append "B. Bringert, T. Hallgren, and A. Ranta"
|
|
space
|
|
append "%!Encoding:utf-8"
|
|
append "%!style(html): ./revealpopup.css"
|
|
space
|
|
append "%!postproc(html): '#divreveal' '<div class=reveal>'"
|
|
append "%!postproc(html): '#divpopup' '<div class=popup>'"
|
|
append "%!postproc(html): '#ediv' '</div>'"
|
|
append "%!postproc(html): '#UL' '<ul>'"
|
|
append "%!postproc(html): '#EUL' '</ul>'"
|
|
append "%!postproc(html): '#LI' '<li>'"
|
|
append "%!postproc(html): '(SRC=\"categories.png\")' '\\1 USEMAP=\"#categories\"'"
|
|
append "%!postproc(html): '#LParadigms' '<a name=\"RParadigms\"></a>'"
|
|
append "%!postproc(tex): '#LParadigms' ''"
|
|
delimit $ addToolTips cs
|
|
include "synopsis-intro.txt"
|
|
title "Categories"
|
|
space
|
|
link "Source 1:" commonAPI
|
|
space
|
|
link "Source 2:" catAPI
|
|
space
|
|
append "==A hierarchic view==\n"
|
|
include "categories-intro.txt"
|
|
append "==Explanations==\n"
|
|
delimit $ mkCatTable isLatex cs
|
|
space
|
|
title "Syntax Rules and Structural Words"
|
|
space
|
|
link "Source 1:" syntaxAPI
|
|
space
|
|
link "Source 2:" structuralAPI
|
|
space
|
|
apiExx <- getApiExx apiExxFiles
|
|
rs <- getRules apiExx syntaxAPI
|
|
--- putStrLn $ unlines ["p -cat=" ++ last (words t) ++
|
|
--- " \"" ++ e ++ "\"" | (_,t,e) <- rs, not (null e)] ----
|
|
rs2 <- getRules apiExx structuralAPI
|
|
let rss = rs ++ rs2
|
|
--- mapM_ putStrLn [f ++ " " ++ e | (f,_,e) <- rss]
|
|
delimit $ mkSplitTables True isLatex apiExx cs rss
|
|
space
|
|
-- title "Structural Words"
|
|
-- space
|
|
-- link "Source:" structuralAPI
|
|
-- space
|
|
-- rs <- rulesTable False isLatex cs structuralAPI
|
|
-- delimit rs
|
|
space
|
|
title "Lexical Paradigms"
|
|
mapM_ (putParadigms isLatex cs) paradigmFiles
|
|
space
|
|
include "synopsis-additional.txt"
|
|
space
|
|
include "synopsis-browse.txt"
|
|
space
|
|
title "An Example of Usage"
|
|
space
|
|
include "synopsis-example.txt"
|
|
space
|
|
title "Table of Contents"
|
|
space
|
|
append "%%toc"
|
|
space
|
|
let format = if isLatex then "tex" else "html"
|
|
system $ "txt2tags -t" ++ format ++ " " ++ " --toc " ++ synopsis
|
|
if isLatex then (system $ "pdflatex synopsis.tex") >> return () else return ()
|
|
|
|
addToolTips :: Cats -> [String]
|
|
addToolTips = map f
|
|
where f (n,e,_) = "%!postproc(html): '(?i)(HREF=\"#" ++ n ++ "\")( TITLE=\"[^\"]*\")?' '\\1 TITLE=\"" ++ e' ++ "\"'"
|
|
where e' = n ++ if null e then "" else " - " ++ e
|
|
|
|
getCats :: FilePath -> IO Cats
|
|
getCats file = do
|
|
ss <- readFile file >>= return . lines
|
|
return $ getrs [] ss
|
|
where
|
|
getrs rs ss = case ss of
|
|
('-':'-':'.':_):_ -> reverse rs
|
|
[] -> reverse rs
|
|
('-':'-':_):ss2 -> getrs rs ss2
|
|
s:ss2 -> case words s of
|
|
cat:";":"--":exp -> getrs ((cat,unwords expl, unwords (tail ex)):rs) ss2 where
|
|
(expl,ex) = span (/="e.g.") exp
|
|
_ -> getrs rs ss2
|
|
|
|
rulesTable :: ApiExx -> Bool -> Bool -> Cats -> FilePath -> IO [String]
|
|
rulesTable aexx hasEx isLatex cs file = do
|
|
rs <- getRules aexx file
|
|
return $ mkTable hasEx isLatex aexx cs rs
|
|
|
|
|
|
getRules :: ApiExx -> FilePath -> IO Rules
|
|
getRules aexx file = do
|
|
ss <- readFileC (coding file) file >>= return . filter (not . hiddenLine) . lines
|
|
return $ getrs [] ss
|
|
where
|
|
getrs rs ss = case ss of
|
|
('-':'-':'.':_):_ -> reverse rs
|
|
[] -> reverse rs
|
|
('-':'-':_):ss2 -> getrs rs ss2
|
|
s:ss2 -> case words s of
|
|
_:_:"overload":_ -> getrs rs ss2
|
|
_:":":_ -> getrs (rule s:rs) ss2
|
|
_ -> getrs rs ss2
|
|
rule s = (name, typ, ex)
|
|
where
|
|
ws = takeWhile (flip notElem ["--#", "--:", "="]) $ words s
|
|
name = head ws
|
|
(t,e) = span (/="--") (tail ws)
|
|
typ = unwords $ filtype (drop 1 t)
|
|
filtype = filter (/=";")
|
|
ex = if null e then "" else unwords $ unnumber $ drop 1 e
|
|
unnumber e = case e of
|
|
n:ws | last n == '.' && not (null (init n)) && all isDigit (init n) -> ws
|
|
_ -> e
|
|
|
|
putParadigms :: Bool -> Cats -> (String, FilePath) -> IO ()
|
|
putParadigms isLatex cs (lang,file) = do
|
|
stitle ("Paradigms for " ++ lang)
|
|
append "#LParadigms"
|
|
space
|
|
link "source" file
|
|
space
|
|
rs <- rulesTable M.empty False isLatex cs file
|
|
space
|
|
delimit rs
|
|
space
|
|
|
|
inChunks :: Int -> ([a] -> [String]) -> [a] -> [String]
|
|
inChunks i f = concat . intersperse ["\n\n"] . map f . chunks i where
|
|
chunks _ [] = []
|
|
chunks i xs = x : chunks i y where (x,y) = splitAt i xs
|
|
|
|
-- Makes one table per result category.
|
|
-- Adds a subsection header for each table.
|
|
mkSplitTables :: Bool -> Bool -> ApiExx -> Cats -> Rules -> [String]
|
|
mkSplitTables hasEx isLatex aexx cs = concatMap t . addLexicalCats cs . sortRules
|
|
where t (c, xs) = [subtitle c expl] ++ tableOrLink
|
|
where
|
|
expl = case [e | (n,e,_) <- cs, n == c] of
|
|
[] -> ""
|
|
e:_ -> e
|
|
tableOrLink = if null xs then parad else mkTable hasEx isLatex aexx cs xs
|
|
parad = [
|
|
"Lexical category, constructors given in",
|
|
"[lexical paradigms #RParadigms]."
|
|
]
|
|
|
|
mkTable :: Bool -> Bool -> ApiExx -> Cats -> Rules -> [String]
|
|
mkTable hasEx isLatex aexx cs = inChunks chsize (\rs -> header : map (unwords . row) rs)
|
|
where
|
|
chsize = if isLatex then 40 else 1000
|
|
header = if hasEx then "|| Function | Type | Example ||"
|
|
else "|| Function | Type | Explanation ||"
|
|
row (name,typ,ex) =
|
|
let ntyp = mkIdent (name ++ " : " ++ typ) in
|
|
if hasEx then ["|", name', "|", typ', "|", ex' ntyp, "|"]
|
|
else ["|", name', "|", typ', "|", expl ntyp, "|"]
|
|
where
|
|
name' = ttf name
|
|
typ' = showTyp cs typ
|
|
ex' typ = let ex0 = if null ex then itf (takeWhile (/='_') name) else ex in
|
|
case M.lookup typ aexx of
|
|
Just es -> mkExample es ex0
|
|
_ -> itf ex0
|
|
expl typ = if null ex then itf "-" else itf ex
|
|
|
|
-- make an example with hover-popup translations
|
|
mkExample es ex = unwords [
|
|
"#divreveal",
|
|
itf (maybe ex (mkEx revealedLang) (M.lookup revealedLang es)),
|
|
"#divpopup",
|
|
"#UL",
|
|
unwords ["#LI" ++ e | e <- prApiEx es],
|
|
"#EUL",
|
|
"#ediv",
|
|
"#ediv"
|
|
]
|
|
|
|
-- makes mkUtt : QS -> Utt to mkUtt-QS-Utt
|
|
mkIdent :: String -> String
|
|
mkIdent = concatMap unspec where
|
|
unspec c = case c of
|
|
' ' -> ""
|
|
'>' -> ""
|
|
'(' -> ""
|
|
')' -> ""
|
|
':' -> "-"
|
|
_ -> [c]
|
|
|
|
|
|
mkCatTable :: Bool -> Cats -> [String]
|
|
mkCatTable isLatex cs = inChunks chsize (\rs -> header ++ map mk1 rs) cs
|
|
where
|
|
header = ["|| Category | Explanation | Example ||"]
|
|
chsize = if isLatex then 40 else 1000
|
|
mk1 (name,expl,ex) = unwords ["|", showCat cs name, "|", expl, "|", typo ex, "|"]
|
|
typo ex = if take 1 ex == "\"" then itf (init (tail ex)) else ex
|
|
|
|
srcPath = ("../src" ++)
|
|
|
|
commonAPI = srcPath "/abstract/Common.gf"
|
|
catAPI = srcPath "/abstract/Cat.gf"
|
|
syntaxAPI = srcPath "/api/Constructors.gf"
|
|
structuralAPI = srcPath "/abstract/Structural.gf"
|
|
paradigmFiles = [
|
|
("Afrikaans", srcPath "/afrikaans/ParadigmsAfr.gf"),
|
|
("Bulgarian", srcPath "/bulgarian/ParadigmsBul.gf"),
|
|
("Catalan", srcPath "/catalan/ParadigmsCat.gf"),
|
|
("Chinese", srcPath "/chinese/ParadigmsChi.gf"),
|
|
("Danish", srcPath "/danish/ParadigmsDan.gf"),
|
|
("Dutch", srcPath "/dutch/ParadigmsDut.gf"),
|
|
("English", srcPath "/english/ParadigmsEng.gf"),
|
|
("Estonian", srcPath "/estonian/ParadigmsEst.gf"),
|
|
("Finnish", srcPath "/finnish/ParadigmsFin.gf"),
|
|
("French", srcPath "/french/ParadigmsFre.gf"),
|
|
("German", srcPath "/german/ParadigmsGer.gf"),
|
|
("Greek", srcPath "/greek/ParadigmsGre.gf"),
|
|
("Hindi", srcPath "/hindi/ParadigmsHin.gf"),
|
|
("Icelandic", srcPath "/icelandic/ParadigmsIce.gf"),
|
|
-- ("Interlingua", srcPath "/interlingua/ParadigmsIna.gf"),
|
|
("Italian", srcPath "/italian/ParadigmsIta.gf"),
|
|
("Japanese", srcPath "/japanese/ParadigmsJpn.gf"),
|
|
("Latvian", srcPath "/latvian/ParadigmsLav.gf"),
|
|
("Maltese", srcPath "/maltese/ParadigmsMlt.gf"),
|
|
("Mongolian", srcPath "/mongolian/ParadigmsMon.gf"),
|
|
("Nepali", srcPath "/nepali/ParadigmsNep.gf"),
|
|
("Norwegian", srcPath "/norwegian/ParadigmsNor.gf"),
|
|
("Nynorsk", srcPath "/nynorsk/ParadigmsNno.gf"),
|
|
("Polish", srcPath "/polish/ParadigmsPol.gf"),
|
|
("Punjabi", srcPath "/punjabi/ParadigmsPnb.gf"),
|
|
("Romanian", srcPath "/romanian/ParadigmsRon.gf"),
|
|
("Russian", srcPath "/russian/ParadigmsRus.gf"),
|
|
("Sindhi", srcPath "/sindhi/ParadigmsSnd.gf"),
|
|
("Spanish", srcPath "/spanish/ParadigmsSpa.gf"),
|
|
("Swedish", srcPath "/swedish/ParadigmsSwe.gf"),
|
|
("Thai", srcPath "/thai/ParadigmsTha.gf"),
|
|
("Urdu", srcPath "/urdu/ParadigmsUrd.gf")
|
|
]
|
|
|
|
append s = appendFile synopsis ('\n':s)
|
|
title s = append $ "=" ++ s ++ "="
|
|
stitle s = append $ "==" ++ s ++ "=="
|
|
include s = append $ "%!include: " ++ s
|
|
space = append "\n"
|
|
delimit ss = mapM_ append ss
|
|
link s f = append $ s ++ " [``" ++ f ++ "`` " ++ fa ++ "]" where
|
|
fa = "http://www.grammaticalframework.org/lib" ++ dropWhile (=='.') f
|
|
|
|
ttf s = "``" ++ s ++ "``"
|
|
itf s = "//" ++ s ++ "//"
|
|
|
|
hiddenLine s = case reverse (words s) of
|
|
"--%":_ -> True
|
|
_ -> False
|
|
|
|
-----------------
|
|
|
|
-- sort category synopsis by category, retain one table
|
|
sortCats :: Cats -> Cats
|
|
sortCats = sortBy compareCat
|
|
where compareCat (n1,_,_) (n2,_,_) = compare n1 n2
|
|
|
|
-- sort function synopsis by category, into separate tables
|
|
sortRules :: Rules -> [Rules]
|
|
sortRules = groupBy sameCat . sortBy compareRules
|
|
where sameCat r1 r2 = resultCat r1 == resultCat r2
|
|
compareRules r1@(n1,_,_) r2@(n2,_,_)
|
|
= compare (resultCat r1,n1) (resultCat r2,n2)
|
|
|
|
addLexicalCats :: Cats -> [Rules] -> [(String,Rules)]
|
|
addLexicalCats cs rss =
|
|
map head $ groupBy fstEq $ sortBy (\x y -> compare (fst x) (fst y)) $
|
|
[ (resultCat r, rs) | rs@(r:_) <- rss] ++ [(n,[]) | (n,_,_) <- cs]
|
|
where fstEq p1 p2 = fst p1 == fst p2
|
|
|
|
resultCat :: (String,String,String) -> String
|
|
resultCat (_,t,_) = last (words t)
|
|
|
|
|
|
subtitle cat expl = "==" ++ cat ++ e ++ "==" ++ "[" ++ cat ++ "]"
|
|
where e = if null expl then "" else " - " ++ expl
|
|
|
|
showCat :: Cats -> String -> String
|
|
showCat cs cat = "[" ++ cat ++ " #" ++ cat ++ "]"
|
|
|
|
showTyp :: Cats -> String -> String
|
|
showTyp cs = unwords . map f . words
|
|
where f s | head s == '(' && last s == ')' && isCat c
|
|
= "(" ++ showCat cs c ++ ")"
|
|
| isCat s = showCat cs s
|
|
| otherwise = ttf s
|
|
where c = init (tail s)
|
|
isCat cat = cat `notElem` ["Str","Int"]
|
|
&& all (\c -> isAlphaNum c || c == '\'') cat
|
|
&& isUpper (head cat)
|
|
|
|
-- to work around GHC 6.12 file input
|
|
readFileC cod file = do
|
|
let tmp = file ++ ".tmp"
|
|
case cod of
|
|
"utf8" -> readFile file
|
|
_ -> do
|
|
system $ "iconv -f " ++ cod ++ " -t UTF-8 " ++ file ++ " >" ++ tmp
|
|
readFile tmp
|
|
|
|
-- 'intelligently' determine the coding of a file
|
|
---- AR 5/6/2016: now utf8 is used for all languages except Bul, where no characters are shown in documentation anyway
|
|
coding file = case language file of
|
|
"Bul" -> "CP1251" --- "ISO-8859-1"
|
|
_ -> "utf8"
|
|
|
|
language = reverse . take 3 . drop 3 . reverse
|