diff --git a/lib/src/latin/dict/MkLatin.hs b/lib/src/latin/dict/MkLatin.hs new file mode 100644 index 000000000..480c54772 --- /dev/null +++ b/lib/src/latin/dict/MkLatin.hs @@ -0,0 +1,60 @@ +import Data.Char + +main = mkLatin + +mkLatin = do + ws <- readFile "DICTPAGE.RAW" >>= return . lines + let fcs = mkDict ws + let fs = [unwords [status st ++ "fun",f,":",cat,";","--",eng] | [st, f,cat,_,eng] <- fcs] + let ls = [unwords [status st ++ "lin",f,"=",lat,";"] | [st, f,_,lat,_] <- fcs] + + writeFile "DictLatAbs.gf" "abstract DictLatAbs = Cat ** {" + appendFile "DictLatAbs.gf" "\n-- extracted from http://archives.nd.edu/whitaker/dictpage.htm\n" + appendFile "DictLatAbs.gf" (unlines fs) + appendFile "DictLatAbs.gf" "}" + + writeFile "DictLat.gf" "concrete DictLat of DictLatAbs = CatLat ** open ParadigmsLat in {" + appendFile "DictLat.gf" "\n-- extracted from http://archives.nd.edu/whitaker/dictpage.htm\n" + appendFile "DictLat.gf" (unlines ls) + appendFile "DictLat.gf" "}" + +--- putStrLn $ unlines ls + +mkDict :: [String] -> [[String]] -- fun, cat, lat, eng +mkDict = map mkOne . zip [10001 ..] . map cleanUp + where + cleanUp s = let (lat,eng) = break (=='[') s in + (words (filter (\c -> c==' ' || isLetter c) lat), eng) + mkOne (i,(lws,eng)) = addId i (mkLat lws) ++ [eng] + mkLat lws = case lws of + x:y:"N":_:g:_ -> f [x, "N", lin "mkN" [show x,show y, (snd (gender g))]] where f = if fst (gender g) then ok else todo + x:y:"N": g:_ -> f [x, "N", lin "mkN" [show x,show y, (snd (gender g))]] where f = if fst (gender g) then ok else todo + x:"gen":z:"ADJ":_ -> ok [x, "A", lin "mkA" [show x]] + x:y:z:"ADJ":_ -> ok [x, "A", lin "mkA" [show x]] + x:"ADV":_ -> ok [x, "Adv", lin "mkAdv" [show x]] + x:y:z:u:"V":_:"INTRANS":_ -> okv [y, "V", lin "mkV" [show y,show x,show z,show u]] + x:y:z:u:"V":_:"TRANS":_ -> okv [y, "V2", lin "mkV2" ["(" ++ lin "mkV" [show y,show x,show z,show u] ++ ")"]] + x:y:z:u:"V":_:"DEP":_ -> todo [y, "V", lin "depV" ["(" ++ lin "mkV" [show y,show x,show z,show u] ++ ")"]] + x:y:z:u:"V":_ -> okv [y, "V", lin "mkV" [show y,show x,show z,show u]] + + _ -> todo ["TODO","",unwords lws] + + addId i (st:f:c:rest) = st:(f ++ "_" ++ show i ++ "_" ++ c):c:rest + + gender g = case g of + "M" -> (True, "masculine") + "F" -> (True, "feminine") + "N" -> (True, "neuter") + "C" -> (True, "masculine {-C-}") + _ -> (False, g ++ "{-??-}") + + fun x c = x ++ "_" ++ c + lin f xs = unwords (f:xs) + todo xs = "1":xs + ok xs = "0":xs + okv r@(f:_) = if elem (take 3 (reverse f)) ["era","ere","eri"] then ok r else todo r + +status st = case st of + "0" -> "" + _ -> "-- " + diff --git a/lib/src/latin/dict/README b/lib/src/latin/dict/README new file mode 100644 index 000000000..372a21ee0 --- /dev/null +++ b/lib/src/latin/dict/README @@ -0,0 +1,33 @@ +Aarne Ranta 2017-01-19 + +A script for building a Latin dictionary from William Whitaker's Words + +Source: http://archives.nd.edu/whitaker/dictpage.zip + +See also: https://en.wikipedia.org/wiki/William_Whitaker's_Words + +License: BSD for the script. Unknown for the source, therefore not redistributed here. + +Usage: + + runghc MkLatin.hs + +Output: + + DictLatAbs.gf + DictLat.gf + +Uses Herbert Lange's RGL implementation of Latin (darcs version of 2017-01-19 with ParadigmsLat.mkAdv added by AR) + +Coverage of first version: 90%; 3938 missing out of 39227 entries. Marked with leading "--" +Reasons for uncovered entries +- uncommon category (e.g. structural words) +- missing case in ParadigmsLat.gf, e.g. deponent verbs + +Matching for adjectives (A) uncertain, because we only use ParadigmsLat.gf.mkA : Str -> A + +Verb subcat information is uncertain, too. It is based on the src's TRANS and INTRANS flags. + + + +