diff --git a/src/icelandic/wiktionary/CheckWithWiktionary.hs b/src/icelandic/wiktionary/CheckWithWiktionary.hs
index b5098d4e6..278af2b0d 100644
--- a/src/icelandic/wiktionary/CheckWithWiktionary.hs
+++ b/src/icelandic/wiktionary/CheckWithWiktionary.hs
@@ -2,6 +2,7 @@ import qualified Data.Map
import qualified Data.Text.IO
import Data.Char
import Data.List
+import System.Directory
-- AR 2019-08-06
-- checking IrregIce wrt Wikipedia:
@@ -144,6 +145,11 @@ jumpToIcelandic ls = dropWhile (\l -> not (isPrefixOf "
" l && isPrefixOf "Ic
-- to be run in wiktionary/, with subdirs nouns/ adjectives/ verbs/
-- to be run in adjectives/
+getAllWiktNouns = do -- emits GF output for every noun listed in nouns/wikt-nouns.txt into n.tmp
+ vs <- readFile "nouns/wikt-nouns.txt" >>= return . lines -- one entry per line of the list file
+ writeFile "n.tmp" "" -- create/truncate the output file before the appends below
+ mapM_ (\v -> getWiktNoun "nouns/" v >>= appendFile "n.tmp" . unlines . emitGF) vs -- each entry v is used as a file name under nouns/
+
getAllWiktAdjectives = do
vs <- readFile "adjectives/wikt-adjectives.txt" >>= return . lines
writeFile "a.tmp" ""
@@ -159,8 +165,13 @@ getAllWiktVerbs = do
-- return ([relevant Wikt lines], (fun,cat,lin), message)
getWiktWord :: Int -> (String -> [String] -> ([String],((String,String,String),Message))) -> FilePath -> FilePath -> IO ([String],((String,String,String),Message))
getWiktWord number check dir file = do
- s <- readFile (dir++file) >>= return . map untag . take number . getTD . jumpToIcelandic . lines
- return $ check file s
+ let dirfile = dir++file -- path actually opened: directory prefix followed by the file name
+ ex <- doesFileExist dirfile -- doesFileExist is provided by System.Directory
+ if not ex -- missing file: report it as an MBad result with no lines and the noGF placeholder
+ then return ([],(noGF,MBad (file ++ " does not exist")))
+ else do
+ s <- readFile dirfile >>= return . map untag . take number . getTD . jumpToIcelandic . lines -- keep at most `number` items produced by getTD, then untag each
+ return $ check file s -- check receives the bare file name, not the full path
getWiktNoun = getWiktWord 17 checkNoun
getWiktAdjective = getWiktWord 120 checkAdjective
@@ -170,23 +181,26 @@ noCheck :: String -> [String] -> ([String],((String,String,String),Message))
noCheck s ss = (ss, (noGF, MMissing s))
noGF = ("--","--","--")
-checkNoun noun forms = noCheck noun forms ----
+checkNoun noun forms = case length forms of -- builds the GF entry for a noun from its extracted table lines
+---- n | n < 24 -> (forms, (noGF, MBad (adj ++ " A: only " ++ show (length forms) ++ " lines"))) -- NOTE(review): looks like a leftover copy of the checkAdjective guard (mentions adj) - confirm and remove
+ n | n < 17 -> (forms, (noGF, MBad (noun ++ " N: only " ++ show (length forms) ++ " lines"))) -- fewer than the 17 lines that getWiktNoun asks for: reject as incomplete
+ _ -> (forms, checkZZ noun (noun ++ "_N", "N", "mkgN " ++ gender (forms!!0), [forms!!i | i <- [1,5,9,13,3,7,11,15]])) -- forms!!0 supplies the gender; eight further lines become the mkgN arguments (meaning of this index order is not visible here - TODO confirm)
+ where
+ gender s = case take 1 s of -- decided by the first character only
+ "m" -> "masculine"
+ "f" -> "feminine"
+ _ -> "neuter" --- "n" (also the fallback for any other first character or an empty string)
+
checkAdjective adj forms = case length forms of
n | n < 24 -> (forms, (noGF, MBad (adj ++ " A: only " ++ show (length forms) ++ " lines")))
n | n < 120 -> (forms, checkZZ adj (adj ++ "_A", "A", "mkA", [forms!!0, forms!!1]))
_ -> (forms, checkZZ adj (adj ++ "_A", "A", "mkA", [forms!!0, forms!!1,forms!!48]))
-checkZZ w (fun,cat,lin,args) =
- if elem "ZZ" (map (wform . words) args)
- then (noGF, MBad (w ++ " " ++ cat ++ ": missing forms in data"))
- else ((fun,cat, app lin args),MGood w)
-
-checkVerb verb forms =
- if length forms < 75
- then (forms, (noGF, MBad (verb ++ " V: only " ++ show (length forms) ++ " lines")))
- else case unexpectedWikLines forms of
- [] -> (forms, ((verb ++ "_V", "V", app "mkV" [verb, forms!!5, forms!!18, forms!!74, forms!!1]), MGood verb)) ----
+checkVerb verb forms = case length forms of -- builds the GF entry for a verb from its extracted table lines
+ n | n < 75 -> (forms, (noGF, MBad (verb ++ " V: only " ++ show (length forms) ++ " lines"))) -- index 74 is read below, so at least 75 lines are required
+ _ -> case unexpectedWikLines forms of -- unexpectedWikLines is defined elsewhere; an empty result is the accepting case
+ [] -> (forms, checkZZ verb (verb ++ "_V", "V", "mkV", [verb, forms!!5, forms!!18, forms!!74, forms!!1])) -- checkZZ turns this into MBad if any argument is the "ZZ" marker
us -> (forms, (noGF, MBad (verb ++ " V: unexpected lines " ++ show (length us))))
data Message =
@@ -198,6 +212,11 @@ data Message =
app f xs = unwords $ f : map (quote . wform . words) xs
quote s = "\"" ++ s ++ "\""
+checkZZ w (fun,cat,lin,args) = -- returns ((fun,cat,lin),Message); w is only used inside the message
+ if elem "ZZ" (map (wform . words) args) -- an argument that reduces to "ZZ" is treated as a missing form (wform is defined elsewhere)
+ then (noGF, MBad (w ++ " " ++ cat ++ ": missing forms in data"))
+ else ((fun,cat, app lin args),MGood w) -- app prefixes lin and quotes each argument after wform . words
+
emitGF (ss,((fun,cat,lin),msg)) = case msg of
MGood _ -> [unwords ["fun",fun,":",cat,";"],unwords ["lin",fun,"=",lin,";"]]
_ -> ["-- " ++ show msg]