mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-22 19:22:50 -06:00
74 lines
2.5 KiB
Plaintext
74 lines
2.5 KiB
Plaintext
1. Create a check list for Swe
|
|
|
|
do
  -- Function names to check, whitespace-separated in bnc-to-check.txt.
  funs <- fmap words (readFile "bnc-to-check.txt")
  -- The current Swedish lexicon, each line split into tokens.
  sweLines <- fmap (map words . lines) (readFile "DictionarySwe.gf")
  -- Index every "lin f = ..." rule by f.
  let sweMap = Data.Map.fromList [(f, unwords rhs) | ("lin" : f : "=" : rhs) <- sweLines]
  -- A function without a rule gets the placeholder "variants{} ;".
  let current f = Data.Map.findWithDefault "variants{} ;" f sweMap
  -- Write one inspectable "lin f = ..." line per function, in the order read.
  writeFile "bncswe.txt" (unlines [unwords ["lin", f, "=", current f] | f <- funs])
|
|
|
|
|
|
2. Inspect the check list
|
|
|
|
Went through one thousand entries of bncswe.txt:
|
|
- corrected everything
|
|
- removed trailing comments from corrected entries
|
|
- split senses
|
|
- added a -- | comment for disambiguating new senses
|
|
|
|
move the checked words to correctswe.txt
|
|
|
|
|
|
3. Apply split senses
|
|
|
|
# List the entries that carry a "-- |" sense comment, sorted.
# -F searches for the literal string (no stray-backslash escapes needed);
# -e keeps grep from reading the leading "--" of the pattern as an option.
grep -F -e '-- |' correctswe.txt | sort
|
|
|
|
Copy split senses to bnc-to-check.txt
|
|
- *but don't remove the unsplit senses* because they are needed to find words from other languages
|
|
|
|
Copy split senses to Dictionary.gf, together with the -- | comments
|
|
|
|
Make copies for split senses in DictionaryEng.gf
|
|
|
|
Verify the result by compiling DictionaryEng.gf
|
|
|
|
|
|
4. Extend the Swe lexicon
|
|
|
|
do
  -- Tokenise the existing lexicon and the file of checked entries line by line.
  oldLines <- fmap (map words . lines) (readFile "DictionarySwe.gf")
  checkedLines <- fmap (map words . lines) (readFile "correctswe.txt")
  -- Existing "lin f = ..." rules, keyed by f.
  let lexicon = Data.Map.fromList [(f, unwords rhs) | ("lin" : f : "=" : rhs) <- oldLines]
  -- Checked rules, with everything from the first "--" token onwards cut off.
  let checked = [(f, unwords (takeWhile (/= "--") rhs)) | ("lin" : f : "=" : rhs) <- checkedLines]
  -- Checked rules replace or extend the existing ones.
  let merged = foldr (\(f, rhs) acc -> Data.Map.insert f rhs acc) lexicon checked
  -- Emit one "lin f = ..." rule per key, in ascending key order.
  writeFile "newswe.txt" (unlines [unwords ["lin", f, "=", rhs] | (f, rhs) <- Data.Map.toList merged])
|
|
|
|
Replace the body of DictionarySwe.gf by newswe.txt
|
|
|
|
Compile DictionarySwe.gf
|
|
|
|
|
|
5. Save the rest for later
|
|
|
|
Remove the corrected words from bncswe.txt.
|
|
Or take note of the last word that was checked already.
|
|
|
|
|
|
|
|
|
|
6. Generate a spreadsheet view
|
|
|
|
Here for Swe and Bul:
|
|
|
|
do
  -- Function names that form the rows of the table.
  funs <- fmap words (readFile "bnc-to-check.txt")
  -- Index the "lin f = ..." rules of a lexicon file by f.
  let lexiconOf path = do
        txt <- readFile path
        return (Data.Map.fromList [(f, unwords rhs) | ("lin" : f : "=" : rhs) <- map words (lines txt)])
  swemap <- lexiconOf "DictionarySwe.gf"
  bulmap <- lexiconOf "DictionaryBul.gf"
  -- A function with no rule in a lexicon shows up as "-".
  let cell f lexicon = Data.Map.findWithDefault "-" f lexicon
  -- One tab-separated row per function: name, Swedish rule, Bulgarian rule.
  let row f = concat [f, "\t", cell f swemap, "\t", cell f bulmap]
  writeFile "bnc-swe-bul.tsv" (unlines (map row funs))
|
|
|
|
|