mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-04 16:52:50 -06:00
Checked the top-1000 BNC senses in Swe, with some split senses added to Dictionary and DictionaryEng. Wrote bnc-dict-log.txt to describe the procedure, which should now be reproducible for other languages.
This commit is contained in:
60
lib/src/translator/bnc-dict-log.txt
Normal file
60
lib/src/translator/bnc-dict-log.txt
Normal file
@@ -0,0 +1,60 @@
|
||||
1. Create a check list for Swe
|
||||
|
||||
do
|
||||
bnc <- readFile "bnc-to-check.txt" >>= return . words -- list of BNC funs
|
||||
dict <- readFile "DictionarySwe.gf" >>= return . map words . lines -- current Swe lexicon
|
||||
let dictmap = Data.Map.fromList [(f,unwords ws) | "lin":f:"=":ws <- dict]
|
||||
let bncdict = [(f,maybe "variants{} ;" id $ Data.Map.lookup f dictmap) | f <- bnc] -- current Swe for BNC
|
||||
writeFile "bncswe.txt" $ unlines [unwords ("lin":f:"=":[ws]) | (f,ws) <- bncdict] -- print inspectable file
|
||||
|
||||
|
||||
2. Inspect the check list
|
||||
|
||||
went through the top one thousand entries of bncswe.txt:
|
||||
- corrected everything
|
||||
- removed trailing comments from corrected entries
|
||||
- split senses
|
||||
- added a -- | comment for disambiguating new senses
|
||||
|
||||
move the checked words to correctswe.txt
|
||||
|
||||
|
||||
3. Apply split senses
|
||||
|
||||
grep "\-\- |" correctswe.txt | sort
|
||||
|
||||
Copy split senses to bnc-to-check.txt
|
||||
- *but don't remove the unsplit senses* because they are needed to find words from other languages
|
||||
|
||||
Copy split senses to Dictionary.gf, together with the -- | comments
|
||||
|
||||
Make copies for split senses in DictionaryEng.gf
|
||||
|
||||
Verify the result by compiling DictionaryEng.gf
|
||||
|
||||
|
||||
4. Extend the Swe lexicon
|
||||
|
||||
do
|
||||
old <- readFile "DictionarySwe.gf" >>= return . map words . lines -- read old lexicon
|
||||
new <- readFile "correctswe.txt" >>= return . map words . lines -- read corrected and new words
|
||||
let oldmap = Data.Map.fromList [(f,unwords ws) | "lin":f:"=":ws <- old]
|
||||
let newlist = [(f,unwords (takeWhile (/= "--") ws)) | "lin":f:"=":ws <- new] -- drop comments from corrected words
|
||||
let newmap = foldr (uncurry Data.Map.insert) oldmap newlist -- insert corrected words
|
||||
writeFile "newswe.txt" $ unlines [unwords ("lin":f:"=":[ws]) | (f,ws) <- Data.Map.assocs newmap] -- print lin rules
|
||||
|
||||
Replace the body of DictionarySwe.gf by newswe.txt
|
||||
|
||||
Compile DictionarySwe.gf
|
||||
|
||||
|
||||
5. Save the rest for later
|
||||
|
||||
Remove the corrected words from bncswe.txt.
|
||||
Or take note of the last word that was checked already.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user