mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-24 18:28:55 -06:00
Import Folkets Lexikon: report the number of words found
And some other minor tweaks.
This commit is contained in:
@@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
import Prelude hiding (lookup)
|
import Prelude hiding (lookup)
|
||||||
import Data.List hiding (lookup)
|
import Data.List hiding (lookup)
|
||||||
import System.Directory(doesFileExist)
|
import System.Directory(doesFileExist,renameFile)
|
||||||
import System.Cmd(system)
|
import System.Cmd(system)
|
||||||
import System.Exit
|
import System.Exit
|
||||||
import Control.Monad(unless)
|
import Control.Monad(unless)
|
||||||
@@ -42,7 +42,7 @@ prepare =
|
|||||||
download url file = ifMissing file $ curl (url++file)
|
download url file = ifMissing file $ curl (url++file)
|
||||||
makePGF imps dir pgf src = ifMissing (dir++pgf) (gfmake imps dir src)
|
makePGF imps dir pgf src = ifMissing (dir++pgf) (gfmake imps dir src)
|
||||||
|
|
||||||
curl url = shell $ ["curl -O",url]
|
curl url = shell $ ["curl -OL --compressed",url]
|
||||||
gfmake imps dir src = shell $ ["cd",dir,"&&","gf",opts,imp imps,src]
|
gfmake imps dir src = shell $ ["cd",dir,"&&","gf",opts,imp imps,src]
|
||||||
|
|
||||||
opts = "-s -make -no-pmcfg -gfo-dir ../../alltenses"
|
opts = "-s -make -no-pmcfg -gfo-dir ../../alltenses"
|
||||||
@@ -64,17 +64,19 @@ prepare =
|
|||||||
--- Create ---------------------------------------------------------------------
|
--- Create ---------------------------------------------------------------------
|
||||||
|
|
||||||
create =
|
create =
|
||||||
do eng <- readPGF "../../english/DictEngAbs.pgf"
|
do lexicon1 <- fromFolketsEnSv `fmap` readFile folkets_en_sv
|
||||||
lexicon1 <- fromFolketsEnSv `fmap` readFile folkets_en_sv
|
|
||||||
lexicon2 <- fromFolketsSvEn `fmap` readFile folkets_sv_en
|
lexicon2 <- fromFolketsSvEn `fmap` readFile folkets_sv_en
|
||||||
let lexicon = M.unionWith (++) lexicon1 lexicon2
|
let lexicon = M.unionWith (++) lexicon1 lexicon2
|
||||||
missing <- missingWords `fmap` readPGF "../DictEngAbs.pgf"
|
missing <- missingWords `fmap` readPGF "../DictEngAbs.pgf"
|
||||||
|
putStrLn $ "Looking for "++show (length missing)++" missing words."
|
||||||
|
eng <- readPGF "../../english/DictEngAbs.pgf"
|
||||||
saldo <- dict2map `fmap` readPGF "../DictSweAbs.pgf"
|
saldo <- dict2map `fmap` readPGF "../DictSweAbs.pgf"
|
||||||
let entries = map (translate lexicon eng saldo) missing
|
let entries = map (translate lexicon eng saldo) missing
|
||||||
putStrLn $ "Looking for "++show (length missing)++" missing words."
|
histo = M.toList $ M.fromListWith (+) [(length ts,1)|(_,ts)<-entries]
|
||||||
writeFile target . concWrap $ map toGF entries
|
tmp = target++".tmp"
|
||||||
let histo = M.toList $ M.fromListWith (+) [(length ts,1)|(_,ts)<-entries]
|
|
||||||
putStrLn $ "Found "++show (sum [n|(a,n)<-histo,a>0])++" words."
|
putStrLn $ "Found "++show (sum [n|(a,n)<-histo,a>0])++" words."
|
||||||
|
writeFile tmp . concWrap $ map toGF entries
|
||||||
|
renameFile tmp target
|
||||||
mapM_ print histo
|
mapM_ print histo
|
||||||
|
|
||||||
lang1 = head . languages
|
lang1 = head . languages
|
||||||
@@ -92,24 +94,23 @@ missingWords pgf = missingLins pgf (lang1 pgf)
|
|||||||
|
|
||||||
fromFolketsEnSv :: String -> M.Map String [String]
|
fromFolketsEnSv :: String -> M.Map String [String]
|
||||||
fromFolketsEnSv =
|
fromFolketsEnSv =
|
||||||
fmap nub . fromList .
|
fromList .
|
||||||
map entry . findElements (unqual "word") . fromJust . parseXMLDoc
|
map entry . findElements (unqual "word") . fromJust . parseXMLDoc
|
||||||
where
|
where
|
||||||
entry w = (value w,map value (findChildren (unqual "translation") w))
|
entry w = (value w,map value (findChildren (unqual "translation") w))
|
||||||
value = fromJust . findAttr (unqual "value")
|
|
||||||
|
|
||||||
|
|
||||||
fromFolketsSvEn :: String -> M.Map String [String]
|
fromFolketsSvEn :: String -> M.Map String [String]
|
||||||
fromFolketsSvEn =
|
fromFolketsSvEn =
|
||||||
fmap nub . fromList .
|
fromList .
|
||||||
concatMap entry . findElements (unqual "word") . fromJust . parseXMLDoc
|
concatMap entry . findElements (unqual "word") . fromJust . parseXMLDoc
|
||||||
where
|
where
|
||||||
entry w = [(en,[sv])|let sv=value w,
|
entry w = [(en,[sv])|let sv=value w,
|
||||||
en<-map value (findChildren (unqual "translation") w)]
|
en<-map value (findChildren (unqual "translation") w)]
|
||||||
value = fromJust . findAttr (unqual "value")
|
|
||||||
|
value = fromJust . findAttr (unqual "value")
|
||||||
|
|
||||||
lookup m = maybe [] id . M.lookup m
|
lookup m = maybe [] id . M.lookup m
|
||||||
fromList xs = M.fromListWith (++) xs
|
fromList xs = fmap nub $ M.fromListWith (++) xs
|
||||||
|
|
||||||
translate lexicon eng saldo eabs =
|
translate lexicon eng saldo eabs =
|
||||||
(eabs,nub [sabs|let en=lin eng eabs,
|
(eabs,nub [sabs|let en=lin eng eabs,
|
||||||
|
|||||||
Reference in New Issue
Block a user