forked from GitHub/gf-core
Dutch lexicon generated via Google translate; method in lib/src/MkExx.hs
This commit is contained in:
2
Setup.hs
2
Setup.hs
@@ -213,7 +213,7 @@ langsDemo = langsLang `except` ["Ara","Hin","Ina","Tha"]
|
||||
langsParse = langs `only` ["Eng"]
|
||||
|
||||
-- languages for which langs.pgf is built
|
||||
langsPGF = langsLang `except` ["Ara","Hin","Tha"]
|
||||
langsPGF = langsLang `except` ["Ara","Bul","Hin","Ron","Tha"]
|
||||
|
||||
-- languages for which Compatibility exists (to be extended)
|
||||
langsCompat = langsLang `only` ["Cat","Eng","Fin","Fre","Ita","Spa","Swe"]
|
||||
|
||||
@@ -12,7 +12,7 @@ oper
|
||||
necessary_A = mkA "necessary" ;
|
||||
own_A = mkA "own" ;
|
||||
have_VV = mkVV have_V ;
|
||||
provably_Adv = mkAdv "provably" ;
|
||||
provably_Adv = ParadigmsEng.mkAdv "provably" ;
|
||||
provable_A = mkA "provable" ;
|
||||
false_A = mkA "false" ;
|
||||
such_A = mkA "such" ;
|
||||
|
||||
89
lib/src/MkExx.hs
Normal file
89
lib/src/MkExx.hs
Normal file
@@ -0,0 +1,89 @@
|
||||
module Main where
|
||||
|
||||
-- to learn a lexicon from Google translate via sentence translation
|
||||
|
||||
import System
|
||||
|
||||
-- | Entry point; the mode is chosen from the command-line arguments.
--
-- * @align TREES LINS@ pairs each line of TREES with the same-numbered line
--   of LINS and prints the result of 'align' for every pair.
-- * @NOUNS PREDS@ reads whitespace-separated identifiers from both files and
--   prints the output of 'mkExx'.
--
-- Any other argument list is rejected with a usage message instead of dying
-- on an incomplete pattern match.
main :: IO ()
main = do
  args <- getArgs
  case args of
    "align" : trees : lins : _ -> do
      ts <- fmap lines (readFile trees)
      ls <- fmap lines (readFile lins)
      mapM_ (putStrLn . align) (zip ts ls)
    n : f : _ -> do
      nouns <- fmap words (readFile n)
      preds <- fmap words (readFile f)
      -- Standard input was never inspected (the old code used
      -- @interact (const ...)@), so the text is simply written out.
      putStr (mkExx nouns preds)
    _ -> ioError (userError usage)
  where
    usage = "usage: MkExx NOUNS PREDS | MkExx align TREES LINS"
|
||||
|
||||
type Ident = String
|
||||
|
||||
-- | Produce one example tree per noun, one tree per output line.
--
-- Each noun is paired with a predicate and handed to 'predic'. The predicate
-- list is repeated as often as needed, because there are more nouns than
-- predicates. With no predicates at all nothing can be paired, so the result
-- is empty (the self-referential list used previously never terminated in
-- that case).
mkExx :: [String] -> [String] -> String
mkExx nouns preds
  | null preds = ""
  | otherwise  = unlines (zipWith (curry predic) nouns (cycle preds))
|
||||
|
||||
-- | Build the example tree for a (noun, predicate) pair.
--
-- The predicate's category is the text after the first underscore of its
-- identifier:
--
-- * starting with @A@: adjectival predication;
-- * exactly @V2@: two-place verb whose object is the same noun phrase as the
--   subject;
-- * otherwise starting with @V@: one-place verb.
--
-- Identifiers without an underscore, or with any other category, stop the
-- program with a message naming the offending identifier.
predic :: (Ident,Ident) -> String
predic (n, f) = case category of
    'A' : _ -> predn n ("(UseComp (CompAP (PositA " ++ f ++ ")))")
    "V2"    -> predn n ("(ComplSlash (SlashV2a " ++ f ++ ") (" ++ detn n ++ "))")
    'V' : _ -> predn n ("(UseV " ++ f ++ ")")
    _       -> error ("MkExx.predic: unsupported predicate " ++ show f)
  where
    -- drop 1 (unlike tail) is total, so a missing underscore reaches the
    -- catch-all branch above instead of failing inside the Prelude.
    category = drop 1 (dropWhile (/= '_') f)
|
||||
|
||||
-- | Clause text: @PredVP@ applied to the noun phrase 'detn' builds from @n@,
-- followed by the already rendered verb phrase @f@.
predn :: String -> String -> String
predn n f = concat ["PredVP (", detn n, ") ", f]
|
||||
-- | Noun-phrase text for the noun identifier @n@, using the @DefArt@
-- quantifier with @NumSg@.
detn :: String -> String
detn n = concat ["DetCN (DetQuant DefArt NumSg) (UseN ", n, ")"]
|
||||
|
||||
-- | Turn one (tree, translated sentence) pair into two lexicon entries: one
-- for the noun and one for the predicate.
--
-- From the tree @t@: the seventh word is the noun identifier, and the words
-- after it decide the predicate identifier and its category (@A@, @V@ or
-- @V2@). From the sentence @s@: the first word is the article and the second
-- the noun; if the third word is @is@, the fourth is the adjective with its
-- last character (the sentence-final punctuation) removed, otherwise the
-- third word is the verb form passed to 'verb'.
--
-- A tree or sentence that does not have one of these shapes stops the
-- program with a message quoting the offending line.
align :: (String, String) -> String
align (t, s) = unlines
    [ noun ++ " = mkN " ++ nargs ++ " ;"
    , predId ++ " = mk" ++ cat ++ " " ++ fargs ++ " ;"
    ]
  where
    (noun, (predId, cat)) = case words t of
      _ : _ : _ : _ : _ : _ : n : ps ->
        ( ident n
        , case ps of
            "(UseComp" : _ : _ : a : _ -> (ident a, "A")
            "(UseV" : v : _            -> (ident v, "V")
            "(ComplSlash" : _ : v : _  -> (ident v, "V2")
            _ -> error ("MkExx.align: unrecognised predicate in tree: " ++ t)
        )
      _ -> error ("MkExx.align: tree is too short: " ++ t)

    (nargs, fargs) = case words s of
      de : n : "is" : a : _ -> (nargsOf n de, quote (init a))
      -- at least four words: the verb is followed by something else
      de : n : v : _ : _    -> (nargsOf n de, quote (verb v))
      -- exactly three words: the verb carries the final punctuation
      de : n : v : _        -> (nargsOf n de, quote (verb (init v)))
      _ -> error ("MkExx.align: translation is too short: " ++ s)

    -- an identifier is followed directly by the closing parentheses
    ident = takeWhile (/= ')')
|
||||
|
||||
-- | Arguments for a noun entry: the quoted word @n@ followed by its gender,
-- which is @neuter@ when the article @d@ is exactly @Het@ and @utrum@
-- otherwise.
nargsOf :: String -> String -> String
nargsOf n d = quote n ++ " " ++ gender
  where
    gender
      | d == "Het" = "neuter"
      | otherwise  = "utrum"
|
||||
|
||||
-- | Replace the last character of a verb form by @en@ (presumably a rough
-- guess at the infinitive; the result is not checked).
verb :: String -> String
verb = (++ "en") . init
|
||||
|
||||
-- | Surround a string with double quotes; nothing inside it is escaped.
quote :: String -> String
quote s = '"' : s ++ "\""
|
||||
|
||||
|
||||
|
||||
-- Usage: run the steps below in order.
|
||||
|
||||
{-
|
||||
gf LangEng
|
||||
> gt -cat=N | wf -file=ns
|
||||
> gt -cat=A | wf -file=fs
|
||||
> gt -cat=V | wf -append -file=fs
|
||||
> gt -cat=V2 | wf -append -file=fs
|
||||
|
||||
sort -u ns >nouns
|
||||
sort -u fs >preds
|
||||
|
||||
runghc MkExx.hs nouns preds >exx-input
|
||||
|
||||
gf
|
||||
> rf -file=exx-input -lines -tree | l | wf -file=all-exx
|
||||
|
||||
ghci
|
||||
> let mk (_:c:cs) = Data.Char.toUpper c : cs ++ "."
|
||||
> do {s <- readFile "all-exx" ; writeFile "trans-eng" (unlines (map mk (lines s)))}
|
||||
|
||||
-- google-translate trans-eng, obtaining trans-dut
|
||||
|
||||
-- align the files; the entries written to newlex are the material for LexiconDut.gf
|
||||
|
||||
runghc MkExx.hs align exx-input trans-dut | sort -u >newlex
|
||||
|
||||
-}
|
||||
|
||||
@@ -361,8 +361,330 @@ lin
|
||||
-- reason_N = mkN "Grund" "Gründe" masculine ;
|
||||
today_Adv = mkAdv "vandaag" ;
|
||||
-- uncertain_A = mkA "unsicher" ;
|
||||
--
|
||||
--
|
||||
--} ;
|
||||
|
||||
-- lexicon learned by ../MkExx
|
||||
|
||||
airplane_N = mkN "vliegtuig" neuter ;
|
||||
animal_N = mkN "dier" neuter ;
|
||||
apartment_N = mkN "appartement" neuter ;
|
||||
apple_N = mkN "appel" utrum ;
|
||||
art_N = mkN "kunst" utrum ;
|
||||
ashes_N = mkN "as" utrum ;
|
||||
baby_N = mkN "baby" utrum ;
|
||||
back_N = mkN "achterkant" utrum ;
|
||||
-- bad_A = mkA "slecht" ;
|
||||
bank_N = mkN "bank" utrum ;
|
||||
bark_N = mkN "schors" utrum ;
|
||||
-- beautiful_A = mkA "prachtig" ;
|
||||
-- beer_N = mkN "bier" neuter ;
|
||||
belly_N = mkN "buik" utrum ;
|
||||
-- big_A = mkA "groot" ;
|
||||
-- bike_N = mkN "fiets" utrum ;
|
||||
bird_N = mkN "vogel" utrum ;
|
||||
bite_V2 = mkV2 "bijen" ;
|
||||
-- black_A = mkA "zwart" ;
|
||||
blood_N = mkN "bloed" neuter ;
|
||||
-- blow_V = mkV "slageen" ;
|
||||
blow_V = mkV "waaien" ;
|
||||
-- blue_A = mkA "blauw" ;
|
||||
boat_N = mkN "boot" utrum ;
|
||||
bone_N = mkN "bot" neuter ;
|
||||
-- book_N = mkN "boek" neuter ;
|
||||
boot_N = mkN "boot" utrum ;
|
||||
boss_N = mkN "baas" utrum ;
|
||||
boy_N = mkN "jongen" utrum ;
|
||||
bread_N = mkN "brood" neuter ;
|
||||
break_V2 = mkV2 "breeken" ;
|
||||
breast_N = mkN "borst" utrum ;
|
||||
breathe_V = mkV "ademen" ;
|
||||
broad_A = mkA "breed" ;
|
||||
-- brown_A = mkA "bruin" ;
|
||||
burn_V = mkV "brandwondeen" ;
|
||||
butter_N = mkN "boter" utrum ;
|
||||
-- buy_V2 = mkV2 "koopen" ;
|
||||
camera_N = mkN "camera" utrum ;
|
||||
cap_N = mkN "kap" utrum ;
|
||||
car_N = mkN "auto" utrum ;
|
||||
carpet_N = mkN "tapijt" neuter ;
|
||||
cat_N = mkN "kat" utrum ;
|
||||
ceiling_N = mkN "plafond" neuter ;
|
||||
chair_N = mkN "stoel" utrum ;
|
||||
cheese_N = mkN "kaas" utrum ;
|
||||
-- child_N = mkN "kind" neuter ;
|
||||
church_N = mkN "kerk" utrum ;
|
||||
city_N = mkN "stad" utrum ;
|
||||
clean_A = mkA "schoon" ;
|
||||
clever_A = mkA "slim" ;
|
||||
close_V2 = mkV2 "sluien" ;
|
||||
cloud_N = mkN "wolk" utrum ;
|
||||
coat_N = mkN "vacht" utrum ;
|
||||
cold_A = mkA "koud" ;
|
||||
-- come_V = mkV "komen" ;
|
||||
computer_N = mkN "computer" utrum ;
|
||||
correct_A = mkA "correct" ;
|
||||
count_V2 = mkV2 "telen" ;
|
||||
country_N = mkN "land" neuter ;
|
||||
cousin_N = mkN "neef" utrum ;
|
||||
cow_N = mkN "koe" utrum ;
|
||||
cut_V2 = mkV2 "snijden" ;
|
||||
day_N = mkN "dag" utrum ;
|
||||
-- die_V = mkV "sterfen" ;
|
||||
dig_V = mkV "graafen" ;
|
||||
-- dig_V = mkV "opgravingeen" ;
|
||||
dirty_A = mkA "vuil" ;
|
||||
-- do_V2 = mkV2 "d" ;
|
||||
-- do_V2 = mkV2 "heefen" ;
|
||||
doctor_N = mkN "arts" utrum ;
|
||||
dog_N = mkN "hond" utrum ;
|
||||
door_N = mkN "deur" utrum ;
|
||||
-- drink_V2 = mkV2 "drankeen" ;
|
||||
dry_A = mkA "droog" ;
|
||||
dull_A = mkA "saai" ;
|
||||
dust_N = mkN "stof" neuter ;
|
||||
ear_N = mkN "oor" neuter ;
|
||||
earth_N = mkN "aarde" utrum ;
|
||||
-- eat_V2 = mkV2 "eeen" ;
|
||||
egg_N = mkN "ei" neuter ;
|
||||
empty_A = mkA "leeg" ;
|
||||
enemy_N = mkN "vijand" utrum ;
|
||||
eye_N = mkN "oog" neuter ;
|
||||
factory_N = mkN "fabriek" utrum ;
|
||||
fall_V = mkV "valen" ;
|
||||
fat_N = mkN "vet" neuter ;
|
||||
fear_V2 = mkV2 "vreesen" ;
|
||||
feather_N = mkN "veer" utrum ;
|
||||
fight_V2 = mkV2 "vechen" ;
|
||||
find_V2 = mkV2 "vinden" ;
|
||||
fingernail_N = mkN "vingernagel" utrum ;
|
||||
fire_N = mkN "brand" utrum ;
|
||||
-- fish_N = mkN "vis" utrum ;
|
||||
float_V = mkV "drijfen" ;
|
||||
floor_N = mkN "vloer" utrum ;
|
||||
-- flow_V = mkV "stromeen" ;
|
||||
flow_V = mkV "stroomen" ;
|
||||
flower_N = mkN "bloem" utrum ;
|
||||
fly_V = mkV "vliegen" ;
|
||||
fog_N = mkN "mist" utrum ;
|
||||
foot_N = mkN "voet" utrum ;
|
||||
forest_N = mkN "bos" neuter ;
|
||||
forget_V2 = mkV2 "vergeeen" ;
|
||||
freeze_V = mkV "bevriesen" ;
|
||||
fridge_N = mkN "koelkast" utrum ;
|
||||
friend_N = mkN "vriend" utrum ;
|
||||
fruit_N = mkN "vrucht" utrum ;
|
||||
full_A = mkA "vol" ;
|
||||
fun_AV = mkA "leuk" ;
|
||||
garden_N = mkN "tuin" utrum ;
|
||||
girl_N = mkN "meisje" neuter ;
|
||||
glove_N = mkN "handschoen" utrum ;
|
||||
-- go_V = mkV "gaaen" ;
|
||||
gold_N = mkN "goud" neuter ;
|
||||
good_A = mkA "goed" ;
|
||||
grammar_N = mkN "grammatica" utrum ;
|
||||
grass_N = mkN "gras" neuter ;
|
||||
green_A = mkA "groen" ;
|
||||
guts_N = mkN "darm" utrum ;
|
||||
hair_N = mkN "haar" neuter ;
|
||||
hand_N = mkN "hand" utrum ;
|
||||
harbour_N = mkN "haven" utrum ;
|
||||
hat_N = mkN "hoed" utrum ;
|
||||
hate_V2 = mkV2 "heefen" ;
|
||||
have_V2 = mkV2 "he" ;
|
||||
head_N = mkN "hoofd" neuter ;
|
||||
hear_V2 = mkV2 "hooren" ;
|
||||
heart_N = mkN "hart" neuter ;
|
||||
heavy_A = mkA "zwaar" ;
|
||||
hill_N = mkN "heuvel" utrum ;
|
||||
hit_V2 = mkV2 "raaken" ;
|
||||
hold_V2 = mkV2 "heefen" ;
|
||||
horn_N = mkN "hoorn" utrum ;
|
||||
horse_N = mkN "paard" neuter ;
|
||||
hot_A = mkA "hot" ;
|
||||
-- house_N = mkN "huis" neuter ;
|
||||
hunt_V2 = mkV2 "jaagen" ;
|
||||
husband_N = mkN "man" utrum ;
|
||||
ice_N = mkN "ijs" neuter ;
|
||||
important_A = mkA "belangrijk" ;
|
||||
industry_N = mkN "industrie" utrum ;
|
||||
iron_N = mkN "ijzer" neuter ;
|
||||
jump_V = mkV "springen" ;
|
||||
kill_V2 = mkV2 "dooden" ;
|
||||
king_N = mkN "koning" utrum ;
|
||||
knee_N = mkN "knie" utrum ;
|
||||
know_V2 = mkV2 "kenen" ;
|
||||
lake_N = mkN "meer" neuter ;
|
||||
lamp_N = mkN "lamp" utrum ;
|
||||
language_N = mkN "taal" utrum ;
|
||||
laugh_V = mkV "lachen" ;
|
||||
leaf_N = mkN "blad" neuter ;
|
||||
learn_V2 = mkV2 "leeren" ;
|
||||
leather_N = mkN "leer" neuter ;
|
||||
leave_V2 = mkV2 "laaen" ;
|
||||
leg_N = mkN "been" neuter ;
|
||||
lie_V = mkV "ligen" ;
|
||||
like_V2 = mkV2 "houden" ;
|
||||
listen_V2 = mkV2 "luisteren" ;
|
||||
live_V = mkV "leveen" ;
|
||||
liver_N = mkN "lever" utrum ;
|
||||
long_A = mkA "lang" ;
|
||||
lose_V2 = mkV2 "verliesen" ;
|
||||
louse_N = mkN "luis" utrum ;
|
||||
love_N = mkN "liefde" utrum ;
|
||||
love_V2 = mkV2 "houden" ;
|
||||
man_N = mkN "man" utrum ;
|
||||
meat_N = mkN "vlees" neuter ;
|
||||
milk_N = mkN "melk" utrum ;
|
||||
moon_N = mkN "maan" utrum ;
|
||||
mountain_N = mkN "berg" utrum ;
|
||||
mouth_N = mkN "mond" utrum ;
|
||||
music_N = mkN "muziek" utrum ;
|
||||
name_N = mkN "naam" utrum ;
|
||||
narrow_A = mkA "smal" ;
|
||||
near_A = mkA "nabij" ;
|
||||
neck_N = mkN "nek" utrum ;
|
||||
new_A = mkA "nieuw" ;
|
||||
newspaper_N = mkN "krant" utrum ;
|
||||
night_N = mkN "nacht" utrum ;
|
||||
nose_N = mkN "neus" utrum ;
|
||||
number_N = mkN "nummer" neuter ;
|
||||
oil_N = mkN "olie-schakelaars" utrum ;
|
||||
old_A = mkA "oud" ;
|
||||
open_V2 = mkV2 "openen" ;
|
||||
paper_N = mkN "papier" neuter ;
|
||||
peace_N = mkN "vrede" utrum ;
|
||||
pen_N = mkN "pen" utrum ;
|
||||
person_N = mkN "persoon" utrum ;
|
||||
planet_N = mkN "planeet" utrum ;
|
||||
plastic_N = mkN "plastic" utrum ;
|
||||
play_V = mkV "speelen" ;
|
||||
play_V2 = mkV2 "speelen" ;
|
||||
policeman_N = mkN "politieagent" utrum ;
|
||||
priest_N = mkN "priester" utrum ;
|
||||
probable_AS = mkA "waarschijnlijk" ;
|
||||
pull_V2 = mkV2 "treken" ;
|
||||
push_V2 = mkV2 "duwen" ;
|
||||
put_V2 = mkV2 "zeen" ;
|
||||
queen_N = mkN "koningin" utrum ;
|
||||
question_N = mkN "vraag" utrum ;
|
||||
radio_N = mkN "radio" utrum ;
|
||||
rain_N = mkN "regen" utrum ;
|
||||
rain_V0 = mkV "regenen" ;
|
||||
read_V2 = mkV2 "leesen" ;
|
||||
ready_A = mkA "klaar" ;
|
||||
reason_N = mkN "reden" utrum ;
|
||||
-- red_A = mkA "rood" ;
|
||||
religion_N = mkN "religie" utrum ;
|
||||
restaurant_N = mkN "restaurant" neuter ;
|
||||
river_N = mkN "rivier" utrum ;
|
||||
road_N = mkN "weg" utrum ;
|
||||
rock_N = mkN "rots" utrum ;
|
||||
roof_N = mkN "dak" neuter ;
|
||||
root_N = mkN "wortel" utrum ;
|
||||
rope_N = mkN "touw" neuter ;
|
||||
rotten_A = mkA "verrot" ;
|
||||
round_A = mkA "rond" ;
|
||||
rub_V2 = mkV2 "wrijfen" ;
|
||||
rubber_N = mkN "rubberen" utrum ;
|
||||
rule_N = mkN "regel" utrum ;
|
||||
run_V = mkV "draaien" ;
|
||||
salt_N = mkN "zout" neuter ;
|
||||
sand_N = mkN "zand" neuter ;
|
||||
school_N = mkN "school" utrum ;
|
||||
science_N = mkN "wetenschap" utrum ;
|
||||
scratch_V2 = mkV2 "krasseen" ;
|
||||
sea_N = mkN "zee" utrum ;
|
||||
-- see_V2 = mkV2 "zieen" ;
|
||||
seed_N = mkN "zaad" neuter ;
|
||||
seek_V2 = mkV2 "wien" ;
|
||||
sew_V = mkV "naaien" ;
|
||||
sharp_A = mkA "scherp" ;
|
||||
sheep_N = mkN "schaap" neuter ;
|
||||
ship_N = mkN "schip" neuter ;
|
||||
shirt_N = mkN "shirt" neuter ;
|
||||
shoe_N = mkN "schoen" utrum ;
|
||||
shop_N = mkN "winkel" utrum ;
|
||||
short_A = mkA "kort" ;
|
||||
silver_N = mkN "zilver" neuter ;
|
||||
sing_V = mkV "zingen" ;
|
||||
sister_N = mkN "zuster" utrum ;
|
||||
sit_V = mkV "zien" ;
|
||||
skin_N = mkN "huid" utrum ;
|
||||
sky_N = mkN "lucht" utrum ;
|
||||
-- sleep_V = mkV "slaapen" ;
|
||||
-- small_A = mkA "klein" ;
|
||||
smell_V = mkV "geureen" ;
|
||||
smoke_N = mkN "rook" utrum ;
|
||||
smooth_A = mkA "glad" ;
|
||||
snake_N = mkN "slang" utrum ;
|
||||
snow_N = mkN "sneeuw" utrum ;
|
||||
sock_N = mkN "sok" utrum ;
|
||||
song_N = mkN "liedje" neuter ;
|
||||
speak_V2 = mkV2 "spreeken" ;
|
||||
spit_V = mkV "spuugen" ;
|
||||
split_V2 = mkV2 "splitsen" ;
|
||||
squeeze_V2 = mkV2 "knijpen" ;
|
||||
stab_V2 = mkV2 "steeken" ;
|
||||
stand_V = mkV "staaen" ;
|
||||
star_N = mkN "ster" utrum ;
|
||||
steel_N = mkN "staal" neuter ;
|
||||
stick_N = mkN "stok" utrum ;
|
||||
stone_N = mkN "steen" utrum ;
|
||||
stop_V = mkV "stopen" ;
|
||||
stove_N = mkN "kachel" utrum ;
|
||||
straight_A = mkA "recht" ;
|
||||
student_N = mkN "student" utrum ;
|
||||
stupid_A = mkA "dom" ;
|
||||
suck_V2 = mkV2 "zuigen" ;
|
||||
sun_N = mkN "zon" utrum ;
|
||||
swell_V = mkV "zwelen" ;
|
||||
swim_V = mkV "zwemen" ;
|
||||
switch8off_V2 = mkV2 "schakelen" ;
|
||||
switch8on_V2 = mkV2 "oen" ;
|
||||
table_N = mkN "tabel" utrum ;
|
||||
tail_N = mkN "staart" utrum ;
|
||||
teach_V2 = mkV2 "leeren" ;
|
||||
teacher_N = mkN "leraar" utrum ;
|
||||
television_N = mkN "televisie" utrum ;
|
||||
thick_A = mkA "dik" ;
|
||||
thin_A = mkA "dun" ;
|
||||
think_V = mkV "denken" ;
|
||||
throw_V2 = mkV2 "gooien" ;
|
||||
tie_V2 = mkV2 "bandeen" ;
|
||||
tongue_N = mkN "tong" utrum ;
|
||||
tooth_N = mkN "tand" utrum ;
|
||||
train_N = mkN "trein" utrum ;
|
||||
travel_V = mkV "reizeen" ;
|
||||
tree_N = mkN "boom" utrum ;
|
||||
turn_V = mkV "draaien" ;
|
||||
ugly_A = mkA "lelijk" ;
|
||||
uncertain_A = mkA "onzeker" ;
|
||||
understand_V2 = mkV2 "begrijpen" ;
|
||||
university_N = mkN "universiteit" utrum ;
|
||||
village_N = mkN "dorp" neuter ;
|
||||
vomit_V = mkV "braaken" ;
|
||||
wait_V2 = mkV2 "wachen" ;
|
||||
walk_V = mkV "wandelingeen" ;
|
||||
war_N = mkN "oorlog" utrum ;
|
||||
-- warm_A = mkA "warm" ;
|
||||
wash_V2 = mkV2 "spoelen" ;
|
||||
watch_V2 = mkV2 "horlogeen" ;
|
||||
water_N = mkN "water" neuter ;
|
||||
wet_A = mkA "nat" ;
|
||||
white_A = mkA "wit" ;
|
||||
wide_A = mkA "breed" ;
|
||||
wife_N = mkN "vrouw" utrum ;
|
||||
win_V2 = mkV2 "winen" ;
|
||||
wind_N = mkN "wind" utrum ;
|
||||
window_N = mkN "raam" neuter ;
|
||||
-- wine_N = mkN "wijn" utrum ;
|
||||
wing_N = mkN "vleugel" utrum ;
|
||||
wipe_V2 = mkV2 "veegen" ;
|
||||
woman_N = mkN "vrouw" utrum ;
|
||||
wood_N = mkN "hout" neuter ;
|
||||
worm_N = mkN "worm" utrum ;
|
||||
write_V2 = mkV2 "schrijfen" ;
|
||||
year_N = mkN "jaar" neuter ;
|
||||
yellow_A = mkA "geel" ;
|
||||
young_A = mkA "jong" ;
|
||||
|
||||
}
|
||||
|
||||
@@ -62,7 +62,7 @@ fastcgi.server = (".pgf" =>
|
||||
".fcgi" =>
|
||||
((
|
||||
"socket" => basedir + "/" + var.PID + "-morpho.socket",
|
||||
"bin-path" => basedir + "/dist/build/morpho-server/morpho-server",
|
||||
# "bin-path" => basedir + "/dist/build/morpho-server/morpho-server",
|
||||
"bin-environment" => ("GHCRTS" => "-M512M"),
|
||||
"min-procs" => 1,
|
||||
"max-procs" => 1,
|
||||
|
||||
Reference in New Issue
Block a user