1
0
forked from GitHub/gf-core

Dutch lexicon generated via Google translate; method in lib/src/MkExx.hs

This commit is contained in:
aarne
2009-11-16 15:21:56 +00:00
parent 70308bb549
commit 8ed9fe442d
5 changed files with 417 additions and 6 deletions

View File

@@ -213,7 +213,7 @@ langsDemo = langsLang `except` ["Ara","Hin","Ina","Tha"]
langsParse = langs `only` ["Eng"]
-- languages for which langs.pgf is built
langsPGF = langsLang `except` ["Ara","Hin","Tha"]
langsPGF = langsLang `except` ["Ara","Bul","Hin","Ron","Tha"]
-- languages for which Compatibility exists (to be extended)
langsCompat = langsLang `only` ["Cat","Eng","Fin","Fre","Ita","Spa","Swe"]

View File

@@ -12,7 +12,7 @@ oper
necessary_A = mkA "necessary" ;
own_A = mkA "own" ;
have_VV = mkVV have_V ;
provably_Adv = mkAdv "provably" ;
provably_Adv = ParadigmsEng.mkAdv "provably" ;
provable_A = mkA "provable" ;
false_A = mkA "false" ;
such_A = mkA "such" ;

89
lib/src/MkExx.hs Normal file
View File

@@ -0,0 +1,89 @@
module Main where
-- Learn a lexicon from Google Translate: generate example sentences, have them translated, then align each translation with its source tree.
import System
-- | Entry point; the mode is chosen from the command-line arguments.
--
-- * @align TREES LINS@: read both files line by line, pair the lines up
--   positionally and print the result of 'align' for every pair.
-- * @NOUNS PREDS@: read whitespace-separated identifiers from both files
--   and print the example trees produced by 'mkExx'.
--
-- Any other argument list (including @align@ with a file missing) fails
-- with a usage message rather than an incomplete-pattern crash or an
-- attempt to read a file called "align".
main :: IO ()
main = do
  args <- getArgs
  case args of
    "align" : trees : lins : _ -> do
      ts <- fmap lines (readFile trees)
      ls <- fmap lines (readFile lins)
      mapM_ (putStrLn . align) (zip ts ls)
    "align" : _ -> ioError (userError usage)
    nounFile : predFile : _ -> do
      nouns <- fmap words (readFile nounFile)
      preds <- fmap words (readFile predFile)
      -- Standard input is never consulted, so plain output is enough.
      putStr (mkExx nouns preds)
    _ -> ioError (userError usage)
  where
    usage =
      "usage: MkExx NOUNS PREDS  |  MkExx align TREES TRANSLATIONS"
-- | An identifier as read from the noun and predicate files; 'predic'
-- inspects the part after the underscore to decide the predicate's category.
type Ident = String
-- | Build one example tree per noun, one per output line, by pairing each
-- noun with a predicate and handing the pair to 'predic'.
--
-- There are normally more nouns than predicates, so the predicate list is
-- repeated cyclically until the nouns run out. With no predicates at all
-- there is nothing to pair a noun with, so the result is empty (the
-- previous self-referential list never terminated in that case).
mkExx :: [String] -> [String] -> String
mkExx nouns preds
  | null preds = ""
  | otherwise  = unlines (zipWith (curry predic) nouns (cycle preds))
-- | Build the example tree that predicates @f@ of the noun @n@.
--
-- The predicate's category is the text after the last underscore of @f@:
--
-- * starting with @A@: adjectival predication via @PositA@;
-- * exactly @V2@: the verb takes the same definite noun phrase as object;
-- * any other name starting with @V@: intransitive use via @UseV@.
--
-- A predicate without an underscore or with any other category is reported
-- with an explicit error naming the offending identifier.
predic :: (Ident,Ident) -> String
predic (n, f) = case category of
    'A' : _ -> predn n ("(UseComp (CompAP (PositA " ++ f ++ ")))")
    "V2"    -> predn n ("(ComplSlash (SlashV2a " ++ f ++ ") (" ++ detn n ++ "))")
    'V' : _ -> predn n ("(UseV " ++ f ++ ")")
    _       -> error ("predic: unsupported predicate category in " ++ show f
                      ++ " (expected a name ending in _A.., _V2 or _V..)")
  where
    -- Suffix after the last '_'; empty when the name has no underscore.
    category = case break (== '_') (reverse f) of
      (revCat, _ : _) -> reverse revCat
      _               -> ""
-- | Wrap a verb-phrase string into a @PredVP@ tree whose subject is the
-- definite singular noun phrase built by 'detn' from @noun@.
predn :: String -> String -> String
predn noun vp = unwords ["PredVP", subject, vp]
  where
    subject = '(' : detn noun ++ ")"
-- | Tree text for the definite singular noun phrase of the given noun
-- identifier.
detn :: String -> String
detn noun = concat ["DetCN (DetQuant DefArt NumSg) (UseN ", noun, ")"]
-- | Turn one (example tree, translated sentence) pair into two lexicon
-- entries: an @mkN@ line for the noun and an @mkA@ / @mkV@ / @mkV2@ line
-- for the predicate.
--
-- The tree must have the shape produced by 'predic': the noun identifier
-- is its seventh word and the predicate identifier follows @UseComp@,
-- @UseV@ or @ComplSlash@. The sentence is read as
-- @article noun "is" adjective@, @article noun verb object...@ or
-- @article noun verb@. The noun is @neuter@ when the article is @Het@ and
-- @utrum@ otherwise. The verb's citation form is guessed by replacing its
-- last letter with @en@.
--
-- A pair that does not fit these shapes yields a single @--@ comment line
-- instead of aborting the whole run, and a sentence-final period is only
-- removed when it is actually there.
align :: (String, String) -> String
align (t, s) = case (treeParts, translationParts) of
    (Just (noun, predName, cat), Just (nargs, fargs)) -> unlines
      [ noun ++ " = mkN " ++ nargs ++ " ;"
      , predName ++ " = mk" ++ cat ++ " " ++ fargs ++ " ;"
      ]
    _ -> unlines ["-- MkExx: could not align: " ++ t ++ " <=> " ++ s]
  where
    -- (noun identifier, predicate identifier, predicate category)
    treeParts = case words t of
      _ : _ : _ : _ : _ : _ : n : ps -> do
        (p, c) <- case ps of
          "(UseComp" : _ : _ : a : _ -> Just (ident a, "A")
          "(UseV" : v : _            -> Just (ident v, "V")
          "(ComplSlash" : _ : v : _  -> Just (ident v, "V2")
          _                          -> Nothing
        Just (ident n, p, c)
      _ -> Nothing

    -- (arguments for mkN, argument for the predicate's mk function)
    translationParts = case words s of
      de : n : "is" : a : _ -> Just (nargsOf n de, quote (dropPeriod a))
      de : n : v : _ : _    -> withVerb de n (verb v)
      de : n : v : _        -> withVerb de n (verb (dropPeriod v))
      _                     -> Nothing

    withVerb de n = fmap (\inf -> (nargsOf n de, quote inf))

    -- Identifier with the closing parentheses of the tree cut off.
    ident = takeWhile (/= ')')

    nargsOf n d = unwords [quote n, if d == "Het" then "neuter" else "utrum"]

    -- Crude infinitive: drop the final letter (the 3rd person ending) and
    -- add "en"; there is nothing to work with if the word is empty.
    verb w = case reverse w of
      _ : rest -> Just (reverse rest ++ "en")
      []       -> Nothing

    -- Remove a sentence-final period, leaving other words untouched.
    dropPeriod w = case reverse w of
      '.' : rest -> reverse rest
      _          -> w

    quote w = "\"" ++ w ++ "\""
-- How to use this script (shell, GF shell and ghci steps, in order):
{-
gf LangEng
> gt -cat=N | wf -file=ns
> gt -cat=A | wf -file=fs
> gt -cat=V | wf -append -file=fs
> gt -cat=V2 | wf -append -file=fs
sort -u ns >nouns
sort -u fs >preds
runghc MkExx.hs nouns preds >exx-input
gf
> rf -file=exx-input -lines -tree | l | wf -file=all-exx
ghci
> let mk (_:c:cs) = Data.Char.toUpper c : cs ++ "."
> do {s <- readFile "all-exx" ; writeFile "trans-eng" (unlines (map mk (lines s)))}
-- google-translate trans-eng, obtaining trans-dut
-- align the files, producing LexiconDut.gf
runghc MkExx.hs align exx-input trans-dut | sort -u >newlex
-}

View File

@@ -361,8 +361,330 @@ lin
-- reason_N = mkN "Grund" "Gründe" masculine ;
today_Adv = mkAdv "vandaag" ;
-- uncertain_A = mkA "unsicher" ;
--
--
--} ;
-- lexicon learned by ../MkExx
airplane_N = mkN "vliegtuig" neuter ;
animal_N = mkN "dier" neuter ;
apartment_N = mkN "appartement" neuter ;
apple_N = mkN "appel" utrum ;
art_N = mkN "kunst" utrum ;
ashes_N = mkN "as" utrum ;
baby_N = mkN "baby" utrum ;
back_N = mkN "achterkant" utrum ;
-- bad_A = mkA "slecht" ;
bank_N = mkN "bank" utrum ;
bark_N = mkN "schors" utrum ;
-- beautiful_A = mkA "prachtig" ;
-- beer_N = mkN "bier" neuter ;
belly_N = mkN "buik" utrum ;
-- big_A = mkA "groot" ;
-- bike_N = mkN "fiets" utrum ;
bird_N = mkN "vogel" utrum ;
bite_V2 = mkV2 "bijen" ;
-- black_A = mkA "zwart" ;
blood_N = mkN "bloed" neuter ;
-- blow_V = mkV "slageen" ;
blow_V = mkV "waaien" ;
-- blue_A = mkA "blauw" ;
boat_N = mkN "boot" utrum ;
bone_N = mkN "bot" neuter ;
-- book_N = mkN "boek" neuter ;
boot_N = mkN "boot" utrum ;
boss_N = mkN "baas" utrum ;
boy_N = mkN "jongen" utrum ;
bread_N = mkN "brood" neuter ;
break_V2 = mkV2 "breeken" ;
breast_N = mkN "borst" utrum ;
breathe_V = mkV "ademen" ;
broad_A = mkA "breed" ;
-- brown_A = mkA "bruin" ;
burn_V = mkV "brandwondeen" ;
butter_N = mkN "boter" utrum ;
-- buy_V2 = mkV2 "koopen" ;
camera_N = mkN "camera" utrum ;
cap_N = mkN "kap" utrum ;
car_N = mkN "auto" utrum ;
carpet_N = mkN "tapijt" neuter ;
cat_N = mkN "kat" utrum ;
ceiling_N = mkN "plafond" neuter ;
chair_N = mkN "stoel" utrum ;
cheese_N = mkN "kaas" utrum ;
-- child_N = mkN "kind" neuter ;
church_N = mkN "kerk" utrum ;
city_N = mkN "stad" utrum ;
clean_A = mkA "schoon" ;
clever_A = mkA "slim" ;
close_V2 = mkV2 "sluien" ;
cloud_N = mkN "wolk" utrum ;
coat_N = mkN "vacht" utrum ;
cold_A = mkA "koud" ;
-- come_V = mkV "komen" ;
computer_N = mkN "computer" utrum ;
correct_A = mkA "correct" ;
count_V2 = mkV2 "telen" ;
country_N = mkN "land" neuter ;
cousin_N = mkN "neef" utrum ;
cow_N = mkN "koe" utrum ;
cut_V2 = mkV2 "snijden" ;
day_N = mkN "dag" utrum ;
-- die_V = mkV "sterfen" ;
dig_V = mkV "graafen" ;
-- dig_V = mkV "opgravingeen" ;
dirty_A = mkA "vuil" ;
-- do_V2 = mkV2 "d" ;
-- do_V2 = mkV2 "heefen" ;
doctor_N = mkN "arts" utrum ;
dog_N = mkN "hond" utrum ;
door_N = mkN "deur" utrum ;
-- drink_V2 = mkV2 "drankeen" ;
dry_A = mkA "droog" ;
dull_A = mkA "saai" ;
dust_N = mkN "stof" neuter ;
ear_N = mkN "oor" neuter ;
earth_N = mkN "aarde" utrum ;
-- eat_V2 = mkV2 "eeen" ;
egg_N = mkN "ei" neuter ;
empty_A = mkA "leeg" ;
enemy_N = mkN "vijand" utrum ;
eye_N = mkN "oog" neuter ;
factory_N = mkN "fabriek" utrum ;
fall_V = mkV "valen" ;
fat_N = mkN "vet" neuter ;
fear_V2 = mkV2 "vreesen" ;
feather_N = mkN "veer" utrum ;
fight_V2 = mkV2 "vechen" ;
find_V2 = mkV2 "vinden" ;
fingernail_N = mkN "vingernagel" utrum ;
fire_N = mkN "brand" utrum ;
-- fish_N = mkN "vis" utrum ;
float_V = mkV "drijfen" ;
floor_N = mkN "vloer" utrum ;
-- flow_V = mkV "stromeen" ;
flow_V = mkV "stroomen" ;
flower_N = mkN "bloem" utrum ;
fly_V = mkV "vliegen" ;
fog_N = mkN "mist" utrum ;
foot_N = mkN "voet" utrum ;
forest_N = mkN "bos" neuter ;
forget_V2 = mkV2 "vergeeen" ;
freeze_V = mkV "bevriesen" ;
fridge_N = mkN "koelkast" utrum ;
friend_N = mkN "vriend" utrum ;
fruit_N = mkN "vrucht" utrum ;
full_A = mkA "vol" ;
fun_AV = mkA "leuk" ;
garden_N = mkN "tuin" utrum ;
girl_N = mkN "meisje" neuter ;
glove_N = mkN "handschoen" utrum ;
-- go_V = mkV "gaaen" ;
gold_N = mkN "goud" neuter ;
good_A = mkA "goed" ;
grammar_N = mkN "grammatica" utrum ;
grass_N = mkN "gras" neuter ;
green_A = mkA "groen" ;
guts_N = mkN "darm" utrum ;
hair_N = mkN "haar" neuter ;
hand_N = mkN "hand" utrum ;
harbour_N = mkN "haven" utrum ;
hat_N = mkN "hoed" utrum ;
hate_V2 = mkV2 "heefen" ;
have_V2 = mkV2 "he" ;
head_N = mkN "hoofd" neuter ;
hear_V2 = mkV2 "hooren" ;
heart_N = mkN "hart" neuter ;
heavy_A = mkA "zwaar" ;
hill_N = mkN "heuvel" utrum ;
hit_V2 = mkV2 "raaken" ;
hold_V2 = mkV2 "heefen" ;
horn_N = mkN "hoorn" utrum ;
horse_N = mkN "paard" neuter ;
hot_A = mkA "hot" ;
-- house_N = mkN "huis" neuter ;
hunt_V2 = mkV2 "jaagen" ;
husband_N = mkN "man" utrum ;
ice_N = mkN "ijs" neuter ;
important_A = mkA "belangrijk" ;
industry_N = mkN "industrie" utrum ;
iron_N = mkN "ijzer" neuter ;
jump_V = mkV "springen" ;
kill_V2 = mkV2 "dooden" ;
king_N = mkN "koning" utrum ;
knee_N = mkN "knie" utrum ;
know_V2 = mkV2 "kenen" ;
lake_N = mkN "meer" neuter ;
lamp_N = mkN "lamp" utrum ;
language_N = mkN "taal" utrum ;
laugh_V = mkV "lachen" ;
leaf_N = mkN "blad" neuter ;
learn_V2 = mkV2 "leeren" ;
leather_N = mkN "leer" neuter ;
leave_V2 = mkV2 "laaen" ;
leg_N = mkN "been" neuter ;
lie_V = mkV "ligen" ;
like_V2 = mkV2 "houden" ;
listen_V2 = mkV2 "luisteren" ;
live_V = mkV "leveen" ;
liver_N = mkN "lever" utrum ;
long_A = mkA "lang" ;
lose_V2 = mkV2 "verliesen" ;
louse_N = mkN "luis" utrum ;
love_N = mkN "liefde" utrum ;
love_V2 = mkV2 "houden" ;
man_N = mkN "man" utrum ;
meat_N = mkN "vlees" neuter ;
milk_N = mkN "melk" utrum ;
moon_N = mkN "maan" utrum ;
mountain_N = mkN "berg" utrum ;
mouth_N = mkN "mond" utrum ;
music_N = mkN "muziek" utrum ;
name_N = mkN "naam" utrum ;
narrow_A = mkA "smal" ;
near_A = mkA "nabij" ;
neck_N = mkN "nek" utrum ;
new_A = mkA "nieuw" ;
newspaper_N = mkN "krant" utrum ;
night_N = mkN "nacht" utrum ;
nose_N = mkN "neus" utrum ;
number_N = mkN "nummer" neuter ;
oil_N = mkN "olie-schakelaars" utrum ;
old_A = mkA "oud" ;
open_V2 = mkV2 "openen" ;
paper_N = mkN "papier" neuter ;
peace_N = mkN "vrede" utrum ;
pen_N = mkN "pen" utrum ;
person_N = mkN "persoon" utrum ;
planet_N = mkN "planeet" utrum ;
plastic_N = mkN "plastic" utrum ;
play_V = mkV "speelen" ;
play_V2 = mkV2 "speelen" ;
policeman_N = mkN "politieagent" utrum ;
priest_N = mkN "priester" utrum ;
probable_AS = mkA "waarschijnlijk" ;
pull_V2 = mkV2 "treken" ;
push_V2 = mkV2 "duwen" ;
put_V2 = mkV2 "zeen" ;
queen_N = mkN "koningin" utrum ;
question_N = mkN "vraag" utrum ;
radio_N = mkN "radio" utrum ;
rain_N = mkN "regen" utrum ;
rain_V0 = mkV "regenen" ;
read_V2 = mkV2 "leesen" ;
ready_A = mkA "klaar" ;
reason_N = mkN "reden" utrum ;
-- red_A = mkA "rood" ;
religion_N = mkN "religie" utrum ;
restaurant_N = mkN "restaurant" neuter ;
river_N = mkN "rivier" utrum ;
road_N = mkN "weg" utrum ;
rock_N = mkN "rots" utrum ;
roof_N = mkN "dak" neuter ;
root_N = mkN "wortel" utrum ;
rope_N = mkN "touw" neuter ;
rotten_A = mkA "verrot" ;
round_A = mkA "rond" ;
rub_V2 = mkV2 "wrijfen" ;
rubber_N = mkN "rubberen" utrum ;
rule_N = mkN "regel" utrum ;
run_V = mkV "draaien" ;
salt_N = mkN "zout" neuter ;
sand_N = mkN "zand" neuter ;
school_N = mkN "school" utrum ;
science_N = mkN "wetenschap" utrum ;
scratch_V2 = mkV2 "krasseen" ;
sea_N = mkN "zee" utrum ;
-- see_V2 = mkV2 "zieen" ;
seed_N = mkN "zaad" neuter ;
seek_V2 = mkV2 "wien" ;
sew_V = mkV "naaien" ;
sharp_A = mkA "scherp" ;
sheep_N = mkN "schaap" neuter ;
ship_N = mkN "schip" neuter ;
shirt_N = mkN "shirt" neuter ;
shoe_N = mkN "schoen" utrum ;
shop_N = mkN "winkel" utrum ;
short_A = mkA "kort" ;
silver_N = mkN "zilver" neuter ;
sing_V = mkV "zingen" ;
sister_N = mkN "zuster" utrum ;
sit_V = mkV "zien" ;
skin_N = mkN "huid" utrum ;
sky_N = mkN "lucht" utrum ;
-- sleep_V = mkV "slaapen" ;
-- small_A = mkA "klein" ;
smell_V = mkV "geureen" ;
smoke_N = mkN "rook" utrum ;
smooth_A = mkA "glad" ;
snake_N = mkN "slang" utrum ;
snow_N = mkN "sneeuw" utrum ;
sock_N = mkN "sok" utrum ;
song_N = mkN "liedje" neuter ;
speak_V2 = mkV2 "spreeken" ;
spit_V = mkV "spuugen" ;
split_V2 = mkV2 "splitsen" ;
squeeze_V2 = mkV2 "knijpen" ;
stab_V2 = mkV2 "steeken" ;
stand_V = mkV "staaen" ;
star_N = mkN "ster" utrum ;
steel_N = mkN "staal" neuter ;
stick_N = mkN "stok" utrum ;
stone_N = mkN "steen" utrum ;
stop_V = mkV "stopen" ;
stove_N = mkN "kachel" utrum ;
straight_A = mkA "recht" ;
student_N = mkN "student" utrum ;
stupid_A = mkA "dom" ;
suck_V2 = mkV2 "zuigen" ;
sun_N = mkN "zon" utrum ;
swell_V = mkV "zwelen" ;
swim_V = mkV "zwemen" ;
switch8off_V2 = mkV2 "schakelen" ;
switch8on_V2 = mkV2 "oen" ;
table_N = mkN "tabel" utrum ;
tail_N = mkN "staart" utrum ;
teach_V2 = mkV2 "leeren" ;
teacher_N = mkN "leraar" utrum ;
television_N = mkN "televisie" utrum ;
thick_A = mkA "dik" ;
thin_A = mkA "dun" ;
think_V = mkV "denken" ;
throw_V2 = mkV2 "gooien" ;
tie_V2 = mkV2 "bandeen" ;
tongue_N = mkN "tong" utrum ;
tooth_N = mkN "tand" utrum ;
train_N = mkN "trein" utrum ;
travel_V = mkV "reizeen" ;
tree_N = mkN "boom" utrum ;
turn_V = mkV "draaien" ;
ugly_A = mkA "lelijk" ;
uncertain_A = mkA "onzeker" ;
understand_V2 = mkV2 "begrijpen" ;
university_N = mkN "universiteit" utrum ;
village_N = mkN "dorp" neuter ;
vomit_V = mkV "braaken" ;
wait_V2 = mkV2 "wachen" ;
walk_V = mkV "wandelingeen" ;
war_N = mkN "oorlog" utrum ;
-- warm_A = mkA "warm" ;
wash_V2 = mkV2 "spoelen" ;
watch_V2 = mkV2 "horlogeen" ;
water_N = mkN "water" neuter ;
wet_A = mkA "nat" ;
white_A = mkA "wit" ;
wide_A = mkA "breed" ;
wife_N = mkN "vrouw" utrum ;
win_V2 = mkV2 "winen" ;
wind_N = mkN "wind" utrum ;
window_N = mkN "raam" neuter ;
-- wine_N = mkN "wijn" utrum ;
wing_N = mkN "vleugel" utrum ;
wipe_V2 = mkV2 "veegen" ;
woman_N = mkN "vrouw" utrum ;
wood_N = mkN "hout" neuter ;
worm_N = mkN "worm" utrum ;
write_V2 = mkV2 "schrijfen" ;
year_N = mkN "jaar" neuter ;
yellow_A = mkA "geel" ;
young_A = mkA "jong" ;
}

View File

@@ -62,7 +62,7 @@ fastcgi.server = (".pgf" =>
".fcgi" =>
((
"socket" => basedir + "/" + var.PID + "-morpho.socket",
"bin-path" => basedir + "/dist/build/morpho-server/morpho-server",
# "bin-path" => basedir + "/dist/build/morpho-server/morpho-server",
"bin-environment" => ("GHCRTS" => "-M512M"),
"min-procs" => 1,
"max-procs" => 1,