This commit is contained in:
Inari Listenmaa
2020-03-11 14:47:24 +01:00
20 changed files with 113800 additions and 164 deletions

View File

@@ -12,7 +12,7 @@ addons:
- ghc
before_install:
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update && brew install ghc@8.2 && export PATH="/usr/local/opt/ghc@8.2/bin:$PATH" ; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update && brew install ghc@8.6 && export PATH="/usr/local/opt/ghc@8.6/bin:$PATH" ; fi
# Fetch the prebuilt GF binaries over HTTPS (they are installed with sudo);
# -f makes curl fail on HTTP errors instead of saving an error page,
# -sS keeps the log quiet but still shows errors, -L follows redirects.
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then curl -fsSL https://www.grammaticalframework.org/download/gf-3.9-bin-intel-mac.tar.gz > gf.tar.gz && sudo tar --no-same-owner --no-same-permissions -C /usr/local -zxf gf.tar.gz && rm gf.tar.gz; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then curl -fsSL https://www.grammaticalframework.org/download/gf_3.9.1-1_amd64-trusty.deb > gf.deb && sudo dpkg -i gf.deb && rm gf.deb ; fi
- if [[ "$TRAVIS_OS_NAME" == "windows" ]]; then choco install ghc --version=8.4.4 && export PATH="/c/ProgramData/chocolatey/lib/ghc/tools/ghc-8.4.4/bin:$PATH"; fi

View File

@@ -6,7 +6,7 @@
The GF Resource Grammar Library is the standard library for Grammatical Framework. It covers the morphology and basic syntax of over 30 languages.
For more about the RGL, see the [synopsis page](http://www.grammaticalframework.org/lib/doc/synopsis.html).
For more about the RGL, see the [synopsis page](http://www.grammaticalframework.org/lib/doc/synopsis/).
## Choose your build method

View File

@@ -327,7 +327,22 @@ concrete ConjunctionJpn of Conjunction = CatJpn ** open ResJpn, Prelude in {
s2dropNaEnging = y.dropNaEnging ;
prepositive = \\st => x.prepositive ! st ++ y.prepositive ! st
} ;
-- Prepend one more adjectival phrase x to an existing list xs.
-- NOTE(review): x only contributes to s1and, s1or and prepositive; the
-- s2* fields are built purely from xs (xs.s1and followed by xs.s2*), and
-- xs.s1or is not used -- confirm this is the intended list shape.
ConsAP x xs = {
-- conjunctive ("and") linking form of the new head: x's te field
s1and = x.te ;
-- disjunctive ("or") linking form of x, followed by "か"
s1or = \\st => table {
Pos => x.dropNaEnging ! st ++ "か" ;
Neg => x.pred ! Plain ! TPres ! Neg ++ "か"
} ;
-- remaining fields: xs.s1and followed by the corresponding xs.s2* field
s2pred = \\st,t,p => xs.s1and ! st ! p ++ xs.s2pred ! st ! t ! p ;
-- attributive and adverbial fields fix the polarity of xs.s1and to Pos
s2attr = \\st => xs.s1and ! st ! Pos ++ xs.s2attr ! st ;
s2te = \\st,p => xs.s1and ! st ! p ++ xs.s2te ! st ! p ;
s2ba = \\st,p => xs.s1and ! st ! p ++ xs.s2ba ! st ! p ;
s2adv = \\st => xs.s1and ! st ! Pos ++ xs.s2adv ! st ;
s2dropNaEnging = xs.s2dropNaEnging ;
-- prepositive material of x comes before that of xs
prepositive = \\st => x.prepositive ! st ++ xs.prepositive ! st
} ;
{-
ConsAP x xs = {
s1and = \\st,p => xs.s1and ! st ! p ++ xs.s2te ! st ! p ;
s1or = \\st => table {
@@ -342,7 +357,7 @@ concrete ConjunctionJpn of Conjunction = CatJpn ** open ResJpn, Prelude in {
s2dropNaEnging = x.dropNaEnging ;
prepositive = \\st => x.prepositive ! st ++ xs.prepositive ! st
} ;
-}
BaseIAdv x y = {
s = \\st => x.s ! st ++ x.particle ++ y.s ! st ;
particle = y.particle ;

View File

@@ -5,13 +5,13 @@ flags coding = utf8 ;
lin
add_V3 = mkV3 "加える" "に" "を" Gr2 ; -- "kuwaeru"
airplane_N = mkN "飛行機" Inanim "機" False ; -- "hikouki" "ki"
airplane_N = mkN "飛行機" Inanim "機" ; -- "hikouki" "ki"
alas_Interj = ss "残念です" ; -- "zannendesu"
already_Adv = ResJpn.mkAdv "すでに" ; -- "sudeni"
animal_N = mkN "動物" Anim "疋" True ; -- "doubutsu" "hiki"
animal_N = mkN "動物" Anim ; -- "doubutsu"
answer_V2S = mkV "答える" Gr2 ; -- "kotaeru"
apartment_N = mkN "アパート" Inanim ; -- "apaato"
apple_N = mkN "リンゴ" Inanim "個" False ; -- "ringo" "ko"
apple_N = mkN "リンゴ" Inanim "個" ; -- "ringo" "ko"
art_N = mkN "芸術" Inanim ; -- "geijutsu"
ashes_N = mkN "灰" Inanim ; -- "hai"
ask_V2Q = mkV "聞く" Gr1 ; -- "kiku"
@@ -22,24 +22,24 @@ lin
bark_N = mkN "木皮" Inanim ; -- "kohada"
beautiful_A = mkA "美しい" ; -- "utsukushii"
become_VA = mkV "なる" Gr1 ; -- "naru"
beer_N = mkN "ビール" Inanim "杯" False ; -- "biiru" "hai"
beer_N = mkN "ビール" Inanim ; -- "biiru"
beg_V2V = mkV "請い願う" Gr1 ; -- "koinegau"
belly_N = mkN "お腹" "腹" Inanim ; -- "onaka" "hara"
big_A = mkA "大きな" ; -- "ookina"
bike_N = mkN "自転車" Inanim "台" False ; -- "jitensha" "dai"
bird_N = mkN "鳥" Anim "羽" False ; -- "tori" "wa"
bike_N = mkN "自転車" Inanim "台" ; -- "jitensha" "dai"
bird_N = mkN "鳥" Anim "羽" ; -- "tori" "wa"
bite_V2 = mkV2 "噛む" "を" Gr1 ; -- "kamu"
black_A = mkA "黒い" ; -- "kuroi" ;
blood_N = mkN "血液" Inanim ; -- "ketsueki"
blow_V = mkV "吹く" Gr1 ; -- "fuku"
blue_A = mkA "青い" ; -- "aoi" ;
boat_N = mkN "ボート" Inanim "艘" False ; -- "bouto" "sou"
boat_N = mkN "ボート" Inanim "艘" ; -- "bouto" "sou"
bone_N = mkN "骨" Inanim ; -- "hone"
book_N = mkN "本" Inanim "冊" False ; -- "hon" "satsu"
book_N = mkN "本" Inanim "冊" ; -- "hon" "satsu"
boot_N = mkN "ブート" Inanim ; -- "buuto"
boss_N = mkN "社長" Anim "人" False "社長たち" ; -- "shachou"
boy_N = mkN "男の子" Anim "人" False "男の子たち" ; -- "otokonoko"
bread_N = mkN "パン" Inanim "斤" False ; -- "pan" "kin"
bread_N = mkN "パン" Inanim "斤" ; -- "pan" "kin"
break_V2 = mkV2 "破る" "を" Gr1 ; -- "yaburu"
breast_N = mkN "胸" Inanim ; -- "mune"
breathe_V = mkV "息する" Suru ; -- "ikisuru"
@@ -51,20 +51,20 @@ lin
buy_V2 = mkV2 "買う" "を" Gr1 ; -- "kau"
camera_N = mkN "カメラ" Inanim ; -- "kamera"
cap_N = mkN "キャップ" Inanim ; -- "kyappu"
car_N = mkN "車" Inanim "車" True ; -- "kuruma" "sha"
car_N = mkN "車" Inanim "台" ; -- "kuruma" "dai"
carpet_N = mkN "絨毯" Inanim ; -- "juutan"
cat_N = mkN "猫" Anim "匹" False ; -- "neko" "hiki"
cat_N = mkN "猫" Anim "匹" ; -- "neko" "hiki"
ceiling_N = mkN "天井" Inanim ; -- "tenjou"
chair_N = mkN "椅子" Inanim "脚" False ; -- "isu" "kyaku"
chair_N = mkN "椅子" Inanim "脚" ; -- "isu" "kyaku"
cheese_N = mkN "チーズ" Inanim ; -- "chiizu"
child_N = mkN "子供" Anim "人" False ; -- "kodomo"
church_N = mkN "教会" Inanim "軒" False ; -- "kyoukai" "ken"
child_N = mkN "子供" Anim "人" ; -- "kodomo"
church_N = mkN "教会" Inanim ; -- "kyoukai"
city_N = mkN "都市" Inanim ; -- "toshi"
clean_A = mkA "奇麗な" ; -- "kireina" ;
clever_A = mkA "賢い" ; -- "kashikoi" ;
close_V2 = mkV2 "閉める" "を" Gr2 ; -- "shimeru"
cloud_N = mkN "雲" Inanim ; -- "kumo"
coat_N = mkN "コート" Inanim ; -- "kouto"
coat_N = mkN "コート" Inanim "着" False ; -- "kouto" "chaku"
cold_A = mkA "寒い" ; -- "samui" ;
come_V = mkV "来る" Kuru ; -- "ko" "ki" "kuru" "kita"
computer_N = mkN "コンピュータ" Inanim ; -- "konpyuuta"
@@ -72,15 +72,15 @@ lin
country_N = mkN "国" Inanim "ヶ国" True ; -- "kuni" "kakoku"
count_V2 = mkV2 "数える" "を" Gr2 ; -- "kazoeru"
cousin_N = mkN "いとこ" Anim "人" False "いとこたち" ; -- "itoko"
cow_N = mkN "牛" Anim "頭" False ; -- "ushi" "tou"
cow_N = mkN "牛" Anim "頭" ; -- "ushi" "tou"
cut_V2 = mkV2 "切る" "を" Gr1 ; -- "kiru"
day_N = mkN "日" Inanim "日" True ; -- "hi" "ka";
day_N = mkN "日" Inanim True ; -- "hi" "ka";
die_V = mkV "死ぬ" Gr1 ; -- "shinu"
dig_V = mkV "掘る" Gr1 ; -- "horu"
dirty_A = mkA "汚い" ; -- "kitanai" ;
distance_N3 = mkN3 "距離" "から" "まで" Inanim ; -- "kyori" "kara" "made"
doctor_N = mkN "医者" Anim "人" False "医者たち" ; -- "isha"
dog_N = mkN "犬" Anim "匹" False ; -- "inu" "hiki"
dog_N = mkN "犬" Anim "匹" ; -- "inu" "hiki"
door_N = mkN "ドア" Inanim ; -- "doa"
do_V2 = mkV2 "する" "を" Suru ; -- "suru"
drink_V2 = mkV2 "飲む" "を" Gr1 ; -- "nomu"
@@ -107,10 +107,10 @@ lin
find_V2 = mkV2 "拾う" "を" Gr1 ; -- "hirou"
fingernail_N = mkN "爪" Inanim ; -- "tsume"
fire_N = mkN "火" Inanim ; -- "hi"
fish_N = mkN "魚" Anim "匹" False ; -- "sakana" "hiki"
fish_N = mkN "魚" Anim "匹" ; -- "sakana" "hiki"
float_V = mkV "浮く" Gr1 ; -- "uku"
floor_N = mkN "床" Inanim ; -- "yuka"
flower_N = mkN "花" Inanim "輪" False ; -- "sakana" "rin"
flower_N = mkN "花" Inanim ; -- "hana"
flow_V = mkV "流れる" Gr2 ; -- "nagareru"
fly_V = mkV "飛ぶ" Gr1 ; -- "tobu"
fog_N = mkN "霧" Inanim ; -- "kiri"
@@ -119,8 +119,8 @@ lin
forget_V2 = mkV2 "忘れる" "を" Gr2 ; -- "wasureru"
freeze_V = mkV "凍らす" Gr1 ; -- "kourasu"
fridge_N = mkN "冷蔵庫" Inanim ; -- "reizouko"
friend_N = mkN "友だち" Anim "人" False ; -- "tomodachi"
fruit_N = mkN "果物" Inanim "個" False ; -- "kudamono" "ko"
friend_N = mkN "友だち" Anim "人" ; -- "tomodachi"
fruit_N = mkN "果物" Inanim ; -- "kudamono"
full_A = mkA "一杯の" ; -- "ippaino" ;
fun_AV = mkA "可笑しい" ; -- "okashii" ;
garden_N = mkN "庭" Inanim ; -- "niwa"
@@ -147,10 +147,10 @@ lin
hit_V2 = mkV2 "打つ" "を" Gr1 ; -- "utsu"
hold_V2 = mkV2 "持つ" "を" Gr1 ; -- "motsu"
hope_VS = mkV2 "期待する" "ことを" Suru ; -- "kitaisuru"
horn_N = mkN "角" Inanim "本" False ; -- "tsuno" "hon"
horse_N = mkN "馬" Anim "頭" False ; -- "uma" "tou"
horn_N = mkN "角" Inanim "本" ; -- "tsuno" "hon"
horse_N = mkN "馬" Anim "頭" ; -- "uma" "tou"
hot_A = mkA "熱い" ; -- "atsui"
house_N = mkN "家" Inanim "軒" True ; -- "ie" "ken"
house_N = mkN "家" Inanim "軒" ; -- "ie" "ken"
hunt_V2 = mkV2 "狩る" "を" Gr1 ; -- "karu"
husband_N = mkN "夫" Anim "人" False "夫たち" ; -- "otto"
ice_N = mkN "氷" Inanim ; -- "kouri"
@@ -167,9 +167,9 @@ lin
know_VS = mkV2 "知る" "ことを" Gr1 ; -- "shiru"
lake_N = mkN "湖" Inanim ; -- "mizuumi"
lamp_N = mkN "電灯" Inanim ; -- "dentou"
language_N = mkN "言語" Inanim "語" True ; -- "gengo" "go"
language_N = mkN "言語" Inanim ; -- "gengo"
laugh_V = mkV "笑う" Gr1 ; -- "warau"
leaf_N = mkN "葉" Inanim "葉" True ; -- "ha" "ha"
leaf_N = mkN "葉" Inanim "枚" ; -- "ha" "mai"
learn_V2 = mkV2 "学ぶ" "を" Gr1 ; -- "manabu"
leather_N = mkN "皮革" Inanim ; -- "hikaku"
leave_V2 = mkV2 "残す" "を" Gr1 ; -- "nokosu"
@@ -182,7 +182,7 @@ lin
live_V = mkV "住む" Gr1 ; -- "sumu"
long_A = mkA "長い" ; -- "nagai"
lose_V2 = mkV2 "失う" "を" Gr1 ; -- "ushinau"
louse_N = mkN "虱" Anim "匹" False ; -- "shirami" "hiki"
louse_N = mkN "虱" Anim "匹" ; -- "shirami" "hiki"
love_N = mkN "愛" Inanim ; -- "ai"
love_V2 = mkV2 "愛する" "を" Suru ; -- "aisuru"
man_N = mkN "男" Anim "人" False "男たち" ; -- "otoko"
@@ -191,7 +191,7 @@ lin
milk_N = mkN "ミルク" Inanim ; -- "miruku"
moon_N = mkN "月" Inanim ; -- "tsuki"
mother_N2 = mkN2 "お母さん" Anim "人" False "お母さんたち" "の" ; -- "okaasan"
mountain_N = mkN "山" Inanim "座" False ; -- "yama" "za"
mountain_N = mkN "山" Inanim ; -- "yama"
mouth_N = mkN "口" Inanim ; -- "kuchi"
music_N = mkN "音楽" Inanim ; -- "ongaku"
name_N = mkN "名前" "お名前" Inanim ; -- "namae" "onamae"
@@ -199,19 +199,19 @@ lin
near_A = mkA "近い" ; -- "chikai"
neck_N = mkN "首" Inanim ; -- "kubi"
new_A = mkA "新しい" ; -- "atarashii"
newspaper_N = mkN "新聞" Inanim "部" False ; -- "shimbun" "bu"
night_N = mkN "夜" Inanim "夜" True ; -- "yoru" "ya"
newspaper_N = mkN "新聞" Inanim "紙" ; -- "shimbun" "shi"
night_N = mkN "夜" Inanim True ; -- "yoru" "ya"
nose_N = mkN "鼻" Inanim ; -- "hana"
now_Adv = ResJpn.mkAdv "今" ; -- "ima"
number_N = mkN "数" Inanim "数" True ; -- "kazu" "suu"
number_N = mkN "数" Inanim ; -- "kazu"
oil_N = mkN "油" Inanim ; -- "abura"
old_A = mkA "古い" ; -- "furui"
open_V2 = mkV2 "開く" "を" Gr1 ; -- "hiraku"
paint_V2A = mkV "塗る" Gr1 ; -- "nuru"
paper_N = mkN "紙" Inanim "葉" False ; -- "kami" "you"
paper_N = mkN "紙" Inanim "枚" ; -- "kami" "mai"
paris_PN = mkPN "パリ" ;
peace_N = mkN "平和" Inanim ; -- "heiwa"
pen_N = mkN "ペン" Inanim "本" False ; -- "pen" "hon"
pen_N = mkN "ペン" Inanim "本" ; -- "pen" "hon"
person_N = mkN "人" Anim "人" True "人たち" ; -- "hito"
planet_N = mkN "惑星" Inanim ; -- "wakusei"
plastic_N = mkN "プラスチック" Inanim ; -- "purasutikku"
@@ -224,7 +224,7 @@ lin
push_V2 = mkV2 "押す" "を" Gr1 ; -- "osu"
put_V2 = mkV2 "置く" "を" Gr1 ; -- "oku"
queen_N = mkN "女王" Anim "人" False "女王たち" ; -- "joou"
question_N = mkN "質問" Inanim "題" True ; -- "shitsumon" "dai"
question_N = mkN "質問" Inanim ; -- "shitsumon"
radio_N = mkN "ラジオ" Inanim ; -- "rajio"
rain_N = mkN "雨" Inanim ; -- "ame"
rain_V0 = mkRain ;
@@ -235,12 +235,12 @@ lin
religion_N = mkN "宗教" Inanim ; -- "shuukyou"
restaurant_N = mkN "レストラン" Inanim ; -- "resutoran"
right_Ord = mkA "右の" ; -- "migino"
river_N = mkN "川" Inanim "本" False ; -- "kawa" "hon"
road_N = mkN "道路" Inanim "本" False ; -- "douro" "hon"
river_N = mkN "川" Inanim "本" ; -- "kawa" "hon"
road_N = mkN "道路" Inanim "本" ; -- "douro" "hon"
rock_N = mkN "岩" Inanim ; -- "iwa"
roof_N = mkN "屋根" Inanim ; -- "yane"
root_N = mkN "根" Inanim ; -- "ne"
rope_N = mkN "縄" Inanim "本" False ; -- "nawa"
rope_N = mkN "縄" Inanim "本" ; -- "nawa"
rotten_A = mkA "腐っている" "腐った" ; -- "kusatteiru"
round_A = mkA "丸い" ; -- "marui"
rubber_N = mkN "ゴム" Inanim ; -- "gomu" - material
@@ -261,11 +261,11 @@ lin
send_V3 = mkV3 "送る" "に" "を" Gr1 ; -- "okuru"
sew_V = mkV "縫う" Gr1 ; -- "nuu"
sharp_A = mkA "鋭い" ; -- "surudoi"
sheep_N = mkN "羊" Anim "頭" False ; -- "hitsuji" "tou"
ship_N = mkN "船" Inanim "杯" False ; -- "fune" "hai"
shirt_N = mkN "シャツ" Inanim ; -- "shatsu"
sheep_N = mkN "羊" Anim "頭" ; -- "hitsuji" "tou"
ship_N = mkN "船" Inanim "隻" ; -- "fune" "seki"
shirt_N = mkN "シャツ" Inanim "枚" False ; -- "shatsu" "mai"
shoe_N = mkN "靴" Inanim ; -- "kutsu"
shop_N = mkN "店" Inanim "店" True ; -- "mise" "ten"
shop_N = mkN "店" Inanim ; -- "mise"
short_A = mkA "短い" ; -- "mijikai"
silver_N = mkN "銀" Inanim ; -- "gin"
sing_V = mkV "歌う" Gr1 ; -- "utau"
@@ -278,10 +278,10 @@ lin
smell_V = mkV "匂う" Gr1 ; -- "niou"
smoke_N = mkN "煙" Inanim ; -- "kemuri"
smooth_A = mkA "平滑な" ; -- "heikatsuna"
snake_N = mkN "蛇" Anim "匹" False ; -- "hebi" "hiki"
snake_N = mkN "蛇" Anim "匹" ; -- "hebi" "hiki"
snow_N = mkN "雪" Inanim ; -- "yuki"
sock_N = mkN "靴下" Inanim ; -- "kutsushita"
song_N = mkN "" Inanim "曲" False ; -- "uta" "kyoku"
song_N = mkN "" Inanim True ; -- "kyoku"
speak_V2 = mkV2 "話す" "を" Gr1 ; -- "hanasu"
spit_V = mkV "唾する" Suru ; -- "tsubakisuru"
split_V2 = mkV2 "分かつ" "を" Gr1 ; -- "wakatsu"
@@ -290,7 +290,7 @@ lin
stand_V = mkV "立つ" Gr1 ; -- "tatsu"
star_N = mkN "星" Inanim ; -- "hoshi"
steel_N = mkN "鋼" Inanim ; -- "hagane"
stick_N = mkN "棒" Inanim "本" False ; -- "bou" "hon"
stick_N = mkN "棒" Inanim "本" ; -- "bou" "hon"
stone_N = mkN "石" Inanim ; -- "ishi"
stop_V = mkV "止まる" Gr1 ; -- "tomaru"
stove_N = mkN "ストーブ" Inanim ; -- "sutobu"
@@ -303,7 +303,7 @@ lin
swim_V = mkV "泳ぐ" Gr1 ; -- "oyogu"
switch8off_V2 = mkV2 "スイッチを切る" "の" Gr1 ; -- "suitchiokiru"
switch8on_V2 = mkV2 "スイッチを入れる" "の" Gr2 ; -- "suitchioireru"
table_N = mkN "テーブル" Inanim "脚" False ; -- "teburu" "kyaku"
table_N = mkN "テーブル" Inanim ; -- "teburu"
tail_N = mkN "尾" Inanim ; -- "o"
talk_V3 = mkV3 "話す" "と" "について" Gr1 ; -- "hanasu"
teacher_N = mkN "先生" Anim "人" False "先生たち" ; -- "sensei"
@@ -317,15 +317,15 @@ lin
today_Adv = ResJpn.mkAdv "今日" ; -- "kyou" ;
tongue_N = mkN "舌" Inanim ; -- "shita"
tooth_N = mkN "歯" Inanim ; -- "ha"
train_N = mkN "車" Inanim "列車" True ; -- "densha" "ressha"
train_N = mkN "車" Inanim "本" ; -- "ressha" "hon"
travel_V = mkV "旅行する" Suru ; -- "ryokousuru"
tree_N = mkN "木" Inanim "樹" True ; -- "ki" "ju"
tree_N = mkN "木" Inanim "本" ; -- "ki" "hon"
turn_V = mkV "回る" Gr1 ; -- "mawaru"
ugly_A = mkA "醜い" ; -- "minikui"
uncertain_A = mkA "危なっかしい" ; -- "abunakkashii"
understand_V2 = mkV2 "理解する" "を" Suru ; -- "rikaisuru"
university_N = mkN "大学" Inanim ; -- "daigaku"
village_N = mkN "村" Inanim "村" True ; -- "mura" "son"
village_N = mkN "村" Inanim ; -- "mura"
vomit_V = mkV "吐く" Gr1 ; -- "haku"
wait_V2 = mkV2 "待つ" "を" Gr1 ; -- "matsu"
walk_V = mkV "歩く" Gr1 ; -- "aruku"
@@ -347,9 +347,9 @@ lin
woman_N = mkN "女" Anim "人" False "女たち" ; -- "onna"
wonder_VQ = mkV2 "質問する" "を" Suru ; -- "shitsumonsuru"
wood_N = mkN "木材" Inanim ; -- "mokuzai"
worm_N = mkN "ワーム" Anim "匹" False ; -- "waamu" "hiki"
worm_N = mkN "ワーム" Anim "匹" ; -- "waamu" "hiki"
write_V2 = mkV2 "書く" "を" Gr1 ; -- "kaku"
year_N = mkN "年" Inanim "年" True ; -- "toshi" "nen"
year_N = mkN "年" Inanim True ; -- "toshi" "nen"
yellow_A = mkA "黄色の" ; -- "kiirono"
young_A = mkA "若い" ; -- "wakai"
}

View File

@@ -10,16 +10,16 @@ flags coding = utf8 ;
True => case cn.hasAttr of {
True => cn.object ! st ++ det.quant ! st ++ det.num ++ cn.counter ++ det.postpositive
++ "の" ++ cn.s ! det.n ! st ;
False => cn.object ! st ++ det.quant ! st ++ det.num ++ cn.counter ++ det.postpositive
False => cn.object ! st ++ det.quant ! st ++ det.num ++ cn.counter ++ det.postpositive
} ;
False => case <det.tenPlus, cn.counterTsu> of {
<True, True> => cn.object ! st ++ det.quant ! st ++ det.num ++ "個" ++ det.postpositive
<True, True> => cn.object ! st ++ det.quant ! st ++ det.num ++ "個" ++ det.postpositive
++ "の" ++ cn.s ! det.n ! st ;
_ => cn.object ! st ++ det.quant ! st ++ det.num ++ cn.counter ++ det.postpositive
++ "の" ++ cn.s ! det.n ! st
}
_ => cn.object ! st ++ det.quant ! st ++ det.num ++ cn.counter ++ det.postpositive
++ "の" ++ cn.s ! det.n ! st
}
} ;
False => cn.object ! st ++ det.quant ! st ++ det.num ++ cn.s ! det.n ! st
False => cn.object ! st ++ det.quant ! st ++ det.num ++ cn.s ! det.n ! st
} ;
prepositive = cn.prepositive ;
needPart = True ;
@@ -30,7 +30,7 @@ flags coding = utf8 ;
meaning = SomeoneElse ;
anim = cn.anim
} ;
UsePN pn = {
s = \\st => pn.s ! st ;
prepositive = \\st => [] ;
@@ -39,7 +39,7 @@ flags coding = utf8 ;
meaning = SomeoneElse ;
anim = pn.anim
} ;
UsePron pron = {
s = pron.s ;
prepositive = \\st => [] ;
@@ -51,7 +51,7 @@ flags coding = utf8 ;
} ;
anim = pron.anim
} ;
PredetNP p np = {
s = \\st => p.s ++ np.s ! st ;
prepositive = np.prepositive ;
@@ -63,7 +63,7 @@ flags coding = utf8 ;
meaning = np.meaning ;
anim = np.anim
} ;
PPartNP np v2 = np ** {
s = \\st => v2.pass ! Plain ! TPast ! Pos ++ np.s ! st ;
} ;
@@ -71,20 +71,20 @@ flags coding = utf8 ;
AdvNP np adv = np ** {
s = \\st => case adv.prepositive of {
True => np.s ! st ;
False => adv.s ! st ++ np.s ! st
False => adv.s ! st ++ np.s ! st
} ;
prepositive = \\st => case adv.prepositive of {
True => adv.s ! st ;
False => []
}
False => []
}
} ;
ExtAdvNP = AdvNP ;
RelNP np rs = np ** {
s = \\st => rs.s ! np.anim ! st ++ np.s ! st
s = \\st => rs.s ! np.anim ! st ++ np.s ! st
} ;
DetNP det = {
s = det.sp ;
prepositive = \\st => [] ;
@@ -96,7 +96,7 @@ flags coding = utf8 ;
meaning = SomeoneElse ;
anim = Inanim -- not always, depends on the context
} ;
DetQuant quant num = {
quant = quant.s ;
postpositive = num.postpositive ;
@@ -113,7 +113,7 @@ flags coding = utf8 ;
no = quant.no ;
tenPlus = num.tenPlus
} ;
DetQuantOrd quant num ord = {
quant = \\st => quant.s ! st ++ ord.attr ;
postpositive = num.postpositive ;
@@ -132,15 +132,15 @@ flags coding = utf8 ;
} ;
NumSg = mkNum "" Sg ;
NumPl = mkNum "" Pl ;
NumCard card = card ** {inclCard = True} ;
NumDigits num = num ** {postpositive = []} ;
NumNumeral num = num ** {postpositive = []} ;
AdNum adn card = case adn.postposition of {
True => {
s = card.s ;
@@ -153,11 +153,11 @@ flags coding = utf8 ;
postpositive = [] ;
n = card.n ;
tenPlus = card.tenPlus
}
}
} ;
OrdDigits, OrdNumeral = mkOrd ; -- "banme"
OrdSuperl a = {
pred = \\st,t,p => "一番" ++ a.pred ! st ! t ! p ; -- "ichiban"
attr = "一番" ++ a.attr ;
@@ -175,11 +175,11 @@ flags coding = utf8 ;
adv = \\p => n.s ++ "番" ++ a.adv ! p ;
dropNaEnging = n.s ++ "番" ++ a.dropNaEnging
} ;
IndefArt = {s = \\st => "" ; sp = \\st => "何か" ; no = False} ;
DefArt = {s = \\st => "" ; sp = \\st => "これ" ; no = False} ;
MassNP cn = {
s = \\st => cn.object ! st ++ cn.s ! Pl ! st ;
prepositive = cn.prepositive ;
@@ -188,40 +188,40 @@ flags coding = utf8 ;
meaning = SomeoneElse ;
anim = cn.anim
} ;
PossPron pron = {
s, sp = \\st => pron.s ! st ++ "の" ;
no = False
} ;
UseN n = n ** {
object = \\st => [] ;
prepositive = \\st => [] ;
hasAttr = False ;
} ;
ComplN2 n2 np = n2 ** {
object = \\st => n2.object ! st ++ np.s ! st ++ n2.prep ;
prepositive = np.prepositive ;
hasAttr = False ;
} ;
ComplN3 n3 np = n3 ** {
object = \\st => np.s ! st ++ n3.prep1 ;
prepositive = np.prepositive ;
prep = n3.prep2 ;
} ;
UseN2 n2 = n2 ** {
prepositive = \\st => [] ;
hasAttr = False ;
} ;
Use2N3 n3 = n3 ** {
object = \\st => [] ;
prep = n3.prep1 ;
} ;
Use3N3 n3 = n3 ** {
object = \\st => [] ;
prep = n3.prep2 ;
@@ -230,7 +230,7 @@ flags coding = utf8 ;
AdjCN ap cn = cn ** {
s = \\n,st => case cn.hasAttr of {
False => ap.attr ! st ++ cn.s ! n ! st ;
True => ap.te ! st ! Pos ++ cn.s ! n ! st
True => ap.te ! st ! Pos ++ cn.s ! n ! st
} ;
hasAttr = True ;
} ;
@@ -238,29 +238,29 @@ flags coding = utf8 ;
RelCN cn rs = cn ** {
object = \\st => rs.s ! cn.anim ! st ++ cn.object ! st ;
} ;
AdvCN cn adv = cn ** {
object = \\st => case adv.prepositive of {
True => cn.object ! st ;
False => adv.s ! st ++ cn.object ! st
False => adv.s ! st ++ cn.object ! st
} ;
prepositive = \\st => case adv.prepositive of {
True => adv.s ! st ;
False => []
} ;
} ;
SentCN cn sc = cn ** {
object = \\st => sc.s ! Ga ! st ++ cn.object ! st ;
} ;
ApposCN cn np = cn ** {
s = \\n,st => np.s ! st ++ cn.s ! n ! st ;
} ;
PossNP cn np = cn ** { -- house of Paris, house of mine
s = \\n,st => np.s ! st ++ "の" ++ cn.s ! n ! st ;
} ;
} ;
PartNP = PossNP ;
@@ -276,13 +276,13 @@ flags coding = utf8 ;
AdjDAP dap ap = lin Det {
quant = \\st => dap.quant ! st ++ ap.prepositive ! st ++ ap.attr ! st ;
num = dap.num ;
postpositive = dap.postpositive ;
n = dap.n ;
inclCard = dap.inclCard ;
postpositive = dap.postpositive ;
n = dap.n ;
inclCard = dap.inclCard ;
sp = \\st => dap.sp ! st ++ ap.prepositive ! st ++ ap.attr ! st ;
no = dap.no ;
tenPlus = dap.tenPlus
} ;
no = dap.no ;
tenPlus = dap.tenPlus
} ;
DetDAP det = det ;
}

View File

@@ -1,110 +1,127 @@
resource ParadigmsJpn = CatJpn **
resource ParadigmsJpn = CatJpn **
open ResJpn, CatJpn, Prelude in {
flags coding = utf8 ;
param
VerbGroupP = Gr1P | Gr2P | SuruP | KuruP ;
oper
VerbGroup : Type ; -- Parameter for mkV*
gr1 : VerbGroup ; -- Basic form ends in -u, consonant stem.
gr2 : VerbGroup ; -- Basic form ends in -iru/-eru, vowel stem.
suru : VerbGroup ; -- Irregular: suru
kuru : VerbGroup ; -- Irregular: kuru
Animacy : Type ; -- Parameter for mkN*
animate : Animacy ;
inanimate : Animacy ;
mkN = overload {
mkN : (man : Str) -> N ---- AR 15/11/2014
= \n -> lin N (regNoun n Inanim "つ" False True) ;
mkN : (man : Str) -> (anim : Animateness) -> N
mkN : (man : Str) -> N -- Inanimate noun. Counter is つ and doesn't replace the noun.
= \n -> lin N (regNoun n Inanim "つ" False True) ; ---- AR 15/11/2014
mkN : (man : Str) -> (anim : Animacy) -> N -- Animacy given as argument. Counter is つ and doesn't replace the noun.
= \n,a -> lin N (regNoun n a "つ" False True) ;
mkN : (kane,okane : Str) -> (anim : Animateness) -> N
mkN : (kane,okane : Str) -> (anim : Animacy) -> N -- Style variation (plain, respectful) and animacy given. Counter is つ and doesn't replace the noun.
= \kane,okane,a -> lin N (styleNoun kane okane a "つ" False True) ;
mkN : (man : Str) -> (anim : Animateness) -> (counter : Str) -> (counterReplace : Bool) -> N
mkN : (man : Str) -> (anim : Animacy) -> (counter : Str) -> (counterReplace : Bool) -> N -- No style variation. Arguments are animacy, counter and whether counter replaces the noun.
= \n,a,c,b -> lin N (regNoun n a c b False) ;
mkN : (man : Str) -> (anim : Animateness) -> (counter : Str) -> (counterReplace : Bool) ->
(men : Str) -> N = \n,a,c,b,pl -> lin N (numberNoun n a c b pl False) ;
mkN : (kane,okane : Str) -> (anim : Animateness) -> (counter : Str) ->
(counterReplace : Bool) -> N
= \kane,okane,a,c,b -> lin N (styleNoun kane okane a c b False) ;
mkN : (tsuma,okusan : Str) -> (anim : Animateness) -> (counter : Str) ->
(counterReplace : Bool) -> (tsumatachi : Str) -> N
= \tsuma,okusan,a,c,b,tsumatachi ->
lin N (mkNoun tsuma okusan tsumatachi tsumatachi a c b False)
mkN : (man : Str) -> (anim : Animacy) -> (counterReplace : Bool) -> N -- No style variation. Arguments are animacy and whether counter replaces the noun (here the counter and the noun coincide, e.g. 'day' 日, 'two days' 二日, not 二日の日).
= \n,a,b -> lin N (regNoun n a n b False) ; -- Liza 16/02/2020
mkN : (man : Str) -> (anim : Animacy) -> (counter : Str) -> N -- No style variation. Arguments are animacy and counter, which does not replace the noun.
= \n,a,c -> lin N (regNoun n a c False False) ; -- Liza 16/02/2020
mkN : (man : Str) -> (anim : Animacy) -> (counter : Str) -> (counterReplace : Bool) -> (men : Str) -> N -- Like previous, but unpredictable plural.
= \n,a,c,b,pl -> lin N (numberNoun n a c b pl False) ;
mkN : (kane,okane : Str) -> (anim : Animacy) -> (counter : Str) -> (counterReplace : Bool) -> N -- Plain form, respectful form, animacy, counter and whether counter replaces the noun.
= \kane,okane,a,c,b -> lin N (styleNoun kane okane a c b False) ;
mkN : (tsuma,okusan : Str) -> (anim : Animacy) -> (counter : Str) -> (counterReplace : Bool) -> (tsumatachi : Str) -> N -- Worst case paradigm: plain form, respectful form, animacy, counter, whether counter replaces the noun, and plural form.
= \tsuma,okusan,a,c,b,tsumatachi ->
lin N (mkNoun tsuma okusan tsumatachi tsumatachi a c b False)
} ;
mkN2 : (man : Str) -> (anim : Animateness) -> (counter : Str) -> (counterReplace : Bool) ->
(men : Str) -> (prep : Str) -> N2 = \n,a,c,b,pl,pr ->
mkN2 : (man : Str) -> (anim : Animacy) -> (counter : Str) -> (counterReplace : Bool) -> (men : Str) -> (prep : Str) -> N2 = \n,a,c,b,pl,pr ->
lin N2 (numberNoun n a c b pl False) ** {prep = pr ; object = \\st => []} ;
mkN3 : (distance : Str) -> (prep1: Str) -> (prep2: Str) -> (anim : Animateness) -> N3
mkN3 : (distance : Str) -> (prep1: Str) -> (prep2: Str) -> (anim : Animacy) -> N3
= \n,p1,p2,a -> lin N3 (regNoun n a "つ" False True) ** {prep1 = p1; prep2 = p2} ;
mkPN = overload {
mkPN : (paris : Str) -> PN
mkPN : (paris : Str) -> PN
= \n -> lin PN (regPN n) ;
mkPN : (jon,jonsan : Str) -> PN
mkPN : (jon,jonsan : Str) -> PN
= \jon,jonsan -> lin PN (personPN jon jonsan)
} ;
mkPron = overload {
mkPron : (kare : Str) -> (Pron1Sg : Bool) -> (anim : Animateness) -> Pron
mkPron : (kare : Str) -> (Pron1Sg : Bool) -> (anim : Animacy) -> Pron
= \kare,b,a -> lin Pron (regPron kare b a) ;
mkPron : (boku,watashi : Str) -> (Pron1Sg : Bool) -> (anim : Animateness) -> Pron
mkPron : (boku,watashi : Str) -> (Pron1Sg : Bool) -> (anim : Animacy) -> Pron
= \boku,watashi,b,a -> lin Pron (stylePron boku watashi b a)
} ;
mkA = overload {
mkA : (ookina : Str) -> A = \a -> lin A (regAdj a) ;
mkA : (kekkonshiteiru,kikonno : Str) -> A = \pred,attr -> lin A (VerbalA pred attr)
mkA : (ookina : Str) -> A -- One form for both predicative and attribute
= \a -> lin A (regAdj a) ;
mkA : (kekkonshiteiru,kikonno : Str) -> A -- Verbal adjective, arguments are predicative and attributive
= \pred,attr -> lin A (VerbalA pred attr)
} ;
mkA2 = overload {
mkA2 : (yasui : Str) -> (prep : Str) -> A2 = \a,p -> lin A2 (regAdj a) ** {prep = p} ;
mkA2 : (pred : Str) -> (attr : Str) -> (prep : Str) -> A2 =
\pred,attr,pr -> lin A2 (VerbalA pred attr) ** {prep = pr}
mkA2 : (yasui : Str) -> (prep : Str) -> A2 -- 2-place adjective. Arguments: adjective (same for predicative and attributive) and object marker.
= \a,p -> lin A2 (regAdj a) ** {prep = p} ;
mkA2 : (pred : Str) -> (attr : Str) -> (prep : Str) -> A2 -- Predicative, attributive and object marker.
= \pred,attr,pr -> lin A2 (VerbalA pred attr) ** {prep = pr}
} ;
mkV = overload {
mkV : (yomu : Str) -> V
mkV : (yomu : Str) -> V -- Default: group 1 verb
= \yomu -> lin V (mkVerb yomu Gr1) ; ---- AR 15/11/2014
mkV : (yomu : Str) -> (group : ResJpn.VerbGroup) -> V
= \yomu,gr -> lin V (mkVerb yomu gr) ;
mkV : (yomu : Str) -> (group : VerbGroup) -> V
= \yomu,gr -> lin V (mkVerb yomu gr) ; -- Base form and verb group given
} ;
mkV2 = overload {
mkV2 : (yomu : Str) -> V2 ---- AR 15/11/2014
= \yomu -> lin V2 (mkVerb2 yomu "を" Gr1) ;
mkV2 : (yomu, prep : Str) -> (group : ResJpn.VerbGroup) -> V2
mkV2 : (yomu : Str) -> V2 -- Group 1 verb, を as object marker
= \yomu -> lin V2 (mkVerb2 yomu "を" Gr1) ; ---- AR 15/11/2014
mkV2 : (yomu, prep : Str) -> (group : VerbGroup) -> V2 -- Base form, object marker and verb group given
= \yomu,p,gr -> lin V2 (mkVerb2 yomu p gr) ;
} ;
mkV3 = overload {
mkV3 : (yomu : Str) -> V3
= \yomu -> lin V3 (mkVerb3 yomu "に" "を" Gr1) ;
mkV3 : (uru, p1, p2 : Str) -> (group : ResJpn.VerbGroup) -> V3 = \uru,p1,p2,gr ->
lin V3 (mkVerb3 uru p1 p2 gr) ;
mkV3 : (yomu : Str) -> V3 -- Group 1 verb, に and を as object markers
= \yomu -> lin V3 (mkVerb3 yomu "に" "を" Gr1) ;
mkV3 : (uru, p1, p2 : Str) -> (group : VerbGroup) -> V3 -- Base form, object markers and verb group given
= \uru,p1,p2,gr -> lin V3 (mkVerb3 uru p1 p2 gr) ;
} ;
mkVS : (yomu : Str) -> VS = \yomu -> lin VS (mkVerb2 yomu "ことを" Gr1) ;
mkVS : (yomu : Str) -> VS
= \yomu -> lin VS (mkVerb2 yomu "ことを" Gr1) ;
mkVV : (yomu : Str) -> VV = \yomu -> lin VV (mkVerbV yomu Gr1) ;
mkVV : (yomu : Str) -> VV
= \yomu -> lin VV (mkVerbV yomu Gr1) ;
mkV2V : (yomu : Str) -> V2V = \yomu -> lin V2V (mkVerb yomu Gr1) ;
mkV2V : (yomu : Str) -> V2V
= \yomu -> lin V2V (mkVerb yomu Gr1) ;
mkV2S : (yomu : Str) -> V2S = \yomu -> lin V2S (mkVerb yomu Gr1) ;
mkV2S : (yomu : Str) -> V2S
= \yomu -> lin V2S (mkVerb yomu Gr1) ;
mkVQ : (yomu : Str) -> VQ = \yomu -> lin VQ (mkVerb2 yomu "を" Gr1) ;
mkVQ : (yomu : Str) -> VQ
= \yomu -> lin VQ (mkVerb2 yomu "を" Gr1) ;
mkVA : (yomu : Str) -> VA = \yomu -> lin VA (mkVerb yomu Gr1) ;
mkVA : (yomu : Str) -> VA
= \yomu -> lin VA (mkVerb yomu Gr1) ;
mkV2A : (yomu : Str) -> V2A = \yomu -> lin V2A (mkVerb yomu Gr1) ;
mkV2A : (yomu : Str) -> V2A
= \yomu -> lin V2A (mkVerb yomu Gr1) ;
mkAdv : Str -> Adv ---- AR 15/11/2014
= \s -> lin Adv (ResJpn.mkAdv s) ;
mkAdv : Str -> Adv
= \s -> lin Adv (ResJpn.mkAdv s) ; ---- AR 15/11/2014
mkPrep : Str -> Prep ---- AR 15/11/2014
= \s -> lin Prep (ResJpn.mkPrep s) ;
mkPrep : Str -> Prep
= \s -> lin Prep (ResJpn.mkPrep s) ; ---- AR 15/11/2014
mkDet : Str -> Det = \d -> lin Det (ResJpn.mkDet d d ResJpn.Sg) ;
mkConj : Str -> Conj = \c -> lin Conj (ResJpn.mkConj c ResJpn.And) ;
mkConj : Str -> Conj = \c -> lin Conj (ResJpn.mkConj c ResJpn.And) ;
mkInterj : Str -> Interj
= \s -> lin Interj (ss s) ;
@@ -114,6 +131,19 @@ oper
i_stem = \\sp => mkGo.i_stem ;
ba = \\sp => mkGo.ba ;
te_neg = \\sp => "行かないで" ;
ba_neg = \\sp => "行かなければ" ;
sense = Abil} ;
ba_neg = \\sp => "行かなければ" ;
sense = Abil} ;
--.
-- Hidden definitions
Animacy : Type = ResJpn.Animateness ;
animate : Animacy = ResJpn.Anim ;
inanimate : Animacy = ResJpn.Inanim ;
VerbGroup : Type = ResJpn.VerbGroup ;
gr1 : VerbGroup = Gr1 ;
gr2 : VerbGroup = Gr2 ;
suru : VerbGroup = Suru ;
kuru : VerbGroup = Kuru ;
}

View File

@@ -0,0 +1,142 @@
module Main where
import PGF
import qualified Data.Map as M
import Data.Char
import Data.List
import System.Environment (getArgs)
-- AR 2020-02-28
-- making a word list purely morphological, i.e.
-- - functions are 1-to-1 with lemgrams, i.e.
-- - no sense distinctions
-- - no subcategorizations
-- - no variants
-- - functionname = baseform_category, with exceptions
-- - variant inflection tables: lie_1_V, lie_2_V
-- - words that have non-ident characters: 'bird\'s-eye_A'
-- - words that start with non-letters: W_'tween_Adv
-- example:
-- gf -make ../english/DictEng.gf
-- runghc MkMorphodict.hs DictEngAbs.pgf MorphoDictEng
-- 64923 -> 56599 functions
-- | Command-line synopsis: the PGF file to read and the base name shared by
-- the config, header and generated grammar files.
usage = "MkMorphodict <pgf> <outfile>"
-- | Entry point: @MkMorphodict <pgf> <outfile>@.
--
-- Reads the PGF file, @<outfile>.config@, @<outfile>Abs.header@ and
-- @<outfile>.header@, and writes @<outfile>Abs.gf@ (abstract syntax) and
-- @<outfile>.gf@ (concrete syntax). Each output is its header, followed by
-- the generated rules, followed by a closing brace.
--
-- Fails with the usage string when fewer than two arguments are given, and
-- with an explicit message when the PGF contains no concrete syntax.
main = do
  args <- getArgs
  case args of
    pgfile:outfile:_ -> do
      pgf <- readPGF pgfile
      config <- fmap mkConfig (readFile (outfile ++ ".config"))
      -- the first concrete syntax of the PGF supplies the inflection tables
      firstLang <- case languages pgf of
        l:_ -> return l
        []  -> ioError (userError (pgfile ++ ": no concrete syntax found in PGF"))
      let (absrules,cncrules) = mkMorphoDict (MDEnv pgf config firstLang)
      absheader <- readFile (outfile ++ "Abs.header")
      cncheader <- readFile (outfile ++ ".header")
      -- headers are read from different files than the ones written, so one
      -- write per output file is safe and produces the same bytes as before
      writeFile (outfile ++ "Abs.gf") (absheader ++ unlines absrules ++ "}")
      writeFile (outfile ++ ".gf") (cncheader ++ unlines cncrules ++ "}")
    _ -> ioError (userError ("usage: " ++ usage))
-- | A GF category, represented as a PGF identifier.
type Cat = CId
-- | Name of the paradigm function written at the head of each generated
-- @lin@ rule (e.g. the @mkN@ column of the config file).
type Oper = String
-- | Per source category: the category used in the generated rules, the
-- paradigm to apply, and the indices of the linearization-table rows passed
-- to it (the first index is used as the base form).
type Config = M.Map Cat (Cat,Oper,[Int])
-- | Everything 'mkMorphoDict' needs: the source grammar, the parsed config
-- file, and the concrete syntax whose tables are read.
data MDEnv = MDEnv {
pgf :: PGF,
config :: Config,
lang :: Language
}
-- | Parse the contents of a @.config@ file.
--
-- Each rule line has the shape @Cat : TargetCat oper i1 i2 ...@, where the
-- indices select rows of the linearization table (the first one is the base
-- form). Lines whose first word is @--@ and lines that do not have the rule
-- shape are ignored.
--
-- A line that has the rule shape but no indices, or an index that is not a
-- non-negative decimal number, is reported with 'error' naming the offending
-- line, instead of surfacing later as @Prelude.read: no parse@ or
-- @Prelude.head: empty list@.
mkConfig :: String -> Config
mkConfig ls = M.fromList [entry | Just entry <- map parseLine (lines ls)]
  where
    parseLine s = case words s of
      "--":_ -> Nothing
      cat:":":tcat:oper:ints
        | not (null ints) && all (all isDigit) ints ->
            Just (mkCId cat,(mkCId tcat,oper,map read ints))
        | otherwise ->
            error ("MkMorphodict: bad config line (expected 'Cat : TargetCat oper i1 i2 ...'): " ++ s)
      _ -> Nothing
-- | Build the morphological dictionary: returns the abstract @fun@ rules and
-- the concrete @lin@ rules, index-aligned (one pair per generated function).
-- Pipeline, bottom to top: collect rules per configured category, drop
-- duplicate inflection tables, assign function names, annotate compounds,
-- and render each rule as a pair of strings.
mkMorphoDict :: MDEnv -> ([String],[String])
mkMorphoDict env =
unzip $
map splitRule $
findCompounds $
nameFunctions $
mergeRules $
concatMap findRules cats
where
-- render one entry as ("fun f : Cat ;", "lin f = oper forms... ;")
splitRule (fun,(cat,lin)) = (unwords ["fun",fun,":",showCId cat,";"], unwords ["lin",fun,"=", unwords lin,";"])
-- the source categories listed in the config
cats = nub [c | (c,(_,_,_)) <- M.assocs (config env)]
-- one raw rule per function of the category and per linearization table:
-- key = [base form, target category name],
-- value = (target category, paradigm name : selected forms)
findRules cat = [
([snd (lin !! head ints), showCId c], (c, op : appSig ints (map snd lin))) | --- head ints is the base form in smart paradigms
f <- functionsByCat (pgf env) cat,
lin <- tabularLinearizes (pgf env) (lang env) (mkApp f []), -- [[(String, String)]]
Just (c,op,ints) <- [M.lookup cat (config env)]
]
-- pick the table rows named by the config indices, in config order
appSig ints forms = [forms !! i | i <- ints]
-- keep one rule per distinct (category, paradigm application)
mergeRules = map head . groupBy (\x y -> snd x == snd y) . sortOn snd
-- sort by name key so that rules sharing a key become adjacent
nameFunctions = expandNames . sortOn fst
-- a key that occurs once is used as is; a run of equal keys is numbered
expandNames fls = case fls of
(f,l):fls2 -> case span ((==f) . fst) fls2 of
([],_) -> (mkFun f,l) : expandNames fls2
(fls1,fls3) -> renames ((f,l):fls1) ++ expandNames fls3
_ -> []
-- insert a running number (from 1) before the last key component
renames fls = [(mkFun (init f ++ [show i,last f]),l) | (i,(f,l)) <- zip [1..] fls]
-- sort so that entries whose forms share endings become adjacent, then
-- annotate the entries that follow a potential head word
findCompounds = getCompounds . sortOn cat_orthrevforms
-- sort key: category, then for each position i counted from the end of the
-- words (up to the shortest form) the i-th characters of all forms
cat_orthrevforms (_,(cat,_:forms)) = (cat,[map (!!i) fss | let fss = map reverse forms, i <- [0..minimum (map length fss) - 1]])
-- category plus every form reversed (the paradigm name is dropped)
cat_revforms (_,(cat,_:forms)) = (cat,map reverse forms)
-- the first selected form, reversed
revstem = head . snd . cat_revforms
-- the selected forms without the leading paradigm name
wforms (_,(_,_:forms)) = forms
getCompounds fls = case fls of
-- an entry whose first form is shorter than 2 characters is never a head
fl : fls1 | length (revstem fl) < 2 -> markWith fl [] : getCompounds fls1
-- take the following entries whose forms all end in the matching form of fl
fl : fls2 -> case span (\x -> and [isPrefixOf (reverse w) (reverse w1) | (w,w1) <- zip (wforms fl) (wforms x)]) fls2 of
([],_:_) -> markWith fl [] : getCompounds fls2
(fls1,fls3) -> markWith fl [] : map (markCompound fl) fls1 ++ getCompounds fls3
_ -> []
-- append "; -- compound <head>" or "; -- notcompound <head>" to fl1's rule,
-- where <head> is the function name of fl
markCompound fl fl1 =
case and [isPrefixWord (reverse w) (reverse w1) | (w,w1) <- zip (wforms fl) (wforms fl1)] of
True -> markWith fl1 [";","--","compound",(fst fl)]
False -> markWith fl1 [";","--","notcompound",(fst fl)]
-- quote the word forms (not the paradigm name) and append extra tokens
markWith (f,(c,op:ws)) xs = (f,(c,op : map quote ws ++ xs))
-- x is a prefix of xy, and what remains of xy is longer than one character
-- and contains a hyphen, a digit or one of the listed vowels; the caller
-- passes reversed words, so the remainder is the compound's leading part
isPrefixWord x xy =
length suff > 1 &&
any (\c -> elem c "-0123456789aeiouyåäö") suff &&
isPrefixOf x xy
where
suff = drop (length x) xy
-- | Join the name components with underscores and quote or prefix the result
-- where needed to make it usable as a function name.
mkFun parts = quoteIf (intercalate "_" parts)
-- | Make a string usable as a function name.
--
-- * If it contains any character other than a letter, digit, underscore or
--   single quote, wrap it in single quotes, escaping embedded single quotes
--   with a backslash.
-- * Otherwise, if it starts with a non-letter, prefix it with @W_@.
-- * Otherwise (including the empty string) return it unchanged.
quoteIf s
  | not (all isIdentChar s) = "'" ++ concatMap escapeQuote s ++ "'"
  | startsWithNonLetter     = "W_" ++ s
  | otherwise               = s
  where
    isIdentChar c = isAlphaNum c || c `elem` "_'"
    startsWithNonLetter = case s of
      c:_ -> not (isAlpha c)
      []  -> False
    escapeQuote '\'' = "\\'"
    escapeQuote c    = [c]
-- | Render a word form as a double-quoted string literal. Embedded double
-- quotes and backslashes are backslash-escaped so that such a form cannot
-- terminate the literal early in the generated grammar.
quote s = "\"" ++ concatMap escapeChar s ++ "\""
  where
    escapeChar '"'  = "\\\""
    escapeChar '\\' = "\\\\"
    escapeChar c    = [c]

View File

@@ -0,0 +1,8 @@
N : N mkN 0 2
A : A mkA 0 2 4 6
V : V mkV 0 4 2
V2 : V mkV 0 4 2
Adv : Adv mkAdv 0
Prep : Prep mkPrep 0

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,7 @@
concrete MorphoDictEng of MorphoDictEngAbs =
CatEng [N,A,V,Adv,Prep] **
open
ParadigmsEng
in
{

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,4 @@
abstract MorphoDictEngAbs =
Cat [N,A,V,Adv,Prep] **
{

View File

@@ -0,0 +1,8 @@
N : N mkN 0 2 4 6
A : A mkA 0 2 4 10 12
V : V mkV 6 0 4 2 8 10
V2 : V mkV 6 0 4 2 8 10
Adv : Adv mkAdv 0
Prep : Prep mkPrep 0
PN : PN mkPN 0

View File

@@ -0,0 +1,8 @@
concrete MorphoDictSwe of MorphoDictSweAbs =
CatSwe [N,A,V,Adv,Prep,PN] **
open
ParadigmsSwe
in
{

View File

@@ -0,0 +1,4 @@
abstract MorphoDictSweAbs =
Cat [N,A,V,Adv,Prep,PN] **
{

38
src/morphodict/README Normal file
View File

@@ -0,0 +1,38 @@
MkMorphoDict: Extracting a minimal morphological dictionary from an existing GF dictionary.
Aarne Ranta 2020-03-02
principles:
There should be a single source for each lemgram (i.e. inflection table of a word)
Function names should be easy to guess: baseform_Category (while avoiding accidental clashes when this is not a unique key)
Hence,
Functions are 1-to-1 with lemgrams, i.e. inflection tables, thus
- no sense distinctions
- no subcategorizations
- no variants
Function name = baseform_Category, with exceptions
- same baseform_Category, different inflection tables: lie_1_V, lie_2_V
- words that have non-ident characters: 'bird\'s-eye_A'
- words that start with non-letters: W_'tween_Adv
Example run, English:
gf -make ../english/DictEng.gf
runghc MkMorphodict.hs DictEngAbs.pgf MorphoDictEng
Result: 64923 -> 56599 functions, of which 21679 could be compounds
Swedish, using a dump of SALDO (not available in these sources)
cd saldo/
runghc SaldoGF.hs
# combine abs.tmp with Saldo.header to obtain Saldo.gf
# combine cnc.tmp with SaldoSwe.header to obtain SaldoSwe.gf
gf -make SaldoSwe.gf
cd ..
runghc MkMorphodict.hs saldo/Saldo.pgf MorphoDictSwe

View File

@@ -0,0 +1 @@
abstract Saldo = Cat [N,A,V,PN,Adv,Prep] ** {

View File

@@ -0,0 +1,97 @@
import Data.List
import qualified Data.Map as M
-- AR 2020-03-03
-- generating GF from preprocessed SALDO (of type Lex by John Camilleri)
-- Read the preprocessed SALDO dump, turn every entry into a pair of GF rules and
-- write the abstract rules to abs.tmp and the concrete rules to cnc.tmp.
-- Both outputs lack module headers; the README combines them with Saldo.header
-- and SaldoSwe.header respectively.
main = do
  lexicon <- fmap readLex (readFile "saldom.hsdump") -- this is the preprocessed file
  let rules = [mkRules (treatNone fun) | fun <- mkFuns lexicon]
      absLines = map fst rules
      cncLines = map snd rules
  writeFile "abs.tmp" (unlines absLines)
  writeFile "cnc.tmp" (unlines cncLines)
-- JC's datatypes, using String for simplicity
-- NOTE(review): Lex is not referenced by the code visible here; readLex yields an
-- association list of (key, Entry) pairs rather than this Map.
type Lex = M.Map String Entry -- key is lemgram ID
-- An inflection table as (morphological tag, surface form) pairs.
type Table = [(String,String)]
-- One lexicon entry. The derived Read instance is what readLex uses to parse the dump.
data Entry = E
{ ePOS :: String -- part-of-speech tag; formSpec dispatches on "nn", "av", "vb", "ab", "pp"
, eTable :: Table -- morphological tags to surface form: ("sg def gen" ,"killens")
} deriving (Show, Read)
-- Parse the dump into (key, entry) pairs using the derived Read instances.
-- The first 8 characters are discarded before parsing -- presumably the "fromList"
-- prefix of a shown Data.Map; TODO confirm against the dump format.
readLex :: String -> [(String,Entry)]
readLex dump = read (drop 8 dump)
-- new code by AR
-- Render one lexical item as a pair of GF judgements: the abstract "fun" line and
-- the concrete "lin" line. If the linearization still mentions a "NONE" form, both
-- lines are commented out by prefixing them with "--n".
mkRules (fun,cat,lin) = (judgement ["fun",fun,":",cat,";"], judgement ["lin",fun,"=",lin,";"])
  where
    stillIncomplete = "\"NONE\"" `elem` words lin
    judgement ws
      | stillIncomplete = unwords ("--n" : ws)
      | otherwise       = unwords ws
-- Convert incomplete paradigms to the special mkC constructors defined in
-- SaldoSwe.header. The linearization is split into words, the leading constructor
-- is dropped, and the position of the "NONE" placeholders among the remaining
-- forms selects the replacement. Clauses are tried top to bottom; anything that
-- matches none of them is returned unchanged.
treatNone (f,cat,lin) = rewrite cat (drop 1 (words lin))
  where
    via newCat oper args = (f, newCat, unwords (oper : args))
    rewrite "V" ("\"NONE\"":"\"NONE\"":v:_)       = via "V"  "mkVDep"    [v]
    rewrite "V" (i:d:p:a:b:"\"NONE\"":_)          = via "V"  "mkVIntr"   [i,d,p,a,b]
    rewrite "A" (i:"\"NONE\"":p:c:s:_)            = via "A"  "mkAUtr"    [i,p,c,s]
    rewrite "A" (i:d:p:"\"NONE\"":"\"NONE\"":_)   = via "A"  "mkAComp"   [i,d,p]
    rewrite "N" ("\"NONE\"":d:"\"NONE\"":_)       = via "PN" "mkPNDef"   [d] ---
    rewrite "N" (i:"\"NONE\"":"\"NONE\"":_)       = via "PN" "mkPNIndef" [i]
    rewrite "N" (i:d:"\"NONE\"":"\"NONE\"":_)     = via "N"  "mkNSg"     [i,d]
    rewrite "N" ("\"NONE\"":"\"NONE\"":i:d:_)     = via "N"  "mkNPl"     [i,d]
    rewrite _ _                                   = (f,cat,lin)
--- Function names are simply generated ("w" followed by a running number starting
--- at 1000000): the result is fed to ../MkMorphoDict anyway, which renames them.
mkFuns lx = zipWith named [1000000..] (concatMap (entry2lin . snd) lx)
  where
    named i (cat,lin) = ("w" ++ show i, cat, lin)
-- Produce the (category, linearization) pairs for one lexicon entry: look up the
-- characteristic tags for its POS, collect the distinct surface forms found under
-- each tag, and emit one "mk<Cat>" application per table built by manyTables.
entry2lin e = map (\ws -> (cat, mkLin cat ws)) (manyTables valuess)
  where
    (cat,forms) = formSpec (ePOS e)
    valuess = map variantsOf forms
    variantsOf tag = nub [v | (t,v) <- eTable e, t == tag]
    mkLin c ws = unwords (("mk" ++ c) : map inQuotes ws)
    inQuotes w = "\"" ++ w ++ "\""
-- Look up the characteristic forms for each POS: returns the GF category name and
-- the list of morphological tags whose surface forms become, in this order, the
-- arguments of the category's constructor.
-- For any other POS tag the result is a placeholder built from "NONE" and the tag
-- itself; such a placeholder is not expected to match any tag in an entry's table.
formSpec pos = case pos of
  "nn" -> ("N",[
    "sg indef nom",
    "sg def nom",
    "pl indef nom",
    "pl def nom"
    ])
  "av" -> ("A",[
    "pos indef sg u nom",
    "pos indef sg n nom",
    "pos indef pl nom",
    "komp nom",
    "super indef nom"
    ])
  "vb" -> ("V",[
    "inf aktiv",
    "pres ind aktiv",
    "imper",
    "pret ind aktiv",
    "sup aktiv",
    "pret_part indef sg u nom"
    ])
  "ab" -> ("Adv",[
    "invar"
    ---- "pos"
    ])
  "pp" -> ("Prep",[
    "invar"
    ])
  -- ignoring other POS tags, which are rare anyway; the tag is appended to "NONE"
  -- (the original had the ++ operator inside the string literal by mistake)
  _ -> ("NONE" ++ pos, ["NONE" ++ pos])
-- Build a small number of tables from sets of variant forms (seems to work well
-- enough). There are as many tables as the longest variant list has elements.
-- Table i takes the i-th variant of every slot; a slot with fewer variants falls
-- back to its first one, and a slot with no variants at all yields "NONE".
manyTables formss = [[padded forms !! i | forms <- formss] | i <- take tableCount [0..]]
  where
    tableCount = maximum (map length formss)
    padded [] = repeat "NONE"
    padded forms@(first : _) = forms ++ repeat first

View File

@@ -0,0 +1,50 @@
concrete SaldoSwe of Saldo =
CatSwe [N,A,V,Adv,Prep,PN] **
open
ParadigmsSwe, Prelude
in
{
-- to deal with incomplete paradigms
--- the values could be in special categories to avoid overgeneration
oper
-- Verb built from a single form ending in -s: the ending (-as, -es or bare -s) is
-- replaced (by -a, -er, -er respectively) to get the form handed to mkV, and the
-- result is passed through depV. Any other ending aborts with Predef.error.
mkVDep : Str -> V
= \v -> case v of {
x + "as" => depV (mkV (x + "a")) ;
x + "es" => depV (mkV (x + "er")) ;
x + "s" => depV (mkV (x + "er")) ;
_ => Predef.error (v ++ "not for mkVDep")
} ;
-- Verb from five forms, of which only the first, fourth and fifth are used; the
-- second and third are ignored.
-- NOTE(review): presumably infinitive, preterite and supine, going by the form
-- order in SaldoGF.hs -- confirm.
mkVIntr : (_,_,_,_,_ : Str) -> V
= \i,_,_,p,pt -> mkV i p pt ; ---
-- Adjective from four forms: the first form is passed twice to fill the first two
-- argument positions of the five-argument mkA.
mkAUtr : (_,_,_,_ : Str) -> A
= \u,p,c,s -> mkA u u p c s ; ---
-- Adjective from three forms: only the first two reach mkA (the third is unused);
-- the result is wrapped with compoundA.
mkAComp : (_,_,_ : Str) -> A
= \u,n,p -> compoundA (mkA u n) ; ---
-- Proper name from a single form; the gender is guessed from the ending:
-- final "n" gives utrum, anything else neutrum.
mkPNDef : Str -> PN
= \s -> case s of {
_ + "n" => mkPN s utrum ;
_ => mkPN s neutrum
} ;
-- Proper name from a single form, always neutrum.
mkPNIndef : Str -> PN
= \s -> mkPN s neutrum ; ---
-- Noun from two forms: only the first is passed to mkN; the second is inspected
-- solely to guess the gender (final "n" gives utrum, otherwise neutrum).
mkNSg : (_,_ : Str) -> N
= \i,d -> case d of {
_ + "n" => mkN i utrum ; ---
_ => mkN i neutrum ---
} ;
-- Noun from two forms where only the first is used (the second is ignored): its
-- ending (-or, -ar/-er, -en, other) decides which arguments are reconstructed for mkN.
mkNPl : (_,_ : Str) -> N
= \i,d -> case i of {
s + "or" => mkN (s + "a") ; ---
s + ("ar"|"er") => mkN s i ; ---
s + "en" => mkN (s + "e") i ; ---
_ => mkN i i ---
} ;

13
src/swedish/README.md Normal file
View File

@@ -0,0 +1,13 @@
# Swedish
## Language info
- English name: Swedish
- Autonym: Svenska
- ISO code: Swe
## Dictionaries
- `OldDictSwe`: Converted from SALDO using [this code](https://github.com/MalinAhlberg/SwedishProject/tree/master/saldo) in 2011.
- `NewDictSwe`: Re-import from SALDO using [this code](https://github.com/DigitalGrammarsAB/SALDOtoGF/tree/a45e503a824ded39844df2aeeb7a6ee891e3bee1) in 2018, with more words and different identifier structure.
- `DictSwe` is a union of `OldDictSwe` and `NewDictSwe`