mirror of
https://github.com/GrammaticalFramework/gf-rgl.git
synced 2026-05-28 01:18:57 -06:00
Merge branch 'master' of https://github.com/GrammaticalFramework/gf-rgl
This commit is contained in:
@@ -47,6 +47,7 @@ abstract Extend = Cat ** {
|
|||||||
PredVPS : NP -> VPS -> S ; -- she [has walked and won't sleep]
|
PredVPS : NP -> VPS -> S ; -- she [has walked and won't sleep]
|
||||||
SQuestVPS : NP -> VPS -> QS ; -- has she walked
|
SQuestVPS : NP -> VPS -> QS ; -- has she walked
|
||||||
QuestVPS : IP -> VPS -> QS ; -- who has walked
|
QuestVPS : IP -> VPS -> QS ; -- who has walked
|
||||||
|
RelVPS : RP -> VPS -> RS ; -- which won't sleep
|
||||||
|
|
||||||
-- existentials that work in the absence of Cl
|
-- existentials that work in the absence of Cl
|
||||||
ExistS : Temp -> Pol -> NP -> S ; -- there was a party
|
ExistS : Temp -> Pol -> NP -> S ; -- there was a party
|
||||||
@@ -286,4 +287,6 @@ fun UseDAP : DAP -> NP ;
|
|||||||
UseDAPMasc : DAP -> NP ;
|
UseDAPMasc : DAP -> NP ;
|
||||||
UseDAPFem : DAP -> NP ;
|
UseDAPFem : DAP -> NP ;
|
||||||
|
|
||||||
|
cat X ; -- for words that are difficult to classify, mainly for MorphoDict
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,6 +4,8 @@ lincat
|
|||||||
RNP = Grammar.NP ;
|
RNP = Grammar.NP ;
|
||||||
RNPList = Grammar.ListNP ;
|
RNPList = Grammar.ListNP ;
|
||||||
|
|
||||||
|
X = {s : Str} ; -- for words that are difficult to classify, mainly for MorphoDict
|
||||||
|
|
||||||
lin
|
lin
|
||||||
BaseVPS = variants {} ;
|
BaseVPS = variants {} ;
|
||||||
ConsVPS = variants {} ;
|
ConsVPS = variants {} ;
|
||||||
@@ -26,6 +28,7 @@ lin
|
|||||||
MkVPS vp = variants {} ; -- Temp -> Pol -> VP -> VPS ; -- hasn't slept
|
MkVPS vp = variants {} ; -- Temp -> Pol -> VP -> VPS ; -- hasn't slept
|
||||||
ConjVPS = variants {} ; -- Conj -> [VPS] -> VPS ; -- has walked and won't sleep
|
ConjVPS = variants {} ; -- Conj -> [VPS] -> VPS ; -- has walked and won't sleep
|
||||||
PredVPS = variants {} ; -- NP -> VPS -> S ; -- has walked and won't sleep
|
PredVPS = variants {} ; -- NP -> VPS -> S ; -- has walked and won't sleep
|
||||||
|
RelVPS = variants {} ; -- RP -> VPS -> RS ; -- which won't sleep
|
||||||
MkVPI vp = variants {} ; -- Temp -> Pol -> VP -> VPI ; -- to sleep / hasn't slept
|
MkVPI vp = variants {} ; -- Temp -> Pol -> VP -> VPI ; -- to sleep / hasn't slept
|
||||||
ConjVPI = variants {} ; -- Conj -> [VPI] -> VPI ; -- has walked and won't sleep
|
ConjVPI = variants {} ; -- Conj -> [VPI] -> VPI ; -- has walked and won't sleep
|
||||||
ComplVPIVV = variants {} ; -- VV -> VPI -> VP ; -- want to sleep and to walk
|
ComplVPIVV = variants {} ; -- VV -> VPI -> VP ; -- want to sleep and to walk
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ concrete ExtendEng of Extend =
|
|||||||
[
|
[
|
||||||
VPS, ListVPS, VPI, ListVPI, VPS2, ListVPS2, VPI2, ListVPI2, RNP, RNPList,
|
VPS, ListVPS, VPI, ListVPI, VPS2, ListVPS2, VPI2, ListVPI2, RNP, RNPList,
|
||||||
AdAdV, AdjAsCN, AdjAsNP, ApposNP, AdvIsNP,
|
AdAdV, AdjAsCN, AdjAsNP, ApposNP, AdvIsNP,
|
||||||
MkVPS, BaseVPS, ConsVPS, ConjVPS, PredVPS, QuestVPS, SQuestVPS,
|
MkVPS, BaseVPS, ConsVPS, ConjVPS, PredVPS, QuestVPS, SQuestVPS, RelVPS,
|
||||||
MkVPI, BaseVPI, ConsVPI, ConjVPI, ComplVPIVV,
|
MkVPI, BaseVPI, ConsVPI, ConjVPI, ComplVPIVV,
|
||||||
MkVPS2, BaseVPS2, ConsVPS2, ConjVPS2, ComplVPS2, ReflVPS2,
|
MkVPS2, BaseVPS2, ConsVPS2, ConjVPS2, ComplVPS2, ReflVPS2,
|
||||||
MkVPI2, BaseVPI2, ConsVPI2, ConjVPI2, ComplVPI2,
|
MkVPI2, BaseVPI2, ConsVPI2, ConjVPI2, ComplVPI2,
|
||||||
@@ -127,6 +127,11 @@ concrete ExtendEng of Extend =
|
|||||||
QuestVPS ip vps = let vp = vps.s ! oDir ! toAgr ip.n P3 Neutr in {
|
QuestVPS ip vps = let vp = vps.s ! oDir ! toAgr ip.n P3 Neutr in {
|
||||||
s = \\q => ip.s ! npNom ++ vp.fin ++ vp.inf
|
s = \\q => ip.s ! npNom ++ vp.fin ++ vp.inf
|
||||||
} ;
|
} ;
|
||||||
|
RelVPS rp vps = {
|
||||||
|
s = \\agr => let vp = vps.s ! oDir ! agr in
|
||||||
|
rp.s ! RC (fromAgr agr).g npNom ++ vp.fin ++ vp.inf ;
|
||||||
|
c = npNom ;
|
||||||
|
} ;
|
||||||
|
|
||||||
|
|
||||||
MkVPI vp = mkVPI (lin VP vp) ;
|
MkVPI vp = mkVPI (lin VP vp) ;
|
||||||
@@ -177,7 +182,7 @@ concrete ExtendEng of Extend =
|
|||||||
let
|
let
|
||||||
verb = vp.s ! t.t ! t.a ! p.p ! o ! a ; -- choice of Order determines aux or not
|
verb = vp.s ! t.t ! t.a ! p.p ! o ! a ; -- choice of Order determines aux or not
|
||||||
compl = vp.s2 ! a ++ vp.ext
|
compl = vp.s2 ! a ++ vp.ext
|
||||||
in {fin = verb.aux ;
|
in {fin = verb.aux ++ t.s ++ p.s ;
|
||||||
inf = verb.adv ++ vp.ad ! a ++ verb.fin ++ verb.inf ++ vp.p ++ compl} ;
|
inf = verb.adv ++ vp.ad ! a ++ verb.fin ++ verb.inf ++ vp.p ++ compl} ;
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
|
|||||||
@@ -13,7 +13,8 @@ flags optimize=values ;
|
|||||||
bend_V = irregV "bend" "bent" "bent" ;
|
bend_V = irregV "bend" "bent" "bent" ;
|
||||||
beset_V = irregV "beset" "beset" "beset" ;
|
beset_V = irregV "beset" "beset" "beset" ;
|
||||||
bet_V = irregDuplV "bet" "bet" "bet" ;
|
bet_V = irregDuplV "bet" "bet" "bet" ;
|
||||||
bid_V = irregDuplV "bid" (variants {"bid" ; "bade"}) (variants {"bid" ; "bidden"}) ;
|
bid_V = irregDuplV "bid" "bid" "bid" ;
|
||||||
|
bid_bade_V = irregDuplV "bid" "bade" "bidden" ;
|
||||||
bind_V = irregV "bind" "bound" "bound" ;
|
bind_V = irregV "bind" "bound" "bound" ;
|
||||||
bite_V = irregV "bite" "bit" "bitten" ;
|
bite_V = irregV "bite" "bit" "bitten" ;
|
||||||
bleed_V = irregV "bleed" "bled" "bled" ;
|
bleed_V = irregV "bleed" "bled" "bled" ;
|
||||||
@@ -23,7 +24,7 @@ flags optimize=values ;
|
|||||||
bring_V = irregV "bring" "brought" "brought" ;
|
bring_V = irregV "bring" "brought" "brought" ;
|
||||||
broadcast_V = irregV "broadcast" "broadcast" "broadcast" ;
|
broadcast_V = irregV "broadcast" "broadcast" "broadcast" ;
|
||||||
build_V = irregV "build" "built" "built" ;
|
build_V = irregV "build" "built" "built" ;
|
||||||
burn_V = irregV "burn" (variants {"burned" ; "burnt"}) (variants {"burned" ; "burnt"}) ;
|
burn_V = irregV "burn" "burnt" "burnt" ;
|
||||||
burst_V = irregV "burst" "burst" "burst" ;
|
burst_V = irregV "burst" "burst" "burst" ;
|
||||||
buy_V = irregV "buy" "bought" "bought" ;
|
buy_V = irregV "buy" "bought" "bought" ;
|
||||||
cast_V = irregV "cast" "cast" "cast" ;
|
cast_V = irregV "cast" "cast" "cast" ;
|
||||||
@@ -36,10 +37,10 @@ flags optimize=values ;
|
|||||||
cut_V = irregDuplV "cut" "cut" "cut" ;
|
cut_V = irregDuplV "cut" "cut" "cut" ;
|
||||||
deal_V = irregV "deal" "dealt" "dealt" ;
|
deal_V = irregV "deal" "dealt" "dealt" ;
|
||||||
dig_V = irregDuplV "dig" "dug" "dug" ;
|
dig_V = irregDuplV "dig" "dug" "dug" ;
|
||||||
dive_V = irregV "dive" (variants {"dived" ; "dove"}) "dived" ;
|
dive_V = irregV "dive" "dove" "dived" ;
|
||||||
do_V = mk5V "do" "does" "did" "done" "doing" ;
|
do_V = mk5V "do" "does" "did" "done" "doing" ;
|
||||||
draw_V = irregV "draw" "drew" "drawn" ;
|
draw_V = irregV "draw" "drew" "drawn" ;
|
||||||
dream_V = irregV "dream" (variants {"dreamed" ; "dreamt"}) (variants {"dreamed" ; "dreamt"}) ;
|
dream_V = irregV "dream" "dreamt" "dreamt" ;
|
||||||
drive_V = irregV "drive" "drove" "driven" ;
|
drive_V = irregV "drive" "drove" "driven" ;
|
||||||
drink_V = irregV "drink" "drank" "drunk" ;
|
drink_V = irregV "drink" "drank" "drunk" ;
|
||||||
eat_V = irregV "eat" "ate" "eaten" ;
|
eat_V = irregV "eat" "ate" "eaten" ;
|
||||||
@@ -75,27 +76,27 @@ flags optimize=values ;
|
|||||||
know_V = irregV "know" "knew" "known" ;
|
know_V = irregV "know" "knew" "known" ;
|
||||||
lay_V = irregV "lay" "laid" "laid" ;
|
lay_V = irregV "lay" "laid" "laid" ;
|
||||||
lead_V = irregV "lead" "led" "led" ;
|
lead_V = irregV "lead" "led" "led" ;
|
||||||
leap_V = irregV "leap" (variants {"leaped" ; "lept"}) (variants {"leaped" ; "lept"}) ;
|
leap_V = irregV "leap" "lept" "lept" ;
|
||||||
learn_V = irregV "learn" (variants {"learned" ; "learnt"}) (variants {"learned" ; "learnt"}) ;
|
learn_V = irregV "learn" "learnt" "learnt" ;
|
||||||
leave_V = irregV "leave" "left" "left" ;
|
leave_V = irregV "leave" "left" "left" ;
|
||||||
lend_V = irregV "lend" "lent" "lent" ;
|
lend_V = irregV "lend" "lent" "lent" ;
|
||||||
let_V = irregDuplV "let" "let" "let" ;
|
let_V = irregDuplV "let" "let" "let" ;
|
||||||
lie_V = irregV "lie" "lay" "lain" ;
|
lie_V = irregV "lie" "lay" "lain" ;
|
||||||
light_V = irregV "light" (variants {"lighted" ; "lit"}) "lighted" ;
|
light_V = irregV "light" "lit" "lit" ;
|
||||||
lose_V = irregV "lose" "lost" "lost" ;
|
lose_V = irregV "lose" "lost" "lost" ;
|
||||||
make_V = irregV "make" "made" "made" ;
|
make_V = irregV "make" "made" "made" ;
|
||||||
mean_V = irregV "mean" "meant" "meant" ;
|
mean_V = irregV "mean" "meant" "meant" ;
|
||||||
meet_V = irregV "meet" "met" "met" ;
|
meet_V = irregV "meet" "met" "met" ;
|
||||||
misspell_V = irregV "misspell" (variants {"misspelled" ; "misspelt"}) (variants {"misspelled" ; "misspelt"}) ;
|
misspell_V = irregV "misspell" "misspelt" "misspelt" ;
|
||||||
mistake_V = irregV "mistake" "mistook" "mistaken" ;
|
mistake_V = irregV "mistake" "mistook" "mistaken" ;
|
||||||
mow_V = irregV "mow" "mowed" (variants {"mowed" ; "mown"}) ;
|
mow_V = irregV "mow" "mowed" "mown" ;
|
||||||
overcome_V = irregV "overcome" "overcame" "overcome" ;
|
overcome_V = irregV "overcome" "overcame" "overcome" ;
|
||||||
overdo_V = mk5V "overdo" "overdoes" "overdid" "overdone" "overdoing" ;
|
overdo_V = mk5V "overdo" "overdoes" "overdid" "overdone" "overdoing" ;
|
||||||
overtake_V = irregV "overtake" "overtook" "overtaken" ;
|
overtake_V = irregV "overtake" "overtook" "overtaken" ;
|
||||||
overthrow_V = irregV "overthrow" "overthrew" "overthrown" ;
|
overthrow_V = irregV "overthrow" "overthrew" "overthrown" ;
|
||||||
pay_V = irregV "pay" "paid" "paid" ;
|
pay_V = irregV "pay" "paid" "paid" ;
|
||||||
plead_V = irregV "plead" "pled" "pled" ;
|
plead_V = irregV "plead" "pled" "pled" ;
|
||||||
prove_V = irregV "prove" "proved" (variants {"proved" ; "proven"}) ;
|
prove_V = irregV "prove" "proved" "proven" ;
|
||||||
put_V = irregDuplV "put" "put" "put" ;
|
put_V = irregDuplV "put" "put" "put" ;
|
||||||
quit_V = irregDuplV "quit" "quit" "quit" ;
|
quit_V = irregDuplV "quit" "quit" "quit" ;
|
||||||
read_V = irregV "read" "read" "read" ;
|
read_V = irregV "read" "read" "read" ;
|
||||||
@@ -104,22 +105,22 @@ flags optimize=values ;
|
|||||||
ring_V = irregV "ring" "rang" "rung" ;
|
ring_V = irregV "ring" "rang" "rung" ;
|
||||||
rise_V = irregV "rise" "rose" "risen" ;
|
rise_V = irregV "rise" "rose" "risen" ;
|
||||||
run_V = irregDuplV "run" "ran" "run" ;
|
run_V = irregDuplV "run" "ran" "run" ;
|
||||||
saw_V = irregV "saw" "sawed" (variants {"sawed" ; "sawn"}) ;
|
saw_V = irregV "saw" "sawed" "sawn" ;
|
||||||
say_V = irregV "say" "said" "said" ;
|
say_V = irregV "say" "said" "said" ;
|
||||||
see_V = irregV "see" "saw" "seen" ;
|
see_V = irregV "see" "saw" "seen" ;
|
||||||
seek_V = irregV "seek" "sought" "sought" ;
|
seek_V = irregV "seek" "sought" "sought" ;
|
||||||
sell_V = irregV "sell" "sold" "sold" ;
|
sell_V = irregV "sell" "sold" "sold" ;
|
||||||
send_V = irregV "send" "sent" "sent" ;
|
send_V = irregV "send" "sent" "sent" ;
|
||||||
set_V = irregDuplV "set" "set" "set" ;
|
set_V = irregDuplV "set" "set" "set" ;
|
||||||
sew_V = irregV "sew" "sewed" (variants {"sewed" ; "sewn"}) ;
|
sew_V = irregV "sew" "sewed" "sewn" ;
|
||||||
shake_V = irregV "shake" "shook" "shaken" ;
|
shake_V = irregV "shake" "shook" "shaken" ;
|
||||||
shave_V = irregV "shave" "shaved" (variants {"shaved" ; "shaven"}) ;
|
shave_V = irregV "shave" "shaved" "shaven" ;
|
||||||
shear_V = irregV "shear" "shore" "shorn" ;
|
shear_V = irregV "shear" "shore" "shorn" ;
|
||||||
shed_V = irregDuplV "shed" "shed" "shed" ;
|
shed_V = irregDuplV "shed" "shed" "shed" ;
|
||||||
shine_V = irregV "shine" "shone" "shone" ;
|
shine_V = irregV "shine" "shone" "shone" ;
|
||||||
shoe_V = irregV "shoe" "shoed" (variants {"shoed" ; "shod"}) ;
|
shoe_V = irregV "shoe" "shoed" "shod" ;
|
||||||
shoot_V = irregV "shoot" "shot" "shot" ;
|
shoot_V = irregV "shoot" "shot" "shot" ;
|
||||||
show_V = irregV "show" "showed" (variants {"showed" ; "shown"}) ;
|
show_V = irregV "show" "showed" "shown" ;
|
||||||
shrink_V = irregV "shrink" "shrank" "shrunk" ;
|
shrink_V = irregV "shrink" "shrank" "shrunk" ;
|
||||||
shut_V = irregDuplV "shut" "shut" "shut" ;
|
shut_V = irregDuplV "shut" "shut" "shut" ;
|
||||||
sing_V = irregV "sing" "sang" "sung" ;
|
sing_V = irregV "sing" "sang" "sung" ;
|
||||||
@@ -131,16 +132,18 @@ flags optimize=values ;
|
|||||||
sling_V = irregV "sling" "slung" "slung" ;
|
sling_V = irregV "sling" "slung" "slung" ;
|
||||||
slit_V = irregDuplV "slit" "slit" "slit" ;
|
slit_V = irregDuplV "slit" "slit" "slit" ;
|
||||||
smite_V = irregV "smite" "smote" "smitten" ;
|
smite_V = irregV "smite" "smote" "smitten" ;
|
||||||
sow_V = irregV "sow" "sowed" (variants {"sowed" ; "sown"}) ;
|
sow_V = irregV "sow" "sowed" "sown" ;
|
||||||
speak_V = irregV "speak" "spoke" "spoken" ;
|
speak_V = irregV "speak" "spoke" "spoken" ;
|
||||||
speed_V = irregV "speed" "sped" "sped" ;
|
speed_V = irregV "speed" "sped" "sped" ;
|
||||||
spend_V = irregV "spend" "spent" "spent" ;
|
spend_V = irregV "spend" "spent" "spent" ;
|
||||||
spill_V = irregV "spill" (variants {"spilled" ; "spilt"}) (variants {"spilled" ; "spilt"}) ;
|
spill_V = irregV "spill" "spilt" "spilt" ;
|
||||||
spin_V = irregDuplV "spin" "spun" "spun" ;
|
spin_V = irregDuplV "spin" "spun" "spun" ;
|
||||||
spit_V = irregDuplV "spit" (variants {"spit" ; "spat"}) "spit" ;
|
spit_V = irregDuplV "spit" "spit" "spit" ;
|
||||||
|
spit_spat_V = irregDuplV "spit" "spat" "spit" ;
|
||||||
split_V = irregDuplV "split" "split" "split" ;
|
split_V = irregDuplV "split" "split" "split" ;
|
||||||
spread_V = irregV "spread" "spread" "spread" ;
|
spread_V = irregV "spread" "spread" "spread" ;
|
||||||
spring_V = irregV "spring" (variants {"sprang" ; "sprung"}) "sprung" ;
|
spring_V = irregV "spring" "sprang" "sprung" ;
|
||||||
|
spring_sprung_V = irregV "spring" "sprung" "sprung" ;
|
||||||
stand_V = irregV "stand" "stood" "stood" ;
|
stand_V = irregV "stand" "stood" "stood" ;
|
||||||
steal_V = irregV "steal" "stole" "stolen" ;
|
steal_V = irregV "steal" "stole" "stolen" ;
|
||||||
stick_V = irregV "stick" "stuck" "stuck" ;
|
stick_V = irregV "stick" "stuck" "stuck" ;
|
||||||
@@ -152,7 +155,7 @@ flags optimize=values ;
|
|||||||
strive_V = irregV "strive" "strove" "striven" ;
|
strive_V = irregV "strive" "strove" "striven" ;
|
||||||
swear_V = irregV "swear" "swore" "sworn" ;
|
swear_V = irregV "swear" "swore" "sworn" ;
|
||||||
sweep_V = irregV "sweep" "swept" "swept" ;
|
sweep_V = irregV "sweep" "swept" "swept" ;
|
||||||
swell_V = irregV "swell" "swelled" (variants {"swelled" ; "swollen"}) ;
|
swell_V = irregV "swell" "swelled" "swollen" ;
|
||||||
swim_V = irregDuplV "swim" "swam" "swum" ;
|
swim_V = irregDuplV "swim" "swam" "swum" ;
|
||||||
swing_V = irregV "swing" "swung" "swung" ;
|
swing_V = irregV "swing" "swung" "swung" ;
|
||||||
take_V = irregV "take" "took" "taken" ;
|
take_V = irregV "take" "took" "taken" ;
|
||||||
@@ -160,7 +163,7 @@ flags optimize=values ;
|
|||||||
tear_V = irregV "tear" "tore" "torn" ;
|
tear_V = irregV "tear" "tore" "torn" ;
|
||||||
tell_V = irregV "tell" "told" "told" ;
|
tell_V = irregV "tell" "told" "told" ;
|
||||||
think_V = irregV "think" "thought" "thought" ;
|
think_V = irregV "think" "thought" "thought" ;
|
||||||
thrive_V = irregV "thrive" (variants {"thrived" ; "throve"}) "thrived" ;
|
thrive_V = irregV "thrive" "throve" "thrived" ;
|
||||||
throw_V = irregV "throw" "threw" "thrown" ;
|
throw_V = irregV "throw" "threw" "thrown" ;
|
||||||
thrust_V = irregV "thrust" "thrust" "thrust" ;
|
thrust_V = irregV "thrust" "thrust" "thrust" ;
|
||||||
tread_V = irregV "tread" "trod" "trodden" ;
|
tread_V = irregV "tread" "trod" "trodden" ;
|
||||||
@@ -169,7 +172,7 @@ flags optimize=values ;
|
|||||||
upset_V = irregDuplV "upset" "upset" "upset" ;
|
upset_V = irregDuplV "upset" "upset" "upset" ;
|
||||||
wake_V = irregV "wake" "woke" "woken" ;
|
wake_V = irregV "wake" "woke" "woken" ;
|
||||||
wear_V = irregV "wear" "wore" "worn" ;
|
wear_V = irregV "wear" "wore" "worn" ;
|
||||||
weave_V = irregV "weave" (variants {"weaved" ; "wove"}) (variants {"weaved" ; "woven"}) ;
|
weave_V = irregV "weave" "wove" "woven" ;
|
||||||
wed_V = irregDuplV "wed" "wed" "wed" ;
|
wed_V = irregDuplV "wed" "wed" "wed" ;
|
||||||
weep_V = irregV "weep" "wept" "wept" ;
|
weep_V = irregV "weep" "wept" "wept" ;
|
||||||
wind_V = irregV "wind" "wound" "wound" ;
|
wind_V = irregV "wind" "wound" "wound" ;
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ fun
|
|||||||
beset_V : V ;
|
beset_V : V ;
|
||||||
bet_V : V ;
|
bet_V : V ;
|
||||||
bid_V : V ;
|
bid_V : V ;
|
||||||
|
bid_bade_V : V ;
|
||||||
bind_V : V ;
|
bind_V : V ;
|
||||||
bite_V : V ;
|
bite_V : V ;
|
||||||
bleed_V : V ;
|
bleed_V : V ;
|
||||||
@@ -133,9 +134,11 @@ fun
|
|||||||
spill_V : V ;
|
spill_V : V ;
|
||||||
spin_V : V ;
|
spin_V : V ;
|
||||||
spit_V : V ;
|
spit_V : V ;
|
||||||
|
spit_spat_V : V ;
|
||||||
split_V : V ;
|
split_V : V ;
|
||||||
spread_V : V ;
|
spread_V : V ;
|
||||||
spring_V : V ;
|
spring_V : V ;
|
||||||
|
spring_sprung_V : V ;
|
||||||
stand_V : V ;
|
stand_V : V ;
|
||||||
steal_V : V ;
|
steal_V : V ;
|
||||||
stick_V : V ;
|
stick_V : V ;
|
||||||
|
|||||||
@@ -19885,7 +19885,7 @@ lin moniaalle_Adv = mkAdv {s = c99 "moniaalle"} ;
|
|||||||
lin moniaalta_Adv = mkAdv {s = c99 "moniaalta"} ;
|
lin moniaalta_Adv = mkAdv {s = c99 "moniaalta"} ;
|
||||||
lin monialaistua_V = mkV {s = c52 "monialaistua"} ;
|
lin monialaistua_V = mkV {s = c52 "monialaistua"} ;
|
||||||
lin monias_N = mkN {s = d41 "monias"} ;
|
lin monias_N = mkN {s = d41 "monias"} ;
|
||||||
lin moni_ilmeinen_N = mkN {s = d18 "moni-ilmeinen"} ;
|
lin moni_ilmeinen_N = mkN {s = d38 "moni-ilmeinen"} ;
|
||||||
lin monijumalaisuus_N = mkN {s = d40 "monijumalaisuus"} ;
|
lin monijumalaisuus_N = mkN {s = d40 "monijumalaisuus"} ;
|
||||||
lin monikansainen_N = mkN {s = d38 "monikansainen"} ;
|
lin monikansainen_N = mkN {s = d38 "monikansainen"} ;
|
||||||
lin monikko_N = mkN {s = d04A "monikko"} ;
|
lin monikko_N = mkN {s = d04A "monikko"} ;
|
||||||
@@ -41387,7 +41387,7 @@ lin ykkönen_N = mkN {s = d38 "ykkönen"} ;
|
|||||||
lin yks_Adv = mkAdv {s = c99 "yks"} ;
|
lin yks_Adv = mkAdv {s = c99 "yks"} ;
|
||||||
lin ykseys_N = mkN {s = d40 "ykseys"} ;
|
lin ykseys_N = mkN {s = d40 "ykseys"} ;
|
||||||
lin yksi_N = mkN {s = d31 "yksi"} ;
|
lin yksi_N = mkN {s = d31 "yksi"} ;
|
||||||
lin yksi_ilmeinen_N = mkN {s = d18 "yksi-ilmeinen"} ;
|
lin yksi_ilmeinen_N = mkN {s = d38 "yksi-ilmeinen"} ;
|
||||||
lin yksiin_Adv = mkAdv {s = c99 "yksiin"} ;
|
lin yksiin_Adv = mkAdv {s = c99 "yksiin"} ;
|
||||||
lin yksijumalaisuus_N = mkN {s = d40 "yksijumalaisuus"} ;
|
lin yksijumalaisuus_N = mkN {s = d40 "yksijumalaisuus"} ;
|
||||||
lin yksikkö_N = mkN {s = d04A "yksikkö"} ;
|
lin yksikkö_N = mkN {s = d04A "yksikkö"} ;
|
||||||
|
|||||||
@@ -159,6 +159,8 @@ oper
|
|||||||
s = \\c => (StemFin.snoun2nounBind veri).s ! NCase n Gen + paine.s ! c
|
s = \\c => (StemFin.snoun2nounBind veri).s ! NCase n Gen + paine.s ! c
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
|
foreignN : Str -> N ; -- foreign word without Finnish alternations, e.g. sake/saken/sakeja
|
||||||
|
|
||||||
-- Nouns used as functions need a case, of which the default is
|
-- Nouns used as functions need a case, of which the default is
|
||||||
-- the genitive.
|
-- the genitive.
|
||||||
|
|
||||||
@@ -667,7 +669,8 @@ mkVS = overload {
|
|||||||
|
|
||||||
mkPN_1 : Str -> PN = \s -> lin PN (snoun2spn (mk1N s)) ;
|
mkPN_1 : Str -> PN = \s -> lin PN (snoun2spn (mk1N s)) ;
|
||||||
|
|
||||||
foreignPN : Str -> PN = \s -> (lin PN (snoun2spn (nforms2snoun (noun s)))) where {
|
foreignPN : Str -> PN = \s -> mkPN (foreignN s) ;
|
||||||
|
foreignN : Str -> N = \s -> (lin N (nforms2snoun (noun s))) where {
|
||||||
noun : Str -> NForms = \s -> case s of {
|
noun : Str -> NForms = \s -> case s of {
|
||||||
_ + "i" => dPaatti s (s + "n") ;
|
_ + "i" => dPaatti s (s + "n") ;
|
||||||
_ + "e" => dNukke s (s + "n") ;
|
_ + "e" => dNukke s (s + "n") ;
|
||||||
|
|||||||
@@ -11,6 +11,10 @@ concrete IdiomFre of Idiom = CatFre **
|
|||||||
ExistNP np =
|
ExistNP np =
|
||||||
mkClause "il" True False (agrP3 Masc Sg)
|
mkClause "il" True False (agrP3 Masc Sg)
|
||||||
(insertClit3 "y" (insertComplement (\\_ => (np.s ! Acc).ton) (predV avoir_V))) ;
|
(insertClit3 "y" (insertComplement (\\_ => (np.s ! Acc).ton) (predV avoir_V))) ;
|
||||||
|
|
||||||
|
ExistNPAdv np adv =
|
||||||
|
mkClause "il" True False (agrP3 Masc Sg)
|
||||||
|
(insertAdv adv.s (insertClit3 "y" (insertComplement (\\_ => (np.s ! Acc).ton) (predV avoir_V)))) ;
|
||||||
|
|
||||||
ExistIP ip = {
|
ExistIP ip = {
|
||||||
s = \\t,a,p,_ =>
|
s = \\t,a,p,_ =>
|
||||||
|
|||||||
@@ -144,10 +144,10 @@ concrete VerbGer of Verb = CatGer ** open Prelude, ResGer, Coordination in {
|
|||||||
CompAdv a = {s = \\_ => a.s ; ext = []} ;
|
CompAdv a = {s = \\_ => a.s ; ext = []} ;
|
||||||
|
|
||||||
CompCN cn = {s = \\a => case numberAgr a of {
|
CompCN cn = {s = \\a => case numberAgr a of {
|
||||||
Sg => "ein" + pronEnding ! GSg cn.g ! Nom ++ cn.s ! Strong ! Sg ! Nom ;
|
Sg => "ein" + pronEnding ! GSg cn.g ! Nom ++ cn.s ! Strong ! Sg ! Nom ++ cn.rc ! Sg ; ---
|
||||||
Pl => cn.s ! Strong ! Pl ! Nom
|
Pl => cn.s ! Strong ! Pl ! Nom ++ cn.rc ! Pl ---
|
||||||
} ;
|
} ;
|
||||||
ext = []
|
ext = cn.adv ++ cn.ext
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
AdvVP vp adv = insertAdv adv.s vp ;
|
AdvVP vp adv = insertAdv adv.s vp ;
|
||||||
|
|||||||
@@ -23,6 +23,14 @@ concrete IdiomIta of Idiom = CatIta **
|
|||||||
(insertClit3 (elision "ci" "c'" "ci")
|
(insertClit3 (elision "ci" "c'" "ci")
|
||||||
(insertComplement (\\_ => (np.s ! Nom).ton)
|
(insertComplement (\\_ => (np.s ! Nom).ton)
|
||||||
(predV copula))) ;
|
(predV copula))) ;
|
||||||
|
|
||||||
|
ExistNPAdv np adv =
|
||||||
|
let npa = complAgr np.a in
|
||||||
|
mkClause [] True False (agrP3 npa.g npa.n)
|
||||||
|
(insertAdv adv.s
|
||||||
|
(insertClit3 (elision "ci" "c'" "ci")
|
||||||
|
(insertComplement (\\_ => (np.s ! Nom).ton)
|
||||||
|
(predV copula)))) ;
|
||||||
|
|
||||||
ExistIP ip = {
|
ExistIP ip = {
|
||||||
s = \\t,a,p,_ =>
|
s = \\t,a,p,_ =>
|
||||||
|
|||||||
@@ -5,7 +5,9 @@ import PGF
|
|||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
import Data.Char
|
import Data.Char
|
||||||
import Data.List
|
import Data.List
|
||||||
|
import Safe
|
||||||
import System.Environment (getArgs)
|
import System.Environment (getArgs)
|
||||||
|
import Debug.Trace
|
||||||
|
|
||||||
-- AR 2020-02-28
|
-- AR 2020-02-28
|
||||||
|
|
||||||
@@ -17,86 +19,164 @@ import System.Environment (getArgs)
|
|||||||
-- - functionname = baseform_category, with exceptions
|
-- - functionname = baseform_category, with exceptions
|
||||||
-- - variant inflection tables: lie_1_V, lie_2_V
|
-- - variant inflection tables: lie_1_V, lie_2_V
|
||||||
-- - words that have non-ident characters: 'bird\'s-eye_A'
|
-- - words that have non-ident characters: 'bird\'s-eye_A'
|
||||||
-- - words that start with non-letters: W_'tween_Adv
|
|
||||||
|
|
||||||
-- example:
|
-- example:
|
||||||
-- gf -make ../english/DictEng.gf
|
-- gf -make ../english/DictEng.gf
|
||||||
-- runghc MkMorphodict.hs DictEngAbs.pgf MorphoDictEng
|
-- runghc MkMorphodict.hs pgf MorphoDictEng.config DictEngAbs.pgf MorphoDictEng
|
||||||
-- 64923 -> 56599 functions
|
-- 64923 -> 56599 functions
|
||||||
|
|
||||||
usage = "MkMorphodict <pgf> <outfile>"
|
usage = "runghc MkMorphodict (raw|pgf) <configfile> <datafile> <outfile>"
|
||||||
|
|
||||||
main = do
|
main = do
|
||||||
pgfile:outfile:_ <- getArgs
|
xx <- getArgs
|
||||||
pgf <- readPGF pgfile
|
if length xx /= 4
|
||||||
config <- readFile (outfile ++ ".config") >>= return . mkConfig
|
then do
|
||||||
|
putStrLn "Usage:"
|
||||||
let (absrules,cncrules) = mkMorphoDict (MDEnv pgf config (head (languages pgf)))
|
putStrLn usage
|
||||||
|
putStrLn $ "Got instead: " ++ show xx
|
||||||
|
else do
|
||||||
|
let mode:configfile:datafile:outfile:_ = xx
|
||||||
|
config <- readFile configfile >>= return . mkConfig
|
||||||
|
|
||||||
|
rawdata <- case mode of
|
||||||
|
"pgf" -> pgfFile2rawData config datafile
|
||||||
|
"raw" -> readFile datafile >>= return . map getRawData . filter (not . null) . lines
|
||||||
|
_ -> error $ "Expected mode (pgf|raw), got " ++ mode
|
||||||
|
rawdata2gf config rawdata outfile
|
||||||
|
|
||||||
|
|
||||||
|
rawdata2gf config rawdata outfile = do
|
||||||
|
|
||||||
|
let env = MDEnv rawdata config
|
||||||
|
let (absrules,cncrules) = mkMorphoDict env
|
||||||
|
|
||||||
absheader <- readFile (outfile ++ "Abs.header")
|
absheader <- readFile (outfile ++ "Abs.header")
|
||||||
cncheader <- readFile (outfile ++ ".header")
|
cncheader <- readFile (outfile ++ ".header")
|
||||||
|
|
||||||
writeFile (outfile ++ "Abs.gf") absheader
|
writeFile (outfile ++ "Abs.gf") absheader
|
||||||
appendFile (outfile ++ "Abs.gf") $ unlines absrules
|
appendFile (outfile ++ "Abs.gf") $ unlines $ sort absrules
|
||||||
appendFile (outfile ++ "Abs.gf") "}"
|
appendFile (outfile ++ "Abs.gf") "}"
|
||||||
|
|
||||||
writeFile (outfile ++ ".gf") cncheader
|
writeFile (outfile ++ ".gf") cncheader
|
||||||
appendFile (outfile ++ ".gf") $ unlines cncrules
|
appendFile (outfile ++ ".gf") $ unlines $ sort cncrules
|
||||||
appendFile (outfile ++ ".gf") "}"
|
appendFile (outfile ++ ".gf") "}"
|
||||||
|
|
||||||
|
-- one way to get raw data from a dictionary pgf
|
||||||
|
-- another way, more controllod, is to write a wrapper grammar with a function, for each category, to generate a RawData entry
|
||||||
|
pgfFile2rawData config pgffile = do
|
||||||
|
pgf <- readPGF pgffile
|
||||||
|
|
||||||
type Cat = CId
|
let cats = nub [c | (c,(_,_,_)) <- M.assocs config]
|
||||||
|
let lang:_ = languages pgf
|
||||||
|
|
||||||
|
return [
|
||||||
|
(cat, map snd lin) |
|
||||||
|
cat <- cats,
|
||||||
|
f <- functionsByCat pgf (mkCId cat),
|
||||||
|
lin <- tabularLinearizes pgf lang (mkApp f [])
|
||||||
|
]
|
||||||
|
|
||||||
|
type Cat = String
|
||||||
|
type Fun = String
|
||||||
type Oper = String
|
type Oper = String
|
||||||
type Config = M.Map Cat (Cat,Oper,[Int])
|
type Config = M.Map Cat (Cat,Oper,([Int],[Int])) -- lin word_Cat = Oper str_i1 str_i2 ... str_in featj1 ... featjn ;
|
||||||
|
|
||||||
data MDEnv = MDEnv {
|
data MDEnv = MDEnv {
|
||||||
pgf :: PGF,
|
rawdata :: [RawData],
|
||||||
config :: Config,
|
config :: Config
|
||||||
lang :: Language
|
|
||||||
}
|
}
|
||||||
|
|
||||||
mkConfig :: String -> Config
|
mkConfig :: String -> Config -- N : N mkN 0 2 4 6 # 9
|
||||||
mkConfig ls = M.fromList [(c,i) | Left (c,i) <- map mkOne (lines ls)]
|
mkConfig ls = M.fromList [(c,i) | Left (c,i) <- map mkOne (lines ls)]
|
||||||
where
|
where
|
||||||
mkOne s = case words s of
|
mkOne s = case words s of
|
||||||
"--":_ -> Right s
|
"--":_ -> Right s
|
||||||
cat:":":tcat:oper:ints -> Left (mkCId cat,(mkCId tcat,oper,map read ints))
|
cat:":":tcat:oper:ints -> Left (cat,(tcat,oper,mkArgs ints))
|
||||||
_ -> Right s
|
_ -> Right s
|
||||||
|
mkArgs ints = case break (=="#") ints of
|
||||||
|
(ss,[]) -> (map read' ss, [])
|
||||||
|
(ss,_:fs) -> (map read' ss, map read' fs)
|
||||||
|
read' a = readNote [] a -- Safe.readNote provides better error message
|
||||||
|
|
||||||
mkMorphoDict :: MDEnv -> ([String],[String])
|
getRawData s = case words s of
|
||||||
|
c:cs -> (c,cs)
|
||||||
|
|
||||||
|
type RawData = (String,[String]) -- old cat name, forms and features
|
||||||
|
type RawRule = (([String],Cat), (Oper, ([String],[String]))) -- parts of fun name, new category, oper, arguments
|
||||||
|
type RuleData = ((Fun, Cat), (Oper, ([String],[String]))) -- final fun name, cat, oper, args
|
||||||
|
|
||||||
|
mkMorphoDict :: MDEnv -> ([String],[String]) -- fun rules, lin rules
|
||||||
mkMorphoDict env =
|
mkMorphoDict env =
|
||||||
unzip $
|
unzip $
|
||||||
map splitRule $
|
map splitRule $
|
||||||
findCompounds $
|
---- findCompounds $ -- let us not care about compounds for the time being, but include them if they are given
|
||||||
nameFunctions $
|
nameFunctions $
|
||||||
mergeRules $
|
mergeRules $
|
||||||
concatMap findRules cats
|
findRules $
|
||||||
|
rawdata env
|
||||||
where
|
where
|
||||||
splitRule (fun,(cat,lin)) = (unwords ["fun",fun,":",showCId cat,";"], unwords ["lin",fun,"=", unwords lin,";"])
|
splitRule ((fun,cat),(oper,(forms,feats))) =
|
||||||
|
(unwords ["fun",fun,":",cat,";"], unwords ["lin",fun,"=", unwords (oper : map quote forms ++ feats),";"])
|
||||||
|
|
||||||
cats = nub [c | (c,(_,_,_)) <- M.assocs (config env)]
|
cats = nub [c | (c,(_,_,_)) <- M.assocs (config env)]
|
||||||
|
|
||||||
findRules cat = [
|
findRules :: [RawData] -> [RawRule]
|
||||||
([snd (lin !! head ints), showCId c], (c, op : appSig ints (map snd lin))) | --- head ints is the base form in smart paradigms
|
findRules raws = [
|
||||||
f <- functionsByCat (pgf env) cat,
|
(([lemma],newcat),(oper, appSig sig args)) |
|
||||||
lin <- tabularLinearizes (pgf env) (lang env) (mkApp f []), -- [[(String, String)]]
|
(oldcat,args) <- raws,
|
||||||
Just (c,op,ints) <- [M.lookup cat (config env)]
|
Just (newcat, oper, sig) <- [M.lookup oldcat (config env)],
|
||||||
]
|
let lemma = args `at` head (fst sig)
|
||||||
|
]
|
||||||
|
|
||||||
appSig ints forms = [forms !! i | i <- ints]
|
appSig (ints,feats) args =
|
||||||
|
-- If there's wrong number in config file, uncomment the line below to see which number it should be
|
||||||
|
-- trace (intercalate "\n" $ map show (zip [0..] args)) $
|
||||||
|
([args `at` i | i <- ints], [args `at` i | i <- feats])
|
||||||
|
|
||||||
|
mergeRules :: [RawRule] -> [RawRule]
|
||||||
mergeRules = map head . groupBy (\x y -> snd x == snd y) . sortOn snd
|
mergeRules = map head . groupBy (\x y -> snd x == snd y) . sortOn snd
|
||||||
|
|
||||||
|
nameFunctions :: [RawRule] -> [RuleData]
|
||||||
nameFunctions = expandNames . sortOn fst
|
nameFunctions = expandNames . sortOn fst
|
||||||
|
|
||||||
|
expandNames :: [RawRule] -> [RuleData]
|
||||||
expandNames fls = case fls of
|
expandNames fls = case fls of
|
||||||
(f,l):fls2 -> case span ((==f) . fst) fls2 of
|
(f@(w,c),l) : fls2 -> case span ((==f) . fst) fls2 of
|
||||||
([],_) -> (mkFun f,l) : expandNames fls2
|
([],_) -> ((mkFun (w ++ [c]),c),l) : expandNames fls2
|
||||||
(fls1,fls3) -> renames ((f,l):fls1) ++ expandNames fls3
|
(fls1,fls3) -> renames ((f,l):fls1) ++ expandNames fls3
|
||||||
_ -> []
|
_ -> []
|
||||||
|
|
||||||
renames fls = [(mkFun (init f ++ [show i,last f]),l) | (i,(f,l)) <- zip [1..] fls]
|
renames :: [RawRule] -> [RuleData]
|
||||||
|
--- renames fls = [((mkFun (f ++ [show i,c]),c),l) | (i,((f,c),l)) <- zip [1..] fls] -- disambiguate with int
|
||||||
|
renames fls = [((mkFun (f ++ fs ++ [c]),c),l) | (i,(((f,c),l),fs)) <- zip [1..] (zip fls (minimize fls))] -- disambiguate with different forms
|
||||||
|
|
||||||
|
minimize :: [RawRule] -> [[String]]
|
||||||
|
minimize fls = shrink [ws ++ fs | (_,(_,(_:ws,fs))) <- fls]
|
||||||
|
|
||||||
|
shrink fls = case fls of
|
||||||
|
fl@(_:_):_ | all ((==head fl) . head) fls -> shrink (map tail fls)
|
||||||
|
fl@(_:_):_ | all ((==last fl) . last) fls -> shrink (map init fls)
|
||||||
|
_ -> shrinkMore fls
|
||||||
|
|
||||||
|
shrinkMore fls = case fls of
|
||||||
|
_ | length (nub (map init fls)) == length fls -> shrinkMore (map init fls)
|
||||||
|
_ | length (nub (map tail fls)) == length fls -> shrinkMore (map tail fls)
|
||||||
|
_ -> fls
|
||||||
|
|
||||||
|
-- >>> mkFun ["hello", "world", "hello friends", "hello-all"]
|
||||||
|
-- "hello_world_hello_friends_hello_all"
|
||||||
|
mkFun :: [String] -> String -- if word contains space or hyphen, replace with underscore
|
||||||
|
mkFun = showCId . mkCId . concat . intersperse "_" . concatMap (words . removeHyphen)
|
||||||
|
where
|
||||||
|
removeHyphen [] = []
|
||||||
|
removeHyphen ['-'] = ['-'] -- If hyphen is the last character, it's usually meaningful, leave it
|
||||||
|
removeHyphen ('-':cs) = ' ' : removeHyphen cs
|
||||||
|
removeHyphen (c:cs) = c : removeHyphen cs
|
||||||
|
|
||||||
|
quote s = "\"" ++ s ++ "\""
|
||||||
|
|
||||||
|
{- ---- let us ignore this
|
||||||
|
findCompounds :: [RuleData] -> [RuleData]
|
||||||
findCompounds = getCompounds . sortOn cat_orthrevforms
|
findCompounds = getCompounds . sortOn cat_orthrevforms
|
||||||
|
|
||||||
cat_orthrevforms (_,(cat,_:forms)) = (cat,[map (!!i) fss | let fss = map reverse forms, i <- [0..minimum (map length fss) - 1]])
|
cat_orthrevforms (_,(cat,_:forms)) = (cat,[map (!!i) fss | let fss = map reverse forms, i <- [0..minimum (map length fss) - 1]])
|
||||||
@@ -105,8 +185,9 @@ mkMorphoDict env =
|
|||||||
revstem = head . snd . cat_revforms
|
revstem = head . snd . cat_revforms
|
||||||
wforms (_,(_,_:forms)) = forms
|
wforms (_,(_,_:forms)) = forms
|
||||||
|
|
||||||
|
getCompounds :: [RuleData] -> [RuleData]
|
||||||
getCompounds fls = case fls of
|
getCompounds fls = case fls of
|
||||||
fl : fls1 | length (revstem fl) < 2 -> markWith fl [] : getCompounds fls1
|
fl : fls1 | length (revstem fl) < 2 -> markWith fl [] : getCompounds fls1
|
||||||
fl : fls2 -> case span (\x -> and [isPrefixOf (reverse w) (reverse w1) | (w,w1) <- zip (wforms fl) (wforms x)]) fls2 of
|
fl : fls2 -> case span (\x -> and [isPrefixOf (reverse w) (reverse w1) | (w,w1) <- zip (wforms fl) (wforms x)]) fls2 of
|
||||||
([],_:_) -> markWith fl [] : getCompounds fls2
|
([],_:_) -> markWith fl [] : getCompounds fls2
|
||||||
(fls1,fls3) -> markWith fl [] : map (markCompound fl) fls1 ++ getCompounds fls3
|
(fls1,fls3) -> markWith fl [] : map (markCompound fl) fls1 ++ getCompounds fls3
|
||||||
@@ -117,26 +198,14 @@ mkMorphoDict env =
|
|||||||
True -> markWith fl1 [";","--","compound",(fst fl)]
|
True -> markWith fl1 [";","--","compound",(fst fl)]
|
||||||
False -> markWith fl1 [";","--","notcompound",(fst fl)]
|
False -> markWith fl1 [";","--","notcompound",(fst fl)]
|
||||||
|
|
||||||
markWith (f,(c,op:ws)) xs = (f,(c,op : map quote ws ++ xs))
|
markWith (f,(c,op:ws)) xs = (f,(c,op : map quote ws ++ xs)) ---- TODO only quote string args, not features
|
||||||
|
|
||||||
isPrefixWord x xy =
|
isPrefixWord x xy =
|
||||||
length suff > 1 &&
|
length suff > 1 && ---- compound first part must be at least two letters long
|
||||||
any (\c -> elem c "-0123456789aeiouyåäö") suff &&
|
any (\c -> elem c "-0123456789aeiouyåäö") suff && ---- must contain a vowel or a digit
|
||||||
isPrefixOf x xy
|
isPrefixOf x xy ---- and of course be a prefix
|
||||||
where
|
where
|
||||||
suff = drop (length x) xy
|
suff = drop (length x) xy
|
||||||
|
-}
|
||||||
mkFun = quoteIf . concat . intersperse "_"
|
|
||||||
quoteIf s = case s of
|
|
||||||
_ | any (\c -> not (isAlphaNum c || elem c "_'")) s -> "'" ++ unSgQuote s ++ "'"
|
|
||||||
c:_ | not (isAlpha c) -> "W_" ++ s
|
|
||||||
_ -> s
|
|
||||||
where
|
|
||||||
unSgQuote s = case s of
|
|
||||||
'\'':cs -> "\\\'" ++ unSgQuote cs
|
|
||||||
c:cs -> c : unSgQuote cs
|
|
||||||
_ -> s
|
|
||||||
|
|
||||||
|
|
||||||
quote s = "\"" ++ s ++ "\""
|
|
||||||
|
|
||||||
|
|||||||
6
src/morphodict/MorphoDictFin.config
Normal file
6
src/morphodict/MorphoDictFin.config
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
N : N mkN 0 1 2 4 7 13 14 16 17 19
|
||||||
|
A : A mkA' 0 1 2 4 7 13 14 16 17 19
|
||||||
|
V : V mkV 0 17 19 22 43 49 23 25 31 58 94 37
|
||||||
|
V2 : V mkV 0 17 19 22 43 49 23 25 31 58 94 37
|
||||||
|
Adv : Adv mkAdv 0
|
||||||
|
Prep : Prep mkPrep 0
|
||||||
File diff suppressed because it is too large
Load Diff
13
src/morphodict/MorphoDictFin.header
Normal file
13
src/morphodict/MorphoDictFin.header
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
concrete MorphoDictFin of MorphoDictFinAbs = CatFin ** open
|
||||||
|
ParadigmsFin,
|
||||||
|
-- MorphoFin,
|
||||||
|
Kotus
|
||||||
|
-- Prelude
|
||||||
|
in {
|
||||||
|
|
||||||
|
-- extracted from http://kaino.kotus.fi/sanat/nykysuomi/, licensed under LGPL
|
||||||
|
|
||||||
|
flags coding = utf8 ;
|
||||||
|
|
||||||
|
oper mkA' : (x1,_,_,_,_,_,_,_,_,x10 : Str) -> A = \a,b,c,d,e,f,g,h,i,j -> mkA (mkN a b c d e f g h i j) ; -- Need a single worst-case paradigm for how config is implemented
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
4
src/morphodict/MorphoDictFinAbs.header
Normal file
4
src/morphodict/MorphoDictFinAbs.header
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
abstract MorphoDictFinAbs =
|
||||||
|
Cat [N,A,V,Adv,Prep] **
|
||||||
|
{
|
||||||
|
|
||||||
@@ -1,38 +0,0 @@
|
|||||||
MkMorphoDict: Extracting a minimal morphological dictionary from an existing GF dictionary.
|
|
||||||
|
|
||||||
Aarne Ranta 2020-03-02
|
|
||||||
|
|
||||||
principles:
|
|
||||||
|
|
||||||
There should be a single source for each lemgram (i.e. inflection table of a word)
|
|
||||||
Functions names should be easy to guess: baseform_Category (but avoiding accidental errors if this is not a unique key)
|
|
||||||
|
|
||||||
Hence,
|
|
||||||
|
|
||||||
Functions are 1-to-1 with lemgrams, i.e. inflection tables, thus
|
|
||||||
- no sense distinctions
|
|
||||||
- no subcategorizations
|
|
||||||
- no variants
|
|
||||||
|
|
||||||
Functionname = baseform_category, with exceptions
|
|
||||||
- same baseform_Category, different inflection tables: lie_1_V, lie_2_V
|
|
||||||
- words that have non-ident characters: 'bird\'s-eye_A'
|
|
||||||
- words that start with non-letters: W_'tween_Adv
|
|
||||||
|
|
||||||
Example run, English:
|
|
||||||
|
|
||||||
gf -make ../english/DictEng.gf
|
|
||||||
runghc MkMorphodict.hs DictEngAbs.pgf MorphoDictEng
|
|
||||||
|
|
||||||
Result: 64923 -> 56599 functions, of which 21679 could be compounds
|
|
||||||
|
|
||||||
Swedish, using a dump of SALDO (not available in these sources)
|
|
||||||
|
|
||||||
cd saldo/
|
|
||||||
runghc SaldoGF.hs
|
|
||||||
# combine abs.tmp with Saldo.header to obtain Saldo.gf
|
|
||||||
# combine cnc.tmp with SaldoSwe.header to obtain SaldoSwe.gf
|
|
||||||
gf -make SaldoSwe.gf
|
|
||||||
cd ..
|
|
||||||
runghc MkMorphodict.hs saldo/Saldo.pgf MorphoDictSwe
|
|
||||||
|
|
||||||
171
src/morphodict/README.md
Normal file
171
src/morphodict/README.md
Normal file
@@ -0,0 +1,171 @@
|
|||||||
|
# morphodict: purely morphological unilingual dictionaries
|
||||||
|
|
||||||
|
Aarne Ranta 2020-03-02 -- 2021-05-27
|
||||||
|
|
||||||
|
UNDER CONSTRUCTION, INCOMPLETE AND BUGGY
|
||||||
|
|
||||||
|
## The vision
|
||||||
|
|
||||||
|
Vision 1: if you need the noun "stjärna" in Swedish, you will find it
|
||||||
|
as `MorphoDictSwe.stjärna_N`.
|
||||||
|
|
||||||
|
Vision 2: if you analyse a Swedish text that contains the word "stjärnornas", it will be returned as `MorphoDictSwe.stjärna_N`.
|
||||||
|
|
||||||
|
Vision 3: this will work for all words of Swedish and all other RGL languages. Only seldom will you need `ParadigmsSwe`.
|
||||||
|
|
||||||
|
|
||||||
|
## What is contained
|
||||||
|
|
||||||
|
The guiding principle is to provide a single source for each *lemgram* (i.e. linearization records, i.e. inflection table plus inherent features).
|
||||||
|
Functions names should be easy to guess:
|
||||||
|
- `baseform_Category`
|
||||||
|
|
||||||
|
Baseforms that have many different lemgrams are an exception.
|
||||||
|
They should be disambiguated by adding the differing forms, as in
|
||||||
|
- `lie_lay_V` ("lie, lay, lain")
|
||||||
|
- `lie_lied_V` ("lie, lied lied")
|
||||||
|
|
||||||
|
Such distinctions are made in all cases where there are alternative inflections, even if there is no sense distinction:
|
||||||
|
- `learn_learned_V` ("learn, learned, learned")
|
||||||
|
- `learn_learnt_V` ("learn, learnt, learnt")
|
||||||
|
|
||||||
|
Hence,
|
||||||
|
- no `variants` should appear in the MorphoDict
|
||||||
|
- no entries should be duplicated if their lemgrams are the same
|
||||||
|
- hence, in particular, sense distinctions do not result in different entries
|
||||||
|
|
||||||
|
The dictionary will also exclude *multiwords* consisting of several tokens.
|
||||||
|
Most of the time, even *compounds* written as single tokens should be excluded.
|
||||||
|
However, as the status of a compound is not always clear, and since they do not create spurious morphological analyses, they can be tolerated, in particular if extracted from legacy sources.
|
||||||
|
|
||||||
|
Since multiwords and compounds are excluded, `Paradigms` and `MakeStructural` should for each language provide API functions for easy definitions of them, preferably of the form
|
||||||
|
```
|
||||||
|
mkC : Str -> C -> C
|
||||||
|
```
|
||||||
|
The situation when this is not enough is when separate functions are needed for gluing and concatenation compounds.
|
||||||
|
|
||||||
|
*Open question*: what to do with compound prepositions that are common in e.g. English?
|
||||||
|
The above principles imply
|
||||||
|
```
|
||||||
|
according_to_Prep = mkPrep "according" to_Prep
|
||||||
|
```
|
||||||
|
defined *outside* `MorphoDictEng`, so that `mkPrep` comes from `ParadigmsEng` and `to_Prep` from `MorphoDictEng`.
|
||||||
|
This may sound like against tradition, but follows the general guidelines of morphological dictionaries.
|
||||||
|
|
||||||
|
|
||||||
|
## Relevant categories
|
||||||
|
|
||||||
|
In addition to sense distinctions, MorphoDict ignores subcategorizations.
|
||||||
|
One reason is that, just like senses (although in a lesser degree), they are open-ended and sometimes vague.
|
||||||
|
Another reason is that different subcategory variants overload morphological analysis.
|
||||||
|
|
||||||
|
The most numerous categories to be addressed are content words:
|
||||||
|
- `A`
|
||||||
|
- `Adv`
|
||||||
|
- `Interj`
|
||||||
|
- `N`
|
||||||
|
- `PN`
|
||||||
|
- `Symb`
|
||||||
|
- `V`
|
||||||
|
|
||||||
|
In addition, structural words should appear here with their native lemma names:
|
||||||
|
- `Conj`
|
||||||
|
- `Det`
|
||||||
|
- `IAdv`
|
||||||
|
- `IDet`
|
||||||
|
- `IP`
|
||||||
|
- `NP` (special NP-like "pronouns", such as "somebody")
|
||||||
|
- `Prep`
|
||||||
|
- `Pron` (in the RGL only covering personal pronouns)
|
||||||
|
- `Punct`
|
||||||
|
- `Subj`
|
||||||
|
|
||||||
|
Additional language-specific categories can be included if the reasons are clear.
|
||||||
|
They must then be importable from the `Paradigms` module for that language, together with `mk` functions.
|
||||||
|
The `Extend` module may also put them in use in syntax.
|
||||||
|
|
||||||
|
Following the model of Universal Tagset, we add a category `X` for unspecified words in `Extend`, with the linearization type `{s : Str}`.
|
||||||
|
Hence it can only be used for uninflected strings with unclear status.
|
||||||
|
|
||||||
|
## Naming
|
||||||
|
|
||||||
|
As stated before,
|
||||||
|
- `functionname` = `baseform_category` if there is a unique lemgram
|
||||||
|
- = `baseform_number_category` if there is a need to disambiguate
|
||||||
|
|
||||||
|
The disambiguation numbering should reflect the frequency or probability of the lemgram, but this is just a recommendation, since the frequency is not always known.
|
||||||
|
|
||||||
|
The baseform should be the native alphabet baseform in Unicode letters, which is as such a valid GF identifier.
|
||||||
|
However, if the word contains characters that are not legal in identifiers, the function name should be simply included in single quotes, rather than inventing transliterations.
|
||||||
|
If function names are formed by the API function `PGF.mkCId`, these conventions are automatically followed.
|
||||||
|
|
||||||
|
|
||||||
|
## Coding conventions
|
||||||
|
|
||||||
|
To enable easy ocular and automatic inspection,
|
||||||
|
- write one entry per line, each prefixed by `fun` or `lin` keyword
|
||||||
|
- sort the entries alphabetically
|
||||||
|
- use paradigms with enough many arguments to make the characteristic forms explicit
|
||||||
|
|
||||||
|
To guarantee compatibility with the rest of the RGL and application grammars,
|
||||||
|
- paradigms used should be imported from `Paradigms` and `MakeStructural` rather than defined in `MorphoDict` itself
|
||||||
|
- import of *low-level modules* such as `Res` should be avoided
|
||||||
|
- `MorphoDict` should be self-contained, i.e. not inherit from other modules such as `Structural` or `Irreg`. But it is OK to `open` them in a qualified mode to use when defining linearizations.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Bootstrapping with `MkMorphoDict`
|
||||||
|
|
||||||
|
Example run, English:
|
||||||
|
```
|
||||||
|
gf -make ../english/DictEng.gf
|
||||||
|
runghc MkMorphodict.hs pgf MorphoDictEng.config DictEngAbs.pgf MorphoDictEng
|
||||||
|
```
|
||||||
|
Or, if you have raw data from another source, of the format "N woman women", you can do
|
||||||
|
```
|
||||||
|
runghc MkMorphodict.hs raw MorphoDictEng.config raw_words_eng.txt MorphoDictEng
|
||||||
|
```
|
||||||
|
The script needs a *configuration file* mapping legacy categories and forms lists to parts of GF code:
|
||||||
|
```
|
||||||
|
N : N mkN 0 2
|
||||||
|
A : A mkA 0 2 4 6
|
||||||
|
V : V mkV 0 4 2
|
||||||
|
V2 : V mkV 0 4 2
|
||||||
|
Adv : Adv mkAdv 0
|
||||||
|
Prep : Prep mkPrep 0
|
||||||
|
```
|
||||||
|
In addition, it needs *header files* containing lines to be prefixed to the generated files:
|
||||||
|
```
|
||||||
|
concrete MorphoDictEng of MorphoDictEngAbs =
|
||||||
|
CatEng [N,A,V,Adv,Prep] **
|
||||||
|
open
|
||||||
|
ParadigmsEng
|
||||||
|
in
|
||||||
|
{
|
||||||
|
```
|
||||||
|
```
|
||||||
|
abstract MorphoDictEngAbs =
|
||||||
|
Cat [N,A,V,Adv,Prep] **
|
||||||
|
{
|
||||||
|
```
|
||||||
|
For more details, we refer to `MkMorphodict.hs` for the time being.
|
||||||
|
|
||||||
|
If the config and header files are sound, the script produces compilable GF files.
|
||||||
|
They also mostly comply to the guidelines given in this document.
|
||||||
|
|
||||||
|
Some things TODO:
|
||||||
|
- deal with multiwords such as "more regular" generated by Paradigms
|
||||||
|
- use references to native Irreg files instead of very long smart paradigms
|
||||||
|
- support increments in addition to overwrites
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Things to do
|
||||||
|
|
||||||
|
To support the construction of a `MorphoDict`, the following should be provided in `Paradigms`:
|
||||||
|
- explicit smart paradigms with characteristic forms and inherent features for each category
|
||||||
|
- API constants for all inherent features that are needed
|
||||||
|
- compound-constructing functions for all categories that need them
|
||||||
|
- the extra categories that one wants to include in that language
|
||||||
|
|
||||||
|
|
||||||
20
src/morphodict/morphodict.cabal
Normal file
20
src/morphodict/morphodict.cabal
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
name: morphodict
|
||||||
|
version: 0.1
|
||||||
|
homepage: https://github.com/GrammaticalFramework/gf-rgl/tree/master/src/morphodict
|
||||||
|
author: Aarne Ranta
|
||||||
|
category: Natural Language Processing
|
||||||
|
build-type: Simple
|
||||||
|
extra-source-files: README.md
|
||||||
|
cabal-version: >=1.10
|
||||||
|
|
||||||
|
executable MkMorphoDict
|
||||||
|
hs-source-dirs:
|
||||||
|
.
|
||||||
|
main-is: MkMorphoDict.hs
|
||||||
|
other-modules:
|
||||||
|
build-depends:
|
||||||
|
base,
|
||||||
|
containers,
|
||||||
|
safe,
|
||||||
|
gf
|
||||||
|
default-language: Haskell2010
|
||||||
16
src/morphodict/stack.yaml
Normal file
16
src/morphodict/stack.yaml
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
resolver: lts-12.26
|
||||||
|
|
||||||
|
packages:
|
||||||
|
- .
|
||||||
|
|
||||||
|
# so that `stack build --copy-bins` puts bin here
|
||||||
|
local-bin-path: .
|
||||||
|
|
||||||
|
extra-deps:
|
||||||
|
- gf-3.10
|
||||||
|
- cgi-3001.3.0.3 # dependency of gf
|
||||||
|
|
||||||
|
flags:
|
||||||
|
# this excludes PGF2 module in gf package
|
||||||
|
gf:
|
||||||
|
c-runtime: false
|
||||||
26
src/morphodict/stack.yaml.lock
Normal file
26
src/morphodict/stack.yaml.lock
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
# This file was autogenerated by Stack.
|
||||||
|
# You should not edit this file by hand.
|
||||||
|
# For more information, please see the documentation at:
|
||||||
|
# https://docs.haskellstack.org/en/stable/lock_files
|
||||||
|
|
||||||
|
packages:
|
||||||
|
- completed:
|
||||||
|
hackage: gf-3.10@sha256:6f851dfaab5e1f9d4f3796515b86f78806a2bb305136a902713dfc2b92d9cfb0,8477
|
||||||
|
pantry-tree:
|
||||||
|
size: 64924
|
||||||
|
sha256: 66332577ff42a42eed767f451f53266e1020b72749cdcdf7387933615d5de091
|
||||||
|
original:
|
||||||
|
hackage: gf-3.10
|
||||||
|
- completed:
|
||||||
|
hackage: cgi-3001.3.0.3@sha256:4f3768d09e4a6620642588cab2e99d83c1b6b542dad6147d0af9532170036115,2076
|
||||||
|
pantry-tree:
|
||||||
|
size: 667
|
||||||
|
sha256: 65f6fd4574cffd1e5e2490c133b7ba58fd2fea0a65d81f1fa6fe14f08025629b
|
||||||
|
original:
|
||||||
|
hackage: cgi-3001.3.0.3
|
||||||
|
snapshots:
|
||||||
|
- completed:
|
||||||
|
size: 509471
|
||||||
|
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/12/26.yaml
|
||||||
|
sha256: 95f014df58d0679b1c4a2b7bf2b652b61da8d30de5f571abb0d59015ef678646
|
||||||
|
original: lts-12.26
|
||||||
51
src/morphodict/utils/only_homonyms.sh
Executable file
51
src/morphodict/utils/only_homonyms.sh
Executable file
@@ -0,0 +1,51 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
USAGE="usage: ./remove_sense_distinctions.sh <concrete syntax file>"
|
||||||
|
NOTE="This is not extremely useful, it will just create a file with only those entries that are homonymous in dictionary form, but differ in other forms. The purpose of the file is for you to look at/do small experiments with. The real job is done in MkMorphoDict.hs."
|
||||||
|
|
||||||
|
# String manipulation
|
||||||
|
CONC=$1 # e.g. MorphoDictFin.gf
|
||||||
|
BAK="$CONC.bak" # e.g. MorphoDictFin.gf.bak
|
||||||
|
|
||||||
|
NAME=`echo $CONC | cut -f 1 -d '.'` # e.g. MorphoDictFin
|
||||||
|
ABS="${NAME}Abs.gf" # e.g. MorphoDictFinAbs.gf
|
||||||
|
CONC_HEADER="$NAME.header" # e.g. MorphoDictFin.header
|
||||||
|
ABS_HEADER="${NAME}Abs.header" # e.g. MorphoDictFinAbs.header
|
||||||
|
|
||||||
|
find_duplicates() {
|
||||||
|
echo "Putting (temporarily) only homonyms in $CONC"
|
||||||
|
echo "cat $CONC_HEADER > $CONC"
|
||||||
|
cat $CONC_HEADER > $CONC
|
||||||
|
DUPLS=`cut -f 2 -d ' ' /tmp/$CONC \
|
||||||
|
| sort | uniq -c | sort -nr \
|
||||||
|
| egrep "^ +1?[2-9][0-9]? [a-zåäö]+_" \
|
||||||
|
| tr -d '[0-9][A-ZÅÄÖ]'`
|
||||||
|
for d in $DUPLS
|
||||||
|
do
|
||||||
|
grep "lin $d" $BAK >> $CONC
|
||||||
|
done
|
||||||
|
echo "}" >> $CONC
|
||||||
|
}
|
||||||
|
|
||||||
|
remove_numbers() {
|
||||||
|
echo "cp $CONC{,.bak}"
|
||||||
|
cp $CONC{,.bak}
|
||||||
|
echo "cat $CONC | sed -E 's/_[0-9]_/_/g' | uniq > /tmp/$CONC"
|
||||||
|
cat $CONC | sed -E 's/_[0-9]_/_/g' | uniq > /tmp/$CONC
|
||||||
|
echo "Done removing numbers."
|
||||||
|
}
|
||||||
|
|
||||||
|
#### Action starts here
|
||||||
|
|
||||||
|
echo $NOTE
|
||||||
|
|
||||||
|
if [[ $CONC == *"Abs.gf" ]]
|
||||||
|
then
|
||||||
|
echo $USAGE
|
||||||
|
else
|
||||||
|
remove_numbers
|
||||||
|
find_duplicates
|
||||||
|
# echo "gf -v=0 -make $CONC"
|
||||||
|
# gf -v=0 -make $CONC
|
||||||
|
echo "$CONC contains now only homonyms. Original file is found in $BAK."
|
||||||
|
fi
|
||||||
@@ -7,7 +7,7 @@ concrete ExtendSwe of Extend = CatSwe **
|
|||||||
StrandRelSlash, EmptyRelSlash, StrandQuestSlash,
|
StrandRelSlash, EmptyRelSlash, StrandQuestSlash,
|
||||||
PassVPSlash, PassAgentVPSlash, UttVPShort, ByVP, InOrderToVP,
|
PassVPSlash, PassAgentVPSlash, UttVPShort, ByVP, InOrderToVP,
|
||||||
MkVPI, BaseVPI, ConsVPI, ConjVPI, ComplVPIVV,
|
MkVPI, BaseVPI, ConsVPI, ConjVPI, ComplVPIVV,
|
||||||
MkVPS, BaseVPS, ConsVPS, ConjVPS, PredVPS,
|
MkVPS, BaseVPS, ConsVPS, ConjVPS, PredVPS, RelVPS,
|
||||||
MkVPS2, ConjVPS2, ComplVPS2, ReflVPS2, MkVPI2, ConjVPI2, ComplVPI2,
|
MkVPS2, ConjVPS2, ComplVPS2, ReflVPS2, MkVPI2, ConjVPI2, ComplVPI2,
|
||||||
ICompAP,ProDrop,EmbedSSlash,
|
ICompAP,ProDrop,EmbedSSlash,
|
||||||
AdAdV, PositAdVAdj, GerundCN, GerundNP, GerundAdv, PresPartAP, PastPartAP, PastPartAgentAP,
|
AdAdV, PositAdVAdj, GerundCN, GerundNP, GerundAdv, PresPartAP, PastPartAP, PastPartAgentAP,
|
||||||
@@ -94,22 +94,6 @@ in {
|
|||||||
{c2 = a2.c2} ; -- has the right c2
|
{c2 = a2.c2} ; -- has the right c2
|
||||||
|
|
||||||
|
|
||||||
N2VPSlash n2 =
|
|
||||||
let vp : CatSwe.VP = UseComp (CompCN (UseN2 n2)) ;
|
|
||||||
dummyVPS : VPSlash = SlashV2a (P.mkV2 "dummy") ;
|
|
||||||
in dummyVPS ** -- has necessary fields for VPSlash
|
|
||||||
vp ** -- has all the right fields except for c2
|
|
||||||
{c2 = n2.c2} ; -- has the right c2
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
A2VPSlash a2 =
|
|
||||||
let vp : CatSwe.VP = UseComp (CompAP (UseA2 a2)) ;
|
|
||||||
dummyVPS : VPSlash = SlashV2a (P.mkV2 "dummy") ;
|
|
||||||
in dummyVPS ** -- has necessary fields for VPSlash
|
|
||||||
vp ** -- has all the right fields except for c2
|
|
||||||
{c2 = a2.c2} ; -- has the right c2
|
|
||||||
|
|
||||||
lin UttVPShort vp = {s = infVP vp (agrP3 Utr Sg)} ;
|
lin UttVPShort vp = {s = infVP vp (agrP3 Utr Sg)} ;
|
||||||
|
|
||||||
lincat
|
lincat
|
||||||
@@ -148,6 +132,16 @@ in {
|
|||||||
}
|
}
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
|
RelVPS rp vps = {
|
||||||
|
s = \\ag,rcase =>
|
||||||
|
let agr = case rp.a of { -- RP's agr may override in the regular RelativeScand, is this true with VPS too?
|
||||||
|
RNoAg => ag ;
|
||||||
|
RAg g n p => {g = g ; n = n ; p = p}
|
||||||
|
} ;
|
||||||
|
in rp.s ! ag.g ! ag.n ! rcase ++ vps.s ! Sub ! agr ;
|
||||||
|
c = NPNom
|
||||||
|
} ;
|
||||||
|
|
||||||
MkVPS t p vp = {
|
MkVPS t p vp = {
|
||||||
s = \\o,a =>
|
s = \\o,a =>
|
||||||
let
|
let
|
||||||
|
|||||||
@@ -44,6 +44,7 @@ oper
|
|||||||
|
|
||||||
utrum : Gender ; -- the "en" gender
|
utrum : Gender ; -- the "en" gender
|
||||||
neutrum : Gender ; -- the "ett" gender
|
neutrum : Gender ; -- the "ett" gender
|
||||||
|
neuter : Gender ; -- synonym of neutrum
|
||||||
|
|
||||||
-- To abstract over number names, we define the following.
|
-- To abstract over number names, we define the following.
|
||||||
|
|
||||||
@@ -151,6 +152,7 @@ oper
|
|||||||
mkPN : (jesus,jesu : Str) -> Gender -> PN -- irregular genitive
|
mkPN : (jesus,jesu : Str) -> Gender -> PN -- irregular genitive
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
|
geoPN : Str -> PN ; -- neuter, with identical genitive if ends in a vowel
|
||||||
|
|
||||||
--2 Adjectives
|
--2 Adjectives
|
||||||
|
|
||||||
@@ -348,6 +350,7 @@ oper
|
|||||||
Case = CommonScand.Case ;
|
Case = CommonScand.Case ;
|
||||||
utrum = Utr ;
|
utrum = Utr ;
|
||||||
neutrum = Neutr ;
|
neutrum = Neutr ;
|
||||||
|
neuter = Neutr ;
|
||||||
singular = Sg ;
|
singular = Sg ;
|
||||||
plural = Pl ;
|
plural = Pl ;
|
||||||
nominative = Nom ;
|
nominative = Nom ;
|
||||||
@@ -525,6 +528,14 @@ oper
|
|||||||
{s = table {Nom => jesus ; Gen => jesu} ; g = g ; lock_PN = <>} ;
|
{s = table {Nom => jesus ; Gen => jesu} ; g = g ; lock_PN = <>} ;
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
|
geoPN name =
|
||||||
|
let names : Str = case name of {
|
||||||
|
_ + ("a"|"e"|"i"|"o"|"u"|"y"|"å"|"ä"|"ö") => name ;
|
||||||
|
_ + "s" => name ;
|
||||||
|
_ => name + "s"
|
||||||
|
} in
|
||||||
|
mkPN name names neutrum ;
|
||||||
|
|
||||||
regPN n = regGenPN n utrum ;
|
regPN n = regGenPN n utrum ;
|
||||||
regGenPN n g = {s = \\c => mkCase c n ; g = g} ** {lock_PN = <>} ;
|
regGenPN n g = {s = \\c => mkCase c n ; g = g} ** {lock_PN = <>} ;
|
||||||
nounPN n = {s = n.s ! singular ! Indef ; g = n.g ; lock_PN = <>} ;
|
nounPN n = {s = n.s ! singular ! Indef ; g = n.g ; lock_PN = <>} ;
|
||||||
|
|||||||
Reference in New Issue
Block a user