forked from GitHub/comp-syntax-gu-mlt
everything in place for Lab 2
This commit is contained in:
@@ -1,14 +1,14 @@
|
|||||||
# Lab 2: Multilingual text generation from Wikidata
|
# Lab 2: Multilingual text generation from Wikidata
|
||||||
|
|
||||||
This uses GF to generate texts from facts in the Wikidata fact database.
|
This uses GF to generate texts from facts in the Wikidata fact database.
|
||||||
You will be given
|
You are given
|
||||||
|
|
||||||
- an abstract syntax,
|
- an abstract syntax and an English concrete syntax, in the subdirectory grammars/
|
||||||
- an English concrete syntax,
|
- a json dump from Wikidata, in the subdirectory data/
|
||||||
- a json dump from Wikidata
|
- a Python file that connects Wikidata with GF, in the subdirectory scripts/
|
||||||
- a Python file that connects Wikidata with GF
|
|
||||||
|
|
||||||
Your task will be to create a concrete syntax for some other language by using the
|
Your task is to create a concrete syntax for some other language by using the
|
||||||
GF RGL and evaluate the text generated by this.
|
GF RGL and evaluate the text generated by this.
|
||||||
|
|
||||||
The listed files will be provided before the lab starts.
|
More instructions will be given in the lectures during the week of 5-9 May 2025.
|
||||||
|
|
||||||
|
|||||||
112
lab2/data/Nobel-funs.jsonl
Normal file
112
lab2/data/Nobel-funs.jsonl
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
["Q800", "Q800_Costa_Rica_Country"]
|
||||||
|
["Q219060", "Q219060_State_of_Palestine_Country"]
|
||||||
|
["Q37", "Q37_Lithuania_Country"]
|
||||||
|
["Q137816", "Q137816_Taiwan_under_Japanese_rule_Country"]
|
||||||
|
["Q1028", "Q1028_Morocco_Country"]
|
||||||
|
["Q796", "Q796_Iraq_Country"]
|
||||||
|
["Q184", "Q184_Belarus_Country"]
|
||||||
|
["Q225", "Q225_Bosnia_and_Herzegovina_Country"]
|
||||||
|
["Q20", "Q20_Norway_Country"]
|
||||||
|
["Q211", "Q211_Latvia_Country"]
|
||||||
|
["Q117", "Q117_Ghana_Country"]
|
||||||
|
["Q39", "Q39_Switzerland_Country"]
|
||||||
|
["Q159631", "Q159631_Kingdom_of_Württemberg_Country"]
|
||||||
|
["Q17", "Q17_Japan_Country"]
|
||||||
|
["Q189", "Q189_Iceland_Country"]
|
||||||
|
["Q221", "Q221_North_Macedonia_Country"]
|
||||||
|
["Q9683", "Q9683_Tang_dynasty_Country"]
|
||||||
|
["Q79", "Q79_Egypt_Country"]
|
||||||
|
["Q408", "Q408_Australia_Country"]
|
||||||
|
["Q4628", "Q4628_Faroe_Islands_Country"]
|
||||||
|
["Q145", "Q145_United_Kingdom_Country"]
|
||||||
|
["Q214", "Q214_Slovakia_Country"]
|
||||||
|
["Q16", "Q16_Canada_Country"]
|
||||||
|
["Q924", "Q924_Tanzania_Country"]
|
||||||
|
["Q55502", "Q55502_Kingdom_of_Jerusalem_Country"]
|
||||||
|
["Q183", "Q183_Germany_Country"]
|
||||||
|
["Q754", "Q754_Trinidad_and_Tobago_Country"]
|
||||||
|
["Q298", "Q298_Chile_Country"]
|
||||||
|
["Q41", "Q41_Greece_Country"]
|
||||||
|
["Q30623", "Q30623_Manchukuo_Country"]
|
||||||
|
["Q774", "Q774_Guatemala_Country"]
|
||||||
|
["Q836", "Q836_Myanmar_Country"]
|
||||||
|
["Q902", "Q902_Bangladesh_Country"]
|
||||||
|
["Q215", "Q215_Slovenia_Country"]
|
||||||
|
["Q7313", "Q7313_Yuan_dynasty_Country"]
|
||||||
|
["Q822", "Q822_Lebanon_Country"]
|
||||||
|
["Q12548", "Q12548_Holy_Roman_Empire_Country"]
|
||||||
|
["Q12407080", "Q12407080_early_Islamic_period_in_Palestine_Country"]
|
||||||
|
["Q717", "Q717_Venezuela_Country"]
|
||||||
|
["Q31", "Q31_Belgium_Country"]
|
||||||
|
["Q794", "Q794_Iran_Country"]
|
||||||
|
["Q43", "Q43_Turkey_Country"]
|
||||||
|
["Q948", "Q948_Tunisia_Country"]
|
||||||
|
["Q258", "Q258_South_Africa_Country"]
|
||||||
|
["Q28", "Q28_Hungary_Country"]
|
||||||
|
["Q80061", "Q80061_Nobel_Prize_in_Physiology_or_Medicine_Award"]
|
||||||
|
["Q142", "Q142_France_Country"]
|
||||||
|
["Q805", "Q805_Yemen_Country"]
|
||||||
|
["Q881", "Q881_Vietnam_Country"]
|
||||||
|
["Q7462", "Q7462_Song_dynasty_Country"]
|
||||||
|
["Q12544", "Q12544_Byzantine_Empire_Country"]
|
||||||
|
["Q664", "Q664_New_Zealand_Country"]
|
||||||
|
["Q33", "Q33_Finland_Country"]
|
||||||
|
["Q282428", "Q282428_Mamluk_Sultanate_Country"]
|
||||||
|
["Q38104", "Q38104_Nobel_Prize_in_Physics_Award"]
|
||||||
|
["Q9903", "Q9903_Ming_dynasty_Country"]
|
||||||
|
["Q739", "Q739_Colombia_Country"]
|
||||||
|
["Q13426199", "Q13426199_Republic_of_China_Country"]
|
||||||
|
["Q55", "Q55_Netherlands_Country"]
|
||||||
|
["Q159", "Q159_Russia_Country"]
|
||||||
|
["Q27", "Q27_Ireland_Country"]
|
||||||
|
["Q48685", "Q48685_Kingdom_of_Judah_Country"]
|
||||||
|
["Q810", "Q810_Jordan_Country"]
|
||||||
|
["Q36", "Q36_Poland_Country"]
|
||||||
|
["Q1014", "Q1014_Liberia_Country"]
|
||||||
|
["Q38872", "Q38872_Prussia_Country"]
|
||||||
|
["Q574", "'Q574_Timor-Leste_Country'"]
|
||||||
|
["Q974", "Q974_Democratic_Republic_of_the_Congo_Country"]
|
||||||
|
["Q15843470", "Q15843470_Roman_Palestine_Country"]
|
||||||
|
["Q40", "Q40_Austria_Country"]
|
||||||
|
["Q928", "Q928_Philippines_Country"]
|
||||||
|
["Q148", "Q148_People's_Republic_of_China_Country"]
|
||||||
|
["Q35", "Q35_Denmark_Country"]
|
||||||
|
["Q954", "Q954_Zimbabwe_Country"]
|
||||||
|
["Q216173", "Q216173_Free_City_of_Danzig_Country"]
|
||||||
|
["Q227", "Q227_Azerbaijan_Country"]
|
||||||
|
["Q252", "Q252_Indonesia_Country"]
|
||||||
|
["Q801", "Q801_Israel_Country"]
|
||||||
|
["Q155", "Q155_Brazil_Country"]
|
||||||
|
["Q29", "Q29_Spain_Country"]
|
||||||
|
["Q7075820", "Q7075820_Occupied_Enemy_Territory_Administration_Country"]
|
||||||
|
["Q2685298", "Q2685298_Romanian_People's_Republic_Country"]
|
||||||
|
["Q45", "Q45_Portugal_Country"]
|
||||||
|
["Q32", "Q32_Luxembourg_Country"]
|
||||||
|
["Q115", "Q115_Ethiopia_Country"]
|
||||||
|
["Q193714", "Q193714_Mandatory_Palestine_Country"]
|
||||||
|
["Q34", "Q34_Sweden_Country"]
|
||||||
|
["Q262", "Q262_Algeria_Country"]
|
||||||
|
["Q37922", "Q37922_Nobel_Prize_in_Literature_Award"]
|
||||||
|
["Q843", "Q843_Pakistan_Country"]
|
||||||
|
["Q35637", "Q35637_Nobel_Peace_Prize_Award"]
|
||||||
|
["Q1033", "Q1033_Nigeria_Country"]
|
||||||
|
["Q38", "Q38_Italy_Country"]
|
||||||
|
["Q668", "Q668_India_Country"]
|
||||||
|
["Q496922", "Q496922_Hasmonean_dynasty_Country"]
|
||||||
|
["Q212", "Q212_Ukraine_Country"]
|
||||||
|
["Q44585", "Q44585_Nobel_Prize_in_Chemistry_Award"]
|
||||||
|
["Q760", "Q760_Saint_Lucia_Country"]
|
||||||
|
["Q414", "Q414_Argentina_Country"]
|
||||||
|
["Q218", "Q218_Romania_Country"]
|
||||||
|
["Q213", "Q213_Czech_Republic_Country"]
|
||||||
|
["Q219", "Q219_Bulgaria_Country"]
|
||||||
|
["Q12560", "Q12560_Ottoman_Empire_Country"]
|
||||||
|
["Q224", "Q224_Croatia_Country"]
|
||||||
|
["Q419", "Q419_Peru_Country"]
|
||||||
|
["Q1019", "Q1019_Madagascar_Country"]
|
||||||
|
["Q30", "Q30_United_States_Country"]
|
||||||
|
["Q180114", "Q180114_Ayyubid_dynasty_Country"]
|
||||||
|
["Q8733", "Q8733_Qing_dynasty_Country"]
|
||||||
|
["Q96", "Q96_Mexico_Country"]
|
||||||
|
["Q884", "Q884_South_Korea_Country"]
|
||||||
|
["Q114", "Q114_Kenya_Country"]
|
||||||
@@ -9,7 +9,7 @@ oper mkCountry = overload {
|
|||||||
} ;
|
} ;
|
||||||
|
|
||||||
oper mkAward = overload {
|
oper mkAward = overload {
|
||||||
mkAward : Str -> NP = \s -> mkNP (mkPN s) ;
|
mkAward : Str -> NP = \s -> mkNP the_Det (mkN s) ;
|
||||||
mkAward : NP -> NP = \np -> np ;
|
mkAward : NP -> NP = \np -> np ;
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
|
|||||||
128
lab2/grammars/LabelsFin.gf
Normal file
128
lab2/grammars/LabelsFin.gf
Normal file
@@ -0,0 +1,128 @@
|
|||||||
|
concrete LabelsFin of Labels = open SyntaxFin, ParadigmsFin in {
|
||||||
|
|
||||||
|
lincat Country = NP ;
|
||||||
|
lincat Award = NP ;
|
||||||
|
|
||||||
|
oper mkCountry = overload {
  -- From a plain string: the string is turned into a proper name (mkPN)
  -- and wrapped as a noun phrase.
  mkCountry : Str -> NP = \label -> mkNP (mkPN label) ;
  -- From a ready-made noun phrase: returned unchanged, so hand-written
  -- entries can bypass the string-based default.
  mkCountry : NP -> NP = \phrase -> phrase ;
  } ;
|
||||||
|
|
||||||
|
oper mkAward = overload {
  -- From a plain string: the string is turned into a noun (mkN) and
  -- combined with the_Det into a noun phrase.
  mkAward : Str -> NP = \label -> mkNP the_Det (mkN label) ;
  -- From a ready-made noun phrase: returned unchanged.
  mkAward : NP -> NP = \phrase -> phrase ;
  } ;
|
||||||
|
|
||||||
|
lin Q800_Costa_Rica_Country = mkCountry "Costa Rica" ;
|
||||||
|
lin Q219060_State_of_Palestine_Country = mkCountry "Palestiina" ;
|
||||||
|
lin Q37_Lithuania_Country = mkCountry "Liettua" ;
|
||||||
|
lin Q137816_Taiwan_under_Japanese_rule_Country = mkCountry "Taiwan Japanin alaisuudessa" ;
|
||||||
|
lin Q1028_Morocco_Country = mkCountry "Marokko" ;
|
||||||
|
lin Q796_Iraq_Country = mkCountry "Irak" ;
|
||||||
|
lin Q184_Belarus_Country = mkCountry "Valko-Venäjä" ;
|
||||||
|
lin Q225_Bosnia_and_Herzegovina_Country = mkCountry "Bosnia ja Hertsegovina" ;
|
||||||
|
lin Q20_Norway_Country = mkCountry "Norja" ;
|
||||||
|
lin Q211_Latvia_Country = mkCountry "Latvia" ;
|
||||||
|
lin Q117_Ghana_Country = mkCountry "Ghana" ;
|
||||||
|
lin Q39_Switzerland_Country = mkCountry "Sveitsi" ;
|
||||||
|
lin Q159631_Kingdom_of_Württemberg_Country = mkCountry "Württembergin kuningaskunta" ;
|
||||||
|
lin Q17_Japan_Country = mkCountry "Japani" ;
|
||||||
|
lin Q189_Iceland_Country = mkCountry "Islanti" ;
|
||||||
|
lin Q221_North_Macedonia_Country = mkCountry "Pohjois-Makedonia" ;
|
||||||
|
lin Q9683_Tang_dynasty_Country = mkCountry "Tang-dynastia" ;
|
||||||
|
lin Q79_Egypt_Country = mkCountry "Egypti" ;
|
||||||
|
lin Q408_Australia_Country = mkCountry "Australia" ;
|
||||||
|
lin Q4628_Faroe_Islands_Country = mkCountry "Färsaaret" ;
|
||||||
|
lin Q145_United_Kingdom_Country = mkCountry "Yhdistynyt kuningaskunta" ;
|
||||||
|
lin Q214_Slovakia_Country = mkCountry "Slovakia" ;
|
||||||
|
lin Q16_Canada_Country = mkCountry "Kanada" ;
|
||||||
|
lin Q924_Tanzania_Country = mkCountry "Tansania" ;
|
||||||
|
lin Q55502_Kingdom_of_Jerusalem_Country = mkCountry "Jerusalemin kuningaskunta" ;
|
||||||
|
lin Q183_Germany_Country = mkCountry "Saksa" ;
|
||||||
|
lin Q754_Trinidad_and_Tobago_Country = mkCountry "Trinidad ja Tobago" ;
|
||||||
|
lin Q298_Chile_Country = mkCountry "Chile" ;
|
||||||
|
lin Q41_Greece_Country = mkCountry "Kreikka" ;
|
||||||
|
lin Q30623_Manchukuo_Country = mkCountry "Mantšukuo" ;
|
||||||
|
lin Q774_Guatemala_Country = mkCountry "Guatemala" ;
|
||||||
|
lin Q836_Myanmar_Country = mkCountry "Myanmar" ;
|
||||||
|
lin Q902_Bangladesh_Country = mkCountry "Bangladesh" ;
|
||||||
|
lin Q215_Slovenia_Country = mkCountry "Slovenia" ;
|
||||||
|
lin Q7313_Yuan_dynasty_Country = mkCountry "Yuan" ;
|
||||||
|
lin Q822_Lebanon_Country = mkCountry "Libanon" ;
|
||||||
|
lin Q12548_Holy_Roman_Empire_Country = mkCountry "Pyhä saksalais-roomalainen keisarikunta" ;
|
||||||
|
lin Q12407080_early_Islamic_period_in_Palestine_Country = mkCountry "early Islamic period in Palestine" ;
|
||||||
|
lin Q717_Venezuela_Country = mkCountry "Venezuela" ;
|
||||||
|
lin Q31_Belgium_Country = mkCountry "Belgia" ;
|
||||||
|
lin Q794_Iran_Country = mkCountry "Iran" ;
|
||||||
|
lin Q43_Turkey_Country = mkCountry "Turkki" ;
|
||||||
|
lin Q948_Tunisia_Country = mkCountry "Tunisia" ;
|
||||||
|
lin Q258_South_Africa_Country = mkCountry "Etelä-Afrikka" ;
|
||||||
|
lin Q28_Hungary_Country = mkCountry "Unkari" ;
|
||||||
|
lin Q80061_Nobel_Prize_in_Physiology_or_Medicine_Award = mkAward "Nobelin fysiologian tai lääketieteen palkinto" ;
|
||||||
|
lin Q142_France_Country = mkCountry "Ranska" ;
|
||||||
|
lin Q805_Yemen_Country = mkCountry "Jemen" ;
|
||||||
|
lin Q881_Vietnam_Country = mkCountry "Vietnam" ;
|
||||||
|
lin Q7462_Song_dynasty_Country = mkCountry "Song-dynastia" ;
|
||||||
|
lin Q12544_Byzantine_Empire_Country = mkCountry "Bysantin valtakunta" ;
|
||||||
|
lin Q664_New_Zealand_Country = mkCountry "Uusi-Seelanti" ;
|
||||||
|
lin Q33_Finland_Country = mkCountry "Suomi" ;
|
||||||
|
lin Q282428_Mamluk_Sultanate_Country = mkCountry "Mamlukin sulttaanikunta" ;
|
||||||
|
lin Q38104_Nobel_Prize_in_Physics_Award = mkAward "Nobelin fysiikanpalkinto" ;
|
||||||
|
lin Q9903_Ming_dynasty_Country = mkCountry "Ming-dynastia" ;
|
||||||
|
lin Q739_Colombia_Country = mkCountry "Kolumbia" ;
|
||||||
|
lin Q13426199_Republic_of_China_Country = mkCountry "Kiinan tasavalta" ;
|
||||||
|
lin Q55_Netherlands_Country = mkCountry "Alankomaat" ;
|
||||||
|
lin Q159_Russia_Country = mkCountry "Venäjä" ;
|
||||||
|
lin Q27_Ireland_Country = mkCountry "Irlanti" ;
|
||||||
|
lin Q48685_Kingdom_of_Judah_Country = mkCountry "Juudan kuningaskunta" ;
|
||||||
|
lin Q810_Jordan_Country = mkCountry "Jordania" ;
|
||||||
|
lin Q36_Poland_Country = mkCountry "Puola" ;
|
||||||
|
lin Q1014_Liberia_Country = mkCountry "Liberia" ;
|
||||||
|
lin Q38872_Prussia_Country = mkCountry "Preussi" ;
|
||||||
|
lin 'Q574_Timor-Leste_Country' = mkCountry "Itä-Timor" ;
|
||||||
|
lin Q974_Democratic_Republic_of_the_Congo_Country = mkCountry "Kongon demokraattinen tasavalta" ;
|
||||||
|
lin Q15843470_Roman_Palestine_Country = mkCountry "Roman Palestine" ;
|
||||||
|
lin Q40_Austria_Country = mkCountry "Itävalta" ;
|
||||||
|
lin Q928_Philippines_Country = mkCountry "Filippiinit" ;
|
||||||
|
lin Q148_People's_Republic_of_China_Country = mkCountry "Kiinan kansantasavalta" ;
|
||||||
|
lin Q35_Denmark_Country = mkCountry "Tanska" ;
|
||||||
|
lin Q954_Zimbabwe_Country = mkCountry "Zimbabwe" ;
|
||||||
|
lin Q216173_Free_City_of_Danzig_Country = mkCountry "Danzigin vapaakaupunki" ;
|
||||||
|
lin Q227_Azerbaijan_Country = mkCountry "Azerbaidžan" ;
|
||||||
|
lin Q252_Indonesia_Country = mkCountry "Indonesia" ;
|
||||||
|
lin Q801_Israel_Country = mkCountry "Israel" ;
|
||||||
|
lin Q155_Brazil_Country = mkCountry "Brasilia" ;
|
||||||
|
lin Q29_Spain_Country = mkCountry "Espanja" ;
|
||||||
|
lin Q7075820_Occupied_Enemy_Territory_Administration_Country = mkCountry "Occupied Enemy Territory Administration" ;
|
||||||
|
lin Q2685298_Romanian_People's_Republic_Country = mkCountry "Romanian kansantasavalta" ;
|
||||||
|
lin Q45_Portugal_Country = mkCountry "Portugali" ;
|
||||||
|
lin Q32_Luxembourg_Country = mkCountry "Luxemburg" ;
|
||||||
|
lin Q115_Ethiopia_Country = mkCountry "Etiopia" ;
|
||||||
|
lin Q193714_Mandatory_Palestine_Country = mkCountry "Palestiinan brittiläinen mandaatti" ;
|
||||||
|
lin Q34_Sweden_Country = mkCountry "Ruotsi" ;
|
||||||
|
lin Q262_Algeria_Country = mkCountry "Algeria" ;
|
||||||
|
lin Q37922_Nobel_Prize_in_Literature_Award = mkAward "Nobelin kirjallisuuspalkinto" ;
|
||||||
|
lin Q843_Pakistan_Country = mkCountry "Pakistan" ;
|
||||||
|
lin Q35637_Nobel_Peace_Prize_Award = mkAward "Nobelin rauhanpalkinto" ;
|
||||||
|
lin Q1033_Nigeria_Country = mkCountry "Nigeria" ;
|
||||||
|
lin Q38_Italy_Country = mkCountry "Italia" ;
|
||||||
|
lin Q668_India_Country = mkCountry "Intia" ;
|
||||||
|
lin Q496922_Hasmonean_dynasty_Country = mkCountry "Israelin toinen kuningaskunta" ;
|
||||||
|
lin Q212_Ukraine_Country = mkCountry "Ukraina" ;
|
||||||
|
lin Q44585_Nobel_Prize_in_Chemistry_Award = mkAward "Nobelin kemianpalkinto" ;
|
||||||
|
lin Q760_Saint_Lucia_Country = mkCountry "Saint Lucia" ;
|
||||||
|
lin Q414_Argentina_Country = mkCountry "Argentiina" ;
|
||||||
|
lin Q218_Romania_Country = mkCountry "Romania" ;
|
||||||
|
lin Q213_Czech_Republic_Country = mkCountry "Tšekki" ;
|
||||||
|
lin Q219_Bulgaria_Country = mkCountry "Bulgaria" ;
|
||||||
|
lin Q12560_Ottoman_Empire_Country = mkCountry "Osmanien valtakunta" ;
|
||||||
|
lin Q224_Croatia_Country = mkCountry "Kroatia" ;
|
||||||
|
lin Q419_Peru_Country = mkCountry "Peru" ;
|
||||||
|
lin Q1019_Madagascar_Country = mkCountry "Madagaskar" ;
|
||||||
|
lin Q30_United_States_Country = mkCountry "Yhdysvallat" ;
|
||||||
|
lin Q180114_Ayyubid_dynasty_Country = mkCountry "Aijubidit" ;
|
||||||
|
lin Q8733_Qing_dynasty_Country = mkCountry "Qing-dynastia" ;
|
||||||
|
lin Q96_Mexico_Country = mkCountry "Meksiko" ;
|
||||||
|
lin Q884_South_Korea_Country = mkCountry "Korean tasavalta" ;
|
||||||
|
lin Q114_Kenya_Country = mkCountry "Kenia" ;
|
||||||
|
}
|
||||||
@@ -1,15 +1,17 @@
|
|||||||
abstract Nobel = Labels ** {
|
abstract Nobel = Labels ** {
|
||||||
|
|
||||||
flags startcat = Description ;
|
flags startcat = Sentence ;
|
||||||
|
|
||||||
cat
|
cat
|
||||||
Description ;
|
Sentence ;
|
||||||
Name ;
|
Name ;
|
||||||
Date ;
|
Date ;
|
||||||
|
|
||||||
fun
|
fun
|
||||||
LivingDescription : Name -> Name -> Country -> Date -> Date -> Award -> Description ;
|
BornSentence : Name -> Country -> Date -> Sentence ;
|
||||||
PastDescription : Name -> Name -> Country -> Date -> Date -> Date -> Award -> Description ;
|
AwardSentence : Name -> Award -> Date -> Sentence ;
|
||||||
|
DiedSentence : Name -> Date -> Sentence ;
|
||||||
|
|
||||||
StringName : String -> Name ;
|
StringName : String -> Name ;
|
||||||
YearDate : Int -> Date ;
|
YearDate : Int -> Date ;
|
||||||
he_Name, she_Name, they_Name : Name ;
|
he_Name, she_Name, they_Name : Name ;
|
||||||
|
|||||||
@@ -6,17 +6,20 @@ concrete NobelEng of Nobel = LabelsEng ** open
|
|||||||
in {
|
in {
|
||||||
|
|
||||||
lincat
|
lincat
|
||||||
Description = Text ;
|
Sentence = S ;
|
||||||
Name = NP ;
|
Name = NP ;
|
||||||
Date = Adv ;
|
Date = Adv ;
|
||||||
|
|
||||||
lin
|
lin
|
||||||
LivingDescription name pron country birthdate awarddate award =
|
BornSentence name country date =
|
||||||
mkText
|
mkS pastTense (mkCl name (mkVP (mkVP born_VP (inAdv country)) date)) ;
|
||||||
(mkPhr (mkS pastTense (mkCl name (mkVP (mkVP born_VP (inAdv country)) birthdate))))
|
|
||||||
(mkText (mkS pastTense (mkCl pron (mkVP (mkVP (mkV2 get_V) award) awarddate)))) ;
|
AwardSentence name award date =
|
||||||
|
mkS pastTense (mkCl name (mkVP (mkVP (mkV2 get_V) award) date)) ;
|
||||||
|
|
||||||
|
DiedSentence name date =
|
||||||
|
mkS pastTense (mkCl name (mkVP die_VP date)) ;
|
||||||
|
|
||||||
-- PastDescription : Name -> Country -> Date -> Date -> Award -> Description ;
|
|
||||||
StringName s = symb s ;
|
StringName s = symb s ;
|
||||||
|
|
||||||
YearDate i = inAdv <symb i : NP> ;
|
YearDate i = inAdv <symb i : NP> ;
|
||||||
@@ -28,5 +31,6 @@ lin
|
|||||||
oper
|
oper
|
||||||
inAdv : NP -> Adv = \np -> SyntaxEng.mkAdv in_Prep np ;
|
inAdv : NP -> Adv = \np -> SyntaxEng.mkAdv in_Prep np ;
|
||||||
born_VP = mkVP (mkA "born") ;
|
born_VP = mkVP (mkA "born") ;
|
||||||
|
die_VP = mkVP (mkV "die") ;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
36
lab2/grammars/NobelFin.gf
Normal file
36
lab2/grammars/NobelFin.gf
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
concrete NobelFin of Nobel = LabelsFin ** open
  SyntaxFin,
  ParadigmsFin,
  SymbolicFin
  in {

-- Finnish concrete syntax for the Nobel abstract syntax, built on top of
-- the country/award labels in LabelsFin.

lincat
  Sentence = S ;
  Name = NP ;
  Date = Adv ;

lin
  -- Past-tense clause: born_VP modified first by the place adverbial,
  -- then by the date adverbial.
  BornSentence person place year =
    mkS pastTense (mkCl person (mkVP (mkVP born_VP (inAdv place)) year)) ;

  -- Past-tense clause: get_V2 with the award as object, modified by the
  -- date adverbial.
  AwardSentence person prize year =
    mkS pastTense (mkCl person (mkVP (mkVP get_V2 prize) year)) ;

  -- Past-tense clause: die_VP modified by the date adverbial.
  DiedSentence person year =
    mkS pastTense (mkCl person (mkVP die_VP year)) ;

  -- A name is the given string used symbolically as a noun phrase.
  StringName str = symb str ;

  -- A date is the preposition "vuonna" (governing the nominative)
  -- applied to the integer used symbolically as a noun phrase.
  YearDate num = SyntaxFin.mkAdv (mkPrep "vuonna" nominative) <symb num : NP> ;

  he_Name = he_NP ;
  she_Name = she_NP ;
  -- NOTE(review): they_Name reuses he_NP, presumably because the Finnish
  -- third-person singular pronoun is not gendered -- confirm.
  they_Name = he_NP ;

oper
  -- Place adverbial: in_Prep applied to a noun phrase.
  inAdv : NP -> Adv = \place -> SyntaxFin.mkAdv in_Prep place ;

  -- Lexical items used by the sentence rules above.
  born_VP = mkVP (mkV "syntyä") ;
  die_VP = mkVP (mkV "kuolla") ;
  get_V2 = mkV2 (mkV "saada") ;

}
|
||||||
@@ -1,49 +0,0 @@
|
|||||||
import json
|
|
||||||
|
|
||||||
# query: https://w.wiki/3tEM
|
|
||||||
|
|
||||||
DATA_FILE = 'query.json'
|
|
||||||
|
|
||||||
with open(DATA_FILE) as file:
|
|
||||||
data = json.load(file)
|
|
||||||
|
|
||||||
print(data[0])
|
|
||||||
|
|
||||||
awards = {(d['award'], d['awardLabel']) for d in data}
|
|
||||||
|
|
||||||
#print(awards)
|
|
||||||
#print(len(awards))
|
|
||||||
|
|
||||||
countries = {(d['country'], d['countryLabel']) for d in data}
|
|
||||||
|
|
||||||
#print(countries)
|
|
||||||
#print(len(countries))
|
|
||||||
|
|
||||||
#print(data[0].keys())
|
|
||||||
|
|
||||||
def pronoun(d):
|
|
||||||
sex = d.get('sexLabel', 'other')
|
|
||||||
if sex == 'female':
|
|
||||||
return 'she'
|
|
||||||
elif sex == 'male':
|
|
||||||
return 'he'
|
|
||||||
else:
|
|
||||||
return 'they'
|
|
||||||
|
|
||||||
|
|
||||||
def year(date):
|
|
||||||
return date[:4]
|
|
||||||
|
|
||||||
|
|
||||||
def person_descr(d):
|
|
||||||
died = f"{d['personLabel']} died {year(d['deathDate'])}" if 'deathDate' in d else ''
|
|
||||||
return (
|
|
||||||
f"{d['personLabel']} from {d['countryLabel']} was born in {year(d['birthDate'])}. " +
|
|
||||||
f"{pronoun(d)} got {d['awardLabel']} in {year(d['date'])}." +
|
|
||||||
died
|
|
||||||
)
|
|
||||||
|
|
||||||
for d in data:
|
|
||||||
print(person_descr(d))
|
|
||||||
|
|
||||||
|
|
||||||
95
lab2/scripts/describe_nobel.py
Normal file
95
lab2/scripts/describe_nobel.py
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import pgf
|
||||||
|
|
||||||
|
# query: https://w.wiki/3tEM
|
||||||
|
|
||||||
|
# File locations. The paths are relative, so they resolve against the
# current working directory; they match running the script from lab2/scripts/.
DATA_FILE = '../data/query.json'
# Prefix prepended to the Q-ids of the funs file to obtain the entity
# values used as lookup keys for the 'award' and 'country' fields.
WIKIDATA_PREFIX = 'http://www.wikidata.org/entity/'
GRAMMAR_PREFIX = 'Nobel'
GRAMMAR_FILE = f'../grammars/{GRAMMAR_PREFIX}.pgf'
FUN_FILE = f'../data/{GRAMMAR_PREFIX}-funs.jsonl'

# Decode explicitly as UTF-8: the labels contain non-ASCII characters, and
# the platform default encoding is not UTF-8 everywhere.
with open(DATA_FILE, encoding='utf-8') as file:
    data = json.load(file)

# Distinct (entity, label) pairs occurring in the query result.
awards = {(d['award'], d['awardLabel']) for d in data}

countries = {(d['country'], d['countryLabel']) for d in data}
|
||||||
|
|
||||||
|
# helpers shared by template-based and grammar-based generation
|
||||||
|
|
||||||
|
def pronoun(d):
    """Return the pronoun stem ('she', 'he' or 'they') for record *d*.

    The choice is driven by the optional 'sexLabel' entry: 'female' gives
    'she', 'male' gives 'he', and anything else (including a missing
    entry) gives 'they'.
    """
    sex_label = d.get('sexLabel', 'other')
    if sex_label == 'female':
        return 'she'
    if sex_label == 'male':
        return 'he'
    return 'they'
|
||||||
|
|
||||||
|
def year(date):
    """Return the first four characters of *date* (shorter input is returned whole)."""
    return date[0:4]
|
||||||
|
|
||||||
|
# template-based generation in English
|
||||||
|
|
||||||
|
def template_description(d):
    """Return an English description of record *d* built from string templates.

    The result consists of a birth sentence and an award sentence, plus a
    death sentence when *d* has a 'deathDate' entry. Sentences are
    separated by a single space and each one ends with a period.

    Reads the keys 'personLabel', 'countryLabel', 'birthDate', 'awardLabel'
    and 'date' (and 'deathDate' / 'sexLabel' when present); a missing
    mandatory key raises KeyError.
    """
    person = d['personLabel']
    sentences = [
        f"{person} was born in {d['countryLabel']} in {year(d['birthDate'])}.",
        # NOTE(review): the pronoun is emitted in lower case, as before.
        f"{pronoun(d)} got the {d['awardLabel']} in {year(d['date'])}.",
    ]
    if 'deathDate' in d:
        # Previously this sentence was glued to the preceding period with no
        # space and had no final period of its own.
        sentences.append(f"{person} died {year(d['deathDate'])}.")
    return ' '.join(sentences)
|
||||||
|
|
||||||
|
# grammar-based generation in a given language
|
||||||
|
|
||||||
|
def name(d):
    """Return the GF expression source ``StringName "<label>"`` for record *d*.

    The label is taken from ``d['personLabel']``. Backslashes and double
    quotes in the label are backslash-escaped so that the label cannot
    terminate the string literal early when the expression is parsed.
    """
    person = d['personLabel']
    # Escape backslashes first so the escapes added for quotes stay intact.
    escaped = person.replace('\\', '\\\\').replace('"', '\\"')
    return f'StringName "{escaped}"'
|
||||||
|
|
||||||
|
|
||||||
|
def funs(funfile):
    """Load the entity-to-function mapping from the JSON Lines file *funfile*.

    Every non-blank line is parsed as JSON and indexed as a pair: element 0,
    prefixed with WIKIDATA_PREFIX, becomes the key and element 1 the value.
    Blank lines are skipped.

    Returns the resulting dict. Malformed JSON raises json.JSONDecodeError.
    """
    mapping = {}
    # The file holds non-ASCII identifiers, so decode it as UTF-8 explicitly
    # instead of relying on the platform default.
    with open(funfile, encoding='utf-8') as file:
        for line in file:
            if not line.strip():
                continue
            entry = json.loads(line)
            mapping[WIKIDATA_PREFIX + entry[0]] = entry[1]
    return mapping
|
||||||
|
|
||||||
|
|
||||||
|
def country(fundata, d):
    """Look up in *fundata* the entry stored under ``d['country']``.

    Raises KeyError when *d* has no 'country' or *fundata* has no such key.
    """
    entity = d['country']
    return fundata[entity]
|
||||||
|
|
||||||
|
|
||||||
|
def award(fundata, d):
    """Look up in *fundata* the entry stored under ``d['award']``.

    Raises KeyError when *d* has no 'award' or *fundata* has no such key.
    """
    entity = d['award']
    return fundata[entity]
|
||||||
|
|
||||||
|
|
||||||
|
def grammar_description(grammar, fundata, d, lang):
    """Return a description of record *d* linearized with the concrete syntax *lang*.

    A BornSentence and an AwardSentence tree are always built; a
    DiedSentence tree is added when *d* has a 'deathDate' entry. Each tree
    is linearized with ``lang.linearize``, a period is appended, and the
    sentences are joined with single spaces.

    *fundata* supplies the function names for the record's country and
    award. *grammar* is accepted but not used in this function.
    """
    person = name(d)
    trees = [
        pgf.readExpr(
            f"BornSentence ({person}) {country(fundata, d)} (YearDate {year(d['birthDate'])})"),
        pgf.readExpr(
            f"AwardSentence {pronoun(d)}_Name {award(fundata, d)} (YearDate {year(d['date'])})"),
    ]
    if 'deathDate' in d:
        trees.append(pgf.readExpr(
            f"DiedSentence ({person}) (YearDate {year(d['deathDate'])})"))
    return ' '.join(lang.linearize(tree) + '.' for tree in trees)
|
||||||
|
|
||||||
|
|
||||||
|
# Without a command-line argument, print the template-based English
# descriptions. With one, treat it as the language suffix of a concrete
# syntax (GRAMMAR_PREFIX + suffix) and print grammar-based descriptions.
if len(sys.argv) <= 1:
    for d in data:
        print(template_description(d))
else:
    grammar = pgf.readPGF(GRAMMAR_FILE)
    fundata = funs(FUN_FILE)
    lang = grammar.languages[GRAMMAR_PREFIX + sys.argv[1]]
    for d in data:
        print(grammar_description(grammar, fundata, d, lang))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -8,9 +8,10 @@ from gf_utils import *
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
To collect labels from query.json (Wikidata query result) and extract grammars:
|
To collect labels from query.json (Wikidata query result) and extract grammars:
|
||||||
python3 find_labels.py init >labels.jsonl
|
python3 find_labels.py init >../data/labels.jsonl
|
||||||
python3 find_labels.py abstract >Labels.gf
|
python3 find_labels.py funs >../data/Nobel-funs.jsonl
|
||||||
python3 find_labels.py en >LabelsEng.gf
|
python3 find_labels.py abstract >../grammars/Labels.gf
|
||||||
|
python3 find_labels.py en >../grammars/LabelsEng.gf
|
||||||
"""
|
"""
|
||||||
|
|
||||||
WIKIDATA_FILE = '../data/query.json'
|
WIKIDATA_FILE = '../data/query.json'
|
||||||
@@ -19,7 +20,7 @@ WIKIDATA_URL_PREFIX = 'http://www.wikidata.org/wiki/Special:EntityData/'
|
|||||||
NOBEL_FIELDS = ['award', 'country']
|
NOBEL_FIELDS = ['award', 'country']
|
||||||
LABEL_FILE = '../data/labels.jsonl'
|
LABEL_FILE = '../data/labels.jsonl'
|
||||||
|
|
||||||
USAGE = 'usage: find_labels.py (init | abstract | en | sv | fi | ...)'
|
USAGE = 'usage: find_labels.py (init | funs | abstract | en | sv | fi | ...)'
|
||||||
|
|
||||||
if sys.argv[1:]:
|
if sys.argv[1:]:
|
||||||
MODE = sys.argv[1]
|
MODE = sys.argv[1]
|
||||||
@@ -78,7 +79,9 @@ def extract_labels(labeldata, mode):
|
|||||||
eng = labels.get('en', 'X')
|
eng = labels.get('en', 'X')
|
||||||
cat = labels['field'].capitalize()
|
cat = labels['field'].capitalize()
|
||||||
fun = mk_fun_from_strs([qid, eng, cat])
|
fun = mk_fun_from_strs([qid, eng, cat])
|
||||||
if mode == 'abstract':
|
if mode == 'funs':
|
||||||
|
print(json.dumps([qid, fun], ensure_ascii=False))
|
||||||
|
elif mode == 'abstract':
|
||||||
print(mk_fun_rule(fun, cat))
|
print(mk_fun_rule(fun, cat))
|
||||||
else:
|
else:
|
||||||
lin = labels.get(mode, labels.get('en', 'X'))
|
lin = labels.get(mode, labels.get('en', 'X'))
|
||||||
|
|||||||
Reference in New Issue
Block a user