everything in place for Lab 2

This commit is contained in:
Aarne Ranta
2025-05-04 12:14:37 +02:00
parent cc604093d0
commit 6ea7d5d838
10 changed files with 403 additions and 72 deletions

View File

@@ -1,14 +1,14 @@
# Lab 2: Multilingual text generation from Wikidata
This uses GF to generate texts from facts in the Wikidata fact database.
You will be given
You are given
- an abstract syntax,
- an English concrete syntax,
- a json dump from Wikidata
- a Python file that connects Wikidata with GF
- an abstract syntax and an English concrete syntax, in the subdirectory grammars/
- a json dump from Wikidata, in the subdirectory data/
- a Python file that connects Wikidata with GF, in the subdirectory scripts/
Your task will be to create a concrete syntax for some other language by using the
Your task is to create a concrete syntax for some other language by using the
GF RGL, and to evaluate the texts generated with it.
The listed files will be provided before the lab starts.
More instructions will be given in the lectures during the week of 5-9 May 2025.

112
lab2/data/Nobel-funs.jsonl Normal file
View File

@@ -0,0 +1,112 @@
["Q800", "Q800_Costa_Rica_Country"]
["Q219060", "Q219060_State_of_Palestine_Country"]
["Q37", "Q37_Lithuania_Country"]
["Q137816", "Q137816_Taiwan_under_Japanese_rule_Country"]
["Q1028", "Q1028_Morocco_Country"]
["Q796", "Q796_Iraq_Country"]
["Q184", "Q184_Belarus_Country"]
["Q225", "Q225_Bosnia_and_Herzegovina_Country"]
["Q20", "Q20_Norway_Country"]
["Q211", "Q211_Latvia_Country"]
["Q117", "Q117_Ghana_Country"]
["Q39", "Q39_Switzerland_Country"]
["Q159631", "Q159631_Kingdom_of_Württemberg_Country"]
["Q17", "Q17_Japan_Country"]
["Q189", "Q189_Iceland_Country"]
["Q221", "Q221_North_Macedonia_Country"]
["Q9683", "Q9683_Tang_dynasty_Country"]
["Q79", "Q79_Egypt_Country"]
["Q408", "Q408_Australia_Country"]
["Q4628", "Q4628_Faroe_Islands_Country"]
["Q145", "Q145_United_Kingdom_Country"]
["Q214", "Q214_Slovakia_Country"]
["Q16", "Q16_Canada_Country"]
["Q924", "Q924_Tanzania_Country"]
["Q55502", "Q55502_Kingdom_of_Jerusalem_Country"]
["Q183", "Q183_Germany_Country"]
["Q754", "Q754_Trinidad_and_Tobago_Country"]
["Q298", "Q298_Chile_Country"]
["Q41", "Q41_Greece_Country"]
["Q30623", "Q30623_Manchukuo_Country"]
["Q774", "Q774_Guatemala_Country"]
["Q836", "Q836_Myanmar_Country"]
["Q902", "Q902_Bangladesh_Country"]
["Q215", "Q215_Slovenia_Country"]
["Q7313", "Q7313_Yuan_dynasty_Country"]
["Q822", "Q822_Lebanon_Country"]
["Q12548", "Q12548_Holy_Roman_Empire_Country"]
["Q12407080", "Q12407080_early_Islamic_period_in_Palestine_Country"]
["Q717", "Q717_Venezuela_Country"]
["Q31", "Q31_Belgium_Country"]
["Q794", "Q794_Iran_Country"]
["Q43", "Q43_Turkey_Country"]
["Q948", "Q948_Tunisia_Country"]
["Q258", "Q258_South_Africa_Country"]
["Q28", "Q28_Hungary_Country"]
["Q80061", "Q80061_Nobel_Prize_in_Physiology_or_Medicine_Award"]
["Q142", "Q142_France_Country"]
["Q805", "Q805_Yemen_Country"]
["Q881", "Q881_Vietnam_Country"]
["Q7462", "Q7462_Song_dynasty_Country"]
["Q12544", "Q12544_Byzantine_Empire_Country"]
["Q664", "Q664_New_Zealand_Country"]
["Q33", "Q33_Finland_Country"]
["Q282428", "Q282428_Mamluk_Sultanate_Country"]
["Q38104", "Q38104_Nobel_Prize_in_Physics_Award"]
["Q9903", "Q9903_Ming_dynasty_Country"]
["Q739", "Q739_Colombia_Country"]
["Q13426199", "Q13426199_Republic_of_China_Country"]
["Q55", "Q55_Netherlands_Country"]
["Q159", "Q159_Russia_Country"]
["Q27", "Q27_Ireland_Country"]
["Q48685", "Q48685_Kingdom_of_Judah_Country"]
["Q810", "Q810_Jordan_Country"]
["Q36", "Q36_Poland_Country"]
["Q1014", "Q1014_Liberia_Country"]
["Q38872", "Q38872_Prussia_Country"]
["Q574", "'Q574_Timor-Leste_Country'"]
["Q974", "Q974_Democratic_Republic_of_the_Congo_Country"]
["Q15843470", "Q15843470_Roman_Palestine_Country"]
["Q40", "Q40_Austria_Country"]
["Q928", "Q928_Philippines_Country"]
["Q148", "Q148_People's_Republic_of_China_Country"]
["Q35", "Q35_Denmark_Country"]
["Q954", "Q954_Zimbabwe_Country"]
["Q216173", "Q216173_Free_City_of_Danzig_Country"]
["Q227", "Q227_Azerbaijan_Country"]
["Q252", "Q252_Indonesia_Country"]
["Q801", "Q801_Israel_Country"]
["Q155", "Q155_Brazil_Country"]
["Q29", "Q29_Spain_Country"]
["Q7075820", "Q7075820_Occupied_Enemy_Territory_Administration_Country"]
["Q2685298", "Q2685298_Romanian_People's_Republic_Country"]
["Q45", "Q45_Portugal_Country"]
["Q32", "Q32_Luxembourg_Country"]
["Q115", "Q115_Ethiopia_Country"]
["Q193714", "Q193714_Mandatory_Palestine_Country"]
["Q34", "Q34_Sweden_Country"]
["Q262", "Q262_Algeria_Country"]
["Q37922", "Q37922_Nobel_Prize_in_Literature_Award"]
["Q843", "Q843_Pakistan_Country"]
["Q35637", "Q35637_Nobel_Peace_Prize_Award"]
["Q1033", "Q1033_Nigeria_Country"]
["Q38", "Q38_Italy_Country"]
["Q668", "Q668_India_Country"]
["Q496922", "Q496922_Hasmonean_dynasty_Country"]
["Q212", "Q212_Ukraine_Country"]
["Q44585", "Q44585_Nobel_Prize_in_Chemistry_Award"]
["Q760", "Q760_Saint_Lucia_Country"]
["Q414", "Q414_Argentina_Country"]
["Q218", "Q218_Romania_Country"]
["Q213", "Q213_Czech_Republic_Country"]
["Q219", "Q219_Bulgaria_Country"]
["Q12560", "Q12560_Ottoman_Empire_Country"]
["Q224", "Q224_Croatia_Country"]
["Q419", "Q419_Peru_Country"]
["Q1019", "Q1019_Madagascar_Country"]
["Q30", "Q30_United_States_Country"]
["Q180114", "Q180114_Ayyubid_dynasty_Country"]
["Q8733", "Q8733_Qing_dynasty_Country"]
["Q96", "Q96_Mexico_Country"]
["Q884", "Q884_South_Korea_Country"]
["Q114", "Q114_Kenya_Country"]

View File

@@ -9,7 +9,7 @@ oper mkCountry = overload {
} ;
oper mkAward = overload {
mkAward : Str -> NP = \s -> mkNP (mkPN s) ;
mkAward : Str -> NP = \s -> mkNP the_Det (mkN s) ;
mkAward : NP -> NP = \np -> np ;
} ;

128
lab2/grammars/LabelsFin.gf Normal file
View File

@@ -0,0 +1,128 @@
-- Finnish concrete syntax for the abstract syntax Labels: one linearization
-- per Country / Award function, built with the RGL modules SyntaxFin and
-- ParadigmsFin.
concrete LabelsFin of Labels = open SyntaxFin, ParadigmsFin in {
-- Both categories are linearized as noun phrases.
lincat Country = NP ;
lincat Award = NP ;
-- mkCountry: a string is turned into a proper-name NP (mkPN); an NP is passed through unchanged.
oper mkCountry = overload {
mkCountry : Str -> NP = \s -> mkNP (mkPN s) ;
mkCountry : NP -> NP = \np -> np ;
} ;
-- mkAward: a string is turned into a common noun (mkN) with the_Det; an NP is passed through unchanged.
oper mkAward = overload {
mkAward : Str -> NP = \s -> mkNP the_Det (mkN s) ;
mkAward : NP -> NP = \np -> np ;
} ;
-- Lexicon: function names have the shape <Wikidata Q-id>_<English label>_<category>.
lin Q800_Costa_Rica_Country = mkCountry "Costa Rica" ;
lin Q219060_State_of_Palestine_Country = mkCountry "Palestiina" ;
lin Q37_Lithuania_Country = mkCountry "Liettua" ;
lin Q137816_Taiwan_under_Japanese_rule_Country = mkCountry "Taiwan Japanin alaisuudessa" ;
lin Q1028_Morocco_Country = mkCountry "Marokko" ;
lin Q796_Iraq_Country = mkCountry "Irak" ;
lin Q184_Belarus_Country = mkCountry "Valko-Venäjä" ;
lin Q225_Bosnia_and_Herzegovina_Country = mkCountry "Bosnia ja Hertsegovina" ;
lin Q20_Norway_Country = mkCountry "Norja" ;
lin Q211_Latvia_Country = mkCountry "Latvia" ;
lin Q117_Ghana_Country = mkCountry "Ghana" ;
lin Q39_Switzerland_Country = mkCountry "Sveitsi" ;
lin Q159631_Kingdom_of_Württemberg_Country = mkCountry "Württembergin kuningaskunta" ;
lin Q17_Japan_Country = mkCountry "Japani" ;
lin Q189_Iceland_Country = mkCountry "Islanti" ;
lin Q221_North_Macedonia_Country = mkCountry "Pohjois-Makedonia" ;
lin Q9683_Tang_dynasty_Country = mkCountry "Tang-dynastia" ;
lin Q79_Egypt_Country = mkCountry "Egypti" ;
lin Q408_Australia_Country = mkCountry "Australia" ;
lin Q4628_Faroe_Islands_Country = mkCountry "Färsaaret" ;
lin Q145_United_Kingdom_Country = mkCountry "Yhdistynyt kuningaskunta" ;
lin Q214_Slovakia_Country = mkCountry "Slovakia" ;
lin Q16_Canada_Country = mkCountry "Kanada" ;
lin Q924_Tanzania_Country = mkCountry "Tansania" ;
lin Q55502_Kingdom_of_Jerusalem_Country = mkCountry "Jerusalemin kuningaskunta" ;
lin Q183_Germany_Country = mkCountry "Saksa" ;
lin Q754_Trinidad_and_Tobago_Country = mkCountry "Trinidad ja Tobago" ;
lin Q298_Chile_Country = mkCountry "Chile" ;
lin Q41_Greece_Country = mkCountry "Kreikka" ;
lin Q30623_Manchukuo_Country = mkCountry "Mantšukuo" ;
lin Q774_Guatemala_Country = mkCountry "Guatemala" ;
lin Q836_Myanmar_Country = mkCountry "Myanmar" ;
lin Q902_Bangladesh_Country = mkCountry "Bangladesh" ;
lin Q215_Slovenia_Country = mkCountry "Slovenia" ;
lin Q7313_Yuan_dynasty_Country = mkCountry "Yuan" ;
lin Q822_Lebanon_Country = mkCountry "Libanon" ;
lin Q12548_Holy_Roman_Empire_Country = mkCountry "Pyhä saksalais-roomalainen keisarikunta" ;
-- NOTE(review): the label below is still English - presumably no Finnish label was available; confirm and translate.
lin Q12407080_early_Islamic_period_in_Palestine_Country = mkCountry "early Islamic period in Palestine" ;
lin Q717_Venezuela_Country = mkCountry "Venezuela" ;
lin Q31_Belgium_Country = mkCountry "Belgia" ;
lin Q794_Iran_Country = mkCountry "Iran" ;
lin Q43_Turkey_Country = mkCountry "Turkki" ;
lin Q948_Tunisia_Country = mkCountry "Tunisia" ;
lin Q258_South_Africa_Country = mkCountry "Etelä-Afrikka" ;
lin Q28_Hungary_Country = mkCountry "Unkari" ;
lin Q80061_Nobel_Prize_in_Physiology_or_Medicine_Award = mkAward "Nobelin fysiologian tai lääketieteen palkinto" ;
lin Q142_France_Country = mkCountry "Ranska" ;
lin Q805_Yemen_Country = mkCountry "Jemen" ;
lin Q881_Vietnam_Country = mkCountry "Vietnam" ;
lin Q7462_Song_dynasty_Country = mkCountry "Song-dynastia" ;
lin Q12544_Byzantine_Empire_Country = mkCountry "Bysantin valtakunta" ;
lin Q664_New_Zealand_Country = mkCountry "Uusi-Seelanti" ;
lin Q33_Finland_Country = mkCountry "Suomi" ;
lin Q282428_Mamluk_Sultanate_Country = mkCountry "Mamlukin sulttaanikunta" ;
lin Q38104_Nobel_Prize_in_Physics_Award = mkAward "Nobelin fysiikanpalkinto" ;
lin Q9903_Ming_dynasty_Country = mkCountry "Ming-dynastia" ;
lin Q739_Colombia_Country = mkCountry "Kolumbia" ;
lin Q13426199_Republic_of_China_Country = mkCountry "Kiinan tasavalta" ;
lin Q55_Netherlands_Country = mkCountry "Alankomaat" ;
lin Q159_Russia_Country = mkCountry "Venäjä" ;
lin Q27_Ireland_Country = mkCountry "Irlanti" ;
lin Q48685_Kingdom_of_Judah_Country = mkCountry "Juudan kuningaskunta" ;
lin Q810_Jordan_Country = mkCountry "Jordania" ;
lin Q36_Poland_Country = mkCountry "Puola" ;
lin Q1014_Liberia_Country = mkCountry "Liberia" ;
lin Q38872_Prussia_Country = mkCountry "Preussi" ;
-- Quoted identifier: the name contains '-', which a plain GF identifier cannot.
lin 'Q574_Timor-Leste_Country' = mkCountry "Itä-Timor" ;
lin Q974_Democratic_Republic_of_the_Congo_Country = mkCountry "Kongon demokraattinen tasavalta" ;
-- NOTE(review): the label below is still English - presumably no Finnish label was available; confirm and translate.
lin Q15843470_Roman_Palestine_Country = mkCountry "Roman Palestine" ;
lin Q40_Austria_Country = mkCountry "Itävalta" ;
lin Q928_Philippines_Country = mkCountry "Filippiinit" ;
lin Q148_People's_Republic_of_China_Country = mkCountry "Kiinan kansantasavalta" ;
lin Q35_Denmark_Country = mkCountry "Tanska" ;
lin Q954_Zimbabwe_Country = mkCountry "Zimbabwe" ;
lin Q216173_Free_City_of_Danzig_Country = mkCountry "Danzigin vapaakaupunki" ;
lin Q227_Azerbaijan_Country = mkCountry "Azerbaidžan" ;
lin Q252_Indonesia_Country = mkCountry "Indonesia" ;
lin Q801_Israel_Country = mkCountry "Israel" ;
lin Q155_Brazil_Country = mkCountry "Brasilia" ;
lin Q29_Spain_Country = mkCountry "Espanja" ;
-- NOTE(review): the label below is still English - presumably no Finnish label was available; confirm and translate.
lin Q7075820_Occupied_Enemy_Territory_Administration_Country = mkCountry "Occupied Enemy Territory Administration" ;
lin Q2685298_Romanian_People's_Republic_Country = mkCountry "Romanian kansantasavalta" ;
lin Q45_Portugal_Country = mkCountry "Portugali" ;
lin Q32_Luxembourg_Country = mkCountry "Luxemburg" ;
lin Q115_Ethiopia_Country = mkCountry "Etiopia" ;
lin Q193714_Mandatory_Palestine_Country = mkCountry "Palestiinan brittiläinen mandaatti" ;
lin Q34_Sweden_Country = mkCountry "Ruotsi" ;
lin Q262_Algeria_Country = mkCountry "Algeria" ;
lin Q37922_Nobel_Prize_in_Literature_Award = mkAward "Nobelin kirjallisuuspalkinto" ;
lin Q843_Pakistan_Country = mkCountry "Pakistan" ;
lin Q35637_Nobel_Peace_Prize_Award = mkAward "Nobelin rauhanpalkinto" ;
lin Q1033_Nigeria_Country = mkCountry "Nigeria" ;
lin Q38_Italy_Country = mkCountry "Italia" ;
lin Q668_India_Country = mkCountry "Intia" ;
lin Q496922_Hasmonean_dynasty_Country = mkCountry "Israelin toinen kuningaskunta" ;
lin Q212_Ukraine_Country = mkCountry "Ukraina" ;
lin Q44585_Nobel_Prize_in_Chemistry_Award = mkAward "Nobelin kemianpalkinto" ;
lin Q760_Saint_Lucia_Country = mkCountry "Saint Lucia" ;
lin Q414_Argentina_Country = mkCountry "Argentiina" ;
lin Q218_Romania_Country = mkCountry "Romania" ;
lin Q213_Czech_Republic_Country = mkCountry "Tšekki" ;
lin Q219_Bulgaria_Country = mkCountry "Bulgaria" ;
lin Q12560_Ottoman_Empire_Country = mkCountry "Osmanien valtakunta" ;
lin Q224_Croatia_Country = mkCountry "Kroatia" ;
lin Q419_Peru_Country = mkCountry "Peru" ;
lin Q1019_Madagascar_Country = mkCountry "Madagaskar" ;
lin Q30_United_States_Country = mkCountry "Yhdysvallat" ;
lin Q180114_Ayyubid_dynasty_Country = mkCountry "Aijubidit" ;
lin Q8733_Qing_dynasty_Country = mkCountry "Qing-dynastia" ;
lin Q96_Mexico_Country = mkCountry "Meksiko" ;
lin Q884_South_Korea_Country = mkCountry "Korean tasavalta" ;
lin Q114_Kenya_Country = mkCountry "Kenia" ;
}

View File

@@ -1,15 +1,17 @@
abstract Nobel = Labels ** {
flags startcat = Description ;
flags startcat = Sentence ;
cat
Description ;
Sentence ;
Name ;
Date ;
fun
LivingDescription : Name -> Name -> Country -> Date -> Date -> Award -> Description ;
PastDescription : Name -> Name -> Country -> Date -> Date -> Date -> Award -> Description ;
BornSentence : Name -> Country -> Date -> Sentence ;
AwardSentence : Name -> Award -> Date -> Sentence ;
DiedSentence : Name -> Date -> Sentence ;
StringName : String -> Name ;
YearDate : Int -> Date ;
he_Name, she_Name, they_Name : Name ;

View File

@@ -6,17 +6,20 @@ concrete NobelEng of Nobel = LabelsEng ** open
in {
lincat
Description = Text ;
Sentence = S ;
Name = NP ;
Date = Adv ;
lin
LivingDescription name pron country birthdate awarddate award =
mkText
(mkPhr (mkS pastTense (mkCl name (mkVP (mkVP born_VP (inAdv country)) birthdate))))
(mkText (mkS pastTense (mkCl pron (mkVP (mkVP (mkV2 get_V) award) awarddate)))) ;
BornSentence name country date =
mkS pastTense (mkCl name (mkVP (mkVP born_VP (inAdv country)) date)) ;
AwardSentence name award date =
mkS pastTense (mkCl name (mkVP (mkVP (mkV2 get_V) award) date)) ;
DiedSentence name date =
mkS pastTense (mkCl name (mkVP die_VP date)) ;
-- PastDescription : Name -> Country -> Date -> Date -> Award -> Description ;
StringName s = symb s ;
YearDate i = inAdv <symb i : NP> ;
@@ -28,5 +31,6 @@ lin
oper
inAdv : NP -> Adv = \np -> SyntaxEng.mkAdv in_Prep np ;
born_VP = mkVP (mkA "born") ;
die_VP = mkVP (mkV "die") ;
}

36
lab2/grammars/NobelFin.gf Normal file
View File

@@ -0,0 +1,36 @@
-- Finnish concrete syntax for Nobel. Extends LabelsFin (country and award
-- labels) and builds the three sentence types with the RGL API.
concrete NobelFin of Nobel = LabelsFin ** open SyntaxFin, ParadigmsFin, SymbolicFin in {

  lincat
    Sentence = S ;
    Name = NP ;
    Date = Adv ;

  lin
    -- <person> was born in <place> <yr>
    BornSentence person place yr =
      mkS pastTense (mkCl person (mkVP (mkVP born_VP (inAdv place)) yr)) ;

    -- <person> got <prize> <yr>
    AwardSentence person prize yr =
      mkS pastTense (mkCl person (mkVP (mkVP get_V2 prize) yr)) ;

    -- <person> died <yr>
    DiedSentence person yr =
      mkS pastTense (mkCl person (mkVP die_VP yr)) ;

    -- A name given as a string literal is used as a symbolic NP.
    StringName label = symb label ;

    -- The year number is a symbolic NP governed by "vuonna".
    YearDate n = SyntaxFin.mkAdv (mkPrep "vuonna" nominative) <symb n : NP> ;

    he_Name = he_NP ;
    she_Name = she_NP ;
    -- NOTE(review): they_Name reuses he_NP - presumably because the Finnish
    -- third-person pronoun is not gendered; confirm.
    they_Name = he_NP ;

  oper
    -- Locative adverbial formed with in_Prep.
    inAdv : NP -> Adv = \x -> SyntaxFin.mkAdv in_Prep x ;

    born_VP = mkVP (mkV "syntyä") ;
    die_VP = mkVP (mkV "kuolla") ;
    get_V2 = mkV2 (mkV "saada") ;
}

View File

@@ -1,49 +0,0 @@
import json
# query: https://w.wiki/3tEM
# Wikidata query result; the path is relative to the current working directory.
DATA_FILE = 'query.json'

# JSON text is UTF-8; pass the encoding explicitly so that non-ASCII labels are
# decoded correctly even where the locale's default encoding is different.
with open(DATA_FILE, encoding='utf-8') as file:
    data = json.load(file)

# Show the first record as a quick look at the available fields.
print(data[0])

# Distinct (identifier, label) pairs that occur in the query result.
awards = {(d['award'], d['awardLabel']) for d in data}
countries = {(d['country'], d['countryLabel']) for d in data}
def pronoun(d):
    """Return the English pronoun for the record's ``sexLabel``.

    'female' gives 'she' and 'male' gives 'he'; a missing key or any other
    value gives 'they'.
    """
    label = d.get('sexLabel', 'other')
    return 'she' if label == 'female' else 'he' if label == 'male' else 'they'
def year(date):
    """Return the first four characters of *date*, i.e. its leading year digits."""
    width = 4
    return date[:width]
def person_descr(d):
    """Return a short English description of one laureate record.

    The text has a birth sentence, an award sentence and, when the record
    has a ``deathDate``, a death sentence. Sentences are separated by a
    single space and each ends with a period.

    Reads the keys ``personLabel``, ``countryLabel``, ``birthDate``,
    ``awardLabel``, ``date`` and optionally ``deathDate`` from *d*.
    """
    person = d['personLabel']
    sentences = [
        f"{person} from {d['countryLabel']} was born in {year(d['birthDate'])}.",
        f"{pronoun(d)} got {d['awardLabel']} in {year(d['date'])}.",
    ]
    if 'deathDate' in d:
        # Previously this sentence was appended directly after the preceding
        # period, without a separating space and without its own final period.
        sentences.append(f"{person} died {year(d['deathDate'])}.")
    return ' '.join(sentences)
# Print one description per record of the query result.
for record in data:
    print(person_descr(record))

View File

@@ -0,0 +1,95 @@
import sys
import json
import pgf
# query: https://w.wiki/3tEM
# All paths are relative to the current working directory.
DATA_FILE = '../data/query.json'
# Prepended to a bare Q-identifier to obtain the entity key used in DATA_FILE.
WIKIDATA_PREFIX = 'http://www.wikidata.org/entity/'
GRAMMAR_PREFIX = 'Nobel'
GRAMMAR_FILE = f'../grammars/{GRAMMAR_PREFIX}.pgf'
FUN_FILE = f'../data/{GRAMMAR_PREFIX}-funs.jsonl'

# JSON text is UTF-8; pass the encoding explicitly so that non-ASCII labels are
# decoded correctly even where the locale's default encoding is different.
with open(DATA_FILE, encoding='utf-8') as file:
    data = json.load(file)

# Distinct (identifier, label) pairs that occur in the query result.
awards = {(d['award'], d['awardLabel']) for d in data}
countries = {(d['country'], d['countryLabel']) for d in data}
# template-based generation in English
def pronoun(d):
    """Map the record's ``sexLabel`` to 'she', 'he' or 'they'.

    'they' is returned when the key is missing or holds any value other
    than 'female' or 'male'.
    """
    sex = d.get('sexLabel', 'other')
    for label, word in (('female', 'she'), ('male', 'he')):
        if sex == label:
            return word
    return 'they'
def year(date):
    """Return the leading four characters of *date* (its year digits)."""
    return date[0:4]
# template-based generation in English
def template_description(d):
    """Return an English description of one laureate record from fixed templates.

    The text has a birth sentence, an award sentence and, when the record
    has a ``deathDate``, a death sentence. Sentences are separated by a
    single space and each ends with a period.

    Reads the keys ``personLabel``, ``countryLabel``, ``birthDate``,
    ``awardLabel``, ``date`` and optionally ``deathDate`` from *d*.
    """
    person = d['personLabel']
    sentences = [
        f"{person} was born in {d['countryLabel']} in {year(d['birthDate'])}.",
        f"{pronoun(d)} got the {d['awardLabel']} in {year(d['date'])}.",
    ]
    if 'deathDate' in d:
        # Previously this sentence was appended directly after the preceding
        # period, without a separating space and without its own final period.
        sentences.append(f"{person} died {year(d['deathDate'])}.")
    return ' '.join(sentences)
# grammar-based generation in a given language
def name(d):
    """Return the text of a GF ``StringName`` expression for the person label.

    The label is placed inside a double-quoted string literal. Backslashes
    and double quotes in the label are backslash-escaped so that they cannot
    end the literal early when the result is parsed as an expression.
    """
    # Escape backslashes first so the escapes added for quotes stay intact.
    person = d['personLabel'].replace('\\', '\\\\').replace('"', '\\"')
    return f'StringName "{person}"'
def funs(funfile):
    """Read a JSON-lines file of ``[qid, fun]`` entries into a lookup table.

    Each non-blank line must be a JSON array whose first element is a
    Q-identifier and whose second element is a grammar function name.

    Returns a dict mapping ``WIKIDATA_PREFIX + qid`` to the function name.
    """
    table = {}
    # Function names can contain non-ASCII characters, so do not depend on the
    # locale's default encoding.
    with open(funfile, encoding='utf-8') as file:
        for line in file:
            if not line.strip():
                continue  # tolerate blank lines instead of failing in json.loads
            entry = json.loads(line)
            table[WIKIDATA_PREFIX + entry[0]] = entry[1]
    return table
def country(fundata, d):
    """Return the *fundata* entry stored under the record's ``country`` value.

    Raises ``KeyError`` if the record has no ``country`` or *fundata* has no
    entry for it.
    """
    entity = d['country']
    return fundata[entity]
def award(fundata, d):
    """Return the *fundata* entry stored under the record's ``award`` value.

    Raises ``KeyError`` if the record has no ``award`` or *fundata* has no
    entry for it.
    """
    entity = d['award']
    return fundata[entity]
def grammar_description(grammar, fundata, d, lang):
    """Return a description of one laureate record, linearized with *lang*.

    Builds a ``BornSentence`` and an ``AwardSentence`` expression and, when
    the record has a ``deathDate``, a ``DiedSentence`` expression. Each is
    linearized with ``lang.linearize``, followed by a period, and the results
    are joined with single spaces.

    *grammar* is accepted but not used in this function. *fundata* is the
    lookup table passed on to ``country`` and ``award``.
    """
    person = name(d)
    parsed = [
        pgf.readExpr(
            f"BornSentence ({person}) {country(fundata, d)} (YearDate {year(d['birthDate'])})"),
        pgf.readExpr(
            f"AwardSentence {pronoun(d)}_Name {award(fundata, d)} (YearDate {year(d['date'])})"),
    ]
    if 'deathDate' in d:
        parsed.append(pgf.readExpr(
            f"DiedSentence ({person}) (YearDate {year(d['deathDate'])})"))
    return ' '.join(lang.linearize(expr) + '.' for expr in parsed)
# With a language suffix on the command line (looked up as GRAMMAR_PREFIX +
# suffix in the compiled grammar), generate with the grammar; without one,
# fall back to the template-based generation.
if len(sys.argv) > 1:
    grammar = pgf.readPGF(GRAMMAR_FILE)
    fundata = funs(FUN_FILE)
    lang = grammar.languages[GRAMMAR_PREFIX + sys.argv[1]]
    descriptions = (grammar_description(grammar, fundata, rec, lang) for rec in data)
else:
    descriptions = (template_description(rec) for rec in data)

for text in descriptions:
    print(text)

View File

@@ -8,9 +8,10 @@ from gf_utils import *
"""
To collect labels from query.json (Wikidata query result) and extract grammars:
python3 find_labels.py init >labels.jsonl
python3 find_labels.py abstract >Labels.gf
python3 find_labels.py en >LabelsEng.gf
python3 find_labels.py init >../data/labels.jsonl
python3 find_labels.py funs >../data/funs.jsonl
python3 find_labels.py abstract >../data/Labels.gf
python3 find_labels.py en >../data/LabelsEng.gf
"""
WIKIDATA_FILE = '../data/query.json'
@@ -19,7 +20,7 @@ WIKIDATA_URL_PREFIX = 'http://www.wikidata.org/wiki/Special:EntityData/'
NOBEL_FIELDS = ['award', 'country']
LABEL_FILE = '../data/labels.jsonl'
USAGE = 'usage: find_labels.py (init | abstract | en | sv | fi | ...)'
USAGE = 'usage: find_labels.py (init | funs | abstract | en | sv | fi | ...)'
if sys.argv[1:]:
MODE = sys.argv[1]
@@ -78,7 +79,9 @@ def extract_labels(labeldata, mode):
eng = labels.get('en', 'X')
cat = labels['field'].capitalize()
fun = mk_fun_from_strs([qid, eng, cat])
if mode == 'abstract':
if mode == 'funs':
print(json.dumps([qid, fun], ensure_ascii=False))
elif mode == 'abstract':
print(mk_fun_rule(fun, cat))
else:
lin = labels.get(mode, labels.get('en', 'X'))