forked from GitHub/comp-syntax-gu-mlt
everything in place for Lab 2
This commit is contained in:
@@ -1,49 +0,0 @@
|
||||
import json

# query: https://w.wiki/3tEM

# Cached Wikidata query result (JSON list of result bindings).
DATA_FILE = 'query.json'

# Load the whole query result into memory.
with open(DATA_FILE) as file:
    data = json.load(file)

print(data[0])

# Distinct (award URI, award label) pairs occurring in the data.
awards = {(d['award'], d['awardLabel']) for d in data}

#print(awards)
#print(len(awards))

# Distinct (country URI, country label) pairs occurring in the data.
countries = {(d['country'], d['countryLabel']) for d in data}
|
||||
|
||||
#print(countries)
|
||||
#print(len(countries))
|
||||
|
||||
#print(data[0].keys())
|
||||
|
||||
def pronoun(d):
    """Return the English pronoun for a person record: 'she', 'he', or 'they'.

    Falls back to 'they' when 'sexLabel' is missing or not 'female'/'male'.
    """
    by_sex = {'female': 'she', 'male': 'he'}
    return by_sex.get(d.get('sexLabel', 'other'), 'they')
|
||||
|
||||
|
||||
def year(date):
    """Return the year part (leading four characters) of an ISO date string."""
    yyyy = date[0:4]
    return yyyy
|
||||
|
||||
|
||||
def person_descr(d):
    """Build a one-paragraph English description of laureate record *d*.

    Uses keys 'personLabel', 'countryLabel', 'birthDate', 'awardLabel',
    'date'; 'deathDate' is optional.
    """
    parts = [
        f"{d['personLabel']} from {d['countryLabel']} was born in {year(d['birthDate'])}. ",
        f"{pronoun(d)} got {d['awardLabel']} in {year(d['date'])}.",
    ]
    if 'deathDate' in d:
        # NOTE(review): reproduced verbatim — the death sentence is appended
        # with no separating space, matching the original output exactly.
        parts.append(f"{d['personLabel']} died {year(d['deathDate'])}")
    return ''.join(parts)
|
||||
|
||||
# Print a template-based English description for every record in the query result.
for d in data:
    print(person_descr(d))
|
||||
|
||||
|
||||
95
lab2/scripts/describe_nobel.py
Normal file
95
lab2/scripts/describe_nobel.py
Normal file
@@ -0,0 +1,95 @@
|
||||
import sys
import json
import pgf

# query: https://w.wiki/3tEM

# Cached Wikidata query result (JSON list of result bindings).
DATA_FILE = '../data/query.json'
# Entity URIs in the query result all start with this prefix.
WIKIDATA_PREFIX = 'http://www.wikidata.org/entity/'
# Base name shared by the compiled grammar and its generated data files.
GRAMMAR_PREFIX = 'Nobel'
GRAMMAR_FILE = f'../grammars/{GRAMMAR_PREFIX}.pgf'
# JSONL file mapping Wikidata QIDs to GF function names.
FUN_FILE = f'../data/{GRAMMAR_PREFIX}-funs.jsonl'


# Load the whole query result into memory.
with open(DATA_FILE) as file:
    data = json.load(file)

#print(data[0])

# Distinct (award URI, award label) pairs occurring in the data.
awards = {(d['award'], d['awardLabel']) for d in data}

#print(awards)
#print(len(awards))

# Distinct (country URI, country label) pairs occurring in the data.
countries = {(d['country'], d['countryLabel']) for d in data}
|
||||
|
||||
# template-based generation in English
|
||||
|
||||
def pronoun(d):
    """Choose an English pronoun for the record: 'she', 'he', or 'they'."""
    sex = d.get('sexLabel', 'other')
    if sex not in ('female', 'male'):
        return 'they'
    return 'she' if sex == 'female' else 'he'
|
||||
|
||||
def year(date):
    """Return the year component (first four characters) of an ISO date string."""
    y = date[:4]
    return y
|
||||
|
||||
# template-based generation in English
|
||||
|
||||
def template_description(d):
    """Render an English description of laureate record *d* from f-string templates.

    Expects the Wikidata result keys 'personLabel', 'countryLabel',
    'birthDate', 'awardLabel' and 'date'; 'deathDate' is optional.
    Returns the sentences joined by single spaces.
    """
    sentences = [
        f"{d['personLabel']} was born in {d['countryLabel']} in {year(d['birthDate'])}.",
        f"{pronoun(d)} got the {d['awardLabel']} in {year(d['date'])}.",
    ]
    if 'deathDate' in d:
        # Bug fix: the death sentence used to be concatenated directly onto
        # the award sentence with no separating space and no final period.
        sentences.append(f"{d['personLabel']} died {year(d['deathDate'])}.")
    return ' '.join(sentences)
|
||||
|
||||
# grammar-based generation in a given language
|
||||
|
||||
def name(d):
    """Return a GF expression string naming the person, e.g. 'StringName "Marie Curie"'.

    Backslashes and double quotes in the label are escaped so that a label
    containing them cannot break the quoted GF string literal handed to
    pgf.readExpr (presumed to follow standard backslash escaping — confirm
    against the GF runtime docs).
    """
    person = d['personLabel'].replace('\\', '\\\\').replace('"', '\\"')
    return f'StringName "{person}"'
|
||||
|
||||
|
||||
def funs(funfile):
    """Read a JSONL file of [qid, fun] pairs into a {full entity URI: fun} dict.

    Each line holds a JSON array whose first element is a Wikidata QID and
    whose second is the corresponding GF function name.
    """
    mapping = {}
    with open(funfile) as handle:
        for line in handle:
            record = json.loads(line)
            mapping[WIKIDATA_PREFIX + record[0]] = record[1]
    return mapping
|
||||
|
||||
|
||||
def country(fundata, d):
    """Look up the GF function name for the record's country entity URI."""
    uri = d['country']
    return fundata[uri]
|
||||
|
||||
|
||||
def award(fundata, d):
    """Look up the GF function name for the record's award entity URI."""
    uri = d['award']
    return fundata[uri]
|
||||
|
||||
|
||||
def grammar_description(grammar, fundata, d, lang):
    """Linearize a grammar-based description of laureate *d* in concrete syntax *lang*.

    Builds two or three abstract-syntax expressions (born, awarded, and —
    when 'deathDate' is present — died), linearizes each with *lang*, and
    joins them with a trailing period per sentence.
    NOTE(review): *grammar* is unused in the body — presumably kept for
    interface symmetry with the caller; confirm before removing.
    """
    born = pgf.readExpr(
        f"BornSentence ({name(d)}) {country(fundata, d)} (YearDate {year(d['birthDate'])})")
    # The pronoun word doubles as a grammar function name, e.g. 'she_Name'.
    awarded = pgf.readExpr(
        f"AwardSentence {pronoun(d)}_Name {award(fundata, d)} (YearDate {year(d['date'])})")
    sentences = [born, awarded]
    if 'deathDate' in d:
        died = pgf.readExpr(
            f"DiedSentence ({name(d)}) (YearDate {year(d['deathDate'])})")
        sentences.append(died)
    return ' '.join([lang.linearize(s) + '.' for s in sentences])
|
||||
|
||||
|
||||
# Entry point: with a language-code argument (e.g. 'Eng'), generate via the
# compiled GF grammar; with no arguments, fall back to the English templates.
if sys.argv[1:]:
    grammar = pgf.readPGF(GRAMMAR_FILE)
    fundata = funs(FUN_FILE)
    # Concrete syntax name is the grammar prefix plus the language code,
    # e.g. 'NobelEng'.
    lang = grammar.languages[GRAMMAR_PREFIX + sys.argv[1]]
    for d in data:
        print(grammar_description(grammar, fundata, d, lang))
else:
    for d in data:
        print(template_description(d))
|
||||
|
||||
|
||||
|
||||
@@ -8,9 +8,10 @@ from gf_utils import *
|
||||
|
||||
"""
|
||||
To collect labels from query.json (Wikidata query result) and extract grammars:
|
||||
python3 find_labels.py init >labels.jsonl
|
||||
python3 find_labels.py abstract >Labels.gf
|
||||
python3 find_labels.py en >LabelsEng.gf
|
||||
python3 find_labels.py init >../data/labels.jsonl
|
||||
python3 find_labels.py funs >../data/funs.jsonl
|
||||
python3 find_labels.py abstract >../data/Labels.gf
|
||||
python3 find_labels.py en >../data/LabelsEng.gf
|
||||
"""
|
||||
|
||||
WIKIDATA_FILE = '../data/query.json'
|
||||
@@ -19,7 +20,7 @@ WIKIDATA_URL_PREFIX = 'http://www.wikidata.org/wiki/Special:EntityData/'
|
||||
NOBEL_FIELDS = ['award', 'country']
|
||||
LABEL_FILE = '../data/labels.jsonl'
|
||||
|
||||
USAGE = 'usage: find_labels.py (init | abstract | en | sv | fi | ...)'
|
||||
USAGE = 'usage: find_labels.py (init | funs | abstract | en | sv | fi | ...)'
|
||||
|
||||
if sys.argv[1:]:
|
||||
MODE = sys.argv[1]
|
||||
@@ -78,7 +79,9 @@ def extract_labels(labeldata, mode):
|
||||
eng = labels.get('en', 'X')
|
||||
cat = labels['field'].capitalize()
|
||||
fun = mk_fun_from_strs([qid, eng, cat])
|
||||
if mode == 'abstract':
|
||||
if mode == 'funs':
|
||||
print(json.dumps([qid, fun], ensure_ascii=False))
|
||||
elif mode == 'abstract':
|
||||
print(mk_fun_rule(fun, cat))
|
||||
else:
|
||||
lin = labels.get(mode, labels.get('en', 'X'))
|
||||
|
||||
Reference in New Issue
Block a user