From cc604093d08a8f2015c667ec973bf2cb77492fa9 Mon Sep 17 00:00:00 2001 From: Aarne Ranta Date: Sun, 4 May 2025 10:12:23 +0200 Subject: [PATCH] first Nobel grammar --- lab2/grammars/Labels.gf | 7 ++- lab2/grammars/LabelsEng.gf | 18 +++++++ lab2/grammars/Nobel.gf | 17 ++++++ lab2/grammars/NobelEng.gf | 32 +++++++++++ lab2/scripts/find_labels.py~ | 101 ----------------------------------- 5 files changed, 73 insertions(+), 102 deletions(-) create mode 100644 lab2/grammars/Nobel.gf create mode 100644 lab2/grammars/NobelEng.gf delete mode 100644 lab2/scripts/find_labels.py~ diff --git a/lab2/grammars/Labels.gf b/lab2/grammars/Labels.gf index ed971ed..d38c2d9 100644 --- a/lab2/grammars/Labels.gf +++ b/lab2/grammars/Labels.gf @@ -1,3 +1,7 @@ +abstract Labels = { +cat Country ; +cat Award ; + fun Q800_Costa_Rica_Country : Country ; fun Q219060_State_of_Palestine_Country : Country ; fun Q37_Lithuania_Country : Country ; @@ -109,4 +113,5 @@ fun Q180114_Ayyubid_dynasty_Country : Country ; fun Q8733_Qing_dynasty_Country : Country ; fun Q96_Mexico_Country : Country ; fun Q884_South_Korea_Country : Country ; -fun Q114_Kenya_Country : Country ; +fun Q114_Kenya_Country : Country ; +} diff --git a/lab2/grammars/LabelsEng.gf b/lab2/grammars/LabelsEng.gf index ccd2e45..fb685b4 100644 --- a/lab2/grammars/LabelsEng.gf +++ b/lab2/grammars/LabelsEng.gf @@ -1,3 +1,19 @@ +concrete LabelsEng of Labels = open SyntaxEng, ParadigmsEng in { + +lincat Country = NP ; +lincat Award = NP ; + +oper mkCountry = overload { + mkCountry : Str -> NP = \s -> mkNP (mkPN s) ; + mkCountry : NP -> NP = \np -> np ; + } ; + +oper mkAward = overload { + mkAward : Str -> NP = \s -> mkNP (mkPN s) ; + mkAward : NP -> NP = \np -> np ; + } ; + + lin Q800_Costa_Rica_Country = mkCountry "Costa Rica" ; lin Q219060_State_of_Palestine_Country = mkCountry "State of Palestine" ; lin Q37_Lithuania_Country = mkCountry "Lithuania" ; @@ -110,3 +126,5 @@ lin Q8733_Qing_dynasty_Country = mkCountry "Qing dynasty" ; lin 
Q96_Mexico_Country = mkCountry "Mexico" ; lin Q884_South_Korea_Country = mkCountry "South Korea" ; lin Q114_Kenya_Country = mkCountry "Kenya" ; + +} \ No newline at end of file diff --git a/lab2/grammars/Nobel.gf b/lab2/grammars/Nobel.gf new file mode 100644 index 0000000..160abcb --- /dev/null +++ b/lab2/grammars/Nobel.gf @@ -0,0 +1,17 @@ +abstract Nobel = Labels ** { + +flags startcat = Description ; + +cat + Description ; + Name ; + Date ; + +fun + LivingDescription : Name -> Name -> Country -> Date -> Date -> Award -> Description ; + PastDescription : Name -> Name -> Country -> Date -> Date -> Date -> Award -> Description ; + StringName : String -> Name ; + YearDate : Int -> Date ; + he_Name, she_Name, they_Name : Name ; + +} diff --git a/lab2/grammars/NobelEng.gf b/lab2/grammars/NobelEng.gf new file mode 100644 index 0000000..311f312 --- /dev/null +++ b/lab2/grammars/NobelEng.gf @@ -0,0 +1,32 @@ +concrete NobelEng of Nobel = LabelsEng ** open + SyntaxEng, + ParadigmsEng, + SymbolicEng, + IrregEng +in { + +lincat + Description = Text ; + Name = NP ; + Date = Adv ; + +lin + LivingDescription name pron country birthdate awarddate award = + mkText + (mkPhr (mkS pastTense (mkCl name (mkVP (mkVP born_VP (inAdv country)) birthdate)))) + (mkText (mkS pastTense (mkCl pron (mkVP (mkVP (mkV2 get_V) award) awarddate)))) ; + +-- TODO: linearize PastDescription : Name -> Name -> Country -> Date -> Date -> Date -> Award -> Description ; + StringName s = symb s ; + + YearDate i = inAdv <symb i : NP> ; + + he_Name = he_NP ; + she_Name = she_NP ; + they_Name = they_NP ; + +oper + inAdv : NP -> Adv = \np -> SyntaxEng.mkAdv in_Prep np ; + born_VP = mkVP (mkA "born") ; + +} diff --git a/lab2/scripts/find_labels.py~ b/lab2/scripts/find_labels.py~ deleted file mode 100644 index 18e4c5b..0000000 --- a/lab2/scripts/find_labels.py~ +++ /dev/null @@ -1,101 +0,0 @@ -# https://www.wikidata.org/wiki/Special:EntityData/Q18644475.json - -import urllib.request -import json -import sys -import ssl -from gf_utils import * - -""" -To 
collect labels from query.json (Wikidata query result) and extract grammars: -python3 find_labels.py init >labels.jsonl -python3 find_labels.py abstract >Labels.gf -python3 find_labels.py en >LabelsEng.gf -""" - -WIKIDATA_FILE = 'query.json' -WIKIDATA_PREFIX = 'http://www.wikidata.org/entity/' -WIKIDATA_URL_PREFIX = 'http://www.wikidata.org/wiki/Special:EntityData/' -NOBEL_FIELDS = ['award', 'country'] -LABEL_FILE = 'labels.jsonl' - -USAGE = 'usage: find_labels.py (init | abstract | en | sv | fi | ...)' - -if sys.argv[1:]: - MODE = sys.argv[1] -else: - print(USAGE) - - -# qids given in the data file -def get_wikidata_qids(jsonfile, fields): - qids = set() - with open(jsonfile) as file: - data = json.load(file) - for d in data: - for f in fields: - if f in d: - qids.add((f, d[f][len(WIKIDATA_PREFIX):])) - return qids - - -qids = get_wikidata_qids(WIKIDATA_FILE, NOBEL_FIELDS) - -if __name__ == '__mainz__': - for qid in qids: - print(qid) - -# get all wikidata for each qid -# use this only once, because it is slow -def get_wikidata_json(qids): - context = ssl._create_unverified_context() - for field, qid in qids: - try: - with urllib.request.urlopen(WIKIDATA_URL_PREFIX + qid +'.json', context=context) as url: - data = json.load(url) - yield (field, qid, data) - except Exception as error: - pass - -# extract the labels, redirect to LABEL_FILE (only once) -def get_wikidata_labels(data, languages=None): - for field, qid, dict in data: - entities = dict.get('entities', {'foo': {}}) - entity = list(entities.values())[0] - entitylabels = entity.get('labels', {}) - entitylabels = {val['language']: val['value'] - for val in entitylabels.values() - if (languages is None) or - (val['language'] in languages)} - entitylabels['field'] = field - print(json.dumps({qid: entitylabels}, ensure_ascii=False)) - - -# {"Q800": {"tg": "Коста Рика", "sk": "Kostarika", ... 
"field": }} -def extract_labels(labeldata, mode): - for entry in data: - qid, labels = list(entry.items())[0] - eng = labels.get('en', 'X') - cat = labels['field'].capitalize() - fun = mk_fun_from_strs([qid, eng, cat]) - if mode == 'abstract': - print(mk_fun_rule(fun, cat)) - else: - lin = labels.get(mode, labels.get('en', 'X')) - oper = 'mk' + cat - print(mk_lin_rule(fun, mk_lin(oper, [lin], []))) - - -if MODE == 'init': - # do this only once, redirect to labels.jsonl - data = get_wikidata_json(list(qids)) - get_wikidata_labels(data) -else: - # do this once for abs and for every language you want - with open(LABEL_FILE) as file: - data = [json.loads(line) for line in file] - extract_labels(data, MODE) - - - -