first Nobel grammar

2026-05-22 15:02:50 -06:00 · 2025-05-04 10:12:23 +02:00
parent 0671eee0ba
commit cc604093d0
5 changed files with 73 additions and 102 deletions
--- a/lab2/grammars/Labels.gf
+++ b/lab2/grammars/Labels.gf
@@ -1,3 +1,7 @@
+abstract Labels = {
+cat Country ;
+cat Award ;
+
 fun Q800_Costa_Rica_Country : Country ; 
 fun Q219060_State_of_Palestine_Country : Country ; 
 fun Q37_Lithuania_Country : Country ; 
@@ -110,3 +114,4 @@ fun Q8733_Qing_dynasty_Country : Country ;
 fun Q96_Mexico_Country : Country ; 
 fun Q884_South_Korea_Country : Country ; 
 fun Q114_Kenya_Country : Country ;
+}
--- a/lab2/grammars/LabelsEng.gf
+++ b/lab2/grammars/LabelsEng.gf
@@ -1,3 +1,19 @@
+concrete LabelsEng of Labels = open SyntaxEng, ParadigmsEng in {
+
+lincat Country = NP ;
+lincat Award = NP ;
+
+oper mkCountry = overload {  -- constructor called by the Country lin rules in this module
+  mkCountry : Str -> NP = \s -> mkNP (mkPN s) ;  -- plain string: made into a proper name (mkPN), then an NP
+  mkCountry : NP -> NP = \np -> np ;  -- ready-made NP: returned unchanged
+  } ;
+
+oper mkAward = overload {  -- constructor for Award linearizations (lincat Award = NP)
+  mkAward : Str -> NP = \s -> mkNP (mkPN s) ;  -- plain string: made into a proper name (mkPN), then an NP
+  mkAward : NP -> NP = \np -> np ;  -- ready-made NP: returned unchanged
+  } ;
+
+
 lin Q800_Costa_Rica_Country = mkCountry "Costa Rica" ; 
 lin Q219060_State_of_Palestine_Country = mkCountry "State of Palestine" ; 
 lin Q37_Lithuania_Country = mkCountry "Lithuania" ; 
@@ -110,3 +126,5 @@ lin Q8733_Qing_dynasty_Country = mkCountry "Qing dynasty" ;
 lin Q96_Mexico_Country = mkCountry "Mexico" ; 
 lin Q884_South_Korea_Country = mkCountry "South Korea" ; 
 lin Q114_Kenya_Country = mkCountry "Kenya" ; 
+
+}
--- a/lab2/grammars/Nobel.gf
+++ b/lab2/grammars/Nobel.gf
@@ -0,0 +1,17 @@
+abstract Nobel = Labels ** {  -- abstract syntax of descriptions; extends Labels, which declares Country and Award
+
+flags startcat = Description ;  -- Description is the default start category
+
+cat
+  Description ;  -- the complete description (top-level category)
+  Name ;         -- how a person is referred to: a string name or one of the pronoun constants below
+  Date ;         -- a date; the only constructor here is YearDate
+
+fun
+  LivingDescription : Name -> Name -> Country -> Date -> Date -> Award -> Description ;  -- args as named in NobelEng: name, pron, country, birthdate, awarddate, award
+  PastDescription : Name -> Name -> Country -> Date -> Date -> Date -> Award -> Description ;  -- one more Date than LivingDescription; presumably a death date -- TODO confirm
+  StringName : String -> Name ;  -- a Name given as a string literal
+  YearDate : Int -> Date ;  -- a Date given as an integer (a year, going by the function name)
+  he_Name, she_Name, they_Name : Name ;  -- pronoun constants usable wherever a Name is expected
+
+}
--- a/lab2/grammars/NobelEng.gf
+++ b/lab2/grammars/NobelEng.gf
@@ -0,0 +1,32 @@
+concrete NobelEng of Nobel = LabelsEng ** open  -- English concrete syntax for Nobel; extends LabelsEng
+  SyntaxEng,
+  ParadigmsEng,
+  SymbolicEng,
+  IrregEng
+in {
+
+lincat
+  Description = Text ;  -- LivingDescription below builds a Text out of two sentences
+  Name = NP ;
+  Date = Adv ;  -- YearDate below produces an adverbial via inAdv
+
+lin
+  LivingDescription name pron country birthdate awarddate award =  -- sentence about birth, followed by sentence about the award
+    mkText
+      (mkPhr (mkS pastTense (mkCl name (mkVP (mkVP born_VP (inAdv country)) birthdate))))  -- past tense: name + born_VP + inAdv country + birthdate
+      (mkText (mkS pastTense (mkCl pron (mkVP (mkVP (mkV2 get_V) award) awarddate)))) ;  -- past tense: pron + get_V (as V2) award + awarddate
+
+--  TODO linearize: PastDescription : Name -> Name -> Country -> Date -> Date -> Date -> Award -> Description ;
+  StringName s = symb s ;  -- symb presumably comes from SymbolicEng (opened above) -- TODO confirm
+  
+  YearDate i = inAdv <symb i : NP> ;  -- symb result annotated as NP (presumably to resolve the overload), then wrapped by inAdv
+
+  he_Name = he_NP ;
+  she_Name = she_NP ;
+  they_Name = they_NP ;
+
+oper
+  inAdv : NP -> Adv = \np -> SyntaxEng.mkAdv in_Prep np ;  -- in_Prep + NP as an Adv; mkAdv is qualified, presumably because ParadigmsEng also defines mkAdv
+  born_VP = mkVP (mkA "born") ;  -- VP built from the adjective mkA "born"
+
+}
--- a/lab2/scripts/find_labels.py~
+++ b/lab2/scripts/find_labels.py~
@@ -1,101 +0,0 @@
-# https://www.wikidata.org/wiki/Special:EntityData/Q18644475.json
-
-import urllib.request
-import json
-import sys
-import ssl
-from gf_utils import *
-
-"""
-To collect labels from query.json (Wikidata query result) and extract grammars:
-python3 find_labels.py init >labels.jsonl
-python3 find_labels.py abstract >Labels.gf
-python3 find_labels.py en >LabelsEng.gf
-"""
-
-WIKIDATA_FILE = 'query.json'
-WIKIDATA_PREFIX = 'http://www.wikidata.org/entity/'
-WIKIDATA_URL_PREFIX = 'http://www.wikidata.org/wiki/Special:EntityData/'
-NOBEL_FIELDS = ['award', 'country']
-LABEL_FILE = 'labels.jsonl'
-
-USAGE = 'usage: find_labels.py (init | abstract | en | sv | fi | ...)'
-
-if sys.argv[1:]:
-    MODE = sys.argv[1]
-else:
-    print(USAGE)
-    
-
-# qids given in the data file
-def get_wikidata_qids(jsonfile, fields):
-    qids = set()
-    with open(jsonfile) as file:
-        data = json.load(file)
-        for d in data:
-            for f in fields:
-                if f in d:
-                    qids.add((f, d[f][len(WIKIDATA_PREFIX):]))
-    return qids
-
-
-qids = get_wikidata_qids(WIKIDATA_FILE, NOBEL_FIELDS)
-
-if __name__ == '__mainz__':
-    for qid in qids:
-        print(qid)
-
-# get all wikidata for each qid
-# use this only once, because it is slow
-def get_wikidata_json(qids):
-    context = ssl._create_unverified_context()
-    for field, qid in qids:
-        try:
-            with urllib.request.urlopen(WIKIDATA_URL_PREFIX + qid +'.json', context=context) as url:
-                data = json.load(url)
-                yield (field, qid, data)
-        except Exception as error:
-            pass
-
-# extract the labels, redirect to LABEL_FILE (only once)
-def get_wikidata_labels(data, languages=None):
-    for field, qid, dict in data:
-        entities = dict.get('entities', {'foo': {}})
-        entity = list(entities.values())[0]
-        entitylabels = entity.get('labels', {})
-        entitylabels = {val['language']: val['value']
-                        for val in entitylabels.values()
-                        if (languages is None) or
-                           (val['language'] in languages)}
-        entitylabels['field'] = field
-        print(json.dumps({qid: entitylabels}, ensure_ascii=False))
-
-
-# {"Q800": {"tg": "Коста Рика", "sk": "Kostarika", ... "field": <field>}}
-def extract_labels(labeldata, mode):
-    for entry in data:
-        qid, labels = list(entry.items())[0]
-        eng = labels.get('en', 'X')
-        cat = labels['field'].capitalize()
-        fun = mk_fun_from_strs([qid, eng, cat])
-        if mode == 'abstract':
-            print(mk_fun_rule(fun, cat))
-        else:
-            lin = labels.get(mode, labels.get('en', 'X'))
-            oper = 'mk' + cat
-            print(mk_lin_rule(fun, mk_lin(oper, [lin], [])))
-
-
-if MODE == 'init':
-    # do this only once, redirect to labels.jsonl
-    data = get_wikidata_json(list(qids))
-    get_wikidata_labels(data)
-else:
-    # do this once for abs and for every language you want
-    with open(LABEL_FILE) as file:
-        data = [json.loads(line) for line in file]
-        extract_labels(data, MODE)
-
-
-
-