mirror of
https://github.com/GrammaticalFramework/comp-syntax-gu-mlt.git
synced 2026-02-08 22:41:05 -07:00
first Nobel grammar
This commit is contained in:
@@ -1,3 +1,7 @@
|
|||||||
|
abstract Labels = {
|
||||||
|
cat Country ;
|
||||||
|
cat Award ;
|
||||||
|
|
||||||
fun Q800_Costa_Rica_Country : Country ;
|
fun Q800_Costa_Rica_Country : Country ;
|
||||||
fun Q219060_State_of_Palestine_Country : Country ;
|
fun Q219060_State_of_Palestine_Country : Country ;
|
||||||
fun Q37_Lithuania_Country : Country ;
|
fun Q37_Lithuania_Country : Country ;
|
||||||
@@ -109,4 +113,5 @@ fun Q180114_Ayyubid_dynasty_Country : Country ;
|
|||||||
fun Q8733_Qing_dynasty_Country : Country ;
|
fun Q8733_Qing_dynasty_Country : Country ;
|
||||||
fun Q96_Mexico_Country : Country ;
|
fun Q96_Mexico_Country : Country ;
|
||||||
fun Q884_South_Korea_Country : Country ;
|
fun Q884_South_Korea_Country : Country ;
|
||||||
fun Q114_Kenya_Country : Country ;
|
fun Q114_Kenya_Country : Country ;
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,3 +1,19 @@
|
|||||||
|
concrete LabelsEng of Labels = open SyntaxEng, ParadigmsEng in {
|
||||||
|
|
||||||
|
lincat Country = NP ;
|
||||||
|
lincat Award = NP ;
|
||||||
|
|
||||||
|
oper mkCountry = overload {
|
||||||
|
mkCountry : Str -> NP = \s -> mkNP (mkPN s) ;
|
||||||
|
mkCountry : NP -> NP = \np -> np ;
|
||||||
|
} ;
|
||||||
|
|
||||||
|
oper mkAward = overload {
|
||||||
|
mkAward : Str -> NP = \s -> mkNP (mkPN s) ;
|
||||||
|
mkAward : NP -> NP = \np -> np ;
|
||||||
|
} ;
|
||||||
|
|
||||||
|
|
||||||
lin Q800_Costa_Rica_Country = mkCountry "Costa Rica" ;
|
lin Q800_Costa_Rica_Country = mkCountry "Costa Rica" ;
|
||||||
lin Q219060_State_of_Palestine_Country = mkCountry "State of Palestine" ;
|
lin Q219060_State_of_Palestine_Country = mkCountry "State of Palestine" ;
|
||||||
lin Q37_Lithuania_Country = mkCountry "Lithuania" ;
|
lin Q37_Lithuania_Country = mkCountry "Lithuania" ;
|
||||||
@@ -110,3 +126,5 @@ lin Q8733_Qing_dynasty_Country = mkCountry "Qing dynasty" ;
|
|||||||
lin Q96_Mexico_Country = mkCountry "Mexico" ;
|
lin Q96_Mexico_Country = mkCountry "Mexico" ;
|
||||||
lin Q884_South_Korea_Country = mkCountry "South Korea" ;
|
lin Q884_South_Korea_Country = mkCountry "South Korea" ;
|
||||||
lin Q114_Kenya_Country = mkCountry "Kenya" ;
|
lin Q114_Kenya_Country = mkCountry "Kenya" ;
|
||||||
|
|
||||||
|
}
|
||||||
17
lab2/grammars/Nobel.gf
Normal file
17
lab2/grammars/Nobel.gf
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
-- Abstract syntax for short descriptions of Nobel laureates.
-- Extends Labels, so the categories Country and Award are inherited.
abstract Nobel = Labels ** {

flags startcat = Description ;

cat
  Description ;  -- a whole description; the start category
  Name ;         -- how a person is referred to (string name or pronoun)
  Date ;         -- a point in time, built from a year by YearDate

fun
  -- Arguments, in order: name, pronoun, country, birth date, award date, award.
  LivingDescription : Name -> Name -> Country -> Date -> Date -> Award -> Description ;

  -- Same shape as LivingDescription, with one additional Date argument.
  PastDescription : Name -> Name -> Country -> Date -> Date -> Date -> Award -> Description ;

  StringName : String -> Name ;  -- wrap a string literal as a name
  YearDate : Int -> Date ;       -- wrap an integer literal as a date

  -- Pronouns usable wherever a Name is expected.
  he_Name : Name ;
  she_Name : Name ;
  they_Name : Name ;

}
|
||||||
32
lab2/grammars/NobelEng.gf
Normal file
32
lab2/grammars/NobelEng.gf
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
-- English concrete syntax for Nobel, on top of the LabelsEng lexicon.
concrete NobelEng of Nobel = LabelsEng **
  open SyntaxEng, ParadigmsEng, SymbolicEng, IrregEng in {

lincat
  Description = Text ;
  Name = NP ;
  Date = Adv ;

lin
  -- Two past-tense sentences: the first has `name` as subject and says
  -- "born in <country> <birthdate>"; the second has `pron` as subject and
  -- says "get <award> <awarddate>".
  LivingDescription name pron country birthdate awarddate award =
    let
      bornCl : Cl = mkCl name (mkVP (mkVP born_VP (inAdv country)) birthdate) ;
      awardCl : Cl = mkCl pron (mkVP (mkVP (mkV2 get_V) award) awarddate)
    in
      mkText (mkPhr (mkS pastTense bornCl)) (mkText (mkS pastTense awardCl)) ;

  -- TODO: PastDescription is declared in the abstract syntax
  -- (Name -> Name -> Country -> Date -> Date -> Date -> Award -> Description)
  -- but has no linearization here yet.

  StringName s = symb s ;

  YearDate i = inAdv <symb i : NP> ;

  he_Name = he_NP ;
  she_Name = she_NP ;
  they_Name = they_NP ;

oper
  -- "in" + NP as an adverbial; mkAdv is qualified to pick the SyntaxEng one.
  inAdv : NP -> Adv = \np -> SyntaxEng.mkAdv in_Prep np ;

  -- "born", built as an adjectival verb phrase.
  born_VP = mkVP (mkA "born") ;

}
|
||||||
@@ -1,101 +0,0 @@
|
|||||||
# https://www.wikidata.org/wiki/Special:EntityData/Q18644475.json
|
|
||||||
|
|
||||||
import urllib.request
|
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
import ssl
|
|
||||||
from gf_utils import *
|
|
||||||
|
|
||||||
"""
|
|
||||||
To collect labels from query.json (Wikidata query result) and extract grammars:
|
|
||||||
python3 find_labels.py init >labels.jsonl
|
|
||||||
python3 find_labels.py abstract >Labels.gf
|
|
||||||
python3 find_labels.py en >LabelsEng.gf
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Wikidata query result that the QIDs are collected from.
WIKIDATA_FILE = "query.json"

# Leading part of an entity URI; cut off to obtain the bare QID.
WIKIDATA_PREFIX = "http://www.wikidata.org/entity/"

# Base URL of the per-entity JSON documents ("<prefix><qid>.json").
WIKIDATA_URL_PREFIX = "http://www.wikidata.org/wiki/Special:EntityData/"

# Keys of a query record whose values are entity URIs worth collecting.
NOBEL_FIELDS = ["award", "country"]

# JSON-lines file holding the extracted labels, one entity per line.
LABEL_FILE = "labels.jsonl"

USAGE = "usage: find_labels.py (init | abstract | en | sv | fi | ...)"
|
|
||||||
|
|
||||||
# The first command-line argument selects what the script prints
# ('init', 'abstract', or a language code such as 'en').
if sys.argv[1:]:
    MODE = sys.argv[1]
else:
    # Without a mode MODE would stay undefined and the script would only
    # fail later with a NameError. Stop here instead; sys.exit() writes the
    # usage text to stderr, keeping it out of any redirected stdout.
    sys.exit(USAGE)
|
|
||||||
|
|
||||||
|
|
||||||
# qids given in the data file
|
|
||||||
def get_wikidata_qids(jsonfile, fields):
    """Collect the (field, QID) pairs mentioned in a Wikidata query result.

    Args:
        jsonfile: Path of a JSON file containing a list of records (dicts).
        fields: Record keys whose values should be collected.

    Returns:
        A set of ``(field, qid)`` tuples. ``qid`` is the record's value for
        ``field`` with its first ``len(WIKIDATA_PREFIX)`` characters removed.
        Records that lack a field simply contribute nothing for it.
    """
    # Read as UTF-8 explicitly so decoding does not depend on the locale.
    with open(jsonfile, encoding='utf-8') as file:
        records = json.load(file)

    qids = set()
    for record in records:
        for field in fields:
            if field in record:
                qids.add((field, record[field][len(WIKIDATA_PREFIX):]))
    return qids
|
|
||||||
|
|
||||||
|
|
||||||
# Collected once at import time; the 'init' mode further down consumes it.
qids = get_wikidata_qids(WIKIDATA_FILE, NOBEL_FIELDS)

# Debug dump of the collected pairs. '__mainz__' never equals __name__, so
# this branch does not run; it is kept as a switchable debugging aid.
if __name__ == '__mainz__':
    for pair in qids:
        print(pair)
|
|
||||||
|
|
||||||
# get all wikidata for each qid
|
|
||||||
# use this only once, because it is slow
|
|
||||||
def get_wikidata_json(qids):
    """Fetch the Wikidata entity document for each (field, qid) pair.

    This is a generator: requests are made one at a time as it is iterated.

    Args:
        qids: Iterable of ``(field, qid)`` pairs.

    Yields:
        ``(field, qid, data)`` tuples, where ``data`` is the decoded JSON
        document downloaded from ``WIKIDATA_URL_PREFIX + qid + '.json'``.
        A pair whose download or decoding fails is skipped after a warning
        on stderr.
    """
    # NOTE(review): this context disables TLS certificate verification and
    # relies on a private ssl function. Kept as in the original; confirm
    # whether ssl.create_default_context() can be used instead.
    context = ssl._create_unverified_context()
    for field, qid in qids:
        try:
            with urllib.request.urlopen(
                    WIKIDATA_URL_PREFIX + qid + '.json',
                    context=context) as url:
                data = json.load(url)
        except Exception as error:
            # Best effort: one failing entity must not abort the whole run,
            # but it should not vanish silently either. The warning goes to
            # stderr so it stays out of any output redirected from stdout.
            print('warning: skipping', qid, '-', error, file=sys.stderr)
            continue
        # Yield outside the try block so that errors raised by the consumer
        # are not mistaken for download failures.
        yield (field, qid, data)
|
|
||||||
|
|
||||||
# extract the labels, redirect to LABEL_FILE (only once)
|
|
||||||
def get_wikidata_labels(data, languages=None):
    """Print one JSON line with the labels of every fetched entity.

    Args:
        data: Iterable of ``(field, qid, document)`` triples, where
            ``document`` is a dict that may contain an ``'entities'`` dict.
        languages: Optional collection of language codes to keep. ``None``
            keeps every language.

    Each printed line has the shape
    ``{qid: {language: label, ..., "field": field}}`` and is written to
    stdout without ASCII escaping. Nothing is returned.
    """
    for field, qid, document in data:
        entities = document.get('entities', {})
        # Only the first entity of the document is used. A document with no
        # entities at all yields an entry that carries just the field.
        entity = next(iter(entities.values()), {})
        labels = {
            val['language']: val['value']
            for val in entity.get('labels', {}).values()
            if languages is None or val['language'] in languages
        }
        labels['field'] = field
        print(json.dumps({qid: labels}, ensure_ascii=False))
|
|
||||||
|
|
||||||
|
|
||||||
# {"Q800": {"tg": "Коста Рика", "sk": "Kostarika", ... "field": <field>}}
|
|
||||||
def extract_labels(labeldata, mode):
    """Print one GF rule per label entry.

    Args:
        labeldata: Iterable of single-key dicts of the shape
            ``{qid: {language: label, ..., 'field': field}}``.
        mode: ``'abstract'`` to print ``fun`` rules; anything else is taken
            as a language code and produces ``lin`` rules.

    The function name is built from the QID, the English label ('X' when
    there is none) and the capitalized field, which also serves as the
    category. In a language mode a missing label falls back to the English
    one. The ``mk_*`` helpers are not defined in this file; presumably they
    come from ``gf_utils``.
    """
    # Iterate the argument itself, not a module-level variable.
    for entry in labeldata:
        qid, labels = list(entry.items())[0]
        eng = labels.get('en', 'X')
        cat = labels['field'].capitalize()
        fun = mk_fun_from_strs([qid, eng, cat])
        if mode == 'abstract':
            print(mk_fun_rule(fun, cat))
        else:
            lin = labels.get(mode, eng)
            oper = 'mk' + cat
            print(mk_lin_rule(fun, mk_lin(oper, [lin], [])))
|
|
||||||
|
|
||||||
|
|
||||||
# NOTE: the module-level name `data` is deliberate; code elsewhere in this
# file may look it up as a global.
if MODE != 'init':
    # Run once for the abstract syntax and once per wanted language:
    # every line of the label file is one JSON object.
    with open(LABEL_FILE) as label_lines:
        data = list(map(json.loads, label_lines))
    extract_labels(data, MODE)
else:
    # Run this only once, with stdout redirected to labels.jsonl.
    data = get_wikidata_json(list(qids))
    get_wikidata_labels(data)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Reference in New Issue
Block a user