# Forked from GitHub/comp-syntax-gu-mlt (Python, 73 lines, 2.1 KiB)
import pgf
|
|
from collections import namedtuple
|
|
|
|
|
|
class FactSystem:
    """Turn rows of tabular data into text by way of a grammar.

    The constructor stores three values:
      fieldnames -- the field names handed to ``namedtuple`` for each data row
      grammar    -- the grammar object; its ``languages`` mapping is used
      language1  -- key into ``grammar.languages`` selecting the language in
                    which entities are parsed to trees
    """

    def __init__(self, fnames, gr, lang1):
        self.fieldnames = fnames
        self.grammar = gr
        self.language1 = lang1  # the language in which entities are parsed to trees

    def get_data(self, filename):
        """Read a tab-separated file and return its rows as named tuples.

        Each non-empty line is split on tab characters and packed into a
        ``Data`` named tuple built from ``self.fieldnames``. The line
        terminator is removed first so that it does not end up inside the
        last field, and completely empty lines are skipped.

        Raises TypeError if a line does not have exactly one value per field.
        """
        Data = namedtuple('Data', self.fieldnames)
        data = []
        # The context manager guarantees the file is closed, also on errors.
        with open(filename, encoding='utf-8') as file:
            for line in file:
                line = line.rstrip('\r\n')
                if not line:
                    continue  # e.g. a trailing blank line at the end of the file
                data.append(Data(*line.split('\t')))
        return data

    def str2exp(self, cat, s):
        """Parse string ``s`` as category ``cat`` and return the first tree.

        Parsing is done in the language selected by ``self.language1``.
        Can raise ParseError.
        """
        eng = self.grammar.languages[self.language1]
        pp = eng.parse(s, cat=pgf.readType(cat))
        # The parser yields pairs; only the tree of the first pair is used.
        _, e = next(pp)
        return e

    def exp2str(self, exp):
        """Linearize tree ``exp`` in the language selected by ``self.language1``."""
        eng = self.grammar.languages[self.language1]
        return eng.linearize(exp)

    def data2lin(self, cat, s):
        """Parse ``s`` as category ``cat`` and linearize the result again."""
        return self.exp2str(self.str2exp(cat, s))

    def run(self, datafile, fact_generator):
        """Print the generated facts once for every language of the grammar.

        The rows of ``datafile`` are read with ``get_data`` and sorted, then
        ``fact_generator(self, data)`` is called to obtain the fact trees.
        For each language, every tree is linearized, empty linearizations
        are dropped, the first character is upper-cased, and the resulting
        lines are printed joined by newlines.
        """
        data = sorted(self.get_data(datafile))
        # The trees do not depend on the output language, so build them once.
        # list() also makes a one-shot iterator reusable across languages.
        trees = list(fact_generator(self, data))
        for lang in self.grammar.languages.values():
            text = []
            for tree in trees:
                lin = lang.linearize(tree)
                if lin:
                    text.append(lin[0].upper() + lin[1:])
            print('\n'.join(text))
|
|
|
|
|
|
def simple_facts(factsys,data):
    """Build an attribute fact for every non-first field of every row.

    The first field of a row is parsed as an "Object". Each remaining field
    name is parsed as an "Attribute" and its value as a "Value", and the
    three trees are combined into an "AttributeFact" expression. The facts
    are returned as a list, in row order and then field order.
    """
    names = factsys.fieldnames.split()
    result = []
    for row in data:
        subject = factsys.str2exp("Object", row[0])
        # Pair each field name after the first with the value at the same position.
        pairs = [(name, row[pos]) for pos, name in enumerate(names[1:], start=1)]
        for attr, val in pairs:
            attr_exp = factsys.str2exp("Attribute", attr)
            val_exp = factsys.str2exp("Value", val)
            result.append(pgf.Expr("AttributeFact", [attr_exp, subject, val_exp]))
    return result
|
|
|
|
|
|
def example_run():
    """Run the fact generator on the countries example.

    Loads the grammar from 'Countries.pgf', sets up a FactSystem that parses
    entities in 'CountriesEng', and prints the simple facts generated from
    '../data/countries.tsv'.
    """
    grammar = pgf.readPGF('Countries.pgf')
    columns = 'country capital area population continent currency'
    system = FactSystem(columns, grammar, 'CountriesEng')
    system.run('../data/countries.tsv', simple_facts)
|
|
|
|
# Run the countries example only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    example_run()
|