forked from GitHub/gf-core
just for fun - grammar for trigram models in GF
This commit is contained in:
84
examples/trigram/Shaw.gf
Normal file
84
examples/trigram/Shaw.gf
Normal file
@@ -0,0 +1,84 @@
|
||||
abstract Shaw = Trigram ** {

-- This module contains a trigram model of this quote from George Bernard Shaw:
--
--   The reasonable man adapts himself to the world; the unreasonable one persists in trying
--   to adapt the world to himself. Therefore all progress depends on the unreasonable man.
--
-- The quote has 28 word tokens, 27 bigram tokens and 26 trigram tokens.
-- The value in every `--# prob` annotation is the relative frequency of the
-- N-gram in the quote, i.e. (number of occurrences) / (number of tokens of
-- that order), rounded to three decimals.

data
  -- The lexicon: one constant per distinct word of the quote.
  the_W,reasonable_W,man_W,adapts_W,himself_W,to_W,world_W,unreasonable_W,
  one_W,persists_W,in_W,trying_W,adapt_W,therefore_W,all_W,progress_W,depends_W,on_W : Word ;

  -- Unigrams (count / 28).
  the_U          : Unigram the_W ;           --# prob 0.179
  reasonable_U   : Unigram reasonable_W ;    --# prob 0.036
  man_U          : Unigram man_W ;           --# prob 0.071
  adapts_U       : Unigram adapts_W ;        --# prob 0.036
  himself_U      : Unigram himself_W ;       --# prob 0.071
  to_U           : Unigram to_W ;            --# prob 0.107
  world_U        : Unigram world_W ;         --# prob 0.071
  unreasonable_U : Unigram unreasonable_W ;  --# prob 0.071
  one_U          : Unigram one_W ;           --# prob 0.036
  persists_U     : Unigram persists_W ;      --# prob 0.036
  in_U           : Unigram in_W ;            --# prob 0.036
  trying_U       : Unigram trying_W ;        --# prob 0.036
  adapt_U        : Unigram adapt_W ;         --# prob 0.036
  therefore_U    : Unigram therefore_W ;     --# prob 0.036
  all_U          : Unigram all_W ;           --# prob 0.036
  progress_U     : Unigram progress_W ;      --# prob 0.036
  depends_U      : Unigram depends_W ;       --# prob 0.036
  on_U           : Unigram on_W ;            --# prob 0.036

  -- Bigrams (count / 27), listed in order of first occurrence in the quote.
  -- "the world" and "the unreasonable" both occur twice, so each of them is
  -- declared once with twice the probability mass of the other bigrams.
  the_reasonable_B    : Bigram the_W reasonable_W ;       --# prob 0.037
  reasonable_man_B    : Bigram reasonable_W man_W ;       --# prob 0.037
  man_adapts_B        : Bigram man_W adapts_W ;           --# prob 0.037
  adapts_himself_B    : Bigram adapts_W himself_W ;       --# prob 0.037
  himself_to_B        : Bigram himself_W to_W ;           --# prob 0.037
  to_the_B            : Bigram to_W the_W ;               --# prob 0.037
  the_world_B         : Bigram the_W world_W ;            --# prob 0.074
  world_the_B         : Bigram world_W the_W ;            --# prob 0.037
  the_unreasonable_B  : Bigram the_W unreasonable_W ;     --# prob 0.074
  unreasonable_one_B  : Bigram unreasonable_W one_W ;     --# prob 0.037
  one_persists_B      : Bigram one_W persists_W ;         --# prob 0.037
  persists_in_B       : Bigram persists_W in_W ;          --# prob 0.037
  in_trying_B         : Bigram in_W trying_W ;            --# prob 0.037
  trying_to_B         : Bigram trying_W to_W ;            --# prob 0.037
  to_adapt_B          : Bigram to_W adapt_W ;             --# prob 0.037
  adapt_the_B         : Bigram adapt_W the_W ;            --# prob 0.037
  world_to_B          : Bigram world_W to_W ;             --# prob 0.037
  to_himself_B        : Bigram to_W himself_W ;           --# prob 0.037
  himself_therefore_B : Bigram himself_W therefore_W ;    --# prob 0.037
  therefore_all_B     : Bigram therefore_W all_W ;        --# prob 0.037
  all_progress_B      : Bigram all_W progress_W ;         --# prob 0.037
  progress_depends_B  : Bigram progress_W depends_W ;     --# prob 0.037
  depends_on_B        : Bigram depends_W on_W ;           --# prob 0.037
  on_the_B            : Bigram on_W the_W ;               --# prob 0.037
  unreasonable_man_B  : Bigram unreasonable_W man_W ;     --# prob 0.037

  -- Trigrams (count / 26); every trigram of the quote occurs exactly once.
  the_reasonable_man_T        : Trigram the_W reasonable_W man_W ;         --# prob 0.038
  reasonable_man_adapts_T     : Trigram reasonable_W man_W adapts_W ;      --# prob 0.038
  man_adapts_himself_T        : Trigram man_W adapts_W himself_W ;         --# prob 0.038
  adapts_himself_to_T         : Trigram adapts_W himself_W to_W ;          --# prob 0.038
  himself_to_the_T            : Trigram himself_W to_W the_W ;             --# prob 0.038
  to_the_world_T              : Trigram to_W the_W world_W ;               --# prob 0.038
  the_world_the_T             : Trigram the_W world_W the_W ;              --# prob 0.038
  world_the_unreasonable_T    : Trigram world_W the_W unreasonable_W ;     --# prob 0.038
  the_unreasonable_one_T      : Trigram the_W unreasonable_W one_W ;       --# prob 0.038
  unreasonable_one_persists_T : Trigram unreasonable_W one_W persists_W ;  --# prob 0.038
  one_persists_in_T           : Trigram one_W persists_W in_W ;            --# prob 0.038
  persists_in_trying_T        : Trigram persists_W in_W trying_W ;         --# prob 0.038
  in_trying_to_T              : Trigram in_W trying_W to_W ;               --# prob 0.038
  trying_to_adapt_T           : Trigram trying_W to_W adapt_W ;            --# prob 0.038
  to_adapt_the_T              : Trigram to_W adapt_W the_W ;               --# prob 0.038
  adapt_the_world_T           : Trigram adapt_W the_W world_W ;            --# prob 0.038
  the_world_to_T              : Trigram the_W world_W to_W ;               --# prob 0.038
  world_to_himself_T          : Trigram world_W to_W himself_W ;           --# prob 0.038
  to_himself_therefore_T      : Trigram to_W himself_W therefore_W ;       --# prob 0.038
  himself_therefore_all_T     : Trigram himself_W therefore_W all_W ;      --# prob 0.038
  therefore_all_progress_T    : Trigram therefore_W all_W progress_W ;     --# prob 0.038
  all_progress_depends_T      : Trigram all_W progress_W depends_W ;       --# prob 0.038
  progress_depends_on_T       : Trigram progress_W depends_W on_W ;        --# prob 0.038
  depends_on_the_T            : Trigram depends_W on_W the_W ;             --# prob 0.038
  on_the_unreasonable_T       : Trigram on_W the_W unreasonable_W ;        --# prob 0.038
  the_unreasonable_man_T      : Trigram the_W unreasonable_W man_W ;       --# prob 0.038

}
|
||||
23
examples/trigram/ShawCnc.gf
Normal file
23
examples/trigram/ShawCnc.gf
Normal file
@@ -0,0 +1,23 @@
|
||||
concrete ShawCnc of Shaw = TrigramCnc ** {

-- Concrete syntax for the Shaw corpus: each word constant of the abstract
-- lexicon is linearized as its surface string. Only the `_W` constants are
-- given a linearization in this module.

lin
  -- Determiner-like and function words.
  the_W          = "the" ;
  to_W           = "to" ;
  in_W           = "in" ;
  on_W           = "on" ;
  all_W          = "all" ;
  one_W          = "one" ;
  himself_W      = "himself" ;
  therefore_W    = "therefore" ;

  -- Content words.
  reasonable_W   = "reasonable" ;
  unreasonable_W = "unreasonable" ;
  man_W          = "man" ;
  world_W        = "world" ;
  progress_W     = "progress" ;
  adapts_W       = "adapts" ;
  adapt_W        = "adapt" ;
  persists_W     = "persists" ;
  trying_W       = "trying" ;
  depends_W      = "depends" ;

}
|
||||
34
examples/trigram/Trigram.gf
Normal file
34
examples/trigram/Trigram.gf
Normal file
@@ -0,0 +1,34 @@
|
||||
abstract Trigram = {

cat
  -- The lexicon is a set of constants of type Word.
  Word ;

  -- Every N-gram instance seen in the corpus is an abstract syntax constant
  -- whose type is indexed by the words it is made of.
  Unigram (w : Word) ;
  Bigram  (w1,w2 : Word) ;
  Trigram (w1,w2,w3 : Word) ;

  -- A text is a sequence of words; the type of a sequence is indexed by its
  -- last two tokens.
  Seq (w1,w2 : Word) ;

  -- The estimated probability of the trigram 'w1 w2 w3' is the total
  -- probability of all trees of type Prob w1 w2 w3.
  Prob (w1,w2,w3 : Word) ;

data
  -- Sequences are built with nil and cons. nil starts a sequence from its
  -- first three words; cons extends a sequence ending in 'w1 w2' with a next
  -- word w3. The Prob argument ensures that the sequence contains only valid
  -- N-grams and contributes the right probability mass.
  nil  : (w1,w2,w3 : Word) -> Prob w1 w2 w3 -> Seq w2 w3 ;
  cons : ({w1,w2} : Word) -> Seq w1 w2 -> (w3 : Word) -> Prob w1 w2 w3 -> Seq w2 w3 ;

  -- Probabilities can be constructed in three ways: from a trigram, from two
  -- overlapping bigrams, or from three unigrams. Since the trigramP, bigramP
  -- and unigramP functions have some associated probabilities of their own,
  -- this results in linear smoothing between the unigram, bigram and trigram
  -- models.
  trigramP : ({w1,w2,w3} : Word) -> Trigram w1 w2 w3 -> Prob w1 w2 w3 ;
  bigramP  : ({w1,w2,w3} : Word) -> Bigram w1 w2 -> Bigram w2 w3 -> Prob w1 w2 w3 ;
  unigramP : ({w1,w2,w3} : Word) -> Unigram w1 -> Unigram w2 -> Unigram w3 -> Prob w1 w2 w3 ;

}
|
||||
12
examples/trigram/TrigramCnc.gf
Normal file
12
examples/trigram/TrigramCnc.gf
Normal file
@@ -0,0 +1,12 @@
|
||||
concrete TrigramCnc of Trigram = {

lincat
  -- Words and word sequences are plain strings.
  Word = Str ;
  Seq  = Str ;

  -- N-gram evidence and probability terms carry no surface material.
  Unigram = {} ;
  Bigram  = {} ;
  Trigram = {} ;
  Prob    = {} ;

lin
  -- A minimal sequence is spelled out as its three words; the Prob argument
  -- is ignored.
  nil first second third _ = first ++ second ++ third ;

  -- Extending a sequence appends the new word to the text so far; the two
  -- index words and the Prob argument are ignored.
  cons _ _ prefix next _ = prefix ++ next ;

}
|
||||
Reference in New Issue
Block a user