diff --git a/examples/trigram/Shaw.gf b/examples/trigram/Shaw.gf
new file mode 100644
index 000000000..d8a2814ea
--- /dev/null
+++ b/examples/trigram/Shaw.gf
@@ -0,0 +1,87 @@
+abstract Shaw = Trigram ** {
+
+-- This module contains a trigram model of this quote from George Bernard Shaw:
+--
+--   The reasonable man adapts himself to the world; the unreasonable one persists in trying
+--   to adapt the world to himself. Therefore all progress depends on the unreasonable man.
+
+data
+  the_W,reasonable_W,man_W,adapts_W,himself_W,to_W,world_W,unreasonable_W,
+  one_W,persists_W,in_W,trying_W,adapt_W,therefore_W,all_W,progress_W,depends_W,on_W : Word ;
+
+  -- Unigrams: relative frequency over the 28 word tokens of the quote.
+  the_U : Unigram the_W ; --# prob 0.179
+  reasonable_U : Unigram reasonable_W ; --# prob 0.036
+  man_U : Unigram man_W ; --# prob 0.071
+  adapts_U : Unigram adapts_W ; --# prob 0.036
+  himself_U : Unigram himself_W ; --# prob 0.071
+  to_U : Unigram to_W ; --# prob 0.107
+  world_U : Unigram world_W ; --# prob 0.071
+  unreasonable_U : Unigram unreasonable_W ; --# prob 0.071
+  one_U : Unigram one_W ; --# prob 0.036
+  persists_U : Unigram persists_W ; --# prob 0.036
+  in_U : Unigram in_W ; --# prob 0.036
+  trying_U : Unigram trying_W ; --# prob 0.036
+  adapt_U : Unigram adapt_W ; --# prob 0.036
+  therefore_U : Unigram therefore_W ; --# prob 0.036
+  all_U : Unigram all_W ; --# prob 0.036
+  progress_U : Unigram progress_W ; --# prob 0.036
+  depends_U : Unigram depends_W ; --# prob 0.036
+  on_U : Unigram on_W ; --# prob 0.036
+
+  -- Bigrams: relative frequency over the 27 bigram tokens of the quote. "the world" and
+  -- "the unreasonable" each occur twice, so each is declared once with prob 0.074.
+  the_reasonable_B : Bigram the_W reasonable_W ; --# prob 0.037
+  reasonable_man_B : Bigram reasonable_W man_W ; --# prob 0.037
+  man_adapts_B : Bigram man_W adapts_W ; --# prob 0.037
+  adapts_himself_B : Bigram adapts_W himself_W ; --# prob 0.037
+  himself_to_B : Bigram himself_W to_W ; --# prob 0.037
+  to_the_B : Bigram to_W the_W ; --# prob 0.037
+  the_world_B : Bigram the_W world_W ; --# prob 0.074
+  world_the_B : Bigram world_W the_W ; --# prob 0.037
+  the_unreasonable_B : Bigram the_W unreasonable_W ; --# prob 0.074
+  unreasonable_one_B : Bigram unreasonable_W one_W ; --# prob 0.037
+  one_persists_B : Bigram one_W persists_W ; --# prob 0.037
+  persists_in_B : Bigram persists_W in_W ; --# prob 0.037
+  in_trying_B : Bigram in_W trying_W ; --# prob 0.037
+  trying_to_B : Bigram trying_W to_W ; --# prob 0.037
+  to_adapt_B : Bigram to_W adapt_W ; --# prob 0.037
+  adapt_the_B : Bigram adapt_W the_W ; --# prob 0.037
+  world_to_B : Bigram world_W to_W ; --# prob 0.037
+  to_himself_B : Bigram to_W himself_W ; --# prob 0.037
+  himself_therefore_B : Bigram himself_W therefore_W ; --# prob 0.037
+  therefore_all_B : Bigram therefore_W all_W ; --# prob 0.037
+  all_progress_B : Bigram all_W progress_W ; --# prob 0.037
+  progress_depends_B : Bigram progress_W depends_W ; --# prob 0.037
+  depends_on_B : Bigram depends_W on_W ; --# prob 0.037
+  on_the_B : Bigram on_W the_W ; --# prob 0.037
+  unreasonable_man_B : Bigram unreasonable_W man_W ; --# prob 0.037
+
+  -- Trigrams: the 26 trigram tokens of the quote are all distinct, so each gets 1/26.
+  the_reasonable_man_T : Trigram the_W reasonable_W man_W ; --# prob 0.038
+  reasonable_man_adapts_T : Trigram reasonable_W man_W adapts_W ; --# prob 0.038
+  man_adapts_himself_T : Trigram man_W adapts_W himself_W ; --# prob 0.038
+  adapts_himself_to_T : Trigram adapts_W himself_W to_W ; --# prob 0.038
+  himself_to_the_T : Trigram himself_W to_W the_W ; --# prob 0.038
+  to_the_world_T : Trigram to_W the_W world_W ; --# prob 0.038
+  the_world_the_T : Trigram the_W world_W the_W ; --# prob 0.038
+  world_the_unreasonable_T : Trigram world_W the_W unreasonable_W ; --# prob 0.038
+  the_unreasonable_one_T : Trigram the_W unreasonable_W one_W ; --# prob 0.038
+  unreasonable_one_persists_T : Trigram unreasonable_W one_W persists_W ; --# prob 0.038
+  one_persists_in_T : Trigram one_W persists_W in_W ; --# prob 0.038
+  persists_in_trying_T : Trigram persists_W in_W trying_W ; --# prob 0.038
+  in_trying_to_T : Trigram in_W trying_W to_W ; --# prob 0.038
+  trying_to_adapt_T : Trigram trying_W to_W adapt_W ; --# prob 0.038
+  to_adapt_the_T : Trigram to_W adapt_W the_W ; --# prob 0.038
+  adapt_the_world_T : Trigram adapt_W the_W world_W ; --# prob 0.038
+  the_world_to_T : Trigram the_W world_W to_W ; --# prob 0.038
+  world_to_himself_T : Trigram world_W to_W himself_W ; --# prob 0.038
+  to_himself_therefore_T : Trigram to_W himself_W therefore_W ; --# prob 0.038
+  himself_therefore_all_T : Trigram himself_W therefore_W all_W ; --# prob 0.038
+  therefore_all_progress_T : Trigram therefore_W all_W progress_W ; --# prob 0.038
+  all_progress_depends_T : Trigram all_W progress_W depends_W ; --# prob 0.038
+  progress_depends_on_T : Trigram progress_W depends_W on_W ; --# prob 0.038
+  depends_on_the_T : Trigram depends_W on_W the_W ; --# prob 0.038
+  on_the_unreasonable_T : Trigram on_W the_W unreasonable_W ; --# prob 0.038
+  the_unreasonable_man_T : Trigram the_W unreasonable_W man_W ; --# prob 0.038
+}
\ No newline at end of file
diff --git a/examples/trigram/ShawCnc.gf b/examples/trigram/ShawCnc.gf
new file mode 100644
index 000000000..b56ec16a9
--- /dev/null
+++ b/examples/trigram/ShawCnc.gf
@@ -0,0 +1,24 @@
+concrete ShawCnc of Shaw = TrigramCnc ** {
+
+-- Each word constant of Shaw linearizes to its surface string.
+lin
+  the_W = "the" ;
+  reasonable_W = "reasonable" ;
+  man_W = "man" ;
+  adapts_W = "adapts" ;
+  himself_W = "himself" ;
+  to_W = "to" ;
+  world_W = "world" ;
+  unreasonable_W = "unreasonable" ;
+  one_W = "one" ;
+  persists_W = "persists" ;
+  in_W = "in" ;
+  trying_W = "trying" ;
+  adapt_W = "adapt" ;
+  therefore_W = "therefore" ;
+  all_W = "all" ;
+  progress_W = "progress" ;
+  depends_W = "depends" ;
+  on_W = "on" ;
+
+}
\ No newline at end of file
diff --git a/examples/trigram/Trigram.gf b/examples/trigram/Trigram.gf
new file mode 100644
index 000000000..2e6e18fdd
--- /dev/null
+++ b/examples/trigram/Trigram.gf
@@ -0,0 +1,34 @@
+abstract Trigram = {
+
+cat
+  -- A lexicon is a set of 'Word's
+  Word ;
+
+  -- All N-gram instances seen in the corpus are abstract syntax constants
+  Unigram (a : Word) ;
+  Bigram (a,b : Word) ;
+  Trigram (a,b,c : Word) ;
+
+  -- A text is a sequence of words, where the sequence is indexed by its last two tokens
+  Seq (a,b : Word) ;
+
+  -- The estimated probability of the trigram 'a b c' is the total probability of all
+  -- trees of type Prob a b c.
+  Prob (a,b,c : Word) ;
+
+data
+  -- Here we construct a sequence by using nil and cons. The Prob argument ensures
+  -- that the sequence contains only valid N-grams and contributes the right
+  -- probability mass
+  nil : (a,b,c : Word) -> Prob a b c -> Seq b c ;
+  cons : ({a,b} : Word) -> Seq a b -> (c : Word) -> Prob a b c -> Seq b c ;
+
+  -- Here we construct probabilities. There are three ways: by trigrams, by bigrams and
+  -- by unigrams. Since the trigramP, bigramP, unigramP functions have some associated
+  -- probabilities as well, this results in linear smoothing between the unigram, bigram
+  -- and trigram models
+  trigramP : ({a,b,c} : Word) -> Trigram a b c -> Prob a b c ;
+  bigramP : ({a,b,c} : Word) -> Bigram a b -> Bigram b c -> Prob a b c ;
+  unigramP : ({a,b,c} : Word) -> Unigram a -> Unigram b -> Unigram c -> Prob a b c ;
+
+}
\ No newline at end of file
diff --git a/examples/trigram/TrigramCnc.gf b/examples/trigram/TrigramCnc.gf
new file mode 100644
index 000000000..917aa3db8
--- /dev/null
+++ b/examples/trigram/TrigramCnc.gf
@@ -0,0 +1,15 @@
+concrete TrigramCnc of Trigram = {
+
+lincat
+  -- Words and sequences are plain strings
+  Word,Seq = Str;
+
+  -- N-gram and Prob terms are empty records: they add nothing to the linearization
+  Unigram, Bigram, Trigram, Prob = {} ;
+
+lin
+  -- nil emits its three words; cons appends the new word c to the sequence so far
+  nil a b c _ = a ++ b ++ c ;
+  cons _ _ l c _ = l ++ c ;
+
+}
\ No newline at end of file