diff --git a/examples/TWA.cf b/examples/TWA.cf new file mode 100644 index 000000000..56b14dd2c --- /dev/null +++ b/examples/TWA.cf @@ -0,0 +1,52 @@ +-- example of probabilistic grammar from Jurafsky & Martin p. 449 + +PredVP. S ::= NP VP ; --# prob 0.80 +PredAux. S ::= Aux NP VP ; --# prob 0.15 +JustVP. S ::= VP ; --# prob 0.05 + +DetNO. NP ::= Det Nom ; --# prob 0.20 +PNounNP. NP ::= PNoun ; --# prob 0.35 +NomNP. NP ::= Nom ; --# prob 0.05 +ProNP. NP ::= Pro ; --# prob 0.40 + +NounNom. Nom ::= Noun ; --# prob 0.75 +CompNom. Nom ::= Noun Nom ; --# prob 0.20 +PNounNom. Nom ::= PNoun Nom ; --# prob 0.05 + +IntrVP. VP ::= Verb ; --# prob 0.55 +TrVP. VP ::= Verb NP ; --# prob 0.40 +DitrVP: VP ::= Verb NP NP ; --# prob 0.05 + +that. Det ::= "that" ; --# prob 0.05 +the. Det ::= "the" ; --# prob 0.80 +a. Det ::= "a" ; --# prob 0.15 + +bookN. Noun ::= "book" ; --# prob 0.10 +flights. Noun ::= "flights" ; --# prob 0.50 +meal. Noun ::= "meal" ; --# prob 0.40 + +bookV. Verb ::= "book" ; --# prob 0.30 +includeV. Verb ::= "include" ; --# prob 0.30 +want. Verb ::= "want" ; --# prob 0.40 + +can. Aux ::= "can" ; --# prob 0.40 +does. Aux ::= "does" ; --# prob 0.30 +do. Aux ::= "do" ; --# prob 0.30 + +TWA. PNoun ::= "TWA" ; --# prob 0.40 +Denver. PNoun ::= "Denver" ; --# prob 0.60 + +you. Pro ::= "you" ; --# prob 0.40 +I. Pro ::= "I" ; --# prob 0.60 + +-- > p -prob "can you book TWA flights" +-- +-- 4.3200000000000016e-7 +-- 3.7800000000000013e-7 +-- PredAux can (ProNP you) (TrVP bookV (NomNP (PNounNom TWA (NounNom flights)))) +-- PredAux can (ProNP you) (DitrVP bookV (PNounNP TWA) (NomNP (NounNom flights))) +-- [0.15, 0.40,0.40, 0.40, 0.05, 0.30, 0.35, 0.40, 0.05, 0.75, 0.50] +-- +-- J&M have different figures, but they seem to be wrong. For +-- instance, the products have 12 terms although the trees have only +-- 11 constructors. diff --git a/src/GF/Probabilistic/Probabilistic.hs b/src/GF/Probabilistic/Probabilistic.hs index daf382790..1126776c8 100644 --- a/src/GF/Probabilistic/Probabilistic.hs +++ b/src/GF/Probabilistic/Probabilistic.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/10/31 19:02:35 $ +-- > CVS $Date: 2005/11/01 09:10:54 $ -- > CVS $Author: aarne $ --- > CVS $Revision: 1.3 $ +-- > CVS $Revision: 1.4 $ -- -- Probabilistic abstract syntax. AR 30\/10\/2005 -- @@ -78,12 +78,19 @@ getProbsFromFile :: Options -> FilePath -> IO Probs getProbsFromFile opts file = do s <- maybe (readFile file) readFile $ getOptVal opts probFile return $ buildTree $ concatMap pProb $ lines s - where - pProb s = case words s of +-- where +pProb s = case words s of "--#":"prob":f:p:_ | isDouble p -> [(zIdent f, read p)] - f:p:_ | isDouble p -> [(zIdent f, read p)] + f:ps@(g:rest) -> case span (/= "--#") ps of + (_,_:"prob":p:_) | isDouble p -> [(zIdent f', readD p)] where + f' = if f=="fun" then ident g else ident f + _ -> [] _ -> [] + where isDouble = all (flip elem ('.':['0'..'9'])) + ident = takeWhile (flip notElem ".:") + readD :: String -> Double + readD = read type Probs = BinTree Ident Double