SRG generation: merge categories that have identical sets of productions. The LC_LR algorithm produces many such categories, especially when there is little inflection.

This commit is contained in:
bringert
2007-03-24 23:29:38 +00:00
parent 436ddd5ebf
commit ed1e7f4485
2 changed files with 14 additions and 1 deletions

View File

@@ -95,7 +95,8 @@ makeSimpleSRG opt s =
probs = stateProbs s
l = fmap (replace '_' '-') $ getOptVal opts speechLanguage
(cats,cfgRules) = unzip $ preprocess $ cfgToCFRules s
preprocess = removeLeftRecursion origStart
preprocess = mergeIdentical
. removeLeftRecursion origStart
. fix (topDownFilter origStart . bottomUpFilter)
. removeCycles
names = mkCatNames name cats