Linearization by chunks in the GF shell: a new command 'lc' is needed because 'l' requires type checking, and trees whose function head is a metavariable do not type check. This will hopefully be a temporary command.

This commit is contained in:
aarne
2013-11-05 17:28:47 +00:00
parent 3814841d7d
commit aba666c5bc
4 changed files with 53 additions and 15 deletions

View File

@@ -529,6 +529,24 @@ allCommands = Map.fromList [
("unlexer","set unlexers separately to each language (space-separated)")
]
}),
-- Shell command 'lc' (linearize_chunks): linearizes a tree chunk by chunk.
-- It goes through optLins with the "chunks" option always appended to the
-- user's options, and sets needsTypeCheck = False so that trees with a
-- metavariable in function position are accepted.
("lc", emptyCommandInfo {
  longname = "linearize_chunks",
  synopsis = "linearize a tree that has metavariables in maximal chunks without them",
  explanation = unlines [
    "A hopefully temporary command, intended to work around the type checker that fails",
    "trees where a function node is a metavariable."
    ],
  examples = [
    -- The example must invoke this command itself; chunking is implied by
    -- 'lc' (see exec below), so no explicit -chunks option is given.
    mkEx "lc -lang=LangSwe,LangNor ? a b (? c d)"
    ],
  -- Same pipeline as plain linearization, with "chunks" forced on.
  exec = \env@(pgf, mos) opts -> return . fromStrings . optLins pgf (opts ++ [OOpt "chunks"]),
  options = [
    ] ++ stringOpOptions,
  flags = [
    ("lang","the languages of linearization (comma-separated, no spaces)")
    ],
  needsTypeCheck = False
  }),
("ma", emptyCommandInfo {
longname = "morpho_analyse",
synopsis = "print the morphological analyses of all words in the string",
@@ -1155,6 +1173,8 @@ allCommands = Map.fromList [
_ | isOpt "treebank" opts ->
(showCId (abstractName pgf) ++ ": " ++ showExpr [] t) :
[showCId lang ++ ": " ++ linear pgf opts lang t | lang <- optLangs pgf opts]
_ | isOpt "chunks" opts ->
[unwords (intersperse "<+>" (map (linear pgf opts lang) (treeChunks t))) | lang <- optLangs pgf opts]
_ -> [linear pgf opts lang t | lang <- optLangs pgf opts]
linear :: PGF -> [Option] -> CId -> Expr -> String

View File

@@ -1,6 +1,7 @@
module GF.Command.TreeOperations (
treeOp,
allTreeOps
allTreeOps,
treeChunks
) where
import PGF
@@ -30,6 +31,8 @@ allTreeOps pgf = [
Left $ concatMap subtrees)),
("funs",("return all fun functions appearing in the tree, with duplications",
Left $ concatMap funNodes))
--- ("chunks",("return all chunks, i.e. maximal subtrees where the top node is not a metavariable",
--- Left $ concatMap treeChunks)) --- a tree with ? head does not type check anyway AR 5/11/2013
]
largest :: [Expr] -> [Expr]
@@ -42,6 +45,15 @@ smallest = sortBy (\t u -> compare (size t) (size u)) where
EApp e1 e2 -> size e1 + size e2 + 1
_ -> 1
-- | Split a tree into chunks: the maximal subtrees in which no application
-- head (as computed by 'unAppForm') is a metavariable. A tree without such
-- metavariables comes back as a single chunk; a metavariable head
-- contributes only the chunks of its arguments. Heads other than function
-- constants and metavariables are accepted as complete without looking
-- inside them.
treeChunks :: Expr -> [Expr]
treeChunks tree = chunks
  where
    (_, chunks) = go tree

    -- Returns whether the subtree is complete (usable as one chunk)
    -- together with the chunks found in it.
    go e = case unAppForm e of
      (EFun _, args)
        | all fst results -> (True, [e])                    -- every argument complete: keep e whole
        | otherwise       -> (False, concatMap snd results) -- pass up the arguments' chunks
        where results = map go args
      (EMeta _, args) -> (False, concatMap (snd . go) args)
      _               -> (True, [e])
subtrees :: Expr -> [Expr]
subtrees t = t : case unApp t of
Just (f,ts) -> concatMap subtrees ts

View File

@@ -2,7 +2,7 @@ module PGF.Expr(Tree, BindType(..), Expr(..), Literal(..), Patt(..), Equation(..
readExpr, showExpr, pExpr, pBinds, ppExpr, ppPatt, pattScope,
mkAbs, unAbs,
mkApp, unApp,
mkApp, unApp, unAppForm,
mkStr, unStr,
mkInt, unInt,
mkDouble, unDouble,
@@ -112,13 +112,19 @@ mkApp f es = foldl EApp (EFun f) es
-- | Decomposes an expression into a function and the arguments it is
-- applied to; gives 'Nothing' if the head is not a function constant.
unApp :: Expr -> Maybe (CId,[Expr])
unApp = extract []
unApp e = case unAppForm e of
(EFun f,es) -> Just (f,es)
_ -> Nothing
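-- | Decomposes an expression into its head and the list of arguments the
-- head is applied to. The head is whatever remains once all applications
-- are peeled off, e.g. a function constant or a metavariable; type
-- annotations and implicit-argument markers on the way are skipped.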
unAppForm :: Expr -> (Expr,[Expr])
unAppForm = extract []
where
extract es (EFun f) = Just (f,es)
extract es f@(EFun _) = (f,es)
extract es (EApp e1 e2) = extract (e2:es) e1
extract es (ETyped e ty)= extract es e
extract es (EImplArg e) = extract es e
extract es _ = Nothing
extract es h = (h,es)
-- | Constructs an expression from string literal
mkStr :: String -> Expr

View File

@@ -22,18 +22,18 @@ public class Translator {
private static final String TAG = "Translator";
// TODO: allow changing
private String mGrammar = "ParseEngAbs.pgf";
///private String mGrammar = "TranslateEngChiFinSwe.pgf"; // AR
/// private String mGrammar = "ParseEngAbs.pgf";
private String mGrammar = "TranslateEngChiFinSwe.pgf"; // AR
// TODO: build dynamically?
private Language[] mLanguages = {
///new Language("en-US", "English", "TranslateEng", R.xml.inflection_en), // AR
///new Language("cmn-Hans-CN", "Chinese", "TranslateChi", 0),
/// new Language("fi-FI", "Finnish", "TranslateFin", 0),
/// new Language("sv-SE", "Swedish", "TranslateSwe", 0),
new Language("en-US", "English", "TranslateEng", R.xml.inflection_en), // AR
new Language("cmn-Hans-CN", "Chinese", "TranslateChi", 0),
new Language("fi-FI", "Finnish", "TranslateFin", 0),
new Language("sv-SE", "Swedish", "TranslateSwe", 0),
new Language("en-US", "English", "ParseEng", R.xml.inflection_en),
new Language("bg-BG", "Bulgarian", "ParseBul", R.xml.inflection_bg),
/// new Language("en-US", "English", "ParseEng", R.xml.inflection_en),
/// new Language("bg-BG", "Bulgarian", "ParseBul", R.xml.inflection_bg),
};
private Language mSourceLanguage;