1
0
forked from GitHub/gf-core

added 'make ParseFre' and 'make ParseEngFre' to lib/src/Makefile; added the script french/MkWikt.hs

This commit is contained in:
aarne
2013-10-10 23:32:05 +00:00
parent 85f7450427
commit 24c64d918e
6 changed files with 120 additions and 8 deletions

View File

@@ -149,6 +149,12 @@ ParseSpa:
ParseEngSpa:
gf -make -literal=Symb -probs=$(PROBSFILE) -name=ParseEngSpa ParseEng.pgf ParseSpa.pgf
ParseFre:
gf -make -literal=Symb -probs=$(PROBSFILE) -name=ParseFre french/ParseFre.gf
ParseEngFre:
gf -make -literal=Symb -probs=$(PROBSFILE) -name=ParseEngFre ParseEng.pgf ParseFre.pgf
ParseSwe:
gf -make -literal=Symb -probs=$(PROBSFILE) -name=ParseSwe swedish/ParseSwe.gf

103
lib/src/french/MkWikt.hs Normal file
View File

@@ -0,0 +1,103 @@
import Data.Char
import Data.List
import qualified Data.Map as M
import qualified Data.Set as S
wiktFile = "en-fr-enwiktionary.txt"
dictFuns = "dictfuns.txt"
irregFre = "irregFre.txt"
oldFre = "oldFre.txt"
-- AR 10/10/2013
-- extracting a lexicon from Wiktionary as presented in
-- http://en.wiktionary.org/wiki/User:Matthias_Buchmeier/download
-- downloaded from
-- https://hotfile.com/dl/248980034/2861991/dictionaries_enwiktionary_ding_dictd_20131002.tgz.html
-- abjure_V2 : V2 ; ==> (abjure_V, 2)
main = do
funs <- readFile dictFuns >>= return . map (break (\c -> isDigit c || isSpace c)) . lines
let funmap = M.fromList [(w,takeWhile (not . isSpace) c) | (w,c) <- funs]
fverbmap <- readFile irregFre >>= return . M.fromList . map mkVerb . lines
oldFre <- readFile oldFre >>= return . S.fromList . map (head . words) . lines
wiks0 <- readFile wiktFile >>= return . map analyseLine . lines
let wiks1 = [(w1,c1,f1) | (w0,c0,f0) <- wiks0,
c0 /= "?",
not (all isSpace f0), -- not empty string
let w = w0 ++ "_" ++ c0,
Just cs <- [M.lookup w funmap],
let c1 = c0 ++ cs,
let w1 = w0 ++ "_" ++ c1,
not (S.member w1 oldFre),
let f1 = analyseFre fverbmap c0 (uncomment f0),
notElem ' ' (fst f1) -- exclude multiwords, for sanity
]
let dict = unlines $ map convertLine $ groupEntries wiks1
-- putStrLn dict
writeFile "NewDictFre.txt" dict
-- [sur un sofa or sur un canapé] s'allonger ==> s'allonger
uncomment s = case break (=='[') s of
(s1,_:s2) -> s1 ++ case break (== ']') s2 of
(_,_:s4) -> s4
_ -> []
_ -> s
groupEntries = map variants . groupBy sameFun where
sameFun (f,_,_) (g,_,_) = f == g
variants fes@((f,c,_):_) = (f,c,[s | (_,_,s) <- fes])
-- abjure {v} /æbˈdʒʊɹ/ (to renounce with solemnity) :: abjurer ==> (abjure, V, abjurer)
analyseLine l = case words l of
w:c:rest | head c == '{' && elem "::" rest ->
(fun w,cat c, takeWhile (/=',') (unwords (tail (dropWhile (/= "::") rest))))
_ -> ([],[],[])
fun = map fc where
fc c = if isAlphaNum c then c else '_'
cat s = case (init (tail s)) of
"adj" -> "A"
"n" -> "N"
"v" -> "V"
"prop" -> "PN"
"adv" -> "Adv"
"conj" -> "Conj"
"interj" -> "Interj"
"determiner" -> "Det"
_ -> "?"
analyseFre vmap c s = case (c, break (=='{') s) of
("N", (w,"{m}")) -> (init w,["masculine"])
("N", (w,"{f}")) -> (init w,["feminine"])
("PN", (w,"{m}")) -> (init w,["masculine"])
("PN", (w,"{f}")) -> (init w,["feminine"])
("A", (w,'{':_)) -> (init w,[])
(_, (w,'{':_)) -> (init w,[])
(_,_) -> case (c, splitAt 2 s) of
('V':_, ("se", ' ':v)) -> (mkV v, ["reflV"])
('V':_, ("s'", v)) -> (mkV v, ["reflV"])
('V':_, _) -> (mkV s, [])
_ -> (s, [])
where
mkV s = case M.lookup s vmap of
Just f -> "I." ++ f
_ -> s
mkVerb s = case words s of
v:_ -> (takeWhile (/='_') v, v)
convertLine (eng,cat,fps) = eng ++ " = " ++ unwords (intersperse "|" (map lin fps)) ++ " ;" where
lin (fre,ps) = case (cat,fre,ps) of
('V':_, 'I':'.':_, ["reflV"]) -> "mk" ++ cat ++ " (reflV (mkV " ++ fre ++ "))"
('V':_, 'I':'.':_, []) -> "mk" ++ cat ++ " (mkV " ++ fre ++ ")"
('V':_, _, ["reflV"]) -> "mk" ++ cat ++ " (reflV (mkV \"" ++ fre ++ "\"))"
('V':_, _, []) -> "mk" ++ cat ++ " (mkV \"" ++ fre ++ "\")"
_ -> "mk" ++ cat ++ " \"" ++ fre ++ "\" " ++ unwords ps

View File

@@ -10,7 +10,7 @@ concrete ParseFre of ParseEngAbs =
VerbFre - [SlashV2V, PassV2, UseCopula, ComplVV , SlashV2VNP],
AdverbFre,
PhraseFre,
SentenceFre - [ SlashVP],
SentenceFre - [ SlashVP, SlashVS],
QuestionFre,
RelativeFre,
IdiomFre [NP, VP, Tense, Cl, ProgrVP, ExistNP],

View File

@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<string name="app_name">GF Translator</string>
<string name="app_name">GF4Translator</string>
<string name="microphone">Microphone</string>
<string name="switch_languages">Switch languages</string>

View File

@@ -44,8 +44,8 @@ public class LexicalEntryActivity extends ListActivity {
List<String> data = new ArrayList<String>();
for (MorphoAnalysis a : list) {
Expr e = Expr.readExpr(a.getLemma());
String phrase = mTranslator.linearize(e);
// Expr e = Expr.readExpr(a.getLemma());
String phrase = "FOO" ; //mTranslator.linearize(e);
if (!data.contains(phrase)) {
data.add(phrase);

View File

@@ -20,13 +20,16 @@ public class Translator {
private static final String TAG = "Translator";
// TODO: allow changing
private String mGrammar = "ParseEngAbs.pgf";
private String mGrammar = "ParseEngChi.pgf";
// TODO: build dynamically?
private Language[] mLanguages = {
new Language("en-US", "English", "ParseEng"),
new Language("bg-BG", "Bulgarian", "ParseBul"),
new Language("cmn-Hans-CN", "Chinese", "ParseChi")
new Language("cmn-Hans-CN", "Chinese", "ParseChi"),
// new Language("de-DE", "German", "ParseGer"),
// new Language("es-ES", "Spanish", "ResourceDemoSpa"),
//new Language("fr-FR", "French", "ResourceDemoFre"),
// new Language("bg-BG", "Bulgarian", "ParseBul"),
};
private Language mSourceLanguage;
@@ -92,7 +95,7 @@ public class Translator {
return output;
} catch (ParseError e) {
Log.e(TAG, "Parse error: " + e);
return "parse error: " + e.getMessage(); // TODO: no no no
return "parse error: " + input + " " + e.getMessage(); // TODO: no no no
}
}