forked from GitHub/gf-core
added 'make ParseFre' and 'make ParseEngFre' to lib/src/Makefile; added the script french/MkWikt.hs
This commit is contained in:
@@ -149,6 +149,12 @@ ParseSpa:
|
||||
ParseEngSpa:
|
||||
gf -make -literal=Symb -probs=$(PROBSFILE) -name=ParseEngSpa ParseEng.pgf ParseSpa.pgf
|
||||
|
||||
ParseFre:
|
||||
gf -make -literal=Symb -probs=$(PROBSFILE) -name=ParseFre french/ParseFre.gf
|
||||
|
||||
ParseEngFre:
|
||||
gf -make -literal=Symb -probs=$(PROBSFILE) -name=ParseEngFre ParseEng.pgf ParseFre.pgf
|
||||
|
||||
ParseSwe:
|
||||
gf -make -literal=Symb -probs=$(PROBSFILE) -name=ParseSwe swedish/ParseSwe.gf
|
||||
|
||||
|
||||
103
lib/src/french/MkWikt.hs
Normal file
103
lib/src/french/MkWikt.hs
Normal file
@@ -0,0 +1,103 @@
|
||||
import Data.Char
|
||||
import Data.List
|
||||
import qualified Data.Map as M
|
||||
import qualified Data.Set as S
|
||||
|
||||
wiktFile = "en-fr-enwiktionary.txt"
|
||||
dictFuns = "dictfuns.txt"
|
||||
irregFre = "irregFre.txt"
|
||||
oldFre = "oldFre.txt"
|
||||
|
||||
-- AR 10/10/2013
|
||||
-- extracting a lexicon from Wiktionary as presented in
|
||||
-- http://en.wiktionary.org/wiki/User:Matthias_Buchmeier/download
|
||||
-- downloaded from
|
||||
-- https://hotfile.com/dl/248980034/2861991/dictionaries_enwiktionary_ding_dictd_20131002.tgz.html
|
||||
|
||||
|
||||
-- abjure_V2 : V2 ; ==> (abjure_V, 2)
|
||||
|
||||
main = do
|
||||
funs <- readFile dictFuns >>= return . map (break (\c -> isDigit c || isSpace c)) . lines
|
||||
let funmap = M.fromList [(w,takeWhile (not . isSpace) c) | (w,c) <- funs]
|
||||
fverbmap <- readFile irregFre >>= return . M.fromList . map mkVerb . lines
|
||||
oldFre <- readFile oldFre >>= return . S.fromList . map (head . words) . lines
|
||||
wiks0 <- readFile wiktFile >>= return . map analyseLine . lines
|
||||
let wiks1 = [(w1,c1,f1) | (w0,c0,f0) <- wiks0,
|
||||
c0 /= "?",
|
||||
not (all isSpace f0), -- not empty string
|
||||
let w = w0 ++ "_" ++ c0,
|
||||
Just cs <- [M.lookup w funmap],
|
||||
let c1 = c0 ++ cs,
|
||||
let w1 = w0 ++ "_" ++ c1,
|
||||
not (S.member w1 oldFre),
|
||||
let f1 = analyseFre fverbmap c0 (uncomment f0),
|
||||
notElem ' ' (fst f1) -- exclude multiwords, for sanity
|
||||
]
|
||||
let dict = unlines $ map convertLine $ groupEntries wiks1
|
||||
-- putStrLn dict
|
||||
writeFile "NewDictFre.txt" dict
|
||||
|
||||
-- [sur un sofa or sur un canapé] s'allonger ==> s'allonger
|
||||
uncomment s = case break (=='[') s of
|
||||
(s1,_:s2) -> s1 ++ case break (== ']') s2 of
|
||||
(_,_:s4) -> s4
|
||||
_ -> []
|
||||
_ -> s
|
||||
|
||||
groupEntries = map variants . groupBy sameFun where
|
||||
sameFun (f,_,_) (g,_,_) = f == g
|
||||
variants fes@((f,c,_):_) = (f,c,[s | (_,_,s) <- fes])
|
||||
|
||||
|
||||
-- abjure {v} /æbˈdʒʊɹ/ (to renounce with solemnity) :: abjurer ==> (abjure, V, abjurer)
|
||||
|
||||
analyseLine l = case words l of
|
||||
w:c:rest | head c == '{' && elem "::" rest ->
|
||||
(fun w,cat c, takeWhile (/=',') (unwords (tail (dropWhile (/= "::") rest))))
|
||||
_ -> ([],[],[])
|
||||
|
||||
fun = map fc where
|
||||
fc c = if isAlphaNum c then c else '_'
|
||||
|
||||
cat s = case (init (tail s)) of
|
||||
"adj" -> "A"
|
||||
"n" -> "N"
|
||||
"v" -> "V"
|
||||
"prop" -> "PN"
|
||||
"adv" -> "Adv"
|
||||
"conj" -> "Conj"
|
||||
"interj" -> "Interj"
|
||||
"determiner" -> "Det"
|
||||
_ -> "?"
|
||||
|
||||
|
||||
analyseFre vmap c s = case (c, break (=='{') s) of
|
||||
("N", (w,"{m}")) -> (init w,["masculine"])
|
||||
("N", (w,"{f}")) -> (init w,["feminine"])
|
||||
("PN", (w,"{m}")) -> (init w,["masculine"])
|
||||
("PN", (w,"{f}")) -> (init w,["feminine"])
|
||||
("A", (w,'{':_)) -> (init w,[])
|
||||
(_, (w,'{':_)) -> (init w,[])
|
||||
(_,_) -> case (c, splitAt 2 s) of
|
||||
('V':_, ("se", ' ':v)) -> (mkV v, ["reflV"])
|
||||
('V':_, ("s'", v)) -> (mkV v, ["reflV"])
|
||||
('V':_, _) -> (mkV s, [])
|
||||
_ -> (s, [])
|
||||
where
|
||||
mkV s = case M.lookup s vmap of
|
||||
Just f -> "I." ++ f
|
||||
_ -> s
|
||||
|
||||
mkVerb s = case words s of
|
||||
v:_ -> (takeWhile (/='_') v, v)
|
||||
|
||||
|
||||
convertLine (eng,cat,fps) = eng ++ " = " ++ unwords (intersperse "|" (map lin fps)) ++ " ;" where
|
||||
lin (fre,ps) = case (cat,fre,ps) of
|
||||
('V':_, 'I':'.':_, ["reflV"]) -> "mk" ++ cat ++ " (reflV (mkV " ++ fre ++ "))"
|
||||
('V':_, 'I':'.':_, []) -> "mk" ++ cat ++ " (mkV " ++ fre ++ ")"
|
||||
('V':_, _, ["reflV"]) -> "mk" ++ cat ++ " (reflV (mkV \"" ++ fre ++ "\"))"
|
||||
('V':_, _, []) -> "mk" ++ cat ++ " (mkV \"" ++ fre ++ "\")"
|
||||
_ -> "mk" ++ cat ++ " \"" ++ fre ++ "\" " ++ unwords ps
|
||||
|
||||
@@ -10,7 +10,7 @@ concrete ParseFre of ParseEngAbs =
|
||||
VerbFre - [SlashV2V, PassV2, UseCopula, ComplVV , SlashV2VNP],
|
||||
AdverbFre,
|
||||
PhraseFre,
|
||||
SentenceFre - [ SlashVP],
|
||||
SentenceFre - [ SlashVP, SlashVS],
|
||||
QuestionFre,
|
||||
RelativeFre,
|
||||
IdiomFre [NP, VP, Tense, Cl, ProgrVP, ExistNP],
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<resources>
|
||||
|
||||
<string name="app_name">GF Translator</string>
|
||||
<string name="app_name">GF4Translator</string>
|
||||
|
||||
<string name="microphone">Microphone</string>
|
||||
<string name="switch_languages">Switch languages</string>
|
||||
|
||||
@@ -44,8 +44,8 @@ public class LexicalEntryActivity extends ListActivity {
|
||||
|
||||
List<String> data = new ArrayList<String>();
|
||||
for (MorphoAnalysis a : list) {
|
||||
Expr e = Expr.readExpr(a.getLemma());
|
||||
String phrase = mTranslator.linearize(e);
|
||||
// Expr e = Expr.readExpr(a.getLemma());
|
||||
String phrase = "FOO" ; //mTranslator.linearize(e);
|
||||
|
||||
if (!data.contains(phrase)) {
|
||||
data.add(phrase);
|
||||
|
||||
@@ -20,13 +20,16 @@ public class Translator {
|
||||
private static final String TAG = "Translator";
|
||||
|
||||
// TODO: allow changing
|
||||
private String mGrammar = "ParseEngAbs.pgf";
|
||||
private String mGrammar = "ParseEngChi.pgf";
|
||||
|
||||
// TODO: build dynamically?
|
||||
private Language[] mLanguages = {
|
||||
new Language("en-US", "English", "ParseEng"),
|
||||
new Language("bg-BG", "Bulgarian", "ParseBul"),
|
||||
new Language("cmn-Hans-CN", "Chinese", "ParseChi")
|
||||
new Language("cmn-Hans-CN", "Chinese", "ParseChi"),
|
||||
// new Language("de-DE", "German", "ParseGer"),
|
||||
// new Language("es-ES", "Spanish", "ResourceDemoSpa"),
|
||||
//new Language("fr-FR", "French", "ResourceDemoFre"),
|
||||
// new Language("bg-BG", "Bulgarian", "ParseBul"),
|
||||
};
|
||||
|
||||
private Language mSourceLanguage;
|
||||
@@ -92,7 +95,7 @@ public class Translator {
|
||||
return output;
|
||||
} catch (ParseError e) {
|
||||
Log.e(TAG, "Parse error: " + e);
|
||||
return "parse error: " + e.getMessage(); // TODO: no no no
|
||||
return "parse error: " + input + " " + e.getMessage(); // TODO: no no no
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user