added 'make ParseFre' and 'make ParseEngFre' to lib/src/Makefile; added the script french/MkWikt.hs

2013-10-10 23:32:05 +00:00
parent 85f7450427
commit 24c64d918e
6 changed files with 120 additions and 8 deletions
--- a/lib/src/Makefile
+++ b/lib/src/Makefile
@@ -149,6 +149,12 @@ ParseSpa:
 ParseEngSpa:
 	gf -make -literal=Symb -probs=$(PROBSFILE) -name=ParseEngSpa ParseEng.pgf ParseSpa.pgf

+# Compile the French parsing grammar french/ParseFre.gf under the name ParseFre,
+# with the same -literal and -probs options as the other Parse* targets.
+ParseFre:
+	gf -make -literal=Symb -probs=$(PROBSFILE) -name=ParseFre french/ParseFre.gf
+
+# Combine ParseEng.pgf and ParseFre.pgf under the name ParseEngFre.
+# The rule declares no prerequisites, so both .pgf files must already exist
+# (presumably built by the ParseEng and ParseFre targets -- confirm).
+ParseEngFre:
+	gf -make -literal=Symb -probs=$(PROBSFILE) -name=ParseEngFre ParseEng.pgf ParseFre.pgf
+
 ParseSwe:
 	gf -make -literal=Symb -probs=$(PROBSFILE) -name=ParseSwe swedish/ParseSwe.gf

--- a/lib/src/french/MkWikt.hs
+++ b/lib/src/french/MkWikt.hs
@@ -0,0 +1,103 @@
+import Data.Char
+import Data.List
+import qualified Data.Map as M
+import qualified Data.Set as S
+
+wiktFile = "en-fr-enwiktionary.txt"
+dictFuns = "dictfuns.txt"
+irregFre = "irregFre.txt"
+oldFre   = "oldFre.txt"
+
+-- AR 10/10/2013
+-- extracting a lexicon from Wiktionary as presented in
+--   http://en.wiktionary.org/wiki/User:Matthias_Buchmeier/download
+-- downloaded from
+--   https://hotfile.com/dl/248980034/2861991/dictionaries_enwiktionary_ding_dictd_20131002.tgz.html
+
+
+-- each line of dictFuns is split at the first digit or space, and the digits are kept:
+--   abjure_V2 : V2 ;    ==>   (abjure_V, 2)
+
+-- Read the input files named at the top of this script, keep the Wiktionary
+-- entries that correspond to a known dictionary function and are not already
+-- listed in the old French lexicon, and write one lexicon rule per function
+-- to NewDictFre.txt.
+main = do
+  -- split every line at the first digit or space: "abjure_V2 : V2 ;" ==> ("abjure_V", "2 : V2 ;")
+  funs <- readFile dictFuns >>= return . map (break (\c -> isDigit c || isSpace c)) . lines
+  -- name without the suffix  ->  the suffix up to the first space ("2", or "" if there is none)
+  let funmap = M.fromList [(w,takeWhile (not . isSpace) c) | (w,c) <- funs]
+  fverbmap <- readFile irregFre >>= return . M.fromList . map mkVerb . lines
+  -- first word of every line; blank lines are skipped instead of crashing on 'head []'.
+  -- The result gets its own name so that it no longer shadows the file-name constant oldFre.
+  oldFuns <- readFile oldFre >>= return . S.fromList . concatMap (take 1 . words) . lines
+  wiks0 <- readFile wiktFile >>= return . map analyseLine . lines
+  let wiks1 = [(w1,c1,f1) | (w0,c0,f0) <- wiks0,
+                            c0 /= "?",
+                            not (all isSpace f0),      -- not empty string
+                            let w = w0 ++ "_" ++ c0,
+                            Just cs <- [M.lookup w funmap],
+                            let c1 = c0 ++ cs,
+                            let w1 = w0 ++ "_" ++ c1,
+                            not (S.member w1 oldFuns),
+                            let f1 = analyseFre fverbmap c0 (uncomment f0),
+                            notElem ' ' (fst f1)       -- exclude multiwords, for sanity
+                           ]
+  let dict = unlines $ map convertLine $ groupEntries wiks1
+--  putStrLn dict
+  writeFile "NewDictFre.txt" dict
+
+-- Remove the first square-bracketed comment from a translation and normalise
+-- the whitespace that the removal leaves behind:
+--   [sur un sofa or sur un canapé] s'allonger  ==> s'allonger
+-- A string without '[' is returned unchanged. If the '[' is never closed,
+-- everything from the '[' onwards is dropped.
+uncomment s = case break (=='[') s of
+  -- 'unwords . words' trims both ends and collapses inner runs of spaces, so
+  -- the result has no stray blank where the comment used to be
+  (before,_:afterOpen) -> unwords (words (before ++ afterComment afterOpen))
+  _ -> s
+ where
+  -- the text following the closing ']', or nothing if there is none
+  afterComment t = case break (== ']') t of
+    (_,_:rest) -> rest
+    _ -> []
+
+-- Merge runs of adjacent entries that share the same first component into a
+-- single triple: the key and second component of the run's first entry,
+-- together with the third components of every entry in the run, in order.
+-- Only neighbouring entries are merged; equal keys further apart stay separate.
+groupEntries entries =
+  [ (key, c, [third | (_,_,third) <- run])
+  | run@((key,c,_):_) <- groupBy sameKey entries ]
+ where
+  sameKey (k1,_,_) (k2,_,_) = k1 == k2
+  
+
+-- Analyse one line of the Wiktionary dump:
+--   abjure {v} /æbˈdʒʊɹ/ (to renounce with solemnity) :: abjurer   ==>  (abjure, V, abjurer)
+-- The first word is the English headword, the second the part-of-speech tag
+-- in braces; of the text after "::" only the part before the first comma is
+-- kept. Lines that do not have this shape yield three empty strings.
+analyseLine l
+  | w:c:rest <- words l, head c == '{', "::" `elem` rest = (fun w, cat c, firstTranslation rest)
+  | otherwise = ([],[],[])
+ where
+  -- everything after the "::" separator, cut at the first comma
+  firstTranslation = takeWhile (/=',') . unwords . drop 1 . dropWhile (/= "::")
+
+-- Turn a headword into an identifier-safe name: every character that is not
+-- alphanumeric is replaced by an underscore.
+fun w = [if isAlphaNum ch then ch else '_' | ch <- w]
+
+-- Map a Wiktionary part-of-speech tag written in braces, e.g. "{adj}", to the
+-- category name used in the lexicon. Tags that are not listed here, including
+-- tokens too short to contain anything between the braces, give "?".
+cat s = case stripBraces s of
+  "adj"  -> "A"
+  "n"    -> "N"
+  "v"    -> "V"
+  "prop" -> "PN"
+  "adv"  -> "Adv"
+  "conj" -> "Conj"
+  "interj" -> "Interj"
+  "determiner" -> "Det"
+  _ -> "?"
+ where
+  -- drop the first and the last character; unlike 'init (tail t)' this is
+  -- defined for strings shorter than two characters (e.g. a lone "{")
+  stripBraces t = case drop 1 t of
+    []   -> []
+    body -> init body
+
+
+-- Analyse a French translation for an entry of category c.
+--   vmap : verb lookup table, infinitive -> identifier (see mkVerb)
+--   c    : category name as produced by cat ("N", "PN", "A", "V", ...)
+--   s    : the translation, possibly followed by a tag in braces ("chat {m}")
+-- Returns the word together with a list of extra parameters:
+--   * nouns and proper names tagged exactly {m} / {f} get "masculine" / "feminine";
+--   * any other tagged word is returned without the tag and without parameters;
+--   * untagged verbs starting with "se " or "s'" lose that prefix and get "reflV";
+--   * verbs found in vmap are returned as "I." followed by the mapped identifier.
+analyseFre vmap c s = case (c, break (=='{') s) of
+  ("N", (w,"{m}")) -> (trimEnd w,["masculine"])
+  ("N", (w,"{f}")) -> (trimEnd w,["feminine"])
+  ("PN", (w,"{m}")) -> (trimEnd w,["masculine"])
+  ("PN", (w,"{f}")) -> (trimEnd w,["feminine"])
+  (_,    (w,'{':_)) -> (trimEnd w,[])
+  (_,_)             -> case (c, splitAt 2 s) of
+    ('V':_,  ("se", ' ':v)) -> (mkV v, ["reflV"])
+    ('V':_,  ("s'",   v)) -> (mkV v, ["reflV"])
+    ('V':_,  _)           -> (mkV s, [])
+    _ -> (s,     [])
+ where
+  -- remove the blank(s) separating the word from its tag; unlike 'init' this
+  -- neither fails on an empty word nor cuts a letter when there is no blank
+  trimEnd = dropWhileEnd isSpace
+  mkV v = case M.lookup v vmap of
+    Just f -> "I." ++ f
+    _ -> v
+
+-- Turn one line of the irregular-verb list into a lookup pair: the first word
+-- of the line is the value, and its prefix up to the first underscore is the
+-- key. Only defined for lines that contain at least one word.
+mkVerb s
+  | ident:_ <- words s = (takeWhile (/='_') ident, ident)
+
+
+-- Render one grouped entry as a lexicon rule of the form
+--   <function> = <variant> | <variant> ... ;
+-- Each variant applies "mk" ++ cat. For categories starting with 'V' whose
+-- parameters are empty or exactly ["reflV"], the argument is built with mkV
+-- (wrapped in reflV in the latter case); a word starting with "I." is passed
+-- as a bare identifier, any other word as a quoted string. Every other variant
+-- is the quoted word followed by its parameters.
+convertLine (eng,cat,fps) = concat [eng, " = ", intercalate " | " (map lin fps), " ;"] where
+  isVerb = take 1 cat == "V"
+  lin (fre,ps)
+    | isVerb, ps == ["reflV"] = "mk" ++ cat ++ " (reflV (mkV " ++ verbArg ++ "))"
+    | isVerb, null ps         = "mk" ++ cat ++ " (mkV " ++ verbArg ++ ")"
+    | otherwise               = "mk" ++ cat ++ " \"" ++ fre ++ "\" " ++ unwords ps
+   where
+    -- identifiers of irregular verbs go in unquoted, plain infinitives quoted
+    verbArg
+      | "I." `isPrefixOf` fre = fre
+      | otherwise             = "\"" ++ fre ++ "\""
+
--- a/lib/src/french/ParseFre.gf
+++ b/lib/src/french/ParseFre.gf
@@ -10,7 +10,7 @@ concrete ParseFre of ParseEngAbs =
  VerbFre - [SlashV2V, PassV2, UseCopula, ComplVV  , SlashV2VNP],
  AdverbFre,
  PhraseFre,
-  SentenceFre - [  SlashVP],
+  SentenceFre - [  SlashVP, SlashVS],
  QuestionFre,
  RelativeFre,
  IdiomFre [NP, VP, Tense, Cl, ProgrVP, ExistNP],
--- a/src/ui/android/res/values/strings.xml
+++ b/src/ui/android/res/values/strings.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="utf-8"?>
 <resources>

-    <string name="app_name">GF Translator</string>
+    <string name="app_name">GF4Translator</string>

    <string name="microphone">Microphone</string>
    <string name="switch_languages">Switch languages</string>
--- a/src/ui/android/src/org/grammaticalframework/ui/android/LexicalEntryActivity.java
+++ b/src/ui/android/src/org/grammaticalframework/ui/android/LexicalEntryActivity.java
@@ -44,8 +44,8 @@ public class LexicalEntryActivity extends ListActivity {

 		List<String> data = new ArrayList<String>();
 	    for (MorphoAnalysis a : list) {
-	    	Expr e = Expr.readExpr(a.getLemma());
-	    	String phrase = mTranslator.linearize(e);
+		//	    	Expr e = Expr.readExpr(a.getLemma());
+	    	String phrase = "FOO" ; //mTranslator.linearize(e);
 	    	
 	    	if (!data.contains(phrase)) {
 		    	data.add(phrase);
--- a/src/ui/android/src/org/grammaticalframework/ui/android/Translator.java
+++ b/src/ui/android/src/org/grammaticalframework/ui/android/Translator.java
@@ -20,13 +20,16 @@ public class Translator {
    private static final String TAG = "Translator";

    // TODO: allow changing
-    private String mGrammar = "ParseEngAbs.pgf";
+    private String mGrammar = "ParseEngChi.pgf";

    // TODO: build dynamically?
    private Language[] mLanguages = {
            new Language("en-US", "English", "ParseEng"),
-            new Language("bg-BG", "Bulgarian", "ParseBul"),
-            new Language("cmn-Hans-CN", "Chinese", "ParseChi")
+            new Language("cmn-Hans-CN", "Chinese", "ParseChi"), 
+	    //            new Language("de-DE", "German",  "ParseGer"),
+	    //           new Language("es-ES", "Spanish", "ResourceDemoSpa"),
+            //new Language("fr-FR", "French", "ResourceDemoFre"),
+            //            new Language("bg-BG", "Bulgarian", "ParseBul"),
    };

    private Language mSourceLanguage;
@@ -92,7 +95,7 @@ public class Translator {
            return output;
        } catch (ParseError e) {
            Log.e(TAG, "Parse error: " + e);
-            return "parse error: " + e.getMessage(); // TODO: no no no
+            return "parse error: " + input + " " + e.getMessage(); // TODO: no no no
        }
    }