diff --git a/lib/doc/status.txt b/lib/doc/status.txt
index 94e6d661e..b6f0e9b87 100644
--- a/lib/doc/status.txt
+++ b/lib/doc/status.txt
@@ -50,7 +50,8 @@ are marked in the table
 | Tur | + | - | ++ | + | - | - | - | - | + | *SC,KA
 | Urd | + | + | ++ | + | + | + | + | - | - | *SV,MH
 
-Lang = 3-letter ISO language code, used in library file names
+Lang = 3-letter ISO language code, used in library file names
+(mostly ISO 639-2 B (bibliographic))
 
 Darcs = available in the darcs repository of
 --http://code.haskell.org/gf-- http://www.grammaticalframework.org/
diff --git a/src/www/gfmorpho/GFMorpho.hs b/src/www/gfmorpho/GFMorpho.hs
new file mode 100644
index 000000000..e01d1da56
--- /dev/null
+++ b/src/www/gfmorpho/GFMorpho.hs
@@ -0,0 +1,188 @@
+-- CGI back end of the GF smart paradigm service: decodes the query string,
+-- runs GF's "cc -table" on the requested paradigm, and prints an HTML table.
+
+import Network.HTTP.Base
+import Codec.Binary.UTF8.String
+import Data.Char
+import Data.List
+import System
+
+-- | Entry point; the arguments are the raw query string from the CGI script.
+main :: IO ()
+main = do
+  xs <- getArgs
+  let xxoo = lexArgs (unwords xs)
+  case pArgs xxoo of
+    Just (oo,xx) -> morpho oo xx
+    _ -> do
+      -- the query is echoed into an HTML response, so escape it
+      putStrLn $ "cannot read " ++ escapeHtml (unwords xs) ++ "."
+      putStrLn "<p>"
+      putStrLn usage
+
+-- | Usage line printed when the query cannot be read.
+usage :: String
+usage = "usage: gfmorpho LANG POS FORMS OPT*"
+
+-- | True for fewer than three arguments; not referenced elsewhere in this file.
+noParse :: [a] -> Bool
+noParse xx = length xx < 3
+
+-- | Split a query string into arguments: drop everything up to and including
+-- the first '=', treat '=' and '+' as separators, then URL-decode and
+-- UTF-8-decode each argument.
+lexArgs :: String -> [String]
+lexArgs = map (decodeString . urlDecode) . words . map unspec . drop 1 . dropWhile (/='=')
+  where
+    unspec c = case c of
+      '=' -> ' '
+      '+' -> ' '
+      _   -> c
+
+-- | Partition the arguments into (options, positional arguments); Nothing
+-- unless there are at least three positional arguments.
+pArgs :: [String] -> Maybe ([String],[String])
+pArgs xxoo
+  | length xx < 3 = Nothing
+  | otherwise     = Just (oo,xx)
+  where
+    (oo,xx) = partition isOption xxoo
+
+-- | Write the GF script, run GF on it, and print the table read back from
+-- the temporary output file, filtered by the options oo.
+morpho :: [String] -> [String] -> IO ()
+morpho oo xx = do
+  writeFile tmpCommand (script xx)
+  _ <- system $ command xx -- exit code ignored, as before
+  s <- readFile tmpFile
+  putStrLn $ mkFile $ response oo s
+
+-- | The one-line GF script. After a leading "!" the remaining arguments are
+-- passed through as a GF expression; otherwise mkPOS is applied to the forms,
+-- with a space in between because the first form may be a bare identifier.
+-- Control characters are blanked so that a decoded newline cannot start a
+-- second GF shell command.
+script :: [String] -> String
+script xx = "cc -table -unqual " ++ map blank body
+  where
+    body = case xx of
+      "!":_:rest  -> unwords rest
+      _:pos:forms -> "mk" ++ pos ++ " " ++ quotes forms
+      _           -> ""
+    blank c = if isControl c then ' ' else c
+
+-- | The shell command that feeds the script to GF. The language code comes
+-- from the query string and is spliced into a shell command line, so it is
+-- reduced to ASCII letters and digits.
+command :: [String] -> String
+command ("!":args) = command args
+command args =
+  "/usr/local/bin/gf -run -retain -path=alltenses alltenses/Paradigms" ++ lang ++ ".gfo"
+  ++ " < " ++ tmpCommand
+  ++ " > " ++ tmpFile
+  where
+    lang = filter (\c -> isAscii c && isAlphaNum c) (concat (take 1 args))
+
+-- | Render the forms as GF arguments: "_x" becomes the bare identifier x,
+-- anything else a quoted string.
+quotes :: [String] -> String
+quotes = unwords . map quote
+  where
+    quote ('_':feature) = feature
+    quote s             = "\"" ++ s ++ "\""
+
+-- | html response: one table row per non-blank line of GF output that
+-- matches all the patterns in oo.
+response :: [String] -> String -> String
+response oo =
+  tag "table border=1" . unlines . map (tag "tr" . unwords . cleanTable)
+    . grep oo . filter (not . null) . map words . lines
+
+-- | Turn one tokenised output line into two cells: the parameters and the
+-- form (the last token, which excludes multiwords). Total on empty lines.
+cleanTable :: [String] -> [String]
+cleanTable ws =
+  [tag "td" (escapeHtml (unwords param)), tag "td" (tag "i" (escapeHtml (unwords form)))]
+  where
+    (param,form) = getOne (map cleant ws)
+    cleant w = case w of
+      "s" -> ""
+      "." -> ""
+      _   -> cleanw w
+    cleanw = filter (flip notElem "()")
+    getOne toks = case filter (/= "=>") toks of
+      [] -> ([],[])
+      ww -> (init ww, [last ww])
+
+-- | Plain-text variant of 'response'; not referenced elsewhere in this file.
+responsePlain :: [String] -> String -> String
+responsePlain oo =
+  unlines . map unwords . grep oo . map cleanTablePlain . map words . lines
+
+-- | Plain-text cleanup of one tokenised line: "=>" becomes a tab, "s" and
+-- "." are blanked, parentheses are removed.
+cleanTablePlain :: [String] -> [String]
+cleanTablePlain = map clean
+  where
+    clean w = case w of
+      "=>" -> "\t"
+      "s"  -> ""
+      "."  -> ""
+      _    -> cleanw w
+    cleanw = filter (flip notElem "()")
+
+-- | Keep the lines in which every option, minus its leading '-', matches
+-- some word.
+grep :: [String] -> [[String]] -> [[String]]
+grep oo = filter (\ws -> all (flip matchIn ws) (map (drop 1) oo))
+
+-- | True if the pattern matches a whole word of ws; '*' in the pattern
+-- matches any substring.
+matchIn :: String -> [String] -> Bool
+matchIn p ws = any (match p) ws
+  where
+    match pat w = case (pat,w) of
+      ('*':ps,_)  -> any (match ps) (tails w)
+      (c:ps,d:ds) -> c == d && match ps ds
+      _           -> pat == w
+
+-- | Temporary files through which the script and GF's output are passed.
+tmpFile, tmpCommand :: FilePath
+tmpFile = "_gfmorpho.tmp"
+tmpCommand = "_gfcommand.tmp"
+
+-- | An option is an argument starting with '-'.
+isOption :: String -> Bool
+isOption ('-':_) = True
+isOption _       = False
+
+-- | Wrap s in the element t. t may carry attributes; only the element name
+-- (up to the first space) is repeated in the closing tag.
+tag :: String -> String -> String
+tag t s = "<" ++ t ++ ">" ++ s ++ "</" ++ takeWhile (not . isSpace) t ++ ">"
+
+-- | Escape the characters that are special in HTML text.
+escapeHtml :: String -> String
+escapeHtml = concatMap esc
+  where
+    esc c = case c of
+      '<' -> "&lt;"
+      '>' -> "&gt;"
+      '&' -> "&amp;"
+      '"' -> "&quot;"
+      _   -> [c]
+
+-- | html file with UTF8
+mkFile :: String -> String
+mkFile s = unlines
+  [ "<html>"
+  , "<head>"
+  , "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">"
+  , "<title>GF Smart Paradigm Output</title>"
+  , "</head>"
+  , "<body>"
+  , s
+  , "</body>"
+  , "</html>"
+  ]
+
diff --git a/src/www/gfmorpho/README b/src/www/gfmorpho/README
new file mode 100644
index 000000000..2090bb5d3
--- /dev/null
+++ b/src/www/gfmorpho/README
@@ -0,0 +1,23 @@
+A service for using smart paradigms on the web.
+
+Works with a cgi script running a Haskell program that calls GF to interpret a query string as a "cc" command on a specified Paradigms file. For instance, if the
+user submits the query
+
+  Eng N baby
+
+the program executes the command
+
+  cc -table -unqual ParadigmsEng.mkN "baby"
+
+The resulting output is converted into an HTML table.
+
+The file gfmorpho.html gives some more information. Open issues in addition to those mentioned there are:
+
+- GFMorpho.hs creates the temporary files _gfcommand.tmp and _gfmorpho.tmp which need to be world-writable; they should be created more properly and removed after use
+- gfmorpho.cgi defines the variable GF_LIB_PATH to reside in /Users/aarne, and must be edited for other environments
+- to work for all languages mentioned, one has to compile some incomplete GF grammars not standardly compiled:
+
+  GF/lib/src$ runghc Make alltenses lang langs=Amh,Ara,Lat,Mlt,Tur
+
+(c) Aarne Ranta 2012 under LGPL/BSD.
+
diff --git a/src/www/gfmorpho/gfmorpho.cgi b/src/www/gfmorpho/gfmorpho.cgi
new file mode 100644
index 000000000..c08a9450c
--- /dev/null
+++ b/src/www/gfmorpho/gfmorpho.cgi
@@ -0,0 +1,10 @@
+#!/bin/bash
+# CGI wrapper: emits the HTTP header, then runs GFMorpho on the query string.
+
+# The page is UTF-8, so declare the charset in the header as well.
+echo "Content-type: text/html; charset=UTF-8"
+echo ""
+# presumably so that the Haskell program's I/O uses UTF-8 -- confirm
+export LANG=en_US.UTF-8
+runghc GFMorpho "$QUERY_STRING"
+
diff --git a/src/www/gfmorpho/gfmorpho.html b/src/www/gfmorpho/gfmorpho.html
new file mode 100644
index 000000000..3a58d4442
--- /dev/null
+++ b/src/www/gfmorpho/gfmorpho.html
@@ -0,0 +1,100 @@
+
+
+
+Use GF Smart Paradigms
+
+
+
+

Word inflection with smart paradigms

+ +Give a language, a part of speech, and one or more word forms to obtain +the inflection table. +

+

+ + +
+Examples: +
+  Eng N baby
+  Fin V odottaa odotti
+  Fre V manger
+  Ger N Soldat Soldaten _masculine
+  Hin N बच्छा
+  Jpn V 答える _Gr2
+  Lat A vetus veteris
+
+Note that strings are given without quotes, but features +are prefixed with an underscore _ (a temporary hack). + + +

Languages and part of speech tags

+ +The available languages are: +
+  Afr Amh Cat Dan Dut Eng Fin Fre Ger Hin Ina Ita Jpn Lat
+  Lav Nep Nor Pes Pnb Ron Rus Snd Spa Swe Tha Tur Urd
+
+In addition, the library has the languages Ara, Bul, and Pol, but they +are not yet available in this way; you can, however, use the full form of +paradigm applications prefixed by "!", as described below. + +

+ +The parts of speech are: N (= noun), A (= adjective), V (= verb). + +

+ +The way this works is that the program constructs the most probable +inflection table from the forms given. For a vast majority of words in +all languages, it is enough to give just one form. But sometimes more +forms are needed to get the inflection table right. + + +

Filtering with patterns

+ +You may not want to see the whole table. Then you can filter it with patterns, each of which works like +"grep", using * to match any substring, either in the +features or in the forms: +
+  Eng N baby -Gen
+  Eng V die -dy*
+
+This is a front end to the Paradigms modules in the GF Resource Grammar. +See RGL +Synopsis for available languages and paradigms. + + +

Using custom paradigms

+ +(Another temporary hack, for GF experts:) If you want to use paradigms other than the smart +mk paradigms, you can prefix your input with ! and +use the normal expression syntax of GF. For example: +
+  ! Ara brkN "طير" "فَعل" "فُعُول" Masc NoHum
+  ! Bul mkN041 "птица"
+  ! Pol mkRegAdj "duży" "większy" "dużo" "więcej"
+
+This also allows you to use structured terms: +
+  ! Ger prefixV "auf" (mkV "fassen")
+
+ + +

To do

+ + + +

+ +Powered by GF. Aarne Ranta 2012. + +


+
+ Last modified: Wed Sep 12 14:24:51 CEST 2012 +