forked from GitHub/gf-core
First version of a web server for morphological paradigms.
This commit is contained in:
@@ -50,7 +50,8 @@ are marked in the table
|
|||||||
| Tur | + | - | ++ | + | - | - | - | - | + | *SC,KA
|
| Tur | + | - | ++ | + | - | - | - | - | + | *SC,KA
|
||||||
| Urd | + | + | ++ | + | + | + | + | - | - | *SV,MH
|
| Urd | + | + | ++ | + | + | + | + | - | - | *SV,MH
|
||||||
|
|
||||||
Lang = 3-letter ISO language code, used in library file names
|
Lang = 3-letter ISO language code, used in library file names
|
||||||
|
(mostly ISO 639-2 B (bibliographic))
|
||||||
|
|
||||||
Darcs = available in the darcs repository of --http://code.haskell.org/gf-- http://www.grammaticalframework.org/
|
Darcs = available in the darcs repository of --http://code.haskell.org/gf-- http://www.grammaticalframework.org/
|
||||||
|
|
||||||
|
|||||||
108
src/www/gfmorpho/GFMorpho.hs
Normal file
108
src/www/gfmorpho/GFMorpho.hs
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
import Network.HTTP.Base
|
||||||
|
import Codec.Binary.UTF8.String
|
||||||
|
import Data.Char
|
||||||
|
import Data.List
|
||||||
|
import System
|
||||||
|
|
||||||
|
main = do
|
||||||
|
xs <- getArgs
|
||||||
|
let xxoo = lexArgs (unwords xs)
|
||||||
|
case pArgs xxoo of
|
||||||
|
Just (oo,xx) -> do
|
||||||
|
morpho oo xx
|
||||||
|
_ -> do
|
||||||
|
putStrLn $ "cannot read " ++ unwords xs ++ "."
|
||||||
|
putStrLn "<p>"
|
||||||
|
putStrLn usage
|
||||||
|
|
||||||
|
usage = "usage: gfmorpho LANG POS FORMS OPT*"
|
||||||
|
|
||||||
|
noParse xx = length xx < 3 ----
|
||||||
|
|
||||||
|
lexArgs = map (decodeString . urlDecode) . words . map unspec . drop 1 . dropWhile (/='=') where
|
||||||
|
unspec c = case c of
|
||||||
|
'=' -> ' '
|
||||||
|
'+' -> ' '
|
||||||
|
_ -> c
|
||||||
|
|
||||||
|
pArgs xxoo = do
|
||||||
|
let (oo,xx) = partition isOption xxoo
|
||||||
|
if length xx < 3 then Nothing else return (oo,xx)
|
||||||
|
|
||||||
|
morpho :: [String] -> [String] -> IO ()
|
||||||
|
morpho oo xx = do
|
||||||
|
writeFile tmpCommand (script xx)
|
||||||
|
system $ command xx
|
||||||
|
s <- readFile tmpFile
|
||||||
|
putStrLn $ mkFile $ response oo s
|
||||||
|
|
||||||
|
script ("!":lang:rest) = "cc -table -unqual " ++ unwords rest
|
||||||
|
script (lang: pos: forms) = "cc -table -unqual " ++ fun pos ++ quotes forms
|
||||||
|
where
|
||||||
|
fun pos = "mk" ++ pos
|
||||||
|
|
||||||
|
command ("!":args) = command args
|
||||||
|
command (lang: pos: forms) =
|
||||||
|
"/usr/local/bin/gf -run -retain -path=alltenses alltenses/Paradigms" ++ lang ++ ".gfo"
|
||||||
|
++ " < " ++ tmpCommand
|
||||||
|
++ " > " ++ tmpFile
|
||||||
|
|
||||||
|
quotes = unwords . map quote where
|
||||||
|
quote s = case s of
|
||||||
|
'_':tag -> tag
|
||||||
|
_ -> "\"" ++ s ++ "\""
|
||||||
|
|
||||||
|
-- html response
|
||||||
|
response oo =
|
||||||
|
tag "table border=1" . unlines . map (tag "tr" . unwords) . map cleanTable . grep oo . map words . lines
|
||||||
|
|
||||||
|
cleanTable ws = [tag "td" (unwords param), tag "td" (tag "i" (unwords form))] where
|
||||||
|
(param,form) = getOne (map cleant ws)
|
||||||
|
cleant w = case w of
|
||||||
|
"s" -> ""
|
||||||
|
"." -> ""
|
||||||
|
_ -> cleanw w
|
||||||
|
cleanw = filter (flip notElem "()")
|
||||||
|
getOne ws = let ww = filter (/= "=>") ws in (init ww, [last ww]) -- excludes multiwords
|
||||||
|
|
||||||
|
responsePlain oo =
|
||||||
|
unlines . map unwords . grep oo . map cleanTablePlain . map words . lines
|
||||||
|
|
||||||
|
cleanTablePlain = map clean where
|
||||||
|
clean w = case w of
|
||||||
|
"=>" -> "\t"
|
||||||
|
"s" -> ""
|
||||||
|
"." -> ""
|
||||||
|
_ -> cleanw w
|
||||||
|
cleanw = filter (flip notElem "()")
|
||||||
|
|
||||||
|
grep oo wss = filter (\ws -> all (flip matchIn ws) (map tail oo)) wss
|
||||||
|
|
||||||
|
matchIn p ws = any (match p) ws where
|
||||||
|
match p w = case (p,w) of
|
||||||
|
('*':ps,_ ) -> any (match ps) [drop i w | i <- [0..length w]] ---
|
||||||
|
(c:ps, d:ws) -> c == d && match ps ws
|
||||||
|
_ -> p == w
|
||||||
|
|
||||||
|
tmpFile = "_gfmorpho.tmp"
|
||||||
|
tmpCommand = "_gfcommand.tmp"
|
||||||
|
|
||||||
|
isOption = (=='-') . head
|
||||||
|
|
||||||
|
tag t s = "<" ++ t ++ ">" ++ s ++ "</" ++ t ++ ">"
|
||||||
|
|
||||||
|
|
||||||
|
-- html file with UTF8
|
||||||
|
|
||||||
|
mkFile s = unlines $ [
|
||||||
|
"<HTML>",
|
||||||
|
"<HEAD>",
|
||||||
|
"<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=utf-8\">",
|
||||||
|
"<TITLE>GF Smart Paradigm Output</TITLE>",
|
||||||
|
"</HEAD>",
|
||||||
|
"<BODY>",
|
||||||
|
s,
|
||||||
|
"</BODY>",
|
||||||
|
"</HTML>"
|
||||||
|
]
|
||||||
|
|
||||||
23
src/www/gfmorpho/README
Normal file
23
src/www/gfmorpho/README
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
A service for using smart paradigms on the web.
|
||||||
|
|
||||||
|
Works with a cgi script running a Haskell program that calls GF to interprete a query string as a "cc" command on a specified Paradigms file. For instance, if the
|
||||||
|
user submits the query
|
||||||
|
|
||||||
|
Eng N baby
|
||||||
|
|
||||||
|
the program executes the command
|
||||||
|
|
||||||
|
cc -table -unqual ParadigmsEng.mkN "baby"
|
||||||
|
|
||||||
|
The resulting output is converted into an HTML table.
|
||||||
|
|
||||||
|
The file gfmorpho.html gives some more information. Open issues in addition to those mentioned there are:
|
||||||
|
|
||||||
|
- GFMorpho.hs creates the temporary files _gfcommand.tmp and _gfmorpho.tmp which need to be world-writable; they should be created more properly and removed after use
|
||||||
|
- gfmorpho.cgi defines the variable GF_LIB_PATH to reside in /Users/aarne, and must be edited for other environments
|
||||||
|
- to work for all languages mentioned, one has to compile some incomplete GF grammars not standardly compiled:
|
||||||
|
|
||||||
|
GF/lib/src$ runghc Make alltenses lang langs=Amh,Ara,Lat,Mlt,Tur
|
||||||
|
|
||||||
|
(c) Aarne Ranta 2012 under LGPL/BSD.
|
||||||
|
|
||||||
7
src/www/gfmorpho/gfmorpho.cgi
Normal file
7
src/www/gfmorpho/gfmorpho.cgi
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
echo "Content-type: text/html";
|
||||||
|
echo ""
|
||||||
|
export LANG=en_US.UTF-8
|
||||||
|
runghc GFMorpho "$QUERY_STRING"
|
||||||
|
|
||||||
100
src/www/gfmorpho/gfmorpho.html
Normal file
100
src/www/gfmorpho/gfmorpho.html
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
|
||||||
|
<html> <head>
|
||||||
|
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
|
||||||
|
<title>Use GF Smart Paradigms</title>
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
<h1>Word inflection with smart paradigms</h1>
|
||||||
|
|
||||||
|
Give language, part of speech, and one or more word forms, to obtain
|
||||||
|
the inflection table.
|
||||||
|
<p>
|
||||||
|
<form method=get action="gfmorpho.cgi">
|
||||||
|
<input name=args>
|
||||||
|
<INPUT TYPE=SUBMIT VALUE="Submit">
|
||||||
|
</form>
|
||||||
|
Examples:
|
||||||
|
<pre>
|
||||||
|
Eng N baby
|
||||||
|
Fin V odottaa odotti
|
||||||
|
Fre V manger
|
||||||
|
Ger N Soldat Soldaten _masculine
|
||||||
|
Hin N बच्छा
|
||||||
|
Jpn V 答える _Gr2
|
||||||
|
Lat A vetus veteris
|
||||||
|
</pre>
|
||||||
|
Thus notice that strings are given without quotes, but features
|
||||||
|
are prefixed with an underscore <tt>_</tt> (a temporary hack).
|
||||||
|
|
||||||
|
|
||||||
|
<h2>Languages and part of speech tags</h2>
|
||||||
|
|
||||||
|
The available languages are:
|
||||||
|
<pre>
|
||||||
|
Afr Amh Cat Dan Dut Eng Fin Fre Ger Hin Ina Ita Jpn Lat
|
||||||
|
Lav Nep Nor Pes Pnb Ron Rus Snd Spa Swe Tha Tur Urd
|
||||||
|
</pre>
|
||||||
|
In addition, the library has the languages <tt>Ara Bul Pol</tt>, but they
|
||||||
|
are not yet available in this way; you can however use the full form of
|
||||||
|
paradigm applications prefixed by "!" as described below.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
|
||||||
|
The parts of speech are: N (= noun), A (= adjective), V (= verb).
|
||||||
|
|
||||||
|
<p>
|
||||||
|
|
||||||
|
The way this works is that the program constructs the most probable
|
||||||
|
inflection table from the forms given. For a vast majority of words in
|
||||||
|
all languages, it is enough to give just one form. But sometimes more
|
||||||
|
forms are needed to get the inflection table right.
|
||||||
|
|
||||||
|
|
||||||
|
<h2>Filtering with patterns</h2>
|
||||||
|
|
||||||
|
You may not want to see the whole table. Then you can filter it with patterns, each of which works like
|
||||||
|
"grep", using <tt>*</tt> to match any substring, either in the
|
||||||
|
features or in the forms:
|
||||||
|
<pre>
|
||||||
|
Eng N baby -Gen
|
||||||
|
Eng V die -dy*
|
||||||
|
</pre>
|
||||||
|
This is a front end to the Paradigms modules in the GF Resource Grammar.
|
||||||
|
See <a href=http://grammaticalframework.org/lib/doc/synopsis.html>RGL
|
||||||
|
Synopsis</a> for available languages and paradigms.
|
||||||
|
|
||||||
|
|
||||||
|
<h2>Using custom paradigms</h2>
|
||||||
|
|
||||||
|
(Another temporary hack, for GF experts:) If you want to use other paradigms than the smart
|
||||||
|
<tt>mk</tt> paradigms, you can prefix your input with <tt>!</tt> and
|
||||||
|
use the normal expression syntax of GF. For example:
|
||||||
|
<pre>
|
||||||
|
! Ara brkN "طير" "فَعل" "فُعُول" Masc NoHum
|
||||||
|
! Bul mkN041 "птица"
|
||||||
|
! Pol mkRegAdj "duży" "większy" "dużo" "więcej"
|
||||||
|
</pre>
|
||||||
|
This also allows you to use structured terms:
|
||||||
|
<pre>
|
||||||
|
! Ger prefixV "auf" (mkV "fassen")
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
|
||||||
|
<h2>To do</h2>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li> nicer input helped by menus
|
||||||
|
<li> error handling and reporting when some language doesn't follow
|
||||||
|
the format assumed here
|
||||||
|
<li> better documentation of the paradigms
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
|
||||||
|
Powered by <a href=http://grammaticalframework.org>GF</a>. Aarne Ranta 2012.
|
||||||
|
|
||||||
|
<hr>
|
||||||
|
<address></address>
|
||||||
|
<!-- hhmts start --> Last modified: Wed Sep 12 14:24:51 CEST 2012 <!-- hhmts end -->
|
||||||
|
</body> </html>
|
||||||
Reference in New Issue
Block a user