forked from GitHub/gf-core
First version of a web server for morphological paradigms.
This commit is contained in:
@@ -50,7 +50,8 @@ are marked in the table
|
||||
| Tur | + | - | ++ | + | - | - | - | - | + | *SC,KA
|
||||
| Urd | + | + | ++ | + | + | + | + | - | - | *SV,MH
|
||||
|
||||
Lang = 3-letter ISO language code, used in library file names
|
||||
Lang = 3-letter ISO language code, used in library file names
|
||||
(mostly ISO 639-2 B (bibliographic))
|
||||
|
||||
Darcs = available in the darcs repository of --http://code.haskell.org/gf-- http://www.grammaticalframework.org/
|
||||
|
||||
|
||||
108
src/www/gfmorpho/GFMorpho.hs
Normal file
108
src/www/gfmorpho/GFMorpho.hs
Normal file
@@ -0,0 +1,108 @@
|
||||
import Network.HTTP.Base
|
||||
import Codec.Binary.UTF8.String
|
||||
import Data.Char
|
||||
import Data.List
|
||||
import System
|
||||
|
||||
main = do
|
||||
xs <- getArgs
|
||||
let xxoo = lexArgs (unwords xs)
|
||||
case pArgs xxoo of
|
||||
Just (oo,xx) -> do
|
||||
morpho oo xx
|
||||
_ -> do
|
||||
putStrLn $ "cannot read " ++ unwords xs ++ "."
|
||||
putStrLn "<p>"
|
||||
putStrLn usage
|
||||
|
||||
usage = "usage: gfmorpho LANG POS FORMS OPT*"
|
||||
|
||||
noParse xx = length xx < 3 ----
|
||||
|
||||
lexArgs = map (decodeString . urlDecode) . words . map unspec . drop 1 . dropWhile (/='=') where
|
||||
unspec c = case c of
|
||||
'=' -> ' '
|
||||
'+' -> ' '
|
||||
_ -> c
|
||||
|
||||
pArgs xxoo = do
|
||||
let (oo,xx) = partition isOption xxoo
|
||||
if length xx < 3 then Nothing else return (oo,xx)
|
||||
|
||||
morpho :: [String] -> [String] -> IO ()
|
||||
morpho oo xx = do
|
||||
writeFile tmpCommand (script xx)
|
||||
system $ command xx
|
||||
s <- readFile tmpFile
|
||||
putStrLn $ mkFile $ response oo s
|
||||
|
||||
script ("!":lang:rest) = "cc -table -unqual " ++ unwords rest
|
||||
script (lang: pos: forms) = "cc -table -unqual " ++ fun pos ++ quotes forms
|
||||
where
|
||||
fun pos = "mk" ++ pos
|
||||
|
||||
command ("!":args) = command args
|
||||
command (lang: pos: forms) =
|
||||
"/usr/local/bin/gf -run -retain -path=alltenses alltenses/Paradigms" ++ lang ++ ".gfo"
|
||||
++ " < " ++ tmpCommand
|
||||
++ " > " ++ tmpFile
|
||||
|
||||
quotes = unwords . map quote where
|
||||
quote s = case s of
|
||||
'_':tag -> tag
|
||||
_ -> "\"" ++ s ++ "\""
|
||||
|
||||
-- html response
|
||||
response oo =
|
||||
tag "table border=1" . unlines . map (tag "tr" . unwords) . map cleanTable . grep oo . map words . lines
|
||||
|
||||
cleanTable ws = [tag "td" (unwords param), tag "td" (tag "i" (unwords form))] where
|
||||
(param,form) = getOne (map cleant ws)
|
||||
cleant w = case w of
|
||||
"s" -> ""
|
||||
"." -> ""
|
||||
_ -> cleanw w
|
||||
cleanw = filter (flip notElem "()")
|
||||
getOne ws = let ww = filter (/= "=>") ws in (init ww, [last ww]) -- excludes multiwords
|
||||
|
||||
responsePlain oo =
|
||||
unlines . map unwords . grep oo . map cleanTablePlain . map words . lines
|
||||
|
||||
cleanTablePlain = map clean where
|
||||
clean w = case w of
|
||||
"=>" -> "\t"
|
||||
"s" -> ""
|
||||
"." -> ""
|
||||
_ -> cleanw w
|
||||
cleanw = filter (flip notElem "()")
|
||||
|
||||
grep oo wss = filter (\ws -> all (flip matchIn ws) (map tail oo)) wss
|
||||
|
||||
matchIn p ws = any (match p) ws where
|
||||
match p w = case (p,w) of
|
||||
('*':ps,_ ) -> any (match ps) [drop i w | i <- [0..length w]] ---
|
||||
(c:ps, d:ws) -> c == d && match ps ws
|
||||
_ -> p == w
|
||||
|
||||
tmpFile = "_gfmorpho.tmp"
|
||||
tmpCommand = "_gfcommand.tmp"
|
||||
|
||||
isOption = (=='-') . head
|
||||
|
||||
tag t s = "<" ++ t ++ ">" ++ s ++ "</" ++ t ++ ">"
|
||||
|
||||
|
||||
-- html file with UTF8
|
||||
|
||||
mkFile s = unlines $ [
|
||||
"<HTML>",
|
||||
"<HEAD>",
|
||||
"<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=utf-8\">",
|
||||
"<TITLE>GF Smart Paradigm Output</TITLE>",
|
||||
"</HEAD>",
|
||||
"<BODY>",
|
||||
s,
|
||||
"</BODY>",
|
||||
"</HTML>"
|
||||
]
|
||||
|
||||
23
src/www/gfmorpho/README
Normal file
23
src/www/gfmorpho/README
Normal file
@@ -0,0 +1,23 @@
|
||||
A service for using smart paradigms on the web.
|
||||
|
||||
Works with a cgi script running a Haskell program that calls GF to interprete a query string as a "cc" command on a specified Paradigms file. For instance, if the
|
||||
user submits the query
|
||||
|
||||
Eng N baby
|
||||
|
||||
the program executes the command
|
||||
|
||||
cc -table -unqual ParadigmsEng.mkN "baby"
|
||||
|
||||
The resulting output is converted into an HTML table.
|
||||
|
||||
The file gfmorpho.html gives some more information. Open issues in addition to those mentioned there are:
|
||||
|
||||
- GFMorpho.hs creates the temporary files _gfcommand.tmp and _gfmorpho.tmp which need to be world-writable; they should be created more properly and removed after use
|
||||
- gfmorpho.cgi defines the variable GF_LIB_PATH to reside in /Users/aarne, and must be edited for other environments
|
||||
- to work for all languages mentioned, one has to compile some incomplete GF grammars not standardly compiled:
|
||||
|
||||
GF/lib/src$ runghc Make alltenses lang langs=Amh,Ara,Lat,Mlt,Tur
|
||||
|
||||
(c) Aarne Ranta 2012 under LGPL/BSD.
|
||||
|
||||
7
src/www/gfmorpho/gfmorpho.cgi
Normal file
7
src/www/gfmorpho/gfmorpho.cgi
Normal file
@@ -0,0 +1,7 @@
|
||||
#!/bin/bash
|
||||
|
||||
echo "Content-type: text/html";
|
||||
echo ""
|
||||
export LANG=en_US.UTF-8
|
||||
runghc GFMorpho "$QUERY_STRING"
|
||||
|
||||
100
src/www/gfmorpho/gfmorpho.html
Normal file
100
src/www/gfmorpho/gfmorpho.html
Normal file
@@ -0,0 +1,100 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
|
||||
<html> <head>
|
||||
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
|
||||
<title>Use GF Smart Paradigms</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>Word inflection with smart paradigms</h1>
|
||||
|
||||
Give language, part of speech, and one or more word forms, to obtain
|
||||
the inflection table.
|
||||
<p>
|
||||
<form method=get action="gfmorpho.cgi">
|
||||
<input name=args>
|
||||
<INPUT TYPE=SUBMIT VALUE="Submit">
|
||||
</form>
|
||||
Examples:
|
||||
<pre>
|
||||
Eng N baby
|
||||
Fin V odottaa odotti
|
||||
Fre V manger
|
||||
Ger N Soldat Soldaten _masculine
|
||||
Hin N बच्छा
|
||||
Jpn V 答える _Gr2
|
||||
Lat A vetus veteris
|
||||
</pre>
|
||||
Thus notice that strings are given without quotes, but features
|
||||
are prefixed with an underscore <tt>_</tt> (a temporary hack).
|
||||
|
||||
|
||||
<h2>Languages and part of speech tags</h2>
|
||||
|
||||
The available languages are:
|
||||
<pre>
|
||||
Afr Amh Cat Dan Dut Eng Fin Fre Ger Hin Ina Ita Jpn Lat
|
||||
Lav Nep Nor Pes Pnb Ron Rus Snd Spa Swe Tha Tur Urd
|
||||
</pre>
|
||||
In addition, the library has the languages <tt>Ara Bul Pol</tt>, but they
|
||||
are not yet available in this way; you can however use the full form of
|
||||
paradigm applications prefixed by "!" as described below.
|
||||
|
||||
<p>
|
||||
|
||||
The parts of speech are: N (= noun), A (= adjective), V (= verb).
|
||||
|
||||
<p>
|
||||
|
||||
The way this works is that the program constructs the most probable
|
||||
inflection table from the forms given. For a vast majority of words in
|
||||
all languages, it is enough to give just one form. But sometimes more
|
||||
forms are needed to get the inflection table right.
|
||||
|
||||
|
||||
<h2>Filtering with patterns</h2>
|
||||
|
||||
You may not want to see the whole table. Then you can filter it with patterns, each of which works like
|
||||
"grep", using <tt>*</tt> to match any substring, either in the
|
||||
features or in the forms:
|
||||
<pre>
|
||||
Eng N baby -Gen
|
||||
Eng V die -dy*
|
||||
</pre>
|
||||
This is a front end to the Paradigms modules in the GF Resource Grammar.
|
||||
See <a href=http://grammaticalframework.org/lib/doc/synopsis.html>RGL
|
||||
Synopsis</a> for available languages and paradigms.
|
||||
|
||||
|
||||
<h2>Using custom paradigms</h2>
|
||||
|
||||
(Another temporary hack, for GF experts:) If you want to use other paradigms than the smart
|
||||
<tt>mk</tt> paradigms, you can prefix your input with <tt>!</tt> and
|
||||
use the normal expression syntax of GF. For example:
|
||||
<pre>
|
||||
! Ara brkN "طير" "فَعل" "فُعُول" Masc NoHum
|
||||
! Bul mkN041 "птица"
|
||||
! Pol mkRegAdj "duży" "większy" "dużo" "więcej"
|
||||
</pre>
|
||||
This also allows you to use structured terms:
|
||||
<pre>
|
||||
! Ger prefixV "auf" (mkV "fassen")
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>To do</h2>
|
||||
|
||||
<ul>
|
||||
<li> nicer input helped by menus
|
||||
<li> error handling and reporting when some language doesn't follow
|
||||
the format assumed here
|
||||
<li> better documentation of the paradigms
|
||||
</ul>
|
||||
|
||||
<p>
|
||||
|
||||
Powered by <a href=http://grammaticalframework.org>GF</a>. Aarne Ranta 2012.
|
||||
|
||||
<hr>
|
||||
<address></address>
|
||||
<!-- hhmts start --> Last modified: Wed Sep 12 14:24:51 CEST 2012 <!-- hhmts end -->
|
||||
</body> </html>
|
||||
Reference in New Issue
Block a user