Started experimenting with shallow resource API.

Started experimenting with shallow resource API.
Added PossessPrep to Structural.
This commit is contained in:
aarne
2004-02-20 16:28:42 +00:00
parent 7f3009e2cc
commit 2ff53b2ffd
17 changed files with 45686 additions and 0 deletions

View File

@@ -76,6 +76,7 @@ fun
InFrontPrep, BehindPrep, BetweenPrep : Prep ;
BeforePrep, DuringPrep, AfterPrep : Prep ; -- temporal relations
WithPrep, WithoutPrep, ByMeansPrep : Prep ; -- some other relations
PossessPrep : Prep ; -- possessive/genitive
PartPrep : Prep ; -- partitive "of" ("bottle of wine")
AgentPrep : Prep ; -- agent "by" in passive constructions

View File

@@ -97,6 +97,7 @@ concrete StructuralEng of Structural =
WithPrep = ss "with" ;
WithoutPrep = ss "without" ;
ByMeansPrep = ss "by" ;
PossessPrep = ss "of" ;
PartPrep = ss "of" ;
AgentPrep = ss "by" ;

View File

@@ -142,6 +142,7 @@ concrete StructuralFin of Structural =
WithPrep = prepPostpGen "kanssa" ;
WithoutPrep = prepPrep "ilman" Part ;
ByMeansPrep = prepPostpGen "avulla" ;
PossessPrep = prepCase Gen ;
PartPrep = prepCase Part ;
AgentPrep = prepPostpGen "toimesta" ;

View File

@@ -104,6 +104,7 @@ lin
WithPrep = justPrep "avec" ;
WithoutPrep = justPrep "sans" ;
ByMeansPrep = justPrep "par" ;
PossessPrep = justCase genitive ;
PartPrep = justCase genitive ; ---
AgentPrep = justPrep "par" ;

View File

@@ -127,6 +127,7 @@ lin
WithoutPrep = mkPrep "ohne" Acc ;
ByMeansPrep = mkPrep "mit" Dat ;
PartPrep = mkPrep "von" Dat ;
PossessPrep = mkPrep "von" Dat ;
AgentPrep = mkPrep "durch" Acc ;
} ;

View File

@@ -108,6 +108,7 @@ lin
WithPrep = justCase (CPrep P_con) ;
WithoutPrep = justPrep "senza" ;
ByMeansPrep = justPrep "per" ;
PossessPrep = justCase genitive ;
PartPrep = justCase genitive ; ---
AgentPrep = justCase (CPrep P_da) ;

View File

@@ -126,6 +126,7 @@ WantVV = extVerb verbKhotet Act Present ;
WithPrep = { s2 = "с" ; c = Inst};
WithoutPrep = { s2 = "без" ; c = Gen};
ByMeansPrep = { s2 = ["с помощью"] ; c = Gen};
PossessPrep = { s2 = "" ; c = Gen}; --- ?? AR 19/2/2004
PartPrep = { s2 = "" ; c = Nom}; -- missing in Russian
AgentPrep = { s2 = "" ; c = Nom}; -- missing in Russian
} ;

View File

@@ -0,0 +1,54 @@
-- Shallow.gf by AR 19/2/2004
--
-- This is a resource API for shallow parsing.
-- It aims to be as unambiguous as possible: so it hides
-- scope ambiguities.
-- Therefore it has many more rules than would be necessary
-- actually to define the language.
-- It is not primarily aimed to be used through selection from the API,
-- but through a parser.
-- It can also serve for experiments with shallow (fast?) parsing.
abstract Shallow = {

  cat
    -- utterances and the three clause types they can be built from
    Phr ; S ; Qu ; Imp ;

    -- predicates
    Verb ; TV ; Adj ;

    -- nominals: Noun is the input of ModNoun and CNNoun,
    -- CN is what the NP-forming functions take
    Noun ; CN ; NP ;

    -- adverbials and the prepositions that build them
    Adv ; Prep ;

  fun
    -- a phrase is a sentence, a question or an imperative
    PhrS   : S   -> Phr ;
    PhrQu  : Qu  -> Phr ;
    PhrImp : Imp -> Phr ;

    -- sentences: one plain and one Neg-prefixed function per predicate shape
    SVerb,   SNegVerb   : NP -> Verb -> S ;
    SVerbPP, SNegVerbPP : NP -> Verb -> Adv -> S ;
    STV,     SNegTV     : NP -> TV -> NP -> S ;
    SAdj,    SNegAdj    : NP -> Adj -> S ;
    SAdjPP,  SNegAdjPP  : NP -> Adj -> Adv -> S ;
    SCN,     SNegCN     : NP -> CN -> S ;
    SAdv,    SNegAdv    : NP -> Adv -> S ;

    -- questions
    QuVerb,  QuNegVerb  : NP -> Verb -> Qu ;

    -- imperatives
    ImpVerb, ImpNegVerb : Verb -> Imp ;
    ImpAdj,  ImpNegAdj  : Adj  -> Imp ;
    ImpCN,   ImpNegCN   : CN   -> Imp ;
    ImpAdv,  ImpNegAdv  : Adv  -> Imp ;

    -- modification and prepositional phrases
    ModNoun  : Adj -> Noun -> Noun ;
    PrepNP   : Prep -> NP -> Adv ;
    PrepNoun : CN -> Prep -> NP -> CN ;
    CNNoun   : Noun -> CN ;

    -- noun phrases from common nouns
    DefNP, IndefNP, EveryNP, AllNP : CN -> NP ;

    -- the only preposition provided by this module
    PossessPrep : Prep ;
}

View File

@@ -0,0 +1,4 @@
--# -path=.:../../prelude:../abstract:../english
-- English concrete syntax of Shallow: the incomplete module ShallowI
-- with its Resource interface instantiated by ResourceEng.
concrete ShallowEng of Shallow = ShallowI with (Resource = ResourceEng) ;

View File

@@ -0,0 +1,73 @@
--# -path=.:../../prelude:../abstract
-- Language-independent concrete syntax of Shallow: every category and
-- function is defined purely in terms of the Resource interface, so a
-- language is obtained by instantiating Resource.
incomplete concrete ShallowI of Shallow = open (Resource = Resource) in {

  -- Only categories declared in the abstract syntax Shallow get a
  -- linearization type here.
  lincat
    Phr  = Resource.Phr ;
    S    = Resource.S ;
    Qu   = Resource.Qu ;
    Imp  = Resource.Imp ;
    Verb = Resource.V ;
    TV   = Resource.TV ;
    Adj  = Resource.Adj1 ;
    -- Noun and CN share one linearization type, which is why CNNoun
    -- below can be the identity
    Noun = Resource.CN ;
    CN   = Resource.CN ;
    NP   = Resource.NP ;
    Adv  = Resource.AdV ;
    Prep = Resource.Prep ;

  lin
    -- phrases
    PhrS   = Resource.IndicPhrase ;
    PhrQu  = Resource.QuestPhrase ;
    PhrImp = Resource.ImperOne ;

    -- sentences: the plain function wraps the predicate in PosVG,
    -- its Neg counterpart in NegVG; the PP variants attach the
    -- adverbial with AdvVP
    SVerb    x f = Resource.PredVP x (Resource.PosVG (Resource.PredV f)) ;
    SNegVerb x f = Resource.PredVP x (Resource.NegVG (Resource.PredV f)) ;
    SVerbPP x f y = Resource.PredVP x
      (Resource.AdvVP (Resource.PosVG (Resource.PredV f)) y) ;
    SNegVerbPP x f y = Resource.PredVP x
      (Resource.AdvVP (Resource.NegVG (Resource.PredV f)) y) ;
    STV x f y = Resource.PredVP x (Resource.PosVG
      (Resource.PredTV f y)) ;
    SNegTV x f y = Resource.PredVP x (Resource.NegVG
      (Resource.PredTV f y)) ;
    SAdj x f = Resource.PredVP x (Resource.PosVG
      (Resource.PredAP (Resource.AdjP1 f))) ;
    SNegAdj x f = Resource.PredVP x (Resource.NegVG
      (Resource.PredAP (Resource.AdjP1 f))) ;
    SAdjPP x f y = Resource.PredVP x (Resource.AdvVP (Resource.PosVG
      (Resource.PredAP (Resource.AdjP1 f))) y) ;
    SNegAdjPP x f y = Resource.PredVP x (Resource.AdvVP (Resource.NegVG
      (Resource.PredAP (Resource.AdjP1 f))) y) ;
    SCN     x f = Resource.PredVP x (Resource.PosVG (Resource.PredCN f)) ;
    SNegCN  x f = Resource.PredVP x (Resource.NegVG (Resource.PredCN f)) ;
    SAdv    x f = Resource.PredVP x (Resource.PosVG (Resource.PredAdV f)) ;
    SNegAdv x f = Resource.PredVP x (Resource.NegVG (Resource.PredAdV f)) ;

    -- questions
    QuVerb    x f = Resource.QuestVP x (Resource.PosVG (Resource.PredV f)) ;
    QuNegVerb x f = Resource.QuestVP x (Resource.NegVG (Resource.PredV f)) ;

    -- imperatives
    ImpVerb    f = Resource.ImperVP (Resource.PosVG (Resource.PredV f)) ;
    ImpNegVerb f = Resource.ImperVP (Resource.NegVG (Resource.PredV f)) ;
    ImpAdj f = Resource.ImperVP (Resource.PosVG
      (Resource.PredAP (Resource.AdjP1 f))) ;
    ImpNegAdj f = Resource.ImperVP (Resource.NegVG
      (Resource.PredAP (Resource.AdjP1 f))) ;
    ImpCN     f = Resource.ImperVP (Resource.PosVG (Resource.PredCN f)) ;
    ImpNegCN  f = Resource.ImperVP (Resource.NegVG (Resource.PredCN f)) ;
    ImpAdv    f = Resource.ImperVP (Resource.PosVG (Resource.PredAdV f)) ;
    ImpNegAdv f = Resource.ImperVP (Resource.NegVG (Resource.PredAdV f)) ;

    -- modification and prepositional phrases
    ModNoun a n = Resource.ModAdj (Resource.AdjP1 a) n ;
    PrepNP = Resource.PrepNP ;
    PrepNoun f p x = Resource.AdvCN f (Resource.PrepNP p x) ;
    CNNoun n = n ;

    -- noun phrases
    AllNP   = Resource.DetNP (Resource.AllNumDet Resource.NoNum) ;
    EveryNP = Resource.DetNP Resource.EveryDet ;
    DefNP   = Resource.DefOneNP ;
    IndefNP = Resource.IndefOneNP ;

    -- structural words
    PossessPrep = Resource.PossessPrep ;
}

View File

@@ -0,0 +1,11 @@
-- A small test lexicon on top of the Shallow API.
abstract TestShallow = Shallow ** {

  fun
    -- adjectives
    Big, Happy, Small, Old, Young : Adj ;
    American, Finnish             : Adj ;
    Married                       : Adj ;

    -- nouns
    Man, Woman, Car, House, Light, Bar, Bottle, Wine : Noun ;

    -- one-place and two-place verbs
    Walk, Run                                       : Verb ;
    Send, Wait, Love, Drink, SwitchOn, SwitchOff    : TV ;

    -- more nouns
    Mother, Uncle : Noun ;
} ;

View File

@@ -0,0 +1,14 @@
--# -path=.:../../prelude:../abstract:../english
-- English linearizations for part of the TestShallow lexicon; the
-- functions of TestShallow not listed here have no linearization yet.
concrete TestShallowEng of TestShallow = ShallowEng ** open ParadigmsEng in {

  lin
    -- adjectives
    Big      = mkAdj1 "big" ;
    Happy    = mkAdj1 "happy" ;
    Small    = mkAdj1 "small" ;
    American = mkAdj1 "American" ;

    -- nouns
    --- Man = nMan "man" "men" human ;
    Car      = cnNonhuman "car" ;

    -- verbs
    Walk     = vReg "walk" ;
    Love     = tvDir (vReg "love") ;
}

View File

@@ -0,0 +1,66 @@
module MkLex where
import System
import Monad
-- to massage an ispell word list into a GF lexicon. AR 20/2/2004
-- | Input word list, read line by line by 'massage'.
lexfile :: FilePath
lexfile = "linux.words"

-- | Output file for the generated abstract syntax module Lex.
absfile :: FilePath
absfile = "Lex.gf"

-- | Output file for the generated concrete syntax module LexEng.
cncfile :: FilePath
cncfile = "LexEng.gf"

-- | Number of word forms taken from the beginning of the word list.
maxsize :: Int
maxsize = 10000 --- add more!
-- | Generate the lexicon: write the headers of the abstract module
-- ('absfile') and the concrete module ('cncfile'), then read the word
-- list ('lexfile') and let 'sortWords' append one entry per recognized
-- word; 'sortWords' also writes the closing braces.
--
-- Any previous contents of the two target files are overwritten.
massage :: IO ()
massage = do
  -- initialize target files: each starts with one blank line followed by
  -- its module header. 'writeFile' truncates the file directly, so no
  -- external shell is involved.
  writeFile absfile $
    "\n" ++
    "abstract Lex = Shallow ** {\nfun\n"
  writeFile cncfile $
    "\n" ++
    "--# -path=.:..:../../../prelude:../../abstract:../../english\n\n" ++
    "concrete LexEng of Lex = ShallowEng ** open ParadigmsEng in {\nlin\n"
  -- reverse to study endings
  ws <- liftM (map reverse . lines) $ readFile lexfile
  -- only the first 'maxsize' word forms are considered
  sortWords $ take maxsize ws
-- | Walk through the reversed word forms and emit a lexicon entry whenever
-- a stem is directly followed by its inflected forms. This relies on the
-- original list being sorted, so that the different forms of a word lie
-- consecutively. Because every word is reversed, endings show up as
-- prefixes in the patterns. A word that starts no recognized group is
-- dropped; at the end of the list both output files are closed with a brace.
sortWords :: [String] -> IO ()
sortWords (stem : ('d':'e':past) : ('s':pres) : rest)
  | past == stem && pres == stem =                -- regular verb
      mkEntry verbReg stem >> sortWords rest
sortWords (('e':stem) : ('d':'e':past) : ('s':'e':pres) : rest)
  | past == stem && pres == stem =                -- e-verb
      mkEntry verbE ('e':stem) >> sortWords rest
sortWords (stem : ('y':'l':adverb) : rest)
  | adverb == stem =                              -- regular adjective
      mkEntry adjReg stem >> sortWords rest
sortWords (stem : ('s':plural) : rest)
  | plural == stem =                              -- regular noun
      mkEntry nounReg stem >> sortWords rest
---- add more
sortWords (_ : rest) = sortWords rest
sortWords [] = appendFile absfile " }\n" >> appendFile cncfile " }\n"
-- | Append one lexicon entry to both output files. The word arrives
-- reversed and is turned back before the entry builder @abc@ produces the
-- pair (abstract judgement, concrete judgement); each judgement is written
-- with a leading space and terminated by @" ;\n"@.
mkEntry :: (String -> (String,String)) -> String -> IO ()
mkEntry abc w0 = do
  mapM_ (appendFile absfile) [" " ++ absJudgement, " ;\n"]
  mapM_ (appendFile cncfile) [" " ++ cncJudgement, " ;\n"]
  where
    (absJudgement, cncJudgement) = abc (reverse w0)
-- | Entry builder for regular verbs: category Verb, paradigm vReg.
verbReg :: String -> (String,String)
verbReg w = mkGF "Verb" "vReg" w

-- | Entry builder for verbs ending in e; currently the same as 'verbReg'.
verbE :: String -> (String,String)
verbE = verbReg ----

-- | Entry builder for regular adjectives: category Adj, paradigm mkAdj1.
adjReg :: String -> (String,String)
adjReg w = mkGF "Adj" "mkAdj1" w

-- | Entry builder for regular nouns: category Noun, paradigm cnNonhuman.
nounReg :: String -> (String,String)
nounReg w = mkGF "Noun" "cnNonhuman" w
-- | Build the two GF judgements for a word @w@: the abstract one declares
-- the function @wL@ in category @cat@, the concrete one defines @wL@ as
-- the paradigm @oper@ applied to the quoted word. Neither judgement
-- includes the terminating semicolon.
mkGF :: String -> String -> (String -> (String,String))
mkGF cat oper w = (absJudgement, cncJudgement)
  where
    absJudgement = concat [w, "L : ", cat]
    cncJudgement = concat [w, "L = ", oper, " \"", w, "\""]

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,31 @@
Project: to massage an ispell word list into a GF lexicon. AR 20/2/2004
Used MkLex on linux.words (45424 word forms)
The first run gave 6471 words, which was too much for GF.
Limiting the input with maxsize in MkLex brought this down to 1587 words.
The grammar now compiles in 25 sec, and parses as follows
(parsing time, then sentence length in words, "ws"):
"every commander assumes of an abbot"
610 msec, 6 ws
"every accusing commander assumes of a coarse abbot"
840 msec, 8 ws
"every accusing commander assumes of a coarse abbot of all
cognitive colonizers"
820 msec, 12 ws
"every accusing commander of a comical careful colt assumes of
a coarse abbot of all cognitive colonizers"
3670 msec, 17 ws (nesting inside)
"every accusing commander assumes of a coarse abbot of all cognitive
colonizers of a colt of a careful carpenter of every carriage"
TIMEOUT!
Note: it is essential to import with the -cflexer flag!
Time spent: 58 min, includes writing MkLex, running tests,
and writing these notes.

View File

@@ -107,6 +107,7 @@ concrete StructuralSwe of Structural =
WithPrep = ss "med" ;
WithoutPrep = ss "utan" ;
ByMeansPrep = ss "med" ;
PossessPrep = ss "av" ;
PartPrep = ss "av" ;
AgentPrep = ss "av" ;

View File

@@ -3,6 +3,7 @@ GHCFLAGS=-package lang -package util -fglasgow-exts
GHCFUDFLAG=-package Fudgets
GHCINCLUDE=-iapi -icompile -igrammar -iinfra -ishell -isource -icanonical -iuseGrammar -icf -ifor-ghc -iparsing -iparsers
GHCINCLUDENOFUD=-iapi -icompile -igrammar -iinfra -ishell -isource -icanonical -iuseGrammar -icf -ifor-ghc-nofud -iparsing -iparsers
GHCINCLUDEGFT=-iapi -icompile -igrammar -iinfra -ishell -isource -icanonical -iuseGrammar -icf -ifor-gft -iparsing -iparsers
WINDOWSINCLUDE=-ifor-windows -iapi -icompile -igrammar -iinfra -ishell -isource -icanonical -iuseGrammar -icf -iparsing -iparsers
all: