mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-19 09:49:33 -06:00
a minimal-lexicon version of resource
This commit is contained in:
14
lib/resource-1.0/minimal/MinLexicon.gf
Normal file
14
lib/resource-1.0/minimal/MinLexicon.gf
Normal file
@@ -0,0 +1,14 @@
|
||||
abstract MinLexicon = Cat ** {
|
||||
fun
|
||||
add_V3 : V3 ;
|
||||
apple_N : N ;
|
||||
become_VA : VA ;
|
||||
big_A : A ;
|
||||
break_V2 : V2 ;
|
||||
brother_N2 : N2 ;
|
||||
paint_V2A : V2A ;
|
||||
-- paris_PN : PN ;
|
||||
sleep_V : V ;
|
||||
wonder_VQ : VQ ;
|
||||
|
||||
}
|
||||
2
lib/resource-1.0/minimal/MinLexiconEng.gf
Normal file
2
lib/resource-1.0/minimal/MinLexiconEng.gf
Normal file
@@ -0,0 +1,2 @@
|
||||
concrete MinLexiconEng of MinLexicon = MinLexiconI with
|
||||
(Lexicon = LexiconEng) ;
|
||||
2
lib/resource-1.0/minimal/MinLexiconFre.gf
Normal file
2
lib/resource-1.0/minimal/MinLexiconFre.gf
Normal file
@@ -0,0 +1,2 @@
|
||||
concrete MinLexiconFre of MinLexicon = MinLexiconI with
|
||||
(Lexicon = LexiconFre) ;
|
||||
17
lib/resource-1.0/minimal/MinLexiconI.gf
Normal file
17
lib/resource-1.0/minimal/MinLexiconI.gf
Normal file
@@ -0,0 +1,17 @@
|
||||
incomplete concrete MinLexiconI of MinLexicon = Cat ** open Lexicon in {
|
||||
|
||||
lin
|
||||
|
||||
add_V3 = add_V3 ;
|
||||
apple_N = apple_N ;
|
||||
become_VA = become_VA ;
|
||||
big_A = big_A ;
|
||||
break_V2 = break_V2 ;
|
||||
brother_N2 = brother_N2 ;
|
||||
paint_V2A = paint_V2A ;
|
||||
-- paris_PN = paris_PN ;
|
||||
say_VS = say_VS ;
|
||||
sleep_V = sleep_V ;
|
||||
wonder_VQ = wonder_VQ ;
|
||||
|
||||
} ;
|
||||
29
lib/resource-1.0/minimal/MinStructural.gf
Normal file
29
lib/resource-1.0/minimal/MinStructural.gf
Normal file
@@ -0,0 +1,29 @@
|
||||
abstract MinStructural = Cat ** {
|
||||
|
||||
fun
|
||||
|
||||
-- This is an alphabetical list of structural words: just one from each category
|
||||
-- but so that all variation is created, e.g. all persons for pronouns.
|
||||
|
||||
almost_AdN : AdN ;
|
||||
always_AdV : AdV ;
|
||||
and_Conj : Conj ;
|
||||
but_PConj : PConj ;
|
||||
can_VV : VV ;
|
||||
either7or_DConj : DConj ;
|
||||
he_Pron : Pron ;
|
||||
here_Adv : Adv ;
|
||||
how_IAdv : IAdv ;
|
||||
i_Pron : Pron ;
|
||||
if_Subj : Subj ;
|
||||
in_Prep : Prep ;
|
||||
that_NP : NP ;
|
||||
we_Pron : Pron ;
|
||||
whichPl_IDet : IDet ;
|
||||
whichSg_IDet : IDet ;
|
||||
whoPl_IP : IP ;
|
||||
whoSg_IP : IP ;
|
||||
youSg_Pron : Pron ;
|
||||
youPl_Pron : Pron ;
|
||||
|
||||
}
|
||||
2
lib/resource-1.0/minimal/MinStructuralEng.gf
Normal file
2
lib/resource-1.0/minimal/MinStructuralEng.gf
Normal file
@@ -0,0 +1,2 @@
|
||||
concrete MinStructuralEng of MinStructural = MinStructuralI with
|
||||
(Structural = StructuralEng) ;
|
||||
2
lib/resource-1.0/minimal/MinStructuralFre.gf
Normal file
2
lib/resource-1.0/minimal/MinStructuralFre.gf
Normal file
@@ -0,0 +1,2 @@
|
||||
concrete MinStructuralFre of MinStructural = MinStructuralI with
|
||||
(Structural = StructuralFre) ;
|
||||
23
lib/resource-1.0/minimal/MinStructuralI.gf
Normal file
23
lib/resource-1.0/minimal/MinStructuralI.gf
Normal file
@@ -0,0 +1,23 @@
|
||||
incomplete concrete MinStructuralI of MinStructural = open Structural in {
|
||||
lin
|
||||
almost_AdN = almost_AdN ;
|
||||
always_AdV = always_AdV ;
|
||||
and_Conj = and_Conj ;
|
||||
but_PConj = but_PConj ;
|
||||
can_VV = can_VV ;
|
||||
either7or_DConj = either7or_DConj ;
|
||||
he_Pron = he_Pron ;
|
||||
here_Adv = here_Adv ;
|
||||
how_IAdv = how_IAdv ;
|
||||
i_Pron = i_Pron ;
|
||||
if_Subj = if_Subj ;
|
||||
in_Prep = in_Prep ;
|
||||
that_NP = that_NP ;
|
||||
we_Pron = we_Pron ;
|
||||
whichPl_IDet = whichPl_IDet ;
|
||||
whichSg_IDet = whichSg_IDet ;
|
||||
whoPl_IP = whoPl_IP ;
|
||||
whoSg_IP = whoSg_IP ;
|
||||
youSg_Pron = youSg_Pron ;
|
||||
youPl_Pron = youPl_Pron ;
|
||||
} ;
|
||||
21
lib/resource-1.0/minimal/Minimal.gf
Normal file
21
lib/resource-1.0/minimal/Minimal.gf
Normal file
@@ -0,0 +1,21 @@
|
||||
--1 Minimal Resource Grammar
|
||||
|
||||
-- This module defines the syntax with a minimal lexicon.
|
||||
-- This is useful for treebank generation.
|
||||
|
||||
abstract Minimal =
|
||||
Noun,
|
||||
Verb,
|
||||
Adjective,
|
||||
Adverb,
|
||||
Numeral,
|
||||
Sentence,
|
||||
Question,
|
||||
Relative,
|
||||
Conjunction,
|
||||
Phrase,
|
||||
Text,
|
||||
MinStructural,
|
||||
Idiom,
|
||||
MinLexicon
|
||||
** {} ;
|
||||
22
lib/resource-1.0/minimal/MinimalEng.gf
Normal file
22
lib/resource-1.0/minimal/MinimalEng.gf
Normal file
@@ -0,0 +1,22 @@
|
||||
--# -path=.:../english:../abstract:../common:prelude
|
||||
|
||||
concrete MinimalEng of Minimal =
|
||||
NounEng,
|
||||
VerbEng,
|
||||
AdjectiveEng,
|
||||
AdverbEng,
|
||||
NumeralEng,
|
||||
SentenceEng,
|
||||
QuestionEng,
|
||||
RelativeEng,
|
||||
ConjunctionEng,
|
||||
PhraseEng,
|
||||
TextX,
|
||||
StructuralEng,
|
||||
IdiomEng,
|
||||
LexiconEng
|
||||
** {
|
||||
|
||||
flags startcat = Phr ; -- unlexer = text ; lexer = text ;
|
||||
|
||||
} ;
|
||||
22
lib/resource-1.0/minimal/MinimalFre.gf
Normal file
22
lib/resource-1.0/minimal/MinimalFre.gf
Normal file
@@ -0,0 +1,22 @@
|
||||
--# -path=.:../french:../romance:../abstract:../common:prelude
|
||||
|
||||
concrete MinimalFre of Minimal =
|
||||
NounFre,
|
||||
VerbFre,
|
||||
AdjectiveFre,
|
||||
AdverbFre,
|
||||
NumeralFre,
|
||||
SentenceFre,
|
||||
QuestionFre,
|
||||
RelativeFre,
|
||||
ConjunctionFre,
|
||||
PhraseFre,
|
||||
TextX,
|
||||
StructuralFre,
|
||||
IdiomFre,
|
||||
LexiconFre
|
||||
** {
|
||||
|
||||
flags startcat = Phr ; -- unlexer = text ; lexer = text ;
|
||||
|
||||
} ;
|
||||
143
lib/resource-1.0/minimal/MkLex.hs
Normal file
143
lib/resource-1.0/minimal/MkLex.hs
Normal file
@@ -0,0 +1,143 @@
|
||||
module MkLexicon where
|
||||
|
||||
import Char
|
||||
|
||||
allLines o f = do
|
||||
s <- readFile f
|
||||
mapM_ (putStrLn . o) (filter noComm (lines s))
|
||||
|
||||
|
||||
-- discard comments and empty lines
|
||||
|
||||
noComm s = case s of
|
||||
'-':'-':_ -> False
|
||||
"" -> False
|
||||
_ -> True
|
||||
|
||||
-- remove tailing comments
|
||||
|
||||
remTail s = case s of
|
||||
'-':'-':_ -> []
|
||||
c:cs -> c : remTail cs
|
||||
_ -> s
|
||||
|
||||
-- postfix with category
|
||||
|
||||
postfix p s = takeWhile (not . isSpace) s ++ "_" ++ p
|
||||
|
||||
-- make fun rule
|
||||
|
||||
mkFun s =
|
||||
let (w,p) = span (/='_') s in
|
||||
" " ++ s ++ " : " ++ tail p ++ " ;"
|
||||
|
||||
-- make reused lin rule
|
||||
|
||||
mkRLin s =
|
||||
let w = head (words s) in
|
||||
" " ++ w ++ " = " ++ w ++ " ;"
|
||||
|
||||
-- make regular lin rule
|
||||
|
||||
mkLin s =
|
||||
let (w,p) = span (/='_') s in
|
||||
" " ++ s ++ " = " ++ lin (tail p) w ++ " ;"
|
||||
where
|
||||
lin cat w = case cat of
|
||||
"V2" -> "dirV2 (regV" ++ " \"" ++ w ++ "\")"
|
||||
'V':_ -> "mk" ++ cat ++ " (regV" ++ " \"" ++ w ++ "\")"
|
||||
_ -> "reg" ++ cat ++ " \"" ++ w ++ "\""
|
||||
|
||||
-- normalize identifiers in Structural
|
||||
|
||||
mkIdent s = case words s of
|
||||
w:ws -> if obsolete w then ""
|
||||
else " " ++ (unwords $ mkId (update w) : ws)
|
||||
_ -> s
|
||||
where
|
||||
mkId name@(c:cs) =
|
||||
let
|
||||
(x,y) = span isCat cs
|
||||
in
|
||||
toLower c : clean x ++ "_" ++ new y
|
||||
isCat = flip notElem "PDNVCAIS"
|
||||
clean x = case span isLower x of
|
||||
(_,[]) -> x
|
||||
(u,v) -> u ++ "8" ++ map toLower v
|
||||
new y = case y of
|
||||
"NumDet" -> "NDet"
|
||||
_ -> y
|
||||
obsolete w = elem w $ words "TheseNumNP ThoseNumNP NobodyNP NeitherNor NoDet AnyDet"
|
||||
update w = case w of
|
||||
"EitherOr" -> "EitherOrConjD"
|
||||
"BothAnd" -> "BothAndConjD"
|
||||
"PhrYes" -> "YesPhr"
|
||||
"PhrNo" -> "NoPhr"
|
||||
"WeNumNP" -> "WeNP"
|
||||
"YeNumNP" -> "YeNP"
|
||||
"HowManyDet" -> "HowManyIDet"
|
||||
"MostsDet" -> "MostManyDet"
|
||||
"WhichDet" -> "WhichOneIDet"
|
||||
"WhichNDet" -> "WhichManyIDet"
|
||||
"EverywhereNP" -> "EverywhereAdv"
|
||||
"SomewhereNP" -> "SomewhereAdv"
|
||||
"AgentPrep" -> "By8agentPrep"
|
||||
_ -> w
|
||||
|
||||
-- massage French verbs 9/2/2005
|
||||
|
||||
freVerb s = case words s of
|
||||
v:_ -> " " ++ v ++ " : " ++ cat v ++ " ;"
|
||||
_ -> []
|
||||
where
|
||||
cat v = dropWhile (not . isUpper) v
|
||||
|
||||
-- Swedish verbs 17/2
|
||||
|
||||
sweVerb s = case words s of
|
||||
('v':a:u:[]):verb:_ -> "fun " ++ verb ++ " : V ;\n" ++
|
||||
"lin " ++ verb ++ " = " ++ infl a u verb ++ " ;"
|
||||
_ -> []
|
||||
where
|
||||
infl a u verb =
|
||||
let
|
||||
(dne,geb) = span isConsonant $ tail $ reverse verb
|
||||
(beg,voc,end) = (reverse (tail geb), head geb, reverse dne)
|
||||
(pret,sup) = (beg++ [toLower a] ++end, beg++ [toLower u] ++ end ++"it")
|
||||
in
|
||||
unwords ["irregV", prQuot verb, prQuot pret, prQuot sup]
|
||||
|
||||
prQuot s = "\"" ++ s ++ "\""
|
||||
|
||||
isConsonant = not . isVowel
|
||||
|
||||
isVowel = flip elem "aeiouyäöå"
|
||||
|
||||
-- Norwegian 13/3
|
||||
|
||||
groupLines :: [String] -> [String]
|
||||
groupLines ss = [unwords [a, b, c] | [a,_,b,c,_] <- grps ss] where
|
||||
grps ls = let (g,rest) = splitAt 5 ls in g:grps rest
|
||||
|
||||
lin2fun s = case words s of
|
||||
_:fun:_:_ -> " fun " ++ fun ++ " : " ++ cat fun ++ " ;"
|
||||
_ -> s
|
||||
where
|
||||
cat fun = reverse (takeWhile (/='_') (reverse fun))
|
||||
|
||||
-- filter from a given file those lines whose first word is in a sought-set
|
||||
|
||||
allThose :: [String] -> [String] -> [String]
|
||||
allThose soughts givens = concatMap seek soughts where
|
||||
seek w = let s = [line | line <- givens, w':_ <- [words line], w == w']
|
||||
in if null s then ["-- " ++ w] else s
|
||||
|
||||
-- do this with files
|
||||
-- example: getAllThose "abstract/Mtmp" "english/BasicEng.gf"
|
||||
|
||||
getAllThose :: FilePath -> FilePath -> IO ()
|
||||
getAllThose sought given = do
|
||||
s <- readFile sought
|
||||
gi <- readFile given
|
||||
let so = [w | l <- lines s, w:_ <- [words l]]
|
||||
mapM_ putStrLn $ allThose so $ lines gi
|
||||
58
lib/resource-1.0/minimal/Tab.hs
Normal file
58
lib/resource-1.0/minimal/Tab.hs
Normal file
@@ -0,0 +1,58 @@
|
||||
module Tab where
|
||||
|
||||
import Data.Map
|
||||
|
||||
type Tree = String ----
|
||||
type Bracket = [String] ---
|
||||
|
||||
type Tab = Map [String] [Tree]
|
||||
|
||||
|
||||
-- build a Tab from a unilingual treebank file (not XML)
|
||||
|
||||
test file = do
|
||||
tab <- tb2tab file
|
||||
tst tab
|
||||
where
|
||||
tst tab = do
|
||||
s <- getLine
|
||||
-- let ts = analyse tab $ words s
|
||||
let ts = Prelude.map unwords $ analyseParts tab $ words s
|
||||
mapM_ putStrLn ts
|
||||
tst tab
|
||||
|
||||
-- analyse whole inputs
|
||||
analyse :: Tab -> [String] -> [Tree]
|
||||
analyse tab s = maybe [] id $ Data.Map.lookup s tab
|
||||
|
||||
-- analyse parts of inputs
|
||||
analyseParts :: Tab -> [String] -> [Bracket]
|
||||
analyseParts tab = anap where
|
||||
anap ws = case ws of
|
||||
w:vs -> case results ws of
|
||||
[] -> [w:res | res <- anap vs]
|
||||
(ts,ws2):_ -> [t:res | t <- ts, res <- anap ws2]
|
||||
_ -> [[]]
|
||||
results ws = Prelude.filter (not . Prelude.null . fst)
|
||||
[(ana ws1,ws2) | i <- [0..length ws], let (ws1,ws2) = splitAt i ws]
|
||||
ana = analyse tab
|
||||
|
||||
|
||||
tb2tab :: FilePath -> IO Tab
|
||||
tb2tab file = do
|
||||
ss <- readFile file >>= return . lines
|
||||
let ps = pairs ss
|
||||
return $ fromListWith (++) ps
|
||||
|
||||
pairs :: [String] -> [([String],[String])]
|
||||
pairs xs = case xs of
|
||||
x:y:ys -> (words y,[x]) : pairs ys
|
||||
_ -> []
|
||||
|
||||
|
||||
|
||||
{-
|
||||
|
||||
ceci - that_NP
|
||||
|
||||
-}
|
||||
Reference in New Issue
Block a user