mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-24 02:12:50 -06:00
a minimal-lexicon version of resource
This commit is contained in:
14
lib/resource-1.0/minimal/MinLexicon.gf
Normal file
14
lib/resource-1.0/minimal/MinLexicon.gf
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
abstract MinLexicon = Cat ** {
|
||||||
|
fun
|
||||||
|
add_V3 : V3 ;
|
||||||
|
apple_N : N ;
|
||||||
|
become_VA : VA ;
|
||||||
|
big_A : A ;
|
||||||
|
break_V2 : V2 ;
|
||||||
|
brother_N2 : N2 ;
|
||||||
|
paint_V2A : V2A ;
|
||||||
|
-- paris_PN : PN ;
|
||||||
|
sleep_V : V ;
|
||||||
|
wonder_VQ : VQ ;
|
||||||
|
|
||||||
|
}
|
||||||
2
lib/resource-1.0/minimal/MinLexiconEng.gf
Normal file
2
lib/resource-1.0/minimal/MinLexiconEng.gf
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
concrete MinLexiconEng of MinLexicon = MinLexiconI with
|
||||||
|
(Lexicon = LexiconEng) ;
|
||||||
2
lib/resource-1.0/minimal/MinLexiconFre.gf
Normal file
2
lib/resource-1.0/minimal/MinLexiconFre.gf
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
concrete MinLexiconFre of MinLexicon = MinLexiconI with
|
||||||
|
(Lexicon = LexiconFre) ;
|
||||||
17
lib/resource-1.0/minimal/MinLexiconI.gf
Normal file
17
lib/resource-1.0/minimal/MinLexiconI.gf
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
incomplete concrete MinLexiconI of MinLexicon = Cat ** open Lexicon in {
|
||||||
|
|
||||||
|
lin
|
||||||
|
|
||||||
|
add_V3 = add_V3 ;
|
||||||
|
apple_N = apple_N ;
|
||||||
|
become_VA = become_VA ;
|
||||||
|
big_A = big_A ;
|
||||||
|
break_V2 = break_V2 ;
|
||||||
|
brother_N2 = brother_N2 ;
|
||||||
|
paint_V2A = paint_V2A ;
|
||||||
|
-- paris_PN = paris_PN ;
|
||||||
|
say_VS = say_VS ;
|
||||||
|
sleep_V = sleep_V ;
|
||||||
|
wonder_VQ = wonder_VQ ;
|
||||||
|
|
||||||
|
} ;
|
||||||
29
lib/resource-1.0/minimal/MinStructural.gf
Normal file
29
lib/resource-1.0/minimal/MinStructural.gf
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
abstract MinStructural = Cat ** {
|
||||||
|
|
||||||
|
fun
|
||||||
|
|
||||||
|
-- This is an alphabetical list of structural words: just one from each category
|
||||||
|
-- but so that all variation is created, e.g. all persons for pronouns.
|
||||||
|
|
||||||
|
almost_AdN : AdN ;
|
||||||
|
always_AdV : AdV ;
|
||||||
|
and_Conj : Conj ;
|
||||||
|
but_PConj : PConj ;
|
||||||
|
can_VV : VV ;
|
||||||
|
either7or_DConj : DConj ;
|
||||||
|
he_Pron : Pron ;
|
||||||
|
here_Adv : Adv ;
|
||||||
|
how_IAdv : IAdv ;
|
||||||
|
i_Pron : Pron ;
|
||||||
|
if_Subj : Subj ;
|
||||||
|
in_Prep : Prep ;
|
||||||
|
that_NP : NP ;
|
||||||
|
we_Pron : Pron ;
|
||||||
|
whichPl_IDet : IDet ;
|
||||||
|
whichSg_IDet : IDet ;
|
||||||
|
whoPl_IP : IP ;
|
||||||
|
whoSg_IP : IP ;
|
||||||
|
youSg_Pron : Pron ;
|
||||||
|
youPl_Pron : Pron ;
|
||||||
|
|
||||||
|
}
|
||||||
2
lib/resource-1.0/minimal/MinStructuralEng.gf
Normal file
2
lib/resource-1.0/minimal/MinStructuralEng.gf
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
concrete MinStructuralEng of MinStructural = MinStructuralI with
|
||||||
|
(Structural = StructuralEng) ;
|
||||||
2
lib/resource-1.0/minimal/MinStructuralFre.gf
Normal file
2
lib/resource-1.0/minimal/MinStructuralFre.gf
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
concrete MinStructuralFre of MinStructural = MinStructuralI with
|
||||||
|
(Structural = StructuralFre) ;
|
||||||
23
lib/resource-1.0/minimal/MinStructuralI.gf
Normal file
23
lib/resource-1.0/minimal/MinStructuralI.gf
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
incomplete concrete MinStructuralI of MinStructural = open Structural in {
|
||||||
|
lin
|
||||||
|
almost_AdN = almost_AdN ;
|
||||||
|
always_AdV = always_AdV ;
|
||||||
|
and_Conj = and_Conj ;
|
||||||
|
but_PConj = but_PConj ;
|
||||||
|
can_VV = can_VV ;
|
||||||
|
either7or_DConj = either7or_DConj ;
|
||||||
|
he_Pron = he_Pron ;
|
||||||
|
here_Adv = here_Adv ;
|
||||||
|
how_IAdv = how_IAdv ;
|
||||||
|
i_Pron = i_Pron ;
|
||||||
|
if_Subj = if_Subj ;
|
||||||
|
in_Prep = in_Prep ;
|
||||||
|
that_NP = that_NP ;
|
||||||
|
we_Pron = we_Pron ;
|
||||||
|
whichPl_IDet = whichPl_IDet ;
|
||||||
|
whichSg_IDet = whichSg_IDet ;
|
||||||
|
whoPl_IP = whoPl_IP ;
|
||||||
|
whoSg_IP = whoSg_IP ;
|
||||||
|
youSg_Pron = youSg_Pron ;
|
||||||
|
youPl_Pron = youPl_Pron ;
|
||||||
|
} ;
|
||||||
21
lib/resource-1.0/minimal/Minimal.gf
Normal file
21
lib/resource-1.0/minimal/Minimal.gf
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
--1 Minimal Resource Grammar
|
||||||
|
|
||||||
|
-- This module defines the syntax with a minimal lexicon.
|
||||||
|
-- This is useful for treebank generation.
|
||||||
|
|
||||||
|
abstract Minimal =
|
||||||
|
Noun,
|
||||||
|
Verb,
|
||||||
|
Adjective,
|
||||||
|
Adverb,
|
||||||
|
Numeral,
|
||||||
|
Sentence,
|
||||||
|
Question,
|
||||||
|
Relative,
|
||||||
|
Conjunction,
|
||||||
|
Phrase,
|
||||||
|
Text,
|
||||||
|
MinStructural,
|
||||||
|
Idiom,
|
||||||
|
MinLexicon
|
||||||
|
** {} ;
|
||||||
22
lib/resource-1.0/minimal/MinimalEng.gf
Normal file
22
lib/resource-1.0/minimal/MinimalEng.gf
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
--# -path=.:../english:../abstract:../common:prelude
|
||||||
|
|
||||||
|
concrete MinimalEng of Minimal =
|
||||||
|
NounEng,
|
||||||
|
VerbEng,
|
||||||
|
AdjectiveEng,
|
||||||
|
AdverbEng,
|
||||||
|
NumeralEng,
|
||||||
|
SentenceEng,
|
||||||
|
QuestionEng,
|
||||||
|
RelativeEng,
|
||||||
|
ConjunctionEng,
|
||||||
|
PhraseEng,
|
||||||
|
TextX,
|
||||||
|
StructuralEng,
|
||||||
|
IdiomEng,
|
||||||
|
LexiconEng
|
||||||
|
** {
|
||||||
|
|
||||||
|
flags startcat = Phr ; -- unlexer = text ; lexer = text ;
|
||||||
|
|
||||||
|
} ;
|
||||||
22
lib/resource-1.0/minimal/MinimalFre.gf
Normal file
22
lib/resource-1.0/minimal/MinimalFre.gf
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
--# -path=.:../french:../romance:../abstract:../common:prelude
|
||||||
|
|
||||||
|
concrete MinimalFre of Minimal =
|
||||||
|
NounFre,
|
||||||
|
VerbFre,
|
||||||
|
AdjectiveFre,
|
||||||
|
AdverbFre,
|
||||||
|
NumeralFre,
|
||||||
|
SentenceFre,
|
||||||
|
QuestionFre,
|
||||||
|
RelativeFre,
|
||||||
|
ConjunctionFre,
|
||||||
|
PhraseFre,
|
||||||
|
TextX,
|
||||||
|
StructuralFre,
|
||||||
|
IdiomFre,
|
||||||
|
LexiconFre
|
||||||
|
** {
|
||||||
|
|
||||||
|
flags startcat = Phr ; -- unlexer = text ; lexer = text ;
|
||||||
|
|
||||||
|
} ;
|
||||||
143
lib/resource-1.0/minimal/MkLex.hs
Normal file
143
lib/resource-1.0/minimal/MkLex.hs
Normal file
@@ -0,0 +1,143 @@
|
|||||||
|
module MkLexicon where
|
||||||
|
|
||||||
|
import Char
|
||||||
|
|
||||||
|
allLines o f = do
|
||||||
|
s <- readFile f
|
||||||
|
mapM_ (putStrLn . o) (filter noComm (lines s))
|
||||||
|
|
||||||
|
|
||||||
|
-- discard comments and empty lines
|
||||||
|
|
||||||
|
noComm s = case s of
|
||||||
|
'-':'-':_ -> False
|
||||||
|
"" -> False
|
||||||
|
_ -> True
|
||||||
|
|
||||||
|
-- remove tailing comments
|
||||||
|
|
||||||
|
remTail s = case s of
|
||||||
|
'-':'-':_ -> []
|
||||||
|
c:cs -> c : remTail cs
|
||||||
|
_ -> s
|
||||||
|
|
||||||
|
-- postfix with category
|
||||||
|
|
||||||
|
postfix p s = takeWhile (not . isSpace) s ++ "_" ++ p
|
||||||
|
|
||||||
|
-- make fun rule
|
||||||
|
|
||||||
|
mkFun s =
|
||||||
|
let (w,p) = span (/='_') s in
|
||||||
|
" " ++ s ++ " : " ++ tail p ++ " ;"
|
||||||
|
|
||||||
|
-- make reused lin rule
|
||||||
|
|
||||||
|
mkRLin s =
|
||||||
|
let w = head (words s) in
|
||||||
|
" " ++ w ++ " = " ++ w ++ " ;"
|
||||||
|
|
||||||
|
-- make regular lin rule
|
||||||
|
|
||||||
|
mkLin s =
|
||||||
|
let (w,p) = span (/='_') s in
|
||||||
|
" " ++ s ++ " = " ++ lin (tail p) w ++ " ;"
|
||||||
|
where
|
||||||
|
lin cat w = case cat of
|
||||||
|
"V2" -> "dirV2 (regV" ++ " \"" ++ w ++ "\")"
|
||||||
|
'V':_ -> "mk" ++ cat ++ " (regV" ++ " \"" ++ w ++ "\")"
|
||||||
|
_ -> "reg" ++ cat ++ " \"" ++ w ++ "\""
|
||||||
|
|
||||||
|
-- normalize identifiers in Structural
|
||||||
|
|
||||||
|
mkIdent s = case words s of
|
||||||
|
w:ws -> if obsolete w then ""
|
||||||
|
else " " ++ (unwords $ mkId (update w) : ws)
|
||||||
|
_ -> s
|
||||||
|
where
|
||||||
|
mkId name@(c:cs) =
|
||||||
|
let
|
||||||
|
(x,y) = span isCat cs
|
||||||
|
in
|
||||||
|
toLower c : clean x ++ "_" ++ new y
|
||||||
|
isCat = flip notElem "PDNVCAIS"
|
||||||
|
clean x = case span isLower x of
|
||||||
|
(_,[]) -> x
|
||||||
|
(u,v) -> u ++ "8" ++ map toLower v
|
||||||
|
new y = case y of
|
||||||
|
"NumDet" -> "NDet"
|
||||||
|
_ -> y
|
||||||
|
obsolete w = elem w $ words "TheseNumNP ThoseNumNP NobodyNP NeitherNor NoDet AnyDet"
|
||||||
|
update w = case w of
|
||||||
|
"EitherOr" -> "EitherOrConjD"
|
||||||
|
"BothAnd" -> "BothAndConjD"
|
||||||
|
"PhrYes" -> "YesPhr"
|
||||||
|
"PhrNo" -> "NoPhr"
|
||||||
|
"WeNumNP" -> "WeNP"
|
||||||
|
"YeNumNP" -> "YeNP"
|
||||||
|
"HowManyDet" -> "HowManyIDet"
|
||||||
|
"MostsDet" -> "MostManyDet"
|
||||||
|
"WhichDet" -> "WhichOneIDet"
|
||||||
|
"WhichNDet" -> "WhichManyIDet"
|
||||||
|
"EverywhereNP" -> "EverywhereAdv"
|
||||||
|
"SomewhereNP" -> "SomewhereAdv"
|
||||||
|
"AgentPrep" -> "By8agentPrep"
|
||||||
|
_ -> w
|
||||||
|
|
||||||
|
-- massage French verbs 9/2/2005
|
||||||
|
|
||||||
|
freVerb s = case words s of
|
||||||
|
v:_ -> " " ++ v ++ " : " ++ cat v ++ " ;"
|
||||||
|
_ -> []
|
||||||
|
where
|
||||||
|
cat v = dropWhile (not . isUpper) v
|
||||||
|
|
||||||
|
-- Swedish verbs 17/2
|
||||||
|
|
||||||
|
sweVerb s = case words s of
|
||||||
|
('v':a:u:[]):verb:_ -> "fun " ++ verb ++ " : V ;\n" ++
|
||||||
|
"lin " ++ verb ++ " = " ++ infl a u verb ++ " ;"
|
||||||
|
_ -> []
|
||||||
|
where
|
||||||
|
infl a u verb =
|
||||||
|
let
|
||||||
|
(dne,geb) = span isConsonant $ tail $ reverse verb
|
||||||
|
(beg,voc,end) = (reverse (tail geb), head geb, reverse dne)
|
||||||
|
(pret,sup) = (beg++ [toLower a] ++end, beg++ [toLower u] ++ end ++"it")
|
||||||
|
in
|
||||||
|
unwords ["irregV", prQuot verb, prQuot pret, prQuot sup]
|
||||||
|
|
||||||
|
prQuot s = "\"" ++ s ++ "\""
|
||||||
|
|
||||||
|
isConsonant = not . isVowel
|
||||||
|
|
||||||
|
isVowel = flip elem "aeiouyäöå"
|
||||||
|
|
||||||
|
-- Norwegian 13/3
|
||||||
|
|
||||||
|
groupLines :: [String] -> [String]
|
||||||
|
groupLines ss = [unwords [a, b, c] | [a,_,b,c,_] <- grps ss] where
|
||||||
|
grps ls = let (g,rest) = splitAt 5 ls in g:grps rest
|
||||||
|
|
||||||
|
lin2fun s = case words s of
|
||||||
|
_:fun:_:_ -> " fun " ++ fun ++ " : " ++ cat fun ++ " ;"
|
||||||
|
_ -> s
|
||||||
|
where
|
||||||
|
cat fun = reverse (takeWhile (/='_') (reverse fun))
|
||||||
|
|
||||||
|
-- filter from a given file those lines whose first word is in a sought-set
|
||||||
|
|
||||||
|
allThose :: [String] -> [String] -> [String]
|
||||||
|
allThose soughts givens = concatMap seek soughts where
|
||||||
|
seek w = let s = [line | line <- givens, w':_ <- [words line], w == w']
|
||||||
|
in if null s then ["-- " ++ w] else s
|
||||||
|
|
||||||
|
-- do this with files
|
||||||
|
-- example: getAllThose "abstract/Mtmp" "english/BasicEng.gf"
|
||||||
|
|
||||||
|
getAllThose :: FilePath -> FilePath -> IO ()
|
||||||
|
getAllThose sought given = do
|
||||||
|
s <- readFile sought
|
||||||
|
gi <- readFile given
|
||||||
|
let so = [w | l <- lines s, w:_ <- [words l]]
|
||||||
|
mapM_ putStrLn $ allThose so $ lines gi
|
||||||
58
lib/resource-1.0/minimal/Tab.hs
Normal file
58
lib/resource-1.0/minimal/Tab.hs
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
module Tab where
|
||||||
|
|
||||||
|
import Data.Map
|
||||||
|
|
||||||
|
type Tree = String ----
|
||||||
|
type Bracket = [String] ---
|
||||||
|
|
||||||
|
type Tab = Map [String] [Tree]
|
||||||
|
|
||||||
|
|
||||||
|
-- build a Tab from a unilingual treebank file (not XML)
|
||||||
|
|
||||||
|
test file = do
|
||||||
|
tab <- tb2tab file
|
||||||
|
tst tab
|
||||||
|
where
|
||||||
|
tst tab = do
|
||||||
|
s <- getLine
|
||||||
|
-- let ts = analyse tab $ words s
|
||||||
|
let ts = Prelude.map unwords $ analyseParts tab $ words s
|
||||||
|
mapM_ putStrLn ts
|
||||||
|
tst tab
|
||||||
|
|
||||||
|
-- analyse whole inputs
|
||||||
|
analyse :: Tab -> [String] -> [Tree]
|
||||||
|
analyse tab s = maybe [] id $ Data.Map.lookup s tab
|
||||||
|
|
||||||
|
-- analyse parts of inputs
|
||||||
|
analyseParts :: Tab -> [String] -> [Bracket]
|
||||||
|
analyseParts tab = anap where
|
||||||
|
anap ws = case ws of
|
||||||
|
w:vs -> case results ws of
|
||||||
|
[] -> [w:res | res <- anap vs]
|
||||||
|
(ts,ws2):_ -> [t:res | t <- ts, res <- anap ws2]
|
||||||
|
_ -> [[]]
|
||||||
|
results ws = Prelude.filter (not . Prelude.null . fst)
|
||||||
|
[(ana ws1,ws2) | i <- [0..length ws], let (ws1,ws2) = splitAt i ws]
|
||||||
|
ana = analyse tab
|
||||||
|
|
||||||
|
|
||||||
|
tb2tab :: FilePath -> IO Tab
|
||||||
|
tb2tab file = do
|
||||||
|
ss <- readFile file >>= return . lines
|
||||||
|
let ps = pairs ss
|
||||||
|
return $ fromListWith (++) ps
|
||||||
|
|
||||||
|
pairs :: [String] -> [([String],[String])]
|
||||||
|
pairs xs = case xs of
|
||||||
|
x:y:ys -> (words y,[x]) : pairs ys
|
||||||
|
_ -> []
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
{-
|
||||||
|
|
||||||
|
ceci - that_NP
|
||||||
|
|
||||||
|
-}
|
||||||
Reference in New Issue
Block a user