a minimal-lexicon version of resource

2026-06-03 07:08:54 -06:00 · 2006-03-02 17:20:46 +00:00
parent bf872d018e
commit 27467d32fd
13 changed files with 357 additions and 0 deletions
--- a/lib/resource-1.0/minimal/MinLexicon.gf
+++ b/lib/resource-1.0/minimal/MinLexicon.gf
@@ -0,0 +1,14 @@
+abstract MinLexicon = Cat ** {
+  fun
+  add_V3 : V3 ;
+  apple_N : N ;
+  become_VA : VA ;
+  big_A : A ;
+  break_V2 : V2 ;
+  brother_N2 : N2 ;
+  paint_V2A : V2A ;
+--  paris_PN : PN ;
+  sleep_V : V ;
+  wonder_VQ : VQ ;
+
+}
--- a/lib/resource-1.0/minimal/MinLexiconEng.gf
+++ b/lib/resource-1.0/minimal/MinLexiconEng.gf
@@ -0,0 +1,2 @@
+concrete MinLexiconEng of MinLexicon = MinLexiconI with
+  (Lexicon = LexiconEng) ;
--- a/lib/resource-1.0/minimal/MinLexiconFre.gf
+++ b/lib/resource-1.0/minimal/MinLexiconFre.gf
@@ -0,0 +1,2 @@
+concrete MinLexiconFre of MinLexicon = MinLexiconI with
+  (Lexicon = LexiconFre) ;
--- a/lib/resource-1.0/minimal/MinLexiconI.gf
+++ b/lib/resource-1.0/minimal/MinLexiconI.gf
@@ -0,0 +1,17 @@
+incomplete concrete MinLexiconI of MinLexicon = Cat ** open Lexicon in {
+
+  lin
+
+  add_V3 = add_V3 ;
+  apple_N = apple_N ;
+  become_VA = become_VA ;
+  big_A = big_A ;
+  break_V2 = break_V2 ;
+  brother_N2 = brother_N2 ;
+  paint_V2A = paint_V2A ;
+--  paris_PN = paris_PN ;
+  say_VS = say_VS ;
+  sleep_V = sleep_V ;
+  wonder_VQ = wonder_VQ ;
+
+  } ;
--- a/lib/resource-1.0/minimal/MinStructural.gf
+++ b/lib/resource-1.0/minimal/MinStructural.gf
@@ -0,0 +1,29 @@
+abstract MinStructural = Cat ** {
+
+  fun
+
+-- This is an alphabetical list of structural words: just one from each category
+-- but so that all variation is created, e.g. all persons for pronouns.
+
+  almost_AdN : AdN ;   
+  always_AdV : AdV ;
+  and_Conj : Conj ;
+  but_PConj : PConj ;
+  can_VV : VV ;
+  either7or_DConj : DConj ;
+  he_Pron : Pron ;
+  here_Adv : Adv ;
+  how_IAdv : IAdv ;
+  i_Pron : Pron ;
+  if_Subj : Subj ;
+  in_Prep : Prep ;
+  that_NP : NP ;
+  we_Pron : Pron ;
+  whichPl_IDet : IDet ;
+  whichSg_IDet : IDet ;
+  whoPl_IP : IP ;
+  whoSg_IP : IP ;
+  youSg_Pron : Pron ;
+  youPl_Pron : Pron ;
+
+}
--- a/lib/resource-1.0/minimal/MinStructuralEng.gf
+++ b/lib/resource-1.0/minimal/MinStructuralEng.gf
@@ -0,0 +1,2 @@
+concrete MinStructuralEng of MinStructural = MinStructuralI with
+  (Structural = StructuralEng) ;
--- a/lib/resource-1.0/minimal/MinStructuralFre.gf
+++ b/lib/resource-1.0/minimal/MinStructuralFre.gf
@@ -0,0 +1,2 @@
+concrete MinStructuralFre of MinStructural = MinStructuralI with
+  (Structural = StructuralFre) ;
--- a/lib/resource-1.0/minimal/MinStructuralI.gf
+++ b/lib/resource-1.0/minimal/MinStructuralI.gf
@@ -0,0 +1,23 @@
+incomplete concrete MinStructuralI of MinStructural = open Structural in {
+ lin
+  almost_AdN = almost_AdN ;
+  always_AdV = always_AdV ;
+  and_Conj = and_Conj ;
+  but_PConj = but_PConj ;
+  can_VV = can_VV ;
+  either7or_DConj = either7or_DConj ;
+  he_Pron = he_Pron ;
+  here_Adv = here_Adv ;
+  how_IAdv = how_IAdv ;
+  i_Pron = i_Pron ;
+  if_Subj = if_Subj ;
+  in_Prep = in_Prep ;
+  that_NP = that_NP ;
+  we_Pron = we_Pron ;
+  whichPl_IDet = whichPl_IDet ;
+  whichSg_IDet = whichSg_IDet ;
+  whoPl_IP = whoPl_IP ;
+  whoSg_IP = whoSg_IP ;
+  youSg_Pron = youSg_Pron ;
+  youPl_Pron = youPl_Pron ;
+  } ;
--- a/lib/resource-1.0/minimal/Minimal.gf
+++ b/lib/resource-1.0/minimal/Minimal.gf
@@ -0,0 +1,21 @@
+--1 Minimal Resource Grammar
+
+-- This module defines the syntax with a minimal lexicon.
+-- This is useful for treebank generation.
+
+abstract Minimal = 
+  Noun,
+  Verb, 
+  Adjective,
+  Adverb,
+  Numeral,
+  Sentence, 
+  Question,
+  Relative,
+  Conjunction,
+  Phrase,
+  Text,
+  MinStructural,
+  Idiom,
+  MinLexicon
+  ** {} ;
--- a/lib/resource-1.0/minimal/MinimalEng.gf
+++ b/lib/resource-1.0/minimal/MinimalEng.gf
@@ -0,0 +1,22 @@
+--# -path=.:../english:../abstract:../common:prelude
+
+concrete MinimalEng of Minimal = 
+  NounEng, 
+  VerbEng, 
+  AdjectiveEng,
+  AdverbEng,
+  NumeralEng,
+  SentenceEng,
+  QuestionEng,
+  RelativeEng,
+  ConjunctionEng,
+  PhraseEng,
+  TextX,
+  StructuralEng,
+  IdiomEng,
+  LexiconEng
+  ** {
+
+flags startcat = Phr ; -- unlexer = text ; lexer = text ;
+
+} ;
--- a/lib/resource-1.0/minimal/MinimalFre.gf
+++ b/lib/resource-1.0/minimal/MinimalFre.gf
@@ -0,0 +1,22 @@
+--# -path=.:../french:../romance:../abstract:../common:prelude
+
+concrete MinimalFre of Minimal = 
+  NounFre, 
+  VerbFre, 
+  AdjectiveFre,
+  AdverbFre,
+  NumeralFre,
+  SentenceFre,
+  QuestionFre,
+  RelativeFre,
+  ConjunctionFre,
+  PhraseFre,
+  TextX,
+  StructuralFre,
+  IdiomFre,
+  LexiconFre
+  ** {
+
+flags startcat = Phr ; -- unlexer = text ; lexer = text ;
+
+} ;
--- a/lib/resource-1.0/minimal/MkLex.hs
+++ b/lib/resource-1.0/minimal/MkLex.hs
@@ -0,0 +1,143 @@
+module MkLexicon where
+
+import Char
+
+allLines o f = do
+  s <- readFile f
+  mapM_ (putStrLn . o) (filter noComm (lines s))
+
+
+-- discard comments and empty lines
+
+noComm s = case s of
+  '-':'-':_ -> False
+  "" -> False
+  _ -> True
+
+-- remove tailing comments
+
+remTail s = case s of
+  '-':'-':_ -> []
+  c:cs -> c : remTail cs
+  _ -> s
+
+-- postfix with category
+
+postfix p s = takeWhile (not . isSpace) s ++ "_" ++ p
+
+-- make fun rule
+
+mkFun s = 
+  let (w,p) = span (/='_') s in 
+  "  " ++ s ++ " : " ++ tail p ++ " ;"
+
+-- make reused lin rule
+
+mkRLin s = 
+  let w = head (words s) in 
+  "  " ++ w ++ " = " ++ w ++ " ;"
+
+-- make regular lin rule
+
+mkLin s = 
+  let (w,p) = span (/='_') s in 
+  "  " ++ s ++ " = " ++ lin (tail p) w ++ " ;"
+ where
+  lin cat w = case cat of
+    "V2"  -> "dirV2 (regV" ++ " \"" ++ w ++ "\")" 
+    'V':_ -> "mk" ++ cat ++ " (regV" ++ " \"" ++ w ++ "\")" 
+    _     -> "reg" ++ cat ++ " \"" ++ w ++ "\"" 
+
+-- normalize identifiers in Structural
+
+mkIdent s = case words s of
+  w:ws -> if obsolete w then "" 
+          else "  " ++ (unwords $ mkId (update w) : ws) 
+  _ -> s
+ where
+   mkId name@(c:cs) = 
+     let 
+       (x,y) = span isCat cs 
+     in 
+     toLower c : clean x ++ "_" ++ new y
+   isCat = flip notElem "PDNVCAIS"
+   clean x = case span isLower x of
+     (_,[]) -> x
+     (u,v)  -> u ++ "8" ++ map toLower v
+   new y = case y of
+     "NumDet" -> "NDet"
+     _ -> y
+   obsolete w = elem w $ words "TheseNumNP ThoseNumNP NobodyNP NeitherNor NoDet AnyDet"
+   update w = case w of
+     "EitherOr" -> "EitherOrConjD"
+     "BothAnd"  -> "BothAndConjD"
+     "PhrYes"   -> "YesPhr"
+     "PhrNo"    -> "NoPhr"
+     "WeNumNP"    -> "WeNP"
+     "YeNumNP"    -> "YeNP"
+     "HowManyDet" -> "HowManyIDet"
+     "MostsDet" -> "MostManyDet"
+     "WhichDet" -> "WhichOneIDet"
+     "WhichNDet" -> "WhichManyIDet"
+     "EverywhereNP" -> "EverywhereAdv"
+     "SomewhereNP" -> "SomewhereAdv"
+     "AgentPrep" -> "By8agentPrep"
+     _ -> w
+
+-- massage French verbs 9/2/2005
+
+freVerb s = case words s of
+  v:_ -> "  " ++ v ++ " : " ++ cat v ++ " ;"
+  _ -> []
+ where
+   cat v = dropWhile (not . isUpper) v
+
+-- Swedish verbs 17/2
+
+sweVerb s = case words s of
+  ('v':a:u:[]):verb:_ -> "fun " ++ verb ++ " : V ;\n" ++ 
+                         "lin " ++ verb ++ " = " ++ infl a u verb ++ " ;"
+  _ -> []
+ where
+   infl a u verb = 
+     let
+       (dne,geb) = span isConsonant $ tail $ reverse verb 
+       (beg,voc,end) = (reverse (tail geb), head geb, reverse dne)
+       (pret,sup) = (beg++ [toLower a] ++end, beg++ [toLower u] ++ end ++"it")
+     in
+     unwords ["irregV", prQuot verb, prQuot pret, prQuot sup] 
+
+prQuot s = "\"" ++ s ++ "\""
+
+isConsonant = not . isVowel
+
+isVowel = flip elem "aeiouyäöå"
+
+-- Norwegian 13/3
+
+groupLines :: [String] -> [String]
+groupLines ss = [unwords [a, b, c] | [a,_,b,c,_] <- grps ss] where
+  grps ls = let (g,rest) = splitAt 5 ls in g:grps rest
+
+lin2fun s = case words s of
+  _:fun:_:_ -> "  fun " ++ fun ++ " : " ++ cat fun ++ " ;"
+  _ -> s 
+ where
+   cat fun = reverse (takeWhile (/='_') (reverse fun))
+
+-- filter from a given file those lines whose first word is in a sought-set
+
+allThose :: [String] -> [String] -> [String]
+allThose soughts givens = concatMap seek soughts where
+  seek w = let s = [line | line <- givens, w':_ <- [words line], w == w']
+           in if null s then ["-- " ++ w] else s
+
+-- do this with files
+-- example: getAllThose "abstract/Mtmp" "english/BasicEng.gf"
+
+getAllThose :: FilePath -> FilePath -> IO ()
+getAllThose sought given = do
+  s  <- readFile sought
+  gi <- readFile given
+  let so = [w | l <- lines s, w:_ <- [words l]] 
+  mapM_ putStrLn $ allThose so $ lines gi
--- a/lib/resource-1.0/minimal/Tab.hs
+++ b/lib/resource-1.0/minimal/Tab.hs
@@ -0,0 +1,58 @@
+module Tab where
+
+import Data.Map
+
+type Tree = String ----
+type Bracket = [String] ---
+
+type Tab = Map [String] [Tree]
+
+
+-- build a Tab from a unilingual treebank file (not XML)
+
+test file = do
+  tab <- tb2tab file
+  tst tab
+ where
+  tst tab = do
+    s  <- getLine 
+--    let ts = analyse tab $ words s
+    let ts = Prelude.map unwords $ analyseParts tab $ words s
+    mapM_ putStrLn ts
+    tst tab
+
+-- analyse whole inputs
+analyse :: Tab -> [String] -> [Tree]
+analyse tab s = maybe [] id $ Data.Map.lookup s tab
+
+-- analyse parts of inputs
+analyseParts :: Tab -> [String] -> [Bracket]
+analyseParts tab = anap where
+  anap ws = case ws of
+    w:vs -> case results ws of
+      []         -> [w:res | res <- anap vs]
+      (ts,ws2):_ -> [t:res | t <- ts, res <- anap ws2]
+    _ -> [[]]
+  results ws = Prelude.filter (not . Prelude.null . fst) 
+    [(ana ws1,ws2) | i <- [0..length ws], let (ws1,ws2) = splitAt i ws]
+  ana = analyse tab
+
+
+tb2tab :: FilePath -> IO Tab
+tb2tab file = do
+  ss <- readFile file >>= return . lines
+  let ps = pairs ss
+  return $ fromListWith (++) ps
+
+pairs :: [String] -> [([String],[String])]
+pairs xs = case xs of
+  x:y:ys -> (words y,[x]) : pairs ys
+  _ -> []
+
+
+
+{-
+
+ceci - that_NP 
+
+-}