eliminate modules PGF.Lexing, PGF.LexingAGreek. Make PGF.Utilities an internal module in the runtime. These are not really part of the core runtime.

2026-06-11 02:56:34 -06:00 · 2017-09-04 11:43:37 +02:00
parent cae52bb9af
commit 1f908fa7bf
12 changed files with 453 additions and 561 deletions
--- a/src/runtime/haskell-bind/PGF.hs
+++ b/src/runtime/haskell-bind/PGF.hs
@@ -0,0 +1,3 @@
+module PGF(module PGF2) where
+
+import PGF2
--- a/src/runtime/haskell-bind/PGF/Internal.hs
+++ b/src/runtime/haskell-bind/PGF/Internal.hs
@@ -0,0 +1 @@
+module PGF.Internal where
--- a/src/runtime/haskell-bind/examples/pgf-hsbind-trans.hs
+++ b/src/runtime/haskell-bind/examples/pgf-hsbind-trans.hs
@@ -1,61 +0,0 @@
-- | pgf-hsbind-trans: A simple batch translator to illustrate the use of the Haskell binding
--   to the C implementation of the PGF run-time system.
--
-- AR April 2015 modified from pgf-shell
-
-import PGF2
-import PGF.Lexing (lexText')
-
-import Data.Char(isSpace,toLower)
-import Data.List (nub)
-import System.Environment
-import qualified Data.Map as Map
-
-maxNumTrees :: Int
-maxNumTrees = 1
-
-maxNumVariants :: Int
-maxNumVariants = 1
-
-main = getPGF =<< getArgs
-
-getPGF args = case args of
-  [path,from,to,cat,mxt,mxv] -> pgfTrans from to (Just cat) (read mxt, read mxv) =<< readPGF path
-  [path,from,to]        -> pgfTrans from to Nothing (maxNumTrees,maxNumVariants) =<< readPGF path
-  _                     -> putStrLn "Usage: pgf-hsbind-trans <path to pgf> <from-lang> <to-lang> [<cat> <#trees> <#variants>]"
-
-pgfTrans from to mcat mx pgf = do
-  cfrom <- getConcr' pgf from
-  cto   <- getConcr' pgf to
-  let cat = maybe (startCat pgf) id mcat
-  interact (unlines . map (translates pgf cfrom cto cat mx) . lines)
-
-getConcr' pgf lang =
-    maybe (fail $ "Concrete syntax not found: "++show lang) return $
-    Map.lookup lang (languages pgf)
-
-linearizeAndShow gr mxv (t,p) = [show t]++take mxv (nub (map unstar (linearizeAll gr t)))++[show p]
-  where
-    unstar s = case s of
-      '*':' ':cs -> cs
-      _ -> s
-
-translates pgf cfrom cto cat (mxt,mxv) s0 = 
-  let s1 = lextext cfrom s0
-      (s,p) = case reverse s1 of c:_ | elem c ".?!" -> (init s1,[c]) ; _ -> (s1,[]) -- separate final punctuation
-  in
-  case cparse pgf cfrom cat s of
-    Left tok -> unlines [s,"Parse error: "++tok]
-    Right ts -> unlines $ (("> "++ s):) $ take mxt $ map ((++p) . unlines . linearizeAndShow cto mxv) ts -- append punctuation
-
-cparse pgf concr cat input = parseWithHeuristics concr cat input (-1) callbacks where
-  callbacks = maybe [] cb $ lookup "App" literalCallbacks
-  cb fs = [(cat,f pgf ("TranslateEng",concr) input)|(cat,f)<-fs]
-
-lextext cnc = unwords . lexText' (\w -> case lookupMorpho cnc w of
-  _:_ -> w
-  _ -> case lookupMorpho cnc (uncapitInit w) of 
-    [] -> w 
-    _ -> uncapitInit w
-  )
- where uncapitInit (c:cs) = toLower c : cs
--- a/src/runtime/haskell-bind/pgf2.cabal
+++ b/src/runtime/haskell-bind/pgf2.cabal
@@ -14,7 +14,9 @@ extra-source-files:  README
 cabal-version:       >=1.10

 library
-  exposed-modules:     PGF2, SG
+  exposed-modules:     PGF2, SG,
+                       -- backwards compatibility API:
+                       PGF, PGF.Internal
  other-modules:       PGF2.FFI, PGF2.Expr, PGF2.Type, SG.FFI
  build-depends:       base >=4.3, bytestring >=0.9,
                       containers, pretty
--- a/src/runtime/haskell/PGF/Lexing.hs
+++ b/src/runtime/haskell/PGF/Lexing.hs
@@ -1,115 +0,0 @@
-module PGF.Lexing where
-import Data.Char(isSpace,toLower,toUpper)
-
-- * Text lexing
-- | Text lexing with standard word capitalization of the first word of every sentence
-lexText :: String -> [String]
-lexText = lexText' uncapitInit
-
-- | Text lexing with custom treatment of the first word of every sentence.
-lexText' :: (String->String) -> String -> [String]
-lexText' uncap1 = uncap . lext where
-  lext s = case s of
-    c:cs | isMajorPunct c -> [c] : uncap (lext cs)
-    c:cs | isMinorPunct c -> [c] : lext cs
-    c:cs | isSpace c ->       lext cs
-    _:_ -> let (w,cs) = break (\x -> isSpace x || isPunct x) s in w : lext cs
-    _ -> [s]
-  uncap s = case s of
-    w:ws -> uncap1 w:ws
-    _ -> s
-
-unlexText :: [String] -> String
-unlexText = capitInit . unlext where
-  unlext s = case s of
-    w:[] -> w
-    w:[c]:[] | isPunct c -> w ++ [c]
-    w:[c]:cs | isMajorPunct c -> w ++ [c] ++ " " ++ capitInit (unlext cs)
-    w:[c]:cs | isMinorPunct c -> w ++ [c] ++ " " ++ unlext cs
-    w:ws -> w ++ " " ++ unlext ws
-    _ -> []
-
-- | Bind tokens separated by Prelude.BIND, i.e. &+
-bindTok :: [String] -> [String]
-bindTok ws = case ws of
-               w1:"&+":w2:ws -> bindTok ((w1++w2):ws)
-               "&+":ws       -> bindTok ws
-               "&|":(c:cs):ws-> bindTok ((toUpper c:cs) : ws)
-               "&|":ws       -> bindTok ws
-               w:ws          -> w:bindTok ws
-               []            -> []
-
-- * Code lexing
-
-- | Haskell lexer, usable for much code
-lexCode :: String -> [String]
-lexCode ss = case lex ss of
-  [(w@(_:_),ws)] -> w : lexCode ws
-  _ -> []
-
-unlexCode :: [String] -> String
-unlexCode s = case s of
-  w:[] -> w
-  [c]:cs | isParen c -> [c] ++ unlexCode cs
-  w:cs@([c]:_) | isClosing c -> w ++ unlexCode cs
-  w:ws -> w ++ " " ++ unlexCode ws
-  _ -> []
-
-
-- | LaTeX lexer in the math mode: \ should not be separated from the next word
-
-lexLatexCode :: String -> [String]
-lexLatexCode = restoreBackslash . lexCode where --- quick hack: postprocess Haskell's lex
-  restoreBackslash ws = case ws of
-    "\\":w:ww -> ("\\" ++ w) : restoreBackslash ww
-    w:ww -> w:restoreBackslash ww
-    _ -> ws
-
-- * Mixed lexing
-
-- | LaTeX style lexer, with "math" environment using Code between $...$
-lexMixed :: String -> [String]
-lexMixed = concat . alternate False [] where
-  alternate env t s = case s of
-    '$':cs -> lex env (reverse t) : ["$"] : alternate (not env) [] cs
-    '\\':c:cs | elem c "()[]" -> lex env (reverse t) : [['\\',c]] : alternate (not env) [] cs
-    c:cs -> alternate env (c:t) cs
-    _ -> [lex env (reverse t)]
-  lex env = if env then lexLatexCode else lexText
-
-unlexMixed :: [String] -> String
-unlexMixed = capitInit . concat . alternate False where
-  alternate env s = case s of
-    _:_ -> case break (flip elem ["$","\\[","\\]","\\(","\\)"]) s of
-      (t,[])  -> unlex env t : []
-      (t,c:m) -> unlex env t : sep env c m : alternate (not env) m
-    _ -> []
-  unlex env = if env then unlexCode else (uncapitInit . unlexText)
-  sep env c m = case (m,env) of
-    ([p]:_,True) | isPunct p -> c   -- closing $ glued to next punct 
-    (_,  True) -> c ++ " "   -- closing $ otherwise separated by space from what follows
-    _ -> " " ++ c   -- put space before opening $
-
-- * Additional lexing uitilties
-
-- | Capitalize first letter
-capitInit s = case s of
-  c:cs -> toUpper c : cs
-  _ -> s
-
-- | Uncapitalize first letter
-uncapitInit s = case s of
-  c:cs -> toLower c : cs
-  _ -> s
-
-- | Unquote each string wrapped in double quotes
-unquote = map unq where 
-  unq s = case s of
-    '"':cs@(_:_) | last cs == '"' -> init cs
-    _ -> s
-
-isPunct = flip elem ".?!,:;"
-isMajorPunct = flip elem ".?!"
-isMinorPunct = flip elem ",:;"
-isParen = flip elem "()[]{}"
-isClosing = flip elem ")]}"
--- a/src/runtime/haskell/PGF/LexingAGreek.hs
+++ b/src/runtime/haskell/PGF/LexingAGreek.hs
@@ -1,310 +0,0 @@
-module PGF.LexingAGreek where                      -- HL 2a1.2.2016
-import Data.Char(isSpace)
-
-- * Text lexing without word capitalization of the first word of every sentence.
-- Greek sentences in (transliterated) texts don't start with capital character. 
-
-- Ordinary greek text does not have vowel length indicators. We then use '.' as
-- a sentence separator.
-
-lexTextAGreek :: String -> [String]
-lexTextAGreek s = lext s where
-  lext s = case s of
-    c:cs | isAGreekPunct c -> [c] : (lext cs)
-    c:cs | isSpace c -> lext cs
-    _:_ -> let (w,cs) = break (\x -> isSpace x || isAGreekPunct x) s 
-           in w : lext cs
-    [] -> []
-
-- Philological greek text may use vowel length indicators. Then '.' is not a sentence 
-- separator, nor is 'v. ' for vowel v. Sentence ends at 'v..' or 'c. ' with non-vowel c.
-
-lexTextAGreek2 :: String -> [String]
-lexTextAGreek2 s = lext s where
-  lext s = case s of
-    c:cs | isAGreekPunct c -> [c] : (lext cs)
-    c:cs | isSpace c -> lext cs
-    _:_ -> let (w,cs) = break (\x -> isSpace x || isAGreekPunct x) s 
-           in case cs of 
-                '.':'.':d:ds | isSpace d 
-                  -> (w++['.']) : lext ('.':d:ds)
-                '.':d:ds | isAGreekPunct d || isSpace d 
-                  -> (w++['.']) : lext (d:ds)
-                '.':d:ds | not (isSpace d) 
-                  -> case lext (d:ds) of
-                       e:es -> (w++['.']++e) : es
-                       es -> (w++['.']) : es 
-                '.':[] -> (w++['.']) : []
-                _ -> w : lext cs      
-    [] -> []
-
-unlexTextAGreek :: [String] -> String
-unlexTextAGreek = unlext where
-  unlext s = case s of
-    w:[] -> w
-    w:[c]:[] | isAGreekPunct c -> w ++ [c]
-    w:[c]:cs | isAGreekPunct c -> w ++ [c] ++ " " ++ unlext cs
-    w:ws -> w ++ " " ++ unlext ws
-    [] -> []
-
-isAGreekPunct = flip elem ".,;··"  -- colon: first version · not in charset,
-                                   -- second version · = 00B7 standard code point
-
-- * Text lexing and unlexing for Ancient Greek: 
--   1. no capitalization of initial word, 
--   2. grave/acute accent switch on final syllables of words not followed by punctuation, 
--   3. accent move from/to support word to/from following clitic words (iterated).
-
-lexAGreek :: String -> [String]
-lexAGreek = fromAGreek . lexTextAGreek
-
-lexAGreek2 :: String -> [String]
-lexAGreek2 = fromAGreek . lexTextAGreek2
-
-unlexAGreek :: [String] -> String
-unlexAGreek = unlexTextAGreek . toAGreek
-
-- Note: unlexAGreek does not glue punctuation with the previous word, so that short
-- vowel indication (like a.) differs from sentence end (a .). 
-
-- | normalize = change grave accent on sentence internal words to acute, 
-- and shift inherited acutes to the following enclitic (where they are
-- visible only as shown in the list of enclitics above)
-
-normalize :: String -> String
-normalize = (unlexTextAGreek . fromAGreek . lexTextAGreek) 
-
-fromAGreek :: [String] -> [String]
-fromAGreek s = case s of
-  w:[]:vs -> w:[]:(fromAGreek vs)
-  w:(v:vs) | isAGreekPunct (head v) -> w:v:(fromAGreek vs)
-  w:v:vs | wasEnclitic v && wasEnclitic w ->
-    getEnclitic w : fromAGreek (v:vs)
-  w:v:vs | wasEnclitic v && wasProclitic w ->  -- "ei)' tines*"
-    getProclitic w : getEnclitic v : fromAGreek vs
-  w:v:vs | wasEnclitic v && (hasEndCircum w || 
-    (hasEndAcute w && hasSingleAccent w)) ->  
-    w : getEnclitic v : fromAGreek vs        -- ok "sofoi' tines*"
-  w:v:vs | wasEnclitic v && hasPrefinalAcute w ->
-    w : getEnclitic v : fromAGreek vs
-  w:v:vs | wasEnclitic v && hasEndAcute w ->  -- ok "a)'nvrwpoi' tines*"
-    dropLastAccent w : getEnclitic v : fromAGreek vs
-  w:v:vs | wasEnclitic w ->
-    getEnclitic w : fromAGreek (v:vs)
-  w:ws -> (toAcute w) : (fromAGreek ws)
-  ws -> ws 
-
-- | de-normalize = change acute accent of end syllables in sentence internal 
--  (non-enclitic) words to grave accent, and move accents of enclitics to the 
--  previous word to produce ordinary ancient greek
-
-denormalize :: String -> String
-denormalize = (unlexTextAGreek . toAGreek . lexTextAGreek) 
-
-toAGreek :: [String] -> [String]
-toAGreek s = case s of
-  w:[]:vs -> w:[]:(toAGreek vs)
-  w:v:vs | isAGreekPunct (head v) -> w:[]:v:(toAGreek vs)  -- w:[] for following -to_ancientgreek
-  w:v:vs | isEnclitic v && isEnclitic w -> 
-    addAcute w : toAGreek (dropAccent v:vs)    -- BR 11 Anm.2
-  w:v:vs | isEnclitic v && isProclitic w ->    -- BR 11 a.beta
-    addAcute w: (toAGreek (dropAccent v:vs))
-  w:v:vs | isEnclitic v && (hasEndCircum w || hasEndAcute w) -> 
-    w:(toAGreek (dropAccent v:vs))             -- BR 11 a.alpha,beta
-  w:v:vs | isEnclitic v && hasPrefinalAcute w -> 
-    w:v: toAGreek vs   -- bisyllabic v keeps its accent  BR 11 b.
-  w:v:vs | isEnclitic v -> 
-    (addAcute w):(toAGreek (dropAccent v:vs))  -- BR 11 a.gamma
-  w:v:vs | isEnclitic w -> w:(toAGreek (v:vs))
-  w:ws -> (toGrave w) : (toAGreek ws)
-  ws -> ws 
-
-- | Change accent on the final syllable of a word
-
-toGrave :: String -> String 
-toGrave = reverse . grave . reverse where
-  grave s = case s of
-    '\'':cs -> '`':cs 
-    c:cs | isAGreekVowel c -> c:cs 
-    c:cs -> c: grave cs
-    _ -> s
-
-toAcute :: String -> String 
-toAcute = reverse . acute . reverse where
-  acute s = case s of
-    '`':cs -> '\'':cs
-    c:cs | isAGreekVowel c -> c:cs 
-    c:cs -> c: acute cs
-    _ -> s
-
-isAGreekVowel = flip elem "aeioyhw"
-
-- | Accent moves for enclitics and proclitics (atona)
-
-enclitics = [
-  "moy","moi","me",     -- personal pronouns
-  "soy","soi","se",
-  "oy(","oi(","e(",
-  "tis*","ti","tina'",  -- indefinite pronoun 
-  "tino's*","tini'",
-  "tine's*","tina's*",
-  "tinw~n","tisi'","tisi'n",
-  "poy","poi",          -- indefinite adverbs
-  "pove'n","pws*",
-  "ph|","pote'",
-  "ge","te","toi",      -- particles
-  "nyn","per","pw" 
-   -- suffix -"de"
-   -- praes.indik. of fhmi', ei)mi' (except fh's*, ei)~)
-  ] -- and more, BR 11
-
-proclitics = [
-  "o(","h(","oi(","ai(",     -- articles
-  "e)n","ei)s*","e)x","e)k", -- prepositions
-  "ei)","w(s*",              -- conjunctions
-  "oy)","oy)k","oy)c"        -- negation
-  ]
-
-isEnclitic = flip elem enclitics
-isProclitic = flip elem proclitics
-
-- Check if a word is an enclitic or accented enclitic and extract the enclitic
-
-wasEnclitic = let unaccented = (filter (not . hasAccent) enclitics) 
-                    ++ (map dropAccent (filter hasAccent enclitics))
-                  accented = (filter hasAccent enclitics) 
-                    ++ map addAcute (filter (not . hasAccent) enclitics) 
-              in flip elem (accented ++ unaccented)
-
-wasProclitic = flip elem (map addAcute proclitics)
-
-getEnclitic = 
-  let pairs = zip (enclitics ++ (map dropAccent (filter hasAccent enclitics))
-                   ++ (map addAcute (filter (not . hasAccent) enclitics)))
-                  (enclitics ++ (filter hasAccent enclitics)
-                   ++ (filter (not . hasAccent) enclitics))
-      find = \v -> lookup v pairs
-  in \v -> case (find v) of 
-    Just x -> x 
-    _ -> v
-getProclitic = 
-  let pairs = zip (map addAcute proclitics) proclitics 
-      find = \v -> lookup v pairs
-  in \v -> case (find v) of 
-    Just x -> x 
-    _ -> v
-
-- | Accent manipulation
-
-dropAccent = reverse . drop . reverse where 
-  drop s = case s of
-    [] -> []
-    '\'':cs -> cs
-    '`':cs -> cs
-    '~':cs -> cs
-    c:cs -> c:drop cs
-
-dropLastAccent = reverse . drop . reverse where 
-  drop s = case s of
-    [] -> []
-    '\'':cs -> cs
-    '`':cs -> cs
-    '~':cs -> cs
-    c:cs -> c:drop cs
-
-addAcute :: String -> String
-addAcute = reverse . acu . reverse where
-  acu w = case w of 
-    c:cs | c == '\'' -> c:cs
-    c:cs | c == '(' -> '\'':c:cs
-    c:cs | c == ')' -> '\'':c:cs
-    c:cs | isAGreekVowel c -> '\'':c:cs
-    c:cs -> c : acu cs
-    _ -> w
-
-- | Accent checking on end syllables
-
-hasEndAcute = find . reverse where
-  find s = case s of
-    [] -> False
-    '\'':cs -> True
-    '`':cs -> False
-    '~':cs -> False
-    c:cs | isAGreekVowel c -> False
-    _:cs -> find cs
-
-hasEndCircum = find . reverse where
-  find s = case s of
-    [] -> False
-    '\'':cs -> False
-    '`':cs -> False
-    '~':cs -> True
-    c:cs | isAGreekVowel c -> False
-    _:cs -> find cs
-
-hasPrefinalAcute = find . reverse where
-  find s = case s of
-    [] -> False
-    '\'':cs -> False  -- final acute
-    '`':cs -> False
-    '~':cs -> False
-    c:d:cs | isAGreekVowel c && isAGreekVowel d -> findNext cs
-    c:cs | isAGreekVowel c -> findNext cs
-    _:cs -> find cs where
-  findNext s = case s of 
-    [] -> False
-    '\'':cs -> True  -- prefinal acute
-    '`':cs -> False
-    '~':cs -> False
-    c:cs | isAGreekVowel c -> False
-    _:cs -> findNext cs where
-    
-hasSingleAccent v = 
-  hasAccent v && not (hasAccent (dropLastAccent v))
-
-hasAccent v = case v of 
-  [] -> False
-  c:cs -> elem c ['\'','`','~'] || hasAccent cs
-
-{- Tests: 
-
-- denormalization. Examples in BR 11 work: 
-}
-enclitics_expls = -- normalized
-  "sofw~n tis*":"sofw~n tine's*":"sof~n tinw~n":  -- a.alpha
-  "sofo's tis*":"sofoi' tine's*":                 -- a.beta
-  "ei) tis*":"ei) tine's*":
-  "a)'nvrwpos* tis*":"a)'nvrwpoi tine's*":        -- a.gamma
-  "doy~los* tis*":"doy~loi tine's*":
-  "lo'gos* tis*":"lo'goi tine's*":"lo'gwn tinw~n":  -- b.
-  "ei) poy tis* tina' i)'doi":                    -- Anm. 2.
-  [] 
-{-
-test = map denormalize enclitics_expls
-
-*PGF.LexingAGreek> test
-  ["sofw~n tis*","sofw~n tines*","sof~n tinwn",
-  "sofo's tis*","sofoi' tines*",
-  "ei)' tis*","ei)' tines*",
-  "a)'nvrwpo's* tis*","a)'nvrwpoi' tines*",
-  "doy~lo's* tis*","doy~loi' tines*",
-  "lo'gos* tis* ","lo'goi tine's*","lo'gwn tinw~n ",
-  "ei)' poy' ti's* tina i)'doi"]
-
-- normalization:
-
-*PGF.LexingAGreek> map normalize test
-  ["sofw~n tis*","sofw~n tine's*","sof~n tinw~n",
-  "sofo's tis*","sofoi' tine's*",
-  "ei) tis*","ei) tine's*",
-  "a)'nvrwpos* tis*","a)'nvrwpoi tine's*",
-  "doy~los* tis*","doy~loi tine's*",
-  "lo'gos* tis*","lo'goi tine's*","lo'gwn tinw~n",
-  "ei) poy tis* tina' i)'doi"]
-
-*PGF.LexingAGreek> map (normalize . denormalize) enclitics_expls == enclitics_expls
-True
-*PGF.LexingAGreek> map (denormalize . normalize) test == test
-True
-
-}
--- a/src/runtime/haskell/pgf.cabal
+++ b/src/runtime/haskell/pgf.cabal
@@ -50,9 +50,6 @@ Library
  exposed-modules: 
    PGF
    PGF.Internal
-    PGF.Lexing
-    PGF.LexingAGreek
-    PGF.Utilities
    PGF.Haskell

  other-modules:
@@ -78,3 +75,4 @@ Library
    PGF.VisualizeTree
    PGF.ByteCode
    PGF.OldBinary
+    PGF.Utilities