refactored cnc configfile parsing a bit

This commit is contained in:
Aarne Ranta
2018-12-18 18:30:40 +01:00
parent b3a2b53df2
commit 9834b89a30

View File

@@ -35,7 +35,7 @@ import PGF.Macros (lookValCat, BracketedString(..))
import qualified Data.Map as Map import qualified Data.Map as Map
--import qualified Data.IntMap as IntMap --import qualified Data.IntMap as IntMap
import Data.List (intersperse,nub,mapAccumL,find,groupBy,sortBy) import Data.List (intersperse,nub,mapAccumL,find,groupBy,sortBy,partition)
import Data.Ord (comparing) import Data.Ord (comparing)
import Data.Char (isDigit) import Data.Char (isDigit)
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
@@ -799,7 +799,7 @@ fixCoNLL cncLabels conll = map fixc conll where
_ -> cat ++ "-" ++ x _ -> cat ++ "-" ++ x
getCncDepLabels :: String -> CncLabels getCncDepLabels :: String -> CncLabels
getCncDepLabels s = wlabels s ++ flabels s getCncDepLabels s = wlabels ws ++ flabels fs
where where
wlabels = wlabels =
map Left . map Left .
@@ -807,26 +807,25 @@ getCncDepLabels s = wlabels s ++ flabels s
groupBy (\ (x,_) (a,_) -> x == a) . groupBy (\ (x,_) (a,_) -> x == a) .
sortBy (comparing fst) . sortBy (comparing fst) .
concatMap analyse . concatMap analyse .
filter chooseW . filter chooseW
-- map rmcomments .
lines
flabels = flabels =
map Right . map Right .
map collectTags . map collectTags .
map words . map words
filter chooseF .
-- map rmcomments . (fs,ws) = partition chooseF $ lines s
lines
--- choose is for compatibility with the general notation --- choose is for compatibility with the general notation
chooseW line = notElem '(' line && chooseW line = notElem '(' line &&
elem '{' line && elem '{' line
--- ignoring non-local (with "(") and abstract (without "{") rules --- ignoring non-local (with "(") and abstract (without "{") rules
---- TODO: this means that "(" cannot be a token ---- TODO: this means that "(" cannot be a token
not (chooseF line)
chooseF line = take 1 line == "@" --- feature assignments have the form e.g. @N SgNom SgGen ; no spaces inside tags chooseF line = take 1 line == "@" --- feature assignments have the form e.g. @N SgNom SgGen ; no spaces inside tags
isComment line = take 2 line == "--"
analyse line = case break (=='{') line of analyse line = case break (=='{') line of
(beg,_:ws) -> case break (=='}') ws of (beg,_:ws) -> case break (=='}') ws of
(toks,_:target) -> case (getToks beg, words target) of (toks,_:target) -> case (getToks beg, words target) of