accepting gf-ud style abslabels in gf-core ; cnclabels TODO

This commit is contained in:
aarneranta
2020-05-05 15:46:48 +02:00
parent b962bcd178
commit 65c810f085

View File

@@ -242,6 +242,10 @@ rmcomments :: String -> String
rmcomments [] = [] rmcomments [] = []
rmcomments ('-':'-':xs) = [] rmcomments ('-':'-':xs) = []
rmcomments ('-':x :xs) = '-':rmcomments (x:xs) rmcomments ('-':x :xs) = '-':rmcomments (x:xs)
rmcomments ('#':xs) = case splitAt 3 xs of -- for compatibility with gf-ud annotations
("cat",rest) -> rmcomments rest
("fun",rest) -> rmcomments rest
_ -> [] --- gf-ud keywords not used in gf-core
rmcomments (x:xs) = x:rmcomments xs rmcomments (x:xs) = x:rmcomments xs
-- | Prepare lines obtained from a configuration file for labels for -- | Prepare lines obtained from a configuration file for labels for
@@ -761,19 +765,21 @@ ppSVG svg =
-- UseComp {"not"} PART neg head -- UseComp {"not"} PART neg head
-- UseComp {*} AUX cop head -- UseComp {*} AUX cop head
type CncLabels = [ type CncLabels = [CncLabel]
Either
(String, String -> Maybe (String -> String,String,String)) data CncLabel =
-- (fun, word -> (pos,label,target)) CncSyncat (String, String -> Maybe (String -> String,String,String))
-- (fun, word/lemma -> (pos,label,target))
-- the pos can remain unchanged, as in the current notation in the article -- the pos can remain unchanged, as in the current notation in the article
(String,[String]) | CncMorpho (String,[String])
-- (category, morphological forms) -- (category, features in ascending order)
] | CncForm (String,(String,String))
-- (wordform, (lemma,features))
fixCoNLL :: CncLabels -> CoNLL -> CoNLL fixCoNLL :: CncLabels -> CoNLL -> CoNLL
fixCoNLL cncLabels conll = map (fixMorpho . fixDep) (markRoot conll) where fixCoNLL cncLabels conll = map (fixMorpho . fixDep) (markRoot conll) where
labels = [l | Left l <- cncLabels] labels = [l | CncSyncat l <- cncLabels]
flabels = [r | Right r <- cncLabels] flabels = [r | CncMorpho r <- cncLabels]
-- change the root label from dep to root -- change the root label from dep to root
--- doing this for the leftmost word of the root node --- doing this for the leftmost word of the root node
@@ -818,7 +824,7 @@ getCncDepLabels :: String -> CncLabels
getCncDepLabels s = wlabels ws ++ flabels fs getCncDepLabels s = wlabels ws ++ flabels fs
where where
wlabels = wlabels =
map Left . map CncSyncat .
map merge . map merge .
groupBy (\ (x,_) (a,_) -> x == a) . groupBy (\ (x,_) (a,_) -> x == a) .
sortBy (comparing fst) . sortBy (comparing fst) .
@@ -826,7 +832,7 @@ getCncDepLabels s = wlabels ws ++ flabels fs
filter chooseW filter chooseW
flabels = flabels =
map Right . map CncMorpho .
map collectTags . map collectTags .
map words map words