jgm · yanntrividic · Feb 19, 2025 · Feb 26, 2025 · Mar 4, 2025 · Mar 4, 2025
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
@@ -44,7 +44,7 @@ import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import Text.Pandoc.Options
 import Text.Pandoc.Logging (LogMessage(..))
-import Text.Pandoc.Shared (safeRead, extractSpaces)
+import Text.Pandoc.Shared (safeRead, extractSpaces, addPandocAttributes)
 import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import Text.Pandoc.Transforms (headerShift)
 import Text.TeXMath (readMathML, writeTeX)
@@ -851,15 +851,19 @@ getBlocks :: PandocMonad m => Element -> DB m Blocks
 getBlocks e =  mconcat <$>
                  mapM parseBlock (elContent e)
 
+getRoleAttr :: Element -> [(Text, Text)] -- extract role attribute and add it to the attribute list
+getRoleAttr e = case attrValue "role" e of
+                  "" -> []
+                  r  -> [("role", r)]
 
 parseBlock :: PandocMonad m => Content -> DB m Blocks
 parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
 parseBlock (Text (CData _ s _)) = if T.all isSpace s
                                      then return mempty
                                      else return $ plain $ trimInlines $ text s
 parseBlock (CRef x) = return $ plain $ str $ T.toUpper x
-parseBlock (Elem e) =
-  case qName (elName e) of
+parseBlock (Elem e) = do
+  parsedBlock <- case qName (elName e) of
         "toc"   -> skip -- skip TOC, since in pandoc it's autogenerated
         "index" -> skip -- skip index, since page numbers meaningless
         "para"  -> parseMixed para (elContent e)
@@ -973,6 +977,7 @@ parseBlock (Elem e) =
         "title" -> return mempty     -- handled in parent element
         "subtitle" -> return mempty  -- handled in parent element
         _ -> skip >> getBlocks e
+  return $ addPandocAttributes (getRoleAttr e) parsedBlock
    where skip = do
            let qn = qName $ elName e
            let name = if "pi-" `T.isPrefixOf` qn
@@ -1099,7 +1104,12 @@ parseBlock (Elem e) =
            modify $ \st -> st{ dbSectionLevel = n }
            b <- getBlocks e
            modify $ \st -> st{ dbSectionLevel = n - 1 }
-           return $ headerWith (elId, classes, maybeToList titleabbrevElAsAttr++attrs) n' headerText <> b
+           let content = headerWith (elId, classes, maybeToList titleabbrevElAsAttr)
+                          n' headerText <> b
+           return $ case attrValue "role" e of
+                      "" -> content
+                      _  -> divWith ("", ["section"],
+                             ("level", T.pack $ show n') : attrs) content
          titleabbrevElAsAttr =
            case filterChild (named "titleabbrev") e `mplus`
                 (filterChild (named "info") e >>=
@@ -1124,7 +1134,6 @@ parseBlock (Elem e) =
              Nothing -> return b
              Just t -> return $ divWith (attrValue "id" e,[],[])
                          (divWith ("", ["title"], []) (plain t) <> b)
-
          -- Admonitions are parsed into a div. Following other Docbook tools that output HTML,
          -- we parse the optional title as a div with the @title@ class, and give the
          -- block itself a class corresponding to the admonition name.
@@ -1206,8 +1215,8 @@ parseInline :: PandocMonad m => Content -> DB m Inlines
 parseInline (Text (CData _ s _)) = return $ text s
 parseInline (CRef ref) =
   return $ text $ fromMaybe (T.toUpper ref) $ lookupEntity ref
-parseInline (Elem e) =
-  case qName (elName e) of
+parseInline (Elem e) = do
+  parsedInline <- case qName (elName e) of
         "anchor" -> do
            return $ spanWith (attrValue "id" e, [], []) mempty
         "phrase" -> do
@@ -1320,7 +1329,8 @@ parseInline (Elem e) =
                              "strong"        -> innerInlines strong
                              "strikethrough" -> innerInlines strikeout
                              "underline"     -> innerInlines underline
-                             _               -> innerInlines emph
+                             _               -> innerInlines $
+                                                  spanWith ("", ["emphasis"], getRoleAttr e)
         "footnote" -> note . mconcat <$>
                          mapM parseBlock (elContent e)
         "title" -> return mempty
@@ -1329,6 +1339,9 @@ parseInline (Elem e) =
         -- <?asciidor-br?> to in handleInstructions, above.
         "pi-asciidoc-br" -> return linebreak
         _          -> skip >> innerInlines id
+  return $ case qName (elName e) of
+    "emphasis" -> parsedInline
+    _ -> addPandocAttributes (getRoleAttr e) parsedInline
    where skip = do
            let qn = qName $ elName e
            let name = if "pi-" `T.isPrefixOf` qn