Skip to content

Commit 01b5459

Browse files
committed
AsciiDoc writer: improve escaping.
Closes #10385. Closes #2337. Closes #6424.
1 parent 9fa4fa3 commit 01b5459

File tree

9 files changed

+141
-77
lines changed

9 files changed

+141
-77
lines changed

src/Text/Pandoc/Writers/AsciiDoc.hs

+60-30
Original file line numberDiff line numberDiff line change
@@ -118,19 +118,46 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do
118118
Nothing -> main
119119
Just tpl -> renderTemplate tpl context
120120

121-
-- | Escape special characters for AsciiDoc.
122-
escapeString :: PandocMonad m => Text -> ADW m (Doc Text)
123-
escapeString t = do
124-
parentTableLevel <- gets tableNestingLevel
125-
let needsEscape '{' = True
126-
needsEscape '|' = parentTableLevel > 0
127-
needsEscape _ = False
128-
let escChar c | needsEscape c = "\\" <> T.singleton c
129-
| otherwise = T.singleton c
130-
if T.any needsEscape t
131-
then return $ literal $ T.concatMap escChar t
132-
else return $ literal t
121+
data EscContext = Normal | InTable
122+
deriving (Show, Eq)
133123

124+
-- | Escape special characters for AsciiDoc.
125+
escapeString :: EscContext -> Text -> Doc Text
126+
escapeString context t
127+
| T.any needsEscape t
128+
= literal $
129+
case T.foldl' go (False, mempty) t of
130+
(True, x) -> x <> "++" -- close passthrough context
131+
(False, x) -> x
132+
| otherwise = literal t
133+
where
134+
-- Bool is True when we are in a ++ passthrough context
135+
go :: (Bool, Text) -> Char -> (Bool, Text)
136+
go (True, x) '+' = (False, x <> "++" <> "{plus}") -- close context
137+
go (False, x) '+' = (False, x <> "{plus}")
138+
go (True, x) '|'
139+
| context == InTable = (False, x <> "++" <> "{vbar}") -- close context
140+
go (False, x) '|'
141+
| context == InTable = (False, x <> "{vbar}")
142+
go (True, x) c
143+
| needsEscape c = (True, T.snoc x c)
144+
| otherwise = (False, T.snoc (x <> "++") c)
145+
go (False, x) c
146+
| needsEscape c = (True, x <> "++" <> T.singleton c)
147+
| otherwise = (False, T.snoc x c)
148+
149+
needsEscape '{' = True
150+
needsEscape '+' = True
151+
needsEscape '`' = True
152+
needsEscape '*' = True
153+
needsEscape '_' = True
154+
needsEscape '<' = True
155+
needsEscape '>' = True
156+
needsEscape '[' = True
157+
needsEscape ']' = True
158+
needsEscape '\\' = True
159+
needsEscape '|' = True
160+
needsEscape _ = False
134161

135162
-- | Ordered list start parser for use in Para below.
136163
olMarker :: Parsec Text ParserState Char
@@ -393,11 +420,11 @@ bulletListItemToAsciiDoc opts blocks = do
393420
-- | Convert a list item containing text starting with @U+2610 BALLOT BOX@
394421
-- or @U+2612 BALLOT BOX WITH X@ to asciidoctor checkbox syntax (e.g. @[x]@).
395422
taskListItemToAsciiDoc :: [Block] -> [Block]
396-
taskListItemToAsciiDoc = handleTaskListItem toOrg listExt
423+
taskListItemToAsciiDoc = handleTaskListItem toAd listExt
397424
where
398-
toOrg (Str "☐" : Space : is) = Str "[ ]" : Space : is
399-
toOrg (Str "☒" : Space : is) = Str "[x]" : Space : is
400-
toOrg is = is
425+
toAd (Str "☐" : Space : is) = RawInline (Format "asciidoc") "[ ]" : Space : is
426+
toAd (Str "☒" : Space : is) = RawInline (Format "asciidoc") "[x]" : Space : is
427+
toAd is = is
401428
listExt = extensionsFromList [Ext_task_lists]
402429

403430
addBlock :: PandocMonad m
@@ -543,24 +570,27 @@ inlineToAsciiDoc opts (Subscript lst) = do
543570
inlineToAsciiDoc opts (SmallCaps lst) = inlineListToAsciiDoc opts lst
544571
inlineToAsciiDoc opts (Quoted qt lst) = do
545572
isLegacy <- gets legacy
546-
inlineListToAsciiDoc opts $
547-
case qt of
548-
SingleQuote
549-
| isLegacy -> [Str "`"] ++ lst ++ [Str "'"]
550-
| otherwise -> [Str "'`"] ++ lst ++ [Str "`'"]
551-
DoubleQuote
552-
| isLegacy -> [Str "``"] ++ lst ++ [Str "''"]
553-
| otherwise -> [Str "\"`"] ++ lst ++ [Str "`\""]
573+
contents <- inlineListToAsciiDoc opts lst
574+
pure $ case qt of
575+
SingleQuote
576+
| isLegacy -> "`" <> contents <> "'"
577+
| otherwise -> "'`" <> contents <> "`'"
578+
DoubleQuote
579+
| isLegacy -> "``" <> contents <> "''"
580+
| otherwise -> "\"`" <> contents <> "`\""
554581
inlineToAsciiDoc _ (Code _ str) = do
555582
isLegacy <- gets legacy
556583
let escChar '`' = "\\'"
557584
escChar c = T.singleton c
558-
let contents = literal (T.concatMap escChar str)
559-
return $
560-
if isLegacy
561-
then text "`" <> contents <> "`"
562-
else text "`+" <> contents <> "+`"
563-
inlineToAsciiDoc _ (Str str) = escapeString str
585+
parentTableLevel <- gets tableNestingLevel
586+
let content
587+
| isLegacy = literal (T.concatMap escChar str)
588+
| otherwise = escapeString
589+
(if parentTableLevel > 0 then InTable else Normal) str
590+
return $ text "`" <> content <> "`"
591+
inlineToAsciiDoc _ (Str str) = do
592+
parentTableLevel <- gets tableNestingLevel
593+
pure $ escapeString (if parentTableLevel > 0 then InTable else Normal) str
564594
inlineToAsciiDoc _ (Math InlineMath str) = do
565595
isLegacy <- gets legacy
566596
modify $ \st -> st{ hasMath = True }

test/Tests/Writers/AsciiDoc.hs

+2-2
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ tests = [ testGroup "emphasis"
3636
"__foo__bar"
3737
, testAsciidoc "emph quoted" $
3838
para (doubleQuoted (emph (text "foo"))) =?>
39-
"``__foo__''"
39+
"``_foo_''"
4040
, testAsciidoc "strong word before" $
4141
para (text "foo" <> strong (text "bar")) =?>
4242
"foo**bar**"
@@ -45,7 +45,7 @@ tests = [ testGroup "emphasis"
4545
"**foo**bar"
4646
, testAsciidoc "strong quoted" $
4747
para (singleQuoted (strong (text "foo"))) =?>
48-
"`**foo**'"
48+
"`*foo*'"
4949
]
5050
, testGroup "blocks"
5151
[ testAsciidoc "code block without line numbers" $

test/command/10385.md

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
````
2+
% pandoc -t asciidoc
3+
C+ C++ C+++ `++`
4+
5+
`` ` `` \`hi\`
6+
7+
`hi\there`` ok`
8+
9+
```
10+
++`
11+
```
12+
^D
13+
C{plus} C{plus}{plus} C{plus}{plus}{plus} `{plus}{plus}`
14+
15+
`++`++` ++`++hi++`++
16+
17+
`hi++\++there++``++ ok`
18+
19+
....
20+
++`
21+
....
22+
````

test/command/2337.md

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
```
2+
% pandoc -t asciidoc -f html
3+
<a href="http://example.com">][</a>
4+
^D
5+
http://example.com[++][++]
6+
```

test/command/4545.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Test 2
88
^D
99
Test 1
1010
11-
{empty}[my text]
11+
++[++my text++]++
1212
1313
Test 2
1414
```

test/command/6424.md

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
```
2+
% pandoc -t asciidoc
3+
test^[this is a note\]. and more]
4+
^D
5+
testfootnote:[this is a note++]++. and more]
6+
```

test/command/8665.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,6 @@
2424
[cols=",",options="header",]
2525
|===
2626
|h1 |h2
27-
|!@#$%^&*()\{}\|~?+-',."<>[]\` |col 2
27+
|!@#$%^&++*++()++{++}{vbar}~?{plus}-',."++<>[]\`++ |col 2
2828
|===
2929
```

test/writer.asciidoc

+24-24
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item.
4040
Because a hard-wrapped line in the middle of a paragraph looked like a list
4141
item.
4242

43-
Here’s one with a bullet. * criminey.
43+
Here’s one with a bullet. ++*++ criminey.
4444

4545
There should be a hard line break +
4646
here.
@@ -84,7 +84,7 @@ ____
8484
--
8585
____
8686

87-
This should not be a block quote: 2 > 1.
87+
This should not be a block quote: 2 ++>++ 1.
8888

8989
And a following paragraph.
9090

@@ -424,7 +424,7 @@ So is *_this_* word.
424424

425425
So is *_this_* word.
426426

427-
This is code: `+>+`, `+$+`, `+\+`, `+\$+`, `+<html>+`.
427+
This is code: `++>++`, `$`, `++\++`, `++\++$`, `++<++html++>++`.
428428

429429
[line-through]#This is _strikeout_.#
430430

@@ -447,7 +447,7 @@ a^b c^d, a~b c~d.
447447

448448
'`He said, "`I want to go.`"`' Were you alive in the 70’s?
449449

450-
Here is some quoted '``+code+``' and a "`http://example.com/?foo=1&bar=2[quoted
450+
Here is some quoted '``code``' and a "`http://example.com/?foo=1&bar=2[quoted
451451
link]`".
452452

453453
Some dashes: one—two — three—four — five.
@@ -477,11 +477,11 @@ latexmath:[\alpha + \omega \times x^2].
477477

478478
These shouldn’t be math:
479479

480-
* To get the famous equation, write `+$e = mc^2$+`.
480+
* To get the famous equation, write `$e = mc^2$`.
481481
* $22,000 is a _lot_ of money. So is $34,000. (It worked if "`lot`" is
482482
emphasized.)
483483
* Shoes ($20) and socks ($5).
484-
* Escaped `+$+`: $73 _this should be emphasized_ 23$.
484+
* Escaped `$`: $73 _this should be emphasized_ 23$.
485485

486486
Here’s a LaTeX table:
487487

@@ -503,39 +503,39 @@ AT&T is another way to write it.
503503

504504
This & that.
505505

506-
4 < 5.
506+
4 ++<++ 5.
507507

508-
6 > 5.
508+
6 ++>++ 5.
509509

510-
Backslash: \
510+
Backslash: ++\++
511511

512-
Backtick: `
512+
Backtick: ++`++
513513

514-
Asterisk: *
514+
Asterisk: ++*++
515515

516-
Underscore: _
516+
Underscore: ++_++
517517

518-
Left brace: \{
518+
Left brace: ++{++
519519

520520
Right brace: }
521521

522-
Left bracket: [
522+
Left bracket: ++[++
523523

524-
Right bracket: ]
524+
Right bracket: ++]++
525525

526526
Left paren: (
527527

528528
Right paren: )
529529

530-
Greater-than: >
530+
Greater-than: ++>++
531531

532532
Hash: #
533533

534534
Period: .
535535

536536
Bang: !
537537

538-
Plus: +
538+
Plus: {plus}
539539

540540
Minus: -
541541

@@ -557,7 +557,7 @@ link:/url/[URL and title]
557557

558558
link:/url/[URL and title]
559559

560-
link:/url/with_underscore[with_underscore]
560+
link:/url/with_underscore[with++_++underscore]
561561

562562
mailto:nobody@nowhere.net[Email link]
563563

@@ -567,7 +567,7 @@ link:[Empty].
567567

568568
Foo link:/url/[bar].
569569

570-
With link:/url/[embedded [brackets]].
570+
With link:/url/[embedded ++[++brackets++]++].
571571

572572
link:/url/[b] by itself should be a link.
573573

@@ -577,7 +577,7 @@ Indented link:/url[twice].
577577

578578
Indented link:/url[thrice].
579579

580-
This should [not][] be a link.
580+
This should ++[++not++][]++ be a link.
581581

582582
....
583583
[not]: /url
@@ -611,7 +611,7 @@ ____
611611
Blockquoted: http://example.com/
612612
____
613613

614-
Auto-links should not occur here: `+<http://example.com/>+`
614+
Auto-links should not occur here: `++<++http://example.com/++>++`
615615

616616
....
617617
or here: <http://example.com/>
@@ -635,10 +635,10 @@ Here is a movie image:movie.jpg[movie] icon.
635635
Here is a footnote reference,footnote:[Here is the footnote. It can go anywhere
636636
after the footnote reference. It need not be placed at the end of the document.]
637637
and another.[multiblock footnote omitted] This should _not_ be a footnote
638-
reference, because it contains a space.[^my note] Here is an inline
638+
reference, because it contains a space.++[++^my note++]++ Here is an inline
639639
note.footnote:[This is _easier_ to type. Inline notes may contain
640-
http://google.com[links] and `+]+` verbatim characters, as well as [bracketed
641-
text].]
640+
http://google.com[links] and `++]++` verbatim characters, as well as
641+
++[++bracketed text++]++.]
642642

643643
____
644644
Notes can go in quotes.footnote:[In quote.]

0 commit comments

Comments
 (0)