Skip to content

Commit deeb94d

Browse files
committed
Don't include HTML content in title search index
Closes #13355.
1 parent 6d113dc commit deeb94d

File tree

4 files changed

+21
-7
lines changed

4 files changed

+21
-7
lines changed

sphinx/builders/html/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -679,8 +679,9 @@ def write_doc(self, docname: str, doctree: nodes.document) -> None:
679679
def write_doc_serialized(self, docname: str, doctree: nodes.document) -> None:
680680
self.imgpath = relative_uri(self.get_target_uri(docname), self.imagedir)
681681
self.post_process_images(doctree)
682+
# get title as plain text
682683
title_node = self.env.longtitles.get(docname)
683-
title = self.render_partial(title_node)['title'] if title_node else ''
684+
title = title_node.astext() if title_node else ''
684685
self.index_page(docname, doctree, title)
685686

686687
def finish(self) -> None:

tests/js/fixtures/cpp/searchindex.js

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
`escaped` title with < and > in it
2+
==================================
3+
4+
this document has escaped content in the title but also the characters < and > in it

tests/test_search.py

+14-5
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,17 @@ def test_term_in_heading_and_section(app: SphinxTestApp) -> None:
155155
# if search term is in the title of one doc and in the text of another
156156
# both documents should be a hit in the search index as a title,
157157
# respectively text hit
158-
assert '"textinhead":2' in searchindex
159-
assert '"textinhead":0' in searchindex
158+
assert '"textinhead":3' in searchindex
159+
assert '"textinhead":1' in searchindex
160+
161+
162+
@pytest.mark.sphinx('html', testroot='search')
163+
def test_escaped_title(app: SphinxTestApp) -> None:
164+
app.build(force_all=True)
165+
searchindex = load_searchindex(app.outdir / 'searchindex.js')
166+
print(searchindex)
167+
assert 'escapedtitle' in searchindex['docnames']
168+
assert 'escaped title with < and > in it' in searchindex['titles']
160169

161170

162171
@pytest.mark.sphinx('html', testroot='search')
@@ -398,15 +407,15 @@ def test_search_index_gen_zh(app: SphinxTestApp) -> None:
398407
def test_nosearch(app: SphinxTestApp) -> None:
399408
app.build()
400409
index = load_searchindex(app.outdir / 'searchindex.js')
401-
assert index['docnames'] == ['index', 'nosearch', 'tocitem']
410+
assert index['docnames'] == ['escapedtitle', 'index', 'nosearch', 'tocitem']
402411
# latex is in 'nosearch.rst', and nowhere else
403412
assert 'latex' not in index['terms']
404413
# cat is in 'index.rst' but is marked with the 'no-search' class
405414
assert 'cat' not in index['terms']
406415
# bat is indexed from 'index.rst' and 'tocitem.rst' (document IDs 1, 3), and
407416
# not from 'nosearch.rst' (document ID 2)
408417
assert 'bat' in index['terms']
409-
assert index['terms']['bat'] == [0, 2]
418+
assert index['terms']['bat'] == [1, 3]
410419

411420

412421
@pytest.mark.sphinx(
@@ -418,7 +427,7 @@ def test_nosearch(app: SphinxTestApp) -> None:
418427
def test_parallel(app: SphinxTestApp) -> None:
419428
app.build()
420429
index = load_searchindex(app.outdir / 'searchindex.js')
421-
assert index['docnames'] == ['index', 'nosearch', 'tocitem']
430+
assert index['docnames'] == ['escapedtitle', 'index', 'nosearch', 'tocitem']
422431

423432

424433
@pytest.mark.sphinx('html', testroot='search')

0 commit comments

Comments
 (0)