sphinx-doc · wlach · Feb 17, 2025 · Feb 17, 2025 · Feb 18, 2025 · Feb 18, 2025
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -159,6 +159,8 @@ Bugs fixed
   Patch by Adam Turner.
 * #13328: Fix parsing of PEP 695 functions with return annotations.
   Patch by Bénédikt Tran. Initial work by Arash Badie-Modiri.
+* #13355: Don't include escaped title content in the search index.
+  Patch by Will Lachance.
 
 Testing
 -------

diff --git a/sphinx/builders/html/__init__.py b/sphinx/builders/html/__init__.py
@@ -679,8 +679,9 @@ def write_doc(self, docname: str, doctree: nodes.document) -> None:
     def write_doc_serialized(self, docname: str, doctree: nodes.document) -> None:
         self.imgpath = relative_uri(self.get_target_uri(docname), self.imagedir)
         self.post_process_images(doctree)
+        # use the plain-text title so that no HTML markup or entities reach the search index
         title_node = self.env.longtitles.get(docname)
-        title = self.render_partial(title_node)['title'] if title_node else ''
+        title = title_node.astext() if title_node else ''
         self.index_page(docname, doctree, title)
 
     def finish(self) -> None:

diff --git a/sphinx/themes/basic/static/searchtools.js b/sphinx/themes/basic/static/searchtools.js
@@ -58,6 +58,15 @@ const _removeChildren = (element) => {
 const _escapeRegExp = (string) =>
   string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string
 
+// Escape HTML metacharacters; "&" goes first so later entities are not re-escaped.
+const _escapeHTML = (text) =>
+  text
+    .replaceAll("&", "&amp;")
+    .replaceAll("<", "&lt;")
+    .replaceAll(">", "&gt;")
+    .replaceAll('"', "&quot;")
+    .replaceAll("'", "&apos;");
+
 const _displayItem = (item, searchTerms, highlightTerms) => {
   const docBuilder = DOCUMENTATION_OPTIONS.BUILDER;
   const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX;
@@ -340,7 +349,9 @@ const Search = {
           const boost = titles[file] === title ? 1 : 0;  // add a boost for document titles
           normalResults.push([
             docNames[file],
-            titles[file] !== title ? `${titles[file]} > ${title}` : title,
+            _escapeHTML(
+              titles[file] !== title ? `${titles[file]} > ${title}` : title
+            ),
             id !== null ? "#" + id : "",
             null,
             score + boost,
@@ -358,7 +369,7 @@ const Search = {
           const score = Math.round(100 * queryLower.length / entry.length);
           const result = [
             docNames[file],
-            titles[file],
+            _escapeHTML(titles[file]),
             id ? "#" + id : "",
             null,
             score,

diff --git a/tests/js/fixtures/cpp/searchindex.js b/tests/js/fixtures/cpp/searchindex.js
diff --git a/tests/js/searchtools.spec.js b/tests/js/searchtools.spec.js
@@ -34,7 +34,7 @@ describe('Basic html theme search', function() {
 
       hits = [[
         "index",
-        "&lt;no title&gt;",
+        "<no title>",
         "",
         null,
         5,
@@ -184,7 +184,7 @@ describe('Basic html theme search', function() {
 
       expectedRanking = [
         ['index', 'Main Page', '#index-0'],  /* index entry */
-        ['index', 'Main Page > Result Scoring', '#result-scoring'],  /* title */
+        ['index', 'Main Page &gt; Result Scoring', '#result-scoring'],  /* title */
       ];
 
       searchParameters = Search._parseQuery('scoring');
@@ -198,7 +198,7 @@ describe('Basic html theme search', function() {
 
       expectedRanking = [
         ['relevance', 'Relevance', ''],  /* main title */
-        ['index', 'Main Page > Relevance', '#relevance'],  /* subsection heading title */
+        ['index', 'Main Page &gt; Relevance', '#relevance'],  /* subsection heading title */
       ];
 
       searchParameters = Search._parseQuery('relevance');

diff --git a/tests/roots/test-search/escapedtitle.rst b/tests/roots/test-search/escapedtitle.rst
@@ -0,0 +1,4 @@
+`escaped` title with < and > in it
+==================================
+
+this document has escaped content in the title but also the characters < and > in it
diff --git a/tests/test_search.py b/tests/test_search.py
@@ -155,8 +155,17 @@ def test_term_in_heading_and_section(app: SphinxTestApp) -> None:
     # if search term is in the title of one doc and in the text of another
     # both documents should be a hit in the search index as a title,
     # respectively text hit
-    assert '"textinhead":2' in searchindex
-    assert '"textinhead":0' in searchindex
+    assert '"textinhead":3' in searchindex
+    assert '"textinhead":1' in searchindex
+
+
+@pytest.mark.sphinx('html', testroot='search')
+def test_escaped_title(app: SphinxTestApp) -> None:
+    app.build(force_all=True)
+    searchindex = load_searchindex(app.outdir / 'searchindex.js')
+    # titles are indexed as plain text: no inline markup, '<' and '>' left unescaped
+    assert 'escapedtitle' in searchindex['docnames']
+    assert 'escaped title with < and > in it' in searchindex['titles']
 
 
 @pytest.mark.sphinx('html', testroot='search')
@@ -398,15 +407,15 @@ def test_search_index_gen_zh(app: SphinxTestApp) -> None:
 def test_nosearch(app: SphinxTestApp) -> None:
     app.build()
     index = load_searchindex(app.outdir / 'searchindex.js')
-    assert index['docnames'] == ['index', 'nosearch', 'tocitem']
+    assert index['docnames'] == ['escapedtitle', 'index', 'nosearch', 'tocitem']
     # latex is in 'nosearch.rst', and nowhere else
     assert 'latex' not in index['terms']
     # cat is in 'index.rst' but is marked with the 'no-search' class
     assert 'cat' not in index['terms']
-    # bat is indexed from 'index.rst' and 'tocitem.rst' (document IDs 0, 2), and
-    # not from 'nosearch.rst' (document ID 1)
+    # bat is indexed from 'index.rst' and 'tocitem.rst' (document IDs 1, 3), and
+    # not from 'nosearch.rst' (document ID 2)
     assert 'bat' in index['terms']
-    assert index['terms']['bat'] == [0, 2]
+    assert index['terms']['bat'] == [1, 3]
 
 
 @pytest.mark.sphinx(
@@ -418,7 +427,7 @@ def test_nosearch(app: SphinxTestApp) -> None:
 def test_parallel(app: SphinxTestApp) -> None:
     app.build()
     index = load_searchindex(app.outdir / 'searchindex.js')
-    assert index['docnames'] == ['index', 'nosearch', 'tocitem']
+    assert index['docnames'] == ['escapedtitle', 'index', 'nosearch', 'tocitem']
 
 
 @pytest.mark.sphinx('html', testroot='search')