From 3bce6012a20fdb2e8e650112cc4e7eba85d100ce Mon Sep 17 00:00:00 2001
From: Vagarth
Date: Mon, 23 Feb 2026 05:23:49 +0530
Subject: [PATCH] feat: index Doxygen Participant page in Algolia search

---
 .github/workflows/update-algolia.yml |  30 +++++++
 .gitignore                           |   3 +-
 _config.yml                          |   1 +
 _plugins/algolia_hooks.rb            | 129 +++++++++++++++++++++++++++
 js/algolia-search.js                 |   4 +-
 5 files changed, 165 insertions(+), 2 deletions(-)
 create mode 100644 _plugins/algolia_hooks.rb

diff --git a/.github/workflows/update-algolia.yml b/.github/workflows/update-algolia.yml
index 5c8b17a6a78..89f5af3e6cd 100644
--- a/.github/workflows/update-algolia.yml
+++ b/.github/workflows/update-algolia.yml
@@ -14,6 +14,36 @@ jobs:
           submodules: true
           lfs: false
           path: website
+      - name: Fetch precice main
+        uses: actions/checkout@v4
+        with:
+          repository: precice/precice
+          ref: main
+          path: main
+          fetch-tags: true
+      - name: Patch Doxyfile for main
+        run: |
+          sed -e "/^ *TAGFILES/d" -e "/^ *SITEMAP_URL/d" -e "/^ *PROJECT_NUMBER/d" -i main/Doxyfile
+          TAGFILE="../website/doxygen/cppreference-doxygen-web.tag.xml=http://en.cppreference.com/w/"
+          echo "TAGFILES = $TAGFILE" >> main/Doxyfile
+          echo "SITEMAP_URL = https://precice.org/doxygen/main/" >> main/Doxyfile
+          echo "PROJECT_NUMBER = $( cd main && git describe --tags )" >> main/Doxyfile
+      - name: Install doxygen dependencies
+        run: |
+          sudo apt-get -yyq update
+          sudo apt-get -yyq install graphviz plantuml
+          mkdir doxygen-bin
+          curl -sL https://github.com/doxygen/doxygen/releases/download/Release_1_14_0/doxygen-1.14.0.linux.bin.tar.gz | tar -xz --strip-components=1 -C doxygen-bin
+          readlink -f doxygen-bin/bin >> $GITHUB_PATH
+      - name: Build doxygen - main
+        working-directory: main
+        run: doxygen
+        env:
+          PLANTUML_JAR_PATH: /usr/share/plantuml/plantuml.jar
+      - name: Place doxygen HTML for indexing
+        run: |
+          mkdir -p website/_doxygen_html/main
+          cp -r main/docs/source-code-documentation/html/* website/_doxygen_html/main/
       - uses: ruby/setup-ruby@v1
         with:
           bundler-cache: true
diff --git a/.gitignore b/.gitignore
index d87a59686ac..10dbc8f9a89 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,4 +7,5 @@ _pdf
 .idea
 vendor/
 .bundle/
-Gemfile.lock
\ No newline at end of file
+Gemfile.lock
+_doxygen_html/
\ No newline at end of file
diff --git a/_config.yml b/_config.yml
index 9e263cb40f0..634b562833e 100644
--- a/_config.yml
+++ b/_config.yml
@@ -39,6 +39,7 @@ exclude:
   - createtag
   - pdf-docs.sh
   - doxygen/
+  - _doxygen_html/
   - tools/
   - content/**/_index.md
 # these are the files and directories that jekyll will exclude from the build
diff --git a/_plugins/algolia_hooks.rb b/_plugins/algolia_hooks.rb
new file mode 100644
index 00000000000..57c5634e197
--- /dev/null
+++ b/_plugins/algolia_hooks.rb
@@ -0,0 +1,129 @@
+# frozen_string_literal: true
+
+require 'nokogiri'
+require 'digest'
+
+module Jekyll
+  module Algolia
+    # Hooks picked up by the jekyll-algolia plugin. Adds search records for the
+    # Doxygen-generated precice::Participant page to the regular site records.
+    module Hooks
+      DOXYGEN_HTML_PATH = '_doxygen_html/main/classprecice_1_1Participant.html'
+      DOXYGEN_URL = '/doxygen/main/classprecice_1_1Participant.html'
+      DEFAULT_OVERVIEW = 'Main class of the preCICE API for coupling simulations.'
+      MAX_DESCRIPTION_LENGTH = 500
+
+      # Returns +records+ extended with the Doxygen records. When the Doxygen
+      # HTML has not been built, logs a warning and returns +records+ unchanged.
+      def self.before_indexing_all(records, _context)
+        unless File.exist?(DOXYGEN_HTML_PATH)
+          Jekyll.logger.warn 'Algolia:', "Doxygen HTML not found at #{DOXYGEN_HTML_PATH}, skipping Doxygen indexing"
+          return records
+        end
+
+        Jekyll.logger.info 'Algolia:', 'Parsing Doxygen Participant page for indexing...'
+        doxygen_records = parse_doxygen_participant
+        Jekyll.logger.info 'Algolia:', "Added #{doxygen_records.length} Doxygen records"
+
+        records + doxygen_records
+      end
+
+      # Builds one overview record plus one record per documented member.
+      def self.parse_doxygen_participant
+        doc = Nokogiri::HTML(File.read(DOXYGEN_HTML_PATH))
+        overview = build_record(
+          title: 'Participant Class Reference',
+          anchor: nil,
+          html: overview_text(doc),
+          headings: %w[precice Participant],
+          object_id: 'doxygen-participant-overview'
+        )
+
+        [overview] + doc.css('.contents .memitem').map { |memitem| member_record(memitem) }.compact
+      end
+
+      # Text of the first .textblock inside .contents, or DEFAULT_OVERVIEW when
+      # the block is missing or contains only whitespace.
+      def self.overview_text(doc)
+        block = doc.at_css('.contents .textblock')
+        text = block ? squish(block.text) : ''
+        text.empty? ? DEFAULT_OVERVIEW : text
+      end
+
+      # Builds the record for one div.memitem; returns nil when the member has
+      # no parsable name or no documentation text.
+      def self.member_record(memitem)
+        memname_el = memitem.at_css('.memname')
+        return nil unless memname_el
+
+        signature = squish(memname_el.text)
+        method_name = extract_method_name(signature)
+        return nil unless method_name
+
+        memdoc = memitem.at_css('.memdoc')
+        description = memdoc ? squish(memdoc.text)[0, MAX_DESCRIPTION_LENGTH] : ''
+        return nil if description.empty?
+
+        anchor = find_anchor(memitem)
+        # Without an anchor, hash the whole memname text rather than the bare
+        # method name so that overloads are less likely to share an objectID.
+        build_record(
+          title: "Participant::#{method_name}",
+          anchor: anchor,
+          html: description,
+          headings: %w[precice Participant],
+          object_id: "doxygen-participant-#{anchor || Digest::MD5.hexdigest(signature)}"
+        )
+      end
+
+      # Doxygen precedes each div.memitem with an h2.memtitle and an anchor
+      # link. Walks back over the preceding siblings to find that anchor id.
+      def self.find_anchor(memitem)
+        node = memitem.previous_element
+        while node
+          # Reaching the previous member means this one has no anchor of its
+          # own; stop instead of borrowing an id that belongs to another member.
+          return nil if node['class'].to_s.split.include?('memitem')
+          return node['id'] if node.name == 'a' && node['id']
+
+          nested = node.at_css('a[id]')
+          return nested['id'] if nested
+
+          node = node.previous_element
+        end
+        nil
+      end
+
+      # memname_text looks like "void precice::Participant::initialize ()" or
+      # "precice::Participant::~Participant ()". Returns the identifier directly
+      # before the first opening parenthesis (keeping a destructor's "~"), or
+      # nil when there is none.
+      def self.extract_method_name(memname_text)
+        match = memname_text.match(/(~?\w+)\s*\(/)
+        match ? match[1] : nil
+      end
+
+      # Collapses every whitespace run to a single space and trims both ends.
+      def self.squish(text)
+        text.strip.gsub(/\s+/, ' ')
+      end
+
+      # Assembles one Algolia record; all Doxygen records share DOXYGEN_URL.
+      def self.build_record(title:, anchor:, html:, headings:, object_id:)
+        {
+          title: title,
+          url: DOXYGEN_URL,
+          anchor: anchor,
+          html: html,
+          headings: headings,
+          type: 'content',
+          custom_ranking: { position: 0, heading: 90 },
+          objectID: object_id
+        }
+      end
+
+      private_class_method :parse_doxygen_participant, :overview_text, :member_record,
+                           :find_anchor, :extract_method_name, :squish, :build_record
+    end
+  end
+end
diff --git a/js/algolia-search.js b/js/algolia-search.js
index 3528c6730f2..d0556c789f2 100644
--- a/js/algolia-search.js
+++ b/js/algolia-search.js
@@ -20,7 +20,9 @@ const hitTemplate = function(hit) {
     date = moment.unix(hit.date).format('MMM D, YYYY');
   }
 
-  let url = `{{ site.baseurl }}${hit.url}#${hit.anchor}`;
+  let url = hit.anchor
+    ? `{{ site.baseurl }}${hit.url}#${hit.anchor}`
+    : `{{ site.baseurl }}${hit.url}`;
 
   const title = hit._highlightResult.title.value;
 