Skip to content

ardigan6/docusaurus-llm-translator

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

10 Commits
 
 
 
 
 
 

Repository files navigation

docusaurus-llm-translator

Translate Docusaurus MDX files using any LLM, without breaking formatting or outbound links.

Requirements:

pip install requests

Local use:

  # Example invocation: options first, then the MDX files to translate.
  python bin/translate.py \
    --source-docs-dir ./docs \
    --i18n-root ./i18n \
    --cache-file tr_cache.json \
    --languages fr \
    --docs-plugin-path "docusaurus-plugin-content-docs/current" \
    file.mdx file2.mdx

Or run it from a GitHub Actions workflow similar to this one:

# Translates Docusaurus MDX docs on pull requests or on manual request.
name: "Translate MDX Files"

on:
  # Automatic run for pull requests that target the listed branches.
  pull_request:
    # Add further target branches here if needed, e.g. [main, master].
    branches: [main]
    # Skip the run unless at least one MDX file under docs/ changed.
    paths: ["docs/**/*.mdx"]
  # Manual run from the Actions tab.
  workflow_dispatch:
    inputs:
      languages:
        description: 'Space-separated language codes (e.g., "fr es ja")'
        required: false
        # Languages used when a manual run does not override them.
        default: "fr es"

permissions:
  # Needed so the job can push translation commits back to the branch.
  contents: write
  # Needed to read pull request details.
  pull-requests: read

jobs:
  translate_docs:
    runs-on: ubuntu-latest
    env:
      # --- Secrets, exposed under the variable names the translation script expects ---
      API_KEY: ${{ secrets.LLM_API_KEY }}
      # Optional: only needed to override the script's default endpoint.
      API_ENDPOINT_URL: ${{ secrets.LLM_API_ENDPOINT_URL }}
      # Optional.
      TRANSLATE_MODEL_NAME: ${{ secrets.LLM_TRANSLATE_MODEL_NAME }}
      # Optional.
      REVIEW_MODEL_NAME: ${{ secrets.LLM_REVIEW_MODEL_NAME }}

      # --- Values forwarded to the script as command-line arguments ---
      SOURCE_DOCS_DIR: 'docs'
      I18N_ROOT_DIR: 'i18n'
      DOCS_PLUGIN_PATH: 'docusaurus-plugin-content-docs/current'
      CACHE_FILE_PATH: 'tr_cache.json'
      # Target languages: the manual-run input wins when it is provided.
      # Pull request runs have no such input and fall back to 'fr es';
      # edit the fallback here to change the default.
      LANGUAGES: ${{ github.event.inputs.languages || 'fr es' }}

    steps:
      - name: Checkout PR Branch
        uses: actions/checkout@v4
        with:
          # Pull request runs check out the PR head commit; manual runs carry
          # no pull_request payload, so the triggering ref is used instead.
          ref: ${{ github.event.pull_request.head.sha || github.ref }}
          # 0 = fetch the full history, which the later git diff relies on.
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is read as a string, not a float.
          python-version: "3.11"

      - name: Install Dependencies
        # requests is the only package the README lists as a requirement.
        run: |
          pip install requests

      - id: trans-cache
        name: Cache Translation Data
        uses: actions/cache@v4
        with:
          # The cached item is the translation cache file itself.
          path: ${{ env.CACHE_FILE_PATH }}
          # Exact key: runner OS plus a hash of the cache file as it exists
          # when this step runs.
          key: ${{ runner.os }}-translation-cache-${{ hashFiles(env.CACHE_FILE_PATH) }}
          # Prefix used to restore an earlier cache when the exact key misses.
          restore-keys: |
            ${{ runner.os }}-translation-cache-

      - name: Get Changed MDX Files
        id: changed_mdx_files
        shell: bash
        env:
          # Event data is passed in as environment variables instead of being
          # interpolated into the script text, so the values are never parsed
          # as shell code.
          EVENT_NAME: ${{ github.event_name }}
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        run: |
          # Builds a space-separated list of MDX paths, relative to
          # SOURCE_DOCS_DIR, and exports it to the following steps:
          #   PROCESS_FILES      - 'true' when there is something to translate
          #   CHANGED_FILES_ARGS - the list itself (only written when non-empty)
          # NOTE: the space-separated format cannot represent paths that
          # contain whitespace.

          # For PRs, compare head with base. For manual dispatch, use all MDX files.
          if [[ "$EVENT_NAME" == "pull_request" ]]; then
            echo "Comparing $BASE_SHA..$HEAD_SHA for changes in $SOURCE_DOCS_DIR/**/*.mdx"

            # Keep files that were Added (A), Modified (M), Copied (C) or Renamed (R).
            # The :(glob) pathspec magic makes "**/" match zero or more
            # directories; without it, MDX files located directly in the docs
            # directory would not match the pattern and would be skipped.
            CHANGED_FILES_RAW=$(git diff --name-only --diff-filter=AMCR "$BASE_SHA" "$HEAD_SHA" -- ":(glob)$SOURCE_DOCS_DIR/**/*.mdx")

            if [[ -n "$CHANGED_FILES_RAW" ]]; then
              # Remove the SOURCE_DOCS_DIR prefix to get relative paths
              CHANGED_FILES_LIST=$(echo "$CHANGED_FILES_RAW" | sed "s|^$SOURCE_DOCS_DIR/||" | tr '\n' ' ')
            else
              CHANGED_FILES_LIST=""
            fi
          else
            echo "Workflow dispatch: processing all MDX files in $SOURCE_DOCS_DIR"
            # Find all MDX files and make them relative to SOURCE_DOCS_DIR
            if command -v find >/dev/null 2>&1; then
              CHANGED_FILES_LIST=$(find "$SOURCE_DOCS_DIR" -type f -name "*.mdx" | sed "s|^$SOURCE_DOCS_DIR/||" | tr '\n' ' ')
            else
              # Fallback for systems without find: let bash expand the pattern.
              # globstar enables recursive "**"; nullglob yields an empty list
              # instead of the literal pattern when nothing matches.
              shopt -s globstar nullglob
              mdx_paths=("$SOURCE_DOCS_DIR"/**/*.mdx)
              CHANGED_FILES_LIST=$(printf '%s\n' "${mdx_paths[@]}" | sed "s|^$SOURCE_DOCS_DIR/||" | tr '\n' ' ')
            fi
          fi

          # Trim leading/trailing whitespace and collapse runs of spaces
          CHANGED_FILES_LIST=$(echo "$CHANGED_FILES_LIST" | xargs)

          if [[ -z "$CHANGED_FILES_LIST" ]]; then
            echo "No MDX files to process."
            echo "PROCESS_FILES=false" >> "$GITHUB_ENV"
          else
            echo "MDX files to process: $CHANGED_FILES_LIST"
            echo "PROCESS_FILES=true" >> "$GITHUB_ENV"
            echo "CHANGED_FILES_ARGS=$CHANGED_FILES_LIST" >> "$GITHUB_ENV"
          fi

      - name: Validate Files Exist
        if: env.PROCESS_FILES == 'true'
        shell: bash
        run: |
          # Reports, for every listed file, whether it exists under
          # SOURCE_DOCS_DIR. Missing files only produce a warning; the step
          # does not fail because of them.
          echo "Validating that files exist before processing..."

          # The list is read from the environment (not pasted into the script
          # via an expression) so file names coming from a pull request are
          # never interpreted as shell code. Globbing is switched off so a
          # name containing '*' or '?' is taken literally while the unquoted
          # variable is split on whitespace.
          set -o noglob
          for file in $CHANGED_FILES_ARGS; do
            full_path="$SOURCE_DOCS_DIR/$file"
            if [[ ! -f "$full_path" ]]; then
              echo "Warning: File $full_path does not exist"
            else
              echo "✓ Found: $full_path"
            fi
          done

      - name: Run Translation Script
        if: env.PROCESS_FILES == 'true'
        shell: bash
        run: |
          # Runs bin/translate.py on the files collected earlier. All values
          # are read from environment variables instead of being interpolated
          # into the script text, so file names from a pull request and the
          # manual 'languages' input cannot inject shell commands.
          echo "Processing files: $CHANGED_FILES_ARGS"
          echo "Languages: $LANGUAGES"
          echo "Source docs dir: $SOURCE_DOCS_DIR"

          # LANGUAGES and CHANGED_FILES_ARGS are intentionally unquoted: each
          # space-separated entry becomes its own argument. Globbing is
          # disabled so entries are passed through literally.
          set -o noglob
          python3 bin/translate.py \
            --source-docs-dir "$SOURCE_DOCS_DIR" \
            --i18n-root "$I18N_ROOT_DIR" \
            --cache-file "$CACHE_FILE_PATH" \
            --languages $LANGUAGES \
            --docs-plugin-path "$DOCS_PLUGIN_PATH" \
            $CHANGED_FILES_ARGS || {
            # Capture the status first; any later command would overwrite $?.
            status=$?
            echo "Translation script failed with exit code $status"
            exit 1
          }

      - name: Commit and Push Translations
        if: env.PROCESS_FILES == 'true'
        shell: bash
        env:
          # Passed through the environment so that attacker-influenced values
          # (notably the PR branch name) are never parsed as shell code.
          EVENT_NAME: ${{ github.event_name }}
          PR_HEAD_REF: ${{ github.event.pull_request.head.ref }}
          REF_NAME: ${{ github.ref_name }}
          REPOSITORY: ${{ github.repository }}
          PUSH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Commits whatever the translation script changed under the i18n
          # directory (plus the cache file) and pushes it to the branch that
          # triggered the run. Does nothing when there is no staged change.
          git config --global user.name 'GitHub Actions Bot'
          # no-reply address of the github-actions[bot] account
          git config --global user.email '41898282+github-actions[bot]@users.noreply.github.com'

          # Add all changes in the i18n directory and the cache file
          git add "$I18N_ROOT_DIR" "$CACHE_FILE_PATH"

          # Check if there are changes to commit
          if git diff --staged --quiet; then
            echo "No changes to commit."
          else
            echo "Committing translation changes..."
            # Each -m becomes its own paragraph of the commit message.
            git commit \
              -m "Apply automatic translations" \
              -m "Affected files by this PR processed:"$'\n'"$CHANGED_FILES_ARGS" \
              -m "Languages: $LANGUAGES"

            # Pull request runs push to the PR's head branch; manual runs push
            # to the ref the workflow was dispatched on.
            if [[ "$EVENT_NAME" == "pull_request" ]]; then
              TARGET_REF="$PR_HEAD_REF"
            else
              TARGET_REF="$REF_NAME"
            fi

            echo "Pushing to branch: $TARGET_REF"
            # Use GITHUB_TOKEN for pushing
            git push "https://x-access-token:${PUSH_TOKEN}@github.com/${REPOSITORY}.git" "HEAD:${TARGET_REF}"
          fi

      - name: Summary
        # Runs even when an earlier step failed, so the log always ends with
        # a short recap.
        if: always()
        shell: bash
        env:
          # Status of the job so far; lets the recap tell success from failure
          # instead of always reporting completion.
          JOB_STATUS: ${{ job.status }}
        run: |
          if [[ "$JOB_STATUS" != "success" ]]; then
            echo "❌ Translation workflow did not complete (job status: $JOB_STATUS)"
            if [[ "$PROCESS_FILES" == "true" ]]; then
              echo "📁 Files selected: $CHANGED_FILES_ARGS"
              echo "🌐 Languages: $LANGUAGES"
            fi
          elif [[ "$PROCESS_FILES" == "true" ]]; then
            echo "✅ Translation workflow completed"
            echo "📁 Files processed: $CHANGED_FILES_ARGS"
            echo "🌐 Languages: $LANGUAGES"
          else
            echo "ℹ️ No MDX files found to process"
          fi

About

Translate Docusaurus using any LLM

Resources

License

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages