Daily Reddit Data Update #12
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow that refreshes the Reddit data set once per day.
name: Daily Reddit Data Update

on:
  schedule:
    # Fires daily at 04:30 UTC; edit the cron expression to suit another timezone.
    - cron: "30 4 * * *"
  # Manual trigger, kept so the job can be started by hand for testing.
  workflow_dispatch:
jobs:
  # Single job: installs the ingestion package in localguru-ingestion/, runs
  # the daily Reddit update script (retrying when it fails with 403 Forbidden),
  # and uploads the run log as an artifact.
  update-reddit-data:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18'
          # NOTE(review): dependencies are installed from localguru-ingestion/,
          # but no cache-dependency-path is set here, so the npm cache is keyed
          # on whichever lock file setup-node discovers by default. Confirm
          # that this is the intended lock file.
          cache: 'npm'

      # Reports only true/false for each secret; the values are never printed.
      - name: Verify secrets
        run: |
          echo "Checking if required secrets are set:"
          echo "NEXT_PUBLIC_SUPABASE_URL: ${{ secrets.NEXT_PUBLIC_SUPABASE_URL != '' }}"
          echo "SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY != '' }}"

      - name: Install dependencies
        working-directory: localguru-ingestion
        run: |
          npm ci
          # List installed packages for debugging
          echo "=== Installed packages ==="
          npm list --depth=0

      - name: Create logs directory
        working-directory: localguru-ingestion
        run: mkdir -p logs

      # Runs src/scripts/daily-reddit-update.ts up to three times. A failed
      # attempt is retried only when that attempt's own output contains
      # "403 Forbidden"; any other result ends the loop immediately.
      #
      # Steps without an explicit `shell:` run under `bash -e`, so a failing
      # command would normally abort the script on the spot. The exit status of
      # the update script is therefore captured with `|| EXIT_CODE=$?`, which
      # keeps the retry logic, cleanup and log output reachable on failure.
      - name: Run daily Reddit data collection with automatic retry
        id: run_script
        working-directory: localguru-ingestion
        env:
          # Supabase credentials - using the correct secret names
          SUPABASE_URL: ${{ secrets.NEXT_PUBLIC_SUPABASE_URL }}
          NEXT_PUBLIC_SUPABASE_URL: ${{ secrets.NEXT_PUBLIC_SUPABASE_URL }}
          SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY }}
          NEXT_PUBLIC_SUPABASE_ANON_KEY: ${{ secrets.NEXT_PUBLIC_SUPABASE_ANON_KEY }}
          # Development mode for verbose logging
          NODE_ENV: "development"
        run: |
          DATE=$(date +%Y-%m-%d)
          LOG_FILE="logs/reddit-$DATE.log"
          # Scratch file holding the output of the current attempt only
          ATTEMPT_LOG=$(mktemp)

          # Publish the log file name first so the upload step can resolve it
          # even when this step ends in failure
          echo "log_filename=reddit-$DATE.log" >> "$GITHUB_OUTPUT"

          # Remove the credentials file and the scratch log however the script exits
          trap 'rm -f .env "$ATTEMPT_LOG"' EXIT

          {
            echo "========== Starting Reddit update at $(date) =========="
            echo "=== Environment ==="
            echo "Node version: $(node -v)"
            echo "NPM version: $(npm -v)"
            echo "Working directory: $(pwd)"
            echo "Script path exists: $(test -f src/scripts/daily-reddit-update.ts && echo 'Yes' || echo 'No')"

            # More detailed credential checks - while keeping secrets secure
            echo "=== Credential checks (without showing values) ==="
            echo "SUPABASE_URL set: $(test -n "$SUPABASE_URL" && echo 'Yes' || echo 'No')"
            echo "NEXT_PUBLIC_SUPABASE_URL set: $(test -n "$NEXT_PUBLIC_SUPABASE_URL" && echo 'Yes' || echo 'No')"
            echo "SUPABASE_SERVICE_ROLE_KEY set: $(test -n "$SUPABASE_SERVICE_ROLE_KEY" && echo 'Yes' || echo 'No')"
            echo "NEXT_PUBLIC_SUPABASE_ANON_KEY set: $(test -n "$NEXT_PUBLIC_SUPABASE_ANON_KEY" && echo 'Yes' || echo 'No')"
            echo "=== Starting script execution ==="
          } > "$LOG_FILE"

          # Create temporary .env file to ensure env vars are loaded
          {
            echo "# Temporary .env file for GitHub Actions"
            echo "NEXT_PUBLIC_SUPABASE_URL=$NEXT_PUBLIC_SUPABASE_URL"
            echo "SUPABASE_URL=$SUPABASE_URL"
            echo "SUPABASE_SERVICE_ROLE_KEY=$SUPABASE_SERVICE_ROLE_KEY"
            echo "NEXT_PUBLIC_SUPABASE_ANON_KEY=$NEXT_PUBLIC_SUPABASE_ANON_KEY"
          } > .env

          # Set max retries for the whole script
          MAX_SCRIPT_RETRIES=3
          CURRENT_RETRY=0
          FINAL_EXIT_CODE=0

          # Run with automatic retry for temporary Reddit API issues
          while [ "$CURRENT_RETRY" -lt "$MAX_SCRIPT_RETRIES" ]; do
            echo "Attempt $((CURRENT_RETRY + 1))/$MAX_SCRIPT_RETRIES to run Reddit update" >> "$LOG_FILE"

            # Run the script; `|| EXIT_CODE=$?` records a failure without
            # letting `bash -e` terminate the step
            EXIT_CODE=0
            npx ts-node --transpile-only src/scripts/daily-reddit-update.ts > "$ATTEMPT_LOG" 2>&1 || EXIT_CODE=$?
            cat "$ATTEMPT_LOG" >> "$LOG_FILE"

            # Check for 403 errors in this attempt only, so a 403 logged by an
            # earlier attempt cannot trigger a retry of an unrelated failure
            if [ "$EXIT_CODE" -ne 0 ] && grep -q "403 Forbidden" "$ATTEMPT_LOG"; then
              CURRENT_RETRY=$((CURRENT_RETRY + 1))
              if [ "$CURRENT_RETRY" -lt "$MAX_SCRIPT_RETRIES" ]; then
                # Calculate exponential backoff: 2^retry * 5 minutes (in seconds)
                BACKOFF_TIME=$((300 * 2**CURRENT_RETRY))
                echo "$(date) - 403 Forbidden errors detected. Waiting $BACKOFF_TIME seconds before retry ${CURRENT_RETRY}/${MAX_SCRIPT_RETRIES}..." >> "$LOG_FILE"
                echo "Sleeping $BACKOFF_TIME seconds before next attempt..." | tee -a "$LOG_FILE"
                sleep "$BACKOFF_TIME"
              else
                echo "$(date) - Maximum retries reached after encountering 403 errors." >> "$LOG_FILE"
                FINAL_EXIT_CODE=1
              fi
            else
              # Script completed without 403 errors or with success code, exit the loop
              FINAL_EXIT_CODE=$EXIT_CODE
              break
            fi
          done

          # Improved error detection at the end
          if [ "$FINAL_EXIT_CODE" -ne 0 ] && grep -q "403" "$LOG_FILE"; then
            {
              echo "WARNING: Script completed but encountered 403 errors!"
              echo "This may be due to Reddit rate limiting. The script uses simple user agent authentication"
              echo "based on the same configuration as the proven working script."
            } >> "$LOG_FILE"
          fi

          # Clean up temporary .env file (the EXIT trap covers abnormal exits)
          rm -f .env

          echo "========== Finished at $(date) with exit code $FINAL_EXIT_CODE ==========" >> "$LOG_FILE"

          # Output last 50 lines of log for immediate visibility in GitHub Actions
          echo "=== Last 50 lines of log ==="
          tail -n 50 "$LOG_FILE"

          # Exit with the final code
          exit "$FINAL_EXIT_CODE"

      # Runs even when the previous step failed, so the log is always kept.
      - name: Upload log as artifact
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: reddit-update-logs
          path: localguru-ingestion/logs/${{ steps.run_script.outputs.log_filename }}
          retention-days: 7

      # Optional email notification - manually added when needed
      # Uncomment and configure this section after setting up email secrets
      #
      # - name: Send failure notification
      #   if: failure()
      #   uses: dawidd6/action-send-mail@v3
      #   with:
      #     server_address: ${{ secrets.MAIL_SERVER }}
      #     server_port: ${{ secrets.MAIL_PORT }}
      #     username: ${{ secrets.MAIL_USERNAME }}
      #     password: ${{ secrets.MAIL_PASSWORD }}
      #     subject: "❌ Reddit Data Update Failed"
      #     body: |
      #       The daily Reddit data update job failed.
      #
      #       Repository: ${{ github.repository }}
      #       Workflow: ${{ github.workflow }}
      #       Run: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
      #
      #       Please check the logs for more details.
      #     to: ${{ secrets.MAIL_RECIPIENT }}
      #     from: GitHub Actions