Skip to content

Daily Reddit Data Update #12

Daily Reddit Data Update

Daily Reddit Data Update #12

name: Daily Reddit Data Update

on:
  schedule:
    # Run every day at 04:30 UTC (adjust the expression as needed for your
    # timezone).
    - cron: '30 4 * * *'
  # Allow manual triggering from the Actions tab, e.g. for testing.
  workflow_dispatch:
jobs:
  # Runs the localguru-ingestion daily Reddit update script with the Supabase
  # credentials taken from repository secrets, then uploads the run log.
  update-reddit-data:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18'
          cache: 'npm'
          # `npm ci` runs inside localguru-ingestion, so key the npm cache on
          # that project's lockfile rather than one at the repository root.
          # NOTE(review): assumes the lockfile is named package-lock.json.
          cache-dependency-path: localguru-ingestion/package-lock.json

      # Prints only true/false per secret; the values themselves are never
      # echoed.
      - name: Verify secrets
        run: |
          echo "Checking if required secrets are set:"
          echo "NEXT_PUBLIC_SUPABASE_URL: ${{ secrets.NEXT_PUBLIC_SUPABASE_URL != '' }}"
          echo "SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY != '' }}"

      - name: Install dependencies
        working-directory: localguru-ingestion
        run: |
          npm ci
          # List installed packages for debugging
          echo "=== Installed packages ==="
          npm list --depth=0

      - name: Create logs directory
        working-directory: localguru-ingestion
        run: mkdir -p logs

      - name: Run daily Reddit data collection with automatic retry
        # The id is referenced by the upload step (steps.run_script.outputs).
        id: run_script
        working-directory: localguru-ingestion
        env:
          # Supabase credentials - using the correct secret names
          SUPABASE_URL: ${{ secrets.NEXT_PUBLIC_SUPABASE_URL }}
          NEXT_PUBLIC_SUPABASE_URL: ${{ secrets.NEXT_PUBLIC_SUPABASE_URL }}
          SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY }}
          NEXT_PUBLIC_SUPABASE_ANON_KEY: ${{ secrets.NEXT_PUBLIC_SUPABASE_ANON_KEY }}
          # Development mode for verbose logging
          NODE_ENV: "development"
        run: |
          DATE=$(date +%Y-%m-%d)
          LOG_FILE="logs/reddit-$DATE.log"
          # Scratch file holding the output of the current attempt only, so the
          # 403 check below never matches text left over from an earlier attempt.
          ATTEMPT_LOG="$(mktemp)"

          # Publish the log filename first so the upload step can resolve it
          # even if this step stops early.
          echo "log_filename=reddit-$DATE.log" >> "$GITHUB_OUTPUT"

          # The temporary .env file holds credentials: remove it (and the
          # scratch log) on every exit path, not only on success.
          trap 'rm -f .env "$ATTEMPT_LOG"' EXIT

          echo "========== Starting Reddit update at $(date) ==========" > "$LOG_FILE"
          echo "=== Environment ===" >> "$LOG_FILE"
          echo "Node version: $(node -v)" >> "$LOG_FILE"
          echo "NPM version: $(npm -v)" >> "$LOG_FILE"
          echo "Working directory: $(pwd)" >> "$LOG_FILE"
          echo "Script path exists: $(test -f src/scripts/daily-reddit-update.ts && echo 'Yes' || echo 'No')" >> "$LOG_FILE"

          # More detailed credential checks - while keeping secrets secure
          echo "=== Credential checks (without showing values) ===" >> "$LOG_FILE"
          echo "SUPABASE_URL set: $(test -n "$SUPABASE_URL" && echo 'Yes' || echo 'No')" >> "$LOG_FILE"
          echo "NEXT_PUBLIC_SUPABASE_URL set: $(test -n "$NEXT_PUBLIC_SUPABASE_URL" && echo 'Yes' || echo 'No')" >> "$LOG_FILE"
          echo "SUPABASE_SERVICE_ROLE_KEY set: $(test -n "$SUPABASE_SERVICE_ROLE_KEY" && echo 'Yes' || echo 'No')" >> "$LOG_FILE"
          echo "NEXT_PUBLIC_SUPABASE_ANON_KEY set: $(test -n "$NEXT_PUBLIC_SUPABASE_ANON_KEY" && echo 'Yes' || echo 'No')" >> "$LOG_FILE"
          echo "=== Starting script execution ===" >> "$LOG_FILE"

          # Create temporary .env file to ensure env vars are loaded
          echo "# Temporary .env file for GitHub Actions" > .env
          echo "NEXT_PUBLIC_SUPABASE_URL=$NEXT_PUBLIC_SUPABASE_URL" >> .env
          echo "SUPABASE_URL=$SUPABASE_URL" >> .env
          echo "SUPABASE_SERVICE_ROLE_KEY=$SUPABASE_SERVICE_ROLE_KEY" >> .env
          echo "NEXT_PUBLIC_SUPABASE_ANON_KEY=$NEXT_PUBLIC_SUPABASE_ANON_KEY" >> .env

          # Set max retries for the whole script
          MAX_SCRIPT_RETRIES=3
          CURRENT_RETRY=0
          FINAL_EXIT_CODE=0

          # Run with automatic retry for temporary Reddit API issues
          while [ "$CURRENT_RETRY" -lt "$MAX_SCRIPT_RETRIES" ]; do
            echo "Attempt $((CURRENT_RETRY + 1))/$MAX_SCRIPT_RETRIES to run Reddit update" >> "$LOG_FILE"

            # The default Actions shell is `bash -e`, which would abort the
            # step on a failing command before the status could be inspected;
            # `|| EXIT_CODE=$?` captures the status and lets the loop decide.
            EXIT_CODE=0
            npx ts-node --transpile-only src/scripts/daily-reddit-update.ts > "$ATTEMPT_LOG" 2>&1 || EXIT_CODE=$?
            cat "$ATTEMPT_LOG" >> "$LOG_FILE"

            # Retry only when this attempt failed AND reported a 403.
            if [ "$EXIT_CODE" -ne 0 ] && grep -q "403 Forbidden" "$ATTEMPT_LOG"; then
              CURRENT_RETRY=$((CURRENT_RETRY + 1))
              if [ "$CURRENT_RETRY" -lt "$MAX_SCRIPT_RETRIES" ]; then
                # Calculate exponential backoff: 2^retry * 5 minutes (in seconds)
                BACKOFF_TIME=$((300 * 2 ** CURRENT_RETRY))
                echo "$(date) - 403 Forbidden errors detected. Waiting $BACKOFF_TIME seconds before retry ${CURRENT_RETRY}/${MAX_SCRIPT_RETRIES}..." >> "$LOG_FILE"
                echo "Sleeping $BACKOFF_TIME seconds before next attempt..." | tee -a "$LOG_FILE"
                sleep "$BACKOFF_TIME"
              else
                echo "$(date) - Maximum retries reached after encountering 403 errors." >> "$LOG_FILE"
                FINAL_EXIT_CODE=1
              fi
            else
              # Script completed without 403 errors or with success code, exit the loop
              FINAL_EXIT_CODE=$EXIT_CODE
              break
            fi
          done

          # Improved error detection at the end
          if [ "$FINAL_EXIT_CODE" -ne 0 ] && grep -q "403" "$LOG_FILE"; then
            echo "WARNING: Script completed but encountered 403 errors!" >> "$LOG_FILE"
            echo "This may be due to Reddit rate limiting. The script uses simple user agent authentication" >> "$LOG_FILE"
            echo "based on the same configuration as the proven working script." >> "$LOG_FILE"
          fi

          echo "========== Finished at $(date) with exit code $FINAL_EXIT_CODE ==========" >> "$LOG_FILE"

          # Output last 50 lines of log for immediate visibility in GitHub Actions
          echo "=== Last 50 lines of log ==="
          tail -n 50 "$LOG_FILE"

          # Exit with the final code (the EXIT trap removes .env)
          exit "$FINAL_EXIT_CODE"

      # Runs even when the previous step failed so the log is always kept.
      - name: Upload log as artifact
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: reddit-update-logs
          path: localguru-ingestion/logs/${{ steps.run_script.outputs.log_filename }}
          retention-days: 7

      # Optional email notification - manually added when needed
      # Uncomment and configure this section after setting up email secrets
      #
      # - name: Send failure notification
      #   if: failure()
      #   uses: dawidd6/action-send-mail@v3
      #   with:
      #     server_address: ${{ secrets.MAIL_SERVER }}
      #     server_port: ${{ secrets.MAIL_PORT }}
      #     username: ${{ secrets.MAIL_USERNAME }}
      #     password: ${{ secrets.MAIL_PASSWORD }}
      #     subject: "❌ Reddit Data Update Failed"
      #     body: |
      #       The daily Reddit data update job failed.
      #
      #       Repository: ${{ github.repository }}
      #       Workflow: ${{ github.workflow }}
      #       Run: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
      #
      #       Please check the logs for more details.
      #     to: ${{ secrets.MAIL_RECIPIENT }}
      #     from: GitHub Actions