ERA - EmbeddedResourceArchive #320
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow identity, triggers, and shared settings for the ERA pipeline.
name: ERA - EmbeddedResourceArchive

on:
  # Allow manual triggering.
  workflow_dispatch:
  # Run daily at midnight UTC.
  schedule:
    - cron: '0 0 * * *'

env:
  # Row cap applied to the file-listing query (its LIMIT value).
  # Adjust this number based on your needs and quota.
  MAX_FILES_PER_RUN: 100
  # Monthly BigQuery processing budget in GB (1 TB monthly free tier).
  MONTHLY_QUOTA_GB: 1000
jobs:
  # Single job: list candidate .svd/.cmm files via BigQuery, process them with
  # the repository's scripts, and commit the results back to the repository.
  process-files:
    runs-on: ubuntu-latest
    # The last step pushes a commit, so the job token needs write access to
    # repository contents.
    permissions:
      contents: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # The [pandas] extra pulls in db-dtypes, which to_dataframe() requires.
          pip install 'google-cloud-bigquery[pandas]' pandas

      - name: Authenticate with Google Cloud
        uses: google-github-actions/auth@v2
        with:
          credentials_json: ${{ secrets.GCP_SA_KEY }}

      - name: Check BigQuery quota and run query
        # Writes files_to_process.csv; fails the step when the monthly budget
        # (MONTHLY_QUOTA_GB) is already used up.
        run: |
          # Quoted heredoc delimiter: the shell passes the script through
          # verbatim, so SQL backticks need no escaping.
          python - <<'EOF'
          import os
          import sys

          from google.cloud import bigquery

          BYTES_PER_GB = 1024 ** 3

          client = bigquery.Client()

          # Check monthly quota: total bytes processed by this project's query
          # jobs since the start of the current month, expressed in GB.
          usage_sql = '''
              SELECT SUM(total_bytes_processed) / 1024 / 1024 / 1024 AS total_gb_processed
              FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
              WHERE creation_time >= TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), MONTH)
                AND job_type = "QUERY"
          '''
          # An aggregate without GROUP BY always yields exactly one row; the
          # SUM is NULL (None) when no jobs ran yet this month.
          usage_row = next(iter(client.query(usage_sql).result()))
          total_gb_processed = usage_row['total_gb_processed'] or 0
          quota_left = float(os.environ['MONTHLY_QUOTA_GB']) - total_gb_processed
          print(f"Monthly quota left: {quota_left:.2f} GB")
          if quota_left <= 0:
              print("Monthly quota exceeded. Exiting.")
              sys.exit(1)

          # int() rejects anything but a number before it is placed in the SQL.
          max_files = int(os.environ['MAX_FILES_PER_RUN'])
          main_query = f'''
              SELECT
                  f.id,
                  f.repo_name AS repo,
                  f.path,
                  f.ref AS branch
              FROM
                  `bigquery-public-data.github_repos.files` f
              WHERE
                  f.path LIKE '%.svd' OR f.path LIKE '%.cmm'
              LIMIT {max_files}
          '''
          # LIMIT only caps the rows returned, not the bytes scanned, so cap
          # the billed bytes at the remaining budget: a query that would
          # exceed it fails instead of running.
          job_config = bigquery.QueryJobConfig(
              maximum_bytes_billed=int(quota_left * BYTES_PER_GB)
          )
          df = client.query(main_query, job_config=job_config).to_dataframe()
          df.to_csv('files_to_process.csv', index=False)
          print(f"Retrieved {len(df)} files to process.")
          EOF

      - name: Process SVD and CMM files
        run: |
          python process_files_script.py

      - name: Organize SVD files
        run: |
          python organize_svd_files.py

      - name: Commit changes
        run: |
          git config --local user.email "action@github.com"
          git config --local user.name "GitHub Action"
          git add svd cmm
          # Commit and push only when something is staged, so that a genuine
          # commit failure is not mistaken for "nothing to commit".
          if git diff --cached --quiet; then
            echo "No changes to commit"
          else
            git commit -m "ERA: Update SVD and CMM files"
            git push
          fi