Skip to content

Commit

Permalink
Merge pull request #77 from ivelin/fix/minor-fixes
Browse files Browse the repository at this point in the history
fix: revert data upload to tgz. Hitting API limit with parquet files.
  • Loading branch information
ivelin authored Feb 23, 2025
2 parents 131ee3b + f10a297 commit c20d645
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 12 deletions.
18 changes: 9 additions & 9 deletions src/canswim/hfhub.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,11 +168,11 @@ def download_data(self, repo_id: str = None, local_dir: str = None):
token=self.HF_TOKEN,
)
# Unpack forecast parquet files from tar
# forecast_dir = f"{data_dir}/forecast/"
# forecast_tar = f"{data_dir}/forecast.tar.gz"
# with tarfile.open(forecast_tar, "r:gz") as tar:
# logger.info(f"Extracting {forecast_tar} to folder {forecast_dir}")
# tar.extractall(path=forecast_dir, filter="data")
forecast_dir = f"{data_dir}/forecast/"
forecast_tar = f"{data_dir}/forecast.tar.gz"
with tarfile.open(forecast_tar, "r:gz") as tar:
logger.info(f"Extracting {forecast_tar} to folder {forecast_dir}")
tar.extractall(path=forecast_dir, filter="data")

def upload_data(
self, repo_id: str = None, private: bool = True, local_dir: str = None
Expand Down Expand Up @@ -204,10 +204,10 @@ def upload_data(
)
# Compress forecast parquet files into a single tar.gz to stay under the HF Hub limit of 25k LFS files
forecast_dir = f"{data_dir}/forecast/"
# forecast_tar = f"{data_dir}/forecast.tar.gz"
# with tarfile.open(forecast_tar, "w:gz") as tar:
# logger.info(f"Creating {forecast_tar} from folder {forecast_dir}")
# tar.add(forecast_dir, arcname=os.path.basename(forecast_dir))
forecast_tar = f"{data_dir}/forecast.tar.gz"
with tarfile.open(forecast_tar, "w:gz") as tar:
logger.info(f"Creating {forecast_tar} from folder {forecast_dir}")
tar.add(forecast_dir, arcname=os.path.basename(forecast_dir))
# upload select files to hfhub
logger.info(f"uploading folder {data_dir}")
upload_folder(
Expand Down
24 changes: 21 additions & 3 deletions weekend.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,29 @@
#!/usr/bin/bash
#args=("$@")

# This script is intended to be executed by cron each weekend.
# The goal is to pull latest market data for the week
# and run forecast for the following week.
#
# Steps, in order: activate the conda environment, install this project in
# editable mode, run the canswim module, gather market data, run the forecast.

# stop on first error
set -e
# print verbose messages
# (-x traces each command before it runs; -e is already enabled above,
# so repeating it here is redundant but harmless)
set -ex

echo "CANSWIM Weekend Forecast Routine: Starting..."

# NOTE(review): `conda activate` normally needs conda's shell hook to be loaded
# first; non-interactive shells such as cron jobs usually do not load it.
# Confirm the cron environment sources it, otherwise `set -e` aborts here.
conda activate canswim
# Editable install of the project found in the current directory.
# NOTE(review): assumes the script is started from the repository root - TODO confirm.
pip install -e ./

#python -m canswim "${args[@]}"

# Runs the canswim package as a module with no arguments.
# NOTE(review): what canswim does without a subcommand is not visible here - confirm
# this call is intended in addition to the gatherdata/forecast steps below.
python -m canswim

# gather up to date market data
./canswim gatherdata
# NOTE(review): the `##` lines look like disabled alternatives that call canswim.sh;
# confirm which launcher (./canswim or ./canswim.sh) actually exists.
##./canswim.sh gatherdata

# run forecast and upload to hf hub
./canswim forecast
##./canswim.sh forecast

echo "CANSWIM Weekend Forecast Routine: Finished."


0 comments on commit c20d645

Please sign in to comment.