diff --git a/python/storage-cleaner/.gitignore b/python/storage-cleaner/.gitignore new file mode 100644 index 00000000..beb0967e --- /dev/null +++ b/python/storage-cleaner/.gitignore @@ -0,0 +1,163 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
+# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Directory used by Appwrite CLI for local development +.appwrite \ No newline at end of file diff --git a/python/storage-cleaner/README.md b/python/storage-cleaner/README.md new file mode 100644 index 00000000..19f5f60b --- /dev/null +++ b/python/storage-cleaner/README.md @@ -0,0 +1,45 @@ +# 🧹 Python Storage Cleaner Function + +Storage cleaner function to remove all files older than X number of days from the specified bucket. 
import os
from datetime import datetime, timedelta, timezone

# Retention used when RETENTION_PERIOD_DAYS is unset, unparsable, or < 1.
DEFAULT_RETENTION_DAYS = 30
# Number of files requested per list_files call.
PAGE_SIZE = 25


# --- src/appwrite.py --------------------------------------------------------


class AppwriteService:
    """
    Service class to interact with Appwrite's storage service.
    """

    def __init__(self, api_key: str):
        """
        Build a Storage client for the current function's project.

        The endpoint and project ID are read from the
        APPWRITE_FUNCTION_API_ENDPOINT and APPWRITE_FUNCTION_PROJECT_ID
        environment variables.

        :param api_key: API key used to authenticate the client.
        """
        # SDK imports are function-scoped so this module can be imported
        # (e.g. for the utilities below) where the SDK is not installed.
        from appwrite.client import Client
        from appwrite.services.storage import Storage

        client = (
            Client()
            .set_endpoint(os.getenv("APPWRITE_FUNCTION_API_ENDPOINT"))
            .set_project(os.getenv("APPWRITE_FUNCTION_PROJECT_ID"))
            .set_key(api_key)
        )
        self.storage = Storage(client)

    def clean_bucket(self, bucket_id: str):
        """
        Clean up files from the storage bucket by removing files older than a
        specified retention period.

        Files are listed in pages of PAGE_SIZE and deleted one by one. A file
        whose deletion fails is recorded and then skipped on later pages by
        paginating after it, so a permanently undeletable file can no longer
        keep the loop running forever.

        :param bucket_id: The ID of the storage bucket to clean.
        :raises RuntimeError: If listing files fails, or if at least one file
            could not be deleted (raised after all pages were processed).
        """
        from appwrite.query import Query

        # Computed once so every page uses the same cutoff.
        expiry_date = get_expiry_date()

        deleted_files_count = 0
        failed_files = []
        # ID of the most recent file that could not be deleted. It still
        # exists in the bucket, so it is usable as a pagination cursor.
        cursor = None

        while True:
            queries = [
                Query.less_than("$createdAt", expiry_date),
                Query.limit(PAGE_SIZE),
            ]
            if cursor is not None:
                queries.append(Query.cursor_after(cursor))

            try:
                response = self.storage.list_files(bucket_id, queries)
            except Exception as e:
                raise RuntimeError(
                    f"Failed to list files from bucket {bucket_id}: {str(e)}"
                ) from e

            files = response.get("files", [])
            if not files:
                break

            progressed = False
            for f in files:
                # Read the ID outside the try so the except clause below can
                # always reference it.
                file_id = f.get("$id")
                if not file_id:
                    continue

                try:
                    self.storage.delete_file(bucket_id, file_id)
                except Exception as e:
                    failed_files.append({"id": file_id, "error": str(e)})
                    # Everything up to this file is now either deleted or
                    # recorded as failed; continue after it next time.
                    cursor = file_id
                else:
                    deleted_files_count += 1
                progressed = True

            # A page in which nothing was deleted and the cursor did not move
            # would be returned again unchanged; stop instead of spinning.
            if not progressed:
                break

        if failed_files:
            raise RuntimeError(
                f"Deleted {deleted_files_count} files, but failed to delete {len(failed_files)} files: {failed_files}"
            )


# --- src/main.py ------------------------------------------------------------


def main(context):
    """
    Function entrypoint: delete expired files from the configured bucket.

    :param context: Function context; this code uses
        ``context.req.headers`` and ``context.res.json`` / ``context.res.text``.
    :return: The response object produced by ``context.res``:
        400 if required environment variables are missing or invalid,
        401 if the ``x-appwrite-key`` header is absent,
        200 with "Buckets cleaned" on success,
        500 if cleaning fails.
    """
    # Validate inside a try so a configuration problem produces the same
    # structured 400 response as any other ValueError.
    try:
        throw_if_missing(os.environ, ["RETENTION_PERIOD_DAYS", "APPWRITE_BUCKET_ID"])
    except ValueError as e:
        return context.res.json({"error": str(e)}, 400)

    api_key = context.req.headers.get("x-appwrite-key")

    if not api_key:
        return context.res.json(
            {"error": "Missing API key in x-appwrite-key header"}, 401
        )

    try:
        appwrite = AppwriteService(api_key)
        appwrite.clean_bucket(os.environ["APPWRITE_BUCKET_ID"])
        return context.res.text("Buckets cleaned", 200)
    except ValueError as e:
        return context.res.json({"error": str(e)}, 400)
    except Exception as e:
        print(f"Error cleaning bucket: {e}")
        return context.res.json({"error": "Failed to clean bucket"}, 500)


# --- src/utils.py -----------------------------------------------------------


def get_expiry_date():
    """
    Returns a date subtracted by the retention period from the current date.
    The retention period is fetched from the RETENTION_PERIOD_DAYS environment variable.
    Defaults to 30 days if the environment variable is not set or invalid
    (not an integer, or less than 1).

    :return: The calculated expiry date as a UTC ISO 8601 string with
        millisecond precision and a ``Z`` suffix,
        e.g. ``2024-05-01T12:00:00.123Z``.
    """
    try:
        retention_period = int(
            os.getenv("RETENTION_PERIOD_DAYS", str(DEFAULT_RETENTION_DAYS))
        )
    except ValueError:
        retention_period = DEFAULT_RETENTION_DAYS

    # Zero or negative retention would put the cutoff at "now" or in the
    # future and match every file; treat it as invalid.
    if retention_period < 1:
        retention_period = DEFAULT_RETENTION_DAYS

    expiry_date = datetime.now(timezone.utc) - timedelta(days=retention_period)
    # isoformat() on an aware UTC datetime already ends in "+00:00"; swap that
    # for "Z" instead of appending a second timezone designator.
    return expiry_date.isoformat(timespec="milliseconds").replace("+00:00", "Z")


def _is_missing(value):
    """Return True if *value* counts as absent: None, 0, or a blank/zero string."""
    if value is None:
        return True
    if isinstance(value, str):
        stripped = value.strip()
        if not stripped:
            return True
        # Environment values are always strings, so a numeric zero arrives
        # as "0" rather than 0.
        try:
            return int(stripped) == 0
        except ValueError:
            return False
    return value == 0


def throw_if_missing(obj, keys):
    """
    Throws an error if any of the keys are missing from the dictionary or None/0.

    String values that are empty, whitespace-only, or parse to integer zero
    are treated as missing as well.

    :param obj: Dictionary to check
    :param keys: List of required keys
    :raises ValueError: If required keys are missing
    """
    # Disallow 0 retention to prevent immediate deletion of objects, which can cause data loss.
    missing = [key for key in keys if key not in obj or _is_missing(obj[key])]

    if missing:
        raise ValueError(f"Missing required fields: {', '.join(missing)}")