-
Notifications
You must be signed in to change notification settings - Fork 120
feat: implement Python storage cleaner with Appwrite integration #338
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
share/python-wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.nox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
*.py,cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
cover/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
db.sqlite3 | ||
db.sqlite3-journal | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
.pybuilder/ | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# IPython | ||
profile_default/ | ||
ipython_config.py | ||
|
||
# pyenv | ||
# For a library or package, you might want to ignore these files since the code is | ||
# intended to run in multiple environments; otherwise, check them in: | ||
# .python-version | ||
|
||
# pipenv | ||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | ||
# However, in case of collaboration, if having platform-specific dependencies or dependencies | ||
# having no cross-platform support, pipenv may install dependencies that don't work, or not | ||
# install all needed dependencies. | ||
#Pipfile.lock | ||
|
||
# poetry | ||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. | ||
# This is especially recommended for binary packages to ensure reproducibility, and is more | ||
# commonly ignored for libraries. | ||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control | ||
#poetry.lock | ||
|
||
# pdm | ||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. | ||
#pdm.lock | ||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it | ||
# in version control. | ||
# https://pdm.fming.dev/#use-with-ide | ||
.pdm.toml | ||
|
||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm | ||
__pypackages__/ | ||
|
||
# Celery stuff | ||
celerybeat-schedule | ||
celerybeat.pid | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
.dmypy.json | ||
dmypy.json | ||
|
||
# Pyre type checker | ||
.pyre/ | ||
|
||
# pytype static type analyzer | ||
.pytype/ | ||
|
||
# Cython debug symbols | ||
cython_debug/ | ||
|
||
# PyCharm | ||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can | ||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore | ||
# and can be added to the global gitignore or merged into this file. For a more nuclear | ||
# option (not recommended) you can uncomment the following to ignore the entire idea folder. | ||
#.idea/ | ||
|
||
# Directory used by Appwrite CLI for local development | ||
.appwrite |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# 🧹 Python Storage Cleaner Function | ||
|
||
Storage cleaner function to remove all files older than X number of days from the specified bucket. | ||
|
||
## 🧰 Usage | ||
|
||
### GET / | ||
|
||
Removes every file older than `RETENTION_PERIOD_DAYS` days from the bucket identified by `APPWRITE_BUCKET_ID`. | ||
|
||
**Response** | ||
|
||
Sample `200` Response: Buckets cleaned | ||
|
||
## ⚙️ Configuration | ||
|
||
| Setting | Value | | ||
| ----------------- | --------------------------------- | | ||
| Runtime | Python (3.9) | | ||
| Entrypoint | `src/main.py` | | ||
| Build Commands | `pip install -r requirements.txt` | | ||
| Permissions | `any` | | ||
| CRON | `0 1 * * *` | | ||
| Timeout (Seconds) | 15 | | ||
|
||
## 🔒 Environment Variables | ||
|
||
### RETENTION_PERIOD_DAYS | ||
|
||
The number of days you want to retain a file. | ||
|
||
| Question | Answer | | ||
| ------------ | ------ | | ||
| Required | Yes | | ||
| Sample Value | `1` | | ||
|
||
### APPWRITE_BUCKET_ID | ||
|
||
The ID of the bucket from which the files are to be deleted. | ||
|
||
| Question | Answer | | ||
| ------------ | -------------- | | ||
| Required | Yes | | ||
| Sample Value | `652d...b4daf` | | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
appwrite>=13.1.0,<14.0.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
""" | ||
Appwrite service module to handle storage cleanup operations. | ||
""" | ||
|
||
import os | ||
from appwrite.client import Client | ||
from appwrite.services.storage import Storage | ||
from appwrite.query import Query | ||
from .utils import get_expiry_date | ||
|
||
|
||
class AppwriteService:
    """
    Service class to interact with Appwrite's storage service.
    """

    def __init__(self, api_key: str):
        """
        Build the storage client.

        The endpoint and project ID are read from the
        APPWRITE_FUNCTION_API_ENDPOINT and APPWRITE_FUNCTION_PROJECT_ID
        environment variables.

        :param api_key: API key used to authenticate storage requests.
        """
        client = (
            Client()
            .set_endpoint(os.getenv("APPWRITE_FUNCTION_API_ENDPOINT"))
            .set_project(os.getenv("APPWRITE_FUNCTION_PROJECT_ID"))
            .set_key(api_key)
        )
        self.storage = Storage(client)

    def clean_bucket(self, bucket_id: str):
        """
        Clean up files from the storage bucket by removing files older than a
        specified retention period.

        Expired files are listed 25 at a time and deleted one by one. The
        listing is repeated until it comes back empty or until a whole pass
        deletes nothing. The second condition guarantees termination when
        some files cannot be deleted: they would otherwise be listed again
        on every pass, forever.

        :param bucket_id: The ID of the storage bucket to clean.
        :return: The number of files that were deleted.
        :raises RuntimeError: If listing fails, or if at least one file could
            not be deleted (the message contains the per-file errors).
        """
        queries = [
            Query.less_than("$createdAt", get_expiry_date()),
            Query.limit(25),
        ]

        deleted_files_count = 0
        # Keyed by file ID so that a file which is listed again after a
        # failed delete is reported once and is not retried.
        failed_files = {}

        while True:
            try:
                response = self.storage.list_files(bucket_id, queries)
            except Exception as e:
                raise RuntimeError(
                    f"Failed to list files from bucket {bucket_id}: {str(e)}"
                ) from e

            files = response.get("files", [])
            if not files:
                break

            deleted_this_pass = 0
            for f in files:
                # Resolved outside the try so the except branch can always
                # refer to it.
                file_id = f.get("$id")
                if not file_id or file_id in failed_files:
                    continue
                try:
                    self.storage.delete_file(bucket_id, file_id)
                except Exception as e:
                    failed_files[file_id] = str(e)
                else:
                    deleted_files_count += 1
                    deleted_this_pass += 1

            # Nothing was removed, so the next listing would return the very
            # same page; stop instead of looping forever.
            if deleted_this_pass == 0:
                break

        if failed_files:
            failures = [
                {"id": file_id, "error": error}
                for file_id, error in failed_files.items()
            ]
            raise RuntimeError(
                f"Deleted {deleted_files_count} files, but failed to delete {len(failures)} files: {failures}"
            )

        return deleted_files_count
Original file line number | Diff line number | Diff line change | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,25 @@ | ||||||||||||||||||
import os | ||||||||||||||||||
from .appwrite import AppwriteService | ||||||||||||||||||
from .utils import throw_if_missing | ||||||||||||||||||
|
||||||||||||||||||
|
||||||||||||||||||
def main(context):
    """
    Function entrypoint: delete expired files from the configured bucket.

    :param context: Function context; ``context.req.headers`` is read for the
        ``x-appwrite-key`` header and ``context.res`` builds the response.
    :return: ``200`` text response on success, ``400`` JSON when required
        configuration is missing, ``401`` JSON when the API key header is
        absent, ``500`` JSON when cleaning fails.
    """
    # Validate inside a try so that a missing variable produces the 400
    # response below instead of escaping as an unhandled exception.
    # NOTE(review): APPWRITE_FUNCTION_API_ENDPOINT and
    # APPWRITE_FUNCTION_PROJECT_ID are not validated here; presumably the
    # runtime always injects them - confirm before relying on it.
    try:
        throw_if_missing(os.environ, ["RETENTION_PERIOD_DAYS", "APPWRITE_BUCKET_ID"])
    except ValueError as e:
        return context.res.json({"error": str(e)}, 400)

    api_key = context.req.headers.get("x-appwrite-key")

    if not api_key:
        return context.res.json(
            {"error": "Missing API key in x-appwrite-key header"}, 401
        )

    appwrite = AppwriteService(api_key)

    try:
        appwrite.clean_bucket(os.environ["APPWRITE_BUCKET_ID"])
        return context.res.text("Buckets cleaned", 200)
    except ValueError as e:
        return context.res.json({"error": str(e)}, 400)
    except Exception as e:
        # The detailed error goes to the log only; the client gets a generic
        # message.
        print(f"Error cleaning bucket: {e}")
        return context.res.json({"error": "Failed to clean bucket"}, 500)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
""" | ||
Utility functions for storage cleaner. | ||
Includes functions for calculating expiry dates and validating required fields. | ||
""" | ||
|
||
import os | ||
from datetime import datetime, timedelta, timezone | ||
|
||
def get_expiry_date():
    """
    Return the cutoff timestamp: the current UTC time minus the retention period.

    The retention period (in days) is read from the RETENTION_PERIOD_DAYS
    environment variable. It defaults to 30 days if the variable is not set,
    is not an integer, or is not positive. A zero or negative retention would
    put the cutoff at or after "now" and so select every file for deletion.

    :return: The cutoff in ISO 8601 format, UTC, with millisecond precision
        and a "Z" suffix (for example "2024-01-31T12:00:00.000Z").
    """
    default_retention_days = 30

    try:
        retention_period = int(
            os.getenv("RETENTION_PERIOD_DAYS", str(default_retention_days))
        )
    except ValueError:
        retention_period = default_retention_days

    if retention_period <= 0:
        retention_period = default_retention_days

    expiry_date = datetime.now(timezone.utc) - timedelta(days=retention_period)

    # isoformat() on an aware UTC datetime already ends in "+00:00"; appending
    # "Z" to that would produce an invalid timestamp, so swap the offset for
    # the "Z" designator instead.
    return expiry_date.isoformat(timespec="milliseconds").replace("+00:00", "Z")
|
||
|
||
def throw_if_missing(obj, keys):
    """
    Throws an error if any of the keys are missing from the dictionary or
    carry an empty / zero value.

    A key counts as missing when it is absent, when its value is None or
    equal to 0, or when its value is a string that is blank or exactly "0"
    after stripping whitespace. The string cases matter because environment
    variables are always strings, so a numeric comparison alone never rejects
    a zero retention period.

    :param obj: Dictionary to check
    :param keys: List of required keys
    :raises ValueError: If required keys are missing
    """
    missing = []
    for key in keys:
        value = obj[key] if key in obj else None
        if isinstance(value, str):
            # Disallow 0 retention to prevent immediate deletion of objects,
            # which can cause data loss.
            absent = value.strip() in ("", "0")
        else:
            absent = value is None or value == 0
        if absent:
            missing.append(key)

    if missing:
        raise ValueError(f"Missing required fields: {', '.join(missing)}")
Uh oh!
There was an error while loading. Please reload this page.