Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 163 additions & 0 deletions python/storage-cleaner/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Directory used by Appwrite CLI for local development
.appwrite
45 changes: 45 additions & 0 deletions python/storage-cleaner/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# 🧹 Python Storage Cleaner Function

Storage cleaner function that removes all files older than `RETENTION_PERIOD_DAYS` days from the bucket identified by `APPWRITE_BUCKET_ID`.

## 🧰 Usage

### GET /

Remove files older than X days from the specified bucket

**Response**

Sample `200` Response: Buckets cleaned

## ⚙️ Configuration

| Setting | Value |
| ----------------- | --------------------------------- |
| Runtime | Python (3.9) |
| Entrypoint | `src/main.py` |
| Build Commands | `pip install -r requirements.txt` |
| Permissions | `any` |
| CRON | `0 1 * * *` |
| Timeout (Seconds) | 15 |

## 🔒 Environment Variables

### RETENTION_PERIOD_DAYS

The number of days you want to retain a file.

| Question | Answer |
| ------------ | ------ |
| Required | Yes |
| Sample Value | `1` |

### APPWRITE_BUCKET_ID

The ID of the bucket from which the files are to be deleted.

| Question | Answer |
| ------------ | -------------- |
| Required | Yes |
| Sample Value | `652d...b4daf` |

1 change: 1 addition & 0 deletions python/storage-cleaner/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
appwrite>=13.1.0,<14.0.0
65 changes: 65 additions & 0 deletions python/storage-cleaner/src/appwrite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""
Appwrite service module to handle storage cleanup operations.
"""

import os
from appwrite.client import Client
from appwrite.services.storage import Storage
from appwrite.query import Query
from .utils import get_expiry_date


class AppwriteService:
    """
    Service class to interact with Appwrite's storage service.
    """

    def __init__(self, api_key: str):
        """
        Create a storage client for the current function's project.

        The endpoint and project ID are read from the
        APPWRITE_FUNCTION_API_ENDPOINT and APPWRITE_FUNCTION_PROJECT_ID
        environment variables.

        :param api_key: API key used to authenticate storage requests.
        """
        client = (
            Client()
            .set_endpoint(os.getenv("APPWRITE_FUNCTION_API_ENDPOINT"))
            .set_project(os.getenv("APPWRITE_FUNCTION_PROJECT_ID"))
            .set_key(api_key)
        )
        self.storage = Storage(client)

    def clean_bucket(self, bucket_id: str):
        """
        Clean up files from the storage bucket by removing files older than a
        specified retention period.

        Files are listed in pages of 25 and deleted one by one. A file whose
        deletion fails is recorded and then skipped via a pagination cursor, so
        a persistently failing file cannot make the loop list it forever.

        :param bucket_id: The ID of the storage bucket to clean.
        :raises RuntimeError: If listing files fails, or if one or more files
            could not be deleted (raised after all pages were processed).
        """
        base_queries = [
            Query.less_than("$createdAt", get_expiry_date()),
            Query.limit(25),
        ]

        deleted_files_count = 0
        failed_files = []
        # ID of the most recent file that could not be deleted. Successfully
        # deleted files drop out of the listing on their own; failed ones stay,
        # so the next page has to start after them.
        cursor = None

        while True:
            queries = list(base_queries)
            if cursor is not None:
                queries.append(Query.cursor_after(cursor))

            try:
                response = self.storage.list_files(bucket_id, queries)
            except Exception as e:
                raise RuntimeError(
                    f"Failed to list files from bucket {bucket_id}: {str(e)}"
                ) from e
            files = response.get("files", [])

            if not files:
                break

            made_progress = False
            for f in files:
                # Resolved outside the try so the except handler below never
                # sees an unbound or stale ID.
                file_id = f.get("$id")
                if not file_id:
                    continue

                made_progress = True
                try:
                    self.storage.delete_file(bucket_id, file_id)
                except Exception as e:
                    failed_files.append({"id": file_id, "error": str(e)})
                    cursor = file_id
                else:
                    deleted_files_count += 1

            # A page where nothing was deleted and the cursor did not move
            # would be returned again unchanged; stop instead of spinning.
            if not made_progress:
                break

        if failed_files:
            raise RuntimeError(
                f"Deleted {deleted_files_count} files, but failed to delete {len(failed_files)} files: {failed_files}"
            )
25 changes: 25 additions & 0 deletions python/storage-cleaner/src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import os
from .appwrite import AppwriteService
from .utils import throw_if_missing


# Function entrypoint: validates configuration, reads the API key from the
# request headers, and builds the storage service used for the cleanup.
def main(context):
# NOTE(review): this validation runs before the try block further down, so a
# ValueError raised here is not turned into the 400 JSON response.
throw_if_missing(os.environ, ["RETENTION_PERIOD_DAYS", "APPWRITE_BUCKET_ID"])

# The API key is taken from the incoming request, not from the environment.
api_key = context.req.headers.get("x-appwrite-key")

# Reject the request early with 401 when no key was supplied.
if not api_key:
return context.res.json(
{"error": "Missing API key in x-appwrite-key header"}, 401
)

appwrite = AppwriteService(api_key)
Copy link

@coderabbitai coderabbitai bot Sep 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Validate all required environment variables.

The AppwriteService constructor depends on APPWRITE_FUNCTION_API_ENDPOINT and APPWRITE_FUNCTION_PROJECT_ID from the environment, but these are not validated in the throw_if_missing call on line 7. If either is missing, the Appwrite client initialization will fail with unclear errors.

Add the missing variables to the validation list:

-    throw_if_missing(os.environ, ["RETENTION_PERIOD_DAYS", "APPWRITE_BUCKET_ID"])
+    throw_if_missing(os.environ, [
+        "RETENTION_PERIOD_DAYS",
+        "APPWRITE_BUCKET_ID",
+        "APPWRITE_FUNCTION_API_ENDPOINT",
+        "APPWRITE_FUNCTION_PROJECT_ID"
+    ])
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
appwrite = AppwriteService(api_key)
# Validate required environment variables before anything else
throw_if_missing(os.environ, [
"RETENTION_PERIOD_DAYS",
"APPWRITE_BUCKET_ID",
"APPWRITE_FUNCTION_API_ENDPOINT",
"APPWRITE_FUNCTION_PROJECT_ID"
])
🤖 Prompt for AI Agents
In python/storage-cleaner/src/main.py around line 16, the AppwriteService is
instantiated but the required environment variables
APPWRITE_FUNCTION_API_ENDPOINT and APPWRITE_FUNCTION_PROJECT_ID are not
validated; update the throw_if_missing call (around line 7) to include
"APPWRITE_FUNCTION_API_ENDPOINT" and "APPWRITE_FUNCTION_PROJECT_ID" so missing
values are caught early and a clear error is raised before creating the
AppwriteService.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Meldiron In the Node.js implementation of the storage cleaner, we only validate the RETENTION_PERIOD_DAYS and APPWRITE_BUCKET_ID environment variables. My understanding is that APPWRITE_FUNCTION_API_ENDPOINT and APPWRITE_FUNCTION_PROJECT_ID might be automatically populated at build or runtime. Could you please clarify if these two variables need explicit validation here, or if they are always guaranteed to be set by the environment?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like the humans are having a chat. I'll hop back into my burrow for now. If you need me again, just tag @coderabbitai in a new comment, and I'll come hopping out!


# Run the cleanup for the configured bucket and map the outcome to a response:
# 200 text on success, 400 for ValueError, 500 for any other exception.
try:
appwrite.clean_bucket(os.environ["APPWRITE_BUCKET_ID"])
return context.res.text("Buckets cleaned", 200)
# NOTE(review): clean_bucket as shown in appwrite.py raises RuntimeError, not
# ValueError - confirm whether any call inside this try can reach this branch.
except ValueError as e:
return context.res.json({"error": str(e)}, 400)
except Exception as e:
# The detailed error is only printed; the response body stays generic.
print(f"Error cleaning bucket: {e}")
return context.res.json({"error": "Failed to clean bucket"}, 500)
41 changes: 41 additions & 0 deletions python/storage-cleaner/src/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
Utility functions for storage cleaner.
Includes functions for calculating expiry dates and validating required fields.
"""

import os
from datetime import datetime, timedelta, timezone

def get_expiry_date():
    """
    Return the cut-off timestamp: the current UTC time minus the retention period.

    The retention period (in days) is read from the RETENTION_PERIOD_DAYS
    environment variable. It defaults to 30 days if the variable is not set,
    is not an integer, or is not a positive number. A zero or negative period
    would place the cut-off at or after "now" and match every file in the
    bucket, so it is treated as invalid.

    :return: The cut-off as an ISO 8601 UTC string with millisecond precision
        and a "Z" suffix, e.g. "2024-01-31T12:00:00.000Z".
    """
    default_retention_days = 30

    try:
        retention_period = int(os.getenv("RETENTION_PERIOD_DAYS", "30"))
    except ValueError:
        retention_period = default_retention_days

    if retention_period <= 0:
        retention_period = default_retention_days

    expiry_date = datetime.now(timezone.utc) - timedelta(days=retention_period)
    # isoformat() on an aware UTC datetime already ends in "+00:00"; appending
    # "Z" to that would produce a malformed timestamp, so swap the offset for
    # the "Z" designator instead.
    return expiry_date.isoformat(timespec="milliseconds").replace("+00:00", "Z")


def _is_missing_value(value):
    """Return True if *value* is None, a blank string, or numerically zero."""
    if value is None:
        return True
    if isinstance(value, str):
        stripped = value.strip()
        if not stripped:
            return True
        # Environment variables are always strings, so a zero retention period
        # arrives as "0" (or "0.0", " 0 "), never as the integer 0.
        try:
            return float(stripped) == 0
        except ValueError:
            return False
    return value == 0


def throw_if_missing(obj, keys):
    """
    Throws an error if any of the keys are missing from the dictionary, or if
    their value is None, blank, or zero (numeric or as a string such as "0").

    :param obj: Dictionary to check
    :param keys: List of required keys
    :raises ValueError: If required keys are missing
    """
    # Disallow 0 retention to prevent immediate deletion of objects, which can cause data loss.
    missing = [
        key for key in keys if key not in obj or _is_missing_value(obj[key])
    ]

    if missing:
        raise ValueError(f"Missing required fields: {', '.join(missing)}")