|
| 1 | +# publications/management/commands/deploy_zenodo.py |
| 2 | +import json |
| 3 | +import os |
| 4 | +from pathlib import Path |
| 5 | + |
| 6 | +import requests |
| 7 | +from django.conf import settings |
| 8 | +from django.core.management import BaseCommand, call_command |
| 9 | + |
| 10 | + |
| 11 | +def _markdown_to_html(text: str) -> str: |
| 12 | + """Convert Markdown to HTML; fall back to minimal conversion if markdown isn't installed.""" |
| 13 | + try: |
| 14 | + import markdown # optional dependency |
| 15 | + return markdown.markdown(text) |
| 16 | + except Exception: |
| 17 | + esc = ( |
| 18 | + text.replace("&", "&") |
| 19 | + .replace("<", "<") |
| 20 | + .replace(">", ">") |
| 21 | + ) |
| 22 | + return "<p>" + esc.replace("\n\n", "</p><p>").replace("\n", "<br>") + "</p>" |
| 23 | + |
| 24 | + |
| 25 | +# ------------------------------ |
| 26 | +# HTTP helpers (robust to test doubles) |
| 27 | +# ------------------------------ |
| 28 | + |
| 29 | +def _ensure_ok(res): |
| 30 | + """Work with real requests.Response or a simple test double that may lack raise_for_status().""" |
| 31 | + if hasattr(res, "raise_for_status"): |
| 32 | + res.raise_for_status() |
| 33 | + return |
| 34 | + code = getattr(res, "status_code", 200) |
| 35 | + if code is None: |
| 36 | + return |
| 37 | + if int(code) >= 400: |
| 38 | + raise RuntimeError(f"HTTP error {code}") |
| 39 | + |
| 40 | +def _to_json(res) -> dict: |
| 41 | + """Get JSON from real response or from a test double with .text or an injected ._json.""" |
| 42 | + if hasattr(res, "json"): |
| 43 | + return res.json() |
| 44 | + if hasattr(res, "text"): |
| 45 | + try: |
| 46 | + return json.loads(res.text or "{}") |
| 47 | + except Exception: |
| 48 | + return {} |
| 49 | + data = getattr(res, "_json", None) |
| 50 | + return data if isinstance(data, dict) else {} |
| 51 | + |
| 52 | + |
| 53 | +# ------------------------------ |
| 54 | +# Zenodo API primitives |
| 55 | +# ------------------------------ |
| 56 | + |
def _api_base() -> str:
    """Resolve the Zenodo API base URL without a trailing slash.

    Precedence: ``settings.ZENODO_API_BASE``, then the ``ZENODO_API_BASE``
    environment variable, then the Zenodo sandbox API.
    """
    env_default = os.getenv("ZENODO_API_BASE", "https://sandbox.zenodo.org/api")
    base = getattr(settings, "ZENODO_API_BASE", env_default)
    return base.rstrip("/")
| 63 | + |
def _token() -> str:
    """Return the Zenodo API token, preferring Django settings over the env.

    Raises ``SystemExit`` with a hint when neither source provides one.
    """
    token = getattr(settings, "ZENODO_SANDBOX_TOKEN", None) or os.getenv("ZENODO_API_TOKEN")
    if token:
        return token
    raise SystemExit("No Zenodo API token. Set settings.ZENODO_SANDBOX_TOKEN or ZENODO_API_TOKEN.")
| 69 | + |
def _get_deposition(api_base: str, token: str, deposition_id: str) -> dict:
    """Fetch the deposition record for *deposition_id* and return it as a dict."""
    url = f"{api_base}/deposit/depositions/{deposition_id}"
    response = requests.get(url, params={"access_token": token})
    _ensure_ok(response)
    return _to_json(response)
| 77 | + |
def _post_edit(api_base: str, token: str, deposition_id: str) -> None:
    """Unlock a deposition for editing via Zenodo's 'edit' action endpoint."""
    url = f"{api_base}/deposit/depositions/{deposition_id}/actions/edit"
    response = requests.post(url, params={"access_token": token})
    _ensure_ok(response)
| 84 | + |
def _put_metadata(api_base: str, token: str, deposition_id: str, metadata: dict) -> None:
    """Replace the deposition's metadata with *metadata* via an HTTP PUT."""
    payload = json.dumps({"metadata": metadata})
    response = requests.put(
        f"{api_base}/deposit/depositions/{deposition_id}",
        params={"access_token": token},
        headers={"Content-Type": "application/json"},
        data=payload,
    )
    _ensure_ok(response)
| 95 | + |
def _upload_files(api_base: str, token: str, deposition: dict, paths: list[Path]) -> None:
    """Stream each file in *paths* into the deposition's file bucket.

    Raises ``SystemExit`` when the deposition has no bucket link.
    """
    bucket = deposition.get("links", {}).get("bucket")
    if not bucket:
        raise SystemExit("No bucket link on deposition; cannot upload files.")
    for path in paths:
        with open(path, "rb") as stream:
            response = requests.put(
                f"{bucket}/{path.name}",
                params={"access_token": token},
                data=stream,
            )
            _ensure_ok(response)
| 104 | + |
| 105 | + |
| 106 | +# ------------------------------ |
| 107 | +# Merge helpers for patching |
| 108 | +# ------------------------------ |
| 109 | + |
| 110 | +def _merge_list_unique(existing: list[str], incoming: list[str]) -> list[str]: |
| 111 | + seen, out = set(), [] |
| 112 | + for x in (existing or []) + (incoming or []): |
| 113 | + if x not in seen: |
| 114 | + seen.add(x) |
| 115 | + out.append(x) |
| 116 | + return out |
| 117 | + |
| 118 | +def _merge_related(existing: list[dict], incoming: list[dict]) -> list[dict]: |
| 119 | + key = lambda d: (d.get("identifier"), d.get("relation"), d.get("scheme")) |
| 120 | + seen, out = set(), [] |
| 121 | + for d in (existing or []) + (incoming or []): |
| 122 | + k = key(d) |
| 123 | + if k not in seen: |
| 124 | + seen.add(k) |
| 125 | + out.append(d) |
| 126 | + return out |
| 127 | + |
| 128 | + |
| 129 | +# ------------------------------ |
| 130 | +# Locate latest generated dumps |
| 131 | +# ------------------------------ |
| 132 | + |
| 133 | +def _find_latest_dump_files() -> list[Path]: |
| 134 | + tmp = Path(os.getenv("TMPDIR", "/tmp")) / "optimap_cache" |
| 135 | + if not tmp.exists(): |
| 136 | + return [] |
| 137 | + candidates = list(tmp.glob("optimap_data_dump_*.geojson")) + \ |
| 138 | + list(tmp.glob("optimap_data_dump_*.geojson.gz")) + \ |
| 139 | + list(tmp.glob("optimap_data_dump_*.gpkg")) |
| 140 | + by_ext: dict[str, Path] = {} |
| 141 | + for p in candidates: |
| 142 | + key = p.suffix if p.suffix != ".gz" else ".geojson.gz" |
| 143 | + if key not in by_ext or p.stat().st_mtime > by_ext[key].stat().st_mtime: |
| 144 | + by_ext[key] = p |
| 145 | + return [p for p in by_ext.values() if p.exists()] |
| 146 | + |
| 147 | + |
| 148 | +# ------------------------------ |
| 149 | +# Compatibility shim used by tests (also OK in prod) |
| 150 | +# ------------------------------ |
| 151 | + |
def update_zenodo(deposition_id: str,
                  paths: list[Path],
                  sandbox: bool = True,
                  access_token: str | None = None,
                  publish: bool = False):
    """Upload *paths* to the deposition draft's bucket.

    Compatibility wrapper keeping the signature expected by the tests.
    The ``sandbox`` and ``publish`` flags are accepted but not acted upon.
    Returns a response-like object whose ``json()`` yields a links payload
    pointing at the draft (or published) HTML page.
    """
    base = _api_base()
    tok = access_token or _token()
    deposition = _get_deposition(base, tok, str(deposition_id))
    _upload_files(base, tok, deposition, [Path(p) for p in paths])

    class _LinkResponse:
        def json(self):
            links = deposition.get("links", {})
            return {"links": {"html": links.get("latest_draft_html") or links.get("html")}}

    return _LinkResponse()
| 172 | + |
| 173 | + |
| 174 | +# ------------------------------ |
| 175 | +# Management command |
| 176 | +# ------------------------------ |
| 177 | + |
class Command(BaseCommand):
    """Patch selected metadata fields and upload the latest data files to an
    existing Zenodo deposition draft.  Never publishes the draft.
    """

    help = "Update an existing Zenodo deposition (no publish): patch selected metadata fields and upload latest files."

    def add_arguments(self, parser):
        """Register CLI options: target deposition, safety switch, which
        metadata fields to patch, merge-vs-replace modes, and build skip."""
        parser.add_argument("--deposition-id", required=False, help="Zenodo deposition (draft) ID")
        parser.add_argument("--confirm", action="store_true", help="Required to execute (safety switch)")
        parser.add_argument("--patch", default="description,version,keywords,related_identifiers",
                            help="Comma-separated metadata fields to update")
        parser.add_argument("--merge-keywords", action="store_true", help="Union keywords instead of replace")
        parser.add_argument("--merge-related", action="store_true", help="Union related_identifiers instead of replace")
        parser.add_argument("--no-build", action="store_true",
                            help="Do not rebuild README/ZIP/dynamic JSON; use existing files")

    def handle(self, *args, **opts):
        """Build artifacts, patch deposition metadata, and upload files.

        Aborts silently unless ``--confirm`` is given.  Raises SystemExit
        when the deposition ID or a required artifact in ``data/`` is
        missing.
        """
        # Safety switch: refuse to touch the deposition without --confirm.
        if not opts["confirm"]:
            self.stdout.write("Add --confirm to proceed.")
            return

        # Deposition ID from the CLI flag, falling back to Django settings.
        deposition_id = opts.get("deposition_id") or getattr(settings, "ZENODO_SANDBOX_DEPOSITION_ID", None)
        if not deposition_id:
            raise SystemExit("No deposition ID provided. Use --deposition-id or settings.ZENODO_SANDBOX_DEPOSITION_ID.")

        # parents[3] climbs commands/ -> management/ -> publications/ -> project root
        # (per this file's path: publications/management/commands/deploy_zenodo.py).
        project_root = Path(__file__).resolve().parents[3]
        data_dir = project_root / "data"
        data_dir.mkdir(exist_ok=True)

        # Build README/ZIP/dynamic JSON unless skipped
        if not opts.get("no_build"):
            self.stdout.write("Generating optimap-main.zip and README.md…")
            call_command("render_zenodo_desc")

        # All three artifacts are required, whether freshly built or pre-existing.
        readme_path = data_dir / "README.md"
        zip_path = data_dir / "optimap-main.zip"
        dyn_path = data_dir / "zenodo_dynamic.json"
        if not readme_path.exists() or not zip_path.exists() or not dyn_path.exists():
            raise SystemExit("Missing artifacts in data/: README.md, optimap-main.zip, zenodo_dynamic.json")

        description_md = readme_path.read_text(encoding="utf-8")
        description_html = _markdown_to_html(description_md)
        # zenodo_dynamic.json supplies the generated version/keywords/related_identifiers.
        dyn = json.loads(dyn_path.read_text(encoding="utf-8"))

        # Normalize the --patch CSV into a clean field list.
        patch_fields = [f.strip() for f in opts["patch"].split(",") if f.strip()]

        api_base = _api_base()
        token = _token()

        dep = _get_deposition(api_base, token, str(deposition_id))
        meta = dep.get("metadata", {}) or {}

        # Selective metadata update (no clobber of unrelated fields)
        incoming: dict = {}
        if "description" in patch_fields:
            incoming["description"] = description_html
        if "version" in patch_fields and "version" in dyn:
            incoming["version"] = dyn["version"]
        if "keywords" in patch_fields and "keywords" in dyn:
            # --merge-keywords unions with existing keywords; default replaces.
            if opts["merge_keywords"]:
                incoming["keywords"] = _merge_list_unique(meta.get("keywords", []), dyn["keywords"])
            else:
                incoming["keywords"] = dyn["keywords"]
        if "related_identifiers" in patch_fields and "related_identifiers" in dyn:
            # --merge-related unions with existing entries; default replaces.
            if opts["merge_related"]:
                incoming["related_identifiers"] = _merge_related(meta.get("related_identifiers", []), dyn["related_identifiers"])
            else:
                incoming["related_identifiers"] = dyn["related_identifiers"]

        # Overlay the patched fields onto the existing metadata dict.
        new_meta = {**meta, **incoming}

        # Try updating directly; on error (e.g., locked), POST edit then retry
        try:
            _put_metadata(api_base, token, str(deposition_id), new_meta)
            self.stdout.write("Metadata updated (merged, no clobber).")
        except Exception:
            _post_edit(api_base, token, str(deposition_id))
            _put_metadata(api_base, token, str(deposition_id), new_meta)
            self.stdout.write("Metadata updated after edit action.")

        # Ensure dumps exist; regenerate missing ones
        latest = _find_latest_dump_files()
        exts = {(p.suffix if p.suffix != ".gz" else ".geojson.gz") for p in latest}
        try:
            if ".gpkg" not in exts:
                from publications.tasks import regenerate_geopackage_cache
                regenerate_geopackage_cache()
            if (".geojson" not in exts) and (".geojson.gz" not in exts):
                from publications.tasks import regenerate_geojson_cache
                regenerate_geojson_cache()
        except Exception as e:
            # Best-effort: a failed regeneration is reported, not fatal.
            self.stderr.write(f"Warning: could not regenerate missing dumps: {e}")
        # Re-scan so any freshly regenerated dumps are picked up.
        latest = _find_latest_dump_files()

        self.stdout.write("Uploading files to existing Zenodo sandbox draft…")
        paths = [readme_path, zip_path] + latest

        # IMPORTANT: call the shim POSITIONALLY (no kwargs) for test doubles
        res = update_zenodo(str(deposition_id), paths, ("sandbox." in api_base), token, False)

        # Prefer the link reported by the shim's response payload.
        html = None
        try:
            html = res.json().get("links", {}).get("html")
        except Exception:
            pass

        # Fall back to re-fetching the deposition when no link was returned.
        if not html:
            dep2 = _get_deposition(api_base, token, str(deposition_id))
            html = dep2.get("links", {}).get("latest_draft_html") or dep2.get("links", {}).get("html")

        self.stdout.write(self.style.SUCCESS(f"✅ Updated deposition {deposition_id} at {html or '(no link)'}"))