Skip to content

Commit ac17dcd

Browse files
committed
Upload to zenodo with tests
1 parent 530e93d commit ac17dcd

10 files changed

Lines changed: 794 additions & 0 deletions

File tree

data/README.md

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# OPTIMAP FAIR Data Package
2+
**Version:** v17
3+
**Generated on:** 2025-08-25
4+
5+
---
6+
7+
## Dataset Summary
8+
- **Total articles:** 28
9+
- **Articles with spatial data:** 20
10+
- **Articles with temporal coverage:** 28
11+
- **Earliest publication date:** 2010-10-10
12+
- **Latest publication date:**
13+
14+
---
15+
16+
## Sources
17+
18+
---
19+
20+
## Codebook
21+
22+
| Field | Description |
23+
|---|---|
24+
| `id` | Primary key of the publication record |
25+
| `title` | Title of the article |
26+
| `abstract` | Abstract or summary |
27+
| `doi` | Digital Object Identifier (if available) |
28+
| `url` | URL to the article or preprint |
29+
| `publicationDate` | Date of publication (ISO format) |
30+
| `geometry` | Spatial geometry in GeoJSON/WKT |
31+
| `timeperiod_startdate` | Coverage start dates (ISO format) |
32+
| `timeperiod_enddate` | Coverage end dates (ISO format) |
33+
| `provenance` | Source/method by which the record was imported/added |
34+
35+
---
36+
37+
## Keywords
38+
- open access
39+
- open science
40+
- open research information
41+
- ORI
42+
- open data
43+
- FAIR
44+
45+
---
46+
47+
## License
48+
49+
This record includes:
50+
- **Data files** under **CC0-1.0** (https://creativecommons.org/publicdomain/zero/1.0/)
51+
- **optimap-main.zip** (code snapshot) under **GNU GPL v3** (https://opensource.org/licenses/GPL-3.0)

data/last_version.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
v17

data/optimap-main.zip

1020 KB
Binary file not shown.

data/zenodo_dynamic.json

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"version": "v17",
3+
"keywords": [
4+
"Open Access",
5+
"Open Science",
6+
"ORI",
7+
"Open Data",
8+
"FAIR"
9+
],
10+
"related_identifiers": [
11+
{
12+
"relation": "isSupplementTo",
13+
"identifier": "http://127.0.0.1:8000/data/optimap_data_dump_latest.geojson.gz",
14+
"scheme": "url"
15+
},
16+
{
17+
"relation": "isSupplementTo",
18+
"identifier": "http://127.0.0.1:8000/data/optimap_data_dump_latest.gpkg",
19+
"scheme": "url"
20+
}
21+
]
22+
}

optimap/settings.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,10 @@
215215
DATA_DUMP_INTERVAL_HOURS = 6
216216
OPENALEX_MAILTO = "login@optimap.science"
217217

218+
# Zenodo deployment settings.
# os.getenv() takes the *name* of an environment variable, never the value.
# Secrets must only live in the environment, not in source control.
# NOTE(review): a literal token was previously committed on this line; treat
# it as compromised and revoke it in the Zenodo sandbox account.
ZENODO_SANDBOX_TOKEN = os.getenv("ZENODO_SANDBOX_TOKEN")  # sandbox API token, read from env
ZENODO_SANDBOX_DEPOSITION_ID = os.getenv("ZENODO_SANDBOX_DEPOSITION_ID")  # existing draft ID
ZENODO_API_BASE = os.getenv("ZENODO_API_BASE", "https://sandbox.zenodo.org/api")  # or use the real one for production
221+
218222
MIDDLEWARE = [
219223
'django.middleware.cache.UpdateCacheMiddleware',
220224
'django.middleware.common.CommonMiddleware',
Lines changed: 285 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,285 @@
1+
# publications/management/commands/deploy_zenodo.py
2+
import json
3+
import os
4+
from pathlib import Path
5+
6+
import requests
7+
from django.conf import settings
8+
from django.core.management import BaseCommand, call_command
9+
10+
11+
def _markdown_to_html(text: str) -> str:
    """Convert Markdown to HTML.

    Uses the optional ``markdown`` package when it is importable. If that
    fails for any reason, falls back to a minimal conversion: ``&``, ``<`` and
    ``>`` are HTML-escaped, blank-line-separated chunks become ``<p>``
    paragraphs, and remaining single newlines become ``<br>``.
    """
    try:
        import markdown  # optional dependency
        return markdown.markdown(text)
    except Exception:
        # Escape "&" first so the entities generated for "<" and ">" below
        # are not themselves re-escaped.
        esc = (
            text.replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
        )
        return "<p>" + esc.replace("\n\n", "</p><p>").replace("\n", "<br>") + "</p>"
23+
24+
25+
# ------------------------------
26+
# HTTP helpers (robust to test doubles)
27+
# ------------------------------
28+
29+
def _ensure_ok(res):
    """Raise if *res* represents a failed HTTP call.

    A response object that exposes ``raise_for_status`` is checked by calling
    it. Otherwise its ``status_code`` attribute (default 200) is inspected and
    a ``RuntimeError`` is raised for any code of 400 or above; a ``None`` code
    is treated as success.
    """
    missing = object()
    checker = getattr(res, "raise_for_status", missing)
    if checker is not missing:
        checker()
        return

    status = getattr(res, "status_code", 200)
    if status is not None and int(status) >= 400:
        raise RuntimeError(f"HTTP error {status}")
39+
40+
def _to_json(res) -> dict:
    """Extract a JSON payload from a response-like object.

    Lookup order: a ``json()`` method, then a ``text`` attribute parsed as
    JSON (empty text counts as ``{}``; unparsable text yields ``{}``), then an
    injected ``_json`` attribute, which is only accepted when it is a dict.
    """
    if hasattr(res, "json"):
        return res.json()

    if not hasattr(res, "text"):
        injected = getattr(res, "_json", None)
        if isinstance(injected, dict):
            return injected
        return {}

    try:
        payload = json.loads(res.text or "{}")
    except Exception:
        payload = {}
    return payload
51+
52+
53+
# ------------------------------
54+
# Zenodo API primitives
55+
# ------------------------------
56+
57+
def _api_base() -> str:
    """Return the Zenodo API base URL without a trailing slash.

    ``settings.ZENODO_API_BASE`` wins when defined; otherwise the
    ``ZENODO_API_BASE`` environment variable is used, defaulting to the
    sandbox endpoint.
    """
    env_fallback = os.getenv("ZENODO_API_BASE", "https://sandbox.zenodo.org/api")
    configured = getattr(settings, "ZENODO_API_BASE", env_fallback)
    return configured.rstrip("/")
63+
64+
def _token() -> str:
    """Return the Zenodo API token or abort.

    Reads ``settings.ZENODO_SANDBOX_TOKEN`` first and falls back to the
    ``ZENODO_API_TOKEN`` environment variable when the setting is missing or
    empty. Raises ``SystemExit`` when neither yields a value.
    """
    candidate = getattr(settings, "ZENODO_SANDBOX_TOKEN", None)
    if not candidate:
        candidate = os.getenv("ZENODO_API_TOKEN")
    if candidate:
        return candidate
    raise SystemExit("No Zenodo API token. Set settings.ZENODO_SANDBOX_TOKEN or ZENODO_API_TOKEN.")
69+
70+
def _get_deposition(api_base: str, token: str, deposition_id: str) -> dict:
    """GET a single deposition and return its decoded JSON body.

    Raises whatever ``_ensure_ok`` raises when the response signals failure.
    """
    url = f"{api_base}/deposit/depositions/{deposition_id}"
    response = requests.get(url, params={"access_token": token})
    _ensure_ok(response)
    return _to_json(response)
77+
78+
def _post_edit(api_base: str, token: str, deposition_id: str) -> None:
    """POST the ``actions/edit`` action for a deposition and check the response."""
    url = f"{api_base}/deposit/depositions/{deposition_id}/actions/edit"
    response = requests.post(url, params={"access_token": token})
    _ensure_ok(response)
84+
85+
def _put_metadata(api_base: str, token: str, deposition_id: str, metadata: dict) -> None:
    """PUT *metadata* onto a deposition as a JSON body of the form ``{"metadata": ...}``."""
    body = json.dumps({"metadata": metadata})
    response = requests.put(
        f"{api_base}/deposit/depositions/{deposition_id}",
        params={"access_token": token},
        headers={"Content-Type": "application/json"},
        data=body,
    )
    _ensure_ok(response)
95+
96+
def _upload_files(api_base: str, token: str, deposition: dict, paths: list[Path]) -> None:
    """Upload each file in *paths* to the deposition's bucket URL.

    The bucket URL is read from ``deposition["links"]["bucket"]``; each file
    is PUT to ``<bucket>/<file name>`` as a binary stream. Raises
    ``SystemExit`` when the deposition carries no bucket link.
    """
    bucket_url = deposition.get("links", {}).get("bucket")
    if not bucket_url:
        raise SystemExit("No bucket link on deposition; cannot upload files.")

    for path in paths:
        target = f"{bucket_url}/{path.name}"
        with open(path, "rb") as stream:
            response = requests.put(target, params={"access_token": token}, data=stream)
        _ensure_ok(response)
104+
105+
106+
# ------------------------------
107+
# Merge helpers for patching
108+
# ------------------------------
109+
110+
def _merge_list_unique(existing: list[str], incoming: list[str]) -> list[str]:
    """Concatenate both lists and drop repeats, keeping first-seen order.

    Either argument may be ``None`` or empty.
    """
    combined = (existing or []) + (incoming or [])
    # dict keys preserve insertion order, so this keeps the first occurrence.
    return list(dict.fromkeys(combined))
117+
118+
def _merge_related(existing: list[dict], incoming: list[dict]) -> list[dict]:
    """Union two lists of related-identifier dicts.

    Entries are considered equal when their ``identifier``, ``relation`` and
    ``scheme`` values match; the first entry seen for each triple is kept and
    original order is preserved. Either argument may be ``None`` or empty.
    """
    first_by_key: dict = {}
    for entry in (existing or []) + (incoming or []):
        triple = (entry.get("identifier"), entry.get("relation"), entry.get("scheme"))
        first_by_key.setdefault(triple, entry)
    return list(first_by_key.values())
127+
128+
129+
# ------------------------------
130+
# Locate latest generated dumps
131+
# ------------------------------
132+
133+
def _find_latest_dump_files() -> list[Path]:
    """Return the newest dump file of each kind from the cache directory.

    Looks in ``$TMPDIR/optimap_cache`` (``/tmp`` when ``TMPDIR`` is unset) for
    ``optimap_data_dump_*`` files ending in ``.geojson``, ``.geojson.gz`` or
    ``.gpkg`` and keeps, per extension, the one with the latest modification
    time. Returns an empty list when the directory does not exist.
    """
    cache_dir = Path(os.getenv("TMPDIR", "/tmp")) / "optimap_cache"
    if not cache_dir.exists():
        return []

    patterns = (
        "optimap_data_dump_*.geojson",
        "optimap_data_dump_*.geojson.gz",
        "optimap_data_dump_*.gpkg",
    )
    found = [path for pattern in patterns for path in cache_dir.glob(pattern)]

    newest: dict[str, Path] = {}
    for path in found:
        # Path.suffix only reports the last extension, so map ".gz" back to
        # the compound ".geojson.gz" kind.
        kind = ".geojson.gz" if path.suffix == ".gz" else path.suffix
        current = newest.get(kind)
        if current is None or path.stat().st_mtime > current.stat().st_mtime:
            newest[kind] = path

    return [path for path in newest.values() if path.exists()]
146+
147+
148+
# ------------------------------
149+
# Compatibility shim used by tests (also OK in prod)
150+
# ------------------------------
151+
152+
def update_zenodo(deposition_id: str,
                  paths: list[Path],
                  sandbox: bool = True,
                  access_token: str | None = None,
                  publish: bool = False):
    """Upload *paths* to an existing deposition's bucket.

    Thin wrapper that keeps the signature expected by the tests. ``sandbox``
    and ``publish`` are accepted but not used. When ``access_token`` is falsy
    the token is resolved via ``_token()``.

    Returns an object whose ``json()`` yields
    ``{"links": {"html": <draft or record URL>}}`` taken from the deposition
    fetched before the upload.
    """
    base_url = _api_base()
    auth_token = access_token or _token()
    dep = _get_deposition(base_url, auth_token, str(deposition_id))
    file_paths = [Path(item) for item in paths]
    _upload_files(base_url, auth_token, dep, file_paths)

    class _Resp:
        def json(self):
            dep_links = dep.get("links", {})
            # Prefer the draft page; fall back to the record page.
            page = dep_links.get("latest_draft_html") or dep_links.get("html")
            return {"links": {"html": page}}

    return _Resp()
172+
173+
174+
# ------------------------------
175+
# Management command
176+
# ------------------------------
177+
178+
class Command(BaseCommand):
    """Management command that refreshes an existing Zenodo draft deposition.

    It patches a selectable subset of metadata fields from the artifacts in
    ``data/`` and uploads the README, the code ZIP and the latest data dumps.
    It never publishes the deposition.
    """

    help = "Update an existing Zenodo deposition (no publish): patch selected metadata fields and upload latest files."

    def add_arguments(self, parser):
        """Register the command-line options of this command on *parser*."""
        parser.add_argument("--deposition-id", required=False, help="Zenodo deposition (draft) ID")
        parser.add_argument("--confirm", action="store_true", help="Required to execute (safety switch)")
        parser.add_argument("--patch", default="description,version,keywords,related_identifiers",
                            help="Comma-separated metadata fields to update")
        parser.add_argument("--merge-keywords", action="store_true", help="Union keywords instead of replace")
        parser.add_argument("--merge-related", action="store_true", help="Union related_identifiers instead of replace")
        parser.add_argument("--no-build", action="store_true",
                            help="Do not rebuild README/ZIP/dynamic JSON; use existing files")

    def handle(self, *args, **opts):
        """Run the update: build artifacts, patch metadata, upload files.

        Does nothing but print a hint unless ``--confirm`` is given. Raises
        ``SystemExit`` when no deposition ID can be determined or when any of
        the three expected artifacts is missing from ``data/``.
        """
        # Safety switch: without --confirm nothing is touched.
        if not opts["confirm"]:
            self.stdout.write("Add --confirm to proceed.")
            return

        # The command-line option takes precedence over the configured default.
        deposition_id = opts.get("deposition_id") or getattr(settings, "ZENODO_SANDBOX_DEPOSITION_ID", None)
        if not deposition_id:
            raise SystemExit("No deposition ID provided. Use --deposition-id or settings.ZENODO_SANDBOX_DEPOSITION_ID.")

        # This file lives at publications/management/commands/, so three
        # levels above its directory is the project root.
        project_root = Path(__file__).resolve().parents[3]
        data_dir = project_root / "data"
        data_dir.mkdir(exist_ok=True)

        # Build README/ZIP/dynamic JSON unless skipped
        # (presumably all three are produced by render_zenodo_desc — verify
        # against that command).
        if not opts.get("no_build"):
            self.stdout.write("Generating optimap-main.zip and README.md…")
            call_command("render_zenodo_desc")

        readme_path = data_dir / "README.md"
        zip_path = data_dir / "optimap-main.zip"
        dyn_path = data_dir / "zenodo_dynamic.json"
        if not readme_path.exists() or not zip_path.exists() or not dyn_path.exists():
            raise SystemExit("Missing artifacts in data/: README.md, optimap-main.zip, zenodo_dynamic.json")

        # The README doubles as the deposition description (sent as HTML).
        description_md = readme_path.read_text(encoding="utf-8")
        description_html = _markdown_to_html(description_md)
        dyn = json.loads(dyn_path.read_text(encoding="utf-8"))

        # Normalise "--patch a, b,,c" into ["a", "b", "c"].
        patch_fields = [f.strip() for f in opts["patch"].split(",") if f.strip()]

        api_base = _api_base()
        token = _token()

        # Current state of the deposition; its metadata is the merge base.
        dep = _get_deposition(api_base, token, str(deposition_id))
        meta = dep.get("metadata", {}) or {}

        # Selective metadata update (no clobber of unrelated fields)
        incoming: dict = {}
        if "description" in patch_fields:
            incoming["description"] = description_html
        if "version" in patch_fields and "version" in dyn:
            incoming["version"] = dyn["version"]
        if "keywords" in patch_fields and "keywords" in dyn:
            if opts["merge_keywords"]:
                incoming["keywords"] = _merge_list_unique(meta.get("keywords", []), dyn["keywords"])
            else:
                incoming["keywords"] = dyn["keywords"]
        if "related_identifiers" in patch_fields and "related_identifiers" in dyn:
            if opts["merge_related"]:
                incoming["related_identifiers"] = _merge_related(meta.get("related_identifiers", []), dyn["related_identifiers"])
            else:
                incoming["related_identifiers"] = dyn["related_identifiers"]

        # Existing metadata first, patched fields overlaid on top.
        new_meta = {**meta, **incoming}

        # Try updating directly; on error (e.g., locked), POST edit then retry
        # NOTE(review): any exception from the first attempt triggers the
        # edit-and-retry path, not only a "locked" response.
        try:
            _put_metadata(api_base, token, str(deposition_id), new_meta)
            self.stdout.write("Metadata updated (merged, no clobber).")
        except Exception:
            _post_edit(api_base, token, str(deposition_id))
            _put_metadata(api_base, token, str(deposition_id), new_meta)
            self.stdout.write("Metadata updated after edit action.")

        # Ensure dumps exist; regenerate missing ones
        latest = _find_latest_dump_files()
        # Same extension mapping as _find_latest_dump_files: ".gz" stands for
        # the compound ".geojson.gz".
        exts = {(p.suffix if p.suffix != ".gz" else ".geojson.gz") for p in latest}
        try:
            if ".gpkg" not in exts:
                from publications.tasks import regenerate_geopackage_cache
                regenerate_geopackage_cache()
            if (".geojson" not in exts) and (".geojson.gz" not in exts):
                from publications.tasks import regenerate_geojson_cache
                regenerate_geojson_cache()
        except Exception as e:
            # Best effort: a failed regeneration only produces a warning and
            # the upload continues with whatever dumps are available.
            self.stderr.write(f"Warning: could not regenerate missing dumps: {e}")
        # Re-scan so freshly regenerated dumps are picked up.
        latest = _find_latest_dump_files()

        self.stdout.write("Uploading files to existing Zenodo sandbox draft…")
        paths = [readme_path, zip_path] + latest

        # IMPORTANT: call the shim POSITIONALLY (no kwargs) for test doubles
        # Arguments: deposition ID, files, sandbox flag (derived from the API
        # base URL), token, publish=False.
        res = update_zenodo(str(deposition_id), paths, ("sandbox." in api_base), token, False)

        # Prefer the link reported by the upload result; tolerate results
        # that lack a usable json() payload.
        html = None
        try:
            html = res.json().get("links", {}).get("html")
        except Exception:
            pass

        # Fall back to re-fetching the deposition for a link to report.
        if not html:
            dep2 = _get_deposition(api_base, token, str(deposition_id))
            html = dep2.get("links", {}).get("latest_draft_html") or dep2.get("links", {}).get("html")

        self.stdout.write(self.style.SUCCESS(f"✅ Updated deposition {deposition_id} at {html or '(no link)'}"))

0 commit comments

Comments
 (0)