Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
457 changes: 277 additions & 180 deletions Cargo.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
[workspace]
members = [
"crates/orcid-fetcher-logger",
"crates/orcid-works-model",
"crates/orcid-works-cli"
"crates/orcid-works-cli",
]
resolver = "2"
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ orcid-works-cli --id $ORCID_ID [Options]
| `--rate-limit` \<u32\> | Requests-per-second cap (1–40). See also [Guidelines](#guidelines) section. | `12` |
| `--user-agent-note` \<String\> | Text appended to the built-in User-Agent string | *(none)* |
| `--force-fetch` | Ignore diff and refetch every work-detail entry | `false` |
| `-v, -vv`, `--verbose` | Increase console verbosity | — |
| `-q, -qq, -qqq`, `--quiet` | Decrease console verbosity | — |
| `-l`, `--log` \<PathBuf\> | Output trace log file path (parent dirs auto-created) | *(none)* |
| `-h`, `--help` | Print help | — |
| `-V`, `--version` | Print version | — |

Expand Down
10 changes: 10 additions & 0 deletions crates/orcid-fetcher-logger/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[package]
name = "orcid-fetcher-logger"
version = "0.0.0"
edition = "2024"
license = "Apache-2.0"

[dependencies]
anyhow = "1"
flexi_logger = "0.31"
log = "0.4"
32 changes: 32 additions & 0 deletions crates/orcid-fetcher-logger/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
use anyhow::{Context, Result};
use flexi_logger::{Duplicate, FileSpec, LevelFilter, Logger};
use std::path::PathBuf;

/// Maps the number of `-v` (verbose) and `-q` (quiet) flags to a console log level.
///
/// The levels form the scale `Off < Error < Warn < Info < Debug < Trace`.
/// Starting from `Info`, every `v` moves one step towards `Trace` and every
/// `q` one step towards `Off`; results beyond either end are clamped.
///
/// * `v` – how many times the verbose flag was given.
/// * `q` – how many times the quiet flag was given.
pub fn console_level(v: u8, q: u8) -> LevelFilter {
    // Position of `Info` on the Off(0) ..= Trace(5) scale.
    const DEFAULT_IDX: i16 = 3;

    // Widen before doing arithmetic: casting a `u8` count with `as i8` wraps
    // for values >= 128, and `3i8 + v as i8` overflows for v >= 125, which
    // would flip the result (or panic in debug builds). In `i16` the whole
    // range 3 - 255 ..= 3 + 255 is representable.
    let idx = DEFAULT_IDX + i16::from(v) - i16::from(q);

    match idx {
        i16::MIN..=0 => LevelFilter::Off, // -qqq (or more)
        1 => LevelFilter::Error,          // -qq
        2 => LevelFilter::Warn,           // -q
        3 => LevelFilter::Info,           // default
        4 => LevelFilter::Debug,          // -v
        _ => LevelFilter::Trace,          // -vv (or more)
    }
}

/// Initialises the process-wide logger.
///
/// * `console_level` – level used for console output.
/// * `log_file` – when `Some`, records down to `Trace` are written to that
///   file and duplicated to stderr at `console_level`; when `None`, the
///   logger is simply started with `console_level`.
///
/// # Errors
/// Returns an error if the path cannot be converted into a `FileSpec` or if
/// the logger fails to start.
pub fn init_logger(console_level: LevelFilter, log_file: Option<PathBuf>) -> Result<()> {
    let logger = match &log_file {
        Some(path) => Logger::with(LevelFilter::Trace)
            .log_to_file(
                FileSpec::try_from(path)
                    .with_context(|| format!("invalid log file path {}", path.display()))?,
            )
            .duplicate_to_stderr(Duplicate::from(console_level)),
        None => Logger::with(console_level),
    };

    let handle = logger.start().context("failed to start logger")?;

    // flexi_logger documents that dropping the handle returned by `start()`
    // shuts down file writers and may prevent further logging. This function
    // returns `()`, so the caller cannot keep the handle; leak it on purpose
    // so the logger stays active until the process exits.
    // NOTE(review): this relies on the default unbuffered write mode, since
    // the handle's flush-on-drop never runs — confirm if a buffered mode is
    // ever configured here.
    std::mem::forget(handle);

    Ok(())
}
7 changes: 5 additions & 2 deletions crates/orcid-works-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,19 @@ build = "build.rs"
license = "Apache-2.0"

[dependencies]
orcid-fetcher-logger = { path = "../orcid-fetcher-logger" , version = "0.0.0" }
orcid-works-model = { path = "../orcid-works-model" , version = "0.2.1" }

anyhow = "1"
clap = { version = "4", features = ["derive"] }
flexi_logger = "0.31"
futures = "0.3"
governor = "0.10"
log = "0.4"
reqwest = { version = "0.12", features = ["json", "rustls-tls"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
serde_path_to_error = "0.1"
tempfile = "3"
tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
tracing = "0.1"
tracing-subscriber = "0.3"
uuid = { version = "1", features = ["v4"] }
53 changes: 34 additions & 19 deletions crates/orcid-works-cli/src/api.rs
Original file line number Diff line number Diff line change
@@ -1,69 +1,85 @@
use anyhow::{Context, Result, bail};
use log::{debug, error};
use reqwest::{
Client,
header::{ACCEPT, HeaderValue},
};
use serde::de::DeserializeOwned;
use tracing::{Instrument, error, instrument};
use uuid::Uuid;

use orcid_works_model::{OrcidWorkDetail, OrcidWorks};

const BASE: &str = "https://pub.orcid.org/v3.0";
const JSON_ACCEPT: &str = "application/json";

// Build HTTP Client
pub(crate) fn build_client(ua: &str) -> Result<Client> {
let client = Client::builder().user_agent(ua).build()?;
let client = match Client::builder().user_agent(ua).build() {
Ok(cli) => cli,
Err(e) => {
let eid = Uuid::new_v4();
error!("[{eid}] failed to build HTTP client");
debug!("[{eid}] ua={ua}");
return Err(e).with_context(|| format!("[{eid}] failed to build HTTP client"));
}
};
Ok(client)
}

// Get JSON from URL
#[instrument(name = "get_json", skip_all)]
async fn get_json<T>(client: &Client, url: &str) -> Result<T>
where
T: DeserializeOwned,
{
let res = client
let res = match client
.get(url)
.header(ACCEPT, HeaderValue::from_static(JSON_ACCEPT))
.header(ACCEPT, HeaderValue::from_static("application/json"))
.send()
.await
.with_context(|| format!("GET {url}"))?;
{
Ok(r) => r,
Err(e) => {
let eid = Uuid::new_v4();
error!("E[{eid}] HTTP connection failure");
debug!("E[{eid}] url={url}");
return Err(e).with_context(|| format!("[{eid}] HTTP connection failure"));
}
};

if res.error_for_status_ref().is_err() {
let status = res.status();
let body = res.text().await.unwrap_or_default();

error!(%status, %url, response_body = %body, "HTTP error");
bail!("HTTP {status} while GET {url}: {body}");
let eid = Uuid::new_v4();
error!("E[{eid}] HTTP {status}");
debug!("E[{eid}] url={url}");
debug!("E[{eid}] response body={body}");
bail!("E[{eid}] HTTP {status}");
}

match res.json::<T>().await {
Ok(parsed) => Ok(parsed),

Err(e) => {
let eid = Uuid::new_v4();
if e.is_decode() {
error!(%url, err = %e, "JSON parse failure");
error!("E[{eid}] JSON parse failure");
} else {
error!(%url, err = %e, "response body read failure");
error!("E[{eid}] response body read failure");
}
Err(e).with_context(|| format!("parse JSON from {url}"))
debug!("E[{eid}] url={url}");
Err(e).with_context(|| format!("E[{eid}] failed to parse JSON"))
}
}
}

// GET /{id}/works
#[instrument(name = "fetch_works", skip_all)]
pub async fn fetch_works(client: &reqwest::Client, id: &str) -> Result<OrcidWorks> {
let url = format!("{BASE}/{id}/works");
get_json::<OrcidWorks>(client, &url)
.in_current_span()
.await
.with_context(|| format!("fetch work summaries for ORCID iD {id}"))
.with_context(|| format!("failed to fetch work summaries of ORCID iD {id}"))
}

// GET /{id}/work/{putcode}
#[instrument(name = "fetch_work_detail", skip_all)]
pub async fn fetch_work_detail(
client: &reqwest::Client,
id: &str,
Expand All @@ -72,7 +88,6 @@ pub async fn fetch_work_detail(
let url = format!("{BASE}/{id}/work/{putcode}");

get_json::<OrcidWorkDetail>(client, &url)
.in_current_span()
.await
.with_context(|| format!("fetch work detail of putcode {putcode}"))
.with_context(|| format!("failed to fetch work detail of putcode {putcode}"))
}
121 changes: 73 additions & 48 deletions crates/orcid-works-cli/src/io.rs
Original file line number Diff line number Diff line change
@@ -1,101 +1,126 @@
use anyhow::{Context, Result};
use anyhow::{Context, Result, bail};
use log::{debug, error, info};
use serde_path_to_error::deserialize;
use std::{
fs::File,
io::{BufReader, BufWriter, ErrorKind, Write},
io::{BufReader, ErrorKind, Write},
path::Path,
};

use tempfile::NamedTempFile;
use tracing::{error, info, instrument, warn};
use uuid::Uuid;

use orcid_works_model::OrcidWorkDetailFile;

// Read the existing JSON file; use the empty list if absent.
#[instrument(name = "read_work_details_json", skip_all)]
pub(crate) fn read_work_details_json<P: AsRef<Path>>(path: P) -> Result<OrcidWorkDetailFile> {
let path = path.as_ref();

match File::open(path) {
Ok(file) => {
// Treat zero-byte files as an empty JSON list.
if file.metadata()?.len() == 0 {
return Ok(OrcidWorkDetailFile { records: vec![] });
}

let reader = BufReader::new(file);
let mut de = serde_json::Deserializer::from_reader(reader);

let data: OrcidWorkDetailFile = deserialize(&mut de).map_err(|e| {
error!(
path = path.display().to_string(),
err = %e,
"JSON parse failure"
);
e
})?;
let eid = Uuid::new_v4();
let data: OrcidWorkDetailFile = deserialize(&mut de)
.inspect_err(|e| {
error!("E[{eid}] JSON parse failure: path={}", path.display());
debug!("E[{eid}] error: {e}");
})
.with_context(|| {
format!("E[{eid}] failed to parse JSON file {}", path.display())
})?;

Ok(data)
}

Err(e) if e.kind() == ErrorKind::NotFound => {
info!(
path = path.display().to_string(),
"file not found; use empty JSON"
);
info!("file not found; use empty JSON: path={}", path.display());
Ok(OrcidWorkDetailFile { records: vec![] })
}

Err(e) => {
error!(path= path.display().to_string(), err = %e, "failed to open work-detail file");
Err(e).with_context(|| format!("open {}", path.display()))
let eid = Uuid::new_v4();
error!(
"E[{eid}] failed to open work-detail file: path={}",
path.display()
);
debug!("E[{eid}] error: {e}");
Err(e).with_context(|| format!("E[{eid}] failed to open {}", path.display()))
}
}
}

// Write JSON file
#[instrument(name = "write_pretty_json", skip_all)]
pub(crate) fn write_pretty_json<P: AsRef<Path>>(
path: P,
value: &OrcidWorkDetailFile,
) -> Result<()> {
let path = path.as_ref();

// Fail fast if the target path is a directory.
if path.is_dir() {
bail!("output path is a directory: {}", path.display());
}

let parent = path.parent().unwrap_or(Path::new("."));

let mut tmp = match NamedTempFile::new_in(parent)
.with_context(|| format!("create temp file for {}", path.display()))
{
let mut temp = match NamedTempFile::new_in(parent) {
Ok(f) => f,
Err(e) => {
error!(path = path.display().to_string(), err = %e, "failed to create temp file");
return Err(e);
let eid = Uuid::new_v4();
debug!("E[{eid}] error: {e}");
error!(
"E[{eid}] failed to create temp file: path={}",
path.display()
);
return Err(e).with_context(|| {
format!("E[{eid}] failed to create temp file for {}", path.display())
});
}
};

if let Err(e) = serde_json::to_writer_pretty(BufWriter::new(&mut tmp), value)
.with_context(|| format!("serialize JSON into {}", path.display()))
{
error!(path = path.display().to_string(), err = %e, "JSON serialization failure");
return Err(e);
if let Err(e) = serde_json::to_writer_pretty(&mut temp, value) {
let eid = Uuid::new_v4();
debug!("E[{eid}] error: {e}");
error!(
"E[{eid}] JSON serialisation failure: path={}",
path.display(),
);
return Err(e)
.with_context(|| format!("E[{eid}] failed to serialise JSON into {}", path.display()));
}

if let Err(e) = tmp.as_file_mut().flush() {
error!(path = path.display().to_string(), err = %e, "flush failure");
return Err(e).context("flush tmp file");
}
if let Err(e) = tmp.as_file_mut().sync_all() {
error!(path = path.display().to_string(), err = %e, "fsync failure");
return Err(e).context("fsync tmp file");
if let Err(e) = temp.as_file_mut().flush() {
let eid = Uuid::new_v4();
debug!("E[{eid}] error: {e}");
error!("E[{eid}] flush failure: path={}", path.display());
return Err(e)
.with_context(|| format!("E[{eid}] failed to flush temp file {}", path.display()));
}

if let Err(e) = tmp
.persist(path)
.map_err(|e| e.error)
.with_context(|| format!("rename temp file into {}", path.display()))
{
error!(path = path.display().to_string(), err = %e, "atomic rename failure");
return Err(e);
if let Err(e) = temp.as_file_mut().sync_all() {
let eid = Uuid::new_v4();
debug!("E[{eid}] error: {e}");
error!("E[{eid}] fsync failure: path={}", path.display());
return Err(e)
.with_context(|| format!("E[{eid}] failed to fsync temp file {}", path.display()));
}

if let Ok(dir) = parent.to_owned().canonicalize() {
if let Ok(dir_fd) = File::open(&dir) {
let _ = dir_fd.sync_all();
}
if let Err(e) = temp.persist(path).map_err(|e| e.error) {
let eid = Uuid::new_v4();
debug!("E[{eid}] error: {e}");
error!("E[{eid}] atomic rename failure path={}", path.display());
return Err(e).with_context(|| {
format!(
"E[{eid}] failed to rename temp file into {}",
path.display()
)
});
}

Ok(())
Expand Down
Loading
Loading