Skip to content

Implement PUT /api/v1/trusted_publishing/tokens API endpoint #11131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 21 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
d6fa9d9
diesel_helpers: Adjust `lower()` fn to also accept `NULL`
Turbo87 May 8, 2025
6eda81f
trustpub: Implement `load_jwks()` fn
Turbo87 May 7, 2025
512c762
trustpub: Add `GITHUB_ISSUER_URL` constant
Turbo87 May 7, 2025
9ed85fa
trustpub: Add `OidcKeyStore` trait
Turbo87 May 7, 2025
a0592db
trustpub: Implement `OidcKeyStore` trait
Turbo87 May 7, 2025
31e6708
trustpub: Add mock `OidcKeyStore` implementation
Turbo87 May 7, 2025
6a2caee
trustpub: Add RSA keys for testing purposes
Turbo87 May 7, 2025
b5eb62b
trustpub: Add `MockOidcKeyStore::with_test_key()` fn
Turbo87 May 8, 2025
83ce793
trustpub: Implement `extract_workflow_filename()` fn
Turbo87 May 7, 2025
222b2b8
trustpub: Implement `UnverifiedClaims::decode()` fn
Turbo87 May 7, 2025
8a2abad
trustpub: Implement `GitHubClaims` struct
Turbo87 May 7, 2025
35d61c0
trustpub: Implement `FullGitHubClaims` struct for testing purposes
Turbo87 May 8, 2025
8cf78d8
trustpub: Implement `AccessToken` struct
Turbo87 May 8, 2025
6928349
database: Add `NewUsedJti` data access object
Turbo87 Apr 28, 2025
83be81e
database: Add `NewToken` data access object
Turbo87 Apr 28, 2025
e3a2835
config: Add `TRUSTPUB_AUDIENCE` setting
Turbo87 May 8, 2025
b2ace7b
App: Add `oidc_key_stores` hashmap
Turbo87 May 7, 2025
d203be6
AppBuilder: Add `trustpub_providers()` fn
Turbo87 May 8, 2025
3cd0e7b
bin/server: Use `TRUSTPUB_PROVIDERS` env var to configure Trusted Pub…
Turbo87 May 8, 2025
a60534f
tests/TestAppBuilder: Add `with_oidc_keystore()` fn
Turbo87 May 7, 2025
4006490
Implement `PUT /api/v1/trusted_publishing/tokens` API endpoint
Turbo87 May 8, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,12 @@ crates_io_index = { path = "crates/crates_io_index", features = ["testing"] }
crates_io_tarball = { path = "crates/crates_io_tarball", features = ["builder"] }
crates_io_team_repo = { path = "crates/crates_io_team_repo", features = ["mock"] }
crates_io_test_db = { path = "crates/crates_io_test_db" }
crates_io_trustpub = { path = "crates/crates_io_trustpub", features = ["test-helpers"] }
claims = "=0.8.0"
diesel = { version = "=2.2.10", features = ["r2d2"] }
googletest = "=0.14.0"
insta = { version = "=1.43.1", features = ["glob", "json", "redactions"] }
jsonwebtoken = "=9.3.1"
regex = "=1.11.1"
sentry = { version = "=0.37.0", features = ["test"] }
tokio = "=1.45.0"
Expand Down
3 changes: 2 additions & 1 deletion crates/crates_io_database/src/models/category.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::schema::*;
use chrono::{DateTime, Utc};
use diesel::dsl;
use diesel::prelude::*;
use diesel::sql_types::Text;
use diesel_async::scoped_futures::ScopedFutureExt;
use diesel_async::{AsyncConnection, AsyncPgConnection, RunQueryDsl};
use futures_util::future::BoxFuture;
Expand All @@ -20,7 +21,7 @@ pub struct Category {
pub created_at: DateTime<Utc>,
}

type WithSlug<'a> = dsl::Eq<categories::slug, crates_io_diesel_helpers::lower<&'a str>>;
type WithSlug<'a> = dsl::Eq<categories::slug, crates_io_diesel_helpers::lower<Text, &'a str>>;

#[derive(Associations, Insertable, Identifiable, Debug, Clone, Copy)]
#[diesel(
Expand Down
4 changes: 4 additions & 0 deletions crates/crates_io_database/src/models/trustpub/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
mod github_config;
mod token;
mod used_jti;

pub use self::github_config::{GitHubConfig, NewGitHubConfig};
pub use self::token::NewToken;
pub use self::used_jti::NewUsedJti;
22 changes: 22 additions & 0 deletions crates/crates_io_database/src/models/trustpub/token.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
use crate::schema::trustpub_tokens;
use chrono::{DateTime, Utc};
use diesel::prelude::*;
use diesel_async::{AsyncPgConnection, RunQueryDsl};

#[derive(Debug, Insertable)]
#[diesel(table_name = trustpub_tokens, check_for_backend(diesel::pg::Pg))]
pub struct NewToken<'a> {
pub expires_at: DateTime<Utc>,
pub hashed_token: &'a [u8],
pub crate_ids: &'a [i32],
}

impl NewToken<'_> {
pub async fn insert(&self, conn: &mut AsyncPgConnection) -> QueryResult<()> {
self.insert_into(trustpub_tokens::table)
.execute(conn)
.await?;

Ok(())
}
}
24 changes: 24 additions & 0 deletions crates/crates_io_database/src/models/trustpub/used_jti.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use crate::schema::trustpub_used_jtis;
use chrono::{DateTime, Utc};
use diesel::prelude::*;
use diesel_async::{AsyncPgConnection, RunQueryDsl};

/// Insertable row for the `trustpub_used_jtis` table.
#[derive(Debug, Insertable)]
#[diesel(table_name = trustpub_used_jtis, check_for_backend(diesel::pg::Pg))]
pub struct NewUsedJti<'a> {
    /// Borrowed string written to the `jti` column.
    pub jti: &'a str,
    /// Value written to the `expires_at` column.
    pub expires_at: DateTime<Utc>,
}

impl<'a> NewUsedJti<'a> {
    /// Builds a row from a borrowed `jti` and an `expires_at` timestamp.
    pub fn new(jti: &'a str, expires_at: DateTime<Utc>) -> Self {
        NewUsedJti { jti, expires_at }
    }

    /// Inserts this row into `trustpub_used_jtis`.
    ///
    /// Returns the row count reported by `execute()`, or the query error.
    pub async fn insert(&self, conn: &mut AsyncPgConnection) -> QueryResult<usize> {
        let statement = diesel::insert_into(trustpub_used_jtis::table).values(self);
        statement.execute(conn).await
    }
}
2 changes: 1 addition & 1 deletion crates/crates_io_diesel_helpers/src/fns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use diesel::sql_types::{Date, Double, Integer, Interval, SingleValue, Text, Time
define_sql_function!(#[aggregate] fn array_agg<T: SingleValue>(x: T) -> Array<T>);
define_sql_function!(fn canon_crate_name(x: Text) -> Text);
define_sql_function!(fn to_char(a: Date, b: Text) -> Text);
define_sql_function!(fn lower(x: Text) -> Text);
define_sql_function!(fn lower<T: SingleValue>(x: T) -> T);
define_sql_function!(fn date_part(x: Text, y: Timestamptz) -> Double);
define_sql_function! {
#[sql_name = "date_part"]
Expand Down
22 changes: 21 additions & 1 deletion crates/crates_io_trustpub/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,30 @@ edition = "2024"
[lints]
workspace = true

[features]
test-helpers = ["dep:bon", "dep:mockall", "dep:serde_json"]

[dependencies]
anyhow = "=1.0.98"
async-trait = "=0.1.88"
bon = { version = "=3.6.3", optional = true }
chrono = { version = "=0.4.41", features = ["serde"] }
jsonwebtoken = "=9.3.1"
mockall = { version = "=0.13.1", optional = true }
rand = "=0.9.1"
reqwest = { version = "=0.12.15", features = ["gzip", "json"] }
regex = "=1.11.1"
secrecy = "=0.10.3"
serde = { version = "=1.0.219", features = ["derive"] }
serde_json = { version = "=1.0.140", optional = true }
sha2 = "=0.10.9"
thiserror = "=2.0.12"
tokio = { version = "=1.45.0", features = ["sync"] }

[dev-dependencies]
bon = "=3.6.3"
claims = "=0.8.0"
insta = "=1.43.1"
insta = { version = "=1.43.1", features = ["json", "redactions"] }
mockito = "=1.7.0"
serde_json = "=1.0.140"
tokio = { version = "=1.45.0", features = ["macros", "rt-multi-thread"] }
110 changes: 110 additions & 0 deletions crates/crates_io_trustpub/src/access_token.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
use secrecy::{ExposeSecret, SecretString};
use sha2::digest::Output;
use sha2::{Digest, Sha256};

/// A temporary access token used to publish crates to crates.io using
/// the "Trusted Publishing" feature.
#[derive(Debug)]
pub struct AccessToken(SecretString);

impl AccessToken {
const PREFIX: &str = "cio_tp_";
Copy link
Member Author

@Turbo87 Turbo87 May 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For regular tokens we use `cio` plus 32 alphanumeric characters. When we implemented GitHub Secret Scanning we were told to use a more unique prefix with a separator. Therefore the temporary access tokens use the `cio_tp_` prefix to make them easier to recognize with fewer false positives.


/// Generate a new access token.
pub fn generate() -> Self {
Self::from_u64s(rand::random(), rand::random())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Small thing, but two questions:

  1. Why two u64s instead of a single [u8; 16]? The per-call overhead for the RNG is minuscule, but a single call would eliminate it and would prevent any implied separation between the halves of the uniformly random token.
  2. Does crates.io have a forking/multi-process model? If so rand::random() might be risky -- I believe it uses ThreadRng by default, which isn't guaranteed to be self-healing over process forks (i.e., it needs to be explicitly reseeded to prevent duplicated randomness between forks). If there are multiple processes at play that can call this generate() function, then my recommendation would be to use OsRng instead.

(The historical reason for not using OsRng is performance concerns around the system calls it can incur, but on Linux getrandom has been in the vDSO since 6.11.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does crates.io have a forking/multi-process model?

unless I'm misunderstanding our application, crates.io only uses a single process, but then has multiple worker threads inside the process. since we don't have a GIL in Rust, or are generally single-threaded like in JS, there is usually no need for multiple processes.

Why two u64s instead of a single [u8; 16]?

no real reason actually. it seemed easiest to get to the hex output from there, but I'm totally open to alternatives 😅

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unless I'm misunderstanding our application, crates.io only uses a single process, but then has multiple worker threads inside the process. since we don't have a GIL in Rust, or are generally single-threaded like in JS, there is usually no need for multiple processes.

Gotcha, thanks! The concern with rand::random is only when there are multiple processes, not multiple threads, so that API should be fine 🙂

no real reason actually. it seemed easiest to get to the hex output from there, but I'm totally open to alternatives 😅

It's an extreme nitpick from me so feel free to ignore! But from a type/domain coherence perspective I'd recommend [u8; 16] so that it's obvious that there's no difference between the "halves" of the uniformly random token 🙂

}

/// Build an access token from two `u64` halves.
///
/// The token text is `PREFIX` followed by 32 lowercase hex digits: `r1`
/// zero-padded to 16 digits, then `r2` zero-padded to 16 digits.
///
/// `generate()` delegates to this function; it exists separately so that
/// tests can construct tokens from fixed values.
fn from_u64s(r1: u64, r2: u64) -> Self {
    // Two adjacent 16-digit hex fields are exactly the 32-digit zero-padded
    // rendering of the 128-bit value with `r1` in the high half.
    let combined = (u128::from(r1) << 64) | u128::from(r2);
    let plaintext = format!("{}{combined:032x}", Self::PREFIX);
    Self(SecretString::from(plaintext))
}
Comment on lines +13 to +25
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • is 128bits of randomness enough?

Yep, 128 bits of entropy is sufficient! You could also do 256 bits to match what PyPI does for its tokens, but that's IMO overkill.

suggests adding a (32bit) checksum at the end for validation purposes. That would require us to find and add yet another dependency. Not sure if that's really worth it? 🤔

I think it depends partially on expected load/how expensive you expect credential revocation to be -- if there's a mass revocation event where GitHub is sending you tens of thousands of potential credentials, it can be helpful to have a pre-DB filter that weeds out false positives.

(This of course doesn't prevent someone from spamming you with fake tokens, since a CRC32 or similar is easy to crunch. But it limits the spam computationally, i.e. requires the spammer to waste time on that and keeps them from having a multiplicative impact on the DB itself.)

TL;DR: if the DB roundtrip is or could become expensive for token revocation, then a checksum or check-digit sequence is a good idea. Otherwise, you could probably leave it out of the MVP and version it in later 🙂

(I haven't used it myself, but https://crates.io/crates/crc32fast might be a good candidate?)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the past, I've generally regretted not adding in the support for the checksum. 😓 That said, the tokens often looked like other hashes, so there were a lot of false positive submissions.

Since these have the unique prefix the number of submissions should end up restricted to things that actually look like tokens.

TL;DR: if the DB roundtrip is or could become expensive for token revocation, then a checksum or check-digit sequence is a good idea. Otherwise, you could probably leave it out of the MVP and version it in later 🙂

+1 to leaving this out for the MVP and only implementing if it becomes an issue later on.


/// Parse an access token from raw bytes.
///
/// Returns `Some` only when `bytes` is valid UTF-8, starts with `PREFIX`,
/// and the remainder is exactly 32 characters drawn from `0-9` and `a-f`.
/// Any other input yields `None`.
pub fn from_bytes(bytes: &[u8]) -> Option<Self> {
    // Validate on a borrowed view first so that rejected input is never
    // copied to the heap.
    let text = std::str::from_utf8(bytes).ok()?;

    let suffix = text.strip_prefix(Self::PREFIX)?;
    if suffix.len() != 32 {
        return None;
    }

    // Only lowercase hex digits are accepted, matching the `{:x}` formatting
    // used in `from_u64s()`. Non-ASCII bytes fail this check as well.
    let is_hexdigit = |b: u8| matches!(b, b'0'..=b'9' | b'a'..=b'f');
    if !suffix.bytes().all(is_hexdigit) {
        return None;
    }

    // Allocate only once the input is known to be a well-formed token.
    Some(Self(SecretString::from(text.to_owned())))
}

/// Compute the SHA-256 digest of the token's plaintext.
pub fn sha256(&self) -> Output<Sha256> {
    let mut hasher = Sha256::new();
    hasher.update(self.0.expose_secret());
    hasher.finalize()
}
}

impl ExposeSecret<str> for AccessToken {
fn expose_secret(&self) -> &str {
self.0.expose_secret()
}
}

#[cfg(test)]
mod tests {
    use super::*;
    use claims::{assert_none, assert_some};
    use insta::assert_snapshot;

    /// A generated token starts with the prefix and carries 32 more bytes.
    #[test]
    fn test_generate() {
        let generated = AccessToken::generate();
        let plaintext = generated.expose_secret();
        let expected_len = AccessToken::PREFIX.len() + 32;

        assert!(plaintext.starts_with(AccessToken::PREFIX));
        assert_eq!(plaintext.len(), expected_len);
    }

    /// Fixed inputs render as zero-padded lowercase hex after the prefix.
    #[test]
    fn test_serialization() {
        let all_zero = AccessToken::from_u64s(0, 0);
        assert_snapshot!(all_zero.expose_secret(), @"cio_tp_00000000000000000000000000000000");

        let all_ones = AccessToken::from_u64s(u64::MAX, u64::MAX);
        assert_snapshot!(all_ones.expose_secret(), @"cio_tp_ffffffffffffffffffffffffffffffff");

        let mixed = AccessToken::from_u64s(0xc0ffee, 0xfa8072);
        assert_snapshot!(mixed.expose_secret(), @"cio_tp_0000000000c0ffee0000000000fa8072");
    }

    /// The digest of a token is 32 bytes long.
    #[test]
    fn test_sha256() {
        let generated = AccessToken::generate();
        let digest = generated.sha256();
        assert_eq!(digest.len(), 32);
    }

    /// `from_bytes()` round-trips generated tokens and rejects malformed input.
    #[test]
    fn test_from_bytes() {
        // Round trip: a generated token parses back to the same plaintext.
        let original = AccessToken::generate();
        let raw = original.expose_secret().as_bytes();
        let reparsed = assert_some!(AccessToken::from_bytes(raw));
        assert_eq!(original.expose_secret(), reparsed.expose_secret());

        // A well-formed literal is accepted.
        let well_formed = b"cio_tp_00000000000000000000000000000000";
        assert_some!(AccessToken::from_bytes(well_formed));

        // Missing prefix.
        let no_prefix = b"invalid_token";
        assert_none!(AccessToken::from_bytes(no_prefix));

        // Correct prefix, but the remainder is not a 32-digit hex string.
        let bad_suffix = b"cio_tp_invalid_token";
        assert_none!(AccessToken::from_bytes(bad_suffix));

        // Correct prefix and alphabet, but too short.
        let too_short = b"cio_tp_00000000000000000000000000";
        assert_none!(AccessToken::from_bytes(too_short));

        // Correct prefix and length, but contains a non-hex character.
        let non_hex = b"cio_tp_000000x0000000000000000000000000";
        assert_none!(AccessToken::from_bytes(non_hex));
    }
}
Loading