Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ sha1 = "0.10.6"
sha2 = "0.10.9"
shell-escape = "0.1.5"
similar = "2.7.0"
spdx = "0.10.9"
supports-hyperlinks = "3.1.0"
supports-unicode = "3.0.0"
snapbox = { version = "0.6.21", features = ["diff", "dir", "term-svg", "regex", "json"] }
Expand Down Expand Up @@ -205,6 +206,7 @@ serde_ignored.workspace = true
serde_json = { workspace = true, features = ["raw_value"] }
sha1.workspace = true
shell-escape.workspace = true
spdx.workspace = true
supports-hyperlinks.workspace = true
supports-unicode.workspace = true
tar.workspace = true
Expand Down
14 changes: 13 additions & 1 deletion src/cargo/core/workspace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ use crate::util::context::FeatureUnification;
use crate::util::edit_distance;
use crate::util::errors::{CargoResult, ManifestError};
use crate::util::interning::InternedString;
use crate::util::lints::{analyze_cargo_lints_table, check_im_a_teapot};
use crate::util::lints::analyze_cargo_lints_table;
use crate::util::lints::check_im_a_teapot;
use crate::util::lints::check_invalid_spdx_license_expression;
use crate::util::toml::{InheritableFields, read_manifest};
use crate::util::{
Filesystem, GlobalContext, IntoUrl, context::CargoResolverConfig, context::ConfigRelativePath,
Expand Down Expand Up @@ -1270,6 +1272,16 @@ impl<'gctx> Workspace<'gctx> {
self.gctx,
)?;
check_im_a_teapot(pkg, &path, &cargo_lints, &mut error_count, self.gctx)?;
check_invalid_spdx_license_expression(
pkg,
&path,
&cargo_lints,
ws_contents,
ws_document,
self.root_manifest(),
&mut error_count,
self.gctx,
)?;
if error_count > 0 {
Err(crate::util::errors::AlreadyPrintedError::new(anyhow!(
"encountered {error_count} errors(s) while running lints"
Expand Down
168 changes: 167 additions & 1 deletion src/cargo/util/lints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::ops::Range;
use std::path::Path;

const LINT_GROUPS: &[LintGroup] = &[TEST_DUMMY_UNSTABLE];
pub const LINTS: &[Lint] = &[IM_A_TEAPOT, UNKNOWN_LINTS];
pub const LINTS: &[Lint] = &[IM_A_TEAPOT, INVALID_SPDX_LICENSE_EXPRESSION, UNKNOWN_LINTS];

pub fn analyze_cargo_lints_table(
pkg: &Package,
Expand Down Expand Up @@ -467,6 +467,172 @@ pub fn check_im_a_teapot(
Ok(())
}

const INVALID_SPDX_LICENSE_EXPRESSION: Lint = Lint {
name: "invalid_spdx_license_expression",
desc: "invalid SPDX license expression",
groups: &[],
default_level: LintLevel::Warn,
edition_lint_opts: Some((Edition::EditionFuture, LintLevel::Deny)),
feature_gate: None,
docs: Some(
r#"
### What it does

Checks that the `license` field in `Cargo.toml` is a valid SPDX license expression.
See the doc of [the `license` field] for the SPDX specification version Cargo currently supports.

[the `license` field]: manifest.md#the-license-and-license-file-fields

### Why it is bad

Build tools, package registries, and compliance systems may fail to handle
non-SPDX licenses, which can lead to build failures, rejected uploads,
incorrect license reporting, or legal risks.

### Examples

```toml
license = "MIT / Apache-2.0" # Invalid: uses "/" instead of "OR"
license = "GPL-3.0 with exception" # Invalid: uses lowercase "with" instead of "WITH"
license = "GPL-3.0+" # Invalid: uses the deprecated "+" operator instead of "GPL-3.0-or-later"
license = "MIT OR (Apache-2.0" # Invalid: unclosed parenthesis
```

Use instead:

```toml
license = "MIT OR Apache-2.0"
license = "GPL-3.0 WITH exception"
license = "GPL-3.0-or-later"
license = "(MIT OR Apache-2.0) AND GPL-3.0-or-later WITH Classpath-exception-2.0"
```

"#,
),
};

pub fn check_invalid_spdx_license_expression(
pkg: &Package,
path: &Path,
pkg_lints: &TomlToolLints,
ws_contents: &str,
ws_document: &toml::Spanned<toml::de::DeTable<'static>>,
ws_path: &Path,
error_count: &mut usize,
gctx: &GlobalContext,
) -> CargoResult<()> {
let manifest = pkg.manifest();
let (lint_level, reason) = INVALID_SPDX_LICENSE_EXPRESSION.level(
pkg_lints,
manifest.edition(),
manifest.unstable_features(),
);

if lint_level == LintLevel::Allow {
return Ok(());
}

let Some(license) = manifest.metadata().license.as_ref() else {
return Ok(());
};

// We must be a subset of crates.io's parse mode:
// https://github.com/rust-lang/crates.io/blob/bc421ae1/src/licenses.rs#L3-L8
// we don't want to allow something that would be rejected by crates.io
let Err(e) = spdx::Expression::parse_mode(license, spdx::ParseMode::STRICT) else {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't just checking the validity of the expression but whether the identifier is from a known list or not

https://github.com/EmbarkStudios/spdx/blob/e4eac6db2030a7deeca57cc7f5d0485598d8c9e5/src/lexer.rs#L253-L282

https://github.com/EmbarkStudios/spdx/blob/main/src/identifiers.rs#L18

I thought we only wanted to check syntax (and maybe have a separate lint for being a known identifier). Unfortunately, it looks like there isn't a way to do that with spdx.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good for checking the expression, and yeah spdx doesn't offer that 😞. It might be fine as normally people want known identifiers. The remaining questions are:

  • If we have a new lint for checking known identifier, is it a breaking change if this one turns into a expression-only lint?
  • Will we have per-line configuration so people can allowlist their identifiers?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the main concern is people using a new identifier that cargo doesn't support yet.

It would be bad to have to turn off the syntax lint "temporarily" (if people remember to turn it back on). These also seem like different levels of concern. For example, we were talking about transitioning this to an error on an edition but we likely shouldn't do that for identifiers, leaving that as a warning or allow.

If we have a new lint for checking known identifier, is it a breaking change if this one turns into a expression-only lint?

Lints are allowed to change, unsure how much though.

Will we have per-line configuration so people can allowlist their identifiers?

Having a way to add new identifers through lint config might be a reasonable way to mitigate this but that also requires a change to spdx iiuc

It sounds like we'd need a change either way to spdx to stabilize this.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changing spdx is definitely the right approach, totally agreed.

However, in practice,

  • I doubt people will use new license identifier right after it gets into SPDX spec.
  • Even if they do, it is likely pretty rare.
  • Will crates.io fail an upload due to license parse error? Asked because if yes, then they will still fail during publish, and this lint must adhere with what crates.io is doing.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will crates.io fail an upload due to license parse error? Asked because if yes, then they will still fail during publish, and this lint must adhere with what crates.io is doing.

We can be less strict than crates.io as we support multiple registries.

We also have different update requirements than crates.io (every 6 weeks, no updates to previous version vs crates.io effectively living at HEAD)

Copy link
Member Author

@weihanglo weihanglo Aug 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just had a discussion with @Muscraft. Let's say we have two lints:

  • invalid_spdx_license_expression
  • invalid_spdx_license_identifier

And here we have dependency problem betweewn two lints dependency issue. If the expression is invalid, identifiers can't be meaningfully validated.

To fix this issue, we have some ideas:

  • Have two lints. Run these lints in one function with two passes: first expression, then identifier.
    • Lint level: expression > identifier -> No issue. Just fire lints.
    • Lint level: expression < identifier -> We may need a special diagnostic saying "identifiers lint could not be checked due to invalid expression".
  • Make just one lint but have its failure cases be configurable

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Personally, I'd go with the two lints in one function.

Might also be good to get someone's opinion who has dealt more with licensing, like @joshtriplett

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See also the discussion in spdx crate with its author (also the author of cargo-deny): EmbarkStudios/spdx#82.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 for the idea of separating "this is a valid SPDX expression" from "all the identifiers in this SPDX expression are known". I'd also love to see this allow for future extensibility, so that people can say "here's a license file, let this new term refer to it" and then write an expression that uses that term. (We should probably check with SPDX at some point about whether there's a reserved portion of the identifier namespace.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Put some of my thoughts on the linked issue EmbarkStudios/spdx#82 (comment).

Added a bit more:

  • It is not uncommon one allow a lint because it has false-positive / doesn't meet their their requirements.
  • The more lints the better? Probably yes, but the more lints with similar functionalities, the more confusing (on both implementation and user side).
  • Setting up lint rules is kinda showing a stance of the team. If we being strict on SPDX license, people follow, if we prove invalid identifiers that implies we think that should be a thing people want.
  • Configuration are a bit better as people may want only OSI-approved or copyleft licenses.
  • The initial idea of SPDX dual-license inconsistency #2039 is for SPDX license syntax consistency. I feel like we are drifting to a broader licensing issue now 😆.

return Ok(());
};

if lint_level == LintLevel::Forbid || lint_level == LintLevel::Deny {
*error_count += 1;
}

// Check if `package.license` is inherited
let is_license_inherited = manifest
.original_toml()
.package()
.and_then(|p| p.license.as_ref())
.map(|f| f.is_inherited())
.unwrap_or_default();

let level = lint_level.to_diagnostic_level();
let manifest_path = rel_cwd_manifest_path(path, gctx);
let ws_path = rel_cwd_manifest_path(ws_path, gctx);
let emitted_reason = format!(
"`cargo::{}` is set to `{lint_level}` {reason}",
INVALID_SPDX_LICENSE_EXPRESSION.name
);

let title = format!("{}: `{license}`", INVALID_SPDX_LICENSE_EXPRESSION.desc);
let help =
Level::Help.title("see https://spdx.org/licenses/ for valid SPDX license expressions");
let note = Level::Note.title(&emitted_reason);
let error_reason = e.reason.to_string();

// Calculate the precise error span within the license string,
// since ParseError preserves a span for us.
let error_span = |license_span: Range<usize>| {
let open_quote = 1;
let start = license_span.start + open_quote + e.span.start;
let end = license_span.start + open_quote + e.span.end;
start..end
};

let message = if is_license_inherited {
let license_span =
get_span(ws_document, &["workspace", "package", "license"], true).unwrap();
let span = error_span(license_span);
let inherit_title = "the `package.license` field was inherited";
let inherited_note = if let (Some(inherit_span_key), Some(inherit_span_value)) = (
get_span(
manifest.document(),
&["package", "license", "workspace"],
false,
),
get_span(
manifest.document(),
&["package", "license", "workspace"],
true,
),
) {
Level::Note.title(inherit_title).snippet(
Snippet::source(manifest.contents())
.origin(&manifest_path)
.annotation(Level::Note.span(inherit_span_key.start..inherit_span_value.end))
.fold(true),
)
} else {
Level::Note.title(inherit_title)
};
level
.title(&title)
.snippet(
Snippet::source(ws_contents)
.origin(&ws_path)
.annotation(level.span(span).label(&error_reason))
.fold(true),
)
.footer(inherited_note)
.footer(note)
.footer(help)
} else {
let license_span = get_span(manifest.document(), &["package", "license"], true).unwrap();
let span = error_span(license_span);
level
.title(&title)
.snippet(
Snippet::source(manifest.contents())
.origin(&manifest_path)
.annotation(level.span(span).label(&error_reason))
.fold(true),
)
.footer(note)
.footer(help)
};
gctx.shell().print_message(message)?;
Ok(())
}

const UNKNOWN_LINTS: Lint = Lint {
name: "unknown_lints",
desc: "unknown lint",
Expand Down
37 changes: 37 additions & 0 deletions src/doc/src/reference/lints.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,45 @@ Note: [Cargo's linting system is unstable](unstable.md#lintscargo) and can only
## Warn-by-default

These lints are all set to the 'warn' level by default.
- [`invalid_spdx_license_expression`](#invalid_spdx_license_expression)
- [`unknown_lints`](#unknown_lints)

## `invalid_spdx_license_expression`
Set to `warn` by default

### What it does

Checks that the `license` field in `Cargo.toml` is a valid SPDX license expression.
See the doc of [the `license` field] for the SPDX specification version Cargo currently supports.

[the `license` field]: manifest.md#the-license-and-license-file-fields

### Why it is bad

Build tools, package registries, and compliance systems may fail to handle
non-SPDX licenses, which can lead to build failures, rejected uploads,
incorrect license reporting, or legal risks.

### Examples

```toml
license = "MIT / Apache-2.0" # Invalid: uses "/" instead of "OR"
license = "GPL-3.0 with exception" # Invalid: uses lowercase "with" instead of "WITH"
license = "GPL-3.0+" # Invalid: uses the deprecated "+" operator instead of "GPL-3.0-or-later"
license = "MIT OR (Apache-2.0" # Invalid: unclosed parenthesis
```

Use instead:

```toml
license = "MIT OR Apache-2.0"
license = "GPL-3.0 WITH exception"
license = "GPL-3.0-or-later"
license = "(MIT OR Apache-2.0) AND GPL-3.0-or-later WITH Classpath-exception-2.0"
```



## `unknown_lints`
Set to `warn` by default

Expand Down
Loading
Loading