Skip to content

Commit fe7d734

Browse files
committed
Implement combine_expressions_with_exceptions function to correctly handle license exceptions using the WITH operator for valid SPDX expressions. Update get_detected_license_expression to utilize this new function.
Signed-off-by: asmitbanik <[email protected]>
1 parent c05a13d commit fe7d734

File tree

1 file changed

+139
-2
lines changed

1 file changed

+139
-2
lines changed

src/licensedcode/detection.py

Lines changed: 139 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,141 @@
4747
heuristics.
4848
"""
4949

50+
51+
def combine_expressions_with_exceptions(expressions, licensing=None):
    """
    Combine a list of license ``expressions`` into a single expression string,
    using WITH for license exceptions and AND for regular licenses.
    Return None when there is nothing to combine.

    According to SPDX specification, exceptions must be combined with their
    base license using the WITH operator, not AND. For example:
    - "gpl-3.0 WITH gcc-exception-3.1" is valid SPDX
    - "gpl-3.0 AND gcc-exception-3.1" is NOT valid SPDX

    An expression made of a single exception key is attached with WITH to the
    expression that IMMEDIATELY precedes it, and only when that preceding
    expression is itself a single, known, non-exception license key. In every
    other case (leading exception, exception after a compound expression,
    exception after an already combined "x WITH y") the exception is kept as
    its own part and joined with AND, which may not be valid SPDX but never
    re-associates an exception with a non-adjacent license.

    ``expressions`` is an iterable of expression strings; None and empty
    items are ignored. ``licensing`` is the Licensing object used for parsing
    and defaults to ``get_licensing()``.
    """
    if not expressions:
        return None

    if not licensing:
        licensing = get_licensing()

    # Ignore None/empty expressions.
    expressions = [e for e in expressions if e]
    if not expressions:
        return None

    if len(expressions) == 1:
        return str(licensing.parse(expressions[0]))

    # License lookup used to tell exceptions from regular licenses.
    # NOTE(review): assumes ``db`` maps a license key to an object with an
    # ``is_exception`` attribute -- confirm against the cache implementation.
    licenses_db = get_cache().db

    def classify(expr):
        """
        Return a ``(key, is_exception)`` tuple when ``expr`` holds exactly one
        license key that is present in the licenses db, or ``(None, False)``
        otherwise, including when ``expr`` cannot be processed.
        """
        try:
            keys = licensing.license_keys(expr, unique=True)
            if len(keys) != 1:
                return None, False
            lic = licenses_db.get(keys[0])
            if not lic:
                return None, False
            return keys[0], bool(lic.is_exception)
        except Exception:
            # Best effort: an expression we cannot analyze is treated as a
            # regular (non simple, non exception) expression.
            return None, False

    result_parts = []
    # Latest non-exception expression that has not been emitted yet. It is
    # held back so an exception coming right after it can be attached to it.
    pending = None

    for expr in expressions:
        exception_key, is_exception = classify(expr)

        if not is_exception:
            # Regular expression: emit the previous pending one (no exception
            # followed it) and hold this one back instead.
            if pending:
                result_parts.append(pending)
            pending = expr
            continue

        base_key, base_is_exception = classify(pending) if pending else (None, False)
        if base_key and not base_is_exception:
            # "license WITH exception": no parentheses needed for simple keys.
            result_parts.append(f"{base_key} WITH {exception_key}")
        else:
            # No simple license right before this exception: keep the order
            # and fall back to AND rather than attaching it to an earlier,
            # non-adjacent license.
            if pending:
                result_parts.append(pending)
            result_parts.append(expr)
        pending = None

    # Emit the last held back expression, if any.
    if pending:
        result_parts.append(pending)

    if len(result_parts) == 1:
        return str(licensing.parse(result_parts[0]))

    combined = combine_expressions(
        expressions=result_parts,
        relation='AND',
        unique=True,
        licensing=licensing,
    )

    # combine_expressions returns a string or None
    if combined is None:
        return None
    return str(combined)
184+
50185
# Debug flag read from the SCANCODE_DEBUG_LICENSE_DETECTION environment
# variable. The raw string value is used as-is, so any non-empty value
# (including "0" or "false") is truthy; it is False when the variable is unset.
TRACE = os.environ.get('SCANCODE_DEBUG_LICENSE_DETECTION', False)
51186
# Debug flag read from the SCANCODE_DEBUG_PLUGIN_LICENSE_REFERENCE environment
# variable. Any non-empty string value is truthy; False when unset.
# NOTE(review): presumably gates tracing of license reference handling --
# its use is not visible in this excerpt.
TRACE_REFERENCE = os.environ.get('SCANCODE_DEBUG_PLUGIN_LICENSE_REFERENCE', False)
52187

@@ -1591,15 +1726,17 @@ def get_detected_license_expression(
15911726
if TRACE:
15921727
logger_debug(f'matches_for_expression: {matches_for_expression}', f'detection_log: {detection_log}')
15931728

1594-
combined_expression = combine_expressions(
1729+
# Use combine_expressions_with_exceptions to properly handle license exceptions
1730+
# with the WITH operator instead of AND (required for valid SPDX expressions)
1731+
combined_expression = combine_expressions_with_exceptions(
15951732
expressions=[match.rule.license_expression for match in matches_for_expression],
15961733
licensing=get_licensing(),
15971734
)
15981735

15991736
if TRACE or TRACE_ANALYSIS:
16001737
logger_debug(f'combined_expression {combined_expression}')
16011738

1602-
return detection_log, str(combined_expression)
1739+
return detection_log, str(combined_expression) if combined_expression else None
16031740

16041741

16051742
def get_unknown_license_detection(query_string):

0 commit comments

Comments
 (0)