|
47 | 47 | heuristics. |
48 | 48 | """ |
49 | 49 |
|
| 50 | + |
def combine_expressions_with_exceptions(expressions, licensing=None):
    """
    Return a single license expression string built from a list of license
    ``expressions``, using WITH to attach a license exception to the license
    that immediately precedes it and AND to join everything else.
    Return None if there is nothing to combine.

    According to the SPDX specification, an exception must be combined with
    its base license using the WITH operator, not AND. For example:
    - "gpl-3.0 WITH gcc-exception-3.1" is valid SPDX
    - "gpl-3.0 AND gcc-exception-3.1" is NOT valid SPDX

    ``expressions`` is an iterable of license expression strings; None and
    empty items are ignored. ``licensing`` is an optional Licensing object
    used for parsing; the default one from ``get_licensing()`` is used when
    it is not provided.

    An exception is attached with WITH only when the expression right before
    it is a single, non-exception license key. In every other case (first
    item, after a compound expression, after another exception or after an
    already combined "license WITH exception") the exception is kept as its
    own part and joined with AND, which may not be valid SPDX.
    """
    if not expressions:
        return None

    # Filter out None/empty expressions before doing any real work.
    expressions = [e for e in expressions if e]
    if not expressions:
        return None

    if not licensing:
        licensing = get_licensing()

    if len(expressions) == 1:
        return str(licensing.parse(expressions[0]))

    # The license database tells us which keys are exceptions.
    licenses_db = get_cache().db

    def classify(expr):
        """
        Return a (key, is_exception) tuple for an ``expr`` made of exactly
        one known license key, or (None, False) otherwise.
        """
        try:
            keys = licensing.license_keys(expr, unique=True)
        except Exception:
            # Best effort: an expression that cannot be parsed here is
            # passed through untouched and reported by the final combine.
            return None, False

        if len(keys) != 1:
            return None, False

        key = keys[0]
        lic = licenses_db.get(key)
        if not lic:
            return None, False
        return key, bool(lic.is_exception)

    result_parts = []
    for expr in expressions:
        exception_key, is_exception = classify(expr)
        if is_exception and result_parts:
            # Only the immediately preceding part is a candidate base
            # license: attaching to an older part would pair the exception
            # with an unrelated license and reorder the expression.
            base_key, base_is_exception = classify(result_parts[-1])
            if base_key and not base_is_exception:
                result_parts[-1] = f"{base_key} WITH {exception_key}"
                continue
        result_parts.append(expr)

    if len(result_parts) == 1:
        return str(licensing.parse(result_parts[0]))

    combined = combine_expressions(
        expressions=result_parts,
        relation='AND',
        unique=True,
        licensing=licensing,
    )

    # combine_expressions may return None: do not turn it into "None".
    if combined is None:
        return None
    return str(combined)
| 184 | + |
50 | 185 | TRACE = os.environ.get('SCANCODE_DEBUG_LICENSE_DETECTION', False) |
51 | 186 | TRACE_REFERENCE = os.environ.get('SCANCODE_DEBUG_PLUGIN_LICENSE_REFERENCE', False) |
52 | 187 |
|
@@ -1591,15 +1726,17 @@ def get_detected_license_expression( |
1591 | 1726 | if TRACE: |
1592 | 1727 | logger_debug(f'matches_for_expression: {matches_for_expression}', f'detection_log: {detection_log}') |
1593 | 1728 |
|
1594 | | - combined_expression = combine_expressions( |
| 1729 | + # Use combine_expressions_with_exceptions to properly handle license exceptions |
| 1730 | + # with the WITH operator instead of AND (required for valid SPDX expressions) |
| 1731 | + combined_expression = combine_expressions_with_exceptions( |
1595 | 1732 | expressions=[match.rule.license_expression for match in matches_for_expression], |
1596 | 1733 | licensing=get_licensing(), |
1597 | 1734 | ) |
1598 | 1735 |
|
1599 | 1736 | if TRACE or TRACE_ANALYSIS: |
1600 | 1737 | logger_debug(f'combined_expression {combined_expression}') |
1601 | 1738 |
|
1602 | | - return detection_log, str(combined_expression) |
| 1739 | + return detection_log, str(combined_expression) if combined_expression else None |
1603 | 1740 |
|
1604 | 1741 |
|
1605 | 1742 | def get_unknown_license_detection(query_string): |
|
0 commit comments