Private
Public Access
0
0

chore(audit): add license_cve audit script + initial report

scripts/audit_license_cve.py: 4 internal checks (license +
CVE + pin + source-header), policy tables (allowlist of
permissive/weak-copyleft/public-domain, blocklist of
non-OSI/restricted-source), and a main() that runs all 4
and emits line-per-violation to stdout + a markdown report.

Tests (26 unit + integration) cover license classifier (16
variants across MIT, BSD, Apache, LGPL, MPL, CC0, WTFPL,
GPL, AGPL, SSPL, BSL, Commons Clause, Elastic, Anti-996,
Hippocratic, unknown), pin check (3), source-header check
(3), license check via importlib.metadata (1), CVE check
via subprocess pip-audit (2), and a smoke test of the main
loop (1).

No new pip deps in the project: pure stdlib
(importlib.metadata, tomllib, pathlib, re) + subprocess to
pip-audit (optional dev tool, installed via 'uv tool install
pip-audit' if user wants CVE checks).

Initial report at docs/reports/license_cve_audit/2026-06-07/
records the current state. The Phase 2 commit will apply
the fixes (tilde-pin, delete requirements.txt); the Phase 3
commit will add --strict mode + baseline file for CI.
This commit is contained in:
2026-06-07 15:07:46 -04:00
parent e09e6823af
commit a8ae11d3a8
5 changed files with 600 additions and 1 deletions
+190
View File
@@ -0,0 +1,190 @@
"""Tests for scripts/audit_license_cve."""
from pathlib import Path
import json
import sys
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
import pytest
from scripts.audit_license_cve import classify_license, Violation
def test_classify_license_mit() -> None:
assert classify_license("MIT") == "allow"
def test_classify_license_bsd_3_clause() -> None:
assert classify_license("BSD-3-Clause") == "allow"
assert classify_license("BSD") == "allow"
def test_classify_license_apache_2() -> None:
assert classify_license("Apache-2.0") == "allow"
assert classify_license("Apache 2.0") == "allow"
def test_classify_license_lgpl() -> None:
assert classify_license("LGPL-2.1") == "allow"
assert classify_license("LGPL-3.0") == "allow"
def test_classify_license_mpl_2() -> None:
assert classify_license("MPL-2.0") == "allow"
def test_classify_license_cc0_wtfpl() -> None:
assert classify_license("CC0-1.0") == "allow"
assert classify_license("WTFPL") == "allow"
def test_classify_license_gpl_blocks() -> None:
assert classify_license("GPL-2.0") == "block"
assert classify_license("GPL-3.0") == "block"
assert classify_license("GPL") == "block"
def test_classify_license_agpl_blocks() -> None:
assert classify_license("AGPL-3.0") == "block"
assert classify_license("AGPL") == "block"
def test_classify_license_sspl_blocks() -> None:
assert classify_license("SSPL-1.0") == "block"
assert classify_license("Server Side Public License") == "block"
def test_classify_license_bsl_blocks() -> None:
assert classify_license("BUSL-1.1") == "block"
assert classify_license("BSL-1.1") == "block"
def test_classify_license_commons_clause_blocks() -> None:
assert classify_license("Apache-2.0 WITH Commons-Clause") == "block"
assert classify_license("Commons-Clause") == "block"
def test_classify_license_elastic_blocks() -> None:
assert classify_license("Elastic-2.0") == "block"
def test_classify_license_anti_996_allows() -> None:
assert classify_license("Anti-996") == "allow"
assert classify_license("Anti-996-License") == "allow"
def test_classify_license_hippocratic_allows() -> None:
assert classify_license("Hippocratic-2.1") == "allow"
def test_classify_license_unknown_blocks() -> None:
assert classify_license("UNKNOWN") == "block"
assert classify_license("Custom") == "block"
assert classify_license("see AUTHORS") == "block"
assert classify_license("") == "block"
assert classify_license(None) == "block"
def test_classify_license_random_string_blocks() -> None:
"""Unknown / unclassified licenses are violations, never auto-passes."""
assert classify_license("Made Up License v1.0") == "block"
assert classify_license("Proprietary-EULA") == "block"
from scripts.audit_license_cve import check_pins
def test_check_pins_no_specifier(tmp_path: Path) -> None:
pyproject = tmp_path / "pyproject.toml"
pyproject.write_text(
'[project]\nname = "x"\nversion = "0.1.0"\ndependencies = ["foo", "bar"]\n',
encoding="utf-8",
)
violations = check_pins(pyproject)
names = {v.target for v in violations}
assert "foo" in names
assert "bar" in names
def test_check_pins_with_specifier(tmp_path: Path) -> None:
pyproject = tmp_path / "pyproject.toml"
pyproject.write_text(
'[project]\nname = "x"\nversion = "0.1.0"\ndependencies = ["foo>=1.0.0", "bar~2.0.0", "baz==3.0.0"]\n',
encoding="utf-8",
)
violations = check_pins(pyproject)
assert violations == []
def test_check_pins_exact_version_ok(tmp_path: Path) -> None:
"""Exact pins are fine - they have a lower bound (==X)."""
pyproject = tmp_path / "pyproject.toml"
pyproject.write_text(
'[project]\nname = "x"\nversion = "0.1.0"\ndependencies = ["foo==1.0.0"]\n',
encoding="utf-8",
)
violations = check_pins(pyproject)
assert violations == []
from scripts.audit_license_cve import check_source_headers
def test_check_source_headers_gpl_violation(tmp_path: Path) -> None:
src = tmp_path / "src"
src.mkdir()
(src / "foo.py").write_text(
"# SPDX-License-Identifier: GPL-3.0\n# A file.\n",
encoding="utf-8",
)
violations = check_source_headers(src)
assert any("foo.py" in v.target and "GPL" in v.detail for v in violations)
def test_check_source_headers_no_spdx_ok(tmp_path: Path) -> None:
"""No SPDX line = no violation (informational note; project's own copyright is user's call)."""
src = tmp_path / "src"
src.mkdir()
(src / "bar.py").write_text("# A file with no SPDX.\n", encoding="utf-8")
violations = check_source_headers(src)
assert violations == []
def test_check_source_headers_mit_ok(tmp_path: Path) -> None:
src = tmp_path / "src"
src.mkdir()
(src / "baz.py").write_text("# SPDX-License-Identifier: MIT\n# A file.\n", encoding="utf-8")
violations = check_source_headers(src)
assert violations == []
from scripts.audit_license_cve import check_licenses
def test_check_licenses_via_metadata(monkeypatch) -> None:
"""The license check iterates installed distributions and classifies each."""
class FakeDist:
def __init__(self, name: str, license_str: str | None) -> None:
self.metadata = {"Name": name, "License": license_str, "Version": "1.0.0"}
fake_dists = [
FakeDist("good-pkg", "MIT"),
FakeDist("bad-pkg", "GPL-3.0"),
FakeDist("unknown-pkg", "UNKNOWN"),
FakeDist("missing-pkg", None),
]
monkeypatch.setattr("importlib.metadata.distributions", lambda: fake_dists)
violations = check_licenses()
names = {v.target for v in violations}
assert "bad-pkg" in names
assert "unknown-pkg" in names
assert "missing-pkg" in names
assert "good-pkg" not in names
from scripts.audit_license_cve import check_cves
def test_check_cves_pip_audit_not_installed(monkeypatch) -> None:
"""If pip-audit is not on PATH, the CVE check is a no-op (not a failure)."""
monkeypatch.setattr("shutil.which", lambda cmd: None if cmd == "pip-audit" else "/usr/bin/" + cmd)
violations = check_cves()
assert violations == []
def test_check_cves_pip_audit_json(monkeypatch) -> None:
"""If pip-audit is installed, parse its JSON output."""
fake_json = json.dumps({
"dependencies": [
{"name": "vuln-pkg", "version": "1.0.0", "vulns": [
{"id": "CVE-2024-12345", "fix_versions": [">=1.2.3"], "severity": "high"}
]},
],
}).encode("utf-8")
class FakeCompleted:
stdout = fake_json
returncode = 0
stderr = b""
monkeypatch.setattr("shutil.which", lambda cmd: "/usr/bin/pip-audit" if cmd == "pip-audit" else None)
monkeypatch.setattr("subprocess.run", lambda *a, **kw: FakeCompleted())
violations = check_cves()
assert any("CVE-2024-12345" in v.detail and v.target == "vuln-pkg" for v in violations)
def test_main_smoke_runs(tmp_path: Path, capsys) -> None:
"""The script runs end-to-end in informational mode; exit code 0."""
import subprocess
result = subprocess.run(
["python", "-m", "scripts.audit_license_cve", "--report-dir", str(tmp_path / "reports"), "--date", "2026-06-07"],
capture_output=True, text=True, timeout=30,
)
assert result.returncode == 0
assert "Wrote" in result.stdout