Source code for meta_package_manager.sbom.cyclonedx

# Copyright Kevin Deldycke <kevin@deldycke.com> and contributors.
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
"""CycloneDX 1.7 writer.

Heavy ``cyclonedx-python-lib`` imports are guarded behind a ``try/except``
block; ``cyclonedx_support`` reports whether the
:py:class:`CycloneDX` class can actually be used.

The license-normalization helper is shared with :py:mod:`.spdx` and is
imported from there rather than duplicated: SPDX license expressions are
the lingua franca CycloneDX builds on, so the dependency direction is
intentional and acyclic.
"""

from __future__ import annotations

import logging

from packageurl import PackageURL

from .. import __version__
from ..package import (
    EMPTY_METADATA,
    ChecksumAlgorithm,
    PackageMetadata,
)
from .base import SBOM, ExportFormat
from .spdx import _parse_license_expression

# Optional-dependency guard. ``cyclonedx_support`` starts out ``True`` and is
# flipped to ``False`` in the ``except`` branch when any of the CycloneDX
# imports below fails, so the flag tells whether the names imported here are
# actually bound.
cyclonedx_support = True
try:
    from cyclonedx.model import (
        ExternalReference,
        ExternalReferenceType,
        HashAlgorithm,
        HashType,
        Property,
        XsUri,
    )
    from cyclonedx.model.bom import Bom
    from cyclonedx.model.component import Component, ComponentType
    from cyclonedx.model.contact import OrganizationalContact, OrganizationalEntity
    from cyclonedx.model.license import (
        DisjunctiveLicense,
        LicenseExpression,
        LicenseExpressionDetails,
    )
    from cyclonedx.model.lifecycle import LifecyclePhase, PredefinedLifecycle
    from cyclonedx.output import make_outputter
    from cyclonedx.output.json import JsonV1Dot7
    from cyclonedx.schema import OutputFormat, SchemaVersion
except ImportError:
    # The import failure is not re-raised: this module still imports, the flag
    # records that the writer is unavailable, and a debug-level hint names the
    # extra to install.
    cyclonedx_support = False
    logging.getLogger("meta_package_manager").debug(
        "CycloneDX support disabled: install meta-package-manager[sbom] to enable it.",
    )

# Annotation-only imports. ``TYPE_CHECKING`` is a plain module constant set to
# ``False`` (not imported from ``typing``), so the block below never executes
# at runtime and the names it imports exist only for annotations, which
# ``from __future__ import annotations`` keeps unevaluated.
# NOTE(review): this relies on static type checkers treating a variable named
# ``TYPE_CHECKING`` as true -- confirm for the checker used by the project.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from collections.abc import Iterator
    from typing import Any

    from ..manager import PackageManager
    from ..package import Package


# Lookup from the portable ``ChecksumAlgorithm`` value to the matching
# CycloneDX ``HashAlgorithm`` member. Values are annotated ``Any`` on purpose:
# the guarded ``try/except`` import above hides ``HashAlgorithm`` from the type
# checker, and a precise type would cascade mypy errors to every call site.
# The map stays empty when CycloneDX support is unavailable.
_CYCLONEDX_HASH_MAP: dict[str, Any] = {}
if cyclonedx_support:
    _CYCLONEDX_HASH_MAP.update(
        (portable.value, cyclonedx_algorithm)
        for portable, cyclonedx_algorithm in (
            (ChecksumAlgorithm.MD5, HashAlgorithm.MD5),
            (ChecksumAlgorithm.SHA1, HashAlgorithm.SHA_1),
            (ChecksumAlgorithm.SHA256, HashAlgorithm.SHA_256),
            (ChecksumAlgorithm.SHA512, HashAlgorithm.SHA_512),
            (ChecksumAlgorithm.SHA3_256, HashAlgorithm.SHA3_256),
            (ChecksumAlgorithm.SHA3_512, HashAlgorithm.SHA3_512),
            (ChecksumAlgorithm.BLAKE2B_256, HashAlgorithm.BLAKE2B_256),
            (ChecksumAlgorithm.BLAKE2B_512, HashAlgorithm.BLAKE2B_512),
        )
    )


class CycloneDX(SBOM):
    """Generates a CycloneDX document from a list of packages.

    `CycloneDX 1.7 specifications <https://cyclonedx.org/docs/1.7>`_.

    Typical life cycle, as driven by the methods below: :py:meth:`init_doc`
    creates the empty document, :py:meth:`add_package` is called once per
    package, :py:meth:`finalize` resolves the queued dependency edges, and
    :py:meth:`export` serializes the result.
    """

    # The CycloneDX document being assembled.
    document: Bom
    # ``(manager_id, package_id) -> Component`` lookup of emitted components.
    component_index: dict[tuple[str, str], Component]
    # ``(source_component, manager_id, target_id)`` edges awaiting resolution.
    pending_dependencies: list[tuple[Component, str, str]]

    def init_doc(self) -> None:
        """Create the empty document and describe ``mpm`` as its root component.

        `CycloneDX document metadata specifications
        <https://cyclonedx.org/docs/1.7/json/#metadata>`_.
        """
        gh_url = "https://github.com/kdeldycke/meta-package-manager"
        doc_url = "https://kdeldycke.github.io/meta-package-manager"

        self.document = Bom()
        # ``(manager_id, package_id) -> Component`` lookup, used by
        # :py:meth:`finalize` to wire declared-dependency edges to their
        # already-emitted Component instances.
        self.component_index = {}
        # ``(source_component, manager_id, target_id)`` queue: dependency
        # edges deferred because the target may not have been added yet.
        self.pending_dependencies = []

        # mpm produces an inventory of what is installed on a live system.
        self.document.metadata.lifecycles = [
            PredefinedLifecycle(phase=LifecyclePhase.OPERATIONS),
        ]

        # ``(type, URL)`` pairs for every reference that carries no comment.
        plain_references = (
            (ExternalReferenceType.ADVISORIES, f"{gh_url}/security"),
            (
                ExternalReferenceType.BUILD_META,
                f"{gh_url}/blob/v{__version__}/uv.lock",
            ),
            (ExternalReferenceType.BUILD_SYSTEM, f"{gh_url}/actions"),
            (ExternalReferenceType.CONFIGURATION, f"{doc_url}/configuration.html"),
            (
                ExternalReferenceType.DISTRIBUTION,
                "https://pypi.org/project/meta-package-manager",
            ),
            (
                ExternalReferenceType.DISTRIBUTION_INTAKE,
                f"{gh_url}/releases/tag/v{__version__}",
            ),
            (ExternalReferenceType.DOCUMENTATION, doc_url),
            (ExternalReferenceType.ISSUE_TRACKER, f"{gh_url}/issues"),
            (ExternalReferenceType.LICENSE, f"{gh_url}/blob/main/license"),
            (ExternalReferenceType.MATURITY_REPORT, f"{gh_url}/pulse"),
            (ExternalReferenceType.RELEASE_NOTES, f"{doc_url}/changelog.html"),
            (
                ExternalReferenceType.SOURCE_DISTRIBUTION,
                f"{gh_url}/releases/download/v{__version__}"
                f"/meta_package_manager-{__version__}.tar.gz",
            ),
            (ExternalReferenceType.VCS, gh_url),
            (ExternalReferenceType.WEBSITE, gh_url),
        )
        external_references = [
            ExternalReference(type=reference_type, url=XsUri(url))
            for reference_type, url in plain_references
        ]
        external_references.append(
            ExternalReference(
                type=ExternalReferenceType.OTHER,
                url=XsUri("https://github.com/sponsors/kdeldycke"),
                comment="Funding",
            )
        )

        self.document.metadata.component = Component(
            name="meta-package-manager",
            type=ComponentType.APPLICATION,
            bom_ref=f"meta-package-manager@{__version__}",
            supplier=OrganizationalEntity(
                name="Meta Package Manager",
                urls=[XsUri(gh_url)],
            ),
            version=__version__,
            purl=PackageURL(
                type="pypi", name="meta-package-manager", version=__version__
            ),
            external_references=external_references,
        )

    @staticmethod
    def _supplier_for(
        manager: PackageManager, metadata: PackageMetadata
    ) -> OrganizationalEntity:
        """Map metadata's supplier (or the manager itself) to a CycloneDX
        ``OrganizationalEntity``.

        When the metadata names a supplier, its name and (if set) URL are used;
        otherwise the entity is named after the package manager.
        """
        if metadata.supplier:
            urls = [XsUri(metadata.supplier.url)] if metadata.supplier.url else None
            return OrganizationalEntity(name=metadata.supplier.name, urls=urls)
        return OrganizationalEntity(name=manager.name)

    @staticmethod
    def _hashes_for(metadata: PackageMetadata) -> list:
        """Build CycloneDX ``HashType`` objects from the portable ``Checksum``
        list, dropping algorithms unsupported by 1.7.

        An algorithm is considered unsupported when it has no entry in
        ``_CYCLONEDX_HASH_MAP``.
        """
        hashes = []
        for checksum in metadata.checksums:
            algorithm = _CYCLONEDX_HASH_MAP.get(checksum.algorithm.value)
            if algorithm is not None:
                hashes.append(HashType(alg=algorithm, content=checksum.value))
        return hashes

    @staticmethod
    def _licenses_for(metadata: PackageMetadata) -> list:
        """Translate license metadata into CycloneDX license objects.

        Tries the parsed SPDX expression first (handles compound expressions
        like ``MIT AND Apache-2.0``). Falls back to a named
        ``DisjunctiveLicense`` for free-text strings the SPDX parser rejects.

        The concluded license takes precedence over the declared one. The
        result holds at most one object and is empty when neither is set.
        """
        candidate = metadata.license_concluded or metadata.license_declared
        if not candidate:
            return []

        parsed = _parse_license_expression(candidate)
        if parsed is None:
            # Free text the SPDX parser rejected: keep it as a license name.
            return [DisjunctiveLicense(name=candidate)]

        if " " not in candidate:
            # A single token that parsed: emit it as a plain SPDX license ID.
            try:
                return [DisjunctiveLicense(id=candidate)]
            except Exception:  # noqa: BLE001, S110
                # DisjunctiveLicense validation rejects some technically-valid
                # SPDX IDs (case variants, deprecated identifiers). Falling
                # through to the LicenseExpression path below is the right
                # recovery; no logging needed.
                pass

        # Attach SPDX canonical URLs to each identifier inside the
        # expression. ``_parse_license_expression`` has already
        # rejected every ``LicenseRef-`` and unknown-symbol case, so
        # every leaf yielded here is a known SPDX identifier (license
        # or exception). Sorting by key keeps the emitted ``details``
        # order deterministic across runs, independent of CycloneDX's
        # internal ``SortedSet``.
        identifiers = sorted(set(CycloneDX._iter_spdx_identifiers(parsed)))
        details = tuple(
            LicenseExpressionDetails(
                license_identifier=ident,
                url=XsUri(f"https://spdx.org/licenses/{ident}.html"),
            )
            for ident in identifiers
        )
        return [LicenseExpression(value=candidate, details=details)]

    @staticmethod
    def _iter_spdx_identifiers(node) -> Iterator[str]:
        """Walk a ``license_expression`` AST and yield each leaf SPDX
        identifier as a string.

        ``parsed.symbols`` on the top-level expression collapses a
        ``LicenseWithExceptionSymbol`` into a single symbol whose key is the
        full ``"<license> WITH <exception>"`` string, which is not the SPDX
        identifier of either component. Walk the AST instead:
        ``LicenseWithExceptionSymbol`` exposes the license and exception
        symbols separately; boolean nodes (``AND``/``OR``) expose children via
        ``args``; bare ``LicenseSymbol`` instances expose their identifier via
        ``key``. Duck-typed so this module does not need an explicit import of
        ``license_expression``'s class hierarchy.
        """
        if hasattr(node, "license_symbol") and hasattr(node, "exception_symbol"):
            yield node.license_symbol.key
            yield node.exception_symbol.key
        elif hasattr(node, "key"):
            yield node.key
        else:
            # Nodes without ``args`` yield nothing.
            for arg in getattr(node, "args", ()):
                yield from CycloneDX._iter_spdx_identifiers(arg)

    @staticmethod
    def _external_references_for(metadata: PackageMetadata) -> list:
        """Map metadata URLs to CycloneDX ``externalReferences``.

        Unset URLs are skipped. The download URL is added as a second
        ``DISTRIBUTION`` reference only when it differs from the distribution
        URL, and an upstream SBOM path is linked as a ``file://`` ``BOM``
        reference.
        """
        typed_urls = (
            (ExternalReferenceType.WEBSITE, metadata.homepage),
            (ExternalReferenceType.VCS, metadata.vcs_url),
            (ExternalReferenceType.ISSUE_TRACKER, metadata.issue_tracker_url),
            (ExternalReferenceType.DISTRIBUTION, metadata.distribution_url),
        )
        refs = [
            ExternalReference(type=reference_type, url=XsUri(url))
            for reference_type, url in typed_urls
            if url
        ]
        if metadata.download_url and metadata.download_url != metadata.distribution_url:
            refs.append(
                ExternalReference(
                    type=ExternalReferenceType.DISTRIBUTION,
                    url=XsUri(metadata.download_url),
                )
            )
        if metadata.external_sbom_path is not None:
            refs.append(
                ExternalReference(
                    type=ExternalReferenceType.BOM,
                    url=XsUri(f"file://{metadata.external_sbom_path}"),
                    comment="Per-package upstream SBOM (e.g. HOMEBREW_SBOM).",
                )
            )
        return refs

    @staticmethod
    def _properties_for(metadata: PackageMetadata) -> list:
        """Encode manager-native ``extras`` as CycloneDX ``properties``.

        Properties are namespaced under ``mpm:`` so consumers can filter them
        away when they only care about the standard fields. Entries whose value
        is ``None`` are skipped; the rest are emitted sorted by key with their
        value converted to a string.
        """
        return [
            Property(name=f"mpm:{key}", value=str(value))
            for key, value in sorted(metadata.extras.items())
            if value is not None
        ]

    def add_package(
        self,
        manager: PackageManager,
        package: Package,
        metadata: PackageMetadata = EMPTY_METADATA,
    ) -> None:
        """Add one package to the document as a CycloneDX component.

        `CycloneDX package metadata specifications
        <https://cyclonedx.org/docs/1.7/json/#components>`_.

        The component is indexed for :py:meth:`finalize`, registered as a
        dependency of the root ``mpm`` component, and each of its declared
        dependencies is queued for later resolution.
        """
        # Only a non-organization originator is reported as an author.
        authors = None
        if metadata.originator and not metadata.originator.is_organization:
            authors = [
                OrganizationalContact(
                    name=metadata.originator.name,
                    email=metadata.originator.email,
                )
            ]

        # Leave the version unset when none is known, rather than emitting the
        # literal string ``"None"`` that ``str(None)`` would produce.
        installed_version = package.installed_version
        version = None if installed_version is None else str(installed_version)

        data = Component(
            name=package.id,
            type=ComponentType.APPLICATION,
            # pURL string, by its virtue of containing all important metadata of a
            # package, makes perfect unique IDs.
            bom_ref=package.purl.to_string(),
            group=package.manager_id,
            version=version,
            description=metadata.description or package.description,
            purl=package.purl,
            supplier=self._supplier_for(manager, metadata),
            hashes=self._hashes_for(metadata),
            licenses=self._licenses_for(metadata),
            external_references=self._external_references_for(metadata),
            properties=self._properties_for(metadata),
            copyright=metadata.copyright_text,
            cpe=metadata.cpe,
            authors=authors,
        )
        self.document.components.add(data)
        self.component_index[(manager.id, package.id)] = data
        # Bookkeeping hook inherited from the base class (defined outside this
        # module).
        self._track_addition(manager.id, package.id, metadata)

        self.document.register_dependency(
            self.document.metadata.component,  # type:ignore[arg-type]
            [data],
        )

        # Targets may not have been added yet: defer edges to ``finalize``.
        for dep in metadata.dependencies:
            self.pending_dependencies.append((data, manager.id, dep.target_id))

    def finalize(self) -> None:
        """Resolve queued dependency edges between Components.

        Mirrors :py:meth:`meta_package_manager.sbom.spdx.SPDX.finalize`.
        Dangling references (the dependency target is not in the inventory)
        are dropped silently. Targets are looked up under the same manager ID
        as the source package.
        """
        for source, manager_id, target_id in self.pending_dependencies:
            target = self.component_index.get((manager_id, target_id))
            if target is None:
                continue
            self.document.register_dependency(source, [target])

    def stats(self) -> dict[str, object]:
        """Extend the base stats with CycloneDX-specific counters.

        CycloneDX has no merge-content equivalent: per-package upstream SBOMs
        are linked through ``externalReferences[type=bom]`` rather than spliced
        in. The merged-document count therefore reports the number of
        components carrying a BOM external reference. The dependency-edge total
        walks the registered dependency graph and sums the ``dependsOn``
        collection size across every entry.
        """
        base = super().stats()
        # Count components, not references, so a component with several BOM
        # references is counted once, as documented above.
        components_with_bom = sum(
            1
            for component in self.document.components
            if any(
                ref.type == ExternalReferenceType.BOM
                for ref in component.external_references
            )
        )
        dependency_edges = sum(
            len(dep.dependencies) for dep in self.document.dependencies
        )
        base.update({
            "components_in_document": len(self.document.components),
            "external_bom_references": components_with_bom,
            "dependency_edges": dependency_edges,
        })
        return base

    def export(self) -> str:
        """Serialize the document to its string representation.

        JSON and XML are supported, both as CycloneDX 1.7 with a 2-space
        indent. Any other export format raises ``ValueError``.

        .. note::
            Unlike :py:meth:`meta_package_manager.sbom.spdx.SPDX.export`, the
            generated document is not validated against its schema here.
            CycloneDX schema validation relies on ``cyclonedx-python-lib``'s
            ``[validation]`` extra, which pulls in ``jsonschema`` and,
            transitively, ``rfc3987-syntax``, ``lark``, and ``lxml``. To keep
            that stack out of ``mpm``'s runtime dependencies, the validation
            runs in the test suite instead. See ``tests/test_cli_sbom.py``.
        """
        if self.export_format == ExportFormat.JSON:
            return str(JsonV1Dot7(self.document).output_as_string(indent=2))

        if self.export_format == ExportFormat.XML:
            writer = make_outputter(self.document, OutputFormat.XML, SchemaVersion.V1_7)
            return str(writer.output_as_string(indent=2))

        raise ValueError(f"{self.export_format} not supported.")