# Copyright Kevin Deldycke <kevin@deldycke.com> and contributors.
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
"""Manager-agnostic :py:class:`meta_package_manager.package.Package` data model
and the :py:class:`meta_package_manager.package.PackageMetadata` companion
that augments it with data pulled from sources outside the package manager
itself.
Defines the lightweight representation of a package (ID, name, installed and latest
versions, architecture) that every manager operation yields, plus
:py:func:`meta_package_manager.package.packages_asdict` to serialize a subset of its
fields for output.
:py:class:`Package` is the inventory plane: what the package manager itself
reports through its native query commands. It backs every operation in
:py:mod:`meta_package_manager.manager`.
:py:class:`PackageMetadata` is the enrichment plane: licenses, supplier,
checksums, declared dependency graph, on-disk per-package SBOMs, and other
facts gathered through extra queries (CLI sub-commands, on-disk parsers,
upstream registries). Populated by
:py:meth:`meta_package_manager.manager.PackageManager.package_metadata_batch`,
consumed by :py:mod:`meta_package_manager.sbom` today and reserved for any
future caller that wants more than the bare inventory.
Kept deliberately free of manager logic, so it can be imported without pulling in the
manager engine (:py:mod:`meta_package_manager.manager`).
"""
from __future__ import annotations
import re
from dataclasses import asdict, dataclass, field
from enum import Enum
from functools import cached_property
from packageurl import PackageURL
from .version import parse_version
TYPE_CHECKING = False
if TYPE_CHECKING:
from collections.abc import Iterable
from datetime import datetime
from pathlib import Path
from .version import TokenizedString
[docs]
@dataclass
class Package:
    """Lightweight representation of a package and its metadata.

    Holds what a package manager reports about one package: its ID, the ID of
    the manager it belongs to, and the optional name, description, installed
    and latest versions, and architecture.
    """

    id: str
    """ID is required and is the primary key used by the manager."""

    manager_id: str
    """Handy to backtrack whose manager this package belongs to.

    The manager ID is good enough and allows for no coupling with the parent manager
    object.
    """

    name: str | None = None
    """Optional human-readable display name. Falls back to ``id`` in output rendering,
    so only set this when the manager provides a name that differs from the package ID.
    """

    # Optional free-form description. Only searched by ``matches()`` when
    # ``extended=True``.
    description: str | None = None

    installed_version: TokenizedString | str | None = None
    latest_version: TokenizedString | str | None = None
    """Installed and latest versions are optional: they're not always provided by the
    package manager.

    ``installed_version`` and ``latest_version`` are allowed to temporarily be strings
    between ``__init__`` and ``__post_init__``. Once they reach the latter, they're
    parsed and normalized into either ``TokenizedString`` or `None`. They can't be
    strings beyond that point, i.e. after the Package instance has been fully
    instantiated. We don't know how to declare this transient state with type hints,
    so we're just going to allow string type.
    """

    # Optional architecture. Emitted as the ``arch`` qualifier of ``purl`` when set.
    arch: str | None = None

    def __post_init__(self) -> None:
        """Normalize both version fields right after dataclass initialization."""
        # Make sure version strings are parsed into proper objects.
        self.installed_version = parse_version(self.installed_version)  # type: ignore[arg-type]
        self.latest_version = parse_version(self.latest_version)  # type: ignore[arg-type]

    @cached_property
    def purl(self) -> PackageURL:
        """Returns the package's pURL object.

        The manager ID is used as the pURL type and the package ID as its name.
        The version component is omitted when no installed version is known, and
        the ``arch`` qualifier is only added when an architecture is set.

        The result is computed on first access and cached on the instance.
        """
        qualifiers = {}
        if self.arch:
            qualifiers["arch"] = self.arch

        # ``str(None)`` is the literal string ``"None"``, which would produce a
        # bogus ``...@None`` pURL. Leave the version out when there is none.
        version = self.installed_version
        return PackageURL(
            type=self.manager_id,
            name=self.id,
            version=None if version is None else str(version),
            qualifiers=qualifiers,
        )

    @staticmethod
    def query_parts(query: str) -> set[str]:
        """Split ``query`` into its contiguous alphanumeric segments.

        Segments are runs of regex word characters (``\\w``), so underscores are
        kept as part of a segment. Empty segments are discarded, which means an
        empty or punctuation-only query yields an empty set.

        Contrary to :py:class:`meta_package_manager.version.TokenizedString`,
        does not split on collated number/alphabetic junctions.

        Canonical tokenizer behind :py:meth:`matches` and the
        ``search``/``installed``/``outdated`` query matching.
        :py:meth:`meta_package_manager.manager.PackageManager.query_parts`
        delegates here.
        """
        return {p for p in re.split(r"\W+", query) if p}

    def matches(
        self,
        query: str,
        extended: bool = False,
        exact: bool = False,
    ) -> bool:
        """Tell whether this package matches the free-form ``query``.

        Shared predicate behind the ``search``, ``installed`` and ``outdated``
        subcommands, so all three honor the same matching semantics:

        - **Fuzzy** (default): a case-insensitive, tokenized substring match.
          Any alphanumeric segment of ``query`` (see :py:meth:`query_parts`)
          found in the package ID or name counts as a match.
        - **Exact** (``exact=True``): the raw ``query`` must equal the package
          ID or name verbatim (case-sensitive, whole-string).
        - **Extended** (``extended=True``): also look into the package
          ``description``. Only meaningful when the description is populated,
          as it is for ``search`` results.

        Each field is searched on its own: a segment never matches across the
        boundary between two fields.

        A query with no alphanumeric segment (empty or punctuation-only) never
        matches.
        """
        # Look by default into package ID and name.
        content = {self.id, self.name}

        # Reject fuzzy results: only keep packages strictly matching ID or name.
        if exact and query not in content:
            return False

        # Add description to the content to look into.
        if extended:
            content.add(self.description)

        # Search every field separately. Gluing them into one string would let a
        # segment match across the seam between two unrelated fields, and since
        # a set is unordered that seam would change from one run to the next.
        haystacks = [text.lower() for text in content if text]
        needles = {part.lower() for part in self.query_parts(query)}
        return any(needle in haystack for needle in needles for haystack in haystacks)
[docs]
def packages_asdict(packages: Iterable[Package], keep_fields: tuple[str, ...]):
    """Lazily cast each package to a ``dict`` restricted to ``keep_fields``.

    Every package is converted with :py:func:`dataclasses.asdict`, then only the
    keys listed in ``keep_fields`` are retained. Names in ``keep_fields`` that
    are not fields of the package are ignored.

    Returns a generator yielding one ``dict`` per package, in input order. It is
    not a list: it can only be consumed once.
    """

    def _subset(package):
        # Keep the field order produced by ``asdict``, not the one of
        # ``keep_fields``.
        return {
            name: value
            for name, value in asdict(package).items()
            if name in keep_fields
        }

    return (_subset(package) for package in packages)
[docs]
class DependencyScope(str, Enum):
    """Scope label attached to a declared dependency.

    The labels map loosely onto SPDX ``RelationshipType`` variants: SBOM
    renderers translate these into ``RUNTIME_DEPENDENCY_OF``,
    ``BUILD_DEPENDENCY_OF``, etc.; CycloneDX collapses everything to its
    flat ``dependencies`` graph. Future non-SBOM consumers can apply
    their own mapping or just expose the raw scope label.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase label.
    """

    # Iterating over the enum yields members in this declaration order.
    RUNTIME = "runtime"
    BUILD = "build"
    DEV = "dev"
    OPTIONAL = "optional"
    TEST = "test"
    RECOMMENDED = "recommended"
[docs]
class ChecksumAlgorithm(str, Enum):
    """Hash algorithms available to describe a content checksum.

    Limited to the subset of algorithms shared by the SPDX and CycloneDX
    schemas. Used by :py:class:`Checksum` to identify a content hash without
    coupling the data model to any specific SBOM library's enum.

    Members are also ``str`` instances, so each one compares equal to its label.
    """

    MD5 = "MD5"
    SHA1 = "SHA1"
    SHA256 = "SHA256"
    SHA512 = "SHA512"
    # From here on, the label uses a hyphen where the member name has an
    # underscore, and BLAKE2b labels carry a lowercase ``b``.
    SHA3_256 = "SHA3-256"
    SHA3_512 = "SHA3-512"
    BLAKE2B_256 = "BLAKE2b-256"
    BLAKE2B_512 = "BLAKE2b-512"
[docs]
@dataclass(frozen=True)
class Checksum:
    """Immutable ``(algorithm, value)`` pair describing one content hash."""

    # Which algorithm produced ``value``.
    algorithm: ChecksumAlgorithm
    # The digest itself, stored as a string.
    value: str
[docs]
@dataclass(frozen=True)
class Supplier:
    """Whoever distributes the package.

    Not to be confused with the originator: the supplier is whoever served the
    bits (Homebrew, Debian, PyPI), the originator is the upstream author.
    """

    # Required name of the distributor.
    name: str
    # Optional URL of the distributor. Defaults to ``None``.
    url: str | None = None
[docs]
@dataclass(frozen=True)
class Originator:
    """The upstream author or organization behind the package."""

    # Required name of the author or organization.
    name: str
    # Optional contact e-mail. Defaults to ``None``.
    email: str | None = None
    # Flags the originator as an organization. Defaults to ``False``.
    is_organization: bool = False
[docs]
@dataclass(frozen=True)
class Dependency:
    """One edge of the dependency graph declared by a package.

    ``target_id`` is the dependency's manager-native identifier (e.g.
    ``openssl@3`` for Homebrew). Renderers match it against the inventory's
    installed packages to decide whether to emit a relationship.
    """

    # Manager-native identifier of the package being depended on.
    target_id: str
    # Scope of the edge. Defaults to ``DependencyScope.RUNTIME``.
    scope: DependencyScope = DependencyScope.RUNTIME
    # Optional version constraint string. Defaults to ``None``.
    version_constraint: str | None = None
[docs]
@dataclass(frozen=True)
class FileEntry:
    """A file installed on disk by the package.

    Only populated for managers that can cheaply enumerate file contents and
    hashes (dpkg ``.md5sums``, pip ``RECORD``). Omitted otherwise; the SBOM
    renderer leaves ``filesAnalyzed=False`` on the SPDX Package.
    """

    # Required path of the file.
    path: str
    # Optional digests of the file. Each one defaults to ``None``.
    sha256: str | None = None
    sha1: str | None = None
    md5: str | None = None
# Module-level singleton: one default-constructed ``PackageMetadata``, built when
# the module is imported.
# NOTE(review): ``PackageMetadata`` is not defined in the visible part of this
# module; confirm its definition precedes this assignment.
# NOTE(review): this single object is shared by every consumer; confirm that
# ``PackageMetadata`` is immutable so it cannot be altered through one of them.
EMPTY_METADATA = PackageMetadata()
"""Sentinel returned by the default no-op extractor on the base
:py:class:`meta_package_manager.manager.PackageManager`. Consumers (the SBOM
renderers today) treat ``EMPTY_METADATA`` exactly like ``--minimal`` mode for
the package: no enrichment, no placeholders.
"""