Source code for tests.test_pygments
# Copyright Kevin Deldycke <kevin@deldycke.com> and contributors.
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from __future__ import annotations

import sys
import tarfile
from importlib import metadata
from operator import itemgetter
from pathlib import Path

import requests
from boltons.strutils import camel2under
from boltons.typeutils import issubclass
from pygments.filter import Filter
from pygments.filters import get_filter_by_name
from pygments.formatter import Formatter
from pygments.formatters import get_formatter_by_name
from pygments.lexer import Lexer
from pygments.lexers import find_lexer_class_by_name, get_lexer_by_name

from click_extra import pygments as extra_pygments
from click_extra.pygments import DEFAULT_TOKEN_TYPE, collect_session_lexers

if sys.version_info >= (3, 11):
    import tomllib
else:
    import tomli as tomllib  # type: ignore[import-not-found]

PROJECT_ROOT = Path(__file__).parent.parent


def test_ansi_lexers_candidates(tmp_path):
    """Look into the Pygments test suite to find all ANSI lexer candidates.

    Good candidates for ANSI colorization are lexers producing ``Generic.Output``
    tokens, which are often used by REPL-like and scripting terminals to render
    text in a console.

    The list is manually maintained in Click Extra's code, and this test is here
    to detect new candidates arriving with new releases of Pygments.

    .. attention::
        The Pygments source code is downloaded from GitHub in the form of an
        archive, and extracted in a temporary folder.

        The version of Pygments used for this test is the one installed in the
        current environment.

    .. danger:: Security check
        While extracting the archive, we double-check that we are not being fed
        an archive exploiting relative ``..`` or ``.`` path attacks.
    """
    version = metadata.version("pygments")
    source_url = (
        f"https://github.com/pygments/pygments/archive/refs/tags/{version}.tar.gz"
    )
    base_folder = f"pygments-{version}"
    archive_path = tmp_path / f"{base_folder}.tar.gz"

    # Download the source distribution from GitHub.
    with requests.get(source_url) as response:
        assert response.ok
        archive_path.write_bytes(response.content)
    assert archive_path.exists()
    assert archive_path.is_file()
    assert archive_path.stat().st_size > 0

    # Locations of lexer artifacts in the test suite.
    parser_token_traces = {
        str(tmp_path / base_folder / "tests" / "examplefiles" / "*" / "*.output"),
        str(tmp_path / base_folder / "tests" / "snippets" / "*" / "*.txt"),
    }

    # Browse the downloaded package to find the test suite, and inspect the
    # traces of parsed tokens used as gold masters for lexer tests.
    lexer_candidates = set()
    with tarfile.open(archive_path, "r:gz") as tar:
        for member in tar.getmembers():
            # Skip non-test files.
            if not member.isfile():
                continue

            # XXX Guard against relative ``..`` or ``.`` path attacks while
            # extracting the archive.
            filename = tmp_path.joinpath(member.name).resolve()
            assert filename.is_relative_to(tmp_path)

            # Skip files that are not part of the test suite data.
            match = False
            for pattern in parser_token_traces:
                if filename.match(pattern):
                    match = True
                    break
            if not match:
                continue

            file = tar.extractfile(member)
            # Skip empty files.
            if not file:
                continue

            content = file.read().decode("utf-8")
            # Skip lexers that are not producing generic, terminal-like output
            # tokens in their token traces.
            if f" {'.'.join(DEFAULT_TOKEN_TYPE)}\n" not in content:
                continue

            # Extract the lexer alias from the test file path.
            lexer_candidates.add(filename.parent.name)

    assert lexer_candidates
    lexer_classes = {find_lexer_class_by_name(alias) for alias in lexer_candidates}

    # We cannot test for strict equality yet, as some ANSI-ready lexers do not
    # have any test artifacts producing ``Generic.Output`` tokens.
    assert lexer_classes <= set(collect_session_lexers())


def collect_classes(klass, prefix="Ansi"):
    """Returns all classes defined in ``click_extra.pygments`` that are a subclass
    of ``klass``, and whose name starts with the provided ``prefix``."""
    klasses = {}
    for name, var in extra_pygments.__dict__.items():
        # boltons' issubclass() is used here as, unlike the built-in, it does not
        # raise on non-class objects.
        if issubclass(var, klass) and name.startswith(prefix):
            klasses[name] = var
    return klasses
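
# A usage sketch (hedged): assuming click_extra.pygments ships an
# ``AnsiHtmlFormatter`` class, the call below is expected to pick it up:
#
#   collect_classes(Formatter)
#   # -> {"AnsiHtmlFormatter": <class 'click_extra.pygments.AnsiHtmlFormatter'>}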


def get_pyproject_section(*section_path: str) -> dict[str, str]:
    """Descends into the TOML tree of ``pyproject.toml`` to reach the value
    specified by ``section_path``."""
    toml_path = PROJECT_ROOT.joinpath("pyproject.toml").resolve()
    section: dict = tomllib.loads(toml_path.read_text(encoding="utf-8"))
    for section_id in section_path:
        section = section[section_id]
    return section
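
# For example, the call below descends into the
# [project.entry-points."pygments.lexers"] table of pyproject.toml, and returns
# it as a dict mapping entry point IDs to "module:class" strings:
#
#   get_pyproject_section("project", "entry-points", "pygments.lexers")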


def check_entry_points(entry_points: dict[str, str], *section_path: str) -> None:
    """Check that the provided entry points match those declared in the
    ``pyproject.toml`` section pointed to by ``section_path``."""
    entry_points = dict(sorted(entry_points.items(), key=itemgetter(0)))
    project_entry_points = get_pyproject_section(*section_path)
    assert project_entry_points == entry_points
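
# Sketch of the expected call shape (the exact ID and class are assumptions):
#
#   check_entry_points(
#       {"ansi-html-formatter": "click_extra.pygments:AnsiHtmlFormatter"},
#       "project", "entry-points", "pygments.formatters",
#   )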


def test_formatter_entry_points():
    entry_points = {}
    for name in collect_classes(Formatter):
        entry_id = camel2under(name).replace("_", "-")
        entry_points[entry_id] = f"click_extra.pygments:{name}"

    check_entry_points(entry_points, "project", "entry-points", "pygments.formatters")
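
# Worked example of the ID derivation above, assuming an ``AnsiHtmlFormatter``
# class is collected: camel2under("AnsiHtmlFormatter") == "ansi_html_formatter",
# which becomes the "ansi-html-formatter" entry point ID.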


def test_filter_entry_points():
    entry_points = {}
    for name in collect_classes(Filter):
        entry_id = camel2under(name).replace("_", "-")
        entry_points[entry_id] = f"click_extra.pygments:{name}"

    check_entry_points(entry_points, "project", "entry-points", "pygments.filters")


def test_lexer_entry_points():
    entry_points = {}
    for lexer in collect_session_lexers():
        # Check an ANSI lexer variant is available for import from Click Extra.
        ansi_lexer_id = f"Ansi{lexer.__name__}"
        assert ansi_lexer_id in extra_pygments.__dict__

        # Transform the ANSI lexer class ID into an entry point ID.
        entry_id = "-".join(
            w for w in camel2under(ansi_lexer_id).split("_") if w != "lexer"
        )

        # Generate the lexer entry point.
        class_path = f"click_extra.pygments:{ansi_lexer_id}"
        entry_points[entry_id] = class_path

    check_entry_points(entry_points, "project", "entry-points", "pygments.lexers")
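
# Worked example of the transform above, assuming an ``AnsiBashSessionLexer``
# variant: camel2under() gives "ansi_bash_session_lexer", the "lexer" word is
# dropped, and the remaining words are hyphenated into "ansi-bash-session".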


def test_registered_filters():
    for name in collect_classes(Filter):
        entry_id = camel2under(name).replace("_", "-")
        get_filter_by_name(entry_id)
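
# Usage sketch (the "ansi-filter" ID is an assumption derived from the naming
# scheme above): a registered filter is reachable by ID through Pygments'
# plugin system:
#
#   get_filter_by_name("ansi-filter")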


def test_registered_lexers():
    for klass in collect_classes(Lexer).values():
        for alias in klass.aliases:
            get_lexer_by_name(alias)
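
# Likewise, each ANSI lexer variant is reachable through its aliases, e.g.
# (alias is an assumption):
#
#   get_lexer_by_name("ansi-bash-session")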


def test_ansi_lexers_doc():
    doc_content = PROJECT_ROOT.joinpath("docs/pygments.md").read_text(encoding="utf-8")

    for lexer in collect_session_lexers():
        assert lexer.__name__ in doc_content