Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix detection of dependencies where key and name differ #36

Merged
merged 14 commits into from
Jul 24, 2024
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).


## [UNRELEASED]

* Fixed a problem with dependency detection when the package name differed from the key in pyodide-lock.json (#36).

## [0.5.0] - 2024-07-18

* Updated to Shinylive web assets 0.5.0.
Expand Down
124 changes: 85 additions & 39 deletions shinylive/_deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
}

# Packages that should always be included in a Shinylive deployment.
BASE_PYODIDE_PACKAGES = {"distutils", "micropip", "ssl"}
BASE_PYODIDE_PACKAGE_NAMES = {"distutils", "micropip", "ssl"}
AssetType = Literal["base", "python", "r"]


Expand All @@ -56,6 +56,11 @@ class PyodidePackageInfo(TypedDict):


# The package information structure used by Pyodide's pyodide-lock.json.
# Note that the key in `packages` may be something like "jsonschema-specifications",
# but the actual name of the package may be different, like "jsonschema_specifications".
# (The "name" entry in the PyodidePackageInfo object is the actual package name.)
# And also further note that the module names in the "imports" list are not necessarily
# the same as either: the "opencv-python" package has a module name "cv2".
class PyodideLockFile(TypedDict):
info: dict[str, str]
packages: dict[str, PyodidePackageInfo]
Expand Down Expand Up @@ -92,9 +97,15 @@ def _dep_names_to_pyodide_pkg_infos(
dep_names: Iterable[str],
) -> list[PyodidePackageInfo]:
pyodide_lock = _pyodide_lock_data()
pkg_infos: list[PyodidePackageInfo] = [
copy.deepcopy(pyodide_lock["packages"][dep_name]) for dep_name in dep_names
]
pkg_infos: list[PyodidePackageInfo] = []

for dep_name in dep_names:
dep_key = dep_name_to_dep_key(dep_name)
if dep_key is None:
continue
pkg_info = copy.deepcopy(pyodide_lock["packages"][dep_key])
pkg_infos.append(pkg_info)

return pkg_infos


Expand Down Expand Up @@ -378,7 +389,7 @@ def base_package_deps() -> list[PyodidePackageInfo]:
Return list of python packages that should be included in all python Shinylive
deployments. The returned data structure is a list of PyodidePackageInfo objects.
"""
dep_names = _find_recursive_deps(BASE_PYODIDE_PACKAGES)
dep_names = _find_recursive_deps(BASE_PYODIDE_PACKAGE_NAMES)
pkg_infos = _dep_names_to_pyodide_pkg_infos(dep_names)

return pkg_infos
Expand All @@ -388,7 +399,7 @@ def base_package_deps() -> list[PyodidePackageInfo]:
# Internal functions
# =============================================================================
def _find_recursive_deps(
pkgs: Iterable[str],
dep_names: Iterable[str],
verbose_print: Callable[..., None] = lambda *args: None,
) -> list[str]:
"""
Expand All @@ -397,45 +408,77 @@ def _find_recursive_deps(
packages passed in.
"""
pyodide_lock = _pyodide_lock_data()
deps = list(pkgs)

# The keys in pyodide_lock are not the same as the package names. For example, the
# key "jsonschema-specifications" points to an object where the "name" entry is
# "jsonschema_specifications". The dependencies are listed with names, not keys.

dep_names = list(dep_names)
i = 0
while i < len(deps):
dep = deps[i]
if dep not in pyodide_lock["packages"]:
# TODO: Need to distinguish between built-in packages and external ones in
# requirements.txt.
verbose_print(
f" {dep} not in pyodide-lock.json. Assuming it is in base Pyodide or in requirements.txt."
)
deps.remove(dep)
while i < len(dep_names):
dep_name = dep_names[i]
dep_key: str | None = dep_name_to_dep_key(dep_name)

if dep_key not in pyodide_lock["packages"]:
if dep_name not in BASE_PYODIDE_PACKAGE_NAMES:
# TODO: Need to distinguish between built-in packages and external ones in
# requirements.txt.
verbose_print(
f" {dep_name} not in pyodide-lock.json. Assuming it is in base Pyodide or in requirements.txt."
)
dep_names.remove(dep_name)
continue

dep_deps = set(pyodide_lock["packages"][dep]["depends"])
new_deps = dep_deps.difference(deps)
deps.extend(new_deps)
dep_depnames = set(pyodide_lock["packages"][dep_key]["depends"])
new_depnames = dep_depnames.difference(dep_names)
dep_names.extend(new_depnames)
i += 1

return deps
return dep_names


def _dep_name_to_dep_file(dep_name: str) -> str:
def dep_name_to_dep_key(name: str) -> str | None:
"""
Given the name of a dependency, like "pandas", return the name of the .whl file,
like "pandas-1.4.2-cp310-cp310-emscripten_3_1_14_wasm32.whl".
Convert a package name to a key that can be used to look up the package in
pyodide-lock.json.

The keys in pyodide-lock.json are not the same as the package names. For example,
the key "jsonschema-specifications" points to an object where the "name" entry is
"jsonschema_specifications".

Note that the names are lowercased because the package names should be treated as
case-insensitive. https://github.com/pyodide/pyodide/issues/1614
"""
pyodide_lock = _pyodide_lock_data()
return pyodide_lock["packages"][dep_name]["file_name"]
# Special case for base pyodide packages
if name in BASE_PYODIDE_PACKAGE_NAMES:
return name

name = name.lower()
if name not in _dep_name_to_dep_key_mappings():
return None
Comment on lines +457 to +458
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this maybe throw a warning?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh nvm, now I see the warning above

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It will be fairly common to not have a package in the pyodide_lock.json file. In that case, micropip will go out and try to install it from PyPI.


return _dep_name_to_dep_key_mappings()[name]


def _dep_names_to_dep_files(dep_names: list[str]) -> list[str]:
@functools.lru_cache
def _dep_name_to_dep_key_mappings() -> dict[str, str]:
"""
Given a list of dependency names, like ["pandas"], return a list with the names of
corresponding .whl files (from data in pyodide-lock.json), like
["pandas-1.4.2-cp310-cp310-emscripten_3_1_14_wasm32.whl"].
Return a dictionary that maps package names to keys. This is needed because
sometimes the package name and package key are different. For example, the package
name is "jsonschema_specifications", but the package key is
"jsonschema-specifications".

Note that the names are lowercased because the package names should be treated as
case-insensitive. https://github.com/pyodide/pyodide/issues/1614
"""
name_to_key: dict[str, str] = {}

pyodide_lock = _pyodide_lock_data()
dep_files = [pyodide_lock["packages"][x]["file_name"] for x in dep_names]
return dep_files
for key, pkg_info in pyodide_lock["packages"].items():
name = pkg_info["name"].lower()
name_to_key[name] = key

return name_to_key


def _find_import_app_contents(app_contents: list[FileContentJson]) -> set[str]:
Expand All @@ -452,7 +495,7 @@ def _find_import_app_contents(app_contents: list[FileContentJson]) -> set[str]:
# Note that at this point, the imports are module names, like "cv2", but these can
# sometimes differ from the package names, like "opencv-python". We need to map from
# module names to package names.
packages = [module_to_package(x) for x in imports]
packages = [module_to_package_key(x) for x in imports]
packages = [x for x in packages if x is not None]

return set(packages)
Expand All @@ -468,7 +511,7 @@ def _find_requirements_app_contents(app_contents: list[FileContentJson]) -> set[
"""
packages: set[str] = set()
for file_content in app_contents:
if not file_content["name"] != "requirements.txt":
if file_content["name"] != "requirements.txt":
continue

packages = packages.union(
Expand All @@ -478,31 +521,31 @@ def _find_requirements_app_contents(app_contents: list[FileContentJson]) -> set[
return packages


def module_to_package(module: str) -> str | None:
def module_to_package_key(module: str) -> str | None:
"""
Given a module name, like "cv2", return the corresponding package name, like
"opencv-python". If not found, return None.
"""
module_to_package = _module_to_package_mappings()
module_to_package = _module_to_package_key_mappings()
if module in module_to_package:
return module_to_package[module]
else:
return None


@functools.lru_cache
def _module_to_package_mappings() -> dict[str, str]:
def _module_to_package_key_mappings() -> dict[str, str]:
"""
Return a dictionary that maps module names to package names. This is needed because
sometimes the module name and package name are different. For example, the module
name is "cv2", but the package name is "opencv-python".
"""
pyodide_lock = _pyodide_lock_data()
module_to_package: dict[str, str] = {}
for pkg_name, pkg_info in pyodide_lock["packages"].items():
for pkg_key, pkg_info in pyodide_lock["packages"].items():
modules = pkg_info["imports"]
for module in modules:
module_to_package[module] = pkg_name
module_to_package[module] = pkg_key

return module_to_package

Expand Down Expand Up @@ -597,7 +640,10 @@ def _find_packages_in_requirements(req_txt: str) -> list[str]:
else:
# If we got here, it's a package specification.
# Remove any trailing version info: "my-package (>= 1.0.0)" -> "my-package"
pkg_name = re.sub(r"([a-zA-Z0-9._-]+)(.*)", r"\\1", line).strip()
pkg_name = re.sub(r"([a-zA-Z0-9._-]+)(.*)", r"\1", line).strip()
# Replace underscores with hyphens: "typing_extensions" -> "typing-extensions"
pkg_name = pkg_name.replace("_", "-")

reqs.append(pkg_name)

return reqs
2 changes: 1 addition & 1 deletion shinylive/_version/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# The version of this Python package.
SHINYLIVE_PACKAGE_VERSION = "0.5.0"
SHINYLIVE_PACKAGE_VERSION = "0.5.0.9000"

# This is the version of the Shinylive assets to use.
SHINYLIVE_ASSETS_VERSION = "0.5.0"
2 changes: 1 addition & 1 deletion tests/test_assets.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Tests for Shinlyive assets."""
"""Tests for Shinylive assets."""

import os

Expand Down
96 changes: 96 additions & 0 deletions tests/test_deps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""Tests for Shinylive dependency detection."""

import os

import pytest


def test_requirements_txt():
    """Check that requirements.txt parsing yields bare, hyphenated package names."""
    from shinylive._deps import _find_packages_in_requirements

    requirements_txt = """
typing_extensions
jsonschema-specifications (<1.0)
# comment
"""

    # Underscores become hyphens, version constraints are dropped, and the
    # comment line contributes nothing.
    expected = ["typing-extensions", "jsonschema-specifications"]
    parsed = _find_packages_in_requirements(requirements_txt)
    assert parsed == expected

    # Case is preserved at this stage (lowercasing happens in other steps).
    for spec in ("Jinja2", "jinja2"):
        assert _find_packages_in_requirements(spec) == [spec]


# ======================================================================================
# Don't run remaining tests in CI, unless we're triggered by a release event. This is
# because they require the assets to be installed. In the future, it would make sense to
# run this test when we're on an rc branch.
# ======================================================================================
# Skip only when both hold: we are running under CI, and the triggering GitHub
# event is something other than a release.
if os.environ.get("CI") == "true" and os.environ.get("GITHUB_EVENT_NAME") != "release":
    pytest.skip(
        reason="Don't run this test in CI, unless we're on a release branch.",
        # Needed because this skip is issued at module level, not inside a test.
        allow_module_level=True,
    )


def test_module_to_package_key():
    """Check the module-name to package-key lookup, including misses."""
    from shinylive._deps import module_to_package_key

    # Module names that resolve to a package key; "cv2" is the case where the
    # module name and the key differ.
    expected_keys = {
        "cv2": "opencv-python",
        "black": "black",
        "jinja2": "jinja2",
    }
    for module_name, package_key in expected_keys.items():
        assert module_to_package_key(module_name) == package_key

    # Module names are case sensitive, so "Jinja2" misses; "foobar" is simply
    # not a known module.
    for unknown_module in ("Jinja2", "foobar"):
        assert module_to_package_key(unknown_module) is None


def test_dep_name_to_dep_key():
    """Check the package-name to lock-file-key lookup, including misses."""
    from shinylive._deps import dep_name_to_dep_key

    resolved = {
        "black": "black",
        "typing-extensions": "typing-extensions",
        # The name uses "_" where the key uses "-".
        "jsonschema_specifications-tests": "jsonschema-specifications-tests",
        # Input is matched case-insensitively.
        "Jinja2": "jinja2",
        "JiNJa2": "jinja2",
        # Special case for a base pyodide package. It is not in
        # pyodide_lock.json but should be included in the list of dependencies.
        "distutils": "distutils",
    }
    for dep_name, dep_key in resolved.items():
        assert dep_name_to_dep_key(dep_name) == dep_key

    unresolved = (
        # `_` is not converted to `-` by this function.
        "typing_extensions",
        # "cv2" has no key under that name.
        "cv2",
    )
    for dep_name in unresolved:
        assert dep_name_to_dep_key(dep_name) is None


def test_find_recursive_deps():
    """Check the recursive dependency closure for two known packages."""
    from shinylive._deps import _find_recursive_deps

    # These dependencies may change in future versions of Pyodide. jsonschema
    # is tested specifically because its closure includes
    # jsonschema_specifications, which is the package name (and not the key).
    expected_closures = {
        "jsonschema": [
            "attrs",
            "jsonschema",
            "jsonschema_specifications",
            "pyrsistent",
            "referencing",
            "rpds-py",
            "six",
        ],
        "opencv-python": [
            "numpy",
            "opencv-python",
        ],
    }

    for root_package, closure in expected_closures.items():
        found = _find_recursive_deps([root_package])
        assert sorted(found) == closure
Loading