Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 67 additions & 81 deletions morgan/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import argparse
import configparser
import hashlib
import json
import inspect
import os
import os.path
import re
Expand All @@ -20,21 +20,21 @@

from morgan import configurator, metadata, server
from morgan.__about__ import __version__
from morgan.utils import Cache, to_single_dash
from morgan.utils import RCACHE, to_single_dash

PYPI_ADDRESS = "https://pypi.org/simple/"
PREFERRED_HASH_ALG = "sha256"


class Mirrorer:
class Mirrorer: # pylint: disable=too-few-public-methods
"""
Mirrorer is a class that implements the mirroring capabilities of Morgan.
A class is used to maintain state, as the mirrorer needs to keep track of
packages it already processed in the (very common) case that it encounters
them again as dependencies.
"""

def __init__(self, args: argparse.Namespace):
def __init__(self, args: argparse.Namespace, config: str):
"""
The constructor only needs the path to the package index.
"""
Expand All @@ -45,7 +45,7 @@ def __init__(self, args: argparse.Namespace):
self.index_url = args.index_url
self.mirror_all_versions: bool = args.mirror_all_versions
self.config = configparser.ConfigParser()
self.config.read(args.config)
self.config.read(config)
self.envs = {}
self._supported_pyversions = []
self._supported_platforms = []
Expand All @@ -70,8 +70,6 @@ def __init__(self, args: argparse.Namespace):
)
)

self._processed_pkgs = Cache()

def mirror(self, requirement_string: str):
"""
Mirror a package according to a PEP 508-compliant requirement string.
Expand Down Expand Up @@ -101,54 +99,18 @@ def mirror(self, requirement_string: str):
next_deps.update(more_deps)
deps = next_deps.copy()

def copy_server(self):
    """
    Place a copy of the server script (server.py) in the package index.

    When a server.py file exists next to this module it is copied
    byte-for-byte. Otherwise the source of the imported ``server`` module
    is obtained through the inspect module and written out as text.
    """

    print("Copying server script")
    package_dir = os.path.dirname(os.path.realpath(__file__))
    source_file = os.path.join(package_dir, "server.py")
    target_file = os.path.join(self.index_path, "server.py")

    if not os.path.exists(source_file):
        # No server.py on disk next to this module: fall back to the
        # source code of the already-imported server module.
        import inspect

        with open(target_file, "w") as dst:
            dst.write(inspect.getsource(server))
        return

    with open(source_file, "rb") as src, open(target_file, "wb") as dst:
        dst.write(src.read())

def _mirror(
self,
requirement: packaging.requirements.Requirement,
required_by: packaging.requirements.Requirement = None,
) -> dict:
if self._processed_pkgs.check(requirement):
return None

if required_by:
print("[{}]: {}".format(required_by, requirement))
else:
print("{}".format(requirement))

data: dict = None

# get information about this package from the Simple API in JSON
# format as per PEP 691
request = urllib.request.Request(
"{}{}/".format(self.index_url, requirement.name),
headers={
"Accept": "application/vnd.pypi.simple.v1+json",
},
)

response_url = ""
with urllib.request.urlopen(request) as response:
data = json.load(response)
response_url = str(response.url)
data: dict = RCACHE.get(self.index_url, requirement.name)
response_url = data['response_url']

# check metadata version ~1.0
v_str = data["meta"]["api-version"]
Expand All @@ -172,7 +134,7 @@ def _mirror(
# for any of our environments and don't return an error
return None

if len(files) == 0:
if not files:
raise Exception(f"No files match requirement {requirement}")

# download all files
Expand All @@ -190,8 +152,6 @@ def _mirror(
traceback.print_exc()
continue

self._processed_pkgs.add(requirement)

return depdict

def _filter_files(
Expand Down Expand Up @@ -255,15 +215,15 @@ def _filter_files(
)
)

if len(files) == 0:
if not files:
print(f"Skipping {requirement}, no version matches requirement")
return None

# Now we only have files that satisfy the requirement, and we need to
# filter out files that do not match our environments.
files = list(filter(lambda file: self._matches_environments(file), files))
files = list(filter(self._matches_environments, files))

if len(files) == 0:
if not files:
print(f"Skipping {requirement}, no file matches environments")
return None

Expand All @@ -276,7 +236,8 @@ def _filter_files(
return files

def _matches_environments(self, fileinfo: dict) -> bool:
if req := fileinfo.get("requires-python", None):
req = fileinfo.get("requires-python", None)
if req:
# The Python versions in all of our environments must be supported
# by this file in order to match.
# Some packages specify their required Python versions with a simple
Expand Down Expand Up @@ -312,10 +273,7 @@ def _matches_environments(self, fileinfo: dict) -> bool:
# check if the version matches any of the supported Pythons, and
# only skip it if it does not match any.
intrp_ver_matched = any(
map(
lambda supported_python: intrp_set.contains(supported_python),
self._supported_pyversions,
)
map(intrp_set.contains, self._supported_pyversions)
)

if (
Expand Down Expand Up @@ -495,25 +453,48 @@ def mirror(args: argparse.Namespace):
times on the same index path, files are only downloaded if necessary.
"""

m = Mirrorer(args)
for package in m.config["requirements"]:
reqs = m.config["requirements"][package].splitlines()
if not reqs:
# empty requirements
# morgan =
m.mirror(f"{package}")
else:
# multiline requirements
# urllib3 =
# <1.27
# >=2
# [brotli]
for req in reqs:
req = req.strip()
m.mirror(f"{package}{req}")
for c in args.config:
print('-----------------------------------------------')
print(f'config: {c}')
print('-----------------------------------------------')
m = Mirrorer(args, c)
for package in m.config["requirements"]:
reqs = m.config["requirements"][package].splitlines()
if not reqs:
# empty requirements
# morgan =
m.mirror(f"{package}")
else:
# multiline requirements
# urllib3 =
# <1.27
# >=2
# [brotli]
for req in reqs:
req = req.strip()
m.mirror(f"{package}{req}")

if not args.skip_server_copy:
m.copy_server()
copy_server(args.index_path)


def copy_server(index_path: str):
    """
    Copy the server script to the package index.

    This function first attempts to find the server.py file next to this
    module and copies it byte-for-byte. If that file does not exist, it
    uses the inspect module to get the source code of the imported
    ``server`` module and writes that out instead.

    Args:
        index_path: Directory of the package index. The script is written
            to ``<index_path>/server.py``.
    """

    print("Copying server script")
    thispath = os.path.realpath(__file__)
    serverpath = os.path.join(os.path.dirname(thispath), "server.py")
    outpath = os.path.join(index_path, "server.py")
    if os.path.exists(serverpath):
        # Binary copy: the bytes on disk are reproduced unchanged.
        with open(serverpath, "rb") as inp, open(outpath, "wb") as out:
            out.write(inp.read())
    else:
        # inspect.getsource() returns text. Python source files are UTF-8
        # by default, so write the copy as UTF-8 explicitly instead of
        # relying on the platform's locale encoding.
        with open(outpath, "w", encoding="utf-8") as out:
            out.write(inspect.getsource(server))


def main():
Expand Down Expand Up @@ -550,12 +531,14 @@ def my_url(arg):
type=my_url,
help="Base URL of the Python Package Index",
)

# one request cache for all configs
parser.add_argument(
"-c",
"--config",
dest="config",
nargs="?",
help="Config file (default: <INDEX_PATH>/morgan.ini)",
nargs="*",
help="Config files (default: <INDEX_PATH>/morgan.ini)",
)
parser.add_argument(
"--skip-server-copy",
Expand Down Expand Up @@ -610,16 +593,19 @@ def my_url(arg):
return

if not args.config:
args.config = os.path.join(args.index_path, "morgan.ini")
if not os.path.isfile(args.config):
# If a file named in filenames cannot be opened, that file will be ignored
# https://docs.python.org/3.12/library/configparser.html#configparser.ConfigParser.read
raise argparse.ArgumentTypeError(f"Invalid config: {args.config}")
args.config = [
os.path.join(args.index_path, "morgan.ini"),
]
for c in args.config:
if not os.path.isfile(c):
# If a file named in filenames cannot be opened, that file will be ignored
# https://docs.python.org/3.12/library/configparser.html#configparser.ConfigParser.read
raise argparse.ArgumentTypeError(f"Invalid config: {c}")

if args.command == "mirror":
mirror(args)
elif args.command == "copy_server":
Mirrorer(args).copy_server()
copy_server(args.index_path)


if __name__ == "__main__":
Expand Down
55 changes: 30 additions & 25 deletions morgan/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import json
import re

from packaging.requirements import Requirement
import urllib.parse
import urllib.request
from typing import Dict


def to_single_dash(filename):
Expand All @@ -19,26 +21,29 @@ def to_single_dash(filename):
# selenium-2.0.dev9429.tar.gz


class Cache:  # pylint: disable=protected-access
    """
    Set-backed record of requirements that were already handled.

    A requirement in the "simple case" (see ``is_simple_case``) is recorded
    under its bare name; every other requirement is recorded under its full
    string form.
    """

    def __init__(self):
        self.cache: set[str] = set()

    def _key(self, req) -> str:
        # Entry under which `req` is stored and looked up.
        return req.name if self.is_simple_case(req) else str(req)

    def check(self, req: Requirement) -> bool:
        """Return True when `req` has been added before."""
        return self._key(req) in self.cache

    def add(self, req: Requirement):
        """Record `req` as handled."""
        self.cache.add(self._key(req))

    def is_simple_case(self, req):
        """
        Return True when `req` has no marker, no extras, and either no
        version specifier or only '>' / '>=' specifiers.
        """
        if req.marker or req.extras:
            return False

        specifier = req.specifier
        if not specifier:
            return True

        return all(spec.operator in ('>', '>=') for spec in specifier._specs)
class RequestCache:  # pylint: disable=too-few-public-methods
    """
    In-memory cache of package information fetched from a Simple API index
    in JSON format (PEP 691).

    Entries are keyed by the full request URL (index URL + package name),
    so the same package name looked up on two different indexes yields two
    independent entries.
    """

    def __init__(self):
        # request URL -> decoded JSON response (plus a 'response_url' key).
        # Created per instance so separate caches never share entries.
        self.d: Dict[str, Dict] = {}

    def get(self, url: str, name: str) -> dict:
        """
        Return the index data for package `name`, fetching it on first use.

        Args:
            url: Base URL of the package index; a trailing slash is added
                when missing.
            name: Package name, appended to `url` to form the request URL.

        Returns:
            The decoded JSON document of the response, with an extra
            'response_url' key holding the URL reported by the response
            object. The same dict object is returned on later calls.

        Raises:
            Whatever ``urllib.request.urlopen`` or ``json.load`` raise;
            nothing is cached in that case.
        """
        # Normalise before the lookup so "…/simple" and "…/simple/" share
        # one entry.
        if not url.endswith('/'):
            url += '/'
        package_url = f"{url}{name}/"

        cached = self.d.get(package_url)
        if cached is not None:
            return cached

        # get information about this package from the Simple API in JSON
        # format as per PEP 691
        request = urllib.request.Request(
            package_url,
            headers={
                "Accept": "application/vnd.pypi.simple.v1+json",
            },
        )

        with urllib.request.urlopen(request) as response:
            data = json.load(response)
            data['response_url'] = str(response.url)

        # Store only once the entry is complete.
        self.d[package_url] = data
        return data


RCACHE = RequestCache()
12 changes: 6 additions & 6 deletions tests/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import packaging.requirements
import pytest

from morgan import PYPI_ADDRESS, Mirrorer, parse_interpreter, parse_requirement, server
from morgan import PYPI_ADDRESS, Mirrorer, parse_interpreter, parse_requirement, server, copy_server


class TestParseInterpreter:
Expand Down Expand Up @@ -89,7 +89,7 @@ def test_mirrorer_initialization(self, temp_index_path):
mirror_all_versions=False,
)

mirrorer = Mirrorer(args)
mirrorer = Mirrorer(args, args.config)

assert mirrorer.index_path == temp_index_path
assert mirrorer.index_url == "https://pypi.org/simple/"
Expand All @@ -106,9 +106,9 @@ def test_server_file_copying(self, temp_index_path):
config=os.path.join(temp_index_path, "morgan.ini"),
mirror_all_versions=False,
)
mirrorer = Mirrorer(args)
mirrorer = Mirrorer(args, args.config)

mirrorer.copy_server()
copy_server(args.index_path)

expected_serverpath = os.path.join(temp_index_path, "server.py")
assert os.path.exists(
Expand All @@ -129,7 +129,7 @@ def test_file_hashing(self, temp_index_path):
config=os.path.join(temp_index_path, "morgan.ini"),
mirror_all_versions=False,
)
mirrorer = Mirrorer(args)
mirrorer = Mirrorer(args, args.config)

test_data = b"test content for hashing"
test_file = os.path.join(temp_index_path, "test_artifact.whl")
Expand Down Expand Up @@ -177,7 +177,7 @@ def _make_mirrorer(mirror_all_versions):
config=os.path.join(temp_index_path, "morgan.ini"),
mirror_all_versions=mirror_all_versions,
)
return Mirrorer(args)
return Mirrorer(args, args.config)

return _make_mirrorer

Expand Down