From 131cfd043261fb7aa44f7067ab406b2dcae3165c Mon Sep 17 00:00:00 2001 From: shamilbi Date: Sat, 22 Nov 2025 20:27:27 +0200 Subject: [PATCH 1/7] +RequestCache --- morgan/utils.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/morgan/utils.py b/morgan/utils.py index 425efc1..11f69d6 100644 --- a/morgan/utils.py +++ b/morgan/utils.py @@ -1,4 +1,7 @@ +import json import re +import urllib.parse +import urllib.request from packaging.requirements import Requirement @@ -42,3 +45,31 @@ def is_simple_case(self, req): if all(spec.operator in ('>', '>=') for spec in specifier._specs): return True return False + + +class RequestCache: # pylint: disable=too-few-public-methods + d: dict[str, dict] = {} # name: data + + def get(self, url: str, name: str) -> dict: + if name in self.d: + return self.d[name] + + if not url.endswith('/'): + url += '/' + + # get information about this package from the Simple API in JSON + # format as per PEP 691 + request = urllib.request.Request( + f"{url}{name}/", + headers={ + "Accept": "application/vnd.pypi.simple.v1+json", + }, + ) + + with urllib.request.urlopen(request) as response: + data = self.d[name] = json.load(response) + data['response_url'] = str(response.url) + return data + + +RCACHE = RequestCache() From 8cd7517cea7d4517adb7f7c00fc94386f835dc47 Mon Sep 17 00:00:00 2001 From: shamilbi Date: Sat, 22 Nov 2025 20:31:03 +0200 Subject: [PATCH 2/7] refactor: use RequestCache for all configs --- morgan/__init__.py | 139 +++++++++++++++++++++------------------------ 1 file changed, 66 insertions(+), 73 deletions(-) diff --git a/morgan/__init__.py b/morgan/__init__.py index 755fed9..97adb87 100644 --- a/morgan/__init__.py +++ b/morgan/__init__.py @@ -1,7 +1,7 @@ import argparse import configparser import hashlib -import json +import inspect import os import os.path import re @@ -20,7 +20,7 @@ from morgan import configurator, metadata, server from morgan.__about__ import __version__ -from morgan.utils import 
Cache, to_single_dash +from morgan.utils import RCACHE, Cache, to_single_dash PYPI_ADDRESS = "https://pypi.org/simple/" PREFERRED_HASH_ALG = "sha256" @@ -34,7 +34,7 @@ class Mirrorer: them again as dependencies. """ - def __init__(self, args: argparse.Namespace): + def __init__(self, args: argparse.Namespace, config: str): """ The constructor only needs to path to the package index. """ @@ -45,7 +45,7 @@ def __init__(self, args: argparse.Namespace): self.index_url = args.index_url self.mirror_all_versions: bool = args.mirror_all_versions self.config = configparser.ConfigParser() - self.config.read(args.config) + self.config.read(config) self.envs = {} self._supported_pyversions = [] self._supported_platforms = [] @@ -101,26 +101,6 @@ def mirror(self, requirement_string: str): next_deps.update(more_deps) deps = next_deps.copy() - def copy_server(self): - """ - Copy the server script to the package index. This method will first - attempt to find the server file directly, and if that fails, it will - use the inspect module to get the source code. 
- """ - - print("Copying server script") - thispath = os.path.realpath(__file__) - serverpath = os.path.join(os.path.dirname(thispath), "server.py") - outpath = os.path.join(self.index_path, "server.py") - if os.path.exists(serverpath): - with open(serverpath, "rb") as inp, open(outpath, "wb") as out: - out.write(inp.read()) - else: - import inspect - - with open(outpath, "w") as out: - out.write(inspect.getsource(server)) - def _mirror( self, requirement: packaging.requirements.Requirement, @@ -134,21 +114,8 @@ def _mirror( else: print("{}".format(requirement)) - data: dict = None - - # get information about this package from the Simple API in JSON - # format as per PEP 691 - request = urllib.request.Request( - "{}{}/".format(self.index_url, requirement.name), - headers={ - "Accept": "application/vnd.pypi.simple.v1+json", - }, - ) - - response_url = "" - with urllib.request.urlopen(request) as response: - data = json.load(response) - response_url = str(response.url) + data: dict = RCACHE.get(self.index_url, requirement.name) + response_url = data['response_url'] # check metadata version ~1.0 v_str = data["meta"]["api-version"] @@ -172,7 +139,7 @@ def _mirror( # for any of our environments and don't return an error return None - if len(files) == 0: + if not files: raise Exception(f"No files match requirement {requirement}") # download all files @@ -255,15 +222,15 @@ def _filter_files( ) ) - if len(files) == 0: + if not files: print(f"Skipping {requirement}, no version matches requirement") return None # Now we only have files that satisfy the requirement, and we need to # filter out files that do not match our environments. 
- files = list(filter(lambda file: self._matches_environments(file), files)) + files = list(filter(self._matches_environments, files)) - if len(files) == 0: + if not files: print(f"Skipping {requirement}, no file matches environments") return None @@ -276,7 +243,8 @@ def _filter_files( return files def _matches_environments(self, fileinfo: dict) -> bool: - if req := fileinfo.get("requires-python", None): + req = fileinfo.get("requires-python", None) + if req: # The Python versions in all of our environments must be supported # by this file in order to match. # Some packages specify their required Python versions with a simple @@ -312,10 +280,7 @@ def _matches_environments(self, fileinfo: dict) -> bool: # check if the version matches any of the supported Pythons, and # only skip it if it does not match any. intrp_ver_matched = any( - map( - lambda supported_python: intrp_set.contains(supported_python), - self._supported_pyversions, - ) + map(intrp_set.contains, self._supported_pyversions) ) if ( @@ -495,25 +460,48 @@ def mirror(args: argparse.Namespace): times on the same index path, files are only downloaded if necessary. 
""" - m = Mirrorer(args) - for package in m.config["requirements"]: - reqs = m.config["requirements"][package].splitlines() - if not reqs: - # empty requirements - # morgan = - m.mirror(f"{package}") - else: - # multiline requirements - # urllib3 = - # <1.27 - # >=2 - # [brotli] - for req in reqs: - req = req.strip() - m.mirror(f"{package}{req}") + for c in args.config: + print('-----------------------------------------------') + print(f'config: {c}') + print('-----------------------------------------------') + m = Mirrorer(args, c) + for package in m.config["requirements"]: + reqs = m.config["requirements"][package].splitlines() + if not reqs: + # empty requirements + # morgan = + m.mirror(f"{package}") + else: + # multiline requirements + # urllib3 = + # <1.27 + # >=2 + # [brotli] + for req in reqs: + req = req.strip() + m.mirror(f"{package}{req}") if not args.skip_server_copy: - m.copy_server() + copy_server(args.index_path) + + +def copy_server(index_path: str): + """ + Copy the server script to the package index. This method will first + attempt to find the server file directly, and if that fails, it will + use the inspect module to get the source code. 
+ """ + + print("Copying server script") + thispath = os.path.realpath(__file__) + serverpath = os.path.join(os.path.dirname(thispath), "server.py") + outpath = os.path.join(index_path, "server.py") + if os.path.exists(serverpath): + with open(serverpath, "rb") as inp, open(outpath, "wb") as out: + out.write(inp.read()) + else: + with open(outpath, "w") as out: + out.write(inspect.getsource(server)) def main(): @@ -550,12 +538,14 @@ def my_url(arg): type=my_url, help="Base URL of the Python Package Index", ) + + # one request cache for all configs parser.add_argument( "-c", "--config", dest="config", - nargs="?", - help="Config file (default: <INDEX_PATH>/morgan.ini)", + nargs="*", + help="Config files (default: <INDEX_PATH>/morgan.ini)", ) parser.add_argument( "--skip-server-copy", @@ -610,16 +600,19 @@ def my_url(arg): return if not args.config: - args.config = os.path.join(args.index_path, "morgan.ini") - if not os.path.isfile(args.config): - # If a file named in filenames cannot be opened, that file will be ignored - # https://docs.python.org/3.12/library/configparser.html#configparser.ConfigParser.read - raise argparse.ArgumentTypeError(f"Invalid config: {args.config}") + args.config = [ + os.path.join(args.index_path, "morgan.ini"), + ] + for c in args.config: + if not os.path.isfile(c): + # If a file named in filenames cannot be opened, that file will be ignored + # https://docs.python.org/3.12/library/configparser.html#configparser.ConfigParser.read + raise argparse.ArgumentTypeError(f"Invalid config: {c}") if args.command == "mirror": mirror(args) elif args.command == "copy_server": - Mirrorer(args).copy_server() + copy_server(args.index_path) if __name__ == "__main__": From 65cab94a2f3635fd89ad7400b908a62c4b3801d2 Mon Sep 17 00:00:00 2001 From: shamilbi Date: Sat, 22 Nov 2025 20:58:42 +0200 Subject: [PATCH 3/7] fix: Mirrorer(args, args.config) --- tests/test_init.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_init.py b/tests/test_init.py
index 7a164c6..9483d72 100644 --- a/tests/test_init.py +++ b/tests/test_init.py @@ -89,7 +89,7 @@ def test_mirrorer_initialization(self, temp_index_path): mirror_all_versions=False, ) - mirrorer = Mirrorer(args) + mirrorer = Mirrorer(args, args.config) assert mirrorer.index_path == temp_index_path assert mirrorer.index_url == "https://pypi.org/simple/" @@ -106,7 +106,7 @@ def test_server_file_copying(self, temp_index_path): config=os.path.join(temp_index_path, "morgan.ini"), mirror_all_versions=False, ) - mirrorer = Mirrorer(args) + mirrorer = Mirrorer(args, args.config) mirrorer.copy_server() @@ -129,7 +129,7 @@ def test_file_hashing(self, temp_index_path): config=os.path.join(temp_index_path, "morgan.ini"), mirror_all_versions=False, ) - mirrorer = Mirrorer(args) + mirrorer = Mirrorer(args, args.config) test_data = b"test content for hashing" test_file = os.path.join(temp_index_path, "test_artifact.whl") @@ -177,7 +177,7 @@ def _make_mirrorer(mirror_all_versions): config=os.path.join(temp_index_path, "morgan.ini"), mirror_all_versions=mirror_all_versions, ) - return Mirrorer(args) + return Mirrorer(args, args.config) return _make_mirrorer From 0f84dc2ea26e735aef88a571a054eb238ffc9c1b Mon Sep 17 00:00:00 2001 From: shamilbi Date: Sat, 22 Nov 2025 21:02:08 +0200 Subject: [PATCH 4/7] fix: copy_server() --- tests/test_init.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_init.py b/tests/test_init.py index 9483d72..def7363 100644 --- a/tests/test_init.py +++ b/tests/test_init.py @@ -6,7 +6,7 @@ import packaging.requirements import pytest -from morgan import PYPI_ADDRESS, Mirrorer, parse_interpreter, parse_requirement, server +from morgan import PYPI_ADDRESS, Mirrorer, parse_interpreter, parse_requirement, server, copy_server class TestParseInterpreter: @@ -108,7 +108,7 @@ def test_server_file_copying(self, temp_index_path): ) mirrorer = Mirrorer(args, args.config) - mirrorer.copy_server() + copy_server(args.index_path) 
expected_serverpath = os.path.join(temp_index_path, "server.py") assert os.path.exists( From 3f864763936b8e652b1c4cd09a8ddd0800215313 Mon Sep 17 00:00:00 2001 From: shamilbi Date: Sat, 22 Nov 2025 21:06:27 +0200 Subject: [PATCH 5/7] fix: python3.8: dict[str, dict] -> Dict[str, Dict] --- morgan/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/morgan/utils.py b/morgan/utils.py index 11f69d6..e9b54f6 100644 --- a/morgan/utils.py +++ b/morgan/utils.py @@ -2,6 +2,7 @@ import re import urllib.parse import urllib.request +from typing import Dict from packaging.requirements import Requirement @@ -48,7 +49,7 @@ def is_simple_case(self, req): class RequestCache: # pylint: disable=too-few-public-methods - d: dict[str, dict] = {} # name: data + d: Dict[str, Dict] = {} # name: data def get(self, url: str, name: str) -> dict: if name in self.d: From e60a98d04b85f9d186e6ee587231e0acdaf72fce Mon Sep 17 00:00:00 2001 From: shamilbi Date: Mon, 24 Nov 2025 17:45:15 +0200 Subject: [PATCH 6/7] delete Cache --- morgan/utils.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/morgan/utils.py b/morgan/utils.py index e9b54f6..c33a26d 100644 --- a/morgan/utils.py +++ b/morgan/utils.py @@ -4,8 +4,6 @@ import urllib.request from typing import Dict -from packaging.requirements import Requirement - def to_single_dash(filename): 'https://packaging.python.org/en/latest/specifications/version-specifiers/#version-specifiers' @@ -23,31 +21,6 @@ def to_single_dash(filename): # selenium-2.0.dev9429.tar.gz -class Cache: # pylint: disable=protected-access - def __init__(self): - self.cache: set[str] = set() - - def check(self, req: Requirement) -> bool: - if self.is_simple_case(req): - return req.name in self.cache - return str(req) in self.cache - - def add(self, req: Requirement): - if self.is_simple_case(req): - self.cache.add(req.name) - else: - self.cache.add(str(req)) - - def is_simple_case(self, req): - if not req.marker and not req.extras: 
- specifier = req.specifier - if not specifier: - return True - if all(spec.operator in ('>', '>=') for spec in specifier._specs): - return True - return False - - class RequestCache: # pylint: disable=too-few-public-methods d: Dict[str, Dict] = {} # name: data From df9ec6f57732485e33a0ed8cf119130c11709fb8 Mon Sep 17 00:00:00 2001 From: shamilbi Date: Mon, 24 Nov 2025 17:47:06 +0200 Subject: [PATCH 7/7] Mirrorer: delete _processed_pkgs = Cache() --- morgan/__init__.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/morgan/__init__.py b/morgan/__init__.py index 97adb87..88649ed 100644 --- a/morgan/__init__.py +++ b/morgan/__init__.py @@ -20,13 +20,13 @@ from morgan import configurator, metadata, server from morgan.__about__ import __version__ -from morgan.utils import RCACHE, Cache, to_single_dash +from morgan.utils import RCACHE, to_single_dash PYPI_ADDRESS = "https://pypi.org/simple/" PREFERRED_HASH_ALG = "sha256" -class Mirrorer: +class Mirrorer: # pylint: disable=too-few-public-methods """ Mirrorer is a class that implements the mirroring capabilities of Morgan. A class is used to maintain state, as the mirrorer needs to keep track of @@ -70,8 +70,6 @@ def __init__(self, args: argparse.Namespace, config: str): ) ) - self._processed_pkgs = Cache() - def mirror(self, requirement_string: str): """ Mirror a package according to a PEP 508-compliant requirement string. @@ -106,9 +104,6 @@ def _mirror( requirement: packaging.requirements.Requirement, required_by: packaging.requirements.Requirement = None, ) -> dict: - if self._processed_pkgs.check(requirement): - return None - if required_by: print("[{}]: {}".format(required_by, requirement)) else: @@ -157,8 +152,6 @@ def _mirror( traceback.print_exc() continue - self._processed_pkgs.add(requirement) - return depdict def _filter_files(