From f7bb2d9eb4442a32cc2b0d7793dae679037fce61 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Tue, 6 May 2025 17:18:01 +0100 Subject: [PATCH 1/5] Add initial repartition script. Fixes #5 --- scripts/repartition-index.py | 212 +++++++++++++++++++++++++++++++++++ src/manage/tagutils.py | 6 + src/manage/urlutils.py | 7 +- src/manage/verutils.py | 3 + 4 files changed, 225 insertions(+), 3 deletions(-) create mode 100644 scripts/repartition-index.py diff --git a/scripts/repartition-index.py b/scripts/repartition-index.py new file mode 100644 index 0000000..b1ca375 --- /dev/null +++ b/scripts/repartition-index.py @@ -0,0 +1,212 @@ +import json +import re +import sys + +from collections import OrderedDict +from pathlib import Path +from urllib.request import Request, urlopen + +REPO = Path(__file__).absolute().parent.parent +sys.path.append(str(REPO / "src")) + +from manage.urlutils import IndexDownloader +from manage.tagutils import CompanyTag, tag_or_range +from manage.verutils import Version + + +def usage(): + print("Usage: repartition-index.py [-i options ...] [options ...]") + print() + print(" -i One or more files to read existing entries from.") + print(" -i -n/--no-recurse Do not follow 'next' info") + print() + print(" Filename to write entries into") + print(" -d/--allow-dup Include entries written in previous outputs") + print(" --pre Include entries marked as prereleases") + print(" -t/--tag TAG Include entries matching the specified tag") + print(" -r/--range RANGE Include entries included within the specified range") + print(" --latest-micro Include entries that are the latest x.y.z version") + print() + print("An output of 'nul' is permitted to drop entries.") + print("Providing the same inputs and outputs is permitted, as all inputs are read") + print("before any outputs are written.") + sys.exit(1) + + +class ReadFile: + def __init__(self): + self.source = None + self.recurse = True + + def add_arg(self, arg): + if arg[:1] != "-": + self.source = arg + return True + if arg in ("-n", "--no-recurse"): + self.recurse = False + return False + raise ValueError("Unknown argument: " + arg) + + def execute(self, versions, context): + for _, data in IndexDownloader(self.source, lambda *a: a): + versions.extend(data["versions"]) + if not self.recurse: + break + + +class SortVersions: + def __init__(self): + pass + + def add_arg(self, arg): + raise ValueError("Unknown argument: " + arg) + + def _number_sortkey(self, k): + bits = [] + for n in re.split(r"(\d+)", k): + try: + bits.append(f"{int(n):020}") + except ValueError: + bits.append(n) + return tuple(bits) + + def _sort_key(self, v): + from manage.tagutils import _CompanyKey, _DescendingVersion + return ( + _DescendingVersion(v["sort-version"]), + _CompanyKey(v["company"]), + self._number_sortkey(v["id"]), + ) + + def execute(self, versions, context): + versions.sort(key=self._sort_key) + + +class SplitToFile: + def __init__(self): + self.target = None + self.allow_dup = False + self.pre = False + self.tag_or_range = None + self._expect_tag_or_range = False + self.latest_micro = False + + def add_arg(self, arg): + if arg[:1] != "-": + if self._expect_tag_or_range: + self.tag_or_range = tag_or_range(arg) + self._expect_tag_or_range = False + return False + self.target = arg + return True + if arg in ("-d", "--allow-dup"): + self.allow_dup = True + return False + if arg == "--pre": + self.pre = True + return False + if arg in ("-t", "--tag", "-r", "--range"): + self._expect_tag_or_range = True + return False + if arg == "--latest-micro": + self.latest_micro = True + return False + raise ValueError("Unknown argument: " + arg) + + def execute(self, versions, context): + written = context.setdefault("written", set()) + outputs = context.setdefault("outputs", {}) + if self.target != "nul": + try: + output = outputs[self.target] + except KeyError: + context.setdefault("output_order", []).append(self.target) + output = outputs.setdefault(self.target, []) + else: + # Write to a list that'll be forgotten + output = [] + + latest_micro_skip = set() + + for i in versions: + k = i["id"].casefold(), i["sort-version"].casefold() + v = Version(i["sort-version"]) + if not self.allow_dup and k in written: + continue + if not self.pre and v.is_prerelease: + continue + if self.tag_or_range and not any( + self.tag_or_range.satisfied_by(CompanyTag(i["company"], t)) + for t in i["install-for"] + ): + continue + if self.latest_micro: + k2 = i["id"].casefold(), v.to_python_style(2, with_dev=False) + if k2 in latest_micro_skip: + continue + latest_micro_skip.add(k2) + output.append(i) + written.add(k) + + +class WriteFiles: + def __init__(self): + self.indent = None + + def add_arg(self, arg): + if arg == "-w-indent": + self.indent = 4 + return False + if arg == "-w-indent1": + self.indent = 1 + return False + raise ValueError("Unknown argument: " + arg) + + def execute(self, versions, context): + outputs = context.get("outputs") or {} + output_order = context.get("output_order", []) + for target, next_target in zip(output_order, [*output_order[1:], None]): + data = { + "versions": outputs[target] + } + if next_target: + data["next"] = next_target + with open(target, "w", encoding="utf-8") as f: + json.dump(data, f, indent=self.indent) + + +def parse_cli(args): + plan_read = [] + plan_split = [] + sort = SortVersions() + action = None + write = WriteFiles() + for a in args: + if a == "-i": + action = ReadFile() + plan_read.append(action) + elif a.startswith("-s-"): + sort.add_arg(a) + elif a.startswith("-w-"): + write.add_arg(a) + else: + try: + if action is None: + action = SplitToFile() + plan_split.append(action) + if action.add_arg(a): + action = None + continue + except ValueError: + pass + usage() + return [*plan_read, sort, *plan_split, write] + + +if __name__ == "__main__": + plan = parse_cli(sys.argv[1:]) + VERSIONS = [] + CONTEXT = {} + for p in plan: + p.execute(VERSIONS, CONTEXT) + diff --git a/src/manage/tagutils.py b/src/manage/tagutils.py index b31fc39..cf00fd2 100644 --- a/src/manage/tagutils.py +++ b/src/manage/tagutils.py @@ -26,6 +26,9 @@ def startswith(self, other): return self._company.startswith(other._company) return self._company == other._company + def __hash__(self): + return hash(self._company) + def __eq__(self, other): return self._company == other._company @@ -64,6 +67,9 @@ def startswith(self, other): return not self.s return self.s.startswith(other.s) + def __hash__(self): + return hash(self.s) + def __eq__(self, other): if not isinstance(other, type(self)): return False diff --git a/src/manage/urlutils.py b/src/manage/urlutils.py index 4e00638..81f40e3 100644 --- a/src/manage/urlutils.py +++ b/src/manage/urlutils.py @@ -677,10 +677,11 @@ def __next__(self): LOGGER.error("An unexpected error occurred while downloading the index: %s", ex) raise - index = self.index_cls(self._url, json.loads(data)) + j = json.loads(data) + index = self.index_cls(self._url, j) - if index.next_url: - self._url = urljoin(url, index.next_url, to_parent=True) + if j.get("next"): + self._url = urljoin(url, j["next"], to_parent=True) else: self._url = None return index diff --git a/src/manage/verutils.py b/src/manage/verutils.py index e8412c4..2a4cbda 100644 --- a/src/manage/verutils.py +++ b/src/manage/verutils.py @@ -53,6 +53,9 @@ def __str__(self): def __repr__(self): return self.s + def __hash__(self): + return hash(self.sortkey) + def _are_equal(self, other, prefix_match=None, other_prefix_match=None, prerelease_match=None): if other is None: return False From bebe1689655bb0411bbf7b31abe34d0de668c276 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Tue, 6 May 2025 20:38:34 +0100 Subject: [PATCH 2/5] Improved partition script --- scripts/repartition-index.py | 44 +++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/scripts/repartition-index.py b/scripts/repartition-index.py index b1ca375..ffd7919 100644 --- a/scripts/repartition-index.py +++ b/scripts/repartition-index.py @@ -17,15 +17,19 @@ def usage(): print("Usage: repartition-index.py [-i options ...] [options ...]") print() - print(" -i One or more files to read existing entries from.") + print(" --windows-default Implies default output files and configurations.") + print() + print(" -i One or more files or URLs to read existing entries from.") print(" -i -n/--no-recurse Do not follow 'next' info") + print("If no files are provided, uses the current online index") print() print(" Filename to write entries into") print(" -d/--allow-dup Include entries written in previous outputs") + print(" --only-dup Only include entries written in previous outputs") print(" --pre Include entries marked as prereleases") - print(" -t/--tag TAG Include entries matching the specified tag") - print(" -r/--range RANGE Include entries included within the specified range") - print(" --latest-micro Include entries that are the latest x.y.z version") + print(" -t/--tag TAG Include only the specified tags (comma-separated)") + print(" -r/--range RANGE Include only the specified range (comma-separated)") + print(" --latest-micro Include only the latest x.y.z version") print() print("An output of 'nul' is permitted to drop entries.") print("Providing the same inputs and outputs is permitted, as all inputs are read") @@ -80,12 +84,14 @@ def _sort_key(self, v): def execute(self, versions, context): versions.sort(key=self._sort_key) + print("Processing {} entries".format(len(versions))) class SplitToFile: def __init__(self): self.target = None self.allow_dup = False + self.only_dup = False self.pre = False self.tag_or_range = None self._expect_tag_or_range = False @@ -102,6 +108,10 @@ def add_arg(self, arg): if arg in ("-d", "--allow-dup"): self.allow_dup = True return False + if arg == "--only-dup": + self.allow_dup = True + self.only_dup = True + return False if arg == "--pre": self.pre = True return False @@ -115,6 +125,7 @@ def add_arg(self, arg): def execute(self, versions, context): written = context.setdefault("written", set()) + written_now = set() outputs = context.setdefault("outputs", {}) if self.target != "nul": try: @@ -131,6 +142,9 @@ def execute(self, versions, context): for i in versions: k = i["id"].casefold(), i["sort-version"].casefold() v = Version(i["sort-version"]) + if self.only_dup and k not in written_now: + written_now.add(k) + continue if not self.allow_dup and k in written: continue if not self.pre and v.is_prerelease: @@ -145,8 +159,8 @@ def execute(self, versions, context): if k2 in latest_micro_skip: continue latest_micro_skip.add(k2) - output.append(i) written.add(k) + output.append(i) class WriteFiles: @@ -173,6 +187,9 @@ def execute(self, versions, context): data["next"] = next_target with open(target, "w", encoding="utf-8") as f: json.dump(data, f, indent=self.indent) + print("Wrote {} ({} entries, {} bytes)".format( + target, len(data["versions"]), Path(target).stat().st_size + )) def parse_cli(args): @@ -182,7 +199,16 @@ def parse_cli(args): action = None write = WriteFiles() for a in args: - if a == "-i": + if a == "--windows-default": + plan_split = [SplitToFile(), SplitToFile(), SplitToFile()] + plan_split[0].target = "index-windows.json" + plan_split[1].target = "index-windows-recent.json" + plan_split[2].target = "index-windows-legacy.json" + plan_split[0].pre = plan_split[1].pre = plan_split[2].pre = True + plan_split[0].latest_micro = True + plan_split[0].tag_or_range = tag_or_range(">=3.11.0") + plan_split[1].tag_or_range = tag_or_range(">=3.11.0") + elif a == "-i": action = ReadFile() plan_read.append(action) elif a.startswith("-s-"): @@ -200,6 +226,12 @@ def parse_cli(args): except ValueError: pass usage() + if not plan_read: + action = ReadFile() + action.source = "https://www.python.org/ftp/python/index-windows.json" + plan_read.append(action) + if not plan_split: + usage() return [*plan_read, sort, *plan_split, write] From c1feabeeea637c592b974ae46bb137405d6d451d Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Tue, 6 May 2025 21:07:18 +0100 Subject: [PATCH 3/5] Add repartition job --- ci/repartition-index.yml | 87 ++++++++++++++++++++++++++++++++++++++++ ci/upload.py | 13 ++++-- 2 files changed, 96 insertions(+), 4 deletions(-) create mode 100644 ci/repartition-index.yml diff --git a/ci/repartition-index.yml b/ci/repartition-index.yml new file mode 100644 index 0000000..1fe1c4e --- /dev/null +++ b/ci/repartition-index.yml @@ -0,0 +1,87 @@ +# Repartitioning runs on Azure Pipelines, because that's where we have SSH +# access to the download server. + +name: $(Date:yyyyMMdd).$(Rev:r) + +# Do not run automatically +trigger: none + + +parameters: +- name: Publish + displayName: "Publish" + type: boolean + default: false +- name: TestPublish + displayName: "Run all steps without publishing" + type: boolean + default: false + +stages: +- stage: PyManagerIndexPartition + displayName: 'Repartition PyManager Index' + + jobs: + - job: Repartition + + pool: + vmImage: 'windows-latest' + + variables: + - group: PythonOrgPublish + + steps: + - checkout: self + + - task: NugetToolInstaller@0 + displayName: 'Install Nuget' + + - powershell: | + nuget install -o host_python -x -noninteractive -prerelease python + Write-Host "##vso[task.prependpath]$(gi host_python\python\tools)" + displayName: 'Install host Python' + workingDirectory: $(Build.BinariesDirectory) + + - powershell: | + cd (mkdir -Force index) + python "$(Build.SourcesDirectory)\scripts\repartition-index.py" --default-windows + displayName: 'Repartition index' + workingDirectory: $(Build.BinariesDirectory) + + - publish: $(Build.BinariesDirectory)\index + artifact: index + displayName: Publish index artifact + + - ${{ if or(eq(parameters.Publish, 'true'), eq(parameters.TestPublish, 'true')) }}: + - ${{ if ne(parameters.TestPublish, 'true') }}: + - task: DownloadSecureFile@1 + name: sshkey + inputs: + secureFile: pydotorg-ssh.ppk + displayName: 'Download PuTTY key' + + - powershell: | + git clone https://github.com/python/cpython-bin-deps --branch putty --single-branch --depth 1 --progress -v "putty" + "##vso[task.prependpath]$(gi putty)" + workingDirectory: $(Pipeline.Workspace) + displayName: 'Download PuTTY binaries' + + - powershell: | + dir *.json | %{ + pscp -batch -hostkey $env:UPLOAD_HOST_KEY -noagent -i $env:UPLOAD_KEYFILE ` + $_ "${env:UPLOAD_USER}@${env:UPLOAD_HOST}:/srv/www.python.org/ftp/python/$($_.Name)" + plink -batch -hostkey $env:UPLOAD_HOST_KEY + } + displayName: 'Publish packages' + env: + UPLOAD_URL: $(PyDotOrgUrlPrefix)python/pymanager + UPLOAD_DIR: $(DIST_DIR) + UPLOAD_URL_PREFIX: $(PyDotOrgUrlPrefix) + UPLOAD_PATH_PREFIX: $(PyDotOrgUploadPathPrefix) + UPLOAD_HOST: $(PyDotOrgServer) + UPLOAD_HOST_KEY: $(PyDotOrgHostKey) + UPLOAD_USER: $(PyDotOrgUsername) + ${{ if eq(parameters.TestPublish, 'true') }}: + NO_UPLOAD: 1 + ${{ else }}: + UPLOAD_KEYFILE: $(sshkey.secureFilePath) diff --git a/ci/upload.py b/ci/upload.py index 889c11e..fd5a075 100644 --- a/ci/upload.py +++ b/ci/upload.py @@ -9,14 +9,14 @@ UPLOAD_PATH_PREFIX = os.getenv("UPLOAD_PATH_PREFIX", "/srv/www.python.org/ftp/") UPLOAD_URL = os.getenv("UPLOAD_URL") UPLOAD_DIR = os.getenv("UPLOAD_DIR") -# A version will be inserted before the extension later on -MANIFEST_FILE = os.getenv("MANIFEST_FILE") UPLOAD_HOST = os.getenv("UPLOAD_HOST", "") UPLOAD_HOST_KEY = os.getenv("UPLOAD_HOST_KEY", "") UPLOAD_KEYFILE = os.getenv("UPLOAD_KEYFILE", "") UPLOAD_USER = os.getenv("UPLOAD_USER", "") NO_UPLOAD = os.getenv("NO_UPLOAD", "no")[:1].lower() in "yt1" +UPLOADING_INDEX = os.getenv("UPLOADING_INDEX", "no")[:1].lower() in "yt1" + if not UPLOAD_URL: print("##[error]Cannot upload without UPLOAD_URL") @@ -179,10 +179,15 @@ def purge(url): UPLOADS = [] -for pat in ("python-manager-*.msix", "python-manager-*.msi", "pymanager.appinstaller"): - for f in UPLOAD_DIR.glob(pat): +if UPLOADING_INDEX: + for f in UPLOAD_DIR.glob("*.json"): u = UPLOAD_URL + f.name UPLOADS.append((f, u, url2path(u))) +else: + for pat in ("python-manager-*.msix", "python-manager-*.msi", "pymanager.appinstaller"): + for f in UPLOAD_DIR.glob(pat): + u = UPLOAD_URL + f.name + UPLOADS.append((f, u, url2path(u))) print("Planned uploads:") for f, u, p in UPLOADS: From 8891a6eff671a2673021813b93bf0ef36ed23d78 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Tue, 6 May 2025 21:15:15 +0100 Subject: [PATCH 4/5] Fix build steps --- ci/repartition-index.yml | 13 +++++-------- ci/upload.py | 1 + 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/ci/repartition-index.yml b/ci/repartition-index.yml index 1fe1c4e..c498c55 100644 --- a/ci/repartition-index.yml +++ b/ci/repartition-index.yml @@ -44,7 +44,7 @@ stages: - powershell: | cd (mkdir -Force index) - python "$(Build.SourcesDirectory)\scripts\repartition-index.py" --default-windows + python "$(Build.SourcesDirectory)\scripts\repartition-index.py" --windows-default displayName: 'Repartition index' workingDirectory: $(Build.BinariesDirectory) @@ -67,20 +67,17 @@ stages: displayName: 'Download PuTTY binaries' - powershell: | - dir *.json | %{ - pscp -batch -hostkey $env:UPLOAD_HOST_KEY -noagent -i $env:UPLOAD_KEYFILE ` - $_ "${env:UPLOAD_USER}@${env:UPLOAD_HOST}:/srv/www.python.org/ftp/python/$($_.Name)" - plink -batch -hostkey $env:UPLOAD_HOST_KEY - } + python ci\upload.py displayName: 'Publish packages' env: - UPLOAD_URL: $(PyDotOrgUrlPrefix)python/pymanager - UPLOAD_DIR: $(DIST_DIR) + UPLOAD_URL: $(PyDotOrgUrlPrefix)python/ + UPLOAD_DIR: $(Build.BinariesDirectory)\index UPLOAD_URL_PREFIX: $(PyDotOrgUrlPrefix) UPLOAD_PATH_PREFIX: $(PyDotOrgUploadPathPrefix) UPLOAD_HOST: $(PyDotOrgServer) UPLOAD_HOST_KEY: $(PyDotOrgHostKey) UPLOAD_USER: $(PyDotOrgUsername) + UPLOADING_INDEX: true ${{ if eq(parameters.TestPublish, 'true') }}: NO_UPLOAD: 1 ${{ else }}: diff --git a/ci/upload.py b/ci/upload.py index fd5a075..2dbb04b 100644 --- a/ci/upload.py +++ b/ci/upload.py @@ -15,6 +15,7 @@ UPLOAD_USER = os.getenv("UPLOAD_USER", "") NO_UPLOAD = os.getenv("NO_UPLOAD", "no")[:1].lower() in "yt1" +# Set to 'true' when updating index.json, rather than the app UPLOADING_INDEX = os.getenv("UPLOADING_INDEX", "no")[:1].lower() in "yt1" From f4758eb2a0afc26a6c5b330487eb7539680a025f Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Tue, 6 May 2025 21:19:58 +0100 Subject: [PATCH 5/5] Improved output --- scripts/repartition-index.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/repartition-index.py b/scripts/repartition-index.py index ffd7919..a255810 100644 --- a/scripts/repartition-index.py +++ b/scripts/repartition-index.py @@ -200,6 +200,9 @@ def parse_cli(args): write = WriteFiles() for a in args: if a == "--windows-default": + print("Using equivalent of: --pre --latest-micro -r >=3.11.0 index-windows.json") + print(" --pre -r >=3.11.0 index-windows-recent.json") + print(" index-windows-legacy.json") plan_split = [SplitToFile(), SplitToFile(), SplitToFile()] plan_split[0].target = "index-windows.json" plan_split[1].target = "index-windows-recent.json" @@ -223,14 +226,16 @@ def parse_cli(args): if action.add_arg(a): action = None continue - except ValueError: - pass + except ValueError as ex: + print(ex) usage() if not plan_read: action = ReadFile() action.source = "https://www.python.org/ftp/python/index-windows.json" plan_read.append(action) if not plan_split: + print("No outputs specified") + print(args) usage() return [*plan_read, sort, *plan_split, write]