diff --git a/ci/repartition-index.yml b/ci/repartition-index.yml
new file mode 100644
index 0000000..c498c55
--- /dev/null
+++ b/ci/repartition-index.yml
@@ -0,0 +1,84 @@
+# Repartitioning runs on Azure Pipelines, because that's where we have SSH
+# access to the download server.
+
+name: $(Date:yyyyMMdd).$(Rev:r)
+
+# Do not run automatically
+trigger: none
+
+
+parameters:
+- name: Publish
+  displayName: "Publish"
+  type: boolean
+  default: false
+- name: TestPublish
+  displayName: "Run all steps without publishing"
+  type: boolean
+  default: false
+
+stages:
+- stage: PyManagerIndexPartition
+  displayName: 'Repartition PyManager Index'
+
+  jobs:
+  - job: Repartition
+
+    pool:
+      vmImage: 'windows-latest'
+
+    variables:
+    - group: PythonOrgPublish
+
+    steps:
+    - checkout: self
+
+    - task: NugetToolInstaller@0
+      displayName: 'Install Nuget'
+
+    - powershell: |
+        nuget install -o host_python -x -noninteractive -prerelease python
+        Write-Host "##vso[task.prependpath]$(gi host_python\python\tools)"
+      displayName: 'Install host Python'
+      workingDirectory: $(Build.BinariesDirectory)
+
+    - powershell: |
+        cd (mkdir -Force index)
+        python "$(Build.SourcesDirectory)\scripts\repartition-index.py" --windows-default
+      displayName: 'Repartition index'
+      workingDirectory: $(Build.BinariesDirectory)
+
+    - publish: $(Build.BinariesDirectory)\index
+      artifact: index
+      displayName: Publish index artifact
+
+    - ${{ if or(eq(parameters.Publish, 'true'), eq(parameters.TestPublish, 'true')) }}:
+      - ${{ if ne(parameters.TestPublish, 'true') }}:
+        - task: DownloadSecureFile@1
+          name: sshkey
+          inputs:
+            secureFile: pydotorg-ssh.ppk
+          displayName: 'Download PuTTY key'
+
+      - powershell: |
+          git clone https://github.com/python/cpython-bin-deps --branch putty --single-branch --depth 1 --progress -v "putty"
+          "##vso[task.prependpath]$(gi putty)"
+        workingDirectory: $(Pipeline.Workspace)
+        displayName: 'Download PuTTY binaries'
+
+      - powershell: |
+          python ci\upload.py
+        displayName: 'Publish packages'
+        env:
+          UPLOAD_URL: $(PyDotOrgUrlPrefix)python/
+          UPLOAD_DIR: $(Build.BinariesDirectory)\index
+          UPLOAD_URL_PREFIX: $(PyDotOrgUrlPrefix)
+          UPLOAD_PATH_PREFIX: $(PyDotOrgUploadPathPrefix)
+          UPLOAD_HOST: $(PyDotOrgServer)
+          UPLOAD_HOST_KEY: $(PyDotOrgHostKey)
+          UPLOAD_USER: $(PyDotOrgUsername)
+          UPLOADING_INDEX: true
+          ${{ if eq(parameters.TestPublish, 'true') }}:
+            NO_UPLOAD: 1
+          ${{ else }}:
+            UPLOAD_KEYFILE: $(sshkey.secureFilePath)
diff --git a/ci/upload.py b/ci/upload.py
index 889c11e..2dbb04b 100644
--- a/ci/upload.py
+++ b/ci/upload.py
@@ -9,14 +9,16 @@
 UPLOAD_PATH_PREFIX = os.getenv("UPLOAD_PATH_PREFIX", "/srv/www.python.org/ftp/")
 UPLOAD_URL = os.getenv("UPLOAD_URL")
 UPLOAD_DIR = os.getenv("UPLOAD_DIR")
-# A version will be inserted before the extension later on
-MANIFEST_FILE = os.getenv("MANIFEST_FILE")
 UPLOAD_HOST = os.getenv("UPLOAD_HOST", "")
 UPLOAD_HOST_KEY = os.getenv("UPLOAD_HOST_KEY", "")
 UPLOAD_KEYFILE = os.getenv("UPLOAD_KEYFILE", "")
 UPLOAD_USER = os.getenv("UPLOAD_USER", "")
 NO_UPLOAD = os.getenv("NO_UPLOAD", "no")[:1].lower() in "yt1"
 
+# Set to 'true' when updating index.json, rather than the app
+# Tested against a tuple because "" is "in" every str, so an empty value would pass
+UPLOADING_INDEX = os.getenv("UPLOADING_INDEX", "no")[:1].lower() in ("y", "t", "1")
+
 
 if not UPLOAD_URL:
     print("##[error]Cannot upload without UPLOAD_URL")
@@ -179,10 +181,15 @@ def purge(url):
 
 
 UPLOADS = []
-for pat in ("python-manager-*.msix", "python-manager-*.msi", "pymanager.appinstaller"):
-    for f in UPLOAD_DIR.glob(pat):
+if UPLOADING_INDEX:
+    for f in UPLOAD_DIR.glob("*.json"):
         u = UPLOAD_URL + f.name
         UPLOADS.append((f, u, url2path(u)))
+else:
+    for pat in ("python-manager-*.msix", "python-manager-*.msi", "pymanager.appinstaller"):
+        for f in UPLOAD_DIR.glob(pat):
+            u = UPLOAD_URL + f.name
+            UPLOADS.append((f, u, url2path(u)))
 
 print("Planned uploads:")
 for f, u, p in UPLOADS:
diff --git a/scripts/repartition-index.py b/scripts/repartition-index.py
new file mode 100644
index 0000000..a255810
--- /dev/null
+++ b/scripts/repartition-index.py
@@ -0,0 +1,290 @@
+"""Repartition index entries across one or more chained index files.
+
+All inputs are read and sorted before the outputs are split and written.
+Run with no arguments to see the usage text.
+"""
+
+import json
+import re
+import sys
+
+from collections import OrderedDict
+from pathlib import Path
+from urllib.request import Request, urlopen
+
+REPO = Path(__file__).absolute().parent.parent
+sys.path.append(str(REPO / "src"))
+
+from manage.urlutils import IndexDownloader
+from manage.tagutils import CompanyTag, tag_or_range
+from manage.verutils import Version
+
+
+def usage():
+    """Print the command line help and exit with status 1."""
+    print("Usage: repartition-index.py [-i options ...] [options ...]")
+    print()
+    print(" --windows-default Implies default output files and configurations.")
+    print()
+    print(" -i One or more files or URLs to read existing entries from.")
+    print(" -i -n/--no-recurse Do not follow 'next' info")
+    print("If no files are provided, uses the current online index")
+    print()
+    print(" Filename to write entries into")
+    print(" -d/--allow-dup Include entries written in previous outputs")
+    print(" --only-dup Only include entries written in previous outputs")
+    print(" --pre Include entries marked as prereleases")
+    print(" -t/--tag TAG Include only the specified tags (comma-separated)")
+    print(" -r/--range RANGE Include only the specified range (comma-separated)")
+    print(" --latest-micro Include only the latest x.y.z version")
+    print()
+    print("An output of 'nul' is permitted to drop entries.")
+    print("Providing the same inputs and outputs is permitted, as all inputs are read")
+    print("before any outputs are written.")
+    sys.exit(1)
+
+
+class ReadFile:
+    """Plan step that loads entries from one index file or URL."""
+
+    def __init__(self):
+        self.source = None
+        self.recurse = True
+
+    def add_arg(self, arg):
+        """Consume one argument; return True once the source is known."""
+        if arg[:1] != "-":
+            self.source = arg
+            return True
+        if arg in ("-n", "--no-recurse"):
+            self.recurse = False
+            return False
+        raise ValueError("Unknown argument: " + arg)
+
+    def execute(self, versions, context):
+        """Append the "versions" of each downloaded index to *versions*."""
+        # Assumes the callable is used as the index class, making each item (url, json).
+        for _, data in IndexDownloader(self.source, lambda *a: a):
+            versions.extend(data["versions"])
+            if not self.recurse:
+                break
+
+
+class SortVersions:
+    """Plan step that sorts the collected entries in place."""
+
+    def __init__(self):
+        pass
+
+    def add_arg(self, arg):
+        """Reject *arg* with ValueError; no sort options are defined."""
+        raise ValueError("Unknown argument: " + arg)
+
+    def _number_sortkey(self, k):
+        """Return a tuple key for *k* with digit runs zero-padded to 20 places."""
+        bits = []
+        for n in re.split(r"(\d+)", k):
+            try:
+                bits.append(f"{int(n):020}")
+            except ValueError:
+                bits.append(n)
+        return tuple(bits)
+
+    def _sort_key(self, v):
+        """Return the (sort-version, company, id) sort key for entry *v*."""
+        from manage.tagutils import _CompanyKey, _DescendingVersion
+        return (
+            _DescendingVersion(v["sort-version"]),
+            _CompanyKey(v["company"]),
+            self._number_sortkey(v["id"]),
+        )
+
+    def execute(self, versions, context):
+        """Sort *versions* in place and report how many there are."""
+        versions.sort(key=self._sort_key)
+        print("Processing {} entries".format(len(versions)))
+
+
+class SplitToFile:
+    """Plan step that selects the entries destined for one output file."""
+
+    def __init__(self):
+        self.target = None
+        self.allow_dup = False
+        self.only_dup = False
+        self.pre = False
+        self.tag_or_range = None
+        self._expect_tag_or_range = False
+        self.latest_micro = False
+
+    def add_arg(self, arg):
+        """Consume one argument; return True once the target is known."""
+        if arg[:1] != "-":
+            if self._expect_tag_or_range:
+                self.tag_or_range = tag_or_range(arg)
+                self._expect_tag_or_range = False
+                return False
+            self.target = arg
+            return True
+        if arg in ("-d", "--allow-dup"):
+            self.allow_dup = True
+            return False
+        if arg == "--only-dup":
+            self.allow_dup = True
+            self.only_dup = True
+            return False
+        if arg == "--pre":
+            self.pre = True
+            return False
+        if arg in ("-t", "--tag", "-r", "--range"):
+            self._expect_tag_or_range = True
+            return False
+        if arg == "--latest-micro":
+            self.latest_micro = True
+            return False
+        raise ValueError("Unknown argument: " + arg)
+
+    def execute(self, versions, context):
+        """Append the selected entries to this target's output list."""
+        written = context.setdefault("written", set())
+        written_now = set()
+        outputs = context.setdefault("outputs", {})
+        if self.target != "nul":
+            try:
+                output = outputs[self.target]
+            except KeyError:
+                context.setdefault("output_order", []).append(self.target)
+                output = outputs.setdefault(self.target, [])
+        else:
+            # Write to a list that'll be forgotten
+            output = []
+
+        latest_micro_skip = set()
+
+        for i in versions:
+            k = i["id"].casefold(), i["sort-version"].casefold()
+            v = Version(i["sort-version"])
+            # NOTE(review): this skips the first occurrence of each key in
+            # this pass instead of checking `written`, which does not look
+            # like what the usage text for --only-dup says - confirm.
+            if self.only_dup and k not in written_now:
+                written_now.add(k)
+                continue
+            if not self.allow_dup and k in written:
+                continue
+            if not self.pre and v.is_prerelease:
+                continue
+            if self.tag_or_range and not any(
+                self.tag_or_range.satisfied_by(CompanyTag(i["company"], t))
+                for t in i["install-for"]
+            ):
+                continue
+            if self.latest_micro:
+                k2 = i["id"].casefold(), v.to_python_style(2, with_dev=False)
+                if k2 in latest_micro_skip:
+                    continue
+                latest_micro_skip.add(k2)
+            written.add(k)
+            output.append(i)
+
+
+class WriteFiles:
+    """Plan step that writes every collected output list as JSON."""
+
+    def __init__(self):
+        self.indent = None
+
+    def add_arg(self, arg):
+        """Set the JSON indent from a -w-indent* option, else raise ValueError."""
+        if arg == "-w-indent":
+            self.indent = 4
+            return False
+        if arg == "-w-indent1":
+            self.indent = 1
+            return False
+        raise ValueError("Unknown argument: " + arg)
+
+    def execute(self, versions, context):
+        """Write each output, linking it to the following one via "next"."""
+        outputs = context.get("outputs") or {}
+        output_order = context.get("output_order", [])
+        for target, next_target in zip(output_order, [*output_order[1:], None]):
+            data = {
+                "versions": outputs[target]
+            }
+            if next_target:
+                data["next"] = next_target
+            with open(target, "w", encoding="utf-8") as f:
+                json.dump(data, f, indent=self.indent)
+            print("Wrote {} ({} entries, {} bytes)".format(
+                target, len(data["versions"]), Path(target).stat().st_size
+            ))
+
+
+def parse_cli(args):
+    """Turn command line *args* into the ordered list of plan steps.
+
+    The plan is: every read, the sort, every split, then the write. Prints
+    the usage text and exits if the arguments are unknown or incomplete.
+    """
+    plan_read = []
+    plan_split = []
+    sort = SortVersions()
+    action = None
+    write = WriteFiles()
+    for a in args:
+        if a == "--windows-default":
+            print("Using equivalent of: --pre --latest-micro -r >=3.11.0 index-windows.json")
+            print(" --pre -r >=3.11.0 index-windows-recent.json")
+            print(" index-windows-legacy.json")
+            plan_split = [SplitToFile(), SplitToFile(), SplitToFile()]
+            plan_split[0].target = "index-windows.json"
+            plan_split[1].target = "index-windows-recent.json"
+            plan_split[2].target = "index-windows-legacy.json"
+            plan_split[0].pre = plan_split[1].pre = plan_split[2].pre = True
+            plan_split[0].latest_micro = True
+            plan_split[0].tag_or_range = tag_or_range(">=3.11.0")
+            plan_split[1].tag_or_range = tag_or_range(">=3.11.0")
+        elif a == "-i":
+            action = ReadFile()
+            plan_read.append(action)
+        elif a.startswith("-s-"):
+            sort.add_arg(a)
+        elif a.startswith("-w-"):
+            write.add_arg(a)
+        else:
+            try:
+                if action is None:
+                    action = SplitToFile()
+                    plan_split.append(action)
+                if action.add_arg(a):
+                    action = None
+            except ValueError as ex:
+                print(ex)
+                usage()
+    if not plan_read:
+        action = ReadFile()
+        action.source = "https://www.python.org/ftp/python/index-windows.json"
+        plan_read.append(action)
+    if not plan_split:
+        print("No outputs specified")
+        print(args)
+        usage()
+    # A trailing "-i" or trailing split options never received their file
+    # name; stop here instead of running a plan with a missing source/target.
+    if any(r.source is None for r in plan_read):
+        print("No source specified for -i")
+        usage()
+    if any(s.target is None for s in plan_split):
+        print("No output file specified")
+        usage()
+    return [*plan_read, sort, *plan_split, write]
+
+
+if __name__ == "__main__":
+    plan = parse_cli(sys.argv[1:])
+    VERSIONS = []
+    CONTEXT = {}
+    for p in plan:
+        p.execute(VERSIONS, CONTEXT)
+
diff --git a/src/manage/tagutils.py b/src/manage/tagutils.py
index b31fc39..cf00fd2 100644
--- a/src/manage/tagutils.py
+++ b/src/manage/tagutils.py
@@ -26,6 +26,9 @@ def startswith(self, other):
             return self._company.startswith(other._company)
         return self._company == other._company
 
+    def __hash__(self):
+        return hash(self._company)
+
     def __eq__(self, other):
         return self._company == other._company
 
@@ -64,6 +67,9 @@ def startswith(self, other):
             return not self.s
         return self.s.startswith(other.s)
 
+    def __hash__(self):
+        return hash(self.s)
+
     def __eq__(self, other):
         if not isinstance(other, type(self)):
             return False
diff --git a/src/manage/urlutils.py b/src/manage/urlutils.py
index 4e00638..81f40e3 100644
--- a/src/manage/urlutils.py
+++ b/src/manage/urlutils.py
@@ -677,10 +677,11 @@ def __next__(self):
             LOGGER.error("An unexpected error occurred while downloading the index: %s", ex)
             raise
 
-        index = self.index_cls(self._url, json.loads(data))
+        j = json.loads(data)
+        index = self.index_cls(self._url, j)
 
-        if index.next_url:
-            self._url = urljoin(url, index.next_url, to_parent=True)
+        if j.get("next"):
+            self._url = urljoin(url, j["next"], to_parent=True)
         else:
             self._url = None
         return index
diff --git a/src/manage/verutils.py b/src/manage/verutils.py
index e8412c4..2a4cbda 100644
--- a/src/manage/verutils.py
+++ b/src/manage/verutils.py
@@ -53,6 +53,9 @@ def __str__(self):
     def __repr__(self):
         return self.s
 
+    def __hash__(self):
+        return hash(self.sortkey)
+
     def _are_equal(self, other, prefix_match=None, other_prefix_match=None, prerelease_match=None):
         if other is None:
             return False