From 54a2f069ce43d31f125ee158aee020c2079d4f2e Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 21 May 2021 13:21:20 +0300 Subject: [PATCH 01/14] exceptions: Make more errors RepositoryErrors Try to make errors derive from RepositoryError if they can be the result of malicious or malfunctioning remote repository: The idea here is that client can handle just RepositoryError instead of individual errors (as it cannot do anything about any of them). Also improve variable naming. This is backwards compatible. Signed-off-by: Jussi Kukkonen --- tuf/exceptions.py | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/tuf/exceptions.py b/tuf/exceptions.py index 04eed8bc5e..2a24a0429e 100755 --- a/tuf/exceptions.py +++ b/tuf/exceptions.py @@ -70,7 +70,10 @@ class UnsupportedAlgorithmError(Error): class LengthOrHashMismatchError(Error): """Indicate an error while checking the length and hash values of an object""" -class BadHashError(Error): +class RepositoryError(Error): + """Indicate an error with a repository's state, such as a missing file.""" + +class BadHashError(RepositoryError): """Indicate an error while checking the value of a hash object.""" def __init__(self, expected_hash: str, observed_hash: str): @@ -92,9 +95,6 @@ def __repr__(self) -> str: # self.__class__.__name__ + '(' + repr(self.expected_hash) + ', ' + # repr(self.observed_hash) + ')') -class BadVersionNumberError(Error): - """Indicate an error for metadata that contains an invalid version number.""" - class BadPasswordError(Error): """Indicate an error after encountering an invalid password.""" @@ -104,8 +104,8 @@ class UnknownKeyError(Error): """Indicate an error while verifying key-like objects (e.g., keyids).""" -class RepositoryError(Error): - """Indicate an error with a repository's state, such as a missing file.""" +class BadVersionNumberError(RepositoryError): + """Indicate an error for metadata that contains an invalid version number.""" class MissingLocalRepositoryError(RepositoryError): @@ -120,35 +120,29 @@ class ForbiddenTargetError(RepositoryError): """Indicate that a role signed for a target that it was not delegated to.""" -class ExpiredMetadataError(Error): +class ExpiredMetadataError(RepositoryError): """Indicate that a TUF Metadata file has expired.""" class ReplayedMetadataError(RepositoryError): """Indicate that some metadata has been replayed to the client.""" - def __init__(self, metadata_role: str, previous_version: int, current_version: int): + def __init__(self, metadata_role: str, downloaded_version: int, current_version: int): super(ReplayedMetadataError, self).__init__() self.metadata_role = metadata_role - self.previous_version = previous_version + self.downloaded_version = downloaded_version self.current_version = current_version def __str__(self) -> str: return ( 'Downloaded ' + repr(self.metadata_role) + ' is older (' + - repr(self.previous_version) + ') than the version currently ' + repr(self.downloaded_version) + ') than the version currently ' 'installed (' + repr(self.current_version) + ').') def __repr__(self) -> str: return self.__class__.__name__ + ' : ' + str(self) - # # Directly instance-reproducing: - # return ( - # self.__class__.__name__ + '(' + repr(self.metadata_role) + ', ' + - # repr(self.previous_version) + ', ' + repr(self.current_version) + ')') - - class CryptoError(Error): """Indicate any cryptography-related errors.""" @@ -250,7 +244,7 @@ class InvalidNameError(Error): """Indicate an error while trying to validate any type of 
named object."""

-class UnsignedMetadataError(Error):
+class UnsignedMetadataError(RepositoryError):
   """Indicate metadata object with insufficient threshold of signatures."""

   # signable is not used but kept in method signature for backwards compat

From 6da206516baa56552b22ab33d98f0005bd273c7e Mon Sep 17 00:00:00 2001
From: Jussi Kukkonen
Date: Fri, 21 May 2021 13:25:42 +0300
Subject: [PATCH 02/14] ngclient: Add new modules

Start building the next-gen client: Copy existing components from the
current client. All of these files have some changes compared to the
already existing copies (because ngclient uses the same linting rules
as Metadata API).
* download.py is likely to see major changes in the future.
* requests_fetcher is likely to see some minor changes (like allowing
  compression)

Signed-off-by: Jussi Kukkonen
---
 tuf/ngclient/_internal/download.py         | 243 +++++++++++++++++++++
 tuf/ngclient/_internal/requests_fetcher.py | 187 ++++++++++++++++
 tuf/ngclient/fetcher.py                    |  41 ++++
 3 files changed, 471 insertions(+)
 create mode 100644 tuf/ngclient/_internal/download.py
 create mode 100644 tuf/ngclient/_internal/requests_fetcher.py
 create mode 100644 tuf/ngclient/fetcher.py

diff --git a/tuf/ngclient/_internal/download.py b/tuf/ngclient/_internal/download.py
new file mode 100644
index 0000000000..31b59f6630
--- /dev/null
+++ b/tuf/ngclient/_internal/download.py
@@ -0,0 +1,243 @@
+#!/usr/bin/env python
+
+# Copyright 2012 - 2017, New York University and the TUF contributors
+# SPDX-License-Identifier: MIT OR Apache-2.0
+
+"""
+<Program Name>
+  download.py
+
+<Started>
+  February 21, 2012. Based on previous version by Geremy Condra.
+
+<Author>
+  Konstantin Andrianov
+  Vladimir Diaz
+
+<Copyright>
+  See LICENSE-MIT OR LICENSE for licensing information.
+
+<Purpose>
+  Download metadata and target files and check their validity. The hash and
+  length of a downloaded file has to match the hash and length supplied by the
+  metadata of that file.
+"""
+
+import logging
+import tempfile
+import timeit
+from urllib import parse
+
+from securesystemslib import formats as sslib_formats
+
+import tuf
+from tuf import exceptions, formats
+
+# See 'log.py' to learn how logging is handled in TUF.
+logger = logging.getLogger(__name__)
+
+
+def download_file(url, required_length, fetcher, strict_required_length=True):
+    """
+    <Purpose>
+      Given the url and length of the desired file, this function opens a
+      connection to 'url' and downloads the file while ensuring its length
+      matches 'required_length' if 'strict_required_length' is True (if False,
+      the file's length is not checked and a slow retrieval exception is
+      raised if the download rate falls below the acceptable rate).
+
+    <Arguments>
+      url:
+        A URL string that represents the location of the file.
+
+      required_length:
+        An integer value representing the length of the file.
+
+      fetcher:
+        An object implementing FetcherInterface that performs the actual
+        network IO.
+
+      strict_required_length:
+        A Boolean indicator used to signal whether we should perform strict
+        checking of required_length. True by default. We explicitly set this
+        to False when we know that we want to turn this off for downloading
+        the timestamp metadata, which has no signed required_length.
+
+    <Side Effects>
+      A file object is created on disk to store the contents of 'url'.
+
+    <Exceptions>
+      exceptions.DownloadLengthMismatchError, if there was a
+      mismatch of observed vs expected lengths while downloading the file.
+
+      securesystemslib.exceptions.FormatError, if any of the arguments are
+      improperly formatted.
+
+      Any other unforeseen runtime exception.
+
+    <Returns>
+      A file object that points to the contents of 'url'.
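+
+    <Example>
+      An illustrative sketch only ('fetcher' is assumed to be some concrete
+      FetcherInterface implementation, e.g. RequestsFetcher):
+
+      >>> temp_file = download_file(url, length, fetcher)
+      >>> data = temp_file.read()
+      >>> temp_file.close()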
+ """ + # Do all of the arguments have the appropriate format? + # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch. + sslib_formats.URL_SCHEMA.check_match(url) + formats.LENGTH_SCHEMA.check_match(required_length) + + # 'url.replace('\\', '/')' is needed for compatibility with Windows-based + # systems, because they might use back-slashes in place of forward-slashes. + # This converts it to the common format. unquote() replaces %xx escapes in + # a url with their single-character equivalent. A back-slash may be + # encoded as %5c in the url, which should also be replaced with a forward + # slash. + url = parse.unquote(url).replace("\\", "/") + logger.info("Downloading: %s", url) + + # This is the temporary file that we will return to contain the contents of + # the downloaded file. + temp_file = tempfile.TemporaryFile() # pylint: disable=consider-using-with + + average_download_speed = 0 + number_of_bytes_received = 0 + + try: + chunks = fetcher.fetch(url, required_length) + start_time = timeit.default_timer() + for chunk in chunks: + + stop_time = timeit.default_timer() + temp_file.write(chunk) + + # Measure the average download speed. + number_of_bytes_received += len(chunk) + seconds_spent_receiving = stop_time - start_time + average_download_speed = ( + number_of_bytes_received / seconds_spent_receiving + ) + + if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: + logger.debug( + "The average download speed dropped below the minimum" + " average download speed set in tuf.settings.py." + " Stopping the download!" + ) + break + + logger.debug( + "The average download speed has not dipped below the" + " minimum average download speed set in tuf.settings.py." + ) + + # Does the total number of downloaded bytes match the required length? + _check_downloaded_length( + number_of_bytes_received, + required_length, + strict_required_length=strict_required_length, + average_download_speed=average_download_speed, + ) + + except Exception: + # Close 'temp_file'. Any written data is lost. + temp_file.close() + logger.debug("Could not download URL: %s", url) + raise + + else: + temp_file.seek(0) + return temp_file + + +def download_bytes(url, required_length, fetcher, strict_required_length=True): + """Download bytes from given url + + Returns the downloaded bytes, otherwise like download_file() + """ + with download_file( + url, required_length, fetcher, strict_required_length + ) as dl_file: + return dl_file.read() + + +def _check_downloaded_length( + total_downloaded, + required_length, + strict_required_length=True, + average_download_speed=None, +): + """ + + A helper function which checks whether the total number of downloaded + bytes matches our expectation. + + + total_downloaded: + The total number of bytes supposedly downloaded for the file in + question. + + required_length: + The total number of bytes expected of the file as seen from its metadata + The Timestamp role is always downloaded without a known file length, and + the Root role when the client cannot download any of the required + top-level roles. In both cases, 'required_length' is actually an upper + limit on the length of the downloaded file. + + strict_required_length: + A Boolean indicator used to signal whether we should perform strict + checking of required_length. True by default. We explicitly set this to + False when we know that we want to turn this off for downloading the + timestamp metadata, which has no signed required_length. 
+ + average_download_speed: + The average download speed for the downloaded file. + + + None. + + + securesystemslib.exceptions.DownloadLengthMismatchError, if + strict_required_length is True and total_downloaded is not equal + required_length. + + exceptions.SlowRetrievalError, if the total downloaded was + done in less than the acceptable download speed (as set in + tuf.settings.py). + + + None. + """ + + if total_downloaded == required_length: + logger.info("Downloaded %d bytes as expected.", total_downloaded) + + else: + # What we downloaded is not equal to the required length, but did we ask + # for strict checking of required length? + if strict_required_length: + logger.info( + "Downloaded %d bytes, but expected %d bytes", + total_downloaded, + required_length, + ) + + # If the average download speed is below a certain threshold, we + # flag this as a possible slow-retrieval attack. + if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: + raise exceptions.SlowRetrievalError(average_download_speed) + + raise exceptions.DownloadLengthMismatchError( + required_length, total_downloaded + ) + + # We specifically disabled strict checking of required length, but + # we will log a warning anyway. This is useful when we wish to + # download the Timestamp or Root metadata, for which we have no + # signed metadata; so, we must guess a reasonable required_length + # for it. + if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: + raise exceptions.SlowRetrievalError(average_download_speed) + + logger.debug( + "Good average download speed: %f bytes per second", + average_download_speed, + ) + + logger.info( + "Downloaded %d bytes out of upper limit of %d bytes.", + total_downloaded, + required_length, + ) diff --git a/tuf/ngclient/_internal/requests_fetcher.py b/tuf/ngclient/_internal/requests_fetcher.py new file mode 100644 index 0000000000..6913b27edd --- /dev/null +++ b/tuf/ngclient/_internal/requests_fetcher.py @@ -0,0 +1,187 @@ +# Copyright 2021, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Provides an implementation of FetcherInterface using the Requests HTTP + library. +""" + +import logging +import time +from urllib import parse + +# Imports +import requests +import urllib3.exceptions + +import tuf +from tuf import exceptions, settings +from tuf.ngclient.fetcher import FetcherInterface + +# Globals +logger = logging.getLogger(__name__) + +# Classess +class RequestsFetcher(FetcherInterface): + """A concrete implementation of FetcherInterface based on the Requests + library. + + Attributes: + _sessions: A dictionary of Requests.Session objects storing a separate + session per scheme+hostname combination. + """ + + def __init__(self): + # http://docs.python-requests.org/en/master/user/advanced/#session-objects: + # + # "The Session object allows you to persist certain parameters across + # requests. It also persists cookies across all requests made from the + # Session instance, and will use urllib3's connection pooling. So if + # you're making several requests to the same host, the underlying TCP + # connection will be reused, which can result in a significant + # performance increase (see HTTP persistent connection)." + # + # NOTE: We use a separate requests.Session per scheme+hostname + # combination, in order to reuse connections to the same hostname to + # improve efficiency, but avoiding sharing state between different + # hosts-scheme combinations to minimize subtle security issues. 
+ # Some cookies may not be HTTP-safe. + self._sessions = {} + + def fetch(self, url, required_length): + """Fetches the contents of HTTP/HTTPS url from a remote server. + + Ensures the length of the downloaded data is up to 'required_length'. + + Arguments: + url: A URL string that represents a file location. + required_length: An integer value representing the file length in + bytes. + + Raises: + exceptions.SlowRetrievalError: A timeout occurs while receiving + data. + exceptions.FetcherHTTPError: An HTTP error code is received. + + Returns: + A bytes iterator + """ + # Get a customized session for each new schema+hostname combination. + session = self._get_session(url) + + # Get the requests.Response object for this URL. + # + # Defer downloading the response body with stream=True. + # Always set the timeout. This timeout value is interpreted by + # requests as: + # - connect timeout (max delay before first byte is received) + # - read (gap) timeout (max delay between bytes received) + response = session.get( + url, stream=True, timeout=settings.SOCKET_TIMEOUT + ) + # Check response status. + try: + response.raise_for_status() + except requests.HTTPError as e: + response.close() + status = e.response.status_code + raise exceptions.FetcherHTTPError(str(e), status) + + # Define a generator function to be returned by fetch. This way the + # caller of fetch can differentiate between connection and actual data + # download and measure download times accordingly. + def chunks(): + try: + bytes_received = 0 + while True: + # We download a fixed chunk of data in every round. This is + # so that we can defend against slow retrieval attacks. + # Furthermore, we do not wish to download an extremely + # large file in one shot. Before beginning the round, sleep + # (if set) for a short amount of time so that the CPU is not + # hogged in the while loop. + if settings.SLEEP_BEFORE_ROUND: + time.sleep(settings.SLEEP_BEFORE_ROUND) + + read_amount = min( + settings.CHUNK_SIZE, + required_length - bytes_received, + ) + + # NOTE: This may not handle some servers adding a + # Content-Encoding header, which may cause urllib3 to + # misbehave: + # https://github.com/pypa/pip/blob/404838abcca467648180b358598c597b74d568c9/src/pip/_internal/download.py#L547-L582 + data = response.raw.read(read_amount) + bytes_received += len(data) + + # We might have no more data to read. Check number of bytes + # downloaded. + if not data: + logger.debug( + "Downloaded %d out of %d bytes", + bytes_received, + required_length, + ) + + # Finally, we signal that the download is complete. + break + + yield data + + if bytes_received >= required_length: + break + + except urllib3.exceptions.ReadTimeoutError as e: + raise exceptions.SlowRetrievalError(str(e)) + + finally: + response.close() + + return chunks() + + def _get_session(self, url): + """Returns a different customized requests.Session per schema+hostname + combination. + """ + # Use a different requests.Session per schema+hostname combination, to + # reuse connections while minimizing subtle security issues. 
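+        # For example, "https://example.com/a" and "https://example.com/b"
+        # would share one session, while "http://example.com/a" would get a
+        # separate one (see the session_index key in _get_session() below).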
+ parsed_url = parse.urlparse(url) + + if not parsed_url.scheme or not parsed_url.hostname: + raise exceptions.URLParsingError( + "Could not get scheme and hostname from URL: " + url + ) + + session_index = parsed_url.scheme + "+" + parsed_url.hostname + + logger.debug("url: %s", url) + logger.debug("session index: %s", session_index) + + session = self._sessions.get(session_index) + + if not session: + session = requests.Session() + self._sessions[session_index] = session + + # Attach some default headers to every Session. + requests_user_agent = session.headers["User-Agent"] + # Follows the RFC: https://tools.ietf.org/html/rfc7231#section-5.5.3 + tuf_user_agent = ( + "tuf/" + tuf.__version__ + " " + requests_user_agent + ) + session.headers.update( + { + # Tell the server not to compress or modify anything. + # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding#Directives + "Accept-Encoding": "identity", + # The TUF user agent. + "User-Agent": tuf_user_agent, + } + ) + + logger.debug("Made new session %s", session_index) + + else: + logger.debug("Reusing session %s", session_index) + + return session diff --git a/tuf/ngclient/fetcher.py b/tuf/ngclient/fetcher.py new file mode 100644 index 0000000000..8a6cae34d7 --- /dev/null +++ b/tuf/ngclient/fetcher.py @@ -0,0 +1,41 @@ +# Copyright 2021, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Provides an interface for network IO abstraction. +""" + +# Imports +import abc + + +# Classes +class FetcherInterface: + """Defines an interface for abstract network download. + + By providing a concrete implementation of the abstract interface, + users of the framework can plug-in their preferred/customized + network stack. + """ + + __metaclass__ = abc.ABCMeta + + @abc.abstractmethod + def fetch(self, url, required_length): + """Fetches the contents of HTTP/HTTPS url from a remote server. + + Ensures the length of the downloaded data is up to 'required_length'. + + Arguments: + url: A URL string that represents a file location. + required_length: An integer value representing the file length in + bytes. + + Raises: + tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving + data. + tuf.exceptions.FetcherHTTPError: An HTTP error code is received. 
+ + Returns: + A bytes iterator + """ + raise NotImplementedError # pragma: no cover From 775c5c6a8140eceff17cbebfb2be6d45a07a93f4 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 21 May 2021 13:30:45 +0300 Subject: [PATCH 03/14] tox: Enable testing and linting ngclient * Use the same rules as tuf/api * omit ngclient from coverage limits for now: #1309 Signed-off-by: Jussi Kukkonen --- tox.ini | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tox.ini b/tox.ini index 94d0d0683f..f460f69c5a 100644 --- a/tox.ini +++ b/tox.ini @@ -16,7 +16,7 @@ changedir = tests commands = python --version python -m coverage run aggregate_tests.py - python -m coverage report -m --fail-under 97 + python -m coverage report -m --fail-under 97 --omit "{toxinidir}/tuf/ngclient/*" deps = -r{toxinidir}/requirements-test.txt @@ -43,13 +43,13 @@ changedir = {toxinidir} commands = # Use different configs for new (tuf/api/*) and legacy code # TODO: configure black and isort args in pyproject.toml (see #1161) - black --check --diff --line-length 80 tuf/api - isort --check --diff --line-length 80 --profile black -p tuf tuf/api - pylint -j 0 tuf/api --rcfile=tuf/api/pylintrc + black --check --diff --line-length 80 tuf/api tuf/ngclient + isort --check --diff --line-length 80 --profile black -p tuf tuf/api tuf/ngclient + pylint -j 0 tuf/api tuf/ngclient --rcfile=tuf/api/pylintrc # NOTE: Contrary to what the pylint docs suggest, ignoring full paths does # work, unfortunately each subdirectory has to be ignored explicitly. - pylint -j 0 tuf --ignore=tuf/api,tuf/api/serialization + pylint -j 0 tuf --ignore=tuf/api,tuf/api/serialization,tuf/ngclient,tuf/ngclient/_internal mypy From 765c6fe0200d90531a3dccbd854f9a3ab01f7bf5 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 21 May 2021 13:39:22 +0300 Subject: [PATCH 04/14] ngclient: Add new client code This is a new client library implementation using the Metadata API (the only "old" file used is exceptions.py) * Functional but largely still untested * Requires work before it can replace tuf.client but should be good enough to include in the repo The major changes compared to current client so far are: * Use of Metadata API * Major simplification in the way downloads are handled due to removing mirrors support * Separating tracking of valid metadata into a separate component * There's no MultiRepoUpdater We do not expect other major changes (in the sense of moving large amounts of code) but do plan to possibly improve the client API. The API has already changed so this is not going to be a 1:1 compatible implementation, but porting should not be difficult. Signed-off-by: Jussi Kukkonen --- tuf/ngclient/README.md | 22 + tuf/ngclient/__init__.py | 8 + tuf/ngclient/_internal/__init__.py | 0 tuf/ngclient/_internal/metadata_bundle.py | 458 +++++++++++++++++ tuf/ngclient/updater.py | 575 ++++++++++++++++++++++ 5 files changed, 1063 insertions(+) create mode 100644 tuf/ngclient/README.md create mode 100644 tuf/ngclient/__init__.py create mode 100644 tuf/ngclient/_internal/__init__.py create mode 100644 tuf/ngclient/_internal/metadata_bundle.py create mode 100644 tuf/ngclient/updater.py diff --git a/tuf/ngclient/README.md b/tuf/ngclient/README.md new file mode 100644 index 0000000000..ad1de19b78 --- /dev/null +++ b/tuf/ngclient/README.md @@ -0,0 +1,22 @@ +## Next-gen TUF client for Python + +This package provides modules for TUF client implementers. 
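+
+For example, a complete update flow with this package might look like
+the sketch below (illustrative only: the URLs, paths and target name
+are placeholders, and the local metadata directory is assumed to
+already contain a trusted root.json):
+
+```python
+from tuf.ngclient import Updater
+
+# Point the updater at local trusted metadata and the remote repository
+updater = Updater(
+    repository_dir="/path/to/local/metadata",
+    metadata_base_url="https://example.com/metadata/",
+    target_base_url="https://example.com/targets/",
+)
+
+# Update top-level metadata, then resolve and download one target
+updater.refresh()
+targetinfo = updater.get_one_valid_targetinfo("file.txt")
+updater.download_target(targetinfo, "/path/to/downloads")
+```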
+ +**tuf.ngclient.Updater** is a class that implements the client workflow +described in the TUF specification (see +https://theupdateframework.github.io/specification/latest/#detailed-client-workflow) + +**tuf.ngclient.FetcherInterface** is an abstract class that client +implementers can optionally use to integrate with their own +network/download infrastructure -- a Requests-based implementation is +used by default. + +This package: +* Aims to be a clean, easy-to-validate reference client implementation + written in modern Python +* At the same time aims to be the library choice for anyone + implementing a TUF client in Python: light-weight, easy to integrate + and with minimal required dependencies +* Is still under development but planned to become the default client + in this code base (as in the older tuf.client will be deprecated in + the future) diff --git a/tuf/ngclient/__init__.py b/tuf/ngclient/__init__.py new file mode 100644 index 0000000000..0a572962ba --- /dev/null +++ b/tuf/ngclient/__init__.py @@ -0,0 +1,8 @@ +# Copyright New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""TUF client public API +""" + +from tuf.ngclient.fetcher import FetcherInterface +from tuf.ngclient.updater import Updater diff --git a/tuf/ngclient/_internal/__init__.py b/tuf/ngclient/_internal/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tuf/ngclient/_internal/metadata_bundle.py b/tuf/ngclient/_internal/metadata_bundle.py new file mode 100644 index 0000000000..202a2598e5 --- /dev/null +++ b/tuf/ngclient/_internal/metadata_bundle.py @@ -0,0 +1,458 @@ +# Copyright the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""TUF client bundle-of-metadata + +MetadataBundle keeps track of current valid set of metadata for the client, +and handles almost every step of the "Detailed client workflow" ( +https://theupdateframework.github.io/specification/latest#detailed-client-workflow) +in the TUF specification: the remaining steps are related to filesystem and +network IO which is not handled here. + +Loaded metadata can be accessed via the index access with rolename as key +(bundle["root"]) or, in the case of top-level metadata using the helper +properties (bundle.root). + +The rules for top-level metadata are + * Metadata is loadable only if metadata it depends on is loaded + * Metadata is immutable if any metadata depending on it has been loaded + * Metadata must be loaded/updated in order: + root -> timestamp -> snapshot -> targets -> (other delegated targets) + + +Exceptions are raised if metadata fails to load in any way. + +Example of loading root, timestamp and snapshot: + +>>> # Load local root (RepositoryErrors here stop the update) +>>> with open(root_path, "rb") as f: +>>> bundle = MetadataBundle(f.read()) +>>> +>>> # update root from remote until no more are available +>>> with download("root", bundle.root.signed.version + 1) as f: +>>> bundle.update_root(f.read()) +>>> # ... 
+>>> bundle.root_update_finished() +>>> +>>> # load local timestamp, then update from remote +>>> try: +>>> with open(timestamp_path, "rb") as f: +>>> bundle.update_timestamp(f.read()) +>>> except (RepositoryError, OSError): +>>> pass # failure to load a local file is ok +>>> +>>> with download("timestamp") as f: +>>> bundle.update_timestamp(f.read()) +>>> +>>> # load local snapshot, then update from remote if needed +>>> try: +>>> with open(snapshot_path, "rb") as f: +>>> bundle.update_snapshot(f.read()) +>>> except (RepositoryError, OSError): +>>> # local snapshot is not valid, load from remote +>>> # (RepositoryErrors here stop the update) +>>> with download("snapshot", version) as f: +>>> bundle.update_snapshot(f.read()) + +TODO: + * exceptions are not final: the idea is that client could just handle + a generic RepositoryError that covers every issue that server provided + metadata could inflict (other errors would be user errors), but this is not + yet the case + * usefulness of root_update_finished() can be debated: it could be done + in the beginning of load_timestamp()... + * some metadata interactions might work better in Metadata itself + * Progress through Specification update process should be documented + (not sure yet how: maybe a spec_logger that logs specification events?) +""" + +import logging +from collections import abc +from datetime import datetime +from typing import Dict, Iterator, Optional + +from securesystemslib import hash as sslib_hash + +from tuf import exceptions +from tuf.api.metadata import Metadata, Root, Targets +from tuf.api.serialization import DeserializationError + +logger = logging.getLogger(__name__) + +# This is a placeholder until ... +# TODO issue 1306: implement this in Metadata API +def verify_with_threshold( + delegator: Metadata, role_name: str, unverified: Metadata +) -> bool: + """Verify 'unverified' with keys and threshold defined in delegator""" + role = None + keys = {} + if isinstance(delegator.signed, Root): + keys = delegator.signed.keys + role = delegator.signed.roles.get(role_name) + elif isinstance(delegator.signed, Targets): + if delegator.signed.delegations: + keys = delegator.signed.delegations.keys + # role names are unique: first match is enough + roles = delegator.signed.delegations.roles + role = next((r for r in roles if r.name == role_name), None) + else: + raise ValueError("Call is valid only on delegator metadata") + + if role is None: + raise ValueError(f"Delegated role {role_name} not found") + + # verify that delegate is signed by correct threshold of unique keys + unique_keys = set() + for keyid in role.keyids: + key = keys[keyid] + try: + key.verify_signature(unverified) + unique_keys.add(key.keyval["public"]) + except Exception as e: # pylint: disable=broad-except + # TODO specify the Exceptions (see issue #1351) + logger.info("verify failed: %s", e) + + return len(unique_keys) >= role.threshold + + +class MetadataBundle(abc.Mapping): + """Internal class to keep track of valid metadata in Updater + + MetadataBundle ensures that the collection of metadata in the bundle is + valid. It provides easy ways to update the metadata with the caller making + decisions on what is updated. + """ + + def __init__(self, root_data: bytes): + """Initialize bundle by loading trusted root metadata + + Args: + root_data: Trusted root metadata as bytes. Note that this metadata + will only be verified by itself: it is the source of trust for + all metadata in the bundle. 
+ + Raises: + RepositoryError: Metadata failed to load or verify. The actual + error type and content will contain more details. + """ + self._bundle = {} # type: Dict[str: Metadata] + self.reference_time = datetime.utcnow() + self._root_update_finished = False + + # Load and validate the local root metadata. Valid initial trusted root + # metadata is required + logger.debug("Updating initial trusted root") + self.update_root(root_data) + + def __getitem__(self, role: str) -> Metadata: + """Returns current Metadata for 'role'""" + return self._bundle[role] + + def __len__(self) -> int: + """Returns number of Metadata objects in bundle""" + return len(self._bundle) + + def __iter__(self) -> Iterator[Metadata]: + """Returns iterator over all Metadata objects in bundle""" + return iter(self._bundle) + + # Helper properties for top level metadata + @property + def root(self) -> Optional[Metadata]: + """Current root Metadata or None""" + return self._bundle.get("root") + + @property + def timestamp(self) -> Optional[Metadata]: + """Current timestamp Metadata or None""" + return self._bundle.get("timestamp") + + @property + def snapshot(self) -> Optional[Metadata]: + """Current snapshot Metadata or None""" + return self._bundle.get("snapshot") + + @property + def targets(self) -> Optional[Metadata]: + """Current targets Metadata or None""" + return self._bundle.get("targets") + + # Methods for updating metadata + def update_root(self, data: bytes): + """Verifies and loads 'data' as new root metadata. + + Note that an expired intermediate root is considered valid: expiry is + only checked for the final root in root_update_finished(). + + Args: + data: unverified new root metadata as bytes + + Raises: + RepositoryError: Metadata failed to load or verify. The actual + error type and content will contain more details. + """ + if self._root_update_finished: + raise RuntimeError( + "Cannot update root after root update is finished" + ) + logger.debug("Updating root") + + try: + new_root = Metadata.from_bytes(data) + except DeserializationError as e: + raise exceptions.RepositoryError("Failed to load root") from e + + if new_root.signed.type != "root": + raise exceptions.RepositoryError( + f"Expected 'root', got '{new_root.signed.type}'" + ) + + if self.root is not None: + # We are not loading initial trusted root: verify the new one + if not verify_with_threshold(self.root, "root", new_root): + raise exceptions.UnsignedMetadataError( + "New root is not signed by root", new_root.signed + ) + + if new_root.signed.version != self.root.signed.version + 1: + raise exceptions.ReplayedMetadataError( + "root", new_root.signed.version, self.root.signed.version + ) + + if not verify_with_threshold(new_root, "root", new_root): + raise exceptions.UnsignedMetadataError( + "New root is not signed by itself", new_root.signed + ) + + self._bundle["root"] = new_root + logger.debug("Updated root") + + def root_update_finished(self): + """Marks root metadata as final and verifies it is not expired + + Raises: + ExpiredMetadataError: The final root metadata is expired. + """ + if self._root_update_finished: + raise RuntimeError("Root update is already finished") + + if self.root.signed.is_expired(self.reference_time): + raise exceptions.ExpiredMetadataError("New root.json is expired") + + self._root_update_finished = True + logger.debug("Verified final root.json") + + def update_timestamp(self, data: bytes): + """Verifies and loads 'data' as new timestamp metadata. 
+ + Args: + data: unverified new timestamp metadata as bytes + + Raises: + RepositoryError: Metadata failed to load or verify. The actual + error type and content will contain more details. + """ + if not self._root_update_finished: + # root_update_finished() not called + raise RuntimeError("Cannot update timestamp before root") + if self.snapshot is not None: + raise RuntimeError("Cannot update timestamp after snapshot") + + try: + new_timestamp = Metadata.from_bytes(data) + except DeserializationError as e: + raise exceptions.RepositoryError("Failed to load timestamp") from e + + if new_timestamp.signed.type != "timestamp": + raise exceptions.RepositoryError( + f"Expected 'timestamp', got '{new_timestamp.signed.type}'" + ) + + if not verify_with_threshold(self.root, "timestamp", new_timestamp): + raise exceptions.UnsignedMetadataError( + "New timestamp is not signed by root", new_timestamp.signed + ) + + if self.timestamp is not None: + # Prevent rolling back timestamp version + if new_timestamp.signed.version < self.timestamp.signed.version: + raise exceptions.ReplayedMetadataError( + "timestamp", + new_timestamp.signed.version, + self.timestamp.signed.version, + ) + # Prevent rolling back snapshot version + if ( + new_timestamp.signed.meta["snapshot.json"].version + < self.timestamp.signed.meta["snapshot.json"].version + ): + # TODO not sure about the correct exception here + raise exceptions.ReplayedMetadataError( + "snapshot", + new_timestamp.signed.meta["snapshot.json"].version, + self.timestamp.signed.meta["snapshot.json"].version, + ) + + if new_timestamp.signed.is_expired(self.reference_time): + raise exceptions.ExpiredMetadataError("New timestamp is expired") + + self._bundle["timestamp"] = new_timestamp + logger.debug("Updated timestamp") + + # TODO: remove pylint disable once the hash verification is in metadata.py + def update_snapshot(self, data: bytes): # pylint: disable=too-many-branches + """Verifies and loads 'data' as new snapshot metadata. + + Args: + data: unverified new snapshot metadata as bytes + + Raises: + RepositoryError: Metadata failed to load or verify. The actual + error type and content will contain more details. 
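+
+        Example (an illustrative sketch; 'timestamp_bytes' and
+        'snapshot_bytes' are raw metadata file contents):
+
+            >>> bundle.update_timestamp(timestamp_bytes)
+            >>> bundle.update_snapshot(snapshot_bytes)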
+ """ + + if self.timestamp is None: + raise RuntimeError("Cannot update snapshot before timestamp") + if self.targets is not None: + raise RuntimeError("Cannot update snapshot after targets") + logger.debug("Updating snapshot") + + meta = self.timestamp.signed.meta["snapshot.json"] + + # Verify against the hashes in timestamp, if any + hashes = meta.hashes or {} + for algo, stored_hash in hashes.items(): + digest_object = sslib_hash.digest(algo) + digest_object.update(data) + observed_hash = digest_object.hexdigest() + if observed_hash != stored_hash: + # TODO: Error should derive from RepositoryError + raise exceptions.BadHashError(stored_hash, observed_hash) + + try: + new_snapshot = Metadata.from_bytes(data) + except DeserializationError as e: + raise exceptions.RepositoryError("Failed to load snapshot") from e + + if new_snapshot.signed.type != "snapshot": + raise exceptions.RepositoryError( + f"Expected 'snapshot', got '{new_snapshot.signed.type}'" + ) + + if not verify_with_threshold(self.root, "snapshot", new_snapshot): + raise exceptions.UnsignedMetadataError( + "New snapshot is not signed by root", new_snapshot.signed + ) + + if ( + new_snapshot.signed.version + != self.timestamp.signed.meta["snapshot.json"].version + ): + raise exceptions.BadVersionNumberError( + f"Expected snapshot version" + f"{self.timestamp.signed.meta['snapshot.json'].version}," + f"got {new_snapshot.signed.version}" + ) + + if self.snapshot: + for filename, fileinfo in self.snapshot.signed.meta.items(): + new_fileinfo = new_snapshot.signed.meta.get(filename) + + # Prevent removal of any metadata in meta + if new_fileinfo is None: + raise exceptions.RepositoryError( + f"New snapshot is missing info for '{filename}'" + ) + + # Prevent rollback of any metadata versions + if new_fileinfo.version < fileinfo.version: + raise exceptions.BadVersionNumberError( + f"Expected {filename} version" + f"{new_fileinfo.version}, got {fileinfo.version}" + ) + + if new_snapshot.signed.is_expired(self.reference_time): + raise exceptions.ExpiredMetadataError("New snapshot is expired") + + self._bundle["snapshot"] = new_snapshot + logger.debug("Updated snapshot") + + def update_targets(self, data: bytes): + """Verifies and loads 'data' as new top-level targets metadata. + + Args: + data: unverified new targets metadata as bytes + + Raises: + RepositoryError: Metadata failed to load or verify. The actual + error type and content will contain more details. + """ + self.update_delegated_targets(data, "targets", "root") + + def update_delegated_targets( + self, data: bytes, role_name: str, delegator_name: str + ): + """Verifies and loads 'data' as new metadata for target 'role_name'. + + Args: + data: unverified new metadata as bytes + role_name: The role name of the new metadata + delegator_name: The name of the role delegating the new metadata + + Raises: + RepositoryError: Metadata failed to load or verify. The actual + error type and content will contain more details. 
+ """ + if self.snapshot is None: + raise RuntimeError("Cannot load targets before snapshot") + + delegator = self.get(delegator_name) + if delegator is None: + raise RuntimeError("Cannot load targets before delegator") + + logger.debug("Updating %s delegated by %s", role_name, delegator_name) + + # Verify against the hashes in snapshot, if any + meta = self.snapshot.signed.meta.get(f"{role_name}.json") + if meta is None: + raise exceptions.RepositoryError( + f"Snapshot does not contain information for '{role_name}'" + ) + + hashes = meta.hashes or {} + for algo, stored_hash in hashes.items(): + digest_object = sslib_hash.digest(algo) + digest_object.update(data) + observed_hash = digest_object.hexdigest() + if observed_hash != stored_hash: + # TODO: Error should derive from RepositoryError + raise exceptions.BadHashError(stored_hash, observed_hash) + + try: + new_delegate = Metadata.from_bytes(data) + except DeserializationError as e: + raise exceptions.RepositoryError("Failed to load snapshot") from e + + if new_delegate.signed.type != "targets": + raise exceptions.RepositoryError( + f"Expected 'targets', got '{new_delegate.signed.type}'" + ) + + if not verify_with_threshold(delegator, role_name, new_delegate): + raise exceptions.UnsignedMetadataError( + f"New {role_name} is not signed by {delegator_name}", + new_delegate, + ) + + if new_delegate.signed.version != meta.version: + raise exceptions.BadVersionNumberError( + f"Expected {role_name} version" + f"{meta.version}, got {new_delegate.signed.version}" + ) + + if new_delegate.signed.is_expired(self.reference_time): + raise exceptions.ExpiredMetadataError(f"New {role_name} is expired") + + self._bundle[role_name] = new_delegate + logger.debug("Updated %s delegated by %s", role_name, delegator_name) diff --git a/tuf/ngclient/updater.py b/tuf/ngclient/updater.py new file mode 100644 index 0000000000..27ac74617c --- /dev/null +++ b/tuf/ngclient/updater.py @@ -0,0 +1,575 @@ +# Copyright 2020, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""TUF client workflow implementation. +""" + +import fnmatch +import logging +import os +from typing import Dict, Optional +from urllib import parse + +from securesystemslib import exceptions as sslib_exceptions +from securesystemslib import hash as sslib_hash +from securesystemslib import util as sslib_util + +from tuf import exceptions +from tuf.ngclient._internal import download, metadata_bundle, requests_fetcher +from tuf.ngclient.fetcher import FetcherInterface + +# Globals +MAX_ROOT_ROTATIONS = 32 +MAX_DELEGATIONS = 32 +DEFAULT_ROOT_MAX_LENGTH = 512000 # bytes +DEFAULT_TIMESTAMP_MAX_LENGTH = 16384 # bytes +DEFAULT_SNAPSHOT_MAX_LENGTH = 2000000 # bytes +DEFAULT_TARGETS_MAX_LENGTH = 5000000 # bytes + +logger = logging.getLogger(__name__) + +# Classes +class Updater: + """ + An implemetation of the TUF client workflow. + Provides a public API for integration in client applications. + """ + + def __init__( + self, + repository_dir: str, + metadata_base_url: str, + target_base_url: Optional[str] = None, + fetcher: Optional[FetcherInterface] = None, + ): + """ + Args: + repository_dir: Local metadata directory. Directory must be + writable and it must contain at least a root.json file. + metadata_base_url: Base URL for all remote metadata downloads + target_base_url: Optional; Default base URL for all remote target + downloads. 
Can be individually set in download_target()
+            fetcher: Optional; FetcherInterface implementation used to
+                download both metadata and targets. Default is RequestsFetcher
+
+        Raises:
+            OSError: Local root.json cannot be read
+            RepositoryError: Local root.json is invalid
+        """
+        self._dir = repository_dir
+        self._metadata_base_url = _ensure_trailing_slash(metadata_base_url)
+        if target_base_url is None:
+            self._target_base_url = None
+        else:
+            self._target_base_url = _ensure_trailing_slash(target_base_url)
+
+        # Read trusted local root metadata
+        data = self._load_local_metadata("root")
+        self._bundle = metadata_bundle.MetadataBundle(data)
+
+        if fetcher is None:
+            self._fetcher = requests_fetcher.RequestsFetcher()
+        else:
+            self._fetcher = fetcher
+
+    def refresh(self) -> None:
+        """
+        This method downloads, verifies, and loads metadata for the top-level
+        roles in a specific order (root -> timestamp -> snapshot -> targets).
+        The expiration time for downloaded metadata is also verified.
+
+        The metadata for delegated roles is not refreshed by this method, but
+        by the method that returns targetinfo (i.e.,
+        get_one_valid_targetinfo()).
+
+        The refresh() method should be called by the client before any target
+        requests.
+
+        Raises:
+            OSError: New metadata could not be written to disk
+            RepositoryError: Metadata failed to verify in some way
+            TODO: download-related errors
+        """
+
+        self._load_root()
+        self._load_timestamp()
+        self._load_snapshot()
+        self._load_targets("targets", "root")
+
+    def get_one_valid_targetinfo(self, target_path: str) -> Dict:
+        """
+        Returns the target information for a target identified by target_path.
+
+        As a side-effect this method downloads all the metadata it needs to
+        return the target information.
+
+        Args:
+            target_path: A target identifier that is a path-relative-URL string
+                (https://url.spec.whatwg.org/#path-relative-url-string).
+                Typically this is also the unix file path of the eventually
+                downloaded file.
+
+        Raises:
+            OSError: New metadata could not be written to disk
+            RepositoryError: Metadata failed to verify in some way
+            TODO: download-related errors
+        """
+        return self._preorder_depth_first_walk(target_path)
+
+    @staticmethod
+    def updated_targets(targets: Dict, destination_directory: str) -> Dict:
+        """
+        After the client has retrieved the target information for those
+        targets they are interested in updating, they would call this method
+        to determine which targets have changed from those saved locally on
+        disk. All the targets that have changed are returned in a list. From
+        this list, they can request a download by calling 'download_target()'.
+        """
+        # Keep track of the target objects and filepaths of updated targets.
+        # Return 'updated_targets' and use 'updated_targetpaths' to avoid
+        # duplicates.
+        updated_targets = []
+        updated_targetpaths = []
+
+        for target in targets:
+            # Prepend 'destination_directory' to the target's relative filepath
+            # (as stored in metadata.) Verify the hash of 'target_filepath'
+            # against each hash listed for its fileinfo. Note: join() discards
+            # 'destination_directory' if 'filepath' contains a leading path
+            # separator (i.e., is treated as an absolute path).
+            filepath = target["filepath"]
+            target_filepath = os.path.join(destination_directory, filepath)
+
+            if target_filepath in updated_targetpaths:
+                continue
+
+            # Try one of the algorithm/digest combos for a mismatch. We break
+            # as soon as we find a mismatch.
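+            # For example, target["fileinfo"].hashes may contain entries for
+            # several algorithms ({"sha256": ..., "sha512": ...}); the first
+            # digest that differs, or that cannot be computed because the
+            # local file is missing, marks the target as updated.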
+ for algorithm, digest in target["fileinfo"].hashes.items(): + digest_object = None + try: + digest_object = sslib_hash.digest_filename( + target_filepath, algorithm=algorithm + ) + + # This exception will occur if the target does not exist + # locally. + except sslib_exceptions.StorageError: + updated_targets.append(target) + updated_targetpaths.append(target_filepath) + break + + # The file does exist locally, check if its hash differs. + if digest_object.hexdigest() != digest: + updated_targets.append(target) + updated_targetpaths.append(target_filepath) + break + + return updated_targets + + def download_target( + self, + targetinfo: Dict, + destination_directory: str, + target_base_url: Optional[str] = None, + ): + """ + Download target specified by 'targetinfo' into 'destination_directory'. + + Args: + targetinfo: data received from get_one_valid_targetinfo() + destination_directory: existing local directory to download into. + Note that new directories may be created inside + destination_directory as required. + target_base_url: Optional; Base URL used to form the final target + download URL. Default is the value provided in Updater() + + Raises: + TODO: download-related errors + TODO: file write errors + """ + if target_base_url is None and self._target_base_url is None: + raise ValueError( + "target_base_url must be set in either download_target() or " + "constructor" + ) + if target_base_url is None: + target_base_url = self._target_base_url + else: + target_base_url = _ensure_trailing_slash(target_base_url) + + full_url = parse.urljoin(target_base_url, targetinfo["filepath"]) + + with download.download_file( + full_url, targetinfo["fileinfo"].length, self._fetcher + ) as target_file: + _check_file_length(target_file, targetinfo["fileinfo"].length) + _check_hashes_obj(target_file, targetinfo["fileinfo"].hashes) + + filepath = os.path.join( + destination_directory, targetinfo["filepath"] + ) + sslib_util.persist_temp_file(target_file, filepath) + + def _download_metadata( + self, rolename: str, length: int, version: Optional[int] = None + ) -> bytes: + """Download a metadata file and return it as bytes""" + if version is None: + filename = f"{rolename}.json" + else: + filename = f"{version}.{rolename}.json" + url = parse.urljoin(self._metadata_base_url, filename) + return download.download_bytes( + url, + length, + self._fetcher, + strict_required_length=False, + ) + + def _load_local_metadata(self, rolename: str) -> bytes: + with open(os.path.join(self._dir, f"{rolename}.json"), "rb") as f: + return f.read() + + def _persist_metadata(self, rolename: str, data: bytes): + with open(os.path.join(self._dir, f"{rolename}.json"), "wb") as f: + f.write(data) + + def _load_root(self) -> None: + """Load remote root metadata. + + Sequentially load and persist on local disk every newer root metadata + version available on the remote. 
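+
+        Note: versioned root metadata is requested from the remote as
+        '{version}.root.json' (see _download_metadata()); the loop below
+        stops at the first HTTP 403/404 response, which is taken to mean
+        that the current root is the newest available.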
+ """ + + # Update the root role + lower_bound = self._bundle.root.signed.version + 1 + upper_bound = lower_bound + MAX_ROOT_ROTATIONS + + for next_version in range(lower_bound, upper_bound): + try: + data = self._download_metadata( + "root", DEFAULT_ROOT_MAX_LENGTH, next_version + ) + self._bundle.update_root(data) + self._persist_metadata("root", data) + + except exceptions.FetcherHTTPError as exception: + if exception.status_code not in {403, 404}: + raise + # 404/403 means current root is newest available + break + + # Verify final root + self._bundle.root_update_finished() + + def _load_timestamp(self) -> None: + """Load local and remote timestamp metadata""" + try: + data = self._load_local_metadata("timestamp") + self._bundle.update_timestamp(data) + except (OSError, exceptions.RepositoryError) as e: + # Local timestamp does not exist or is invalid + logger.debug("Failed to load local timestamp %s", e) + + # Load from remote (whether local load succeeded or not) + data = self._download_metadata( + "timestamp", DEFAULT_TIMESTAMP_MAX_LENGTH + ) + self._bundle.update_timestamp(data) + self._persist_metadata("timestamp", data) + + def _load_snapshot(self) -> None: + """Load local (and if needed remote) snapshot metadata""" + try: + data = self._load_local_metadata("snapshot") + self._bundle.update_snapshot(data) + logger.debug("Local snapshot is valid: not downloading new one") + except (OSError, exceptions.RepositoryError) as e: + # Local snapshot does not exist or is invalid: update from remote + logger.debug("Failed to load local snapshot %s", e) + + metainfo = self._bundle.timestamp.signed.meta["snapshot.json"] + length = metainfo.length or DEFAULT_SNAPSHOT_MAX_LENGTH + version = None + if self._bundle.root.signed.consistent_snapshot: + version = metainfo.version + + data = self._download_metadata("snapshot", length, version) + self._bundle.update_snapshot(data) + self._persist_metadata("snapshot", data) + + def _load_targets(self, role: str, parent_role: str) -> None: + """Load local (and if needed remote) metadata for 'role'.""" + try: + data = self._load_local_metadata(role) + self._bundle.update_delegated_targets(data, role, parent_role) + logger.debug("Local %s is valid: not downloading new one", role) + except (OSError, exceptions.RepositoryError) as e: + # Local 'role' does not exist or is invalid: update from remote + logger.debug("Failed to load local %s: %s", role, e) + + metainfo = self._bundle.snapshot.signed.meta[f"{role}.json"] + length = metainfo.length or DEFAULT_TARGETS_MAX_LENGTH + version = None + if self._bundle.root.signed.consistent_snapshot: + version = metainfo.version + + data = self._download_metadata(role, length, version) + self._bundle.update_delegated_targets(data, role, parent_role) + self._persist_metadata(role, data) + + def _preorder_depth_first_walk(self, target_filepath) -> Dict: + """ + Interrogates the tree of target delegations in order of appearance + (which implicitly order trustworthiness), and returns the matching + target found in the most trusted role. + """ + + target = None + role_names = [("targets", "root")] + visited_role_names = set() + number_of_delegations = MAX_DELEGATIONS + + # Preorder depth-first traversal of the graph of target delegations. + while ( + target is None and number_of_delegations > 0 and len(role_names) > 0 + ): + + # Pop the role name from the top of the stack. + role_name, parent_role = role_names.pop(-1) + self._load_targets(role_name, parent_role) + # Skip any visited current role to prevent cycles. 
+ if (role_name, parent_role) in visited_role_names: + msg = f"Skipping visited current role {role_name}" + logger.debug(msg) + continue + + # The metadata for 'role_name' must be downloaded/updated before + # its targets, delegations, and child roles can be inspected. + + role_metadata = self._bundle[role_name].signed + target = role_metadata.targets.get(target_filepath) + + # After preorder check, add current role to set of visited roles. + visited_role_names.add((role_name, parent_role)) + + # And also decrement number of visited roles. + number_of_delegations -= 1 + child_roles = [] + if role_metadata.delegations is not None: + child_roles = role_metadata.delegations.roles + + if target is None: + + child_roles_to_visit = [] + # NOTE: This may be a slow operation if there are many + # delegated roles. + for child_role in child_roles: + child_role_name = _visit_child_role( + child_role, target_filepath + ) + + if child_role.terminating and child_role_name is not None: + msg = ( + f"Adding child role {child_role_name}.\n", + "Not backtracking to other roles.", + ) + logger.debug(msg) + role_names = [] + child_roles_to_visit.append( + (child_role_name, role_name) + ) + break + + if child_role_name is None: + msg = f"Skipping child role {child_role_name}" + logger.debug(msg) + + else: + msg = f"Adding child role {child_role_name}" + logger.debug(msg) + child_roles_to_visit.append( + (child_role_name, role_name) + ) + + # Push 'child_roles_to_visit' in reverse order of appearance + # onto 'role_names'. Roles are popped from the end of + # the 'role_names' list. + child_roles_to_visit.reverse() + role_names.extend(child_roles_to_visit) + + else: + msg = f"Found target in current role {role_name}" + logger.debug(msg) + + if ( + target is None + and number_of_delegations == 0 + and len(role_names) > 0 + ): + msg = ( + f"{len(role_names)} roles left to visit, but allowed to ", + f"visit at most {MAX_DELEGATIONS} delegations.", + ) + logger.debug(msg) + + return {"filepath": target_filepath, "fileinfo": target} + + +def _visit_child_role(child_role: Dict, target_filepath: str) -> str: + """ + + Non-public method that determines whether the given 'target_filepath' + is an allowed path of 'child_role'. + + Ensure that we explore only delegated roles trusted with the target. The + metadata for 'child_role' should have been refreshed prior to this point, + however, the paths/targets that 'child_role' signs for have not been + verified (as intended). The paths/targets that 'child_role' is allowed + to specify in its metadata depends on the delegating role, and thus is + left to the caller to verify. We verify here that 'target_filepath' + is an allowed path according to the delegated 'child_role'. + + TODO: Should the TUF spec restrict the repository to one particular + algorithm? Should we allow the repository to specify in the role + dictionary the algorithm used for these generated hashed paths? + + + child_role: + The delegation targets role object of 'child_role', containing its + paths, path_hash_prefixes, keys, and so on. + + target_filepath: + The path to the target file on the repository. This will be relative to + the 'targets' (or equivalent) directory on a given mirror. + + + None. + + + None. + + + If 'child_role' has been delegated the target with the name + 'target_filepath', then we return the role name of 'child_role'. + + Otherwise, we return None. 
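+
+    For example, a child role whose paths are ['*.tgz'] is trusted for
+    'foo.tgz', while one whose path_hash_prefixes are ['00', '01'] is
+    trusted for any target whose hashed path (the SHA-256 hex digest of
+    the target path, see _get_filepath_hash()) starts with '00' or '01'.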
+ """ + + child_role_name = child_role.name + child_role_paths = child_role.paths + child_role_path_hash_prefixes = child_role.path_hash_prefixes + + if child_role_path_hash_prefixes is not None: + target_filepath_hash = _get_filepath_hash(target_filepath) + for child_role_path_hash_prefix in child_role_path_hash_prefixes: + if not target_filepath_hash.startswith(child_role_path_hash_prefix): + continue + + return child_role_name + + elif child_role_paths is not None: + # Is 'child_role_name' allowed to sign for 'target_filepath'? + for child_role_path in child_role_paths: + # A child role path may be an explicit path or glob pattern (Unix + # shell-style wildcards). The child role 'child_role_name' is + # returned if 'target_filepath' is equal to or matches + # 'child_role_path'. Explicit filepaths are also considered + # matches. A repo maintainer might delegate a glob pattern with a + # leading path separator, while the client requests a matching + # target without a leading path separator - make sure to strip any + # leading path separators so that a match is made. + # Example: "foo.tgz" should match with "/*.tgz". + if fnmatch.fnmatch( + target_filepath.lstrip(os.sep), child_role_path.lstrip(os.sep) + ): + logger.debug( + "Child role " + + repr(child_role_name) + + " is allowed to sign for " + + repr(target_filepath) + ) + + return child_role_name + + logger.debug( + "The given target path " + + repr(target_filepath) + + " does not match the trusted path or glob pattern: " + + repr(child_role_path) + ) + continue + + else: + # 'role_name' should have been validated when it was downloaded. + # The 'paths' or 'path_hash_prefixes' fields should not be missing, + # so we raise a format error here in case they are both missing. + raise exceptions.FormatError( + repr(child_role_name) + " " + 'has neither a "paths" nor "path_hash_prefixes". At least' + " one of these attributes must be present." + ) + + return None + + +def _check_file_length(file_object, trusted_file_length): + """ + Given a file_object, checks whether its length matches + trusted_file_length. + + Raises: + DownloadLengthMismatchError: File length does not match + expected length. + """ + file_object.seek(0, 2) + observed_length = file_object.tell() + file_object.seek(0) + + if observed_length != trusted_file_length: + raise exceptions.DownloadLengthMismatchError( + trusted_file_length, observed_length + ) + + +def _check_hashes_obj(file_object, trusted_hashes): + """ + Given a file_object, checks whether its hash matches + trusted_hashes. + + Raises: + BadHashError: Hashes do not match + """ + for algorithm, trusted_hash in trusted_hashes.items(): + digest_object = sslib_hash.digest_fileobject(file_object, algorithm) + + computed_hash = digest_object.hexdigest() + + # Raise an exception if any of the hashes are incorrect. + if trusted_hash != computed_hash: + raise exceptions.BadHashError(trusted_hash, computed_hash) + + logger.info( + "The file's " + algorithm + " hash is" " correct: " + trusted_hash + ) + + +def _get_filepath_hash(target_filepath, hash_function="sha256"): + """ + Calculate the hash of the filepath to determine which bin to find the + target. + """ + # The client currently assumes the repository (i.e., repository + # tool) uses 'hash_function' to generate hashes and UTF-8. 
+ digest_object = sslib_hash.digest(hash_function) + encoded_target_filepath = target_filepath.encode("utf-8") + digest_object.update(encoded_target_filepath) + target_filepath_hash = digest_object.hexdigest() + + return target_filepath_hash + + +def _ensure_trailing_slash(url: str): + """Return url guaranteed to end in a slash""" + return url if url.endswith("/") else f"{url}/" From fac151da3d7894b6d4cfee1475f7511308522bc5 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 21 May 2021 13:51:49 +0300 Subject: [PATCH 05/14] ngclient: Add initial testing This testing lacks coverage but demonstrates the happy cases. Signed-off-by: Jussi Kukkonen --- tests/test_metadata_bundle.py | 124 ++++++++++++++++++++++++ tests/test_updater_ng.py | 171 ++++++++++++++++++++++++++++++++++ 2 files changed, 295 insertions(+) create mode 100644 tests/test_metadata_bundle.py create mode 100644 tests/test_updater_ng.py diff --git a/tests/test_metadata_bundle.py b/tests/test_metadata_bundle.py new file mode 100644 index 0000000000..a988b8d3f5 --- /dev/null +++ b/tests/test_metadata_bundle.py @@ -0,0 +1,124 @@ +import json +import logging +import os +import shutil +import sys +import tempfile +import unittest + +from tuf import exceptions +from tuf.api.metadata import Metadata +from tuf.ngclient._internal.metadata_bundle import MetadataBundle + +from tests import utils + +logger = logging.getLogger(__name__) + +class TestMetadataBundle(unittest.TestCase): + + def test_update(self): + repo_dir = os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata') + + with open(os.path.join(repo_dir, "root.json"), "rb") as f: + bundle = MetadataBundle(f.read()) + bundle.root_update_finished() + + with open(os.path.join(repo_dir, "timestamp.json"), "rb") as f: + bundle.update_timestamp(f.read()) + with open(os.path.join(repo_dir, "snapshot.json"), "rb") as f: + bundle.update_snapshot(f.read()) + with open(os.path.join(repo_dir, "targets.json"), "rb") as f: + bundle.update_targets(f.read()) + with open(os.path.join(repo_dir, "role1.json"), "rb") as f: + bundle.update_delegated_targets(f.read(), "role1", "targets") + with open(os.path.join(repo_dir, "role2.json"), "rb") as f: + bundle.update_delegated_targets(f.read(), "role2", "role1") + + def test_out_of_order_ops(self): + repo_dir = os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata') + data={} + for md in ["root", "timestamp", "snapshot", "targets", "role1"]: + with open(os.path.join(repo_dir, f"{md}.json"), "rb") as f: + data[md] = f.read() + + bundle = MetadataBundle(data["root"]) + + # Update timestamp before root is finished + with self.assertRaises(RuntimeError): + bundle.update_timestamp(data["timestamp"]) + + bundle.root_update_finished() + with self.assertRaises(RuntimeError): + bundle.root_update_finished() + + # Update snapshot before timestamp + with self.assertRaises(RuntimeError): + bundle.update_snapshot(data["snapshot"]) + + bundle.update_timestamp(data["timestamp"]) + + # Update targets before snapshot + with self.assertRaises(RuntimeError): + bundle.update_targets(data["targets"]) + + bundle.update_snapshot(data["snapshot"]) + + #update timestamp after snapshot + with self.assertRaises(RuntimeError): + bundle.update_timestamp(data["timestamp"]) + + # Update delegated targets before targets + with self.assertRaises(RuntimeError): + bundle.update_delegated_targets(data["role1"], "role1", "targets") + + bundle.update_targets(data["targets"]) + bundle.update_delegated_targets(data["role1"], "role1", "targets") + + def 
test_update_with_invalid_json(self):
+        repo_dir = os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata')
+        data={}
+        for md in ["root", "timestamp", "snapshot", "targets", "role1"]:
+            with open(os.path.join(repo_dir, f"{md}.json"), "rb") as f:
+                data[md] = f.read()
+
+        # root.json not a json file at all
+        with self.assertRaises(exceptions.RepositoryError):
+            MetadataBundle(b"")
+        # root.json is invalid
+        root = Metadata.from_bytes(data["root"])
+        root.signed.version += 1
+        with self.assertRaises(exceptions.RepositoryError):
+            MetadataBundle(json.dumps(root.to_dict()).encode())
+
+        bundle = MetadataBundle(data["root"])
+        bundle.root_update_finished()
+
+        top_level_md = [
+            (data["timestamp"], bundle.update_timestamp),
+            (data["snapshot"], bundle.update_snapshot),
+            (data["targets"], bundle.update_targets),
+        ]
+        for metadata, update_func in top_level_md:
+            # metadata is not json
+            with self.assertRaises(exceptions.RepositoryError):
+                update_func(b"")
+            # metadata is invalid
+            md = Metadata.from_bytes(metadata)
+            md.signed.version += 1
+            with self.assertRaises(exceptions.RepositoryError):
+                update_func(json.dumps(md.to_dict()).encode())
+
+            # metadata is of wrong type
+            with self.assertRaises(exceptions.RepositoryError):
+                update_func(data["root"])
+
+            update_func(metadata)
+
+
+    # TODO test updating over initial metadata (new keys, newer timestamp, etc)
+    # TODO test the actual specification checks
+
+
+if __name__ == '__main__':
+    utils.configure_test_logging(sys.argv)
+    unittest.main()
diff --git a/tests/test_updater_ng.py b/tests/test_updater_ng.py
new file mode 100644
index 0000000000..44dae1d006
--- /dev/null
+++ b/tests/test_updater_ng.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python
+
+# Copyright 2021, New York University and the TUF contributors
+# SPDX-License-Identifier: MIT OR Apache-2.0
+
+"""Test Updater class
+"""
+
+import os
+import time
+import shutil
+import copy
+import tempfile
+import logging
+import errno
+import sys
+import unittest
+import json
+import tracemalloc
+
+if sys.version_info >= (3, 3):
+  import unittest.mock as mock
+else:
+  import mock
+
+import tuf
+import tuf.exceptions
+import tuf.log
+import tuf.repository_tool as repo_tool
+import tuf.unittest_toolbox as unittest_toolbox
+
+from tests import utils
+from tuf.api import metadata
+from tuf import ngclient
+
+import securesystemslib
+
+logger = logging.getLogger(__name__)
+
+
+class TestUpdater(unittest_toolbox.Modified_TestCase):
+
+  @classmethod
+  def setUpClass(cls):
+    # Create a temporary directory to store the repository, metadata, and
+    # target files.  'temporary_directory' must be deleted in tearDownClass()
+    # so that temporary files are always removed, even when exceptions occur.
+    cls.temporary_directory = tempfile.mkdtemp(dir=os.getcwd())
+
+    # Needed because simple_server.py cannot otherwise be found in some
+    # tests: the current working directory is changed when the subprocess
+    # is executed.
+    cls.SIMPLE_SERVER_PATH = os.path.join(os.getcwd(), 'simple_server.py')
+
+    # Launch a SimpleHTTPServer (serves files in the current directory).
+    # Test cases will request metadata and target files that have been
+    # pre-generated in 'tuf/tests/repository_data', which will be served
+    # by the SimpleHTTPServer launched here.  The test cases of
+    # 'test_updater.py' assume the pre-generated metadata files have a
+    # specific structure, such as a delegated role 'targets/role1', three
+    # target files, five key files, etc.
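+    # utils.TestServerProcess (see tests/utils.py) wraps the server
+    # subprocess: its 'port' attribute is used to build URLs in setUp()
+    # below, and its clean()/flush_log() helpers are called in
+    # tearDownClass()/tearDown().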
+    cls.server_process_handler = utils.TestServerProcess(log=logger,
+        server=cls.SIMPLE_SERVER_PATH)
+
+
+
+  @classmethod
+  def tearDownClass(cls):
+    # Clean up resources and flush the logged lines (if any).
+    cls.server_process_handler.clean()
+
+    # Remove the temporary repository directory, which should contain all
+    # the metadata, targets, and key files generated for the test cases.
+    shutil.rmtree(cls.temporary_directory)
+
+
+
+  def setUp(self):
+    # We are inheriting from custom class.
+    unittest_toolbox.Modified_TestCase.setUp(self)
+
+    # Copy the original repository files provided in the test folder so that
+    # any modifications made to repository files are restricted to the
+    # copies.  The 'repository_data' directory is expected to exist in
+    # 'tuf.tests/'.
+    original_repository_files = os.path.join(os.getcwd(), 'repository_data')
+    temporary_repository_root = \
+      self.make_temp_directory(directory=self.temporary_directory)
+
+    # The original repository, keystore, and client directories will be
+    # copied for each test case.
+    original_repository = os.path.join(original_repository_files, 'repository')
+    original_keystore = os.path.join(original_repository_files, 'keystore')
+    original_client = os.path.join(original_repository_files, 'client',
+        'test_repository1', 'metadata', 'current')
+
+    # Save references to the often-needed client repository directories.
+    # Test cases need these references to access metadata and target files.
+    self.repository_directory = \
+      os.path.join(temporary_repository_root, 'repository')
+    self.keystore_directory = \
+      os.path.join(temporary_repository_root, 'keystore')
+
+    self.client_directory = os.path.join(temporary_repository_root, 'client')
+
+    # Copy the original 'repository', 'client', and 'keystore' directories
+    # to the temporary repository that the test cases can use.
+    shutil.copytree(original_repository, self.repository_directory)
+    shutil.copytree(original_client, self.client_directory)
+    shutil.copytree(original_keystore, self.keystore_directory)
+
+    # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'.
+    repository_basepath = self.repository_directory[len(os.getcwd()):]
+    url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \
+        + str(self.server_process_handler.port) + repository_basepath
+
+    metadata_url = f"{url_prefix}/metadata/"
+    targets_url = f"{url_prefix}/targets/"
+    # Create an Updater instance.  The test cases will use this client
+    # updater to refresh metadata, fetch target files, etc.
+    self.repository_updater = ngclient.Updater(self.client_directory,
+                                               metadata_url,
+                                               targets_url)
+
+  def tearDown(self):
+    # We are inheriting from custom class.
+    unittest_toolbox.Modified_TestCase.tearDown(self)
+
+    # Logs stdout and stderr from the server subprocess.
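+    # (Flushing after each test presumably keeps the server output grouped
+    # with that test's own log lines.)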
+    self.server_process_handler.flush_log()
+
+  def test_refresh(self):
+    # All metadata is in the local directory already
+    self.repository_updater.refresh()
+
+    # Get targetinfo for 'file1.txt' listed in targets
+    targetinfo1 = self.repository_updater.get_one_valid_targetinfo('file1.txt')
+    # Get targetinfo for 'file3.txt' listed in the delegated role1
+    targetinfo3 = self.repository_updater.get_one_valid_targetinfo('file3.txt')
+
+    destination_directory = self.make_temp_directory()
+    updated_targets = self.repository_updater.updated_targets(
+        [targetinfo1, targetinfo3], destination_directory)
+
+    self.assertListEqual(updated_targets, [targetinfo1, targetinfo3])
+
+    self.repository_updater.download_target(targetinfo1, destination_directory)
+    updated_targets = self.repository_updater.updated_targets(
+        updated_targets, destination_directory)
+
+    self.assertListEqual(updated_targets, [targetinfo3])
+
+
+    self.repository_updater.download_target(targetinfo3, destination_directory)
+    updated_targets = self.repository_updater.updated_targets(
+        updated_targets, destination_directory)
+
+    self.assertListEqual(updated_targets, [])
+
+  def test_refresh_with_only_local_root(self):
+    os.remove(os.path.join(self.client_directory, "timestamp.json"))
+    os.remove(os.path.join(self.client_directory, "snapshot.json"))
+    os.remove(os.path.join(self.client_directory, "targets.json"))
+    os.remove(os.path.join(self.client_directory, "role1.json"))
+
+    self.repository_updater.refresh()
+
+    # Get targetinfo for 'file3.txt' listed in the delegated role1
+    targetinfo3 = self.repository_updater.get_one_valid_targetinfo('file3.txt')
+
+if __name__ == '__main__':
+  utils.configure_test_logging(sys.argv)
+  unittest.main()

From a3ebc7c3bb74f5842ac6bf34f6539b966e3090dc Mon Sep 17 00:00:00 2001
From: Teodora Sechkova
Date: Wed, 16 Jun 2021 16:17:57 +0300
Subject: [PATCH 06/14] Improve README

Improve the README text.

Signed-off-by: Teodora Sechkova
---
 tuf/ngclient/README.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tuf/ngclient/README.md b/tuf/ngclient/README.md
index ad1de19b78..ad1b396c0d 100644
--- a/tuf/ngclient/README.md
+++ b/tuf/ngclient/README.md
@@ -7,16 +7,16 @@ described in the TUF specification
 (see https://theupdateframework.github.io/specification/latest/#detailed-client-workflow)
 
 **tuf.ngclient.FetcherInterface** is an abstract class that client
-implementers can optionally use to integrate with their own
-network/download infrastructure -- a Requests-based implementation is
+implementers can subclass in order to reuse their own
+networking/download libraries -- a Requests-based implementation is
 used by default.
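+
+For example, a minimal client workflow sketch (the URLs and directories
+below are placeholders) could look like:
+
+```python
+from tuf import ngclient
+
+# The local metadata directory must already contain a trusted root.json
+updater = ngclient.Updater(
+    "/path/to/local/metadata",        # placeholder metadata directory
+    "https://example.com/metadata/",  # placeholder metadata URL
+    "https://example.com/targets/",   # placeholder targets URL
+)
+updater.refresh()
+
+# Look up a target, then download it if the local copy is missing or stale
+targetinfo = updater.get_one_valid_targetinfo("file1.txt")
+for target in updater.updated_targets([targetinfo], "/path/to/downloads"):
+    updater.download_target(target, "/path/to/downloads")
+```
 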
This package:
 * Aims to be a clean, easy-to-validate reference client implementation
   written in modern Python
-* At the same time aims to be the library choice for anyone
+* At the same time aims to be the library of choice for anyone
   implementing a TUF client in Python: light-weight, easy to integrate
   and with minimal required dependencies
-* Is still under development but planned to become the default client
-  in this code base (as in the older tuf.client will be deprecated in
-  the future)
+* Is still under development but is planned to become the default client
+  in this implementation (i.e., the older `tuf.client` will be deprecated
+  in the future)

From 78a0f4d3b5b18bb2e2f8258ffb3e8f7cbac8704a Mon Sep 17 00:00:00 2001
From: Teodora Sechkova
Date: Wed, 16 Jun 2021 16:18:48 +0300
Subject: [PATCH 07/14] Remove unused imports in tests

Remove unused and outdated imports in test_updater_ng.py

Signed-off-by: Teodora Sechkova
---
 tests/test_updater_ng.py | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/tests/test_updater_ng.py b/tests/test_updater_ng.py
index 44dae1d006..eec10d73cb 100644
--- a/tests/test_updater_ng.py
+++ b/tests/test_updater_ng.py
@@ -7,34 +7,16 @@
 """
 
 import os
-import time
 import shutil
-import copy
 import tempfile
 import logging
-import errno
 import sys
 import unittest
-import json
-import tracemalloc
-
-if sys.version_info >= (3, 3):
-  import unittest.mock as mock
-else:
-  import mock
-
-import tuf
-import tuf.exceptions
-import tuf.log
-import tuf.repository_tool as repo_tool
 import tuf.unittest_toolbox as unittest_toolbox
 
 from tests import utils
-from tuf.api import metadata
 from tuf import ngclient
 
-import securesystemslib
-
 logger = logging.getLogger(__name__)

From 10b28f5c75ef90aea35fd960e4189f16613efea1 Mon Sep 17 00:00:00 2001
From: Teodora Sechkova
Date: Wed, 16 Jun 2021 16:22:25 +0300
Subject: [PATCH 08/14] Improve comments

Remove outdated comments.
Add explanations to non-obvious cases.

Signed-off-by: Teodora Sechkova
---
 tuf/ngclient/_internal/metadata_bundle.py | 7 ++++---
 tuf/ngclient/updater.py                   | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tuf/ngclient/_internal/metadata_bundle.py b/tuf/ngclient/_internal/metadata_bundle.py
index 202a2598e5..ed54b54a47 100644
--- a/tuf/ngclient/_internal/metadata_bundle.py
+++ b/tuf/ngclient/_internal/metadata_bundle.py
@@ -254,7 +254,6 @@ def update_timestamp(self, data: bytes):
         error type and content will contain more details.
        """
        if not self._root_update_finished:
-            # root_update_finished() not called
            raise RuntimeError("Cannot update timestamp before root")
        if self.snapshot is not None:
            raise RuntimeError("Cannot update timestamp after snapshot")
@@ -274,6 +273,8 @@ def update_timestamp(self, data: bytes):
                "New timestamp is not signed by root", new_timestamp.signed
            )
 
+        # If an existing trusted timestamp is updated,
+        # check for a rollback attack
        if self.timestamp is not None:
            # Prevent rolling back timestamp version
            if new_timestamp.signed.version < self.timestamp.signed.version:
@@ -287,7 +288,6 @@ def update_timestamp(self, data: bytes):
                new_timestamp.signed.meta["snapshot.json"].version
                < self.timestamp.signed.meta["snapshot.json"].version
            ):
-                # TODO not sure about the correct exception here
                raise exceptions.ReplayedMetadataError(
                    "snapshot",
                    new_timestamp.signed.meta["snapshot.json"].version,
@@ -327,7 +327,6 @@ def update_snapshot(self, data: bytes):  # pylint: disable=too-many-branches
            digest_object.update(data)
            observed_hash = digest_object.hexdigest()
            if observed_hash != stored_hash:
-                # TODO: Error should derive from RepositoryError
                raise exceptions.BadHashError(stored_hash, observed_hash)
 
        try:
@@ -355,6 +354,8 @@ def update_snapshot(self, data: bytes):  # pylint: disable=too-many-branches
                f"got {new_snapshot.signed.version}"
            )
 
+        # If an existing trusted snapshot is updated,
+        # check for a rollback attack
        if self.snapshot:
            for filename, fileinfo in self.snapshot.signed.meta.items():
                new_fileinfo = new_snapshot.signed.meta.get(filename)
diff --git a/tuf/ngclient/updater.py b/tuf/ngclient/updater.py
index 27ac74617c..8b2b8a2a30 100644
--- a/tuf/ngclient/updater.py
+++ b/tuf/ngclient/updater.py
@@ -122,7 +122,7 @@ def updated_targets(targets: Dict, destination_directory: str) -> Dict:
         After the client has retrieved the target information for those
         targets they are interested in updating, they would call this method
         to determine which targets have changed from those saved locally on
-        disk. All the targets that have changed are returns in a list. From this
+        disk. All the targets that have changed are returned in a list. From this
         list, they can request a download by calling 'download_target()'.
         """
         # Keep track of the target objects and filepaths of updated targets.

From 0f69f287103062a196c54b25ef51b14e0ecd5557 Mon Sep 17 00:00:00 2001
From: Teodora Sechkova
Date: Wed, 16 Jun 2021 16:23:44 +0300
Subject: [PATCH 09/14] Check snapshot value for None

For consistency with the rest of the checks.

Signed-off-by: Teodora Sechkova
---
 tuf/ngclient/_internal/metadata_bundle.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tuf/ngclient/_internal/metadata_bundle.py b/tuf/ngclient/_internal/metadata_bundle.py
index ed54b54a47..8eaf739098 100644
--- a/tuf/ngclient/_internal/metadata_bundle.py
+++ b/tuf/ngclient/_internal/metadata_bundle.py
@@ -356,7 +356,7 @@ def update_snapshot(self, data: bytes):  # pylint: disable=too-many-branches
 
         # If an existing trusted snapshot is updated,
         # check for a rollback attack
-        if self.snapshot:
+        if self.snapshot is not None:
             for filename, fileinfo in self.snapshot.signed.meta.items():
                 new_fileinfo = new_snapshot.signed.meta.get(filename)

From a31f2ecb4a548b92ea64ee1ef0581f4b7d403ee5 Mon Sep 17 00:00:00 2001
From: Teodora Sechkova
Date: Wed, 16 Jun 2021 16:25:08 +0300
Subject: [PATCH 10/14] Document fast-forward attack recovery

Document why deleting the timestamp and snapshot files is not needed
to recover from a fast-forward attack.
Signed-off-by: Teodora Sechkova --- tuf/ngclient/_internal/metadata_bundle.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tuf/ngclient/_internal/metadata_bundle.py b/tuf/ngclient/_internal/metadata_bundle.py index 8eaf739098..ec7322ad05 100644 --- a/tuf/ngclient/_internal/metadata_bundle.py +++ b/tuf/ngclient/_internal/metadata_bundle.py @@ -240,6 +240,11 @@ def root_update_finished(self): if self.root.signed.is_expired(self.reference_time): raise exceptions.ExpiredMetadataError("New root.json is expired") + # No need to recover from fast-forward attack here since + # timestamp and snapshot are not loaded at this point and + # when loaded later will be verified with the new rotated + # keys. + self._root_update_finished = True logger.debug("Verified final root.json") From 4811e880c08245f844f0c4de8988e22c44e31817 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Tue, 22 Jun 2021 16:01:13 +0300 Subject: [PATCH 11/14] Rename MetadataBundle to TrustedMetadataSet TrustedMetadataSet is a long name but * it better describes the main feature * the name isn't used in too many places Change the variable names "bundle" -> "trusted_set" Signed-off-by: Jussi Kukkonen --- ...bundle.py => test_trusted_metadata_set.py} | 56 ++++++++-------- ...data_bundle.py => trusted_metadata_set.py} | 66 +++++++++---------- tuf/ngclient/updater.py | 36 +++++----- 3 files changed, 81 insertions(+), 77 deletions(-) rename tests/{test_metadata_bundle.py => test_trusted_metadata_set.py} (65%) rename tuf/ngclient/_internal/{metadata_bundle.py => trusted_metadata_set.py} (89%) diff --git a/tests/test_metadata_bundle.py b/tests/test_trusted_metadata_set.py similarity index 65% rename from tests/test_metadata_bundle.py rename to tests/test_trusted_metadata_set.py index a988b8d3f5..b59e9de78b 100644 --- a/tests/test_metadata_bundle.py +++ b/tests/test_trusted_metadata_set.py @@ -8,31 +8,31 @@ from tuf import exceptions from tuf.api.metadata import Metadata -from tuf.ngclient._internal.metadata_bundle import MetadataBundle +from tuf.ngclient._internal.trusted_metadata_set import TrustedMetadataSet from tests import utils logger = logging.getLogger(__name__) -class TestMetadataBundle(unittest.TestCase): +class TestTrustedMetadataSet(unittest.TestCase): def test_update(self): repo_dir = os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata') with open(os.path.join(repo_dir, "root.json"), "rb") as f: - bundle = MetadataBundle(f.read()) - bundle.root_update_finished() + trusted_set = TrustedMetadataSet(f.read()) + trusted_set.root_update_finished() with open(os.path.join(repo_dir, "timestamp.json"), "rb") as f: - bundle.update_timestamp(f.read()) + trusted_set.update_timestamp(f.read()) with open(os.path.join(repo_dir, "snapshot.json"), "rb") as f: - bundle.update_snapshot(f.read()) + trusted_set.update_snapshot(f.read()) with open(os.path.join(repo_dir, "targets.json"), "rb") as f: - bundle.update_targets(f.read()) + trusted_set.update_targets(f.read()) with open(os.path.join(repo_dir, "role1.json"), "rb") as f: - bundle.update_delegated_targets(f.read(), "role1", "targets") + trusted_set.update_delegated_targets(f.read(), "role1", "targets") with open(os.path.join(repo_dir, "role2.json"), "rb") as f: - bundle.update_delegated_targets(f.read(), "role2", "role1") + trusted_set.update_delegated_targets(f.read(), "role2", "role1") def test_out_of_order_ops(self): repo_dir = os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata') @@ -41,38 +41,38 @@ def test_out_of_order_ops(self): with 
open(os.path.join(repo_dir, f"{md}.json"), "rb") as f: data[md] = f.read() - bundle = MetadataBundle(data["root"]) + trusted_set = TrustedMetadataSet(data["root"]) # Update timestamp before root is finished with self.assertRaises(RuntimeError): - bundle.update_timestamp(data["timestamp"]) + trusted_set.update_timestamp(data["timestamp"]) - bundle.root_update_finished() + trusted_set.root_update_finished() with self.assertRaises(RuntimeError): - bundle.root_update_finished() + trusted_set.root_update_finished() # Update snapshot before timestamp with self.assertRaises(RuntimeError): - bundle.update_snapshot(data["snapshot"]) + trusted_set.update_snapshot(data["snapshot"]) - bundle.update_timestamp(data["timestamp"]) + trusted_set.update_timestamp(data["timestamp"]) # Update targets before snapshot with self.assertRaises(RuntimeError): - bundle.update_targets(data["targets"]) + trusted_set.update_targets(data["targets"]) - bundle.update_snapshot(data["snapshot"]) + trusted_set.update_snapshot(data["snapshot"]) #update timestamp after snapshot with self.assertRaises(RuntimeError): - bundle.update_timestamp(data["timestamp"]) + trusted_set.update_timestamp(data["timestamp"]) # Update delegated targets before targets with self.assertRaises(RuntimeError): - bundle.update_delegated_targets(data["role1"], "role1", "targets") + trusted_set.update_delegated_targets(data["role1"], "role1", "targets") - bundle.update_targets(data["targets"]) - bundle.update_delegated_targets(data["role1"], "role1", "targets") + trusted_set.update_targets(data["targets"]) + trusted_set.update_delegated_targets(data["role1"], "role1", "targets") def test_update_with_invalid_json(self): repo_dir = os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata') @@ -83,20 +83,20 @@ def test_update_with_invalid_json(self): # root.json not a json file at all with self.assertRaises(exceptions.RepositoryError): - MetadataBundle(b"") + TrustedMetadataSet(b"") # root.json is invalid root = Metadata.from_bytes(data["root"]) root.signed.version += 1 with self.assertRaises(exceptions.RepositoryError): - MetadataBundle(json.dumps(root.to_dict()).encode()) + TrustedMetadataSet(json.dumps(root.to_dict()).encode()) - bundle = MetadataBundle(data["root"]) - bundle.root_update_finished() + trusted_set = TrustedMetadataSet(data["root"]) + trusted_set.root_update_finished() top_level_md = [ - (data["timestamp"], bundle.update_timestamp), - (data["snapshot"], bundle.update_snapshot), - (data["targets"], bundle.update_targets), + (data["timestamp"], trusted_set.update_timestamp), + (data["snapshot"], trusted_set.update_snapshot), + (data["targets"], trusted_set.update_targets), ] for metadata, update_func in top_level_md: # metadata is not json diff --git a/tuf/ngclient/_internal/metadata_bundle.py b/tuf/ngclient/_internal/trusted_metadata_set.py similarity index 89% rename from tuf/ngclient/_internal/metadata_bundle.py rename to tuf/ngclient/_internal/trusted_metadata_set.py index ec7322ad05..d5674d8b10 100644 --- a/tuf/ngclient/_internal/metadata_bundle.py +++ b/tuf/ngclient/_internal/trusted_metadata_set.py @@ -1,17 +1,17 @@ # Copyright the TUF contributors # SPDX-License-Identifier: MIT OR Apache-2.0 -"""TUF client bundle-of-metadata +"""Trusted collection of client-side TUF Metadata -MetadataBundle keeps track of current valid set of metadata for the client, +TrustedMetadataSet keeps track of current valid set of metadata for the client, and handles almost every step of the "Detailed client workflow" ( 
https://theupdateframework.github.io/specification/latest#detailed-client-workflow) in the TUF specification: the remaining steps are related to filesystem and network IO which is not handled here. Loaded metadata can be accessed via the index access with rolename as key -(bundle["root"]) or, in the case of top-level metadata using the helper -properties (bundle.root). +(trusted_set["root"]) or, in the case of top-level metadata using the helper +properties (trusted_set.root). The rules for top-level metadata are * Metadata is loadable only if metadata it depends on is loaded @@ -26,33 +26,33 @@ >>> # Load local root (RepositoryErrors here stop the update) >>> with open(root_path, "rb") as f: ->>> bundle = MetadataBundle(f.read()) +>>> trusted_set = TrustedMetadataSet(f.read()) >>> >>> # update root from remote until no more are available ->>> with download("root", bundle.root.signed.version + 1) as f: ->>> bundle.update_root(f.read()) +>>> with download("root", trusted_set.root.signed.version + 1) as f: +>>> trusted_set.update_root(f.read()) >>> # ... ->>> bundle.root_update_finished() +>>> trusted_set.root_update_finished() >>> >>> # load local timestamp, then update from remote >>> try: >>> with open(timestamp_path, "rb") as f: ->>> bundle.update_timestamp(f.read()) +>>> trusted_set.update_timestamp(f.read()) >>> except (RepositoryError, OSError): >>> pass # failure to load a local file is ok >>> >>> with download("timestamp") as f: ->>> bundle.update_timestamp(f.read()) +>>> trusted_set.update_timestamp(f.read()) >>> >>> # load local snapshot, then update from remote if needed >>> try: >>> with open(snapshot_path, "rb") as f: ->>> bundle.update_snapshot(f.read()) +>>> trusted_set.update_snapshot(f.read()) >>> except (RepositoryError, OSError): >>> # local snapshot is not valid, load from remote >>> # (RepositoryErrors here stop the update) >>> with download("snapshot", version) as f: ->>> bundle.update_snapshot(f.read()) +>>> trusted_set.update_snapshot(f.read()) TODO: * exceptions are not final: the idea is that client could just handle @@ -116,27 +116,27 @@ def verify_with_threshold( return len(unique_keys) >= role.threshold -class MetadataBundle(abc.Mapping): - """Internal class to keep track of valid metadata in Updater +class TrustedMetadataSet(abc.Mapping): + """Internal class to keep track of trusted metadata in Updater - MetadataBundle ensures that the collection of metadata in the bundle is - valid. It provides easy ways to update the metadata with the caller making - decisions on what is updated. + TrustedMetadataSet ensures that the collection of metadata in it is valid + and trusted through the whole client update workflow. It provides easy ways + to update the metadata with the caller making decisions on what is updated. """ def __init__(self, root_data: bytes): - """Initialize bundle by loading trusted root metadata + """Initialize TrustedMetadataSet by loading trusted root metadata Args: root_data: Trusted root metadata as bytes. Note that this metadata will only be verified by itself: it is the source of trust for - all metadata in the bundle. + all metadata in the TrustedMetadataSet Raises: RepositoryError: Metadata failed to load or verify. The actual error type and content will contain more details. 
        """
-        self._bundle = {}  # type: Dict[str, Metadata]
+        self._trusted_set = {}  # type: Dict[str, Metadata]
         self.reference_time = datetime.utcnow()
         self._root_update_finished = False
@@ -147,36 +147,36 @@ def __init__(self, root_data: bytes):
 
     def __getitem__(self, role: str) -> Metadata:
         """Returns current Metadata for 'role'"""
-        return self._bundle[role]
+        return self._trusted_set[role]
 
     def __len__(self) -> int:
-        """Returns number of Metadata objects in bundle"""
-        return len(self._bundle)
+        """Returns number of Metadata objects in TrustedMetadataSet"""
+        return len(self._trusted_set)
 
     def __iter__(self) -> Iterator[Metadata]:
-        """Returns iterator over all Metadata objects in bundle"""
-        return iter(self._bundle)
+        """Returns iterator over all Metadata objects in TrustedMetadataSet"""
+        return iter(self._trusted_set)
 
     # Helper properties for top level metadata
     @property
     def root(self) -> Optional[Metadata]:
         """Current root Metadata or None"""
-        return self._bundle.get("root")
+        return self._trusted_set.get("root")
 
     @property
     def timestamp(self) -> Optional[Metadata]:
         """Current timestamp Metadata or None"""
-        return self._bundle.get("timestamp")
+        return self._trusted_set.get("timestamp")
 
     @property
     def snapshot(self) -> Optional[Metadata]:
         """Current snapshot Metadata or None"""
-        return self._bundle.get("snapshot")
+        return self._trusted_set.get("snapshot")
 
     @property
     def targets(self) -> Optional[Metadata]:
         """Current targets Metadata or None"""
-        return self._bundle.get("targets")
+        return self._trusted_set.get("targets")
 
     # Methods for updating metadata
     def update_root(self, data: bytes):
@@ -225,7 +225,7 @@ def update_root(self, data: bytes):
                 "New root is not signed by itself", new_root.signed
             )
 
-        self._bundle["root"] = new_root
+        self._trusted_set["root"] = new_root
         logger.debug("Updated root")
 
     def root_update_finished(self):
@@ -302,7 +302,7 @@ def update_timestamp(self, data: bytes):
         if new_timestamp.signed.is_expired(self.reference_time):
             raise exceptions.ExpiredMetadataError("New timestamp is expired")
 
-        self._bundle["timestamp"] = new_timestamp
+        self._trusted_set["timestamp"] = new_timestamp
         logger.debug("Updated timestamp")
 
     # TODO: remove pylint disable once the hash verification is in metadata.py
@@ -381,7 +381,7 @@ def update_snapshot(self, data: bytes):  # pylint: disable=too-many-branches
         if new_snapshot.signed.is_expired(self.reference_time):
             raise exceptions.ExpiredMetadataError("New snapshot is expired")
 
-        self._bundle["snapshot"] = new_snapshot
+        self._trusted_set["snapshot"] = new_snapshot
         logger.debug("Updated snapshot")
 
     def update_targets(self, data: bytes):
@@ -460,5 +460,5 @@ def update_delegated_targets(
         if new_delegate.signed.is_expired(self.reference_time):
             raise exceptions.ExpiredMetadataError(f"New {role_name} is expired")
 
-        self._bundle[role_name] = new_delegate
+        self._trusted_set[role_name] = new_delegate
         logger.debug("Updated %s delegated by %s", role_name, delegator_name)
diff --git a/tuf/ngclient/updater.py b/tuf/ngclient/updater.py
index 8b2b8a2a30..a21b292869 100644
--- a/tuf/ngclient/updater.py
+++ b/tuf/ngclient/updater.py
@@ -15,7 +15,11 @@
 from securesystemslib import util as sslib_util
 
 from tuf import exceptions
-from tuf.ngclient._internal import download, metadata_bundle, requests_fetcher
+from tuf.ngclient._internal import (
+    download,
+    requests_fetcher,
+    trusted_metadata_set,
+)
 from tuf.ngclient.fetcher import FetcherInterface
 
 # Globals
@@ -65,7 +69,7 @@ def __init__(
 
         # Read trusted local root metadata
         data = self._load_local_metadata("root")
- self._bundle = metadata_bundle.MetadataBundle(data) + self._trusted_set = trusted_metadata_set.TrustedMetadataSet(data) if fetcher is None: self._fetcher = requests_fetcher.RequestsFetcher() @@ -243,7 +247,7 @@ def _load_root(self) -> None: """ # Update the root role - lower_bound = self._bundle.root.signed.version + 1 + lower_bound = self._trusted_set.root.signed.version + 1 upper_bound = lower_bound + MAX_ROOT_ROTATIONS for next_version in range(lower_bound, upper_bound): @@ -251,7 +255,7 @@ def _load_root(self) -> None: data = self._download_metadata( "root", DEFAULT_ROOT_MAX_LENGTH, next_version ) - self._bundle.update_root(data) + self._trusted_set.update_root(data) self._persist_metadata("root", data) except exceptions.FetcherHTTPError as exception: @@ -261,13 +265,13 @@ def _load_root(self) -> None: break # Verify final root - self._bundle.root_update_finished() + self._trusted_set.root_update_finished() def _load_timestamp(self) -> None: """Load local and remote timestamp metadata""" try: data = self._load_local_metadata("timestamp") - self._bundle.update_timestamp(data) + self._trusted_set.update_timestamp(data) except (OSError, exceptions.RepositoryError) as e: # Local timestamp does not exist or is invalid logger.debug("Failed to load local timestamp %s", e) @@ -276,47 +280,47 @@ def _load_timestamp(self) -> None: data = self._download_metadata( "timestamp", DEFAULT_TIMESTAMP_MAX_LENGTH ) - self._bundle.update_timestamp(data) + self._trusted_set.update_timestamp(data) self._persist_metadata("timestamp", data) def _load_snapshot(self) -> None: """Load local (and if needed remote) snapshot metadata""" try: data = self._load_local_metadata("snapshot") - self._bundle.update_snapshot(data) + self._trusted_set.update_snapshot(data) logger.debug("Local snapshot is valid: not downloading new one") except (OSError, exceptions.RepositoryError) as e: # Local snapshot does not exist or is invalid: update from remote logger.debug("Failed to load local snapshot %s", e) - metainfo = self._bundle.timestamp.signed.meta["snapshot.json"] + metainfo = self._trusted_set.timestamp.signed.meta["snapshot.json"] length = metainfo.length or DEFAULT_SNAPSHOT_MAX_LENGTH version = None - if self._bundle.root.signed.consistent_snapshot: + if self._trusted_set.root.signed.consistent_snapshot: version = metainfo.version data = self._download_metadata("snapshot", length, version) - self._bundle.update_snapshot(data) + self._trusted_set.update_snapshot(data) self._persist_metadata("snapshot", data) def _load_targets(self, role: str, parent_role: str) -> None: """Load local (and if needed remote) metadata for 'role'.""" try: data = self._load_local_metadata(role) - self._bundle.update_delegated_targets(data, role, parent_role) + self._trusted_set.update_delegated_targets(data, role, parent_role) logger.debug("Local %s is valid: not downloading new one", role) except (OSError, exceptions.RepositoryError) as e: # Local 'role' does not exist or is invalid: update from remote logger.debug("Failed to load local %s: %s", role, e) - metainfo = self._bundle.snapshot.signed.meta[f"{role}.json"] + metainfo = self._trusted_set.snapshot.signed.meta[f"{role}.json"] length = metainfo.length or DEFAULT_TARGETS_MAX_LENGTH version = None - if self._bundle.root.signed.consistent_snapshot: + if self._trusted_set.root.signed.consistent_snapshot: version = metainfo.version data = self._download_metadata(role, length, version) - self._bundle.update_delegated_targets(data, role, parent_role) + 
self._trusted_set.update_delegated_targets(data, role, parent_role) self._persist_metadata(role, data) def _preorder_depth_first_walk(self, target_filepath) -> Dict: @@ -348,7 +352,7 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: # The metadata for 'role_name' must be downloaded/updated before # its targets, delegations, and child roles can be inspected. - role_metadata = self._bundle[role_name].signed + role_metadata = self._trusted_set[role_name].signed target = role_metadata.targets.get(target_filepath) # After preorder check, add current role to set of visited roles. From 82dcb507c791fe15f281876eac1cd92e50fcd4f2 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Mon, 5 Jul 2021 09:49:55 +0300 Subject: [PATCH 12/14] ngclient TrustedMetadataSet: Improve docstrings Signed-off-by: Jussi Kukkonen --- .../_internal/trusted_metadata_set.py | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/tuf/ngclient/_internal/trusted_metadata_set.py b/tuf/ngclient/_internal/trusted_metadata_set.py index d5674d8b10..6a93cc6e87 100644 --- a/tuf/ngclient/_internal/trusted_metadata_set.py +++ b/tuf/ngclient/_internal/trusted_metadata_set.py @@ -3,22 +3,21 @@ """Trusted collection of client-side TUF Metadata -TrustedMetadataSet keeps track of current valid set of metadata for the client, -and handles almost every step of the "Detailed client workflow" ( +TrustedMetadataSet keeps track of the current valid set of metadata for the +client, and handles almost every step of the "Detailed client workflow" ( https://theupdateframework.github.io/specification/latest#detailed-client-workflow) in the TUF specification: the remaining steps are related to filesystem and -network IO which is not handled here. +network IO, which are not handled here. -Loaded metadata can be accessed via the index access with rolename as key -(trusted_set["root"]) or, in the case of top-level metadata using the helper +Loaded metadata can be accessed via index access with rolename as key +(trusted_set["root"]) or, in the case of top-level metadata, using the helper properties (trusted_set.root). The rules for top-level metadata are - * Metadata is loadable only if metadata it depends on is loaded - * Metadata is immutable if any metadata depending on it has been loaded - * Metadata must be loaded/updated in order: - root -> timestamp -> snapshot -> targets -> (other delegated targets) - + * Metadata is updatable only if metadata it depends on is loaded + * Metadata is not updatable if any metadata depending on it has been loaded + * Metadata must be updated in order: + root -> timestamp -> snapshot -> targets -> (delegated targets) Exceptions are raised if metadata fails to load in any way. @@ -240,10 +239,10 @@ def root_update_finished(self): if self.root.signed.is_expired(self.reference_time): raise exceptions.ExpiredMetadataError("New root.json is expired") - # No need to recover from fast-forward attack here since - # timestamp and snapshot are not loaded at this point and - # when loaded later will be verified with the new rotated - # keys. + # No need to delete timestamp/snapshot here as specification instructs + # for fast-forward attack recovery: timestamp/snapshot can not be + # loaded at this point and when loaded later they will be verified + # with current root keys. 
         self._root_update_finished = True
         logger.debug("Verified final root.json")

From 1b404f3328f86fec6edebe34114337525243f7ac Mon Sep 17 00:00:00 2001
From: Jussi Kukkonen
Date: Mon, 5 Jul 2021 09:55:13 +0300
Subject: [PATCH 13/14] ngclient Updater: Fix annotations/docstrings

* updated_targets() both takes and returns a list
* download_target() argument can come from either updated_targets()
  or get_one_valid_targetinfo()

Signed-off-by: Jussi Kukkonen
---
 tuf/ngclient/updater.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tuf/ngclient/updater.py b/tuf/ngclient/updater.py
index a21b292869..3e5dcda4ac 100644
--- a/tuf/ngclient/updater.py
+++ b/tuf/ngclient/updater.py
@@ -7,7 +7,7 @@
 import fnmatch
 import logging
 import os
-from typing import Dict, Optional
+from typing import Any, Dict, List, Optional
 from urllib import parse
 
 from securesystemslib import exceptions as sslib_exceptions
@@ -121,7 +121,9 @@ def get_one_valid_targetinfo(self, target_path: str) -> Dict:
         return self._preorder_depth_first_walk(target_path)
 
     @staticmethod
-    def updated_targets(targets: Dict, destination_directory: str) -> Dict:
+    def updated_targets(
+        targets: List[Dict[str, Any]], destination_directory: str
+    ) -> List[Dict[str, Any]]:
         """
         After the client has retrieved the target information for those
         targets they are interested in updating, they would call this method to
@@ -181,7 +183,8 @@ def download_target(
         Download target specified by 'targetinfo' into 'destination_directory'.
 
         Args:
-            targetinfo: data received from get_one_valid_targetinfo()
+            targetinfo: data received from get_one_valid_targetinfo() or
+                updated_targets().
             destination_directory: existing local directory to download into.
                 Note that new directories may be created inside
                 destination_directory as required.

From ffff7f5597c9f40714f5fb494269833efe2191a7 Mon Sep 17 00:00:00 2001
From: Jussi Kukkonen
Date: Mon, 5 Jul 2021 10:58:58 +0300
Subject: [PATCH 14/14] ngclient: Improve docstrings and error messages

Signed-off-by: Jussi Kukkonen
---
 tuf/ngclient/_internal/trusted_metadata_set.py | 14 +++++++-------
 tuf/ngclient/updater.py                        |  8 ++++----
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tuf/ngclient/_internal/trusted_metadata_set.py b/tuf/ngclient/_internal/trusted_metadata_set.py
index 6a93cc6e87..ff629eff8c 100644
--- a/tuf/ngclient/_internal/trusted_metadata_set.py
+++ b/tuf/ngclient/_internal/trusted_metadata_set.py
@@ -353,8 +353,8 @@ def update_snapshot(self, data: bytes):  # pylint: disable=too-many-branches
                 != self.timestamp.signed.meta["snapshot.json"].version
             ):
                 raise exceptions.BadVersionNumberError(
-                    f"Expected snapshot version"
-                    f"{self.timestamp.signed.meta['snapshot.json'].version},"
+                    f"Expected snapshot version "
+                    f"{self.timestamp.signed.meta['snapshot.json'].version}, "
                     f"got {new_snapshot.signed.version}"
                 )
 
@@ -373,8 +373,8 @@ def update_snapshot(self, data: bytes):  # pylint: disable=too-many-branches
             # Prevent rollback of any metadata versions
             if new_fileinfo.version < fileinfo.version:
                 raise exceptions.BadVersionNumberError(
-                    f"Expected {filename} version"
-                    f"{new_fileinfo.version}, got {fileinfo.version}"
+                    f"Expected {filename} version "
+                    f"{fileinfo.version}, got {new_fileinfo.version}."
                )
 
        if new_snapshot.signed.is_expired(self.reference_time):
@@ -403,7 +403,7 @@ def update_delegated_targets(
        Args:
            data: unverified new metadata as bytes
            role_name: The role name of the new metadata
-            delegator_name: The name of the role delegating the new metadata
+            delegator_name: The name of the role delegating to the new metadata
 
        Raises:
            RepositoryError: Metadata failed to load or verify. The actual
@@ -452,8 +452,8 @@ def update_delegated_targets(
 
        if new_delegate.signed.version != meta.version:
            raise exceptions.BadVersionNumberError(
-                f"Expected {role_name} version"
-                f"{meta.version}, got {new_delegate.signed.version}"
+                f"Expected {role_name} version "
+                f"{meta.version}, got {new_delegate.signed.version}."
            )
 
        if new_delegate.signed.is_expired(self.reference_time):
diff --git a/tuf/ngclient/updater.py b/tuf/ngclient/updater.py
index 3e5dcda4ac..75cf229ec0 100644
--- a/tuf/ngclient/updater.py
+++ b/tuf/ngclient/updater.py
@@ -32,7 +32,7 @@
 logger = logging.getLogger(__name__)
 
 
-# Classes
+
 class Updater:
     """
     An implementation of the TUF client workflow.
@@ -79,7 +79,7 @@ def __init__(
     def refresh(self) -> None:
         """
         This method downloads, verifies, and loads metadata for the top-level
-        roles in a specific order (root -> timestamp -> snapshot -> targets)
+        roles in the specified order (root -> timestamp -> snapshot -> targets).
         The expiration time for downloaded metadata is also verified.
 
         The metadata for delegated roles are not refreshed by this method, but
@@ -104,8 +104,8 @@ def get_one_valid_targetinfo(self, target_path: str) -> Dict:
         """
         Returns the target information for a target identified by target_path.
 
-        As a side-effect this method downloads all the metadata it needs to
-        return the target information.
+        As a side-effect this method downloads all the additional (delegated
+        targets) metadata required to return the target information.
 
        Args:
           target_path: A target identifier that is a path-relative-URL string