diff --git a/README.md b/README.md index 374fc3e4f3..695f00be0a 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,13 @@ implementation on top of `pymongo`. PyMongo supports MongoDB 4.0, 4.2, 4.4, 5.0, 6.0, 7.0, and 8.0. PyMongo follows [semantic versioning](https://semver.org/spec/v2.0.0.html) for its releases. +## Documentation + +Documentation is available at +[mongodb.com](https://www.mongodb.com/docs/languages/python/pymongo-driver/current/). + +[API documentation](https://pymongo.readthedocs.io/en/stable/api/) and the [full changelog](https://pymongo.readthedocs.io/en/stable/changelog.html) for each release are available at [readthedocs.io](https://pymongo.readthedocs.io/en/stable/index.html). + ## Support / Feedback For issues with, questions about, or feedback for PyMongo, please look @@ -191,13 +198,6 @@ ObjectId('4aba160ee23f6b543e000002') [8, 11] ``` -## Documentation - -Documentation is available at -[pymongo.readthedocs.io](https://pymongo.readthedocs.io/en/stable/). - -See the [contributing guide](./CONTRIBUTING.md#documentation) for how to build the documentation. - ## Learning Resources - MongoDB Learn - [Python diff --git a/bson/__init__.py b/bson/__init__.py index 790ac06ef1..b655e30c2c 100644 --- a/bson/__init__.py +++ b/bson/__init__.py @@ -58,10 +58,10 @@ the microsecond field is truncated. .. [#dt2] all datetime.datetime instances are encoded as UTC. By default, they are decoded as *naive* but timezone aware datetimes are also supported. - See :doc:`/examples/datetimes` for examples. + See `Dates and Times `_ for examples. .. [#dt3] To enable decoding a bson UTC datetime to a :class:`~bson.datetime_ms.DatetimeMS` - instance see :ref:`handling-out-of-range-datetimes`. -.. [#uuid] For :py:class:`uuid.UUID` encoding and decoding behavior see :doc:`/examples/uuid`. + instance see `handling out of range datetimes `_. +.. [#uuid] For :py:class:`uuid.UUID` encoding and decoding behavior see ``_. .. 
[#re] :class:`~bson.regex.Regex` instances and regular expression objects from ``re.compile()`` are both saved as BSON regular expressions. BSON regular expressions are decoded as :class:`~bson.regex.Regex` diff --git a/bson/binary.py b/bson/binary.py index 693b838b80..b48ae4fcc6 100644 --- a/bson/binary.py +++ b/bson/binary.py @@ -79,7 +79,7 @@ class UuidRepresentation: :class:`~bson.binary.Binary` instance will be returned instead of a :class:`uuid.UUID` instance. - See :ref:`unspecified-representation-details` for details. + See `unspecified representation details `_ for details. .. versionadded:: 3.11 """ @@ -91,7 +91,7 @@ class UuidRepresentation: and decoded from BSON binary, using RFC-4122 byte order with binary subtype :data:`UUID_SUBTYPE`. - See :ref:`standard-representation-details` for details. + See `standard representation details `_ for details. .. versionadded:: 3.11 """ @@ -103,7 +103,7 @@ class UuidRepresentation: and decoded from BSON binary, using RFC-4122 byte order with binary subtype :data:`OLD_UUID_SUBTYPE`. - See :ref:`python-legacy-representation-details` for details. + See `python legacy representation details `_ for details. .. versionadded:: 3.11 """ @@ -115,7 +115,7 @@ class UuidRepresentation: and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`, using the Java driver's legacy byte order. - See :ref:`java-legacy-representation-details` for details. + See `Java Legacy UUID `_ for details. .. versionadded:: 3.11 """ @@ -127,7 +127,7 @@ class UuidRepresentation: and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`, using the C# driver's legacy byte order. - See :ref:`csharp-legacy-representation-details` for details. + See `C# Legacy UUID `_ for details. .. versionadded:: 3.11 """ @@ -328,7 +328,7 @@ def from_uuid( :param uuid_representation: A member of :class:`~bson.binary.UuidRepresentation`. Default: :const:`~bson.binary.UuidRepresentation.STANDARD`. - See :ref:`handling-uuid-data-example` for details. 
+ See `UUID representations `_ for details. .. versionadded:: 3.11 """ @@ -377,7 +377,7 @@ def as_uuid(self, uuid_representation: int = UuidRepresentation.STANDARD) -> UUI :param uuid_representation: A member of :class:`~bson.binary.UuidRepresentation`. Default: :const:`~bson.binary.UuidRepresentation.STANDARD`. - See :ref:`handling-uuid-data-example` for details. + See `UUID representations `_ for details. .. versionadded:: 3.11 """ diff --git a/bson/codec_options.py b/bson/codec_options.py index 0428cf843f..add5416a5b 100644 --- a/bson/codec_options.py +++ b/bson/codec_options.py @@ -57,7 +57,7 @@ class TypeEncoder(abc.ABC): Codec classes must implement the ``python_type`` attribute, and the ``transform_python`` method to support encoding. - See :ref:`custom-type-type-codec` documentation for an example. + See `encode data with type codecs `_ documentation for an example. """ @abc.abstractproperty @@ -76,7 +76,7 @@ class TypeDecoder(abc.ABC): Codec classes must implement the ``bson_type`` attribute, and the ``transform_bson`` method to support decoding. - See :ref:`custom-type-type-codec` documentation for an example. + See `encode data with type codecs `_ documentation for an example. """ @abc.abstractproperty @@ -98,7 +98,7 @@ class TypeCodec(TypeEncoder, TypeDecoder): ``bson_type`` attribute, and the ``transform_bson`` method to support decoding. - See :ref:`custom-type-type-codec` documentation for an example. + See `encode data with type codecs `_ documentation for an example. """ @@ -118,7 +118,7 @@ class TypeRegistry: >>> type_registry = TypeRegistry([Codec1, Codec2, Codec3, ...], ... fallback_encoder) - See :ref:`custom-type-type-registry` documentation for an example. + See `add codec to the type registry `_ documentation for an example. :param type_codecs: iterable of type codec instances. If ``type_codecs`` contains multiple codecs that transform a single @@ -128,7 +128,7 @@ class TypeRegistry: type. 
:param fallback_encoder: callable that accepts a single, unencodable python value and transforms it into a type that - :mod:`bson` can encode. See :ref:`fallback-encoder-callable` + :mod:`bson` can encode. See `define a fallback encoder `_ documentation for an example. """ @@ -327,10 +327,10 @@ def __init__(self, *args, **kwargs): >>> doc._id ObjectId('5b3016359110ea14e8c58b93') - See :doc:`/examples/datetimes` for examples using the `tz_aware` and + See `Dates and Times `_ for examples using the `tz_aware` and `tzinfo` options. - See :doc:`/examples/uuid` for examples using the `uuid_representation` + See `UUID `_ for examples using the `uuid_representation` option. :param document_class: BSON documents returned in queries will be decoded @@ -344,7 +344,7 @@ def __init__(self, *args, **kwargs): :data:`~bson.binary.UuidRepresentation.UNSPECIFIED`. New applications should consider setting this to :data:`~bson.binary.UuidRepresentation.STANDARD` for cross language - compatibility. See :ref:`handling-uuid-data-example` for details. + compatibility. See `UUID representations `_ for details. :param unicode_decode_error_handler: The error handler to apply when a Unicode-related error occurs during BSON decoding that would otherwise raise :exc:`UnicodeDecodeError`. Valid options include diff --git a/bson/datetime_ms.py b/bson/datetime_ms.py index 679524cb60..2047bd30b2 100644 --- a/bson/datetime_ms.py +++ b/bson/datetime_ms.py @@ -51,7 +51,7 @@ def __init__(self, value: Union[int, datetime.datetime]): To decode UTC datetimes as a ``DatetimeMS``, `datetime_conversion` in :class:`~bson.codec_options.CodecOptions` must be set to 'datetime_ms' or - 'datetime_auto'. See :ref:`handling-out-of-range-datetimes` for + 'datetime_auto'. See `handling out of range datetimes `_ for details. 
:param value: An instance of :class:`datetime.datetime` to be diff --git a/bson/json_util.py b/bson/json_util.py index ecae103b55..971231a7d7 100644 --- a/bson/json_util.py +++ b/bson/json_util.py @@ -281,7 +281,7 @@ def __init__(self, *args: Any, **kwargs: Any): return DatetimeMS objects when the underlying datetime is out-of-range and 'datetime_clamp' to clamp to the minimum and maximum possible datetimes. Defaults to 'datetime'. See - :ref:`handling-out-of-range-datetimes` for details. + `handling out of range datetimes `_ for details. :param args: arguments to :class:`~bson.codec_options.CodecOptions` :param kwargs: arguments to :class:`~bson.codec_options.CodecOptions` diff --git a/doc/async-tutorial.rst b/doc/async-tutorial.rst deleted file mode 100644 index b3e33e4b5c..0000000000 --- a/doc/async-tutorial.rst +++ /dev/null @@ -1,425 +0,0 @@ -Async Tutorial -============== - - -.. code-block:: pycon - - from pymongo import AsyncMongoClient - - client = AsyncMongoClient() - await client.drop_database("test-database") - -This tutorial is intended as an introduction to working with -**MongoDB** and **PyMongo** using the asynchronous API. - -Prerequisites -------------- -Before we start, make sure that you have the **PyMongo** distribution -:doc:`installed `. In the Python shell, the following -should run without raising an exception: - -.. code-block:: pycon - - >>> import pymongo - -This tutorial also assumes that a MongoDB instance is running on the -default host and port. Assuming you have `downloaded and installed -`_ MongoDB, you -can start it like so: - -.. code-block:: bash - - $ mongod - -Making a Connection with AsyncMongoClient ------------------------------------------ -The first step when working with **PyMongo** is to create a -:class:`~pymongo.asynchronous.mongo_client.AsyncMongoClient` to the running **mongod** -instance. Doing so is easy: - -.. 
code-block:: pycon - - >>> from pymongo import AsyncMongoClient - >>> client = AsyncMongoClient() - -The above code will connect on the default host and port. We can also -specify the host and port explicitly, as follows: - -.. code-block:: pycon - - >>> client = AsyncMongoClient("localhost", 27017) - -Or use the MongoDB URI format: - -.. code-block:: pycon - - >>> client = AsyncMongoClient("mongodb://localhost:27017/") - -By default, :class:`~pymongo.asynchronous.mongo_client.AsyncMongoClient` only connects to the database on its first operation. -To explicitly connect before performing an operation, use :meth:`~pymongo.asynchronous.mongo_client.AsyncMongoClient.aconnect`: - -.. code-block:: pycon - - >>> client = await AsyncMongoClient().aconnect() - -Getting a Database ------------------- -A single instance of MongoDB can support multiple independent -`databases `_. When -working with PyMongo you access databases using attribute style access -on :class:`~pymongo.asynchronous.mongo_client.AsyncMongoClient` instances: - -.. code-block:: pycon - - >>> db = client.test_database - -If your database name is such that using attribute style access won't -work (like ``test-database``), you can use dictionary style access -instead: - -.. code-block:: pycon - - >>> db = client["test-database"] - -Getting a Collection --------------------- -A `collection `_ is a -group of documents stored in MongoDB, and can be thought of as roughly -the equivalent of a table in a relational database. Getting a -collection in PyMongo works the same as getting a database: - -.. code-block:: pycon - - >>> collection = db.test_collection - -or (using dictionary style access): - -.. code-block:: pycon - - >>> collection = db["test-collection"] - -An important note about collections (and databases) in MongoDB is that -they are created lazily - none of the above commands have actually -performed any operations on the MongoDB server. 
Collections and -databases are created when the first document is inserted into them. - -Documents ---------- -Data in MongoDB is represented (and stored) using JSON-style -documents. In PyMongo we use dictionaries to represent documents. As -an example, the following dictionary might be used to represent a blog -post: - -.. code-block:: pycon - - >>> import datetime - >>> post = { - ... "author": "Mike", - ... "text": "My first blog post!", - ... "tags": ["mongodb", "python", "pymongo"], - ... "date": datetime.datetime.now(tz=datetime.timezone.utc), - ... } - -Note that documents can contain native Python types (like -:class:`datetime.datetime` instances) which will be automatically -converted to and from the appropriate `BSON -`_ types. - -Inserting a Document --------------------- -To insert a document into a collection we can use the -:meth:`~pymongo.asynchronous.collection.AsyncCollection.insert_one` method: - -.. code-block:: pycon - - >>> posts = db.posts - >>> post_id = (await posts.insert_one(post)).inserted_id - >>> post_id - ObjectId('...') - -When a document is inserted a special key, ``"_id"``, is automatically -added if the document doesn't already contain an ``"_id"`` key. The value -of ``"_id"`` must be unique across the -collection. :meth:`~pymongo.asynchronous.collection.AsyncCollection.insert_one` returns an -instance of :class:`~pymongo.results.InsertOneResult`. For more information -on ``"_id"``, see the `documentation on _id -`_. - -After inserting the first document, the *posts* collection has -actually been created on the server. We can verify this by listing all -of the collections in our database: - -.. 
code-block:: pycon - - >>> await db.list_collection_names() - ['posts'] - -Getting a Single Document With :meth:`~pymongo.asynchronous.collection.AsyncCollection.find_one` ------------------------------------------------------------------------------------------------- -The most basic type of query that can be performed in MongoDB is -:meth:`~pymongo.asynchronous.collection.AsyncCollection.find_one`. This method returns a -single document matching a query (or ``None`` if there are no -matches). It is useful when you know there is only one matching -document, or are only interested in the first match. Here we use -:meth:`~pymongo.asynchronous.collection.AsyncCollection.find_one` to get the first -document from the posts collection: - -.. code-block:: pycon - - >>> import pprint - >>> pprint.pprint(await posts.find_one()) - {'_id': ObjectId('...'), - 'author': 'Mike', - 'date': datetime.datetime(...), - 'tags': ['mongodb', 'python', 'pymongo'], - 'text': 'My first blog post!'} - -The result is a dictionary matching the one that we inserted previously. - -.. note:: The returned document contains an ``"_id"``, which was - automatically added on insert. - -:meth:`~pymongo.asynchronous.collection.AsyncCollection.find_one` also supports querying -on specific elements that the resulting document must match. To limit -our results to a document with author "Mike" we do: - -.. code-block:: pycon - - >>> pprint.pprint(await posts.find_one({"author": "Mike"})) - {'_id': ObjectId('...'), - 'author': 'Mike', - 'date': datetime.datetime(...), - 'tags': ['mongodb', 'python', 'pymongo'], - 'text': 'My first blog post!'} - -If we try with a different author, like "Eliot", we'll get no result: - -.. code-block:: pycon - - >>> await posts.find_one({"author": "Eliot"}) - >>> - -.. _async-querying-by-objectid: - -Querying By ObjectId --------------------- -We can also find a post by its ``_id``, which in our example is an ObjectId: - -.. code-block:: pycon - - >>> post_id - ObjectId(...) 
- >>> pprint.pprint(await posts.find_one({"_id": post_id})) - {'_id': ObjectId('...'), - 'author': 'Mike', - 'date': datetime.datetime(...), - 'tags': ['mongodb', 'python', 'pymongo'], - 'text': 'My first blog post!'} - -Note that an ObjectId is not the same as its string representation: - -.. code-block:: pycon - - >>> post_id_as_str = str(post_id) - >>> await posts.find_one({"_id": post_id_as_str}) # No result - >>> - -A common task in web applications is to get an ObjectId from the -request URL and find the matching document. It's necessary in this -case to **convert the ObjectId from a string** before passing it to -``find_one``:: - - from bson.objectid import ObjectId - - # The web framework gets post_id from the URL and passes it as a string - async def get(post_id): - # Convert from string to ObjectId: - document = await client.db.collection.find_one({'_id': ObjectId(post_id)}) - -.. seealso:: :ref:`web-application-querying-by-objectid` - -Bulk Inserts ------------- -In order to make querying a little more interesting, let's insert a -few more documents. In addition to inserting a single document, we can -also perform *bulk insert* operations, by passing a list as the -first argument to :meth:`~pymongo.asynchronous.collection.AsyncCollection.insert_many`. -This will insert each document in the list, sending only a single -command to the server: - -.. code-block:: pycon - - >>> new_posts = [ - ... { - ... "author": "Mike", - ... "text": "Another post!", - ... "tags": ["bulk", "insert"], - ... "date": datetime.datetime(2009, 11, 12, 11, 14), - ... }, - ... { - ... "author": "Eliot", - ... "title": "MongoDB is fun", - ... "text": "and pretty easy too!", - ... "date": datetime.datetime(2009, 11, 10, 10, 45), - ... }, - ... 
] - >>> result = await posts.insert_many(new_posts) - >>> result.inserted_ids - [ObjectId('...'), ObjectId('...')] - -There are a couple of interesting things to note about this example: - - - The result from :meth:`~pymongo.asynchronous.collection.AsyncCollection.insert_many` now - returns two :class:`~bson.objectid.ObjectId` instances, one for - each inserted document. - - ``new_posts[1]`` has a different "shape" than the other posts - - there is no ``"tags"`` field and we've added a new field, - ``"title"``. This is what we mean when we say that MongoDB is - *schema-free*. - -Querying for More Than One Document ------------------------------------ -To get more than a single document as the result of a query we use the -:meth:`~pymongo.asynchronous.collection.AsyncCollection.find` -method. :meth:`~pymongo.asynchronous.collection.AsyncCollection.find` returns a -:class:`~pymongo.asynchronous.cursor.AsyncCursor` instance, which allows us to iterate -over all matching documents. For example, we can iterate over every -document in the ``posts`` collection: - -.. code-block:: pycon - - >>> async for post in posts.find(): - ... pprint.pprint(post) - ... - {'_id': ObjectId('...'), - 'author': 'Mike', - 'date': datetime.datetime(...), - 'tags': ['mongodb', 'python', 'pymongo'], - 'text': 'My first blog post!'} - {'_id': ObjectId('...'), - 'author': 'Mike', - 'date': datetime.datetime(...), - 'tags': ['bulk', 'insert'], - 'text': 'Another post!'} - {'_id': ObjectId('...'), - 'author': 'Eliot', - 'date': datetime.datetime(...), - 'text': 'and pretty easy too!', - 'title': 'MongoDB is fun'} - -Just like we did with :meth:`~pymongo.asynchronous.collection.AsyncCollection.find_one`, -we can pass a document to :meth:`~pymongo.asynchronous.collection.AsyncCollection.find` -to limit the returned results. Here, we get only those documents whose -author is "Mike": - -.. code-block:: pycon - - >>> async for post in posts.find({"author": "Mike"}): - ... pprint.pprint(post) - ... 
- {'_id': ObjectId('...'), - 'author': 'Mike', - 'date': datetime.datetime(...), - 'tags': ['mongodb', 'python', 'pymongo'], - 'text': 'My first blog post!'} - {'_id': ObjectId('...'), - 'author': 'Mike', - 'date': datetime.datetime(...), - 'tags': ['bulk', 'insert'], - 'text': 'Another post!'} - -Counting --------- -If we just want to know how many documents match a query we can -perform a :meth:`~pymongo.asynchronous.collection.AsyncCollection.count_documents` operation -instead of a full query. We can get a count of all of the documents -in a collection: - -.. code-block:: pycon - - >>> await posts.count_documents({}) - 3 - -or just of those documents that match a specific query: - -.. code-block:: pycon - - >>> await posts.count_documents({"author": "Mike"}) - 2 - -Range Queries -------------- -MongoDB supports many different types of `advanced queries -`_. As an -example, lets perform a query where we limit results to posts older -than a certain date, but also sort the results by author: - -.. code-block:: pycon - - >>> d = datetime.datetime(2009, 11, 12, 12) - >>> async for post in posts.find({"date": {"$lt": d}}).sort("author"): - ... pprint.pprint(post) - ... - {'_id': ObjectId('...'), - 'author': 'Eliot', - 'date': datetime.datetime(...), - 'text': 'and pretty easy too!', - 'title': 'MongoDB is fun'} - {'_id': ObjectId('...'), - 'author': 'Mike', - 'date': datetime.datetime(...), - 'tags': ['bulk', 'insert'], - 'text': 'Another post!'} - -Here we use the special ``"$lt"`` operator to do a range query, and -also call :meth:`~pymongo.asynchronous.cursor.AsyncCursor.sort` to sort the results -by author. - -Indexing --------- - -Adding indexes can help accelerate certain queries and can also add additional -functionality to querying and storing documents. In this example, we'll -demonstrate how to create a `unique index -`_ on a key that rejects -documents whose value for that key already exists in the index. - -First, we'll need to create the index: - -.. 
code-block:: pycon - - >>> result = await db.profiles.create_index([("user_id", pymongo.ASCENDING)], unique=True) - >>> sorted(list(await db.profiles.index_information())) - ['_id_', 'user_id_1'] - -Notice that we have two indexes now: one is the index on ``_id`` that MongoDB -creates automatically, and the other is the index on ``user_id`` we just -created. - -Now let's set up some user profiles: - -.. code-block:: pycon - - >>> user_profiles = [{"user_id": 211, "name": "Luke"}, {"user_id": 212, "name": "Ziltoid"}] - >>> result = await db.profiles.insert_many(user_profiles) - -The index prevents us from inserting a document whose ``user_id`` is already in -the collection: - -.. code-block:: pycon - - >>> new_profile = {"user_id": 213, "name": "Drew"} - >>> duplicate_profile = {"user_id": 212, "name": "Tommy"} - >>> result = await db.profiles.insert_one(new_profile) # This is fine. - >>> result = await db.profiles.insert_one(duplicate_profile) - Traceback (most recent call last): - DuplicateKeyError: E11000 duplicate key error index: test_database.profiles.$user_id_1 dup key: { : 212 } - -.. seealso:: The MongoDB documentation on `indexes `_ - -Task Cancellation ------------------ -`Cancelling `_ an asyncio Task -that is running a PyMongo operation is treated as a fatal interrupt. Any connections, cursors, and transactions -involved in a cancelled Task will be safely closed and cleaned up as part of the cancellation. If those resources are -also used elsewhere, attempting to utilize them after the cancellation will result in an error. diff --git a/doc/atlas.rst b/doc/atlas.rst deleted file mode 100644 index 19ba9732f2..0000000000 --- a/doc/atlas.rst +++ /dev/null @@ -1,43 +0,0 @@ -Using PyMongo with MongoDB Atlas -================================ - -`Atlas `_ is MongoDB, Inc.'s hosted MongoDB as a -service offering. 
To connect to Atlas, pass the connection string provided by -Atlas to :class:`~pymongo.mongo_client.MongoClient`:: - - client = pymongo.MongoClient() - -Connections to Atlas require TLS/SSL. - -.. warning:: Industry best practices recommend, and some regulations require, - the use of TLS 1.1 or newer. Though no application changes are required for - PyMongo to make use of the newest protocols, some operating systems or - versions may not provide an OpenSSL version new enough to support them. - - Users of macOS older than 10.13 (High Sierra) will need to install Python - from `python.org`_, `homebrew`_, `macports`_, or another similar source. - - Users of Linux or other non-macOS Unix can check their OpenSSL version like - this:: - - $ openssl version - - If the version number is less than 1.0.1 support for TLS 1.1 or newer is not - available. Contact your operating system vendor for a solution or upgrade to - a newer distribution. - - You can check your Python interpreter by installing the `requests`_ module - and executing the following command:: - - python -c "import requests; print(requests.get('https://www.howsmyssl.com/a/check', verify=False).json()['tls_version'])" - - You should see "TLS 1.X" where X is >= 1. - - You can read more about TLS versions and their security implications here: - - ``_ - -.. _python.org: https://www.python.org/downloads/ -.. _homebrew: https://brew.sh/ -.. _macports: https://www.macports.org/ -.. _requests: https://pypi.python.org/pypi/requests diff --git a/doc/changelog.rst b/doc/changelog.rst index d88b114fc6..25d412364f 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -115,7 +115,7 @@ PyMongo 4.12 brings a number of changes including: - Support for configuring DEK cache lifetime via the ``key_expiration_ms`` argument to :class:`~pymongo.encryption_options.AutoEncryptionOpts`. - Support for $lookup in CSFLE and QE supported on MongoDB 8.1+. -- pymongocrypt>=1.13 is now required for :ref:`In-Use Encryption` support. 
+- pymongocrypt>=1.13 is now required for `In-Use Encryption `_ support. - Added :meth:`gridfs.asynchronous.grid_file.AsyncGridFSBucket.rename_by_name` and :meth:`gridfs.grid_file.GridFSBucket.rename_by_name` for more performant renaming of a file with multiple revisions. - Added :meth:`gridfs.asynchronous.grid_file.AsyncGridFSBucket.delete_by_name` and :meth:`gridfs.grid_file.GridFSBucket.delete_by_name` @@ -177,7 +177,7 @@ PyMongo 4.11 brings a number of changes including: - Dropped support for Python 3.8 and PyPy 3.9. - Dropped support for MongoDB 3.6. - Dropped support for the MONGODB-CR authenticate mechanism, which is no longer supported by MongoDB 4.0+. -- pymongocrypt>=1.12 is now required for :ref:`In-Use Encryption` support. +- pymongocrypt>=1.12 is now required for `In-Use Encryption `_ support. - Added support for free-threaded Python with the GIL disabled. For more information see: `Free-threaded CPython `_. We do not yet support free-threaded Python on Windows (`PYTHON-5027`_) or with In-Use Encryption (`PYTHON-5024`_). @@ -299,7 +299,7 @@ PyMongo 4.9 brings a number of improvements including: ``sparsity`` and ``trim_factor`` are now optional in :class:`~pymongo.encryption_options.RangeOpts`. - Added support for the "delegated" option for the KMIP ``master_key`` in :meth:`~pymongo.encryption.ClientEncryption.create_data_key`. -- pymongocrypt>=1.10 is now required for :ref:`In-Use Encryption` support. +- pymongocrypt>=1.10 is now required for `In-Use Encryption `_ support. - Added :meth:`~pymongo.cursor.Cursor.to_list` to :class:`~pymongo.cursor.Cursor`, :class:`~pymongo.command_cursor.CommandCursor`, :class:`~pymongo.asynchronous.cursor.AsyncCursor`, @@ -309,7 +309,7 @@ PyMongo 4.9 brings a number of improvements including: and :class:`~pymongo.asynchronous.mongo_client.AsyncMongoClient`, enabling users to perform insert, update, and delete operations against mixed namespaces in a minimized number of round trips. 
- Please see :doc:`examples/client_bulk` for more information. + Please see `Client Bulk Write `_ for more information. - Added support for the ``namespace`` parameter to the :class:`~pymongo.operations.InsertOne`, :class:`~pymongo.operations.ReplaceOne`, @@ -339,7 +339,7 @@ PyMongo 4.9 brings a number of improvements including: function-as-a-service (FaaS) like AWS Lambda, Google Cloud Functions, and Microsoft Azure Functions. On some FaaS systems, there is a ``fork()`` operation at function startup. By delaying the connection to the first operation, we avoid a deadlock. See - :ref:`pymongo-fork-safe` for more information. + `multiple forks `_ for more information. Issues Resolved @@ -446,10 +446,10 @@ PyMongo 4.7 brings a number of improvements including: using an OpenID Connect (OIDC) access token. The driver supports OIDC for workload identity, defined as an identity you assign to a software workload (such as an application, service, script, or container) to authenticate and access other services and resources. - Please see :doc:`examples/authentication` for more information. + Please see `Authentication `_ for more information. - Added support for Python's `native logging library `_, enabling developers to customize the verbosity of log messages for their applications. - Please see :doc:`examples/logging` for more information. + Please see `Logging `_ for more information. - Significantly improved the performance of encoding BSON documents to JSON. - Added support for named KMS providers for client side field level encryption. Previously supported KMS providers were only: aws, azure, gcp, kmip, and local. @@ -608,7 +608,7 @@ PyMongo 4.6 brings a number of improvements including: "mongodb://example.com?tls=true" is now a valid URI. - Fixed a bug where PyMongo would incorrectly promote all cursors to exhaust cursors when connected to load balanced MongoDB clusters or Serverless clusters. -- Added the :ref:`network-compression-example` documentation page. 
+- Added the `network compression `_ documentation page. - Added more timeout information to network errors. Issues Resolved @@ -633,7 +633,7 @@ PyMongo 4.5 brings a number of improvements including: - Added :meth:`~pymongo.database.Database.cursor_command` and :meth:`~pymongo.command_cursor.CommandCursor.try_next` to support executing an arbitrary command that returns a cursor. -- ``cryptography`` 2.5 or later is now required for :ref:`OCSP` support. +- ``cryptography`` 2.5 or later is now required for `OCSP `_ support. - Improved bson encoding and decoding performance by up to 134%(`PYTHON-3729`_, `PYTHON-3797`_, `PYTHON-3816`_, `PYTHON-3817`_, `PYTHON-3820`_, `PYTHON-3824`_, and `PYTHON-3846`_). .. warning:: PyMongo no longer supports PyPy3 versions older than 3.8. Users @@ -694,7 +694,7 @@ PyMongo 4.4 brings a number of improvements including: :class:`~pymongo.encryption_options.RangeOpts`, and :attr:`~pymongo.encryption.Algorithm.RANGEPREVIEW` as part of the experimental Queryable Encryption beta. -- pymongocrypt 1.6.0 or later is now required for :ref:`In-Use Encryption` support. MongoDB +- pymongocrypt 1.6.0 or later is now required for `In-Use Encryption `_ support. MongoDB Server 7.0 introduced a backwards breaking change to the QE protocol. Users taking advantage of the Queryable Encryption beta must now upgrade to MongoDB 7.0+ and PyMongo 4.4+. @@ -722,9 +722,9 @@ Changes in Version 4.3.3 (2022/11/17) Version 4.3.3 documents support for the following: -- :ref:`CSFLE on-demand credentials` for cloud KMS providers. -- Authentication support for :ref:`EKS Clusters`. -- Added the :ref:`timeout-example` example page to improve the documentation +- `CSFLE on-demand credentials `_ for cloud KMS providers. +- Authentication support for `EKS Clusters `_. +- Added the `timeout `_ example page to improve the documentation for :func:`pymongo.timeout`. 
Bug Fixes @@ -759,7 +759,7 @@ PyMongo 4.3 brings a number of improvements including: - Added support for decoding BSON datetimes outside of the range supported by Python's :class:`~datetime.datetime` builtin. See - :ref:`handling-out-of-range-datetimes` for examples, as well as + `handling out of range datetimes `_ for examples, as well as :class:`bson.datetime_ms.DatetimeMS`, :class:`bson.codec_options.DatetimeConversion`, and :class:`bson.codec_options.CodecOptions`'s ``datetime_conversion`` @@ -768,7 +768,7 @@ PyMongo 4.3 brings a number of improvements including: after a :py:func:`os.fork` to reduce the frequency of deadlocks. Note that deadlocks are still possible because libraries that PyMongo depends like OpenSSL cannot be made fork() safe in multithreaded applications. - (`PYTHON-2484`_). For more info see :ref:`pymongo-fork-safe`. + (`PYTHON-2484`_). For more info see `multiple forks `_. - When used with MongoDB 6.0+, :class:`~pymongo.change_stream.ChangeStream` s now allow for new types of events (such as DDL and C2C replication events) to be recorded with the new parameter ``show_expanded_events`` @@ -778,7 +778,7 @@ PyMongo 4.3 brings a number of improvements including: credentials expire or an error is encountered. - When using the ``MONGODB-AWS`` authentication mechanism with the ``aws`` extra, the behavior of credential fetching has changed with - ``pymongo_auth_aws>=1.1.0``. Please see :doc:`examples/authentication` for + ``pymongo_auth_aws>=1.1.0``. Please see `Authentication `_ for more information. Bug fixes @@ -811,9 +811,9 @@ PyMongo 4.2 brings a number of improvements including: - Support for MongoDB 6.0. - Support for the Queryable Encryption beta with MongoDB 6.0. Note that backwards-breaking - changes may be made before the final release. See :ref:`automatic-queryable-client-side-encryption` for example usage. + changes may be made before the final release. See `automatic queryable client-side encryption `_ for example usage. 
- Provisional (beta) support for :func:`pymongo.timeout` to apply a single timeout - to an entire block of pymongo operations. See :ref:`timeout-example` for examples. + to an entire block of pymongo operations. See `timeout `_ for examples. - Added the ``timeoutMS`` URI and keyword argument to :class:`~pymongo.mongo_client.MongoClient`. - Added the :attr:`pymongo.errors.PyMongoError.timeout` property which is ``True`` when the error was caused by a timeout. @@ -861,7 +861,7 @@ Unavoidable breaking changes encryption support. - :meth:`~pymongo.collection.Collection.estimated_document_count` now always uses the `count`_ command. Due to an oversight in versions 5.0.0-5.0.8 of MongoDB, - the count command was not included in V1 of the :ref:`versioned-api-ref`. + the count command was not included in V1 of the `Stable API `_. Users of the Stable API with estimated_document_count are recommended to upgrade their server version to 5.0.9+ or set :attr:`pymongo.server_api.ServerApi.strict` to ``False`` to avoid encountering errors (`PYTHON-3167`_). @@ -924,7 +924,7 @@ Changes in Version 4.1 (2021/12/07) PyMongo 4.1 brings a number of improvements including: -- Type Hinting support (formerly provided by `pymongo-stubs`_). See :doc:`examples/type_hints` for more information. +- Type Hinting support (formerly provided by `pymongo-stubs`_). See `Type Hints `_ for more information. - Added support for the ``comment`` parameter to all helpers. For example see :meth:`~pymongo.collection.Collection.insert_one`. - Added support for the ``let`` parameter to @@ -1013,7 +1013,7 @@ Breaking Changes in 4.0 :data:`bson.binary.UuidRepresentation.PYTHON_LEGACY` to :data:`bson.binary.UuidRepresentation.UNSPECIFIED`. Attempting to encode a :class:`uuid.UUID` instance to BSON or JSON now produces an error by default. - See :ref:`handling-uuid-data-example` for details. + See `UUID representations `_ for details. 
- Removed the ``waitQueueMultiple`` keyword argument to :class:`~pymongo.mongo_client.MongoClient` and removed :exc:`pymongo.errors.ExceededMaxWaiters`. @@ -1352,7 +1352,7 @@ Notable improvements - Added support for MongoDB 5.0. - Support for MongoDB Stable API, see :class:`~pymongo.server_api.ServerApi`. -- Support for snapshot reads on secondaries (see :ref:`snapshot-reads-ref`). +- Support for snapshot reads on secondaries (see `snapshot reads `_). - Support for Azure and GCP KMS providers for client side field level encryption. See the docstring for :class:`~pymongo.mongo_client.MongoClient`, :class:`~pymongo.encryption_options.AutoEncryptionOpts`, @@ -1409,7 +1409,7 @@ Deprecations same API. - Deprecated the :mod:`pymongo.messeage` module. - Deprecated the ``ssl_keyfile`` and ``ssl_certfile`` URI options in favor - of ``tlsCertificateKeyFile`` (see :doc:`examples/tls`). + of ``tlsCertificateKeyFile`` (see `TLS `_). .. _PYTHON-2466: https://jira.mongodb.org/browse/PYTHON-2466 .. _PYTHON-1690: https://jira.mongodb.org/browse/PYTHON-1690 @@ -1507,12 +1507,12 @@ Changes in Version 3.11.0 (2020/07/30) Version 3.11 adds support for MongoDB 4.4 and includes a number of bug fixes. Highlights include: -- Support for :ref:`OCSP` (Online Certificate Status Protocol). +- Support for `OCSP `_ (Online Certificate Status Protocol). - Support for `PyOpenSSL `_ as an - alternative TLS implementation. PyOpenSSL is required for :ref:`OCSP` + alternative TLS implementation. PyOpenSSL is required for `OCSP `_ support. It will also be installed when using the "tls" extra if the version of Python in use is older than 2.7.9. -- Support for the :ref:`MONGODB-AWS` authentication mechanism. +- Support for the `MONGODB-AWS `_ authentication mechanism. - Support for the ``directConnection`` URI option and kwarg to :class:`~pymongo.mongo_client.MongoClient`. 
- Support for speculative authentication attempts in connection handshakes @@ -1538,7 +1538,7 @@ Highlights include: - Added support for :data:`bson.binary.UuidRepresentation.UNSPECIFIED` and ``MongoClient(uuidRepresentation='unspecified')`` which will become the default UUID representation starting in PyMongo 4.0. See - :ref:`handling-uuid-data-example` for details. + `UUID representations `_ for details. - New methods :meth:`bson.binary.Binary.from_uuid` and :meth:`bson.binary.Binary.as_uuid`. - Added the ``background`` parameter to @@ -1622,7 +1622,7 @@ Version 3.10 includes a number of improvements and bug fixes. Highlights include: - Support for Client-Side Field Level Encryption with MongoDB 4.2. See - :doc:`examples/encryption` for examples. + `Client-Side Field Level Encryption `_ for examples. - Support for Python 3.8. - Added :attr:`pymongo.client_session.ClientSession.in_transaction`. - Do not hold the Topology lock while creating connections in a MongoClient's @@ -1648,7 +1648,7 @@ Changes in Version 3.9.0 (2019/08/13) Version 3.9 adds support for MongoDB 4.2. Highlights include: - Support for MongoDB 4.2 sharded transactions. Sharded transactions have - the same API as replica set transactions. See :ref:`transactions-ref`. + the same API as replica set transactions. See `Transactions `_. - New method :meth:`pymongo.client_session.ClientSession.with_transaction` to support conveniently running a transaction in a session with automatic retries and at-most-once semantics. @@ -1776,7 +1776,7 @@ Changes in Version 3.8.0 (2019/04/22) - Custom types can now be directly encoded to, and decoded from MongoDB using the :class:`~bson.codec_options.TypeCodec` and :class:`~bson.codec_options.TypeRegistry` APIs. For more information, see - the :doc:`custom type example `. + `Custom Types `_. - Attempting a multi-document transaction on a sharded cluster now raises a :exc:`~pymongo.errors.ConfigurationError`. 
- :meth:`pymongo.cursor.Cursor.distinct` and @@ -1806,7 +1806,7 @@ Changes in Version 3.8.0 (2019/04/22) - Iterating over a :class:`~bson.raw_bson.RawBSONDocument` now maintains the same field order of the underlying raw BSON document. - Applications can now register a custom server selector. For more information - see the :doc:`server selector example `. + see `Customize Server Selection `_. - The connection pool now implements a LIFO policy. Unavoidable breaking changes: @@ -1874,9 +1874,9 @@ Changes in Version 3.7.0 (2018/06/26) Version 3.7 adds support for MongoDB 4.0. Highlights include: - Support for single replica set multi-document ACID transactions. - See :ref:`transactions-ref`. + See `transactions `_. - Support for wire protocol compression via the new ``compressors`` URI and keyword argument to - :meth:`~pymongo.mongo_client.MongoClient`. See :ref:`network-compression-example` for details. + :meth:`~pymongo.mongo_client.MongoClient`. See `network compression `_ for details. - Support for Python 3.7. - New count methods, :meth:`~pymongo.collection.Collection.count_documents` and :meth:`~pymongo.collection.Collection.estimated_document_count`. @@ -1897,9 +1897,9 @@ Version 3.7 adds support for MongoDB 4.0. Highlights include: the following features and changes allow PyMongo to function when MD5 support is disabled in OpenSSL by the FIPS Object Module: - - Support for the :ref:`SCRAM-SHA-256 ` - authentication mechanism. The :ref:`GSSAPI `, - :ref:`PLAIN `, and :ref:`MONGODB-X509 ` + - Support for the `SCRAM-SHA-256 `_ + authentication mechanism. The `GSSAPI `_, + `PLAIN `_, and `MONGODB-X509 `_ mechanisms can also be used to avoid issues with OpenSSL in FIPS environments. - MD5 checksums are now optional in GridFS. See the ``disable_md5`` option @@ -1917,7 +1917,7 @@ Version 3.7 adds support for MongoDB 4.0. Highlights include: class which is a subclass of :class:`~pymongo.change_stream.ChangeStream`. 
- SCRAM client and server keys are cached for improved performance, following `RFC 5802 `_. -- If not specified, the authSource for the :ref:`PLAIN ` +- If not specified, the authSource for the `PLAIN `_ authentication mechanism defaults to $external. - wtimeoutMS is once again supported as a URI option. - When using unacknowledged write concern and connected to MongoDB server @@ -2167,7 +2167,7 @@ Changes and Deprecations: consistent across all MongoDB versions. - In Python 3, :meth:`~bson.json_util.loads` now automatically decodes JSON $binary with a subtype of 0 into :class:`bytes` instead of - :class:`~bson.binary.Binary`. See the :doc:`/python3` for more details. + :class:`~bson.binary.Binary`. - :meth:`~bson.json_util.loads` now raises ``TypeError`` or ``ValueError`` when parsing JSON type wrappers with values of the wrong type or any extra keys. @@ -2196,7 +2196,7 @@ Highlights include: - Complete support for MongoDB 3.4: - - Unicode aware string comparison using :doc:`examples/collations`. + - Unicode aware string comparison using `Collation `_. - Support for the new :class:`~bson.decimal128.Decimal128` BSON type. - A new maxStalenessSeconds read preference option. - A username is no longer required for the MONGODB-X509 authentication @@ -2534,7 +2534,7 @@ In PyMongo 3.0, the ``use_greenlets`` option is gone. To use PyMongo with Gevent simply call ``gevent.monkey.patch_all()``. For more information, -see :doc:`PyMongo's Gevent documentation `. +see `Gevent `_. :class:`~pymongo.mongo_client.MongoClient` changes .................................................. @@ -2578,7 +2578,7 @@ the list, and used it until a network error prompted it to re-evaluate all mongoses' latencies and reconnect to one of them. In PyMongo 3, the client monitors its network latency to all the mongoses continuously, and distributes operations evenly among those with the lowest latency. -See :ref:`mongos-load-balancing` for more information. 
+See `load balancing `_ for more information. The client methods ``start_request``, ``in_request``, and ``end_request`` are removed, and so is the ``auto_start_request`` option. Requests were @@ -2586,7 +2586,7 @@ designed to make read-your-writes consistency more likely with the ``w=0`` write concern. Additionally, a thread in a request used the same member for all secondary reads in a replica set. To ensure read-your-writes consistency in PyMongo 3.0, do not override the default write concern with ``w=0``, and -do not override the default :ref:`read preference ` of +do not override the default `read preference `_ of PRIMARY. Support for the ``slaveOk`` (or ``slave_okay``), ``safe``, and @@ -2600,8 +2600,7 @@ The ``max_pool_size`` option has been removed. It is replaced by the ``maxPoolSize`` MongoDB URI option. ``maxPoolSize`` is now a supported URI option in PyMongo and can be passed as a keyword argument. -The ``copy_database`` method is removed, see the -:doc:`copy_database examples ` for alternatives. +The ``copy_database`` method is removed, see `Copy and Clone Databases `_ for alternatives. The ``disconnect`` method is removed. Use :meth:`~pymongo.mongo_client.MongoClient.close` instead. @@ -2938,7 +2937,7 @@ Version 2.9.4 fixes issues reported since the release of 2.9.3. - Fixed :class:`~pymongo.mongo_replica_set_client.MongoReplicaSetClient` handling of uuidRepresentation. - Fixed building and testing the documentation with python 3.x. -- New documentation for :doc:`examples/tls` and :doc:`atlas`. +- New documentation for `TLS `_ and `Atlas `_. Issues Resolved ............... @@ -3177,7 +3176,7 @@ PyMongo 2.7 is a major release with a large number of new features and bug fixes. Highlights include: - Full support for MongoDB 2.6. -- A new :doc:`bulk write operations API `. +- A new `bulk write operations API `_. - Support for server side query timeouts using :meth:`~pymongo.cursor.Cursor.max_time_ms`. 
- Support for writing :meth:`~pymongo.collection.Collection.aggregate` @@ -3188,7 +3187,7 @@ fixes. Highlights include: error details from the server. - A new GridFS :meth:`~gridfs.GridFS.find` method that returns a :class:`~gridfs.grid_file.GridOutCursor`. -- Greatly improved :doc:`support for mod_wsgi ` when using +- Greatly improved `support for mod_wsgi `_ when using PyMongo's C extensions. Read `Jesse's blog post `_ for details. - Improved C extension support for ARM little endian. @@ -3268,14 +3267,14 @@ Important new features: ``waitQueueTimeoutMS`` is set, an operation that blocks waiting for a socket will raise :exc:`~pymongo.errors.ConnectionFailure` after the timeout. By default ``waitQueueTimeoutMS`` is not set. - See :ref:`connection-pooling` for more information. + See `connection pooling `_ for more information. - The :meth:`~pymongo.collection.Collection.insert` method automatically splits large batches of documents into multiple insert messages based on :attr:`~pymongo.mongo_client.MongoClient.max_message_size` - Support for the exhaust cursor flag. See :meth:`~pymongo.collection.Collection.find` for details and caveats. - Support for the PLAIN and MONGODB-X509 authentication mechanisms. - See :doc:`the authentication docs ` for more + See `the authentication docs `_ for more information. - Support aggregation output as a :class:`~pymongo.cursor.Cursor`. See :meth:`~pymongo.collection.Collection.aggregate` for details. @@ -3288,7 +3287,7 @@ Important new features: to having a ``max_pool_size`` larger than necessary. Err towards a larger value.) If your application accepts the default, continue to do so. - See :ref:`connection-pooling` for more information. + See `connection pooling `_ for more information. Issues Resolved ............... @@ -3334,7 +3333,7 @@ Version 2.5 includes changes to support new features in MongoDB 2.4. Important new features: -- Support for :ref:`GSSAPI (Kerberos) authentication `. +- Support for `GSSAPI (Kerberos) `_. 
- Support for SSL certificate validation with hostname matching. - Support for delegated and role based authentication. - New GEOSPHERE (2dsphere) and HASHED index constants. @@ -3441,7 +3440,7 @@ Version 2.3 adds support for new features and behavior changes in MongoDB Important New Features: - Support for expanded read preferences including directing reads to tagged - servers - See :ref:`secondary-reads` for more information. + servers - See `secondary reads `_ for more information. - Support for mongos failover. - A new :meth:`~pymongo.collection.Collection.aggregate` method to support MongoDB's new `aggregation framework @@ -3495,10 +3494,10 @@ to this release. Important New Features: -- Support for Python 3 - - See the :doc:`python3` for more information. +- Support for Python 3. + See `Python 3 `_ for more information. - Support for Gevent - - See :doc:`examples/gevent` for more information. + See `Gevent `_ for more information. - Improved connection pooling. See `PYTHON-287 `_. @@ -4104,7 +4103,7 @@ Other changes: - clean up all cases where :class:`~pymongo.errors.ConnectionFailure` is raised. - simplification of connection pooling - makes driver ~2x faster for - simple benchmarks. see :ref:`connection-pooling` for more information. + simple benchmarks. see `connection pooling `_ for more information. - DEPRECATED ``pool_size``, ``auto_start_request`` and ``timeout`` parameters to :class:`~pymongo.connection.Connection`. DEPRECATED :meth:`~pymongo.connection.Connection.start_request`. 
@@ -4171,7 +4170,7 @@ Changes in Version 1.2 (2009/12/09) get around some issues with queries on fields named ``query`` - enforce 4MB document limit on the client side - added :meth:`~pymongo.collection.Collection.map_reduce` helper - see - :doc:`example ` + `Aggregation `_ - added :meth:`~pymongo.cursor.Cursor.distinct` method on :class:`~pymongo.cursor.Cursor` instances to allow distinct with queries diff --git a/doc/common-issues.rst b/doc/common-issues.rst deleted file mode 100644 index b300bac784..0000000000 --- a/doc/common-issues.rst +++ /dev/null @@ -1,96 +0,0 @@ -Frequently Encountered Issues -============================= - -Also see the :ref:`TLSErrors` section. - -Server reports wire version X, PyMongo requires Y -------------------------------------------------- - -When one attempts to connect to a <=3.6 version server, PyMongo will throw the following error:: - - >>> client.admin.command('ping') - ... - pymongo.errors.ConfigurationError: Server at localhost:27017 reports wire version 6, but this version of PyMongo requires at least 7 (MongoDB 4.0). - -This is caused by the driver being too new for the server it is being run against. -To resolve this issue either upgrade your database to version >= 4.0 or downgrade to an early version of PyMongo which supports MongoDB < 4.0. - - -'Cursor' object has no attribute '_Cursor__killed' --------------------------------------------------- - -On versions of PyMongo <3.9, when supplying invalid arguments the constructor of Cursor, -there will be a TypeError raised, and an AttributeError printed to ``stderr``. The AttributeError is not relevant, -instead look at the TypeError for debugging information:: - - >>> coll.find(wrong=1) - Exception ignored in: - ... - AttributeError: 'Cursor' object has no attribute '_Cursor__killed' - ... - TypeError: __init__() got an unexpected keyword argument 'wrong' - -To fix this, make sure that you are supplying the correct keyword arguments. 
-In addition, you can also upgrade to PyMongo >=3.9, which will remove the spurious error. - - -MongoClient fails ConfigurationError ------------------------------------- - -This is a common issue stemming from using incorrect keyword argument names. - - >>> client = MongoClient(wrong=1) - ... - pymongo.errors.ConfigurationError: Unknown option wrong - -To fix this, check your spelling and make sure that the keyword argument you are specifying exists. - - -DeprecationWarning: count is deprecated ---------------------------------------- - -PyMongo no longer supports :meth:`pymongo.cursor.count`. -Instead, use :meth:`pymongo.collection.count_documents`:: - - >>> client = MongoClient() - >>> d = datetime.datetime(2009, 11, 12, 12) - >>> list(client.db.coll.find({"date": {"$lt": d}}, limit=2)) - [{'_id': ObjectId('6247b058cebb8b179b7039f8'), 'date': datetime.datetime(1, 1, 1, 0, 0)}, {'_id': ObjectId('6247b059cebb8b179b7039f9'), 'date': datetime.datetime(1, 1, 1, 0, 0)}] - >>> client.db.coll.count_documents({"date": {"$lt": d}}, limit=2) - 2 - -Note that this is NOT the same as ``Cursor.count_documents`` (which does not exist), -this is a method of the Collection class, so you must call it on a collection object -or you will receive the following error:: - - >>> Cursor(MongoClient().db.coll).count() - Traceback (most recent call last): - File "", line 1, in - AttributeError: 'Cursor' object has no attribute 'count' - >>> - -Timeout when accessing MongoDB from PyMongo with tunneling ----------------------------------------------------------- - -When attempting to connect to a replica set MongoDB instance over an SSH tunnel you -will receive the following error:: - - File "/Library/Python/2.7/site-packages/pymongo/collection.py", line 1560, in count - return self._count(cmd, collation, session) - File "/Library/Python/2.7/site-packages/pymongo/collection.py", line 1504, in _count - with self._socket_for_reads() as (connection, slave_ok): - File 
"/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/contextlib.py", line 17, in __enter__ - return self.gen.next() - File "/Library/Python/2.7/site-packages/pymongo/mongo_client.py", line 982, in _socket_for_reads - server = topology.select_server(read_preference) - File "/Library/Python/2.7/site-packages/pymongo/topology.py", line 224, in select_server - address)) - File "/Library/Python/2.7/site-packages/pymongo/topology.py", line 183, in select_servers - selector, server_timeout, address) - File "/Library/Python/2.7/site-packages/pymongo/topology.py", line 199, in _select_servers_loop - self._error_message(selector)) - pymongo.errors.ServerSelectionTimeoutError: localhost:27017: timed out - -This is due to the fact that PyMongo discovers replica set members using the response from the isMaster command which -then contains the address and ports of the other members. However, these addresses and ports will not be accessible through the SSH tunnel. Thus, this behavior is unsupported. -You can, however, connect directly to a single MongoDB node using the directConnection=True option with SSH tunneling. diff --git a/doc/compatibility-policy.rst b/doc/compatibility-policy.rst deleted file mode 100644 index 9721877d4d..0000000000 --- a/doc/compatibility-policy.rst +++ /dev/null @@ -1,62 +0,0 @@ -Compatibility Policy -==================== - -Semantic Versioning -------------------- - -PyMongo's version numbers follow `semantic versioning`_: each version number -is structured "major.minor.patch". Patch releases fix bugs, minor releases -add features (and may fix bugs), and major releases include API changes that -break backwards compatibility (and may add features and fix bugs). - -Deprecation ------------ - -Before we remove a feature in a major release, PyMongo's maintainers make an -effort to release at least one minor version that *deprecates* it. 
We add -"**DEPRECATED**" to the feature's documentation, and update the code to raise a -`DeprecationWarning`_. You can ensure your code is future-proof by running -your code with the latest PyMongo release and looking for DeprecationWarnings. - -The interpreter silences DeprecationWarnings by default. For example, the -following code uses the deprecated ``insert`` method but does not raise any -warning: - -.. code-block:: python - - # "insert.py" (with PyMongo 3.X) - from pymongo import MongoClient - - client = MongoClient() - client.test.test.insert({}) - -To print deprecation warnings to stderr, run python with "-Wd":: - - $ python3 -Wd insert.py - insert.py:4: DeprecationWarning: insert is deprecated. Use insert_one or insert_many instead. - client.test.test.insert({}) - -You can turn warnings into exceptions with "python -We":: - - $ python3 -We insert.py - Traceback (most recent call last): - File "insert.py", line 4, in - client.test.test.insert({}) - File "/home/durin/work/mongo-python-driver/pymongo/collection.py", line 2906, in insert - "instead.", DeprecationWarning, stacklevel=2) - DeprecationWarning: insert is deprecated. Use insert_one or insert_many instead. - -If your own code's test suite passes with "python -We" then it uses no -deprecated PyMongo features. - -.. seealso:: The Python documentation on `the warnings module`_, - and `the -W command line option`_. - -.. _semantic versioning: https://semver.org/ - -.. _DeprecationWarning: - https://docs.python.org/3/library/exceptions.html#DeprecationWarning - -.. _the warnings module: https://docs.python.org/3/library/warnings.html - -.. _the -W command line option: https://docs.python.org/3/using/cmdline.html#cmdoption-W diff --git a/doc/developer/index.rst b/doc/developer/index.rst deleted file mode 100644 index 2ce1e0536c..0000000000 --- a/doc/developer/index.rst +++ /dev/null @@ -1,9 +0,0 @@ -Developer Guide -=============== - -Technical guide for contributors to PyMongo. - -.. 
toctree:: - :maxdepth: 1 - - periodic_executor diff --git a/doc/developer/periodic_executor.rst b/doc/developer/periodic_executor.rst deleted file mode 100644 index 67eaa89f10..0000000000 --- a/doc/developer/periodic_executor.rst +++ /dev/null @@ -1,113 +0,0 @@ -Periodic Executors -================== - -.. currentmodule:: pymongo - -PyMongo implements a :class:`~periodic_executor.PeriodicExecutor` for two -purposes: as the background thread for :class:`~monitor.Monitor`, and to -regularly check if there are ``OP_KILL_CURSORS`` messages that must be sent to the server. - -Killing Cursors ---------------- - -An incompletely iterated :class:`~cursor.Cursor` on the client represents an -open cursor object on the server. In code like this, we lose a reference to -the cursor before finishing iteration:: - - for doc in collection.find(): - raise Exception() - -We try to send an ``OP_KILL_CURSORS`` to the server to tell it to clean up the -server-side cursor. But we must not take any locks directly from the cursor's -destructor (see `PYTHON-799`_), so we cannot safely use the PyMongo data -structures required to send a message. The solution is to add the cursor's id -to an array on the :class:`~mongo_client.MongoClient` without taking any locks. - -Each client has a :class:`~periodic_executor.PeriodicExecutor` devoted to -checking the array for cursor ids. Any it sees are the result of cursors that -were freed while the server-side cursor was still open. The executor can safely -take the locks it needs in order to send the ``OP_KILL_CURSORS`` message. - -.. _PYTHON-799: https://jira.mongodb.org/browse/PYTHON-799 - -Stopping Executors ------------------- - -Just as :class:`~cursor.Cursor` must not take any locks from its destructor, -neither can :class:`~mongo_client.MongoClient` and :class:`~topology.Topology`. 
-Thus, although the client calls :meth:`close` on its kill-cursors thread, and -the topology calls :meth:`close` on all its monitor threads, the :meth:`close` -method cannot actually call :meth:`wake` on the executor, since :meth:`wake` -takes a lock. - -Instead, executors wake periodically to check if ``self.close`` is set, -and if so they exit. - -A thread can log spurious errors if it wakes late in the Python interpreter's -shutdown sequence, so we try to join threads before then. Each periodic -executor (either a monitor or a kill-cursors thread) adds a weakref to itself -to a set called ``_EXECUTORS``, in the ``periodic_executor`` module. - -An `exit handler`_ runs on shutdown and tells all executors to stop, then -tries (with a short timeout) to join all executor threads. - -.. _exit handler: https://docs.python.org/2/library/atexit.html - -Monitoring ----------- - -For each server in the topology, :class:`~topology.Topology` uses a periodic -executor to launch a monitor thread. This thread must not prevent the topology -from being freed, so it weakrefs the topology. Furthermore, it uses a weakref -callback to terminate itself soon after the topology is freed. - -Solid lines represent strong references, dashed lines weak ones: - -.. generated with graphviz: "dot -Tpng periodic-executor-refs.dot > periodic-executor-refs.png" - -.. image:: ../static/periodic-executor-refs.png - -See `Stopping Executors`_ above for an explanation of the ``_EXECUTORS`` set. - -It is a requirement of the `Server Discovery And Monitoring Spec`_ that a -sleeping monitor can be awakened early. Aside from infrequent wakeups to do -their appointed chores, and occasional interruptions, periodic executors also -wake periodically to check if they should terminate. - -Our first implementation of this idea was the obvious one: use the Python -standard library's threading.Condition.wait with a timeout. Another thread -wakes the executor early by signaling the condition variable. 
- -A topology cannot signal the condition variable to tell the executor to -terminate, because it would risk a deadlock in the garbage collector: no -destructor or weakref callback can take a lock to signal the condition variable -(see `PYTHON-863`_); thus the only way for a dying object to terminate a -periodic executor is to set its "stopped" flag and let the executor see the -flag next time it wakes. - -We erred on the side of prompt cleanup, and set the check interval at 100ms. We -assumed that checking a flag and going back to sleep 10 times a second was -cheap on modern machines. - -Starting in Python 3.2, the builtin C implementation of lock.acquire takes a -timeout parameter, so Python 3.2+ Condition variables sleep simply by calling -lock.acquire; they are implemented as efficiently as expected. - -But in Python 2, lock.acquire has no timeout. To wait with a timeout, a Python -2 condition variable sleeps a millisecond, tries to acquire the lock, sleeps -twice as long, and tries again. This exponential backoff reaches a maximum -sleep time of 50ms. - -If PyMongo calls the condition variable's "wait" method with a short timeout, -the exponential backoff is restarted frequently. Overall, the condition variable -is not waking a few times a second, but hundreds of times. (See `PYTHON-983`_.) - -Thus the current design of periodic executors is surprisingly simple: they -do a simple ``time.sleep`` for a half-second, check if it is time to wake or -terminate, and sleep again. - -.. _Server Discovery And Monitoring Spec: https://github.com/mongodb/specifications/blob/master/source/server-discovery-and-monitoring/server-monitoring.md#requesting-an-immediate-check - -.. _PYTHON-863: https://jira.mongodb.org/browse/PYTHON-863 - -.. 
_PYTHON-983: https://jira.mongodb.org/browse/PYTHON-983 diff --git a/doc/examples/aggregation.rst b/doc/examples/aggregation.rst deleted file mode 100644 index e7e3df6ce1..0000000000 --- a/doc/examples/aggregation.rst +++ /dev/null @@ -1,90 +0,0 @@ -Aggregation Examples -==================== - -There are several methods of performing aggregations in MongoDB. These -examples cover the new aggregation framework, using map reduce and using the -group method. - -.. testsetup:: - - from pymongo import MongoClient - - client = MongoClient() - client.drop_database("aggregation_example") - -Setup ------ -To start, we'll insert some example data which we can perform -aggregations on: - -.. doctest:: - - >>> from pymongo import MongoClient - >>> db = MongoClient().aggregation_example - >>> result = db.things.insert_many( - ... [ - ... {"x": 1, "tags": ["dog", "cat"]}, - ... {"x": 2, "tags": ["cat"]}, - ... {"x": 2, "tags": ["mouse", "cat", "dog"]}, - ... {"x": 3, "tags": []}, - ... ] - ... ) - >>> result.inserted_ids - [ObjectId('...'), ObjectId('...'), ObjectId('...'), ObjectId('...')] - -.. _aggregate-examples: - -Aggregation Framework ---------------------- - -This example shows how to use the -:meth:`~pymongo.collection.Collection.aggregate` method to use the aggregation -framework. We'll perform a simple aggregation to count the number of -occurrences for each tag in the ``tags`` array, across the entire collection. -To achieve this we need to pass in three operations to the pipeline. -First, we need to unwind the ``tags`` array, then group by the tags and -sum them up, finally we sort by count. - -Python dictionaries prior to 3.7 don't maintain order. You should use :class:`~bson.son.SON` -or :class:`collections.OrderedDict` where explicit ordering is required for an older Python version -eg "$sort": - -.. note:: - - aggregate requires server version **>= 2.1.0**. - -.. doctest:: - - >>> from bson.son import SON - >>> pipeline = [ - ... {"$unwind": "$tags"}, - ... 
{"$group": {"_id": "$tags", "count": {"$sum": 1}}}, - ... {"$sort": SON([("count", -1), ("_id", -1)])}, - ... ] - >>> import pprint - >>> pprint.pprint(list(db.things.aggregate(pipeline))) - [{'_id': 'cat', 'count': 3}, - {'_id': 'dog', 'count': 2}, - {'_id': 'mouse', 'count': 1}] - -To run an explain plan for this aggregation use -`PyMongoExplain `_, -a companion library for PyMongo. It allows you to explain any CRUD operation -by providing a few convenience classes:: - - >>> from pymongoexplain import ExplainableCollection - >>> ExplainableCollection(collection).aggregate(pipeline) - {'ok': 1.0, 'queryPlanner': [...]} - -Or, use the :meth:`~pymongo.database.Database.command` method:: - - >>> db.command('aggregate', 'things', pipeline=pipeline, explain=True) - {'ok': 1.0, 'stages': [...]} - -As well as simple aggregations the aggregation framework provides projection -capabilities to reshape the returned data. Using projections and aggregation, -you can add computed fields, create new virtual sub-objects, and extract -sub-fields into the top-level of results. - -.. seealso:: The full documentation for MongoDB's `aggregation framework - `_ diff --git a/doc/examples/authentication.rst b/doc/examples/authentication.rst deleted file mode 100644 index 3f1137969d..0000000000 --- a/doc/examples/authentication.rst +++ /dev/null @@ -1,528 +0,0 @@ -Authentication Examples -======================= - -MongoDB supports several different authentication mechanisms. These examples -cover all authentication methods currently supported by PyMongo, documenting -Python module and MongoDB version dependencies. - -.. _percent escaped: - -Percent-Escaping Username and Password --------------------------------------- - -Username and password must be percent-escaped with -:py:func:`urllib.parse.quote_plus`, to be used in a MongoDB URI. 
For example:: - - >>> from pymongo import MongoClient - >>> import urllib.parse - >>> username = urllib.parse.quote_plus('user') - >>> username - 'user' - >>> password = urllib.parse.quote_plus('pass/word') - >>> password - 'pass%2Fword' - >>> MongoClient('mongodb://%s:%s@127.0.0.1' % (username, password)) - ... - -.. _scram_sha_256: - -SCRAM-SHA-256 (RFC 7677) ------------------------- -.. versionadded:: 3.7 - -SCRAM-SHA-256 is the default authentication mechanism supported by a cluster -configured for authentication with MongoDB 4.0 or later. Authentication -requires a username, a password, and a database name. The default database -name is "admin", this can be overridden with the ``authSource`` option. -Credentials can be specified as arguments to -:class:`~pymongo.mongo_client.MongoClient`:: - - >>> from pymongo import MongoClient - >>> client = MongoClient('example.com', - ... username='user', - ... password='password', - ... authSource='the_database', - ... authMechanism='SCRAM-SHA-256') - -Or through the MongoDB URI:: - - >>> uri = "mongodb://user:password@example.com/?authSource=the_database&authMechanism=SCRAM-SHA-256" - >>> client = MongoClient(uri) - -SCRAM-SHA-1 (RFC 5802) ----------------------- -.. versionadded:: 2.8 - -SCRAM-SHA-1 is the default authentication mechanism supported by a cluster -configured for authentication with MongoDB 3.0 or later. Authentication -requires a username, a password, and a database name. The default database -name is "admin", this can be overridden with the ``authSource`` option. -Credentials can be specified as arguments to -:class:`~pymongo.mongo_client.MongoClient`:: - - >>> from pymongo import MongoClient - >>> client = MongoClient('example.com', - ... username='user', - ... password='password', - ... authSource='the_database', - ... 
authMechanism='SCRAM-SHA-1') - -Or through the MongoDB URI:: - - >>> uri = "mongodb://user:password@example.com/?authSource=the_database&authMechanism=SCRAM-SHA-1" - >>> client = MongoClient(uri) - -For best performance on Python versions older than 2.7.8 install `backports.pbkdf2`_. - -.. _backports.pbkdf2: https://pypi.python.org/pypi/backports.pbkdf2/ - -Default Authentication Mechanism --------------------------------- - -If no mechanism is specified, PyMongo automatically negotiates the mechanism to use (SCRAM-SHA-1 -or SCRAM-SHA-256) with the MongoDB server. - -Default Database and "authSource" ---------------------------------- - -You can specify both a default database and the authentication database in the -URI:: - - >>> uri = "mongodb://user:password@example.com/default_db?authSource=admin" - >>> client = MongoClient(uri) - -PyMongo will authenticate on the "admin" database, but the default database -will be "default_db":: - - >>> # get_database with no "name" argument chooses the DB from the URI - >>> db = MongoClient(uri).get_database() - >>> print(db.name) - 'default_db' - -.. _mongodb_x509: - -MONGODB-X509 ------------- -.. versionadded:: 2.6 - -The MONGODB-X509 mechanism authenticates via the X.509 certificate presented -by the driver during TLS/SSL negotiation. This authentication method requires -the use of TLS/SSL connections with certificate validation:: - - >>> from pymongo import MongoClient - >>> client = MongoClient('example.com', - ... authMechanism="MONGODB-X509", - ... tls=True, - ... tlsCertificateKeyFile='/path/to/client.pem', - ... tlsCAFile='/path/to/ca.pem') - -MONGODB-X509 authenticates against the $external virtual database, so you -do not have to specify a database in the URI:: - - >>> uri = "mongodb://example.com/?authMechanism=MONGODB-X509" - >>> client = MongoClient(uri, - ... tls=True, - ... tlsCertificateKeyFile='/path/to/client.pem', - ... tlsCAFile='/path/to/ca.pem') - >>> - -.. 
_gssapi: - -GSSAPI (Kerberos) ------------------ -.. versionadded:: 2.5 - -GSSAPI (Kerberos) authentication is available in the Enterprise Edition of -MongoDB. - -Unix -~~~~ - -To authenticate using GSSAPI you must first install the python `kerberos`_ or -`pykerberos`_ module using pip. Make sure you run kinit before -using the following authentication methods:: - - $ kinit mongodbuser@EXAMPLE.COM - mongodbuser@EXAMPLE.COM's Password: - $ klist - Credentials cache: FILE:/tmp/krb5cc_1000 - Principal: mongodbuser@EXAMPLE.COM - - Issued Expires Principal - Feb 9 13:48:51 2013 Feb 9 23:48:51 2013 krbtgt/EXAMPLE.COM@EXAMPLE.COM - -Now authenticate using the MongoDB URI. GSSAPI authenticates against the -$external virtual database so you do not have to specify a database in the -URI:: - - >>> # Note: the kerberos principal must be url encoded. - >>> from pymongo import MongoClient - >>> uri = "mongodb://mongodbuser%40EXAMPLE.COM@mongo-server.example.com/?authMechanism=GSSAPI" - >>> client = MongoClient(uri) - >>> - -The default service name used by MongoDB and PyMongo is ``mongodb``. You can -specify a custom service name with the ``authMechanismProperties`` option:: - - >>> from pymongo import MongoClient - >>> uri = "mongodb://mongodbuser%40EXAMPLE.COM@mongo-server.example.com/?authMechanism=GSSAPI&authMechanismProperties=SERVICE_NAME:myservicename" - >>> client = MongoClient(uri) - -Windows (SSPI) -~~~~~~~~~~~~~~ -.. versionadded:: 3.3 - -First install the `winkerberos`_ module. Unlike authentication on Unix kinit is -not used. 
If the user to authenticate is different from the user that owns the -application process provide a password to authenticate:: - - >>> uri = "mongodb://mongodbuser%40EXAMPLE.COM:mongodbuserpassword@example.com/?authMechanism=GSSAPI" - -Two extra ``authMechanismProperties`` are supported on Windows platforms: - -- CANONICALIZE_HOST_NAME - Uses the fully qualified domain name (FQDN) of the - MongoDB host for the server principal (GSSAPI libraries on Unix do this by - default):: - - >>> uri = "mongodb://mongodbuser%40EXAMPLE.COM@example.com/?authMechanism=GSSAPI&authMechanismProperties=CANONICALIZE_HOST_NAME:true" - -- SERVICE_REALM - This is used when the user's realm is different from the service's realm:: - - >>> uri = "mongodb://mongodbuser%40EXAMPLE.COM@example.com/?authMechanism=GSSAPI&authMechanismProperties=SERVICE_REALM:otherrealm" - - -.. _kerberos: https://pypi.python.org/pypi/kerberos -.. _pykerberos: https://pypi.python.org/pypi/pykerberos -.. _winkerberos: https://pypi.python.org/pypi/winkerberos/ - -.. _sasl_plain: - -SASL PLAIN (RFC 4616) ---------------------- -.. versionadded:: 2.6 - -MongoDB Enterprise Edition version 2.6 and newer support the SASL PLAIN -authentication mechanism, initially intended for delegating authentication -to an LDAP server. These examples use the $external virtual database for LDAP support:: - - >>> from pymongo import MongoClient - >>> uri = "mongodb://user:password@example.com/?authMechanism=PLAIN" - >>> client = MongoClient(uri) - >>> - -SASL PLAIN is a clear-text authentication mechanism. We **strongly** recommend -that you connect to MongoDB using TLS/SSL with certificate validation when -using the SASL PLAIN mechanism:: - - >>> from pymongo import MongoClient - >>> uri = "mongodb://user:password@example.com/?authMechanism=PLAIN" - >>> client = MongoClient(uri, - ... tls=True, - ... tlsCertificateKeyFile='/path/to/client.pem', - ... tlsCAFile='/path/to/ca.pem') - >>> - -.. _MONGODB-AWS: - -MONGODB-AWS ------------ -.. 
versionadded:: 3.11 - -The MONGODB-AWS authentication mechanism is available in MongoDB 4.4+ and -requires extra pymongo dependencies. To use it, install pymongo with the -``aws`` extra:: - - $ python -m pip install 'pymongo[aws]' - -The MONGODB-AWS mechanism authenticates using AWS IAM credentials (an access -key ID and a secret access key), `temporary AWS IAM credentials`_ obtained -from an `AWS Security Token Service (STS)`_ `Assume Role`_ request, -AWS Lambda `environment variables`_, or temporary AWS IAM credentials assigned -to an `EC2 instance`_ or ECS task. The use of temporary credentials, in -addition to an access key ID and a secret access key, also requires a -security (or session) token. - -Credentials can be configured through the MongoDB URI, environment variables, -or the local EC2 or ECS endpoint. The order in which the client searches for -`credentials`_ is the same as the one used by the AWS ``boto3`` library -when using ``pymongo_auth_aws>=1.1.0``. - -Because we are now using ``boto3`` to handle credentials, the order and -locations of credentials are slightly different from before. Particularly, -if you have a shared AWS credentials or config file, -then those credentials will be used by default if AWS auth environment -variables are not set. To override this behavior, set -``AWS_SHARED_CREDENTIALS_FILE=""`` in your shell or add -``os.environ["AWS_SHARED_CREDENTIALS_FILE"] = ""`` to your script or -application. Alternatively, you can create an AWS profile specifically for -your MongoDB credentials and set ``AWS_PROFILE`` to that profile name. - -MONGODB-AWS authenticates against the "$external" virtual database, so none of -the URIs in this section need to include the ``authSource`` URI option. - -.. 
_credentials: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html - -AWS IAM credentials -~~~~~~~~~~~~~~~~~~~ - -Applications can authenticate using AWS IAM credentials by providing a valid -access key id and secret access key pair as the username and password, -respectively, in the MongoDB URI. A sample URI would be:: - - >>> from pymongo import MongoClient - >>> uri = "mongodb+srv://:@example.mongodb.net/?authMechanism=MONGODB-AWS" - >>> client = MongoClient(uri) - -.. note:: The access_key_id and secret_access_key passed into the URI MUST - be `percent escaped`_. - -AssumeRole -~~~~~~~~~~ - -Applications can authenticate using temporary credentials returned from an -assume role request. These temporary credentials consist of an access key -ID, a secret access key, and a security token passed into the URI. -A sample URI would be:: - - >>> from pymongo import MongoClient - >>> uri = "mongodb+srv://:@example.mongodb.net/?authMechanism=MONGODB-AWS&authMechanismProperties=AWS_SESSION_TOKEN:" - >>> client = MongoClient(uri) - -.. note:: The access_key_id, secret_access_key, and session_token passed into - the URI MUST be `percent escaped`_. - - -AWS Lambda (Environment Variables) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -When the username and password are not provided and the MONGODB-AWS mechanism -is set, the client will fallback to using the `environment variables`_ -``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY``, and ``AWS_SESSION_TOKEN`` -for the access key ID, secret access key, and session token, respectively:: - - $ export AWS_ACCESS_KEY_ID= - $ export AWS_SECRET_ACCESS_KEY= - $ export AWS_SESSION_TOKEN= - $ python - >>> from pymongo import MongoClient - >>> uri = "mongodb+srv://example.mongodb.net/?authMechanism=MONGODB-AWS" - >>> client = MongoClient(uri) - -.. note:: No username, password, or session token is passed into the URI. - PyMongo will use credentials set via the environment variables. 
- These environment variables MUST NOT be `percent escaped`_. - - -.. _EKS Clusters: - -EKS Clusters -~~~~~~~~~~~~ - -Applications using the `Authenticating users for your cluster from an OpenID Connect identity provider `_ capability on EKS can now -use the provided credentials, by giving the associated IAM User -`sts:AssumeRoleWithWebIdentity `_ -permission. - -When the username and password are not provided, the MONGODB-AWS mechanism -is set, and ``AWS_WEB_IDENTITY_TOKEN_FILE``, ``AWS_ROLE_ARN``, and -optional ``AWS_ROLE_SESSION_NAME`` are available, the driver will use -an ``AssumeRoleWithWebIdentity`` call to retrieve temporary credentials. -The application must be using ``pymongo_auth_aws`` >= 1.1.0 for EKS support. - -ECS Container -~~~~~~~~~~~~~ - -Applications can authenticate from an ECS container via temporary -credentials assigned to the machine. A sample URI on an ECS container -would be:: - - >>> from pymongo import MongoClient - >>> uri = "mongodb+srv://example.mongodb.com/?authMechanism=MONGODB-AWS" - >>> client = MongoClient(uri) - -.. note:: No username, password, or session token is passed into the URI. - PyMongo will query the ECS container endpoint to obtain these - credentials. - -EC2 Instance -~~~~~~~~~~~~ - -Applications can authenticate from an EC2 instance via temporary -credentials assigned to the machine. A sample URI on an EC2 machine -would be:: - - >>> from pymongo import MongoClient - >>> uri = "mongodb+srv://example.mongodb.com/?authMechanism=MONGODB-AWS" - >>> client = MongoClient(uri) - -.. note:: No username, password, or session token is passed into the URI. - PyMongo will query the EC2 instance endpoint to obtain these - credentials. - -.. _temporary AWS IAM credentials: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp.html -.. _AWS Security Token Service (STS): https://docs.aws.amazon.com/STS/latest/APIReference/Welcome.html -.. 
_Assume Role: https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html -.. _EC2 instance: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use_switch-role-ec2.html -.. _environment variables: https://docs.aws.amazon.com/lambda/latest/dg/configuration-envvars.html#configuration-envvars-runtime - -MONGODB-OIDC ------------- -.. versionadded:: 4.7 - -The `MONGODB-OIDC authentication mechanism`_ is available in MongoDB 7.0+ on Linux platforms. - -The MONGODB-OIDC mechanism authenticates using an OpenID Connect (OIDC) access token. -The driver supports OIDC for workload identity, defined as an identity you assign to a software workload -(such as an application, service, script, or container) to authenticate and access other services and resources. - -Credentials can be configured through the MongoDB URI or as arguments to -:class:`~pymongo.mongo_client.MongoClient`. - -Built-in Support -~~~~~~~~~~~~~~~~ - -The driver has built-in support for Azure IMDS and GCP IMDS environments. Other environments -are supported with `Custom Callbacks`_. - -Azure IMDS -^^^^^^^^^^ - -For an application running on an Azure VM or otherwise using the `Azure Internal Metadata Service`_, -you can use the built-in support for Azure. If using an Azure managed identity, the "" is -the client ID. If using a service principal to represent an enterprise application, the "" is -the application ID of the service principal. The ```` value is the ``audience`` -`configured on your MongoDB deployment`_. - -.. code-block:: python - - import os - - uri = os.environ["MONGODB_URI"] - - props = {"ENVIRONMENT": "azure", "TOKEN_RESOURCE": ""} - c = MongoClient( - uri, - username="", - authMechanism="MONGODB-OIDC", - authMechanismProperties=props, - ) - c.test.test.insert_one({}) - c.close() - -If the application is running on an Azure VM and only one managed identity is associated with the -VM, ``username`` can be omitted. 
- -If providing the ``TOKEN_RESOURCE`` as part of a connection string, it can be given as follows. -If the ``TOKEN_RESOURCE`` contains any of the following characters [``,``, ``+``, ``&``], then -it MUST be url-encoded. - -.. code-block:: python - - import os - - uri = f'{os.environ["MONGODB_URI"]}?authMechanism=MONGODB-OIDC&authMechanismProperties=ENVIRONMENT:azure,TOKEN_RESOURCE:' - c = MongoClient(uri) - c.test.test.insert_one({}) - c.close() - -GCP IMDS -^^^^^^^^ - -For an application running on an GCP VM or otherwise using the `GCP Internal Metadata Service`_, -you can use the built-in support for GCP, where ```` below is the ``audience`` -`configured on your MongoDB deployment`_. - -.. code-block:: python - - import os - - uri = os.environ["MONGODB_URI"] - - props = {"ENVIRONMENT": "gcp", "TOKEN_RESOURCE": ""} - c = MongoClient(uri, authMechanism="MONGODB-OIDC", authMechanismProperties=props) - c.test.test.insert_one({}) - c.close() - -If providing the ``TOKEN_RESOURCE`` as part of a connection string, it can be given as follows. -If the ``TOKEN_RESOURCE`` contains any of the following characters [``,``, ``+``, ``&``], then -it MUST be url-encoded. - -.. code-block:: python - - import os - - uri = f'{os.environ["MONGODB_URI"]}?authMechanism=MONGODB-OIDC&authMechanismProperties=ENVIRONMENT:gcp,TOKEN_RESOURCE:' - c = MongoClient(uri) - c.test.test.insert_one({}) - c.close() - -Custom Callbacks -~~~~~~~~~~~~~~~~ - -For environments that are not directly supported by the driver, you can use :class:`~pymongo.auth_oidc.OIDCCallback`. -Some examples are given below. - -Other Azure Environments -^^^^^^^^^^^^^^^^^^^^^^^^ - -For applications running on Azure Functions, App Service Environment (ASE), or -Azure Kubernetes Service (AKS), you can use the `azure-identity package`_ -to fetch the credentials. 
This example assumes you have set environment variables for -the ``audience`` `configured on your MongoDB deployment`_, and for the client id of the Azure -managed identity. - -.. code-block:: python - - import os - from azure.identity import DefaultAzureCredential - from pymongo import MongoClient - from pymongo.auth_oidc import OIDCCallback, OIDCCallbackContext, OIDCCallbackResult - - audience = os.environ["AZURE_AUDIENCE"] - client_id = os.environ["AZURE_IDENTITY_CLIENT_ID"] - uri = os.environ["MONGODB_URI"] - - - class MyCallback(OIDCCallback): - def fetch(self, context: OIDCCallbackContext) -> OIDCCallbackResult: - credential = DefaultAzureCredential(managed_identity_client_id=client_id) - token = credential.get_token(f"{audience}/.default").token - return OIDCCallbackResult(access_token=token) - - - props = {"OIDC_CALLBACK": MyCallback()} - c = MongoClient(uri, authMechanism="MONGODB-OIDC", authMechanismProperties=props) - c.test.test.insert_one({}) - c.close() - -GCP GKE -^^^^^^^ - -For a Google Kubernetes Engine cluster with a `configured service account`_, the token can be read from the standard -service account token file location. - -.. code-block:: python - - import os - from pymongo.auth_oidc import OIDCCallback, OIDCCallbackContext, OIDCCallbackResult - - - class MyCallback(OIDCCallback): - def fetch(self, context: OIDCCallbackContext) -> OIDCCallbackResult: - with open("/var/run/secrets/kubernetes.io/serviceaccount/token") as fid: - token = fid.read() - return OIDCCallbackResult(access_token=token) - - - uri = os.environ["MONGODB_URI"] - props = {"OIDC_CALLBACK": MyCallback()} - c = MongoClient(uri, authMechanism="MONGODB-OIDC", authMechanismProperties=props) - c.test.test.insert_one({}) - c.close() - -.. _MONGODB-OIDC authentication mechanism: https://www.mongodb.com/docs/manual/core/security-oidc/ -.. _Azure Internal Metadata Service: https://learn.microsoft.com/en-us/azure/virtual-machines/instance-metadata-service -.. 
_configured on your MongoDB deployment: https://www.mongodb.com/docs/manual/reference/parameters/#mongodb-parameter-param.oidcIdentityProviders -.. _GCP Internal Metadata Service: https://cloud.google.com/compute/docs/metadata/querying-metadata -.. _azure-identity package: https://pypi.org/project/azure-identity/ -.. _configured service account: https://cloud.google.com/kubernetes-engine/docs/how-to/service-accounts diff --git a/doc/examples/bulk.rst b/doc/examples/bulk.rst deleted file mode 100644 index 3ed8e09645..0000000000 --- a/doc/examples/bulk.rst +++ /dev/null @@ -1,184 +0,0 @@ -Bulk Write Operations -===================== - -.. testsetup:: - - from pymongo import MongoClient - - client = MongoClient() - client.drop_database("bulk_example") - -This tutorial explains how to take advantage of PyMongo's bulk -write operation features. Executing write operations in batches -reduces the number of network round trips, increasing write -throughput. - -Bulk Insert ------------ - -.. versionadded:: 2.6 - -A batch of documents can be inserted by passing a list to the -:meth:`~pymongo.collection.Collection.insert_many` method. PyMongo -will automatically split the batch into smaller sub-batches based on -the maximum message size accepted by MongoDB, supporting very large -bulk insert operations. - -.. doctest:: - - >>> import pymongo - >>> db = pymongo.MongoClient().bulk_example - >>> db.test.insert_many([{"i": i} for i in range(10000)]).inserted_ids - [...] - >>> db.test.count_documents({}) - 10000 - -Mixed Bulk Write Operations ---------------------------- - -.. versionadded:: 2.7 - -PyMongo also supports executing mixed bulk write operations. A batch -of insert, update, and remove operations can be executed together using -the bulk write operations API. - -.. _ordered_bulk: - -Ordered Bulk Write Operations -............................. - -Ordered bulk write operations are batched and sent to the server in the -order provided for serial execution. 
The return value is an instance of -:class:`~pymongo.results.BulkWriteResult` describing the type and count -of operations performed. - -.. doctest:: - :options: +NORMALIZE_WHITESPACE - - >>> from pprint import pprint - >>> from pymongo import InsertOne, DeleteMany, ReplaceOne, UpdateOne - >>> result = db.test.bulk_write( - ... [ - ... DeleteMany({}), # Remove all documents from the previous example. - ... InsertOne({"_id": 1}), - ... InsertOne({"_id": 2}), - ... InsertOne({"_id": 3}), - ... UpdateOne({"_id": 1}, {"$set": {"foo": "bar"}}), - ... UpdateOne({"_id": 4}, {"$inc": {"j": 1}}, upsert=True), - ... ReplaceOne({"j": 1}, {"j": 2}), - ... ] - ... ) - >>> pprint(result.bulk_api_result) - {'nInserted': 3, - 'nMatched': 2, - 'nModified': 2, - 'nRemoved': 10000, - 'nUpserted': 1, - 'upserted': [{'_id': 4, 'index': 5}], - 'writeConcernErrors': [], - 'writeErrors': []} - -The first write failure that occurs (e.g. duplicate key error) aborts the -remaining operations, and PyMongo raises -:class:`~pymongo.errors.BulkWriteError`. The :attr:`details` attribute of -the exception instance provides the execution results up until the failure -occurred and details about the failure - including the operation that caused -the failure. - -.. doctest:: - :options: +NORMALIZE_WHITESPACE - - >>> from pymongo import InsertOne, DeleteOne, ReplaceOne - >>> from pymongo.errors import BulkWriteError - >>> requests = [ - ... ReplaceOne({"j": 2}, {"i": 5}), - ... InsertOne({"_id": 4}), # Violates the unique key constraint on _id. - ... DeleteOne({"i": 5}), - ... ] - >>> try: - ... db.test.bulk_write(requests) - ... except BulkWriteError as bwe: - ... pprint(bwe.details) - ... - {'nInserted': 0, - 'nMatched': 1, - 'nModified': 1, - 'nRemoved': 0, - 'nUpserted': 0, - 'upserted': [], - 'writeConcernErrors': [], - 'writeErrors': [{'code': 11000, - 'errmsg': '...E11000...duplicate key error...', - 'index': 1,... - 'op': {'_id': 4}}]} - -.. 
_unordered_bulk: - -Unordered Bulk Write Operations -............................... - -Unordered bulk write operations are batched and sent to the server in -**arbitrary order** where they may be executed in parallel. Any errors -that occur are reported after all operations are attempted. - -In the next example the first and third operations fail due to the unique -constraint on _id. Since we are doing unordered execution the second -and fourth operations succeed. - -.. doctest:: - :options: +NORMALIZE_WHITESPACE - - >>> requests = [ - ... InsertOne({"_id": 1}), - ... DeleteOne({"_id": 2}), - ... InsertOne({"_id": 3}), - ... ReplaceOne({"_id": 4}, {"i": 1}), - ... ] - >>> try: - ... db.test.bulk_write(requests, ordered=False) - ... except BulkWriteError as bwe: - ... pprint(bwe.details) - ... - {'nInserted': 0, - 'nMatched': 1, - 'nModified': 1, - 'nRemoved': 1, - 'nUpserted': 0, - 'upserted': [], - 'writeConcernErrors': [], - 'writeErrors': [{'code': 11000, - 'errmsg': '...E11000...duplicate key error...', - 'index': 0,... - 'op': {'_id': 1}}, - {'code': 11000, - 'errmsg': '...', - 'index': 2,... - 'op': {'_id': 3}}]} - -Write Concern -............. - -Bulk operations are executed with the -:attr:`~pymongo.collection.Collection.write_concern` of the collection they -are executed against. Write concern errors (e.g. wtimeout) will be reported -after all operations are attempted, regardless of execution order. - -:: - >>> from pymongo import WriteConcern - >>> coll = db.get_collection( - ... 'test', write_concern=WriteConcern(w=3, wtimeout=1)) - >>> try: - ... coll.bulk_write([InsertOne({'a': i}) for i in range(4)]) - ... except BulkWriteError as bwe: - ... pprint(bwe.details) - ... - {'nInserted': 4, - 'nMatched': 0, - 'nModified': 0, - 'nRemoved': 0, - 'nUpserted': 0, - 'upserted': [], - 'writeConcernErrors': [{'code': 64... 
- 'errInfo': {'wtimeout': True}, - 'errmsg': 'waiting for replication timed out'}], - 'writeErrors': []} diff --git a/doc/examples/client_bulk.rst b/doc/examples/client_bulk.rst deleted file mode 100644 index ad435fa2e4..0000000000 --- a/doc/examples/client_bulk.rst +++ /dev/null @@ -1,192 +0,0 @@ -Client Bulk Write Operations -============================= - -.. testsetup:: - - from pymongo import MongoClient - - client = MongoClient() - client.drop_database("client_bulk_example") - db = client.client_bulk_example - client.db.drop_collection("test_one") - client.db.drop_collection("test_two") - client.db.drop_collection("test_three") - client.db.drop_collection("test_four") - client.db.drop_collection("test_five") - client.db.drop_collection("test_six") - -The :meth:`~pymongo.mongo_client.MongoClient.bulk_write` -method has been added to :class:`~pymongo.mongo_client.MongoClient` in PyMongo 4.9. -This method enables users to perform batches of write operations **across -multiple namespaces** in a minimized number of round trips, and -to receive detailed results for each operation performed. - -.. note:: This method requires MongoDB server version 8.0+. - -Basic Usage ------------- - -A list of insert, update, and delete operations can be passed into the -:meth:`~pymongo.mongo_client.MongoClient.bulk_write` method. Each request -must include the namespace on which to perform the operation. - -PyMongo will automatically split the given requests into smaller sub-batches based on -the maximum message size accepted by MongoDB, supporting very large bulk write operations. - -The return value is an instance of -:class:`~pymongo.results.ClientBulkWriteResult`. - -.. _summary_client_bulk: - -Summary Results -................. - -By default, the returned :class:`~pymongo.results.ClientBulkWriteResult` instance will contain a -summary of the types of operations performed in the bulk write, along with their respective counts. - -.. 
doctest:: - :options: +NORMALIZE_WHITESPACE - :skipif: server_major_version < 8 - - >>> from pymongo import InsertOne, DeleteOne, UpdateOne - >>> models = [ - ... InsertOne(namespace="db.test_one", document={"_id": 1}), - ... InsertOne(namespace="db.test_two", document={"_id": 2}), - ... DeleteOne(namespace="db.test_one", filter={"_id": 1}), - ... UpdateOne( - ... namespace="db.test_two", - ... filter={"_id": 4}, - ... update={"$inc": {"j": 1}}, - ... upsert=True, - ... ), - ... ] - >>> result = client.bulk_write(models) - >>> result.inserted_count - 2 - >>> result.deleted_count - 1 - >>> result.modified_count - 0 - >>> result.upserted_count - 1 - -.. _verbose_client_bulk: - -Verbose Results -................. - -If the ``verbose_results`` parameter is set to True, the returned :class:`~pymongo.results.ClientBulkWriteResult` -instance will also include detailed results about each successful operation performed as part of the bulk write. - -.. doctest:: - :options: +NORMALIZE_WHITESPACE - :skipif: server_major_version < 8 - - >>> from pymongo import InsertOne, DeleteMany, ReplaceOne, UpdateMany - >>> models = [ - ... DeleteMany( - ... namespace="db.test_two", filter={} - ... ), # Delete all documents from the previous example - ... InsertOne(namespace="db.test_one", document={"_id": 1}), - ... InsertOne(namespace="db.test_one", document={"_id": 2}), - ... InsertOne(namespace="db.test_two", document={"_id": 3}), - ... UpdateMany(namespace="db.test_one", filter={}, update={"$set": {"foo": "bar"}}), - ... ReplaceOne( - ... namespace="db.test_two", filter={"j": 1}, replacement={"_id": 4}, upsert=True - ... ), - ... 
] - >>> result = client.bulk_write(models, verbose_results=True) - >>> result.delete_results - {0: DeleteResult({'ok': 1.0, 'idx': 0, 'n': 2}, ...)} - >>> result.insert_results - {1: InsertOneResult(1, ...), - 2: InsertOneResult(2, ...), - 3: InsertOneResult(3, ...)} - >>> result.update_results - {4: UpdateResult({'ok': 1.0, 'idx': 4, 'n': 2, 'nModified': 2}, ...), - 5: UpdateResult({'ok': 1.0, 'idx': 5, 'n': 1, 'nModified': 0, 'upserted': {'_id': 4}}, ...)} - - -Handling Errors ----------------- - -If any errors occur during the bulk write, a :class:`~pymongo.errors.ClientBulkWriteException` will be raised. -If a server, connection, or network error occurred, the ``error`` field of the exception will contain -that error. - -Individual write errors or write concern errors get recorded in the ``write_errors`` and ``write_concern_errors`` fields of the exception. -The ``partial_result`` field gets populated with the results of any operations that were successfully completed before the exception was raised. - -.. _ordered_client_bulk: - -Ordered Operations -.................... - -In an ordered bulk write (the default), if an individual write fails, no further operations will get executed. -For example, a duplicate key error on the third operation below aborts the remaining two operations. - -.. doctest:: - :options: +NORMALIZE_WHITESPACE - :skipif: server_major_version < 8 - - >>> from pymongo import InsertOne, DeleteOne - >>> from pymongo.errors import ClientBulkWriteException - >>> models = [ - ... InsertOne(namespace="db.test_three", document={"_id": 3}), - ... InsertOne(namespace="db.test_four", document={"_id": 4}), - ... InsertOne(namespace="db.test_three", document={"_id": 3}), # Duplicate _id - ... InsertOne(namespace="db.test_four", document={"_id": 5}), - ... DeleteOne(namespace="db.test_three", filter={"_id": 3}), - ... ] - >>> try: - ... client.bulk_write(models) - ... except ClientBulkWriteException as cbwe: - ... exception = cbwe - ... 
- >>> exception.write_errors - [{'ok': 0.0, - 'idx': 2, - 'code': 11000, - 'errmsg': 'E11000 duplicate key error ... dup key: { _id: 3 }', ... - 'op': {'insert': 0, 'document': {'_id': 3}}}] - >>> exception.partial_result.inserted_count - 2 - >>> exception.partial_result.deleted_count - 0 - -.. _unordered_client_bulk: - -Unordered Operations -..................... - -If the ``ordered`` parameter is set to False, all operations in the bulk write will be attempted, regardless of any individual write errors that occur. -For example, the fourth and fifth write operations below get executed successfully, despite the duplicate key error on the third operation. - -.. doctest:: - :options: +NORMALIZE_WHITESPACE - :skipif: server_major_version < 8 - - >>> from pymongo import InsertOne, DeleteOne - >>> from pymongo.errors import ClientBulkWriteException - >>> models = [ - ... InsertOne(namespace="db.test_five", document={"_id": 5}), - ... InsertOne(namespace="db.test_six", document={"_id": 6}), - ... InsertOne(namespace="db.test_five", document={"_id": 5}), # Duplicate _id - ... InsertOne(namespace="db.test_six", document={"_id": 7}), - ... DeleteOne(namespace="db.test_five", filter={"_id": 5}), - ... ] - >>> try: - ... client.bulk_write(models, ordered=False) - ... except ClientBulkWriteException as cbwe: - ... exception = cbwe - ... - >>> exception.write_errors - [{'ok': 0.0, - 'idx': 2, - 'code': 11000, - 'errmsg': 'E11000 duplicate key error ... dup key: { _id: 5 }', ... - 'op': {'insert': 0, 'document': {'_id': 5}}}] - >>> exception.partial_result.inserted_count - 3 - >>> exception.partial_result.deleted_count - 1 diff --git a/doc/examples/collations.rst b/doc/examples/collations.rst deleted file mode 100644 index 45e647d816..0000000000 --- a/doc/examples/collations.rst +++ /dev/null @@ -1,134 +0,0 @@ -Collations -========== - -.. seealso:: The API docs for :mod:`~pymongo.collation`. - -Collations are a new feature in MongoDB version 3.4. 
They provide a set of rules -to use when comparing strings that comply with the conventions of a particular -language, such as Spanish or German. If no collation is specified, the server -sorts strings based on a binary comparison. Many languages have specific -ordering rules, and collations allow users to build applications that adhere to -language-specific comparison rules. - -In French, for example, the last accent in a given word determines the sorting -order. The correct sorting order for the following four words in French is:: - - cote < côte < coté < côté - -Specifying a French collation allows users to sort string fields using the -French sort order. - -Usage ------ - -Users can specify a collation for a -:ref:`collection`, an -:ref:`index`, or a -:ref:`CRUD command `. - -Collation Parameters: -~~~~~~~~~~~~~~~~~~~~~ - -Collations can be specified with the :class:`~pymongo.collation.Collation` model -or with plain Python dictionaries. The structure is the same:: - - Collation(locale=, - caseLevel=, - caseFirst=, - strength=, - numericOrdering=, - alternate=, - maxVariable=, - backwards=) - -The only required parameter is ``locale``, which the server parses as -an `ICU format locale ID `_. -For example, set ``locale`` to ``en_US`` to represent US English -or ``fr_CA`` to represent Canadian French. - -For a complete description of the available parameters, see the MongoDB `manual -`_. - -.. COMMENT add link for manual entry. - -.. _collation-on-collection: - -Assign a Default Collation to a Collection -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The following example demonstrates how to create a new collection called -``contacts`` and assign a default collation with the ``fr_CA`` locale. 
This -operation ensures that all queries that are run against the ``contacts`` -collection use the ``fr_CA`` collation unless another collation is explicitly -specified:: - - from pymongo import MongoClient - from pymongo.collation import Collation - - db = MongoClient().test - collection = db.create_collection('contacts', - collation=Collation(locale='fr_CA')) - -.. _collation-on-index: - -Assign a Default Collation to an Index -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -When creating a new index, you can specify a default collation. - -The following example shows how to create an index on the ``name`` -field of the ``contacts`` collection, with the ``unique`` parameter -enabled and a default collation with ``locale`` set to ``fr_CA``:: - - from pymongo import MongoClient - from pymongo.collation import Collation - - contacts = MongoClient().test.contacts - contacts.create_index('name', - unique=True, - collation=Collation(locale='fr_CA')) - -.. _collation-on-operation: - -Specify a Collation for a Query -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Individual queries can specify a collation to use when sorting -results. The following example demonstrates a query that runs on the -``contacts`` collection in database ``test``. It matches on -documents that contain ``New York`` in the ``city`` field, -and sorts on the ``name`` field with the ``fr_CA`` collation:: - - from pymongo import MongoClient - from pymongo.collation import Collation - - collection = MongoClient().test.contacts - docs = collection.find({'city': 'New York'}).sort('name').collation( - Collation(locale='fr_CA')) - -Other Query Types -~~~~~~~~~~~~~~~~~ - -You can use collations to control document matching rules for several different -types of queries. All the various update and delete methods -(:meth:`~pymongo.collection.Collection.update_one`, -:meth:`~pymongo.collection.Collection.update_many`, -:meth:`~pymongo.collection.Collection.delete_one`, etc.) 
support collation, and -you can create query filters which employ collations to comply with any of the -languages and variants available to the ``locale`` parameter. - -The following example uses a collation with ``strength`` set to -:const:`~pymongo.collation.CollationStrength.SECONDARY`, which considers only -the base character and character accents in string comparisons, but not case -sensitivity, for example. All documents in the ``contacts`` collection with -``jürgen`` (case-insensitive) in the ``first_name`` field are updated:: - - from pymongo import MongoClient - from pymongo.collation import Collation, CollationStrength - - contacts = MongoClient().test.contacts - result = contacts.update_many( - {'first_name': 'jürgen'}, - {'$set': {'verified': 1}}, - collation=Collation(locale='de', - strength=CollationStrength.SECONDARY)) diff --git a/doc/examples/copydb.rst b/doc/examples/copydb.rst deleted file mode 100644 index c8026ba05f..0000000000 --- a/doc/examples/copydb.rst +++ /dev/null @@ -1,73 +0,0 @@ -Copying a Database -================== - -MongoDB >= 4.2 --------------- - -Starting in MongoDB version 4.2, the server removes the deprecated ``copydb`` command. -As an alternative, users can use ``mongodump`` and ``mongorestore`` (with the ``mongorestore`` -options ``--nsFrom`` and ``--nsTo``). - -For example, to copy the ``test`` database from a local instance running on the -default port 27017 to the ``examples`` database on the same instance, you can: - -#. Use ``mongodump`` to dump the test database to an archive ``mongodump-test-db``:: - - mongodump --archive="mongodump-test-db" --db=test - -#. Use ``mongorestore`` with ``--nsFrom`` and ``--nsTo`` to restore (with database name change) - from the archive:: - - mongorestore --archive="mongodump-test-db" --nsFrom='test.*' --nsTo='examples.*' - -Include additional options as necessary, such as to specify the uri or host, username, -password and authentication database. 
- -For more info about using ``mongodump`` and ``mongorestore`` see the `Copy a Database`_ example -in the official ``mongodump`` documentation. - -MongoDB <= 4.0 --------------- - -When using MongoDB <= 4.0, it is possible to use the deprecated ``copydb`` command -to copy a database. To copy a database within a single ``mongod`` process, or -between ``mongod`` servers, connect to the target ``mongod`` and use the -:meth:`~pymongo.database.Database.command` method:: - - >>> from pymongo import MongoClient - >>> client = MongoClient('target.example.com') - >>> client.admin.command('copydb', - fromdb='source_db_name', - todb='target_db_name') - -To copy from a different mongod server that is not password-protected:: - - >>> client.admin.command('copydb', - fromdb='source_db_name', - todb='target_db_name', - fromhost='source.example.com') - -If the target server is password-protected, authenticate to the "admin" -database:: - - >>> client = MongoClient('target.example.com', - ... username='administrator', - ... password='pwd') - >>> client.admin.command('copydb', - fromdb='source_db_name', - todb='target_db_name', - fromhost='source.example.com') - -See the :doc:`authentication examples `. - -If the **source** server is password-protected, use the `copyDatabase -function in the mongo shell`_. - -Versions of PyMongo before 3.0 included a ``copy_database`` helper method, -but it has been removed. - -.. _copyDatabase function in the mongo shell: - https://mongodb.com/docs/manual/reference/method/db.copyDatabase/ - -.. _Copy a Database: - https://www.mongodb.com/docs/database-tools/mongodump/mongodump-examples/#copy-and-clone-databases diff --git a/doc/examples/custom_type.rst b/doc/examples/custom_type.rst deleted file mode 100644 index acf706deba..0000000000 --- a/doc/examples/custom_type.rst +++ /dev/null @@ -1,436 +0,0 @@ -Custom Type Example -=================== - -This is an example of using a custom type with PyMongo. 
The example here shows -how to subclass :class:`~bson.codec_options.TypeCodec` to write a type -codec, which is used to populate a :class:`~bson.codec_options.TypeRegistry`. -The type registry can then be used to create a custom-type-aware -:class:`~pymongo.collection.Collection`. Read and write operations -issued against the resulting collection object transparently manipulate -documents as they are saved to or retrieved from MongoDB. - - -Setting Up ----------- - -We'll start by getting a clean database to use for the example: - -.. doctest:: - - >>> from pymongo import MongoClient - >>> client = MongoClient() - >>> client.drop_database("custom_type_example") - >>> db = client.custom_type_example - - -Since the purpose of the example is to demonstrate working with custom types, -we'll need a custom data type to use. For this example, we will be working with -the :py:class:`~decimal.Decimal` type from Python's standard library. Since the -BSON library's :class:`~bson.decimal128.Decimal128` type (that implements -the IEEE 754 decimal128 decimal-based floating-point numbering format) is -distinct from Python's built-in :py:class:`~decimal.Decimal` type, attempting -to save an instance of ``Decimal`` with PyMongo, results in an -:exc:`~bson.errors.InvalidDocument` exception. - -.. doctest:: - - >>> from decimal import Decimal - >>> num = Decimal("45.321") - >>> db.test.insert_one({"num": num}) - Traceback (most recent call last): - ... - bson.errors.InvalidDocument: cannot encode object: Decimal('45.321'), of type: - - -.. _custom-type-type-codec: - -The :class:`~bson.codec_options.TypeCodec` Class -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. versionadded:: 3.8 - -In order to encode a custom type, we must first define a **type codec** for -that type. A type codec describes how an instance of a custom type can be -*transformed* to and/or from one of the types :mod:`~bson` already understands. 
-Depending on the desired functionality, users must choose from the following -base classes when defining type codecs: - -* :class:`~bson.codec_options.TypeEncoder`: subclass this to define a codec that - encodes a custom Python type to a known BSON type. Users must implement the - ``python_type`` property/attribute and the ``transform_python`` method. -* :class:`~bson.codec_options.TypeDecoder`: subclass this to define a codec that - decodes a specified BSON type into a custom Python type. Users must implement - the ``bson_type`` property/attribute and the ``transform_bson`` method. -* :class:`~bson.codec_options.TypeCodec`: subclass this to define a codec that - can both encode and decode a custom type. Users must implement the - ``python_type`` and ``bson_type`` properties/attributes, as well as the - ``transform_python`` and ``transform_bson`` methods. - - -The type codec for our custom type simply needs to define how a -:py:class:`~decimal.Decimal` instance can be converted into a -:class:`~bson.decimal128.Decimal128` instance and vice-versa. Since we are -interested in both encoding and decoding our custom type, we use the -``TypeCodec`` base class to define our codec: - -.. doctest:: - - >>> from bson.decimal128 import Decimal128 - >>> from bson.codec_options import TypeCodec - >>> class DecimalCodec(TypeCodec): - ... python_type = Decimal # the Python type acted upon by this type codec - ... bson_type = Decimal128 # the BSON type acted upon by this type codec - ... def transform_python(self, value): - ... """Function that transforms a custom type value into a type - ... that BSON can encode.""" - ... return Decimal128(value) - ... def transform_bson(self, value): - ... """Function that transforms a vanilla BSON type value into our - ... custom type.""" - ... return value.to_decimal() - ... - >>> decimal_codec = DecimalCodec() - - -.. 
_custom-type-type-registry: - -The :class:`~bson.codec_options.TypeRegistry` Class -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. versionadded:: 3.8 - -Before we can begin encoding and decoding our custom type objects, we must -first inform PyMongo about the corresponding codec. This is done by creating -a :class:`~bson.codec_options.TypeRegistry` instance: - -.. doctest:: - - >>> from bson.codec_options import TypeRegistry - >>> type_registry = TypeRegistry([decimal_codec]) - - -Note that type registries can be instantiated with any number of type codecs. -Once instantiated, registries are immutable and the only way to add codecs -to a registry is to create a new one. - - -Putting It Together -------------------- - -Finally, we can define a :class:`~bson.codec_options.CodecOptions` instance -with our ``type_registry`` and use it to get a -:class:`~pymongo.collection.Collection` object that understands the -:py:class:`~decimal.Decimal` data type: - -.. doctest:: - - >>> from bson.codec_options import CodecOptions - >>> codec_options = CodecOptions(type_registry=type_registry) - >>> collection = db.get_collection("test", codec_options=codec_options) - - -Now, we can seamlessly encode and decode instances of -:py:class:`~decimal.Decimal`: - -.. doctest:: - - >>> collection.insert_one({"num": Decimal("45.321")}) - InsertOneResult(ObjectId('...'), acknowledged=True) - >>> mydoc = collection.find_one() - >>> import pprint - >>> pprint.pprint(mydoc) - {'_id': ObjectId('...'), 'num': Decimal('45.321')} - - -We can see what's actually being saved to the database by creating a fresh -collection object without the customized codec options and using that to query -MongoDB: - -.. 
doctest:: - - >>> vanilla_collection = db.get_collection("test") - >>> pprint.pprint(vanilla_collection.find_one()) - {'_id': ObjectId('...'), 'num': Decimal128('45.321')} - - -Encoding Subtypes -^^^^^^^^^^^^^^^^^ - -Consider the situation where, in addition to encoding -:py:class:`~decimal.Decimal`, we also need to encode a type that subclasses -``Decimal``. PyMongo does this automatically for types that inherit from -Python types that are BSON-encodable by default, but the type codec system -described above does not offer the same flexibility. - -Consider this subtype of ``Decimal`` that has a method to return its value as -an integer: - -.. doctest:: - - >>> class DecimalInt(Decimal): - ... def my_method(self): - ... """Method implementing some custom logic.""" - ... return int(self) - ... - -If we try to save an instance of this type without first registering a type -codec for it, we get an error: - -.. doctest:: - - >>> collection.insert_one({"num": DecimalInt("45.321")}) - Traceback (most recent call last): - ... - bson.errors.InvalidDocument: cannot encode object: Decimal('45.321'), of type: - -In order to proceed further, we must define a type codec for ``DecimalInt``. -This is trivial to do since the same transformation as the one used for -``Decimal`` is adequate for encoding ``DecimalInt`` as well: - -.. doctest:: - - >>> class DecimalIntCodec(DecimalCodec): - ... @property - ... def python_type(self): - ... """The Python type acted upon by this type codec.""" - ... return DecimalInt - ... - >>> decimalint_codec = DecimalIntCodec() - - -.. note:: - - No attempt is made to modify decoding behavior because without additional - information, it is impossible to discern which incoming - :class:`~bson.decimal128.Decimal128` value needs to be decoded as ``Decimal`` - and which needs to be decoded as ``DecimalInt``. This example only considers - the situation where a user wants to *encode* documents containing either - of these types. 
- -After creating a new codec options object and using it to get a collection -object, we can seamlessly encode instances of ``DecimalInt``: - -.. doctest:: - - >>> type_registry = TypeRegistry([decimal_codec, decimalint_codec]) - >>> codec_options = CodecOptions(type_registry=type_registry) - >>> collection = db.get_collection("test", codec_options=codec_options) - >>> collection.drop() - >>> collection.insert_one({"num": DecimalInt("45.321")}) - InsertOneResult(ObjectId('...'), acknowledged=True) - >>> mydoc = collection.find_one() - >>> pprint.pprint(mydoc) - {'_id': ObjectId('...'), 'num': Decimal('45.321')} - -Note that the ``transform_bson`` method of the base codec class results in -these values being decoded as ``Decimal`` (and not ``DecimalInt``). - - -.. _decoding-binary-types: - -Decoding :class:`~bson.binary.Binary` Types -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The decoding treatment of :class:`~bson.binary.Binary` types having -``subtype = 0`` by the :mod:`bson` module varies slightly depending on the -version of the Python runtime in use. This must be taken into account while -writing a ``TypeDecoder`` that modifies how this datatype is decoded. - -On Python 3.x, :class:`~bson.binary.Binary` data (``subtype = 0``) is decoded -as a ``bytes`` instance: - -.. code-block:: pycon - - >>> # On Python 3.x. - >>> from bson.binary import Binary - >>> newcoll = db.get_collection("new") - >>> newcoll.insert_one({"_id": 1, "data": Binary(b"123", subtype=0)}) - >>> doc = newcoll.find_one() - >>> type(doc["data"]) - bytes - - -On Python 2.7.x, the same data is decoded as a :class:`~bson.binary.Binary` -instance: - -.. 
code-block:: pycon - - >>> # On Python 2.7.x - >>> newcoll = db.get_collection("new") - >>> doc = newcoll.find_one() - >>> type(doc["data"]) - bson.binary.Binary - - -As a consequence of this disparity, users must set the ``bson_type`` attribute -on their :class:`~bson.codec_options.TypeDecoder` classes differently, -depending on the python version in use. - - -.. note:: - - For codebases requiring compatibility with both Python 2 and 3, type - decoders will have to be registered for both possible ``bson_type`` values. - - -.. _fallback-encoder-callable: - -The ``fallback_encoder`` Callable ---------------------------------- - -.. versionadded:: 3.8 - - -In addition to type codecs, users can also register a callable to encode types -that BSON doesn't recognize and for which no type codec has been registered. -This callable is the **fallback encoder** and like the ``transform_python`` -method, it accepts an unencodable value as a parameter and returns a -BSON-encodable value. The following fallback encoder encodes python's -:py:class:`~decimal.Decimal` type to a :class:`~bson.decimal128.Decimal128`: - -.. doctest:: - - >>> def fallback_encoder(value): - ... if isinstance(value, Decimal): - ... return Decimal128(value) - ... return value - ... - -After declaring the callback, we must create a type registry and codec options -with this fallback encoder before it can be used for initializing a collection: - -.. doctest:: - - >>> type_registry = TypeRegistry(fallback_encoder=fallback_encoder) - >>> codec_options = CodecOptions(type_registry=type_registry) - >>> collection = db.get_collection("test", codec_options=codec_options) - >>> collection.drop() - -We can now seamlessly encode instances of :py:class:`~decimal.Decimal`: - -.. 
doctest:: - - >>> collection.insert_one({"num": Decimal("45.321")}) - InsertOneResult(ObjectId('...'), acknowledged=True) - >>> mydoc = collection.find_one() - >>> pprint.pprint(mydoc) - {'_id': ObjectId('...'), 'num': Decimal128('45.321')} - - -.. note:: - - Fallback encoders are invoked *after* attempts to encode the given value - with standard BSON encoders and any configured type encoders have failed. - Therefore, in a type registry configured with a type encoder and fallback - encoder that both target the same custom type, the behavior specified in - the type encoder will prevail. - - -Because fallback encoders don't need to declare the types that they encode -beforehand, they can be used to support interesting use-cases that cannot be -serviced by ``TypeEncoder``. One such use-case is described in the next -section. - - -Encoding Unknown Types -^^^^^^^^^^^^^^^^^^^^^^ - -In this example, we demonstrate how a fallback encoder can be used to save -arbitrary objects to the database. We will use the standard library's -:py:mod:`pickle` module to serialize the unknown types and so naturally, this -approach only works for types that are picklable. - -We start by defining some arbitrary custom types: - -.. code-block:: python - - class MyStringType(object): - def __init__(self, value): - self.__value = value - - def __repr__(self): - return "MyStringType('%s')" % (self.__value,) - - - class MyNumberType(object): - def __init__(self, value): - self.__value = value - - def __repr__(self): - return "MyNumberType(%s)" % (self.__value,) - -We also define a fallback encoder that pickles whatever objects it receives -and returns them as :class:`~bson.binary.Binary` instances with a custom -subtype. The custom subtype, in turn, allows us to write a TypeDecoder that -identifies pickled artifacts upon retrieval and transparently decodes them -back into Python objects: - -.. 
code-block:: python - - import pickle - from bson.binary import Binary, USER_DEFINED_SUBTYPE - - - def fallback_pickle_encoder(value): - return Binary(pickle.dumps(value), USER_DEFINED_SUBTYPE) - - - class PickledBinaryDecoder(TypeDecoder): - bson_type = Binary - - def transform_bson(self, value): - if value.subtype == USER_DEFINED_SUBTYPE: - return pickle.loads(value) - return value - - -.. note:: - - The above example is written assuming the use of Python 3. If you are using - Python 2, ``bson_type`` must be set to ``Binary``. See the - :ref:`decoding-binary-types` section for a detailed explanation. - - -Finally, we create a ``CodecOptions`` instance: - -.. code-block:: python - - codec_options = CodecOptions( - type_registry=TypeRegistry( - [PickledBinaryDecoder()], fallback_encoder=fallback_pickle_encoder - ) - ) - -We can now round trip our custom objects to MongoDB: - -.. code-block:: python - - collection = db.get_collection("test_fe", codec_options=codec_options) - collection.insert_one( - {"_id": 1, "str": MyStringType("hello world"), "num": MyNumberType(2)} - ) - mydoc = collection.find_one() - assert isinstance(mydoc["str"], MyStringType) - assert isinstance(mydoc["num"], MyNumberType) - - -Limitations ------------ - -PyMongo's type codec and fallback encoder features have the following -limitations: - -#. Users cannot customize the encoding behavior of Python types that PyMongo - already understands like ``int`` and ``str`` (the 'built-in types'). - Attempting to instantiate a type registry with one or more codecs that act - upon a built-in type results in a ``TypeError``. This limitation extends - to all subtypes of the standard types. -#. Chaining type encoders is not supported. 
A custom type value, once - transformed by a codec's ``transform_python`` method, *must* result in a - type that is either BSON-encodable by default, or can be - transformed by the fallback encoder into something BSON-encodable--it - *cannot* be transformed a second time by a different type codec. -#. The :meth:`~pymongo.database.Database.command` method does not apply the - user's TypeDecoders while decoding the command response document. -#. :mod:`gridfs` does not apply custom type encoding or decoding to any - documents received from or returned to the user. diff --git a/doc/examples/datetimes.rst b/doc/examples/datetimes.rst deleted file mode 100644 index a8c0476903..0000000000 --- a/doc/examples/datetimes.rst +++ /dev/null @@ -1,177 +0,0 @@ -Datetimes and Timezones -======================= - -.. testsetup:: - - import datetime - from pymongo import MongoClient - from bson.codec_options import CodecOptions - - client = MongoClient() - client.drop_database("dt_example") - db = client.dt_example - -These examples show how to handle Python :class:`datetime.datetime` objects -correctly in PyMongo. - -Basic Usage ------------ - -PyMongo uses :class:`datetime.datetime` objects for representing dates and times -in MongoDB documents. Because MongoDB assumes that dates and times are in UTC, -care should be taken to ensure that dates and times written to the database -reflect UTC. For example, the following code stores the current UTC date and -time into MongoDB: - -.. doctest:: - - >>> result = db.objects.insert_one( - ... {"last_modified": datetime.datetime.now(tz=datetime.timezone.utc)} - ... ) - -Always use :meth:`datetime.datetime.now(tz=datetime.timezone.utc)`, which explicitly returns the current time in -UTC, instead of :meth:`datetime.datetime.now`, with no arguments, which returns the current local -time. Avoid doing this: - -.. 
doctest:: - - >>> result = db.objects.insert_one({"last_modified": datetime.datetime.now()}) - -The value for ``last_modified`` is very different between these two examples, even -though both documents were stored at around the same local time. This will be -confusing to the application that reads them: - -.. doctest:: - - >>> [doc["last_modified"] for doc in db.objects.find()] # doctest: +SKIP - [datetime.datetime(2015, 7, 8, 18, 17, 28, 324000), - datetime.datetime(2015, 7, 8, 11, 17, 42, 911000)] - -:class:`bson.codec_options.CodecOptions` has a ``tz_aware`` option that enables -"aware" :class:`datetime.datetime` objects, i.e., datetimes that know what -timezone they're in. By default, PyMongo retrieves naive datetimes: - -.. doctest:: - - >>> result = db.tzdemo.insert_one({"date": datetime.datetime(2002, 10, 27, 6, 0, 0)}) - >>> db.tzdemo.find_one()["date"] - datetime.datetime(2002, 10, 27, 6, 0) - >>> options = CodecOptions(tz_aware=True) - >>> db.get_collection("tzdemo", codec_options=options).find_one()["date"] # doctest: +SKIP - datetime.datetime(2002, 10, 27, 6, 0, - tzinfo=) - -Saving Datetimes with Timezones -------------------------------- - -When storing :class:`datetime.datetime` objects that specify a timezone -(i.e. they have a ``tzinfo`` property that isn't ``None``), PyMongo will convert -those datetimes to UTC automatically: - -.. doctest:: - - >>> from zoneinfo import ZoneInfo - >>> from datetime import datetime - >>> aware_datetime = datetime(2002, 10, 27, 6, 0, 0, tzinfo=ZoneInfo("US/Pacific")) - >>> result = db.times.insert_one({"date": aware_datetime}) - >>> db.times.find_one()["date"] - datetime.datetime(2002, 10, 27, 14, 0) - -Reading Time ------------- - -As previously mentioned, by default all :class:`datetime.datetime` objects -returned by PyMongo will be naive but reflect UTC (i.e. the time as stored in -MongoDB). 
By setting the ``tz_aware`` option on -:class:`~bson.codec_options.CodecOptions`, :class:`datetime.datetime` objects -will be timezone-aware and have a ``tzinfo`` property that reflects the UTC -timezone. - -PyMongo 3.1 introduced a ``tzinfo`` property that can be set on -:class:`~bson.codec_options.CodecOptions` to convert :class:`datetime.datetime` -objects to local time automatically. For example, if we wanted to read all times -out of MongoDB in US/Pacific time: - - >>> from bson.codec_options import CodecOptions - >>> db.times.find_one()['date'] - datetime.datetime(2002, 10, 27, 14, 0) - >>> aware_times = db.times.with_options(codec_options=CodecOptions( - ... tz_aware=True, - ... tzinfo=ZoneInfo("US/Pacific"))) - >>> result = aware_times.find_one()['date'] - datetime.datetime(2002, 10, 27, 6, 0, # doctest: +NORMALIZE_WHITESPACE - tzinfo=) - -.. _handling-out-of-range-datetimes: - -Handling out of range datetimes -------------------------------- - -Python's :class:`~datetime.datetime` can only represent datetimes within the -range allowed by -:attr:`~datetime.datetime.min` and :attr:`~datetime.datetime.max`, whereas -the range of datetimes allowed in BSON can represent any 64-bit number -of milliseconds from the Unix epoch. To deal with this, we can use the -:class:`bson.datetime_ms.DatetimeMS` object, which is a wrapper for the -:class:`int` built-in. - -To decode UTC datetime values as :class:`~bson.datetime_ms.DatetimeMS`, -:class:`~bson.codec_options.CodecOptions` should have its -``datetime_conversion`` parameter set to one of the options available in -:class:`bson.datetime_ms.DatetimeConversion`. These include -:attr:`~bson.datetime_ms.DatetimeConversion.DATETIME`, -:attr:`~bson.datetime_ms.DatetimeConversion.DATETIME_MS`, -:attr:`~bson.datetime_ms.DatetimeConversion.DATETIME_AUTO`, -:attr:`~bson.datetime_ms.DatetimeConversion.DATETIME_CLAMP`. 
-:attr:`~bson.datetime_ms.DatetimeConversion.DATETIME` is the default -option and has the behavior of raising an :class:`~builtin.OverflowError` upon -attempting to decode an out-of-range date. -:attr:`~bson.datetime_ms.DatetimeConversion.DATETIME_MS` will only return -:class:`~bson.datetime_ms.DatetimeMS` objects, regardless of whether the -represented datetime is in- or out-of-range: - -.. doctest:: - - >>> from datetime import datetime - >>> from bson import encode, decode - >>> from bson.datetime_ms import DatetimeMS - >>> from bson.codec_options import CodecOptions, DatetimeConversion - >>> x = encode({"x": datetime(1970, 1, 1)}) - >>> codec_ms = CodecOptions(datetime_conversion=DatetimeConversion.DATETIME_MS) - >>> decode(x, codec_options=codec_ms) - {'x': DatetimeMS(0)} - -:attr:`~bson.datetime_ms.DatetimeConversion.DATETIME_AUTO` will return -:class:`~datetime.datetime` if the underlying UTC datetime is within range, -or :class:`~bson.datetime_ms.DatetimeMS` if the underlying datetime -cannot be represented using the builtin Python :class:`~datetime.datetime`: - -.. doctest:: - - >>> x = encode({"x": datetime(1970, 1, 1)}) - >>> y = encode({"x": DatetimeMS(-(2**62))}) - >>> codec_auto = CodecOptions(datetime_conversion=DatetimeConversion.DATETIME_AUTO) - >>> decode(x, codec_options=codec_auto) - {'x': datetime.datetime(1970, 1, 1, 0, 0)} - >>> decode(y, codec_options=codec_auto) - {'x': DatetimeMS(-4611686018427387904)} - -:attr:`~bson.datetime_ms.DatetimeConversion.DATETIME_CLAMP` will clamp -resulting :class:`~datetime.datetime` objects to be within -:attr:`~datetime.datetime.min` and :attr:`~datetime.datetime.max` -(trimmed to ``999000`` microseconds): - -.. 
doctest:: - - >>> x = encode({"x": DatetimeMS(2**62)}) - >>> y = encode({"x": DatetimeMS(-(2**62))}) - >>> codec_clamp = CodecOptions(datetime_conversion=DatetimeConversion.DATETIME_CLAMP) - >>> decode(x, codec_options=codec_clamp) - {'x': datetime.datetime(9999, 12, 31, 23, 59, 59, 999000)} - >>> decode(y, codec_options=codec_clamp) - {'x': datetime.datetime(1, 1, 1, 0, 0)} - -:class:`~bson.datetime_ms.DatetimeMS` objects have support for rich comparison -methods against other instances of :class:`~bson.datetime_ms.DatetimeMS`. -They can also be converted to :class:`~datetime.datetime` objects with -:meth:`~bson.datetime_ms.DatetimeMS.to_datetime()`. diff --git a/doc/examples/encryption.rst b/doc/examples/encryption.rst deleted file mode 100644 index 4b3de8d8d0..0000000000 --- a/doc/examples/encryption.rst +++ /dev/null @@ -1,840 +0,0 @@ -.. _In-Use Encryption: - -In-Use Encryption -================= - -.. _Client-Side Field Level Encryption: - -Client-Side Field Level Encryption ----------------------------------- - -New in MongoDB 4.2, client-side field level encryption allows an application -to encrypt specific data fields in addition to pre-existing MongoDB -encryption features such as `Encryption at Rest -`_ and -`TLS/SSL (Transport Encryption) -`_. - -With field level encryption, applications can encrypt fields in documents -*prior* to transmitting data over the wire to the server. Client-side field -level encryption supports workloads where applications must guarantee that -unauthorized parties, including server administrators, cannot read the -encrypted data. - -.. seealso:: The MongoDB documentation on `Client Side Field Level Encryption `_. - -Dependencies -~~~~~~~~~~~~ - -To get started using client-side field level encryption in your project, -you will need to install the -`pymongocrypt `_ and -`pymongo-auth-aws `_ libraries -as well as the driver itself. 
Install both the driver and a compatible -version of the dependencies like this:: - - $ python -m pip install 'pymongo[encryption]' - -Note that installing on Linux requires pip 19 or later for manylinux2010 wheel -support. For more information about installing pymongocrypt see -`the installation instructions on the project's PyPI page -`_. - -Additionally, either `crypt_shared`_ or `mongocryptd`_ are required in order -to use *automatic* client-side encryption. - -crypt_shared -```````````` - -The Automatic Encryption Shared Library (crypt_shared) provides the same -functionality as `mongocryptd`_, but does not require you to spawn another -process to perform automatic encryption. - -By default, pymongo attempts to load crypt_shared from the system and if -found uses it automatically. To load crypt_shared from another location, -use the ``crypt_shared_lib_path`` argument to -:class:`~pymongo.encryption_options.AutoEncryptionOpts`. -If pymongo cannot load crypt_shared it will attempt to fallback to using -`mongocryptd`_ by default. Set ``crypt_shared_lib_required=True`` to make -the app always use crypt_shared and fail if it could not be loaded. - -For detailed installation instructions see -`the MongoDB documentation on Automatic Encryption Shared Library -`_. - -mongocryptd -``````````` - -The ``mongocryptd`` binary is required for automatic client-side encryption -and is included as a component in the `MongoDB Enterprise Server package -`_. -For detailed installation instructions see -`the MongoDB documentation on mongocryptd -`_. - -``mongocryptd`` performs the following: - -- Parses the automatic encryption rules specified to the database connection. - If the JSON schema contains invalid automatic encryption syntax or any - document validation syntax, ``mongocryptd`` returns an error. -- Uses the specified automatic encryption rules to mark fields in read and - write operations for encryption. 
-- Rejects read/write operations that may return unexpected or incorrect results - when applied to an encrypted field. For supported and unsupported operations, - see `Read/Write Support with Automatic Field Level Encryption - `_. - -A MongoClient configured with auto encryption will automatically spawn the -``mongocryptd`` process from the application's ``PATH``. Applications can -control the spawning behavior as part of the automatic encryption options. -For example to set the path to the ``mongocryptd`` process:: - - auto_encryption_opts = AutoEncryptionOpts( - ..., - mongocryptd_spawn_path='/path/to/mongocryptd') - -To control the logging output of ``mongocryptd`` pass options using -``mongocryptd_spawn_args``:: - - auto_encryption_opts = AutoEncryptionOpts( - ..., - mongocryptd_spawn_args=['--logpath=/path/to/mongocryptd.log', '--logappend']) - -If your application wishes to manage the ``mongocryptd`` process manually, -it is possible to disable spawning ``mongocryptd``:: - - auto_encryption_opts = AutoEncryptionOpts( - ..., - mongocryptd_bypass_spawn=True, - # URI of the local ``mongocryptd`` process. - mongocryptd_uri='mongodb://localhost:27020') - -``mongocryptd`` is only responsible for supporting automatic client-side field -level encryption and does not itself perform any encryption or decryption. - -.. _automatic-client-side-encryption: - -Automatic Client-Side Field Level Encryption -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Automatic client-side field level encryption is enabled by creating a -:class:`~pymongo.mongo_client.MongoClient` with the ``auto_encryption_opts`` -option set to an instance of -:class:`~pymongo.encryption_options.AutoEncryptionOpts`. The following -examples show how to setup automatic client-side field level encryption -using :class:`~pymongo.encryption.ClientEncryption` to create a new -encryption data key. - -.. 
note:: Automatic client-side field level encryption requires MongoDB >=4.2 - enterprise or a MongoDB >=4.2 Atlas cluster. The community version of the - server supports automatic decryption as well as - :ref:`explicit-client-side-encryption`. - -Providing Local Automatic Encryption Rules -`````````````````````````````````````````` - -The following example shows how to specify automatic encryption rules via the -``schema_map`` option. The automatic encryption rules are expressed using a -`strict subset of the JSON Schema syntax -`_. - -Supplying a ``schema_map`` provides more security than relying on -JSON Schemas obtained from the server. It protects against a -malicious server advertising a false JSON Schema, which could trick -the client into sending unencrypted data that should be encrypted. - -JSON Schemas supplied in the ``schema_map`` only apply to configuring -automatic client-side field level encryption. Other validation -rules in the JSON schema will not be enforced by the driver and -will result in an error. - -.. code-block:: python - - import os - from bson.codec_options import CodecOptions - from bson import json_util - from pymongo import MongoClient - from pymongo.encryption import Algorithm, ClientEncryption - from pymongo.encryption_options import AutoEncryptionOpts - - - def create_json_schema_file(kms_providers, key_vault_namespace, key_vault_client): - client_encryption = ClientEncryption( - kms_providers, - key_vault_namespace, - key_vault_client, - # The CodecOptions class used for encrypting and decrypting. - # This should be the same CodecOptions instance you have configured - # on MongoClient, Database, or Collection. We will not be calling - # encrypt() or decrypt() in this example so we can use any - # CodecOptions. - CodecOptions(), - ) - - # Create a new data key and json schema for the encryptedField. 
- # https://dochub.mongodb.org/core/client-side-field-level-encryption-automatic-encryption-rules - data_key_id = client_encryption.create_data_key( - "local", key_alt_names=["pymongo_encryption_example_1"] - ) - schema = { - "properties": { - "encryptedField": { - "encrypt": { - "keyId": [data_key_id], - "bsonType": "string", - "algorithm": Algorithm.AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic, - } - } - }, - "bsonType": "object", - } - # Use CANONICAL_JSON_OPTIONS so that other drivers and tools will be - # able to parse the MongoDB extended JSON file. - json_schema_string = json_util.dumps( - schema, json_options=json_util.CANONICAL_JSON_OPTIONS - ) - - with open("jsonSchema.json", "w") as file: - file.write(json_schema_string) - - - def main(): - # The MongoDB namespace (db.collection) used to store the - # encrypted documents in this example. - encrypted_namespace = "test.coll" - - # This must be the same master key that was used to create - # the encryption key. - local_master_key = os.urandom(96) - kms_providers = {"local": {"key": local_master_key}} - - # The MongoDB namespace (db.collection) used to store - # the encryption data keys. - key_vault_namespace = "encryption.__pymongoTestKeyVault" - key_vault_db_name, key_vault_coll_name = key_vault_namespace.split(".", 1) - - # The MongoClient used to access the key vault (key_vault_namespace). - key_vault_client = MongoClient() - key_vault = key_vault_client[key_vault_db_name][key_vault_coll_name] - # Ensure that two data keys cannot share the same keyAltName. - key_vault.drop() - key_vault.create_index( - "keyAltNames", - unique=True, - partialFilterExpression={"keyAltNames": {"$exists": True}}, - ) - - create_json_schema_file(kms_providers, key_vault_namespace, key_vault_client) - - # Load the JSON Schema and construct the local schema_map option. 
- with open("jsonSchema.json", "r") as file: - json_schema_string = file.read() - json_schema = json_util.loads(json_schema_string) - schema_map = {encrypted_namespace: json_schema} - - auto_encryption_opts = AutoEncryptionOpts( - kms_providers, key_vault_namespace, schema_map=schema_map - ) - - client = MongoClient(auto_encryption_opts=auto_encryption_opts) - db_name, coll_name = encrypted_namespace.split(".", 1) - coll = client[db_name][coll_name] - # Clear old data - coll.drop() - - coll.insert_one({"encryptedField": "123456789"}) - print("Decrypted document: %s" % (coll.find_one(),)) - unencrypted_coll = MongoClient()[db_name][coll_name] - print("Encrypted document: %s" % (unencrypted_coll.find_one(),)) - - - if __name__ == "__main__": - main() - - -Server-Side Field Level Encryption Enforcement -`````````````````````````````````````````````` - -MongoDB >=4.2 servers supports using schema validation to enforce encryption -of specific fields in a collection. This schema validation will prevent an -application from inserting unencrypted values for any fields marked with the -``"encrypt"`` JSON schema keyword. - -The following example shows how to setup automatic client-side field level -encryption using -:class:`~pymongo.encryption.ClientEncryption` to create a new encryption -data key and create a collection with the -`Automatic Encryption JSON Schema Syntax -`_: - -.. code-block:: python - - import os - - from bson.codec_options import CodecOptions - from bson.binary import STANDARD - - from pymongo import MongoClient - from pymongo.encryption import Algorithm, ClientEncryption - from pymongo.encryption_options import AutoEncryptionOpts - from pymongo.errors import OperationFailure - from pymongo.write_concern import WriteConcern - - - def main(): - # The MongoDB namespace (db.collection) used to store the - # encrypted documents in this example. 
- encrypted_namespace = "test.coll" - - # This must be the same master key that was used to create - # the encryption key. - local_master_key = os.urandom(96) - kms_providers = {"local": {"key": local_master_key}} - - # The MongoDB namespace (db.collection) used to store - # the encryption data keys. - key_vault_namespace = "encryption.__pymongoTestKeyVault" - key_vault_db_name, key_vault_coll_name = key_vault_namespace.split(".", 1) - - # The MongoClient used to access the key vault (key_vault_namespace). - key_vault_client = MongoClient() - key_vault = key_vault_client[key_vault_db_name][key_vault_coll_name] - # Ensure that two data keys cannot share the same keyAltName. - key_vault.drop() - key_vault.create_index( - "keyAltNames", - unique=True, - partialFilterExpression={"keyAltNames": {"$exists": True}}, - ) - - client_encryption = ClientEncryption( - kms_providers, - key_vault_namespace, - key_vault_client, - # The CodecOptions class used for encrypting and decrypting. - # This should be the same CodecOptions instance you have configured - # on MongoClient, Database, or Collection. We will not be calling - # encrypt() or decrypt() in this example so we can use any - # CodecOptions. - CodecOptions(), - ) - - # Create a new data key and json schema for the encryptedField. - data_key_id = client_encryption.create_data_key( - "local", key_alt_names=["pymongo_encryption_example_2"] - ) - json_schema = { - "properties": { - "encryptedField": { - "encrypt": { - "keyId": [data_key_id], - "bsonType": "string", - "algorithm": Algorithm.AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic, - } - } - }, - "bsonType": "object", - } - - auto_encryption_opts = AutoEncryptionOpts(kms_providers, key_vault_namespace) - client = MongoClient(auto_encryption_opts=auto_encryption_opts) - db_name, coll_name = encrypted_namespace.split(".", 1) - db = client[db_name] - # Clear old data - db.drop_collection(coll_name) - # Create the collection with the encryption JSON Schema. 
- db.create_collection( - coll_name, - # uuid_representation=STANDARD is required to ensure that any - # UUIDs in the $jsonSchema document are encoded to BSON Binary - # with the standard UUID subtype 4. This is only needed when - # running the "create" collection command with an encryption - # JSON Schema. - codec_options=CodecOptions(uuid_representation=STANDARD), - write_concern=WriteConcern(w="majority"), - validator={"$jsonSchema": json_schema}, - ) - coll = client[db_name][coll_name] - - coll.insert_one({"encryptedField": "123456789"}) - print("Decrypted document: %s" % (coll.find_one(),)) - unencrypted_coll = MongoClient()[db_name][coll_name] - print("Encrypted document: %s" % (unencrypted_coll.find_one(),)) - try: - unencrypted_coll.insert_one({"encryptedField": "123456789"}) - except OperationFailure as exc: - print("Unencrypted insert failed: %s" % (exc.details,)) - - - if __name__ == "__main__": - main() - - -.. _explicit-client-side-encryption: - -Explicit Encryption -~~~~~~~~~~~~~~~~~~~ - -Explicit encryption is a MongoDB community feature and does not use the -``mongocryptd`` process. Explicit encryption is provided by the -:class:`~pymongo.encryption.ClientEncryption` class, for example: - -.. code-block:: python - - import os - - from pymongo import MongoClient - from pymongo.encryption import Algorithm, ClientEncryption - - - def main(): - # This must be the same master key that was used to create - # the encryption key. - local_master_key = os.urandom(96) - kms_providers = {"local": {"key": local_master_key}} - - # The MongoDB namespace (db.collection) used to store - # the encryption data keys. - key_vault_namespace = "encryption.__pymongoTestKeyVault" - key_vault_db_name, key_vault_coll_name = key_vault_namespace.split(".", 1) - - # The MongoClient used to read/write application data. - client = MongoClient() - coll = client.test.coll - # Clear old data - coll.drop() - - # Set up the key vault (key_vault_namespace) for this example. 
- key_vault = client[key_vault_db_name][key_vault_coll_name] - # Ensure that two data keys cannot share the same keyAltName. - key_vault.drop() - key_vault.create_index( - "keyAltNames", - unique=True, - partialFilterExpression={"keyAltNames": {"$exists": True}}, - ) - - client_encryption = ClientEncryption( - kms_providers, - key_vault_namespace, - # The MongoClient to use for reading/writing to the key vault. - # This can be the same MongoClient used by the main application. - client, - # The CodecOptions class used for encrypting and decrypting. - # This should be the same CodecOptions instance you have configured - # on MongoClient, Database, or Collection. - coll.codec_options, - ) - - # Create a new data key for the encryptedField. - data_key_id = client_encryption.create_data_key( - "local", key_alt_names=["pymongo_encryption_example_3"] - ) - - # Explicitly encrypt a field: - encrypted_field = client_encryption.encrypt( - "123456789", - Algorithm.AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic, - key_id=data_key_id, - ) - coll.insert_one({"encryptedField": encrypted_field}) - doc = coll.find_one() - print("Encrypted document: %s" % (doc,)) - - # Explicitly decrypt the field: - doc["encryptedField"] = client_encryption.decrypt(doc["encryptedField"]) - print("Decrypted document: %s" % (doc,)) - - # Cleanup resources. - client_encryption.close() - client.close() - - - if __name__ == "__main__": - main() - - -Explicit Encryption with Automatic Decryption -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Although automatic encryption requires MongoDB >=4.2 enterprise or a -MongoDB >=4.2 Atlas cluster, automatic *decryption* is supported for all users. -To configure automatic *decryption* without automatic *encryption* set -``bypass_auto_encryption=True`` in -:class:`~pymongo.encryption_options.AutoEncryptionOpts`: - -.. 
code-block:: python - - import os - - from pymongo import MongoClient - from pymongo.encryption import Algorithm, ClientEncryption - from pymongo.encryption_options import AutoEncryptionOpts - - - def main(): - # This must be the same master key that was used to create - # the encryption key. - local_master_key = os.urandom(96) - kms_providers = {"local": {"key": local_master_key}} - - # The MongoDB namespace (db.collection) used to store - # the encryption data keys. - key_vault_namespace = "encryption.__pymongoTestKeyVault" - key_vault_db_name, key_vault_coll_name = key_vault_namespace.split(".", 1) - - # bypass_auto_encryption=True disable automatic encryption but keeps - # the automatic _decryption_ behavior. bypass_auto_encryption will - # also disable spawning mongocryptd. - auto_encryption_opts = AutoEncryptionOpts( - kms_providers, key_vault_namespace, bypass_auto_encryption=True - ) - - client = MongoClient(auto_encryption_opts=auto_encryption_opts) - coll = client.test.coll - # Clear old data - coll.drop() - - # Set up the key vault (key_vault_namespace) for this example. - key_vault = client[key_vault_db_name][key_vault_coll_name] - # Ensure that two data keys cannot share the same keyAltName. - key_vault.drop() - key_vault.create_index( - "keyAltNames", - unique=True, - partialFilterExpression={"keyAltNames": {"$exists": True}}, - ) - - client_encryption = ClientEncryption( - kms_providers, - key_vault_namespace, - # The MongoClient to use for reading/writing to the key vault. - # This can be the same MongoClient used by the main application. - client, - # The CodecOptions class used for encrypting and decrypting. - # This should be the same CodecOptions instance you have configured - # on MongoClient, Database, or Collection. - coll.codec_options, - ) - - # Create a new data key for the encryptedField. 
- data_key_id = client_encryption.create_data_key( - "local", key_alt_names=["pymongo_encryption_example_4"] - ) - - # Explicitly encrypt a field: - encrypted_field = client_encryption.encrypt( - "123456789", - Algorithm.AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic, - key_alt_name="pymongo_encryption_example_4", - ) - coll.insert_one({"encryptedField": encrypted_field}) - # Automatically decrypts any encrypted fields. - doc = coll.find_one() - print("Decrypted document: %s" % (doc,)) - unencrypted_coll = MongoClient().test.coll - print("Encrypted document: %s" % (unencrypted_coll.find_one(),)) - - # Cleanup resources. - client_encryption.close() - client.close() - - - if __name__ == "__main__": - main() - - -.. _CSFLE on-demand credentials: - - -CSFLE on-demand credentials -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -``pymongocrypt`` 1.4 adds support for fetching on-demand KMS credentials for -AWS, GCP, and Azure cloud environments. - -To enable the driver's behavior to obtain credentials from the environment, add the appropriate key ("aws", "gcp", or "azure") with an empty map to -"kms_providers" in either :class:`~pymongo.encryption_options.AutoEncryptionOpts` or :class:`~pymongo.encryption.ClientEncryption` options. - -An application using AWS credentials would look like: - -.. code-block:: python - - from pymongo import MongoClient - from pymongo.encryption import ClientEncryption - - client = MongoClient() - client_encryption = ClientEncryption( - # The empty dictionary enables on-demand credentials. 
- kms_providers={"aws": {}}, - key_vault_namespace="keyvault.datakeys", - key_vault_client=client, - codec_options=client.codec_options, - ) - master_key = { - "region": "us-east-1", - "key": ("arn:aws:kms:us-east-1:123456789:key/89fcc2c4-08b0-4bd9-9f25-e30687b580d0"), - } - client_encryption.create_data_key("aws", master_key) - -The above will enable the same behavior of obtaining AWS credentials from the environment as is used for :ref:`MONGODB-AWS` authentication, including the -caching to avoid rate limiting. - -An application using GCP credentials would look like: - -.. code-block:: python - - from pymongo import MongoClient - from pymongo.encryption import ClientEncryption - - client = MongoClient() - client_encryption = ClientEncryption( - # The empty dictionary enables on-demand credentials. - kms_providers={"gcp": {}}, - key_vault_namespace="keyvault.datakeys", - key_vault_client=client, - codec_options=client.codec_options, - ) - master_key = { - "projectId": "my-project", - "location": "global", - "keyRing": "key-ring-csfle", - "keyName": "key-name-csfle", - } - client_encryption.create_data_key("gcp", master_key) - -The driver will query the `VM instance metadata `_ to obtain credentials. - -An application using Azure credentials would look like, this time using -:class:`~pymongo.encryption_options.AutoEncryptionOpts`: - -.. code-block:: python - - from pymongo import MongoClient - from pymongo.encryption_options import AutoEncryptionOpts - - # The empty dictionary enables on-demand credentials. - kms_providers = {"azure": {}} - key_vault_namespace = "keyvault.datakeys" - auto_encryption_opts = AutoEncryptionOpts(kms_providers, key_vault_namespace) - client = MongoClient(auto_encryption_opts=auto_encryption_opts) - coll = client.test.coll - coll.insert_one({"encryptedField": "123456789"}) - -The driver will `acquire an access token `_ from the Azure VM. - -.. _Queryable Encryption: - -Queryable Encryption --------------------- - -.. 
_automatic-queryable-client-side-encryption: - -Automatic Queryable Encryption -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Automatic Queryable Encryption requires MongoDB 7.0+ Enterprise or a MongoDB 7.0+ Atlas cluster. - -Queryable Encryption is the second version of Client-Side Field Level Encryption. -Data is encrypted client-side. Queryable Encryption supports indexed encrypted fields, -which are further processed server-side. - -Automatic encryption in Queryable Encryption is configured with an ``encrypted_fields`` mapping, -as demonstrated by the following example: - -.. code-block:: python - - import os - from bson.codec_options import CodecOptions - from pymongo import MongoClient - from pymongo.encryption import ClientEncryption - from pymongo.encryption_options import AutoEncryptionOpts - - local_master_key = os.urandom(96) - kms_providers = {"local": {"key": local_master_key}} - key_vault_namespace = "keyvault.datakeys" - key_vault_client = MongoClient() - client_encryption = ClientEncryption( - kms_providers, key_vault_namespace, key_vault_client, CodecOptions() - ) - key_vault = key_vault_client["keyvault"]["datakeys"] - key_vault.drop() - # Ensure that two data keys cannot share the same keyAltName. 
- key_vault.create_index( - "keyAltNames", - unique=True, - partialFilterExpression={"keyAltNames": {"$exists": True}}, - ) - key1_id = client_encryption.create_data_key("local", key_alt_names=["firstName"]) - key2_id = client_encryption.create_data_key("local", key_alt_names=["lastName"]) - - encrypted_fields_map = { - "default.encryptedCollection": { - "fields": [ - { - "path": "firstName", - "bsonType": "string", - "keyId": key1_id, - "queries": [{"queryType": "equality"}], - }, - { - "path": "lastName", - "bsonType": "string", - "keyId": key2_id, - }, - ], - } - } - - auto_encryption_opts = AutoEncryptionOpts( - kms_providers, - key_vault_namespace, - encrypted_fields_map=encrypted_fields_map, - ) - client = MongoClient(auto_encryption_opts=auto_encryption_opts) - client.default.drop_collection("encryptedCollection") - coll = client.default.create_collection("encryptedCollection") - coll.insert_one({"_id": 1, "firstName": "Jane", "lastName": "Doe"}) - docs = list(coll.find({"firstName": "Jane"})) - print(docs) - -In the above example, the ``firstName`` and ``lastName`` fields are -automatically encrypted and decrypted. - -Explicit Queryable Encryption -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Explicit Queryable Encryption requires MongoDB 7.0+. - -Queryable Encryption is the second version of Client-Side Field Level Encryption. -Data is encrypted client-side. Queryable Encryption supports indexed encrypted fields, -which are further processed server-side. - -Explicit encryption in Queryable Encryption is performed using the ``encrypt`` and ``decrypt`` -methods. Automatic encryption (to allow the ``find_one`` to automatically decrypt) is configured -using an ``encrypted_fields`` mapping, as demonstrated by the following example: - -.. 
code-block:: python - - import os - from pymongo import MongoClient - from pymongo.encryption import ( - Algorithm, - AutoEncryptionOpts, - ClientEncryption, - QueryType, - ) - - - def main(): - # This must be the same master key that was used to create - # the encryption key. - local_master_key = os.urandom(96) - kms_providers = {"local": {"key": local_master_key}} - - # The MongoDB namespace (db.collection) used to store - # the encryption data keys. - key_vault_namespace = "encryption.__pymongoTestKeyVault" - key_vault_db_name, key_vault_coll_name = key_vault_namespace.split(".", 1) - - # Set up the key vault (key_vault_namespace) for this example. - client = MongoClient() - key_vault = client[key_vault_db_name][key_vault_coll_name] - - # Ensure that two data keys cannot share the same keyAltName. - key_vault.drop() - key_vault.create_index( - "keyAltNames", - unique=True, - partialFilterExpression={"keyAltNames": {"$exists": True}}, - ) - - client_encryption = ClientEncryption( - kms_providers, - key_vault_namespace, - # The MongoClient to use for reading/writing to the key vault. - # This can be the same MongoClient used by the main application. - client, - # The CodecOptions class used for encrypting and decrypting. - # This should be the same CodecOptions instance you have configured - # on MongoClient, Database, or Collection. - client.codec_options, - ) - - # Create a new data key for the encryptedField. 
- indexed_key_id = client_encryption.create_data_key("local") - unindexed_key_id = client_encryption.create_data_key("local") - - encrypted_fields = { - "fields": [ - { - "keyId": indexed_key_id, - "path": "encryptedIndexed", - "bsonType": "string", - "queries": {"queryType": "equality"}, - }, - { - "keyId": unindexed_key_id, - "path": "encryptedUnindexed", - "bsonType": "string", - }, - ], - } - - opts = AutoEncryptionOpts( - {"local": {"key": local_master_key}}, - key_vault.full_name, - bypass_query_analysis=True, - key_vault_client=client, - ) - - # The MongoClient used to read/write application data. - encrypted_client = MongoClient(auto_encryption_opts=opts) - encrypted_client.drop_database("test") - db = encrypted_client.test - - # Create the collection with encrypted fields. - coll = db.create_collection("coll", encryptedFields=encrypted_fields) - - # Create and encrypt an indexed and unindexed value. - val = "encrypted indexed value" - unindexed_val = "encrypted unindexed value" - insert_payload_indexed = client_encryption.encrypt( - val, Algorithm.INDEXED, indexed_key_id, contention_factor=1 - ) - insert_payload_unindexed = client_encryption.encrypt( - unindexed_val, Algorithm.UNINDEXED, unindexed_key_id - ) - - # Insert the payloads. - coll.insert_one( - { - "encryptedIndexed": insert_payload_indexed, - "encryptedUnindexed": insert_payload_unindexed, - } - ) - - # Encrypt our find payload using QueryType.EQUALITY. - # The value of "indexed_key_id" must be the same as used to encrypt - # the values above. - find_payload = client_encryption.encrypt( - val, - Algorithm.INDEXED, - indexed_key_id, - query_type=QueryType.EQUALITY, - contention_factor=1, - ) - - # Find the document we inserted using the encrypted payload. - # The returned document is automatically decrypted. - doc = coll.find_one({"encryptedIndexed": find_payload}) - print("Returned document: %s" % (doc,)) - - # Cleanup resources. 
- client_encryption.close() - encrypted_client.close() - client.close() - - - if __name__ == "__main__": - main() diff --git a/doc/examples/geo.rst b/doc/examples/geo.rst deleted file mode 100644 index e7da156720..0000000000 --- a/doc/examples/geo.rst +++ /dev/null @@ -1,109 +0,0 @@ -Geospatial Indexing Example -=========================== - -.. testsetup:: - - from pymongo import MongoClient - - client = MongoClient() - client.drop_database("geo_example") - -This example shows how to create and use a :data:`~pymongo.GEO2D` -index in PyMongo. To create a spherical (earth-like) geospatial index use :data:`~pymongo.GEOSPHERE` instead. - -.. seealso:: The MongoDB documentation on `Geospatial Indexes `_. - -Creating a Geospatial Index ---------------------------- - -Creating a geospatial index in pymongo is easy: - -.. doctest:: - - >>> from pymongo import MongoClient, GEO2D - >>> db = MongoClient().geo_example - >>> db.places.create_index([("loc", GEO2D)]) - 'loc_2d' - -Inserting Places ----------------- - -Locations in MongoDB are represented using either embedded documents -or lists where the first two elements are coordinates. Here, we'll -insert a couple of example locations: - -.. doctest:: - - >>> result = db.places.insert_many( - ... [{"loc": [2, 5]}, {"loc": [30, 5]}, {"loc": [1, 2]}, {"loc": [4, 4]}] - ... ) - >>> result.inserted_ids - [ObjectId('...'), ObjectId('...'), ObjectId('...'), ObjectId('...')] - -.. note:: If specifying latitude and longitude coordinates in :data:`~pymongo.GEOSPHERE`, list the **longitude** first and then **latitude**. - -Querying --------- - -Using the geospatial index we can find documents near another point: - -.. doctest:: - - >>> import pprint - >>> for doc in db.places.find({"loc": {"$near": [3, 6]}}).limit(3): - ... pprint.pprint(doc) - ... - {'_id': ObjectId('...'), 'loc': [2, 5]} - {'_id': ObjectId('...'), 'loc': [4, 4]} - {'_id': ObjectId('...'), 'loc': [1, 2]} - -.. 
note:: If using :data:`pymongo.GEOSPHERE`, using $nearSphere is recommended. - -The $maxDistance operator requires the use of :class:`~bson.son.SON`: - -.. doctest:: - - >>> from bson.son import SON - >>> query = {"loc": SON([("$near", [3, 6]), ("$maxDistance", 100)])} - >>> for doc in db.places.find(query).limit(3): - ... pprint.pprint(doc) - ... - {'_id': ObjectId('...'), 'loc': [2, 5]} - {'_id': ObjectId('...'), 'loc': [4, 4]} - {'_id': ObjectId('...'), 'loc': [1, 2]} - -It's also possible to query for all items within a given rectangle -(specified by lower-left and upper-right coordinates): - -.. doctest:: - - >>> query = {"loc": {"$within": {"$box": [[2, 2], [5, 6]]}}} - >>> for doc in db.places.find(query).sort("_id"): - ... pprint.pprint(doc) - ... - {'_id': ObjectId('...'), 'loc': [2, 5]} - {'_id': ObjectId('...'), 'loc': [4, 4]} - -Or circle (specified by center point and radius): - -.. doctest:: - - >>> query = {"loc": {"$within": {"$center": [[0, 0], 6]}}} - >>> for doc in db.places.find(query).sort("_id"): - ... pprint.pprint(doc) - ... - {'_id': ObjectId('...'), 'loc': [2, 5]} - {'_id': ObjectId('...'), 'loc': [1, 2]} - {'_id': ObjectId('...'), 'loc': [4, 4]} - -geoNear queries are also supported using :class:`~bson.son.SON`:: - - >>> from bson.son import SON - >>> db.command(SON([('geoNear', 'places'), ('near', [1, 2])])) - {'ok': 1.0, 'stats': ...} - -.. warning:: Starting in MongoDB version 4.0, MongoDB deprecates the **geoNear** command. Use one of the following operations instead. - - * $geoNear - aggregation stage. - * $near - query operator. - * $nearSphere - query operator. diff --git a/doc/examples/gevent.rst b/doc/examples/gevent.rst deleted file mode 100644 index f62697d19f..0000000000 --- a/doc/examples/gevent.rst +++ /dev/null @@ -1,52 +0,0 @@ -Gevent -====== - -PyMongo supports `Gevent `_. Simply call Gevent's -``monkey.patch_all()`` before loading any other modules: - -.. 
code-block:: pycon - - >>> # You must call patch_all() *before* importing any other modules - >>> from gevent import monkey - >>> _ = monkey.patch_all() - >>> from pymongo import MongoClient - >>> client = MongoClient() - -PyMongo uses thread and socket functions from the Python standard library. -Gevent's monkey-patching replaces those standard functions so that PyMongo -does asynchronous I/O with non-blocking sockets, and schedules operations -on greenlets instead of threads. - -Avoid blocking in Hub.join --------------------------- - -By default, PyMongo uses threads to discover and monitor your servers' topology -(see :ref:`health-monitoring`). If you execute ``monkey.patch_all()`` when -your application first begins, PyMongo automatically uses greenlets instead -of threads. - -When shutting down, if your application calls :meth:`~gevent.hub.Hub.join` on -Gevent's :class:`~gevent.hub.Hub` without first terminating these background -greenlets, the call to :meth:`~gevent.hub.Hub.join` blocks indefinitely. You -therefore **must close or dereference** any active -:class:`~pymongo.mongo_client.MongoClient` before exiting. - -An example solution to this issue in some application frameworks is a signal -handler to end background greenlets when your application receives SIGHUP: - -.. code-block:: python - - import signal - - - def graceful_reload(signum, traceback): - """Explicitly close some global MongoClient object.""" - client.close() - - - signal.signal(signal.SIGHUP, graceful_reload) - -Applications using uWSGI prior to 1.9.16 are affected by this issue, -or newer uWSGI versions with the ``-gevent-wait-for-hub`` option. -See `the uWSGI changelog for details -`_. diff --git a/doc/examples/gridfs.rst b/doc/examples/gridfs.rst deleted file mode 100644 index 52920adbda..0000000000 --- a/doc/examples/gridfs.rst +++ /dev/null @@ -1,84 +0,0 @@ -GridFS Example -============== - -.. 
testsetup:: - - from pymongo import MongoClient - - client = MongoClient() - client.drop_database("gridfs_example") - -This example shows how to use :mod:`gridfs` to store large binary -objects (e.g. files) in MongoDB. - -.. seealso:: The API docs for :mod:`gridfs`. - -.. seealso:: `This blog post - `_ - for some motivation behind this API. - -Setup ------ - -We start by creating a :class:`~gridfs.GridFS` instance to use: - -.. doctest:: - - >>> from pymongo import MongoClient - >>> import gridfs - >>> - >>> db = MongoClient().gridfs_example - >>> fs = gridfs.GridFS(db) - -Every :class:`~gridfs.GridFS` instance is created with and will -operate on a specific :class:`~pymongo.database.Database` instance. - -Saving and Retrieving Data --------------------------- - -The simplest way to work with :mod:`gridfs` is to use its key/value -interface (the :meth:`~gridfs.GridFS.put` and -:meth:`~gridfs.GridFS.get` methods). To write data to GridFS, use -:meth:`~gridfs.GridFS.put`: - -.. doctest:: - - >>> a = fs.put(b"hello world") - -:meth:`~gridfs.GridFS.put` creates a new file in GridFS, and returns -the value of the file document's ``"_id"`` key. Given that ``"_id"`` -we can use :meth:`~gridfs.GridFS.get` to get back the contents of the -file: - -.. doctest:: - - >>> fs.get(a).read() - b'hello world' - -:meth:`~gridfs.GridFS.get` returns a file-like object, so we get the -file's contents by calling :meth:`~gridfs.grid_file.GridOut.read`. - -In addition to putting a :class:`str` as a GridFS file, we can also -put any file-like object (an object with a :meth:`read` -method). GridFS will handle reading the file in chunk-sized segments -automatically. We can also add additional attributes to the file as -keyword arguments: - -.. doctest:: - - >>> b = fs.put(fs.get(a), filename="foo", bar="baz") - >>> out = fs.get(b) - >>> out.read() - b'hello world' - >>> out.filename - 'foo' - >>> out.bar - 'baz' - >>> out.upload_date - datetime.datetime(...) 
- -The attributes we set in :meth:`~gridfs.GridFS.put` are stored in the -file document, and retrievable after calling -:meth:`~gridfs.GridFS.get`. Some attributes (like ``"filename"``) are -special and are defined in the GridFS specification - see that -document for more details. diff --git a/doc/examples/high_availability.rst b/doc/examples/high_availability.rst deleted file mode 100644 index 80026153f8..0000000000 --- a/doc/examples/high_availability.rst +++ /dev/null @@ -1,367 +0,0 @@ -High Availability and PyMongo -============================= - -PyMongo makes it easy to write highly available applications whether -you use a `single replica set `_ -or a `large sharded cluster -`_. - -Connecting to a Replica Set ---------------------------- - -PyMongo makes working with `replica sets -`_ easy. Here we'll launch a new -replica set and show how to handle both initialization and normal -connections with PyMongo. - -.. seealso:: The MongoDB documentation on `replication `_. - -Starting a Replica Set -~~~~~~~~~~~~~~~~~~~~~~ - -The main `replica set documentation -`_ contains extensive information -about setting up a new replica set or migrating an existing MongoDB -setup, be sure to check that out. Here, we'll just do the bare minimum -to get a three node replica set setup locally. - -.. warning:: Replica sets should always use multiple nodes in - production - putting all set members on the same physical node is - only recommended for testing and development. - -We start three ``mongod`` processes, each on a different port and with -a different dbpath, but all using the same replica set name "foo". - -.. code-block:: bash - - $ mkdir -p /data/db0 /data/db1 /data/db2 - $ mongod --port 27017 --dbpath /data/db0 --replSet foo - -.. code-block:: bash - - $ mongod --port 27018 --dbpath /data/db1 --replSet foo - -.. 
code-block:: bash - - $ mongod --port 27019 --dbpath /data/db2 --replSet foo - -Initializing the Set -~~~~~~~~~~~~~~~~~~~~ - -At this point all of our nodes are up and running, but the set has yet -to be initialized. Until the set is initialized no node will become -the primary, and things are essentially "offline". - -To initialize the set we need to connect directly to a single node and run the -initiate command using the ``directConnection`` option:: - - >>> from pymongo import MongoClient - >>> c = MongoClient('localhost', 27017, directConnection=True) - -.. note:: We could have connected to any of the other nodes instead, - but only the node we initiate from is allowed to contain any - initial data. - -After connecting, we run the initiate command to get things started:: - - >>> config = {'_id': 'foo', 'members': [ - ... {'_id': 0, 'host': 'localhost:27017'}, - ... {'_id': 1, 'host': 'localhost:27018'}, - ... {'_id': 2, 'host': 'localhost:27019'}]} - >>> c.admin.command("replSetInitiate", config) - {'ok': 1.0, ...} - -The three ``mongod`` servers we started earlier will now coordinate -and come online as a replica set. - -Connecting to a Replica Set -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The initial connection as made above is a special case for an -uninitialized replica set. Normally we'll want to connect -differently. A connection to a replica set can be made using the -:meth:`~pymongo.mongo_client.MongoClient` constructor, specifying -one or more members of the set and optionally the replica set name. -Any of the following connects to the replica set we just created:: - - >>> MongoClient('localhost') - MongoClient(host=['localhost:27017'], ...) - >>> MongoClient('localhost', replicaset='foo') - MongoClient(host=['localhost:27017'], replicaset='foo', ...) - >>> MongoClient('localhost:27018', replicaset='foo') - MongoClient(['localhost:27018'], replicaset='foo', ...) 
- >>> MongoClient('localhost', 27019, replicaset='foo') - MongoClient(['localhost:27019'], replicaset='foo', ...) - >>> MongoClient('mongodb://localhost:27017,localhost:27018/') - MongoClient(['localhost:27017', 'localhost:27018'], ...) - >>> MongoClient('mongodb://localhost:27017,localhost:27018/?replicaSet=foo') - MongoClient(['localhost:27017', 'localhost:27018'], replicaset='foo', ...) - -The addresses passed to :meth:`~pymongo.mongo_client.MongoClient` are called -the *seeds*. As long as at least one of the seeds is online, MongoClient -discovers all the members in the replica set, and determines which is the -current primary and which are secondaries or arbiters. Each seed must be the -address of a single mongod. Multihomed and round robin DNS addresses are -**not** supported. - -The :class:`~pymongo.mongo_client.MongoClient` constructor is non-blocking: -the constructor returns immediately while the client connects to the replica -set using background threads. Note how, if you create a client and immediately -print the string representation of its -:attr:`~pymongo.mongo_client.MongoClient.nodes` attribute, the list may be -empty initially. If you wait a moment, MongoClient discovers the whole replica -set:: - - >>> from time import sleep - >>> c = MongoClient(replicaset='foo'); print(c.nodes); sleep(0.1); print(c.nodes) - frozenset([]) - frozenset([('localhost', 27019), ('localhost', 27017), ('localhost', 27018)]) - -You need not wait for replica set discovery in your application, however. -If you need to do any operation with a MongoClient, such as a -:meth:`~pymongo.collection.Collection.find` or an -:meth:`~pymongo.collection.Collection.insert_one`, the client waits to discover -a suitable member before it attempts the operation. - -Handling Failover -~~~~~~~~~~~~~~~~~ - -When a failover occurs, PyMongo will automatically attempt to find the -new primary node and perform subsequent operations on that node. 
This -can't happen completely transparently, however. Here we'll perform an -example failover to illustrate how everything behaves. First, we'll -connect to the replica set and perform a couple of basic operations:: - - >>> db = MongoClient("localhost", replicaSet='foo').test - >>> db.test.insert_one({"x": 1}).inserted_id - ObjectId('...') - >>> db.test.find_one() - {'x': 1, '_id': ObjectId('...')} - -By checking the host and port, we can see that we're connected to -*localhost:27017*, which is the current primary:: - - >>> db.client.address - ('localhost', 27017) - -Now let's bring down that node and see what happens when we run our -query again:: - - >>> db.test.find_one() - Traceback (most recent call last): - pymongo.errors.AutoReconnect: ... - -We get an :class:`~pymongo.errors.AutoReconnect` exception. This means -that the driver was not able to connect to the old primary (which -makes sense, as we killed the server), but that it will attempt to -automatically reconnect on subsequent operations. When this exception -is raised our application code needs to decide whether to retry the -operation or to simply continue, accepting the fact that the operation -might have failed. - -On subsequent attempts to run the query we might continue to see this -exception. Eventually, however, the replica set will failover and -elect a new primary (this should take no more than a couple of seconds in -general). At that point the driver will connect to the new primary and -the operation will succeed:: - - >>> db.test.find_one() - {'x': 1, '_id': ObjectId('...')} - >>> db.client.address - ('localhost', 27018) - -Bring the former primary back up. It will rejoin the set as a secondary. -Now we can move to the next section: distributing reads to secondaries. - -.. _secondary-reads: - -Secondary Reads -~~~~~~~~~~~~~~~ - -By default an instance of MongoClient sends queries to -the primary member of the replica set. 
To use secondaries for queries -we have to change the read preference:: - - >>> client = MongoClient( - ... 'localhost:27017', - ... replicaSet='foo', - ... readPreference='secondaryPreferred') - >>> client.read_preference - SecondaryPreferred(tag_sets=None) - -Now all queries will be sent to the secondary members of the set. If there are -no secondary members the primary will be used as a fallback. If you have -queries you would prefer to never send to the primary you can specify that -using the ``secondary`` read preference. - -By default the read preference of a :class:`~pymongo.database.Database` is -inherited from its MongoClient, and the read preference of a -:class:`~pymongo.collection.Collection` is inherited from its Database. To use -a different read preference use the -:meth:`~pymongo.mongo_client.MongoClient.get_database` method, or the -:meth:`~pymongo.database.Database.get_collection` method:: - - >>> from pymongo import ReadPreference - >>> client.read_preference - SecondaryPreferred(tag_sets=None) - >>> db = client.get_database('test', read_preference=ReadPreference.SECONDARY) - >>> db.read_preference - Secondary(tag_sets=None) - >>> coll = db.get_collection('test', read_preference=ReadPreference.PRIMARY) - >>> coll.read_preference - Primary() - -You can also change the read preference of an existing -:class:`~pymongo.collection.Collection` with the -:meth:`~pymongo.collection.Collection.with_options` method:: - - >>> coll2 = coll.with_options(read_preference=ReadPreference.NEAREST) - >>> coll.read_preference - Primary() - >>> coll2.read_preference - Nearest(tag_sets=None) - -Note that since most database commands can only be sent to the primary of a -replica set, the :meth:`~pymongo.database.Database.command` method does not obey -the Database's :attr:`~pymongo.database.Database.read_preference`, but you can -pass an explicit read preference to the method:: - - >>> db.command('dbstats', read_preference=ReadPreference.NEAREST) - {...} - -Reads are 
configured using three options: **read preference**, **tag sets**, -and **local threshold**. - -**Read preference**: - -Read preference is configured using one of the classes from -:mod:`~pymongo.read_preferences` (:class:`~pymongo.read_preferences.Primary`, -:class:`~pymongo.read_preferences.PrimaryPreferred`, -:class:`~pymongo.read_preferences.Secondary`, -:class:`~pymongo.read_preferences.SecondaryPreferred`, or -:class:`~pymongo.read_preferences.Nearest`). For convenience, we also provide -:class:`~pymongo.read_preferences.ReadPreference` with the following -attributes: - -- ``PRIMARY``: Read from the primary. This is the default read preference, - and provides the strongest consistency. If no primary is available, raise - :class:`~pymongo.errors.AutoReconnect`. - -- ``PRIMARY_PREFERRED``: Read from the primary if available, otherwise read - from a secondary. - -- ``SECONDARY``: Read from a secondary. If no matching secondary is available, - raise :class:`~pymongo.errors.AutoReconnect`. - -- ``SECONDARY_PREFERRED``: Read from a secondary if available, otherwise - from the primary. - -- ``NEAREST``: Read from any available member. - -**Tag sets**: - -Replica-set members can be `tagged -`_ according to any -criteria you choose. By default, PyMongo ignores tags when -choosing a member to read from, but your read preference can be configured with -a ``tag_sets`` parameter. ``tag_sets`` must be a list of dictionaries, each -dict providing tag values that the replica set member must match. -PyMongo tries each set of tags in turn until it finds a set of -tags with at least one matching member. For example, to prefer reads from the -New York data center, but fall back to the San Francisco data center, tag your -replica set members according to their location and create a -MongoClient like so:: - - >>> from pymongo.read_preferences import Secondary - >>> db = client.get_database( - ... 
'test', read_preference=Secondary([{'dc': 'ny'}, {'dc': 'sf'}])) - >>> db.read_preference - Secondary(tag_sets=[{'dc': 'ny'}, {'dc': 'sf'}]) - -MongoClient tries to find secondaries in New York, then San Francisco, -and raises :class:`~pymongo.errors.AutoReconnect` if none are available. As an -additional fallback, specify a final, empty tag set, ``{}``, which means "read -from any member that matches the mode, ignoring tags." - -See :mod:`~pymongo.read_preferences` for more information. - -.. _distributes reads to secondaries: - -**Local threshold**: - -If multiple members match the read preference and tag sets, PyMongo reads -from among the nearest members, chosen according to ping time. By default, -only members whose ping times are within 15 milliseconds of the nearest -are used for queries. You can choose to distribute reads among members with -higher latencies by setting ``localThresholdMS`` to a larger -number:: - - >>> client = pymongo.MongoClient( - ... replicaSet='repl0', - ... readPreference='secondaryPreferred', - ... localThresholdMS=35) - -In this case, PyMongo distributes reads among matching members within 35 -milliseconds of the closest member's ping time. - -.. note:: ``localThresholdMS`` is ignored when talking to a - replica set *through* a mongos. The equivalent is the localThreshold_ command - line option. - -.. _localThreshold: https://mongodb.com/docs/manual/reference/program/mongos/#std-option-mongos.--localThreshold - -.. 
_health-monitoring: - -Health Monitoring -''''''''''''''''' - -When MongoClient is initialized it launches background threads to -monitor the replica set for changes in: - -* Health: detect when a member goes down or comes up, or if a different member - becomes primary -* Configuration: detect when members are added or removed, and detect changes - in members' tags -* Latency: track a moving average of each member's ping time - -Replica-set monitoring ensures queries are continually routed to the proper -members as the state of the replica set changes. - -.. _mongos-load-balancing: - -mongos Load Balancing ---------------------- - -An instance of :class:`~pymongo.mongo_client.MongoClient` can be configured -with a list of addresses of mongos servers: - - >>> client = MongoClient('mongodb://host1,host2,host3') - -Each member of the list must be a single mongos server. Multihomed and round -robin DNS addresses are **not** supported. The client continuously -monitors all the mongoses' availability, and its network latency to each. - -PyMongo distributes operations evenly among the set of mongoses within its -``localThresholdMS`` (similar to how it `distributes reads to secondaries`_ -in a replica set). By default the threshold is 15 ms. - -The lowest-latency server, and all servers with latencies no more than -``localThresholdMS`` beyond the lowest-latency server's, receive -operations equally. For example, if we have three mongoses: - - - host1: 20 ms - - host2: 35 ms - - host3: 40 ms - -By default the ``localThresholdMS`` is 15 ms, so PyMongo uses host1 and host2 -evenly. It uses host1 because its network latency to the driver is shortest. It -uses host2 because its latency is within 15 ms of the lowest-latency server's. -But it excuses host3: host3 is 20ms beyond the lowest-latency server. - -If we set ``localThresholdMS`` to 30 ms all servers are within the threshold: - - >>> client = MongoClient('mongodb://host1,host2,host3/?localThresholdMS=30') - -.. 
warning:: Do **not** connect PyMongo to a pool of mongos instances through a - load balancer. A single socket connection must always be routed to the same - mongos instance for proper cursor support. diff --git a/doc/examples/index.rst b/doc/examples/index.rst deleted file mode 100644 index 57682fa1af..0000000000 --- a/doc/examples/index.rst +++ /dev/null @@ -1,40 +0,0 @@ -Examples -======== - -The examples in this section are intended to give in depth overviews -of how to accomplish specific tasks with MongoDB and PyMongo. - -Unless otherwise noted, all examples assume that a MongoDB instance is -running on the default host and port. Assuming you have `downloaded -and installed `_ -MongoDB, you can start it like so: - -.. code-block:: bash - - $ mongod - -.. toctree:: - :maxdepth: 1 - - aggregation - authentication - collations - copydb - custom_type - bulk - client_bulk - datetimes - geo - gevent - gridfs - high_availability - logging - mod_wsgi - network_compression - server_selection - tailable - timeouts - tls - type_hints - encryption - uuid diff --git a/doc/examples/logging.rst b/doc/examples/logging.rst deleted file mode 100644 index 0cbc8eff09..0000000000 --- a/doc/examples/logging.rst +++ /dev/null @@ -1,63 +0,0 @@ -Logging -======== - -Starting in 4.8, **PyMongo** supports `Python's native logging library `_, -enabling developers to customize the verbosity of log messages for their applications. - -Components -------------- -There are currently three different **PyMongo** components with logging support: ``pymongo.command``, ``pymongo.connection``, and ``pymongo.serverSelection``. -These components deal with command operations, connection management, and server selection, respectively. -Each can be configured separately or they can all be configured together. - -Configuration -------------- -Currently, the above components each support ``DEBUG`` logging. 
To enable a single component, do the following::
-
-    import logging
-    logging.getLogger('pymongo.<component>').setLevel(logging.DEBUG)
-
-
-
-For example, to enable command logging::
-
-    import logging
-    logging.getLogger('pymongo.command').setLevel(logging.DEBUG)
-
-
-You can also enable all ``DEBUG`` logs at once::
-
-    import logging
-    logging.getLogger('pymongo').setLevel(logging.DEBUG)
-
-
-Truncation
--------------
-When ``pymongo.command`` debug logs are enabled, every command sent to the server and every response sent back will be included as part of the logs.
-By default, these command and response documents are truncated after 1000 bytes.
-
-You can configure a higher truncation limit by setting the ``MONGOB_LOG_MAX_DOCUMENT_LENGTH`` environment variable to your desired length.
-
-Note that by default, only sensitive authentication command contents are redacted.
-All commands containing user data will be logged, including the actual contents of your queries.
-To prevent this behavior, set ``MONGOB_LOG_MAX_DOCUMENT_LENGTH`` to 0. This will omit the command and response bodies from the logs.
- -Example -------------- -Here's a simple example that enables ``pymongo.command`` debug logs and performs two database operations:: - - import logging - import pymongo - - # Automatically writes all logs to stdout - logging.basicConfig() - logging.getLogger('pymongo.command').setLevel(logging.DEBUG) - - client = pymongo.MongoClient() - client.db.test.insert_one({"x": 1}) - client.db.test.find_one({"x": 1}) - --------------------------------- - DEBUG:pymongo.command:{"clientId": {"$oid": "65cbe82614be1fc2beb4e4a9"}, "message": "Command started", "command": "{\"insert\": \"test\", \"ordered\": true, \"lsid\": {\"id\": {\"$binary\": {\"base64\": \"GI7ubVhPSsWd7+OwHEFx6Q==\", \"subType\": \"04\"}}}, \"$db\": \"db\", \"documents\": [{\"x\": 1, \"_id\": {\"$oid\": \"65cbe82614be1fc2beb4e4aa\"}}]}", "commandName": "insert", "databaseName": "db", "requestId": 1144108930, "operationId": 1144108930, "driverConnectionId": 1, "serverConnectionId": 3554, "serverHost": "localhost", "serverPort": 27017} - DEBUG:pymongo.command:{"clientId": {"$oid": "65cbe82614be1fc2beb4e4a9"}, "message": "Command succeeded", "durationMS": 0.515, "reply": "{\"n\": 1, \"ok\": 1.0}", "commandName": "insert", "databaseName": "db", "requestId": 1144108930, "operationId": 1144108930, "driverConnectionId": 1, "serverConnectionId": 3554, "serverHost": "localhost", "serverPort": 27017} - DEBUG:pymongo.command:{"clientId": {"$oid": "65cbe82614be1fc2beb4e4a9"}, "message": "Command started", "command": "{\"find\": \"test\", \"filter\": {\"x\": 1}, \"limit\": 1, \"singleBatch\": true, \"lsid\": {\"id\": {\"$binary\": {\"base64\": \"GI7ubVhPSsWd7+OwHEFx6Q==\", \"subType\": \"04\"}}}, \"$db\": \"db\"}", "commandName": "find", "databaseName": "db", "requestId": 470211272, "operationId": 470211272, "driverConnectionId": 1, "serverConnectionId": 3554, "serverHost": "localhost", "serverPort": 27017} - DEBUG:pymongo.command:{"clientId": {"$oid": "65cbe82614be1fc2beb4e4a9"}, "message": "Command succeeded", 
"durationMS": 0.621, "reply": "{\"cursor\": {\"firstBatch\": [{\"_id\": {\"$oid\": \"65cbdf391a957ed280001417\"}, \"x\": 1}], \"ns\": \"db.test\"}, \"ok\": 1.0}", "commandName": "find", "databaseName": "db", "requestId": 470211272, "operationId": 470211272, "driverConnectionId": 1, "serverConnectionId": 3554, "serverHost": "localhost", "serverPort": 27017} diff --git a/doc/examples/mod_wsgi.rst b/doc/examples/mod_wsgi.rst deleted file mode 100644 index 96d6ce892f..0000000000 --- a/doc/examples/mod_wsgi.rst +++ /dev/null @@ -1,64 +0,0 @@ -.. _pymongo-and-mod_wsgi: - -PyMongo and mod_wsgi -==================== - -To run your application under `mod_wsgi `_, -follow these guidelines: - -* Run ``mod_wsgi`` in daemon mode with the ``WSGIDaemonProcess`` directive. -* Assign each application to a separate daemon with ``WSGIProcessGroup``. -* Use ``WSGIApplicationGroup %{GLOBAL}`` to ensure your application is running - in the daemon's main Python interpreter, not a sub interpreter. - -For example, this ``mod_wsgi`` configuration ensures an application runs in the -main interpreter:: - - - WSGIDaemonProcess my_process - WSGIScriptAlias /my_app /path/to/app.wsgi - WSGIProcessGroup my_process - WSGIApplicationGroup %{GLOBAL} - - -If you have multiple applications that use PyMongo, put each in a separate -daemon, still in the global application group:: - - - WSGIDaemonProcess my_process - WSGIScriptAlias /my_app /path/to/app.wsgi - - WSGIProcessGroup my_process - - - WSGIDaemonProcess my_other_process - WSGIScriptAlias /my_other_app /path/to/other_app.wsgi - - WSGIProcessGroup my_other_process - - - WSGIApplicationGroup %{GLOBAL} - - -Background: ``mod_wsgi`` can run in "embedded" mode when only WSGIScriptAlias -is set, or "daemon" mode with WSGIDaemonProcess. In daemon mode, ``mod_wsgi`` -can run your application in the Python main interpreter, or in sub interpreters. -The correct way to run a PyMongo application is in daemon mode, using the main -interpreter. 
- -Python C extensions in general have issues running in multiple -Python sub interpreters. These difficulties are explained in the documentation for -`Py_NewInterpreter `_ -and in the `Multiple Python Sub Interpreters -`_ -section of the ``mod_wsgi`` documentation. - -Beginning with PyMongo 2.7, the C extension for BSON detects when it is running -in a sub interpreter and activates a workaround, which adds a small cost to -BSON decoding. To avoid this cost, use ``WSGIApplicationGroup %{GLOBAL}`` to -ensure your application runs in the main interpreter. - -Since your program runs in the main interpreter it should not share its -process with any other applications, lest they interfere with each other's -state. Each application should have its own daemon process, as shown in the -example above. diff --git a/doc/examples/network_compression.rst b/doc/examples/network_compression.rst deleted file mode 100644 index c270dff4b3..0000000000 --- a/doc/examples/network_compression.rst +++ /dev/null @@ -1,39 +0,0 @@ - -.. _network-compression-example: - -Network Compression -=================== - -PyMongo supports network compression where network traffic between the client -and MongoDB server are compressed which reduces the amount of data passed -over the network. By default no compression is used. - -The driver supports the following algorithms: - -- `snappy `_ available in MongoDB 3.4 and later. -- :mod:`zlib` available in MongoDB 3.6 and later. -- `zstandard `_ available in MongoDB 4.2 and later. - -.. note:: snappy and zstandard compression require additional dependencies. See :ref:`optional-deps`. - -Applications can enable wire protocol compression via the ``compressors`` URI and -keyword argument to :meth:`~pymongo.mongo_client.MongoClient`. For example:: - - >>> client = MongoClient(compressors='zlib') - -When multiple compression algorithms are given, the driver selects the first one in the -list supported by the MongoDB instance to which it is connected. 
For example:: - - >>> client = MongoClient(compressors='snappy,zstandard,zlib') - -The ``compressors`` option can also be set via the URI:: - - >>> client = MongoClient('mongodb://example.com/?compressors=snappy,zstandard,zlib') - -Additionally, zlib compression allows specifying a compression level with supported values from -1 to 9:: - - >>> client = MongoClient(compressors='zlib', zlibCompressionLevel=-1) - -The ``zlibCompressionLevel`` is passed as the ``level`` argument to :func:`zlib.compress`. - -.. seealso:: The MongoDB documentation on `network compression URI options `_. diff --git a/doc/examples/server_selection.rst b/doc/examples/server_selection.rst deleted file mode 100644 index 227e849df3..0000000000 --- a/doc/examples/server_selection.rst +++ /dev/null @@ -1,108 +0,0 @@ -Server Selector Example -======================= - -Users can exert fine-grained control over the `server selection algorithm`_ -by setting the ``server_selector`` option on the :class:`~pymongo.MongoClient` -to an appropriate callable. This example shows how to use this functionality -to prefer servers running on ``localhost``. - - -.. warning:: - - Use of custom server selector functions is a power user feature. Misusing - custom server selectors can have unintended consequences such as degraded - read/write performance. - - -.. testsetup:: - - from pymongo import MongoClient - - -.. _server selection algorithm: https://mongodb.com/docs/manual/core/read-preference-mechanics/ - - -Example: Selecting Servers Running on ``localhost`` ---------------------------------------------------- - -To start, we need to write the server selector function that will be used. -The server selector function should accept a list of -:class:`~pymongo.server_description.ServerDescription` objects and return a -list of server descriptions that are suitable for the read or write operation. 
-A server selector must not create or modify -:class:`~pymongo.server_description.ServerDescription` objects, and must return -the selected instances unchanged. - -In this example, we write a server selector that prioritizes servers running on -``localhost``. This can be desirable when using a sharded cluster with multiple -``mongos``, as locally run queries are likely to see lower latency and higher -throughput. Please note, however, that it is highly dependent on the -application if preferring ``localhost`` is beneficial or not. - -In addition to comparing the hostname with ``localhost``, our server selector -function accounts for the edge case when no servers are running on -``localhost``. In this case, we allow the default server selection logic to -prevail by passing through the received server description list unchanged. -Failure to do this would render the client unable to communicate with MongoDB -in the event that no servers were running on ``localhost``. - - -The described server selection logic is implemented in the following server -selector function: - - -.. doctest:: - - >>> def server_selector(server_descriptions): - ... servers = [ - ... server for server in server_descriptions if server.address[0] == "localhost" - ... ] - ... if not servers: - ... return server_descriptions - ... return servers - ... - - - -Finally, we can create a :class:`~pymongo.MongoClient` instance with this -server selector. - - -.. doctest:: - - >>> client = MongoClient(server_selector=server_selector) - - - -Server Selection Process ------------------------- - -This section dives deeper into the server selection process for reads and -writes. In the case of a write, the driver performs the following operations -(in order) during the selection process: - - -#. Select all writeable servers from the list of known hosts. For a replica set - this is the primary, while for a sharded cluster this is all the known mongoses. - -#. Apply the user-defined server selector function. 
Note that the custom server - selector is **not** called if there are no servers left from the previous - filtering stage. - -#. Apply the ``localThresholdMS`` setting to the list of remaining hosts. This - whittles the host list down to only contain servers whose latency is at most - ``localThresholdMS`` milliseconds higher than the lowest observed latency. - -#. Select a server at random from the remaining host list. The desired - operation is then performed against the selected server. - - -In the case of **reads** the process is identical except for the first step. -Here, instead of selecting all writeable servers, we select all servers -matching the user's :class:`~pymongo.read_preferences.ReadPreference` from the -list of known hosts. As an example, for a 3-member replica set with a -:class:`~pymongo.read_preferences.Secondary` read preference, we would select -all available secondaries. - - -.. _server selection algorithm: https://mongodb.com/docs/manual/core/read-preference-mechanics/ diff --git a/doc/examples/tailable.rst b/doc/examples/tailable.rst deleted file mode 100644 index 79458dc2ff..0000000000 --- a/doc/examples/tailable.rst +++ /dev/null @@ -1,42 +0,0 @@ -Tailable Cursors -================ - -By default, MongoDB will automatically close a cursor when the client has -exhausted all results in the cursor. However, for `capped collections -`_ you may -use a `tailable cursor -`_ -that remains open after the client exhausts the results in the initial cursor. - -The following is a basic example of using a tailable cursor to tail the oplog -of a replica set member:: - - import time - - import pymongo - - client = pymongo.MongoClient() - oplog = client.local.oplog.rs - first = oplog.find().sort('$natural', pymongo.ASCENDING).limit(-1).next() - print(first) - ts = first['ts'] - - while True: - # For a regular capped collection CursorType.TAILABLE_AWAIT is the - # only option required to create a tailable cursor. 
When querying the - # oplog, the oplog_replay option enables an optimization to quickly - # find the 'ts' value we're looking for. The oplog_replay option - # can only be used when querying the oplog. Starting in MongoDB 4.4 - # this option is ignored by the server as queries against the oplog - # are optimized automatically by the MongoDB query engine. - cursor = oplog.find({'ts': {'$gt': ts}}, - cursor_type=pymongo.CursorType.TAILABLE_AWAIT, - oplog_replay=True) - while cursor.alive: - for doc in cursor: - ts = doc['ts'] - print(doc) - # We end up here if the find() returned no documents or if the - # tailable cursor timed out (no new documents were added to the - # collection for more than 1 second). - time.sleep(1) diff --git a/doc/examples/timeouts.rst b/doc/examples/timeouts.rst deleted file mode 100644 index 5171588962..0000000000 --- a/doc/examples/timeouts.rst +++ /dev/null @@ -1,162 +0,0 @@ - -.. _timeout-example: - -Client Side Operation Timeout -============================= - -PyMongo 4.2 introduced :meth:`~pymongo.timeout` and the ``timeoutMS`` -URI and keyword argument to :class:`~pymongo.mongo_client.MongoClient`. -These features allow applications to more easily limit the amount of time that -one or more operations can execute before control is returned to the app. This -timeout applies to all of the work done to execute the operation, including -but not limited to server selection, connection checkout, serialization, and -server-side execution. - -Basic Usage ------------ - -The following example uses :meth:`~pymongo.timeout` to configure a 10-second -timeout for an :meth:`~pymongo.collection.Collection.insert_one` operation:: - - import pymongo - with pymongo.timeout(10): - coll.insert_one({"name": "Nunu"}) - -The :meth:`~pymongo.timeout` applies to all pymongo operations within the block. 
-The following example ensures that both the ``insert`` and the ``find`` complete -within 10 seconds total, or raise a timeout error:: - - with pymongo.timeout(10): - coll.insert_one({"name": "Nunu"}) - coll.find_one({"name": "Nunu"}) - -When nesting :func:`~pymongo.timeout`, the nested deadline is capped by the outer -deadline. The deadline can only be shortened, not extended. -When exiting the block, the previous deadline is restored:: - - with pymongo.timeout(5): - coll.find_one() # Uses the 5 second deadline. - with pymongo.timeout(3): - coll.find_one() # Uses the 3 second deadline. - coll.find_one() # Uses the original 5 second deadline. - with pymongo.timeout(10): - coll.find_one() # Still uses the original 5 second deadline. - coll.find_one() # Uses the original 5 second deadline. - -Timeout errors --------------- - -When the :meth:`~pymongo.timeout` with-statement is entered, a deadline is set -for the entire block. When that deadline is exceeded, any blocking pymongo operation -will raise a timeout exception. For example:: - - try: - with pymongo.timeout(10): - coll.insert_one({"name": "Nunu"}) - time.sleep(10) - # The deadline has now expired, the next operation will raise - # a timeout exception. - coll.find_one({"name": "Nunu"}) - except PyMongoError as exc: - if exc.timeout: - print(f"block timed out: {exc!r}") - else: - print(f"failed with non-timeout error: {exc!r}") - -The :attr:`pymongo.errors.PyMongoError.timeout` property (added in PyMongo 4.2) -will be ``True`` when the error was caused by a timeout and ``False`` otherwise. - -The timeoutMS URI option ------------------------- - -PyMongo 4.2 also added support for the ``timeoutMS`` URI and keyword argument to -:class:`~pymongo.mongo_client.MongoClient`. When this option is configured, the -client will automatically apply the timeout to each API call. 
For example:: - - client = MongoClient("mongodb://localhost/?timeoutMS=10000") - coll = client.test.test - coll.insert_one({"name": "Nunu"}) # Uses a 10-second timeout. - coll.find_one({"name": "Nunu"}) # Also uses a 10-second timeout. - -The above is roughly equivalent to:: - - client = MongoClient() - coll = client.test.test - with pymongo.timeout(10): - coll.insert_one({"name": "Nunu"}) - with pymongo.timeout(10): - coll.find_one({"name": "Nunu"}) - -pymongo.timeout overrides timeoutMS ------------------------------------ - -:meth:`~pymongo.timeout` overrides ``timeoutMS``; within a -:meth:`~pymongo.timeout` block a client's ``timeoutMS`` option is ignored:: - - client = MongoClient("mongodb://localhost/?timeoutMS=10000") - coll = client.test.test - coll.insert_one({"name": "Nunu"}) # Uses the client's 10-second timeout. - # pymongo.timeout overrides the client's timeoutMS. - with pymongo.timeout(20): - coll.insert_one({"name": "Nunu"}) # Uses the 20-second timeout. - with pymongo.timeout(5): - coll.find_one({"name": "Nunu"}) # Uses the 5-second timeout. - -pymongo.timeout is thread safe ------------------------------- - -:meth:`~pymongo.timeout` is thread safe; the timeout only applies to current -thread and multiple threads can configure different timeouts in parallel. - -pymongo.timeout is asyncio safe -------------------------------- - -:meth:`~pymongo.timeout` is asyncio safe; the timeout only applies to current -Task and multiple Tasks can configure different timeouts concurrently. -:meth:`~pymongo.timeout` can be used identically in -`Motor `_, for example:: - - import motor.motor_asyncio - client = motor.motor_asyncio.AsyncIOMotorClient() - coll = client.test.test - with pymongo.timeout(10): - await coll.insert_one({"name": "Nunu"}) - await coll.find_one({"name": "Nunu"}) - -Troubleshooting ---------------- - -There are many timeout errors that can be raised depending on when the timeout -expires. 
In code, these can be identified with the :attr:`pymongo.errors.PyMongoError.timeout` -property. Some specific timeout errors examples are described below. - -When the client was unable to find an available server to run the operation -within the given timeout:: - - pymongo.errors.ServerSelectionTimeoutError: No servers found yet, Timeout: -0.00202266700216569s, Topology Description: ]> - -When either the client was unable to establish a connection within the given -timeout or the operation was sent but the server was not able to respond in time:: - - pymongo.errors.NetworkTimeout: localhost:27017: timed out - -When the server cancelled the operation because it exceeded the given timeout. -Note that the operation may have partially completed on the server (depending -on the operation):: - - pymongo.errors.ExecutionTimeout: operation exceeded time limit, full error: {'ok': 0.0, 'errmsg': 'operation exceeded time limit', 'code': 50, 'codeName': 'MaxTimeMSExpired'} - -When the client cancelled the operation because it was not possible to complete -within the given timeout:: - - pymongo.errors.ExecutionTimeout: operation would exceed time limit, remaining timeout:0.00196 <= network round trip time:0.00427 - -When the client attempted a write operation but the server could not replicate -that write (according to the configured write concern) within the given timeout:: - - pymongo.errors.WTimeoutError: operation exceeded time limit, full error: {'code': 50, 'codeName': 'MaxTimeMSExpired', 'errmsg': 'operation exceeded time limit', 'errInfo': {'writeConcern': {'w': 1, 'wtimeout': 0}}} - -The same error as above but for :meth:`~pymongo.collection.Collection.insert_many` -or :meth:`~pymongo.collection.Collection.bulk_write`:: - - pymongo.errors.BulkWriteError: batch op errors occurred, full error: {'writeErrors': [], 'writeConcernErrors': [{'code': 50, 'codeName': 'MaxTimeMSExpired', 'errmsg': 'operation exceeded time limit', 'errInfo': {'writeConcern': {'w': 1, 'wtimeout': 
0}}}], 'nInserted': 2, 'nUpserted': 0, 'nMatched': 0, 'nModified': 0, 'nRemoved': 0, 'upserted': []} diff --git a/doc/examples/tls.rst b/doc/examples/tls.rst deleted file mode 100644 index ee4d75027e..0000000000 --- a/doc/examples/tls.rst +++ /dev/null @@ -1,234 +0,0 @@ -TLS/SSL and PyMongo -=================== - -PyMongo supports connecting to MongoDB over TLS/SSL. This guide covers the -configuration options supported by PyMongo. See `the server documentation -`_ to configure -MongoDB. - -.. warning:: Industry best practices recommend, and some regulations require, - the use of TLS 1.1 or newer. Though no application changes are required for - PyMongo to make use of the newest protocols, some operating systems or - versions may not provide an OpenSSL version new enough to support them. - - Users of macOS older than 10.13 (High Sierra) will need to install Python - from `python.org`_, `homebrew`_, `macports`_, or another similar source. - - Users of Linux or other non-macOS Unix can check their OpenSSL version like - this:: - - $ openssl version - - If the version number is less than 1.0.1 support for TLS 1.1 or newer is not - available. Contact your operating system vendor for a solution or upgrade to - a newer distribution. - - You can check your Python interpreter by installing the `requests`_ module - and executing the following command:: - - python -c "import requests; print(requests.get('https://www.howsmyssl.com/a/check', verify=False).json()['tls_version'])" - - You should see "TLS 1.X" where X is >= 1. - - You can read more about TLS versions and their security implications here: - - ``_ - -.. _python.org: https://www.python.org/downloads/ -.. _homebrew: https://brew.sh/ -.. _macports: https://www.macports.org/ -.. _requests: https://pypi.python.org/pypi/requests - -Basic configuration -................... 
- -In many cases connecting to MongoDB over TLS/SSL requires nothing more than -passing ``tls=True`` as a keyword argument to -:class:`~pymongo.mongo_client.MongoClient`:: - - >>> client = pymongo.MongoClient('example.com', tls=True) - -Or passing ``tls=true`` in the URI:: - - >>> client = pymongo.MongoClient('mongodb://example.com/?tls=true') - -This configures PyMongo to connect to the server using TLS, verify the server's -certificate and verify that the host you are attempting to connect to is listed -by that certificate. - -Certificate verification policy -............................... - -By default, PyMongo is configured to require a certificate from the server when -TLS is enabled. This is configurable using the ``tlsAllowInvalidCertificates`` -option. To disable this requirement pass ``tlsAllowInvalidCertificates=True`` -as a keyword parameter:: - - >>> client = pymongo.MongoClient('example.com', - ... tls=True, - ... tlsAllowInvalidCertificates=True) - -Or, in the URI:: - - >>> uri = 'mongodb://example.com/?tls=true&tlsAllowInvalidCertificates=true' - >>> client = pymongo.MongoClient(uri) - -Specifying a CA file -.................... - -In some cases you may want to configure PyMongo to use a specific set of CA -certificates. This is most often the case when you are acting as your own -certificate authority rather than using server certificates signed by a well -known authority. The ``tlsCAFile`` option takes a path to a CA file. It can be -passed as a keyword argument:: - - >>> client = pymongo.MongoClient('example.com', - ... tls=True, - ... tlsCAFile='/path/to/ca.pem') - -Or, in the URI:: - - >>> uri = 'mongodb://example.com/?tls=true&tlsCAFile=/path/to/ca.pem' - >>> client = pymongo.MongoClient(uri) - -Specifying a certificate revocation list -........................................ - -The ``tlsCRLFile`` option takes a path to a CRL file. It can be passed -as a keyword argument:: - - >>> client = pymongo.MongoClient('example.com', - ... 
tls=True, - ... tlsCRLFile='/path/to/crl.pem') - -Or, in the URI:: - - >>> uri = 'mongodb://example.com/?tls=true&tlsCRLFile=/path/to/crl.pem' - >>> client = pymongo.MongoClient(uri) - -.. note:: Certificate revocation lists and :ref:`OCSP` cannot be used together. - -Client certificates -................... - -PyMongo can be configured to present a client certificate using the -``tlsCertificateKeyFile`` option:: - - >>> client = pymongo.MongoClient('example.com', - ... tls=True, - ... tlsCertificateKeyFile='/path/to/client.pem') - -If the private key for the client certificate is stored in a separate file, -it should be concatenated with the certificate file. For example, to -concatenate a PEM-formatted certificate file ``cert.pem`` and a PEM-formatted -keyfile ``key.pem`` into a single file ``combined.pem``, on Unix systems, -users can run:: - - $ cat key.pem cert.pem > combined.pem - -PyMongo can be configured with the concatenated certificate keyfile using the -``tlsCertificateKeyFile`` option:: - - >>> client = pymongo.MongoClient('example.com', - ... tls=True, - ... tlsCertificateKeyFile='/path/to/combined.pem') - -If the private key contained in the certificate keyfile is encrypted, users -can provide a password or passphrase to decrypt the encrypted private keys -using the ``tlsCertificateKeyFilePassword`` option:: - - >>> client = pymongo.MongoClient('example.com', - ... tls=True, - ... tlsCertificateKeyFile='/path/to/combined.pem', - ... tlsCertificateKeyFilePassword=) - -These options can also be passed as part of the MongoDB URI. - -.. _OCSP: - -OCSP -.... - -Starting with PyMongo 3.11, if PyMongo was installed with the "ocsp" extra:: - - python -m pip install pymongo[ocsp] - -certificate revocation checking is enabled by way of `OCSP (Online Certification -Status Protocol) `_. 
-MongoDB 4.4+ `staples OCSP responses `_ -to the TLS handshake which PyMongo will verify, failing the TLS handshake if -the stapled OCSP response is invalid or indicates that the peer certificate is -revoked. - -When connecting to a server version older than 4.4, or when a 4.4+ version of -MongoDB does not staple an OCSP response, PyMongo will attempt to connect -directly to an OCSP endpoint if the peer certificate specified one. The TLS -handshake will only fail in this case if the response indicates that the -certificate is revoked. Invalid or malformed responses will be ignored, -favoring availability over maximum security. - -.. _TLSErrors: - -Troubleshooting TLS Errors -.......................... - -TLS errors often fall into three categories - certificate verification failure, -protocol version mismatch or certificate revocation checking failure. An error -message similar to the following means that OpenSSL was not able to verify the -server's certificate:: - - [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed - -This often occurs because OpenSSL does not have access to the system's -root certificates or the certificates are out of date. Linux users should -ensure that they have the latest root certificate updates installed from -their Linux vendor. macOS users using Python 3.7 or newer downloaded -from python.org `may have to run a script included with python -`_ to install -root certificates:: - - open "/Applications/Python /Install Certificates.command" - -Users of older PyPy portable versions may have to `set an environment -variable `_ to tell -OpenSSL where to find root certificates. 
This is easily done using the `certifi -module `_ from pypi:: - - $ pypy -m pip install certifi - $ export SSL_CERT_FILE=$(pypy -c "import certifi; print(certifi.where())") - -An error message similar to the following message means that the OpenSSL -version used by Python does not support a new enough TLS protocol to connect -to the server:: - - [SSL: TLSV1_ALERT_PROTOCOL_VERSION] tlsv1 alert protocol version - -Industry best practices recommend, and some regulations require, that older -TLS protocols be disabled in some MongoDB deployments. Some deployments may -disable TLS 1.0, others may disable TLS 1.0 and TLS 1.1. See the warning -earlier in this document for troubleshooting steps and solutions. - -An error message similar to the following message means that certificate -revocation checking failed:: - - [('SSL routines', 'tls_process_initial_server_flight', 'invalid status response')] - -See :ref:`OCSP` for more details. - -Python 3.10+ incompatibilities with TLS/SSL on MongoDB <= 4.0 -............................................................. - -Note that `changes made to the ssl module in Python 3.10+ -`_ may cause incompatibilities -with MongoDB <= 4.0. The following are some example errors that may occur with this -combination:: - - SSL handshake failed: localhost:27017: [SSL: SSLV3_ALERT_HANDSHAKE_FAILURE] sslv3 alert handshake failure (_ssl.c:997) - SSL handshake failed: localhost:27017: EOF occurred in violation of protocol (_ssl.c:997) - -The MongoDB server logs may show the following error:: - - 2021-06-30T21:22:44.917+0100 E NETWORK [conn16] SSL: error:1408A0C1:SSL routines:ssl3_get_client_hello:no shared cipher - -To resolve this issue, use Python <=3.10, upgrade to MongoDB 4.2+, or install -pymongo with the :ref:`OCSP` extra which relies on PyOpenSSL. diff --git a/doc/examples/type_hints.rst b/doc/examples/type_hints.rst deleted file mode 100644 index 375ad14330..0000000000 --- a/doc/examples/type_hints.rst +++ /dev/null @@ -1,332 +0,0 @@ - -.. 
_type_hints-example: - -Type Hints -========== - -As of version 4.1, PyMongo ships with `type hints`_. With type hints, Python -type checkers can easily find bugs before they reveal themselves in your code. - -If your IDE is configured to use type hints, -it can suggest more appropriate completions and highlight errors in your code. -Some examples include `PyCharm`_, `Sublime Text`_, and `Visual Studio Code`_. - -You can also use the `mypy`_ tool from your command line or in Continuous Integration tests. - -All of the public APIs in PyMongo are fully type hinted, and -several of them support generic parameters for the -type of document object returned when decoding BSON documents. - -Due to `limitations in mypy`_, the default -values for generic document types are not yet provided (they will eventually be ``Dict[str, any]``). - -For a larger set of examples that use types, see the PyMongo `test_typing module`_. - -If you would like to opt out of using the provided types, add the following to -your `mypy config`_: :: - - [mypy-pymongo] - follow_imports = False - - -Basic Usage ------------ - -Note that a type for :class:`~pymongo.mongo_client.MongoClient` must be specified. Here we use the -default, unspecified document type: - -.. doctest:: - - >>> from pymongo import MongoClient - >>> client: MongoClient = MongoClient() - >>> collection = client.test.test - >>> inserted = collection.insert_one({"x": 1, "tags": ["dog", "cat"]}) - >>> retrieved = collection.find_one({"x": 1}) - >>> assert isinstance(retrieved, dict) - -For a more accurate typing for document type you can use: - -.. 
doctest:: - - >>> from typing import Any, Dict - >>> from pymongo import MongoClient - >>> client: MongoClient[Dict[str, Any]] = MongoClient() - >>> collection = client.test.test - >>> inserted = collection.insert_one({"x": 1, "tags": ["dog", "cat"]}) - >>> retrieved = collection.find_one({"x": 1}) - >>> assert isinstance(retrieved, dict) - -Typed Client ------------- - -:class:`~pymongo.mongo_client.MongoClient` is generic on the document type used to decode BSON documents. - -You can specify a :class:`~bson.raw_bson.RawBSONDocument` document type: - -.. doctest:: - - >>> from pymongo import MongoClient - >>> from bson.raw_bson import RawBSONDocument - >>> client = MongoClient(document_class=RawBSONDocument) - >>> collection = client.test.test - >>> inserted = collection.insert_one({"x": 1, "tags": ["dog", "cat"]}) - >>> result = collection.find_one({"x": 1}) - >>> assert isinstance(result, RawBSONDocument) - -Subclasses of :py:class:`collections.abc.Mapping` can also be used, such as :class:`~bson.son.SON`: - -.. doctest:: - - >>> from bson import SON - >>> from pymongo import MongoClient - >>> client = MongoClient(document_class=SON[str, int]) - >>> collection = client.test.test - >>> inserted = collection.insert_one({"x": 1, "y": 2}) - >>> result = collection.find_one({"x": 1}) - >>> assert result is not None - >>> assert result["x"] == 1 - -Note that when using :class:`~bson.son.SON`, the key and value types must be given, e.g. ``SON[str, Any]``. - - -Typed Collection ----------------- - -You can use :py:class:`~typing.TypedDict` (Python 3.8+) when using a well-defined schema for the data in a -:class:`~pymongo.collection.Collection`. Note that all `schema validation`_ for inserts and updates is done on the server. -These methods automatically add an "_id" field. - -.. doctest:: - :pyversion: >= 3.8 - - >>> from typing import TypedDict - >>> from pymongo import MongoClient - >>> from pymongo.collection import Collection - >>> class Movie(TypedDict): - ... 
name: str - ... year: int - ... - >>> client: MongoClient = MongoClient() - >>> collection: Collection[Movie] = client.test.test - >>> inserted = collection.insert_one(Movie(name="Jurassic Park", year=1993)) - >>> result = collection.find_one({"name": "Jurassic Park"}) - >>> assert result is not None - >>> assert result["year"] == 1993 - >>> # This will raise a type-checking error, despite being present, because it is added by PyMongo. - >>> assert result["_id"] # type:ignore[typeddict-item] - -This same typing scheme works for all of the insert methods (:meth:`~pymongo.collection.Collection.insert_one`, -:meth:`~pymongo.collection.Collection.insert_many`, and :meth:`~pymongo.collection.Collection.bulk_write`). -For ``bulk_write`` both :class:`~pymongo.operations.InsertOne` and :class:`~pymongo.operations.ReplaceOne` operators are generic. - -.. doctest:: - :pyversion: >= 3.8 - - >>> from typing import TypedDict - >>> from pymongo import MongoClient - >>> from pymongo.operations import InsertOne - >>> from pymongo.collection import Collection - >>> client: MongoClient = MongoClient() - >>> collection: Collection[Movie] = client.test.test - >>> inserted = collection.bulk_write([InsertOne(Movie(name="Jurassic Park", year=1993))]) - >>> result = collection.find_one({"name": "Jurassic Park"}) - >>> assert result is not None - >>> assert result["year"] == 1993 - >>> # This will raise a type-checking error, despite being present, because it is added by PyMongo. - >>> assert result["_id"] # type:ignore[typeddict-item] - -Modeling Document Types with TypedDict --------------------------------------- - -You can use :py:class:`~typing.TypedDict` (Python 3.8+) to model structured data. -As noted above, PyMongo will automatically add an ``_id`` field if it is not present. This also applies to TypedDict. -There are three approaches to this: - - 1. Do not specify ``_id`` at all. 
It will be inserted automatically, and can be retrieved at run-time, but will yield a type-checking error unless explicitly ignored. - - 2. Specify ``_id`` explicitly. This will mean that every instance of your custom TypedDict class will have to pass a value for ``_id``. - - 3. Make use of :py:class:`~typing.NotRequired`. This has the flexibility of option 1, but with the ability to access the ``_id`` field without causing a type-checking error. - -Note: to use :py:class:`~typing.TypedDict` and :py:class:`~typing.NotRequired` in earlier versions of Python (<3.8, <3.11), use the ``typing_extensions`` package. - -.. doctest:: typed-dict-example - :pyversion: >= 3.11 - - >>> from typing import TypedDict, NotRequired - >>> from pymongo import MongoClient - >>> from pymongo.collection import Collection - >>> from bson import ObjectId - >>> class Movie(TypedDict): - ... name: str - ... year: int - ... - >>> class ExplicitMovie(TypedDict): - ... _id: ObjectId - ... name: str - ... year: int - ... - >>> class NotRequiredMovie(TypedDict): - ... _id: NotRequired[ObjectId] - ... name: str - ... year: int - ... - >>> client: MongoClient = MongoClient() - >>> collection: Collection[Movie] = client.test.test - >>> inserted = collection.insert_one(Movie(name="Jurassic Park", year=1993)) - >>> result = collection.find_one({"name": "Jurassic Park"}) - >>> assert result is not None - >>> # This will yield a type-checking error, despite being present, because it is added by PyMongo. - >>> assert result["_id"] # type:ignore[typeddict-item] - >>> collection: Collection[ExplicitMovie] = client.test.test - >>> # Note that the _id keyword argument must be supplied - >>> inserted = collection.insert_one( - ... ExplicitMovie(_id=ObjectId(), name="Jurassic Park", year=1993) - ... ) - >>> result = collection.find_one({"name": "Jurassic Park"}) - >>> assert result is not None - >>> # This will not raise a type-checking error. 
- >>> assert result["_id"] - >>> collection: Collection[NotRequiredMovie] = client.test.test - >>> # Note the lack of _id, similar to the first example - >>> inserted = collection.insert_one(NotRequiredMovie(name="Jurassic Park", year=1993)) - >>> result = collection.find_one({"name": "Jurassic Park"}) - >>> assert result is not None - >>> # This will not raise a type-checking error, despite not being provided explicitly. - >>> assert result["_id"] - - -Typed Database --------------- - -While less common, you could specify that the documents in an entire database -match a well-defined schema using :py:class:`~typing.TypedDict` (Python 3.8+). - - -.. doctest:: - - >>> from typing import TypedDict - >>> from pymongo import MongoClient - >>> from pymongo.database import Database - >>> class Movie(TypedDict): - ... name: str - ... year: int - ... - >>> client: MongoClient = MongoClient() - >>> db: Database[Movie] = client.test - >>> collection = db.test - >>> inserted = collection.insert_one({"name": "Jurassic Park", "year": 1993}) - >>> result = collection.find_one({"name": "Jurassic Park"}) - >>> assert result is not None - >>> assert result["year"] == 1993 - -Typed Command -------------- -When using the :meth:`~pymongo.database.Database.command`, you can specify the document type by providing a custom :class:`~bson.codec_options.CodecOptions`: - -.. doctest:: - - >>> from pymongo import MongoClient - >>> from bson.raw_bson import RawBSONDocument - >>> from bson import CodecOptions - >>> client: MongoClient = MongoClient() - >>> options = CodecOptions(RawBSONDocument) - >>> result = client.admin.command("ping", codec_options=options) - >>> assert isinstance(result, RawBSONDocument) - -Custom :py:class:`collections.abc.Mapping` subclasses and :py:class:`~typing.TypedDict` (Python 3.8+) are also supported. -For :py:class:`~typing.TypedDict`, use the form: ``options: CodecOptions[MyTypedDict] = CodecOptions(...)``. 
- -Typed BSON Decoding -------------------- -You can specify the document type returned by :mod:`bson` decoding functions by providing :class:`~bson.codec_options.CodecOptions`: - -.. doctest:: - - >>> from typing import Any, Dict - >>> from bson import CodecOptions, encode, decode - >>> class MyDict(Dict[str, Any]): - ... def foo(self): - ... return "bar" - ... - >>> options = CodecOptions(document_class=MyDict) - >>> doc = {"x": 1, "y": 2} - >>> bsonbytes = encode(doc, codec_options=options) - >>> rt_document = decode(bsonbytes, codec_options=options) - >>> assert rt_document.foo() == "bar" - -:class:`~bson.raw_bson.RawBSONDocument` and :py:class:`~typing.TypedDict` (Python 3.8+) are also supported. -For :py:class:`~typing.TypedDict`, use the form: ``options: CodecOptions[MyTypedDict] = CodecOptions(...)``. - - -Troubleshooting ---------------- - -Client Type Annotation -~~~~~~~~~~~~~~~~~~~~~~ -If you forget to add a type annotation for a :class:`~pymongo.mongo_client.MongoClient` object you may get the following ``mypy`` error:: - - from pymongo import MongoClient - client = MongoClient() # error: Need type annotation for "client" - -The solution is to annotate the type as ``client: MongoClient`` or ``client: MongoClient[Dict[str, Any]]``. See `Basic Usage`_. - -Incompatible Types -~~~~~~~~~~~~~~~~~~ -If you use the generic form of :class:`~pymongo.mongo_client.MongoClient` you -may encounter a ``mypy`` error like:: - - from pymongo import MongoClient - - client: MongoClient = MongoClient() - client.test.test.insert_many( - {"a": 1} - ) # error: Dict entry 0 has incompatible type "str": "int"; - # expected "Mapping[str, Any]": "int" - - -The solution is to use ``client: MongoClient[Dict[str, Any]]`` as used in -`Basic Usage`_ . 
- -Actual Type Errors -~~~~~~~~~~~~~~~~~~ - -Other times ``mypy`` will catch an actual error, like the following code:: - - from pymongo import MongoClient - from typing import Mapping - client: MongoClient = MongoClient() - client.test.test.insert_one( - [{}] - ) # error: Argument 1 to "insert_one" of "Collection" has - # incompatible type "List[Dict[, ]]"; - # expected "Mapping[str, Any]" - -In this case the solution is to use ``insert_one({})``, passing a document instead of a list. - -Another example is trying to set a value on a :class:`~bson.raw_bson.RawBSONDocument`, which is read-only.:: - - from bson.raw_bson import RawBSONDocument - from pymongo import MongoClient - - client = MongoClient(document_class=RawBSONDocument) - coll = client.test.test - doc = {"my": "doc"} - coll.insert_one(doc) - retrieved = coll.find_one({"_id": doc["_id"]}) - assert retrieved is not None - assert len(retrieved.raw) > 0 - retrieved[ - "foo" - ] = "bar" # error: Unsupported target for indexed assignment - # ("RawBSONDocument") [index] - -.. _PyCharm: https://www.jetbrains.com/help/pycharm/type-hinting-in-product.html -.. _Visual Studio Code: https://code.visualstudio.com/docs/languages/python -.. _Sublime Text: https://github.com/sublimelsp/LSP-pyright -.. _type hints: https://docs.python.org/3/library/typing.html -.. _mypy: https://mypy.readthedocs.io/en/stable/cheat_sheet_py3.html -.. _limitations in mypy: https://github.com/python/mypy/issues/3737 -.. _mypy config: https://mypy.readthedocs.io/en/stable/config_file.html -.. _test_typing module: https://github.com/mongodb/mongo-python-driver/blob/master/test/test_typing.py -.. _schema validation: https://www.mongodb.com/docs/manual/core/schema-validation/#when-to-use-schema-validation diff --git a/doc/examples/uuid.rst b/doc/examples/uuid.rst deleted file mode 100644 index 350db14d9a..0000000000 --- a/doc/examples/uuid.rst +++ /dev/null @@ -1,512 +0,0 @@ - -.. 
_handling-uuid-data-example: - -Handling UUID Data -================== - -PyMongo ships with built-in support for dealing with UUID types. -It is straightforward to store native :class:`uuid.UUID` objects -to MongoDB and retrieve them as native :class:`uuid.UUID` objects:: - - from pymongo import MongoClient - from bson.binary import UuidRepresentation - from uuid import uuid4 - - # use the 'standard' representation for cross-language compatibility. - client = MongoClient(uuidRepresentation='standard') - collection = client.get_database('uuid_db').get_collection('uuid_coll') - - # remove all documents from collection - collection.delete_many({}) - - # create a native uuid object - uuid_obj = uuid4() - - # save the native uuid object to MongoDB - collection.insert_one({'uuid': uuid_obj}) - - # retrieve the stored uuid object from MongoDB - document = collection.find_one({}) - - # check that the retrieved UUID matches the inserted UUID - assert document['uuid'] == uuid_obj - -Native :class:`uuid.UUID` objects can also be used as part of MongoDB -queries:: - - document = collection.find({'uuid': uuid_obj}) - assert document['uuid'] == uuid_obj - -The above examples illustrate the simplest of use-cases - one where the -UUID is generated by, and used in the same application. However, -the situation can be significantly more complex when dealing with a MongoDB -deployment that contains UUIDs created by other drivers as the Java and CSharp -drivers have historically encoded UUIDs using a byte-order that is different -from the one used by PyMongo. Applications that require interoperability across -these drivers must specify the appropriate -:class:`~bson.binary.UuidRepresentation`. - -In the following sections, we describe how drivers have historically differed -in their encoding of UUIDs, and how applications can use the -:class:`~bson.binary.UuidRepresentation` configuration option to maintain -cross-language compatibility. - -.. 
attention:: New applications that do not share a MongoDB deployment with - any other application and that have never stored UUIDs in MongoDB - should use the ``standard`` UUID representation for cross-language - compatibility. See :ref:`configuring-uuid-representation` for details - on how to configure the :class:`~bson.binary.UuidRepresentation`. - -.. _example-legacy-uuid: - -Legacy Handling of UUID Data ----------------------------- - -Historically, MongoDB Drivers have used different byte-ordering -while serializing UUID types to :class:`~bson.binary.Binary`. -Consider, for instance, a UUID with the following canonical textual -representation:: - - 00112233-4455-6677-8899-aabbccddeeff - -This UUID would historically be serialized by the Python driver as:: - - 00112233-4455-6677-8899-aabbccddeeff - -The same UUID would historically be serialized by the C# driver as:: - - 33221100-5544-7766-8899-aabbccddeeff - -Finally, the same UUID would historically be serialized by the Java driver as:: - - 77665544-3322-1100-ffee-ddccbbaa9988 - -.. note:: For in-depth information about the the byte-order historically - used by different drivers, see the `Handling of Native UUID Types - Specification - `_. - -This difference in the byte-order of UUIDs encoded by different drivers can -result in highly unintuitive behavior in some scenarios. We detail two such -scenarios in the next sections. - -Scenario 1: Applications Share a MongoDB Deployment -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Consider the following situation: - -* Application ``C`` written in C# generates a UUID and uses it as the ``_id`` - of a document that it proceeds to insert into the ``uuid_test`` collection of - the ``example_db`` database. 
Let's assume that the canonical textual - representation of the generated UUID is:: - - 00112233-4455-6677-8899-aabbccddeeff - -* Application ``P`` written in Python attempts to ``find`` the document - written by application ``C`` in the following manner:: - - from uuid import UUID - collection = client.example_db.uuid_test - result = collection.find_one({'_id': UUID('00112233-4455-6677-8899-aabbccddeeff')}) - - In this instance, ``result`` will never be the document that - was inserted by application ``C`` in the previous step. This is because of - the different byte-order used by the C# driver for representing UUIDs as - BSON Binary. The following query, on the other hand, will successfully find - this document:: - - result = collection.find_one({'_id': UUID('33221100-5544-7766-8899-aabbccddeeff')}) - -This example demonstrates how the differing byte-order used by different -drivers can hamper interoperability. To workaround this problem, users should -configure their ``MongoClient`` with the appropriate -:class:`~bson.binary.UuidRepresentation` (in this case, ``client`` in application -``P`` can be configured to use the -:data:`~bson.binary.UuidRepresentation.CSHARP_LEGACY` representation to -avoid the unintuitive behavior) as described in -:ref:`configuring-uuid-representation`. - -Scenario 2: Round-Tripping UUIDs -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -In the following examples, we see how using a misconfigured -:class:`~bson.binary.UuidRepresentation` can cause an application -to inadvertently change the :class:`~bson.binary.Binary` subtype, and in some -cases, the bytes of the :class:`~bson.binary.Binary` field itself when -round-tripping documents containing UUIDs. 
- -Consider the following situation:: - - from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS - from bson.binary import Binary, UuidRepresentation - from uuid import uuid4 - - # Using UuidRepresentation.PYTHON_LEGACY stores a Binary subtype-3 UUID - python_opts = CodecOptions(uuid_representation=UuidRepresentation.PYTHON_LEGACY) - input_uuid = uuid4() - collection = client.testdb.get_collection('test', codec_options=python_opts) - collection.insert_one({'_id': 'foo', 'uuid': input_uuid}) - assert collection.find_one({'uuid': Binary(input_uuid.bytes, 3)})['_id'] == 'foo' - - # Retrieving this document using UuidRepresentation.STANDARD returns a Binary instance - std_opts = CodecOptions(uuid_representation=UuidRepresentation.STANDARD) - std_collection = client.testdb.get_collection('test', codec_options=std_opts) - doc = std_collection.find_one({'_id': 'foo'}) - assert isinstance(doc['uuid'], Binary) - - # Round-tripping the retrieved document yields the exact same document - std_collection.replace_one({'_id': 'foo'}, doc) - round_tripped_doc = collection.find_one({'uuid': Binary(input_uuid.bytes, 3)}) - assert doc == round_tripped_doc - - -In this example, round-tripping the document using the incorrect -:class:`~bson.binary.UuidRepresentation` (``STANDARD`` instead of -``PYTHON_LEGACY``) changes the :class:`~bson.binary.Binary` subtype as a -side-effect. **Note that this can also happen when the situation is reversed - -i.e. 
when the original document is written using ``STANDARD`` representation -and then round-tripped using the ``PYTHON_LEGACY`` representation.** - -In the next example, we see the consequences of incorrectly using a -representation that modifies byte-order (``CSHARP_LEGACY`` or ``JAVA_LEGACY``) -when round-tripping documents:: - - from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS - from bson.binary import Binary, UuidRepresentation - from uuid import uuid4 - - # Using UuidRepresentation.STANDARD stores a Binary subtype-4 UUID - std_opts = CodecOptions(uuid_representation=UuidRepresentation.STANDARD) - input_uuid = uuid4() - collection = client.testdb.get_collection('test', codec_options=std_opts) - collection.insert_one({'_id': 'baz', 'uuid': input_uuid}) - assert collection.find_one({'uuid': Binary(input_uuid.bytes, 4)})['_id'] == 'baz' - - # Retrieving this document using UuidRepresentation.JAVA_LEGACY returns a native UUID - # without modifying the UUID byte-order - java_opts = CodecOptions(uuid_representation=UuidRepresentation.JAVA_LEGACY) - java_collection = client.testdb.get_collection('test', codec_options=java_opts) - doc = java_collection.find_one({'_id': 'baz'}) - assert doc['uuid'] == input_uuid - - # Round-tripping the retrieved document silently changes the Binary bytes and subtype - java_collection.replace_one({'_id': 'baz'}, doc) - assert collection.find_one({'uuid': Binary(input_uuid.bytes, 3)}) is None - assert collection.find_one({'uuid': Binary(input_uuid.bytes, 4)}) is None - round_tripped_doc = collection.find_one({'_id': 'baz'}) - assert round_tripped_doc['uuid'] == Binary(input_uuid.bytes, 3).as_uuid(UuidRepresentation.JAVA_LEGACY) - - -In this case, using the incorrect :class:`~bson.binary.UuidRepresentation` -(``JAVA_LEGACY`` instead of ``STANDARD``) changes the -:class:`~bson.binary.Binary` bytes and subtype as a side-effect. 
-**Note that this happens when any representation that -manipulates byte-order (``CSHARP_LEGACY`` or ``JAVA_LEGACY``) is incorrectly -used to round-trip UUIDs written with ``STANDARD``. When the situation is -reversed - i.e. when the original document is written using ``CSHARP_LEGACY`` -or ``JAVA_LEGACY`` and then round-tripped using ``STANDARD`` - -only the :class:`~bson.binary.Binary` subtype is changed.** - -.. note:: Starting in PyMongo 4.0, these issue will be resolved as - the ``STANDARD`` representation will decode Binary subtype 3 fields as - :class:`~bson.binary.Binary` objects of subtype 3 (instead of - :class:`uuid.UUID`), and each of the ``LEGACY_*`` representations will - decode Binary subtype 4 fields to :class:`~bson.binary.Binary` objects of - subtype 4 (instead of :class:`uuid.UUID`). - -.. _configuring-uuid-representation: - -Configuring a UUID Representation ---------------------------------- - -Users can workaround the problems described above by configuring their -applications with the appropriate :class:`~bson.binary.UuidRepresentation`. -Configuring the representation modifies PyMongo's behavior while -encoding :class:`uuid.UUID` objects to BSON and decoding -Binary subtype 3 and 4 fields from BSON. - -Applications can set the UUID representation in one of the following ways: - -#. At the ``MongoClient`` level using the ``uuidRepresentation`` URI option, - e.g.:: - - client = MongoClient("mongodb://a:27107/?uuidRepresentation=standard") - - Valid values are: - - .. list-table:: - :header-rows: 1 - - * - Value - - UUID Representation - - * - ``unspecified`` - - :ref:`unspecified-representation-details` - - * - ``standard`` - - :ref:`standard-representation-details` - - * - ``pythonLegacy`` - - :ref:`python-legacy-representation-details` - - * - ``javaLegacy`` - - :ref:`java-legacy-representation-details` - - * - ``csharpLegacy`` - - :ref:`csharp-legacy-representation-details` - -#. 
At the ``MongoClient`` level using the ``uuidRepresentation`` kwarg - option, e.g.:: - - from bson.binary import UuidRepresentation - client = MongoClient(uuidRepresentation=UuidRepresentation.STANDARD) - -#. At the ``Database`` or ``Collection`` level by supplying a suitable - :class:`~bson.codec_options.CodecOptions` instance, e.g.:: - - from bson.codec_options import CodecOptions - csharp_opts = CodecOptions(uuid_representation=UuidRepresentation.CSHARP_LEGACY) - java_opts = CodecOptions(uuid_representation=UuidRepresentation.JAVA_LEGACY) - - # Get database/collection from client with csharpLegacy UUID representation - csharp_database = client.get_database('csharp_db', codec_options=csharp_opts) - csharp_collection = client.testdb.get_collection('csharp_coll', codec_options=csharp_opts) - - # Get database/collection from existing database/collection with javaLegacy UUID representation - java_database = csharp_database.with_options(codec_options=java_opts) - java_collection = csharp_collection.with_options(codec_options=java_opts) - -Supported UUID Representations ------------------------------- - -.. list-table:: - :header-rows: 1 - - * - UUID Representation - - Default? 
- - Encode :class:`uuid.UUID` to - - Decode :class:`~bson.binary.Binary` subtype 4 to - - Decode :class:`~bson.binary.Binary` subtype 3 to - - * - :ref:`standard-representation-details` - - No - - :class:`~bson.binary.Binary` subtype 4 - - :class:`uuid.UUID` - - :class:`~bson.binary.Binary` subtype 3 - - * - :ref:`unspecified-representation-details` - - Yes, in PyMongo>=4 - - Raise :exc:`ValueError` - - :class:`~bson.binary.Binary` subtype 4 - - :class:`~bson.binary.Binary` subtype 3 - - * - :ref:`python-legacy-representation-details` - - No - - :class:`~bson.binary.Binary` subtype 3 with standard byte-order - - :class:`~bson.binary.Binary` subtype 4 - - :class:`uuid.UUID` - - * - :ref:`java-legacy-representation-details` - - No - - :class:`~bson.binary.Binary` subtype 3 with Java legacy byte-order - - :class:`~bson.binary.Binary` subtype 4 - - :class:`uuid.UUID` - - * - :ref:`csharp-legacy-representation-details` - - No - - :class:`~bson.binary.Binary` subtype 3 with C# legacy byte-order - - :class:`~bson.binary.Binary` subtype 4 - - :class:`uuid.UUID` - -We now detail the behavior and use-case for each supported UUID -representation. - -.. _unspecified-representation-details: - -``UNSPECIFIED`` -^^^^^^^^^^^^^^^ - -.. attention:: Starting in PyMongo 4.0, - :data:`~bson.binary.UuidRepresentation.UNSPECIFIED` is the default - UUID representation used by PyMongo. - -The :data:`~bson.binary.UuidRepresentation.UNSPECIFIED` representation -prevents the incorrect interpretation of UUID bytes by stopping short of -automatically converting UUID fields in BSON to native UUID types. Decoding -a UUID when using this representation returns a :class:`~bson.binary.Binary` -object instead. If required, users can coerce the decoded -:class:`~bson.binary.Binary` objects into native UUIDs using the -:meth:`~bson.binary.Binary.as_uuid` method and specifying the appropriate -representation format. 
The following example shows -what this might look like for a UUID stored by the C# driver:: - - from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS - from bson.binary import Binary, UuidRepresentation - from uuid import uuid4 - - # Using UuidRepresentation.CSHARP_LEGACY - csharp_opts = CodecOptions(uuid_representation=UuidRepresentation.CSHARP_LEGACY) - - # Store a legacy C#-formatted UUID - input_uuid = uuid4() - collection = client.testdb.get_collection('test', codec_options=csharp_opts) - collection.insert_one({'_id': 'foo', 'uuid': input_uuid}) - - # Using UuidRepresentation.UNSPECIFIED - unspec_opts = CodecOptions(uuid_representation=UuidRepresentation.UNSPECIFIED) - unspec_collection = client.testdb.get_collection('test', codec_options=unspec_opts) - - # UUID fields are decoded as Binary when UuidRepresentation.UNSPECIFIED is configured - document = unspec_collection.find_one({'_id': 'foo'}) - decoded_field = document['uuid'] - assert isinstance(decoded_field, Binary) - - # Binary.as_uuid() can be used to coerce the decoded value to a native UUID - decoded_uuid = decoded_field.as_uuid(UuidRepresentation.CSHARP_LEGACY) - assert decoded_uuid == input_uuid - -Native :class:`uuid.UUID` objects cannot directly be encoded to -:class:`~bson.binary.Binary` when the UUID representation is ``UNSPECIFIED`` -and attempting to do so will result in an exception:: - - unspec_collection.insert_one({'_id': 'bar', 'uuid': uuid4()}) - Traceback (most recent call last): - ... - ValueError: cannot encode native uuid.UUID with UuidRepresentation.UNSPECIFIED. UUIDs can be manually converted to bson.Binary instances using bson.Binary.from_uuid() or a different UuidRepresentation can be configured. See the documentation for UuidRepresentation for more information. 
- -Instead, applications using :data:`~bson.binary.UuidRepresentation.UNSPECIFIED` -must explicitly coerce a native UUID using the -:meth:`~bson.binary.Binary.from_uuid` method:: - - explicit_binary = Binary.from_uuid(uuid4(), UuidRepresentation.STANDARD) - unspec_collection.insert_one({'_id': 'bar', 'uuid': explicit_binary}) - -.. _standard-representation-details: - -``STANDARD`` -^^^^^^^^^^^^ - -.. attention:: This UUID representation should be used by new applications or - applications that are encoding and/or decoding UUIDs in MongoDB for the - first time. - -The :data:`~bson.binary.UuidRepresentation.STANDARD` representation -enables cross-language compatibility by ensuring the same byte-ordering -when encoding UUIDs from all drivers. UUIDs written by a driver with this -representation configured will be handled correctly by every other provided -it is also configured with the ``STANDARD`` representation. - -``STANDARD`` encodes native :class:`uuid.UUID` objects to -:class:`~bson.binary.Binary` subtype 4 objects. - -.. _python-legacy-representation-details: - -``PYTHON_LEGACY`` -^^^^^^^^^^^^^^^^^ - -.. attention:: This uuid representation should be used when reading UUIDs - generated by existing applications that use the Python driver - but **don't** explicitly set a UUID representation. - -.. attention:: :data:`~bson.binary.UuidRepresentation.PYTHON_LEGACY` - was the default uuid representation in PyMongo 3. - -The :data:`~bson.binary.UuidRepresentation.PYTHON_LEGACY` representation -corresponds to the legacy representation of UUIDs used by PyMongo. This -representation conforms with -`RFC 4122 Section 4.1.2 `_. 
- -The following example illustrates the use of this representation:: - - from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS - from bson.binary import Binary, UuidRepresentation - - # No configured UUID representation - collection = client.python_legacy.get_collection('test', codec_options=DEFAULT_CODEC_OPTIONS) - - # Using UuidRepresentation.PYTHON_LEGACY - pylegacy_opts = CodecOptions(uuid_representation=UuidRepresentation.PYTHON_LEGACY) - pylegacy_collection = client.python_legacy.get_collection('test', codec_options=pylegacy_opts) - - # UUIDs written by PyMongo 3 with no UuidRepresentation configured - # (or PyMongo 4.0 with PYTHON_LEGACY) can be queried using PYTHON_LEGACY - uuid_1 = uuid4() - pylegacy_collection.insert_one({'uuid': uuid_1}) - document = pylegacy_collection.find_one({'uuid': uuid_1}) - -``PYTHON_LEGACY`` encodes native :class:`uuid.UUID` objects to -:class:`~bson.binary.Binary` subtype 3 objects, preserving the same -byte-order as :attr:`~uuid.UUID.bytes`:: - - from bson.binary import Binary - - document = collection.find_one({'uuid': Binary(uuid_2.bytes, subtype=3)}) - assert document['uuid'] == uuid_2 - -.. _java-legacy-representation-details: - -``JAVA_LEGACY`` -^^^^^^^^^^^^^^^ - -.. attention:: This UUID representation should be used when reading UUIDs - written to MongoDB by the legacy applications (i.e. applications that don't - use the ``STANDARD`` representation) using the Java driver. - -The :data:`~bson.binary.UuidRepresentation.JAVA_LEGACY` representation -corresponds to the legacy representation of UUIDs used by the MongoDB Java -Driver. - -.. note:: The ``JAVA_LEGACY`` representation reverses the order of bytes 0-7, - and bytes 8-15. - -As an example, consider the same UUID described in :ref:`example-legacy-uuid`. -Let us assume that an application used the Java driver without an explicitly -specified UUID representation to insert the example UUID -``00112233-4455-6677-8899-aabbccddeeff`` into MongoDB. 
If we try to read this -value using ``PYTHON_LEGACY``, we end up with an entirely different UUID:: - - UUID('77665544-3322-1100-ffee-ddccbbaa9988') - -However, if we explicitly set the representation to -:data:`~bson.binary.UuidRepresentation.JAVA_LEGACY`, we get the correct result:: - - UUID('00112233-4455-6677-8899-aabbccddeeff') - -PyMongo uses the specified UUID representation to reorder the BSON bytes and -load them correctly. ``JAVA_LEGACY`` encodes native :class:`uuid.UUID` objects -to :class:`~bson.binary.Binary` subtype 3 objects, while performing the same -byte-reordering as the legacy Java driver's UUID to BSON encoder. - -.. _csharp-legacy-representation-details: - -``CSHARP_LEGACY`` -^^^^^^^^^^^^^^^^^ - -.. attention:: This UUID representation should be used when reading UUIDs - written to MongoDB by the legacy applications (i.e. applications that don't - use the ``STANDARD`` representation) using the C# driver. - -The :data:`~bson.binary.UuidRepresentation.CSHARP_LEGACY` representation -corresponds to the legacy representation of UUIDs used by the MongoDB Java -Driver. - -.. note:: The ``CSHARP_LEGACY`` representation reverses the order of bytes 0-3, - bytes 4-5, and bytes 6-7. - -As an example, consider the same UUID described in :ref:`example-legacy-uuid`. -Let us assume that an application used the C# driver without an explicitly -specified UUID representation to insert the example UUID -``00112233-4455-6677-8899-aabbccddeeff`` into MongoDB. If we try to read this -value using PYTHON_LEGACY, we end up with an entirely different UUID:: - - UUID('33221100-5544-7766-8899-aabbccddeeff') - -However, if we explicitly set the representation to -:data:`~bson.binary.UuidRepresentation.CSHARP_LEGACY`, we get the correct result:: - - UUID('00112233-4455-6677-8899-aabbccddeeff') - -PyMongo uses the specified UUID representation to reorder the BSON bytes and -load them correctly. 
``CSHARP_LEGACY`` encodes native :class:`uuid.UUID` -objects to :class:`~bson.binary.Binary` subtype 3 objects, while performing -the same byte-reordering as the legacy C# driver's UUID to BSON encoder. diff --git a/doc/faq.rst b/doc/faq.rst deleted file mode 100644 index cb67ea7fe5..0000000000 --- a/doc/faq.rst +++ /dev/null @@ -1,595 +0,0 @@ -Frequently Asked Questions -========================== - -Is PyMongo thread-safe? ------------------------ - -PyMongo is thread-safe and provides built-in connection pooling -for threaded applications. - -.. _pymongo-fork-safe: - -Is PyMongo fork-safe? ---------------------- - -PyMongo is not fork-safe. Care must be taken when using instances of -:class:`~pymongo.mongo_client.MongoClient` with ``fork()``. Specifically, -instances of MongoClient must not be copied from a parent process to -a child process. Instead, the parent process and each child process must -create their own instances of MongoClient. Instances of MongoClient copied from -the parent process have a high probability of deadlock in the child process due -to the inherent incompatibilities between ``fork()``, threads, and locks -described :ref:`below `. PyMongo will attempt to -issue a warning if there is a chance of this deadlock occurring. - -.. _pymongo-fork-safe-details: - -MongoClient spawns multiple threads to run background tasks such as monitoring -connected servers. These threads share state that is protected by instances of -:class:`~threading.Lock`, which are themselves `not fork-safe`_. The -driver is therefore subject to the same limitations as any other multithreaded -code that uses :class:`~threading.Lock` (and mutexes in general). One of these -limitations is that the locks become useless after ``fork()``. During the fork, -all locks are copied over to the child process in the same state as they were -in the parent: if they were locked, the copied locks are also locked. 
The child -created by ``fork()`` only has one thread, so any locks that were taken out by -other threads in the parent will never be released in the child. The next time -the child process attempts to acquire one of these locks, deadlock occurs. - -Starting in version 4.3, PyMongo utilizes :py:func:`os.register_at_fork` to -reset its locks and other shared state in the child process after a -:py:func:`os.fork` to reduce the frequency of deadlocks. However deadlocks -are still possible because libraries that PyMongo depends on, like `OpenSSL`_ -and `getaddrinfo(3)`_ (on some platforms), are not fork() safe in a -multithreaded application. Linux also imposes the restriction that: - - After a `fork()`_ in a multithreaded program, the child can - safely call only async-signal-safe functions (see - `signal-safety(7)`_) until such time as it calls `execve(2)`_. - -PyMongo relies on functions that are *not* `async-signal-safe`_ and hence the -child process can experience deadlocks or crashes when attempting to call -a non `async-signal-safe`_ function. For examples of deadlocks or crashes -that could occur see `PYTHON-3406`_. - -For a long but interesting read about the problems of Python locks in -multithreaded contexts with ``fork()``, see https://bugs.python.org/issue6721. - -.. _not fork-safe: https://bugs.python.org/issue6721 -.. _OpenSSL: https://github.com/openssl/openssl/issues/19066 -.. _fork(): https://man7.org/linux/man-pages/man2/fork.2.html -.. _signal-safety(7): https://man7.org/linux/man-pages/man7/signal-safety.7.html -.. _async-signal-safe: https://man7.org/linux/man-pages/man7/signal-safety.7.html -.. _execve(2): https://man7.org/linux/man-pages/man2/execve.2.html -.. _getaddrinfo(3): https://man7.org/linux/man-pages/man3/gai_strerror.3.html -.. _PYTHON-3406: https://jira.mongodb.org/browse/PYTHON-3406 - -.. _connection-pooling: - -Can PyMongo help me load the results of my query as a Pandas ``DataFrame``? 
---------------------------------------------------------------------------- - -While PyMongo itself does not provide any APIs for working with -numerical or columnar data, -`PyMongoArrow `_ -is a companion library to PyMongo that makes it easy to load MongoDB query result sets as -`Pandas DataFrames `_, -`NumPy ndarrays `_, or -`Apache Arrow Tables `_. - -How does connection pooling work in PyMongo? --------------------------------------------- - -Every :class:`~pymongo.mongo_client.MongoClient` instance has a built-in -connection pool per server in your MongoDB topology. These pools open sockets -on demand to support the number of concurrent MongoDB operations that your -multi-threaded application requires. There is no thread-affinity for sockets. - -The size of each connection pool is capped at ``maxPoolSize``, which defaults -to 100. If there are ``maxPoolSize`` connections to a server and all are in -use, the next request to that server will wait until one of the connections -becomes available. - -The client instance opens two additional sockets per server in your MongoDB -topology for monitoring the server's state. - -For example, a client connected to a 3-node replica set opens 6 monitoring -sockets. It also opens as many sockets as needed to support a multi-threaded -application's concurrent operations on each server, up to ``maxPoolSize``. With -a ``maxPoolSize`` of 100, if the application only uses the primary (the -default), then only the primary connection pool grows and the total connections -is at most 106. If the application uses a -:class:`~pymongo.read_preferences.ReadPreference` to query the secondaries, -their pools also grow and the total connections can reach 306. - -Additionally, the pools are rate limited such that each connection pool can -only create at most 2 connections in parallel at any time. 
The connection -creation covers covers all the work required to setup a new connection -including DNS, TCP, SSL/TLS, MongoDB handshake, and MongoDB authentication. -For example, if three threads concurrently attempt to check out a connection -from an empty pool, the first two threads will begin creating new connections -while the third thread will wait. The third thread stops waiting when either: - -- one of the first two threads finishes creating a connection, or -- an existing connection is checked back into the pool. - -Rate limiting concurrent connection creation reduces the likelihood of -connection storms and improves the driver's ability to reuse existing -connections. - -It is possible to set the minimum number of concurrent connections to each -server with ``minPoolSize``, which defaults to 0. The connection pool will be -initialized with this number of sockets. If sockets are closed due to any -network errors, causing the total number of sockets (both in use and idle) to -drop below the minimum, more sockets are opened until the minimum is reached. - -The maximum number of milliseconds that a connection can remain idle in the -pool before being removed and replaced can be set with ``maxIdleTimeMS``, which -defaults to ``None`` (no limit). - -The default configuration for a :class:`~pymongo.mongo_client.MongoClient` -works for most applications:: - - client = MongoClient(host, port) - -Create this client **once** for each process, and reuse it for all -operations. It is a common mistake to create a new client for each request, -which is very inefficient. - -To support extremely high numbers of concurrent MongoDB operations within one -process, increase ``maxPoolSize``:: - - client = MongoClient(host, port, maxPoolSize=200) - -... or make it unbounded:: - - client = MongoClient(host, port, maxPoolSize=None) - -Once the pool reaches its maximum size, additional threads have to wait for -sockets to become available. 
PyMongo does not limit the number of threads -that can wait for sockets to become available and it is the application's -responsibility to limit the size of its thread pool to bound queuing during a -load spike. Threads are allowed to wait for any length of time unless -``waitQueueTimeoutMS`` is defined:: - - client = MongoClient(host, port, waitQueueTimeoutMS=100) - -A thread that waits more than 100ms (in this example) for a socket raises -:exc:`~pymongo.errors.ConnectionFailure`. Use this option if it is more -important to bound the duration of operations during a load spike than it is to -complete every operation. - -When :meth:`~pymongo.mongo_client.MongoClient.close` is called by any thread, -all idle sockets are closed, and all sockets that are in use will be closed as -they are returned to the pool. - -Does PyMongo support Python 3? ------------------------------- - -PyMongo supports CPython 3.9+ and PyPy3.10+. See the :doc:`python3` for details. - -Does PyMongo support asynchronous frameworks like Gevent, asyncio, Tornado, or Twisted? ---------------------------------------------------------------------------------------- -As of PyMongo v4.13, PyMongo fully supports asyncio and `Tornado `_. See `the official docs `_ for more details. - -PyMongo also fully supports :doc:`Gevent `. - -For `Twisted `_, see `TxMongo -`_. Its stated mission is to keep feature -parity with PyMongo. - -.. _writes-and-ids: - -Why does PyMongo add an _id field to all of my documents? ---------------------------------------------------------- - -When a document is inserted to MongoDB using -:meth:`~pymongo.collection.Collection.insert_one`, -:meth:`~pymongo.collection.Collection.insert_many`, or -:meth:`~pymongo.collection.Collection.bulk_write`, and that document does not -include an ``_id`` field, PyMongo automatically adds one for you, set to an -instance of :class:`~bson.objectid.ObjectId`. 
For example:: - - >>> my_doc = {'x': 1} - >>> collection.insert_one(my_doc) - InsertOneResult(ObjectId('560db337fba522189f171720'), acknowledged=True) - >>> my_doc - {'x': 1, '_id': ObjectId('560db337fba522189f171720')} - -Users often discover this behavior when calling -:meth:`~pymongo.collection.Collection.insert_many` with a list of references -to a single document raises :exc:`~pymongo.errors.BulkWriteError`. Several -Python idioms lead to this pitfall:: - - >>> doc = {} - >>> collection.insert_many(doc for _ in range(10)) - Traceback (most recent call last): - ... - pymongo.errors.BulkWriteError: batch op errors occurred - >>> doc - {'_id': ObjectId('560f171cfba52279f0b0da0c')} - - >>> docs = [{}] - >>> collection.insert_many(docs * 10) - Traceback (most recent call last): - ... - pymongo.errors.BulkWriteError: batch op errors occurred - >>> docs - [{'_id': ObjectId('560f1933fba52279f0b0da0e')}] - -PyMongo adds an ``_id`` field in this manner for a few reasons: - -- All MongoDB documents are required to have an ``_id`` field. -- If PyMongo were to insert a document without an ``_id`` MongoDB would add one - itself, but it would not report the value back to PyMongo. -- Copying the document to insert before adding the ``_id`` field would be - prohibitively expensive for most high write volume applications. - -If you don't want PyMongo to add an ``_id`` to your documents, insert only -documents that already have an ``_id`` field, added by your application. - -Key order in subdocuments -- why does my query work in the shell but not PyMongo? ---------------------------------------------------------------------------------- - -.. - Note: We should rework this section now that Python 3.6+ has ordered dict. - -.. 
testsetup:: key-order - - from bson.son import SON - from pymongo.mongo_client import MongoClient - - collection = MongoClient().test.collection - collection.drop() - collection.insert_one({"_id": 1.0, "subdocument": SON([("b", 1.0), ("a", 1.0)])}) - -The key-value pairs in a BSON document can have any order (except that ``_id`` -is always first). The mongo shell preserves key order when reading and writing -data. Observe that "b" comes before "a" when we create the document and when it -is displayed: - -.. code-block:: javascript - - > // mongo shell. - > db.collection.insertOne( { "_id" : 1, "subdocument" : { "b" : 1, "a" : 1 } } ) - WriteResult({ "nInserted" : 1 }) - > db.collection.findOne() - { "_id" : 1, "subdocument" : { "b" : 1, "a" : 1 } } - -PyMongo represents BSON documents as Python dicts by default, and the order -of keys in dicts is not defined. That is, a dict declared with the "a" key -first is the same, to Python, as one with "b" first: - - >>> print({'a': 1.0, 'b': 1.0}) - {'a': 1.0, 'b': 1.0} - >>> print({'b': 1.0, 'a': 1.0}) - {'a': 1.0, 'b': 1.0} - -Therefore, Python dicts are not guaranteed to show keys in the order they are -stored in BSON. Here, "a" is shown before "b": - - >>> print(collection.find_one()) - {'_id': 1.0, 'subdocument': {'a': 1.0, 'b': 1.0}} - -To preserve order when reading BSON, use the :class:`~bson.son.SON` class, -which is a dict that remembers its key order. First, get a handle to the -collection, configured to use :class:`~bson.son.SON` instead of dict: - -.. 
doctest:: key-order - :options: +NORMALIZE_WHITESPACE - - >>> from bson import CodecOptions, SON - >>> opts = CodecOptions(document_class=SON) - >>> opts - CodecOptions(document_class=...SON..., tz_aware=False, uuid_representation=UuidRepresentation.UNSPECIFIED, unicode_decode_error_handler='strict', tzinfo=None, type_registry=TypeRegistry(type_codecs=[], fallback_encoder=None), datetime_conversion=DatetimeConversion.DATETIME) - >>> collection_son = collection.with_options(codec_options=opts) - -Now, documents and subdocuments in query results are represented with -:class:`~bson.son.SON` objects: - -.. doctest:: key-order - - >>> print(collection_son.find_one()) - SON([('_id', 1.0), ('subdocument', SON([('b', 1.0), ('a', 1.0)]))]) - -The subdocument's actual storage layout is now visible: "b" is before "a". - -Because a dict's key order is not defined, you cannot predict how it will be -serialized **to** BSON. But MongoDB considers subdocuments equal only if their -keys have the same order. So if you use a dict to query on a subdocument it may -not match: - - >>> collection.find_one({'subdocument': {'a': 1.0, 'b': 1.0}}) is None - True - -Swapping the key order in your query makes no difference: - - >>> collection.find_one({'subdocument': {'b': 1.0, 'a': 1.0}}) is None - True - -... because, as we saw above, Python considers the two dicts the same. - -There are two solutions. First, you can match the subdocument field-by-field: - - >>> collection.find_one({'subdocument.a': 1.0, - ... 'subdocument.b': 1.0}) - {'_id': 1.0, 'subdocument': {'a': 1.0, 'b': 1.0}} - -The query matches any subdocument with an "a" of 1.0 and a "b" of 1.0, -regardless of the order you specify them in Python or the order they are stored -in BSON. Additionally, this query now matches subdocuments with additional -keys besides "a" and "b", whereas the previous query required an exact match. 
- -The second solution is to use a :class:`~bson.son.SON` to specify the key order: - - >>> query = {'subdocument': SON([('b', 1.0), ('a', 1.0)])} - >>> collection.find_one(query) - {'_id': 1.0, 'subdocument': {'a': 1.0, 'b': 1.0}} - -The key order you use when you create a :class:`~bson.son.SON` is preserved -when it is serialized to BSON and used as a query. Thus you can create a -subdocument that exactly matches the subdocument in the collection. - -.. seealso:: `MongoDB Manual entry on subdocument matching - `_. - -What does *CursorNotFound* cursor id not valid at server mean? --------------------------------------------------------------- -Cursors in MongoDB can timeout on the server if they've been open for -a long time without any operations being performed on them. This can -lead to an :class:`~pymongo.errors.CursorNotFound` exception being -raised when attempting to iterate the cursor. - -How do I change the timeout value for cursors? ----------------------------------------------- -MongoDB doesn't support custom timeouts for cursors, but cursor -timeouts can be turned off entirely. Pass ``no_cursor_timeout=True`` to -:meth:`~pymongo.collection.Collection.find`. - -How can I store :mod:`decimal.Decimal` instances? -------------------------------------------------- - -PyMongo >= 3.4 supports the Decimal128 BSON type introduced in MongoDB 3.4. -See :mod:`~bson.decimal128` for more information. - -MongoDB <= 3.2 only supports IEEE 754 floating points - the same as the -Python float type. The only way PyMongo could store Decimal instances to -these versions of MongoDB would be to convert them to this standard, so -you'd really only be storing floats anyway - we force users to do this -conversion explicitly so that they are aware that it is happening. - -I'm saving ``9.99`` but when I query my document contains ``9.9900000000000002`` - what's going on here? 
--------------------------------------------------------------------------------------------------------- -The database representation is ``9.99`` as an IEEE floating point (which -is common to MongoDB and Python as well as most other modern -languages). The problem is that ``9.99`` cannot be represented exactly -with a double precision floating point - this is true in some versions of -Python as well: - - >>> 9.99 - 9.9900000000000002 - -The result that you get when you save ``9.99`` with PyMongo is exactly the -same as the result you'd get saving it with the JavaScript shell or -any of the other languages (and as the data you're working with when -you type ``9.99`` into a Python program). - -Can you add attribute style access for documents? -------------------------------------------------- -This request has come up a number of times but we've decided not to -implement anything like this. The relevant `jira case -`_ has some information -about the decision, but here is a brief summary: - -1. This will pollute the attribute namespace for documents, so could - lead to subtle bugs / confusing errors when using a key with the - same name as a dictionary method. - -2. The only reason we even use SON objects instead of regular - dictionaries is to maintain key ordering, since the server - requires this for certain operations. So we're hesitant to - needlessly complicate SON (at some point it's hypothetically - possible we might want to revert back to using dictionaries alone, - without breaking backwards compatibility for everyone). - -3. It's easy (and Pythonic) for new users to deal with documents, - since they behave just like dictionaries. If we start changing - their behavior it adds a barrier to entry for new users - another - class to learn. - -What is the correct way to handle time zones with PyMongo? 
----------------------------------------------------------- - -See :doc:`examples/datetimes` for examples on how to handle -:class:`~datetime.datetime` objects correctly. - -How can I save a :mod:`datetime.date` instance? ------------------------------------------------ -PyMongo doesn't support saving :mod:`datetime.date` instances, since -there is no BSON type for dates without times. Rather than having the -driver enforce a convention for converting :mod:`datetime.date` -instances to :mod:`datetime.datetime` instances for you, any -conversion should be performed in your client code. - -.. _web-application-querying-by-objectid: - -When I query for a document by ObjectId in my web application I get no result ------------------------------------------------------------------------------ -It's common in web applications to encode documents' ObjectIds in URLs, like:: - - "/posts/50b3bda58a02fb9a84d8991e" - -Your web framework will pass the ObjectId portion of the URL to your request -handler as a string, so it must be converted to :class:`~bson.objectid.ObjectId` -before it is passed to :meth:`~pymongo.collection.Collection.find_one`. It is a -common mistake to forget to do this conversion. Here's how to do it correctly -in Flask_ (other web frameworks are similar):: - - from pymongo import MongoClient - from bson.objectid import ObjectId - - from flask import Flask, render_template - - client = MongoClient() - app = Flask(__name__) - - @app.route("/posts/<_id>") - def show_post(_id): - # NOTE!: converting _id from string to ObjectId before passing to find_one - post = client.db.posts.find_one({'_id': ObjectId(_id)}) - return render_template('post.html', post=post) - - if __name__ == "__main__": - app.run() - -.. _Flask: http://flask.pocoo.org/ - -.. seealso:: :ref:`querying-by-objectid` - -How can I use PyMongo from Django? ----------------------------------- -`Django `_ is a popular Python web -framework. Django includes an ORM, :mod:`django.db`. 
Currently, -there's no official MongoDB backend for Django. - -`django-mongodb-engine `_ -is an unofficial MongoDB backend that supports Django aggregations, (atomic) -updates, embedded objects, Map/Reduce and GridFS. It allows you to use most -of Django's built-in features, including the ORM, admin, authentication, site -and session frameworks and caching. - -However, it's easy to use MongoDB (and PyMongo) from Django -without using a Django backend. Certain features of Django that require -:mod:`django.db` (admin, authentication and sessions) will not work -using just MongoDB, but most of what Django provides can still be -used. - -One project which should make working with MongoDB and Django easier -is `mango `_. Mango is a set of -MongoDB backends for Django sessions and authentication (bypassing -:mod:`django.db` entirely). - -.. _using-with-mod-wsgi: - -Does PyMongo work with **mod_wsgi**? ------------------------------------- -Yes. See the configuration guide for :ref:`pymongo-and-mod_wsgi`. - -Does PyMongo work with PythonAnywhere? --------------------------------------- -No. PyMongo creates Python threads which -`PythonAnywhere `_ does not support. For more -information see `PYTHON-1495 `_. - -How can I use something like Python's ``json`` module to encode my documents to JSON? -------------------------------------------------------------------------------------- -:mod:`~bson.json_util` is PyMongo's built in, flexible tool for using -Python's :mod:`json` module with BSON documents and `MongoDB Extended JSON -`_. The -:mod:`json` module won't work out of the box with all documents from PyMongo -as PyMongo supports some special types (like :class:`~bson.objectid.ObjectId` -and :class:`~bson.dbref.DBRef`) that are not supported in JSON. - -`python-bsonjs `_ is a fast -BSON to MongoDB Extended JSON converter built on top of -`libbson `_. ``python-bsonjs`` does not -depend on PyMongo and can offer a nice performance improvement over -:mod:`~bson.json_util`. 
``python-bsonjs`` works best with PyMongo when using -:class:`~bson.raw_bson.RawBSONDocument`. - -Why do I get OverflowError decoding dates stored by another language's driver? ------------------------------------------------------------------------------- -PyMongo decodes BSON datetime values to instances of Python's -:class:`datetime.datetime`. Instances of :class:`datetime.datetime` are -limited to years between :data:`datetime.MINYEAR` (usually 1) and -:data:`datetime.MAXYEAR` (usually 9999). Some MongoDB drivers (e.g. the PHP -driver) can store BSON datetimes with year values far outside those supported -by :class:`datetime.datetime`. - -There are a few ways to work around this issue. Starting with PyMongo 4.3, -:func:`bson.decode` can decode BSON datetimes in one of four ways, and can -be specified using the ``datetime_conversion`` parameter of -:class:`~bson.codec_options.CodecOptions`. - -The default option is -:attr:`~bson.codec_options.DatetimeConversion.DATETIME`, which will -attempt to decode as a :class:`datetime.datetime`, allowing -:class:`~builtin.OverflowError` to occur upon out-of-range dates. -:attr:`~bson.codec_options.DatetimeConversion.DATETIME_AUTO` alters -this behavior to instead return :class:`~bson.datetime_ms.DatetimeMS` when -representations are out-of-range, while returning :class:`~datetime.datetime` -objects as before: - -.. doctest:: - - >>> from datetime import datetime - >>> from bson.datetime_ms import DatetimeMS - >>> from bson.codec_options import DatetimeConversion - >>> from pymongo import MongoClient - >>> client = MongoClient(datetime_conversion=DatetimeConversion.DATETIME_AUTO) - >>> client.db.collection.insert_one({"x": datetime(1970, 1, 1)}) - InsertOneResult(ObjectId('...'), acknowledged=True) - >>> client.db.collection.insert_one({"x": DatetimeMS(2**62)}) - InsertOneResult(ObjectId('...'), acknowledged=True) - >>> for x in client.db.collection.find(): - ... print(x) - ... 
- {'_id': ObjectId('...'), 'x': datetime.datetime(1970, 1, 1, 0, 0)} - {'_id': ObjectId('...'), 'x': DatetimeMS(4611686018427387904)} - -For other options, please refer to -:class:`~bson.codec_options.DatetimeConversion`. - -Another option that does not involve setting ``datetime_conversion`` is to to -filter out documents values outside of the range supported by -:class:`~datetime.datetime`: - - >>> from datetime import datetime - >>> coll = client.test.dates - >>> cur = coll.find({'dt': {'$gte': datetime.min, '$lte': datetime.max}}) - -Another option, assuming you don't need the datetime field, is to filter out -just that field:: - - >>> cur = coll.find({}, projection={'dt': False}) - -.. _multiprocessing: - -Using PyMongo with Multiprocessing ----------------------------------- - -On Unix systems the multiprocessing module spawns processes using ``fork()``. -Care must be taken when using instances of -:class:`~pymongo.mongo_client.MongoClient` with ``fork()``. Specifically, -instances of MongoClient must not be copied from a parent process to a child -process. Instead, the parent process and each child process must create their -own instances of MongoClient. For example:: - - # Each process creates its own instance of MongoClient. - def func(): - db = pymongo.MongoClient().mydb - # Do something with db. - - proc = multiprocessing.Process(target=func) - proc.start() - -**Never do this**:: - - client = pymongo.MongoClient() - - # Each child process attempts to copy a global MongoClient - # created in the parent process. Never do this. - def func(): - db = client.mydb - # Do something with db. - - proc = multiprocessing.Process(target=func) - proc.start() - -Instances of MongoClient copied from the parent process have a high probability -of deadlock in the child process due to -:ref:`inherent incompatibilities between fork(), threads, and locks -`. PyMongo will attempt to issue a warning if there -is a chance of this deadlock occurring. - -.. 
seealso:: :ref:`pymongo-fork-safe` diff --git a/doc/index.rst b/doc/index.rst index c7616ca795..85812d1b14 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -3,70 +3,21 @@ PyMongo |release| Documentation .. note:: The PyMongo documentation has been migrated to the `MongoDB Documentation site `_. - As of PyMongo 4.10, the ReadTheDocs site will contain the detailed changelog and API docs, while the - rest of the documentation will only appear on the MongoDB Documentation site. + This site contains only the detailed changelog and API docs, while the + rest of the documentation appears on the MongoDB Documentation site. Overview -------- **PyMongo** is a Python distribution containing tools for working with `MongoDB `_, and is the recommended way to -work with MongoDB from Python. This documentation attempts to explain -everything you need to know to use **PyMongo**. - -.. todo:: a list of PyMongo's features - -:doc:`installation` - Instructions on how to get the distribution. - -:doc:`tutorial` - Start here for a quick overview. - -:doc:`async-tutorial` - Start here for a quick overview of the asynchronous API. - -:doc:`examples/index` - Examples of how to perform specific tasks. - -:doc:`atlas` - Using PyMongo with MongoDB Atlas. - -:doc:`examples/tls` - Using PyMongo with TLS / SSL. - -:doc:`examples/encryption` - Using PyMongo with In-Use Encryption. - -:doc:`examples/type_hints` - Using PyMongo with type hints. - -:doc:`examples/logging` - Using PyMongo's logging capabilities. - -:doc:`faq` - Some questions that come up often. - -:doc:`migrate-to-pymongo4` - A PyMongo 3.x to 4.x migration guide. - -:doc:`python3` - Frequently asked questions about python 3 support. - -:doc:`compatibility-policy` - Explanation of deprecations, and how to keep pace with changes in PyMongo's - API. +work with MongoDB from Python. :doc:`api/index` The complete API documentation, organized by module. 
-:doc:`tools` - A listing of Python tools and libraries that have been written for - MongoDB. +:doc:`changelog` + A full list of changes to PyMongo. -:doc:`developer/index` - Developer guide for contributors to PyMongo. - -:doc:`common-issues` - Common issues encountered when using PyMongo. Getting Help ------------ @@ -97,10 +48,6 @@ minor tweaks to this documentation. To contribute, fork the project on `GitHub `_ and send a pull request. -Changes -------- -See the :doc:`changelog` for a full list of changes to PyMongo. - About This Documentation ------------------------ This documentation is generated using the `Sphinx @@ -119,18 +66,6 @@ Indices and tables .. toctree:: :hidden: - atlas - installation - tutorial - async-tutorial - examples/index - faq - compatibility-policy api/index - tools - contributors changelog - python3 - migrate-to-pymongo4 - developer/index - common-issues + contributors diff --git a/doc/installation.rst b/doc/installation.rst deleted file mode 100644 index 837cbf4d97..0000000000 --- a/doc/installation.rst +++ /dev/null @@ -1,197 +0,0 @@ -Installing / Upgrading -====================== -.. highlight:: bash - -**PyMongo** is in the `Python Package Index -`_. - -.. warning:: **Do not install the "bson" package from pypi.** PyMongo comes - with its own bson package; doing "pip install bson" - installs a third-party package that is incompatible with PyMongo. - -Installing with pip -------------------- - -We recommend using `pip `_ -to install pymongo on all platforms:: - - $ python3 -m pip install pymongo - -To get a specific version of pymongo:: - - $ python3 -m pip install pymongo==3.5.1 - -To upgrade using pip:: - - $ python3 -m pip install --upgrade pymongo - -Dependencies ------------- - -PyMongo supports CPython 3.9+ and PyPy3.10+. - -Required dependencies -..................... - -Support for mongodb+srv:// URIs requires `dnspython -`_ - -.. _optional-deps: - -Optional dependencies -..................... 
- -GSSAPI authentication requires `pykerberos -`_ on Unix or `WinKerberos -`_ on Windows. The correct -dependency can be installed automatically along with PyMongo:: - - $ python3 -m pip install "pymongo[gssapi]" - -:ref:`MONGODB-AWS` authentication requires `pymongo-auth-aws -`_:: - - $ python3 -m pip install "pymongo[aws]" - - - -:ref:`OCSP` requires `PyOpenSSL -`_, `requests -`_ and `service_identity -`_:: - - $ python3 -m pip install "pymongo[ocsp]" - -Wire protocol compression with snappy requires `python-snappy -`_:: - - $ python3 -m pip install "pymongo[snappy]" - -Wire protocol compression with zstandard requires `zstandard -`_:: - - $ python3 -m pip install "pymongo[zstd]" - -:ref:`Client-Side Field Level Encryption` requires `pymongocrypt -`_ and -`pymongo-auth-aws `_:: - - $ python3 -m pip install "pymongo[encryption]" - -You can install all dependencies automatically with the following -command:: - - $ python3 -m pip install "pymongo[gssapi,aws,ocsp,snappy,zstd,encryption]" - -Installing from source ----------------------- - -If you'd rather install directly from the source (i.e. to stay on the -bleeding edge), install the C extension dependencies then check out the -latest source from GitHub and install the driver from the resulting tree:: - - $ git clone https://github.com/mongodb/mongo-python-driver.git pymongo - $ cd pymongo/ - $ pip install . - -Installing from source on Unix -.............................. - -To build the optional C extensions on Linux or another non-macOS Unix you must -have the GNU C compiler (gcc) installed. Depending on your flavor of Unix -(or Linux distribution) you may also need a python development package that -provides the necessary header files for your version of Python. The package -name may vary from distro to distro. 
- -Debian and Ubuntu users should issue the following command:: - - $ sudo apt-get install build-essential python-dev - -Users of Red Hat based distributions (RHEL, CentOS, Amazon Linux, Oracle Linux, -Fedora, etc.) should issue the following command:: - - $ sudo yum install gcc python-devel - -Installing from source on macOS / OSX -..................................... - -If you want to install PyMongo with C extensions from source you will need -the command line developer tools. On modern versions of macOS they can be -installed by running the following in Terminal (found in -/Applications/Utilities/):: - - xcode-select --install - -For older versions of OSX you may need Xcode. See the notes below for various -OSX and Xcode versions. - -**Snow Leopard (10.6)** - Xcode 3 with 'UNIX Development Support'. - -**Snow Leopard Xcode 4**: The Python versions shipped with OSX 10.6.x -are universal binaries. They support i386, PPC, and x86_64. Xcode 4 removed -support for PPC, causing the distutils version shipped with Apple's builds of -Python to fail to build the C extensions if you have Xcode 4 installed. There -is a workaround:: - - # For some Python builds from python.org - $ env ARCHFLAGS='-arch i386 -arch x86_64' python -m pip install pymongo - -See `https://bugs.python.org/issue11623 `_ -for a more detailed explanation. - -**Lion (10.7) and newer** - PyMongo's C extensions can be built against -versions of Python 3.9+ downloaded from python.org. In all cases Xcode must be -installed with 'UNIX Development Support'. - -**Xcode 5.1**: Starting with version 5.1 the version of clang that ships with -Xcode throws an error when it encounters compiler flags it doesn't recognize. 
-This may cause C extension builds to fail with an error similar to:: - - clang: error: unknown argument: '-mno-fused-madd' [-Wunused-command-line-argument-hard-error-in-future] - -There are workarounds:: - - # Apple specified workaround for Xcode 5.1 - $ ARCHFLAGS=-Wno-error=unused-command-line-argument-hard-error-in-future pip install pymongo - - # Alternative workaround using CFLAGS - $ CFLAGS=-Qunused-arguments pip install pymongo - - -Installing from source on Windows -................................. - -If you want to install PyMongo with C extensions from source the following -requirements apply to both CPython and ActiveState's ActivePython: - -Windows -~~~~~~~ - -Install Visual Studio 2015+. - -.. _install-no-c: - -Installing Without C Extensions -------------------------------- - -By default, the driver attempts to build and install optional C -extensions (used for increasing performance) when it is installed. If -any extension fails to build the driver will be installed anyway but a -warning will be printed. - -If you wish to install PyMongo without the C extensions, even if the -extensions build properly, it can be done using a command line option to -*pip install*:: - - $ NO_EXT=1 python -m pip install . - -Installing a beta or release candidate --------------------------------------- - -MongoDB, Inc. may occasionally tag a beta or release candidate for testing by -the community before final release. These releases will not be uploaded to pypi -but can be found on the -`GitHub tags page `_. -They can be installed by passing the full URL for the tag to pip:: - - $ python3 -m pip install https://github.com/mongodb/mongo-python-driver/archive/4.4.0b0.tar.gz diff --git a/doc/migrate-to-pymongo4.rst b/doc/migrate-to-pymongo4.rst index 68dc1980b9..fda3e2e129 100644 --- a/doc/migrate-to-pymongo4.rst +++ b/doc/migrate-to-pymongo4.rst @@ -1,3 +1,5 @@ +:orphan: + .. 
_pymongo4-migration-guide: PyMongo 4 Migration Guide @@ -34,7 +36,7 @@ Python 3.6+ PyMongo 4.0 drops support for Python 2.7, 3.4, and 3.5. Users who wish to upgrade to 4.x must first upgrade to Python 3.6.2+. Users upgrading from -Python 2 should consult the :doc:`python3`. +Python 2 should consult `Python 3 `_. Enable Deprecation Warnings --------------------------- @@ -796,8 +798,7 @@ incoming documents after receiving them from PyMongo. Alternatively, if your application uses the ``SONManipulator`` API to convert custom types to BSON, the :class:`~bson.codec_options.TypeCodec` and :class:`~bson.codec_options.TypeRegistry` APIs may be a suitable alternative. -For more information, see the -:doc:`custom type example `. +For more information, see `Custom Types `_. ``SON().items()`` now returns ``dict_items`` object. ---------------------------------------------------- @@ -982,7 +983,7 @@ you will receive an error like this when attempting to encode a :class:`uuid.UUI ValueError: cannot encode native uuid.UUID with UuidRepresentation.UNSPECIFIED. UUIDs can be manually converted... -See :ref:`handling-uuid-data-example` for details. +See `Handling UUIDs `_ for details. Additional BSON classes implement ``__slots__`` ............................................... diff --git a/doc/python3.rst b/doc/python3.rst deleted file mode 100644 index 0a63f968a5..0000000000 --- a/doc/python3.rst +++ /dev/null @@ -1,114 +0,0 @@ -Python 3 FAQ -============ - -What Python 3 versions are supported? -------------------------------------- - -PyMongo supports CPython 3.9+ and PyPy3.10+. - -Are there any PyMongo behavior changes with Python 3? ------------------------------------------------------ - -Only one intentional change. Instances of :class:`bytes` -are encoded as BSON type 5 (Binary data) with subtype 0. -In Python 3 they are decoded back to :class:`bytes`. In -Python 2 they are decoded to :class:`~bson.binary.Binary` -with subtype 0. 
- -For example, let's insert a :class:`bytes` instance using Python 3 then -read it back. Notice the byte string is decoded back to :class:`bytes`:: - - Python 3.7.9 (v3.7.9:13c94747c7, Aug 15 2020, 01:31:08) - [Clang 6.0 (clang-600.0.57)] on darwin - Type "help", "copyright", "credits" or "license" for more information. - >>> import pymongo - >>> c = pymongo.MongoClient() - >>> c.test.bintest.insert_one({'binary': b'this is a byte string'}).inserted_id - ObjectId('4f9086b1fba5222021000000') - >>> c.test.bintest.find_one() - {'binary': b'this is a byte string', '_id': ObjectId('4f9086b1fba5222021000000')} - -Now retrieve the same document in Python 2. Notice the byte string is decoded -to :class:`~bson.binary.Binary`:: - - Python 2.7.6 (default, Feb 26 2014, 10:36:22) - [GCC 4.7.3] on linux2 - Type "help", "copyright", "credits" or "license" for more information. - >>> import pymongo - >>> c = pymongo.MongoClient() - >>> c.test.bintest.find_one() - {u'binary': Binary('this is a byte string', 0), u'_id': ObjectId('4f9086b1fba5222021000000')} - - -There is a similar change in behavior in parsing JSON binary with subtype 0. -In Python 3 they are decoded into :class:`bytes`. In Python 2 they are -decoded to :class:`~bson.binary.Binary` with subtype 0. - -For example, let's decode a JSON binary subtype 0 using Python 3. Notice the -byte string is decoded to :class:`bytes`:: - - Python 3.7.9 (v3.7.9:13c94747c7, Aug 15 2020, 01:31:08) - [Clang 6.0 (clang-600.0.57)] on darwin - Type "help", "copyright", "credits" or "license" for more information. - >>> from bson.json_util import loads - >>> loads('{"b": {"$binary": "dGhpcyBpcyBhIGJ5dGUgc3RyaW5n", "$type": "00"}}') - {'b': b'this is a byte string'} - -Now decode the same JSON in Python 2 . 
Notice the byte string is decoded -to :class:`~bson.binary.Binary`:: - - Python 2.7.10 (default, Feb 7 2017, 00:08:15) - [GCC 4.2.1 Compatible Apple LLVM 8.0.0 (clang-800.0.34)] on darwin - Type "help", "copyright", "credits" or "license" for more information. - >>> from bson.json_util import loads - >>> loads('{"b": {"$binary": "dGhpcyBpcyBhIGJ5dGUgc3RyaW5n", "$type": "00"}}') - {u'b': Binary('this is a byte string', 0)} - -Why can't I share pickled ObjectIds between some versions of Python 2 and 3? ----------------------------------------------------------------------------- - -Instances of :class:`~bson.objectid.ObjectId` pickled using Python 2 -can always be unpickled using Python 3. - -If you pickled an ObjectId using Python 2 and want to unpickle it using -Python 3 you must pass ``encoding='latin-1'`` to pickle.loads:: - - Python 2.7.6 (default, Feb 26 2014, 10:36:22) - [GCC 4.7.3] on linux2 - Type "help", "copyright", "credits" or "license" for more information. - >>> import pickle - >>> from bson.objectid import ObjectId - >>> oid = ObjectId() - >>> oid - ObjectId('4f919ba2fba5225b84000000') - >>> pickle.dumps(oid) - 'ccopy_reg\n_reconstructor\np0\n(cbson.objectid\...' - - Python 3.7.9 (v3.7.9:13c94747c7, Aug 15 2020, 01:31:08) - [Clang 6.0 (clang-600.0.57)] on darwin - Type "help", "copyright", "credits" or "license" for more information. - >>> import pickle - >>> pickle.loads(b'ccopy_reg\n_reconstructor\np0\n(cbson.objectid\...', encoding='latin-1') - ObjectId('4f919ba2fba5225b84000000') - - -If you need to pickle ObjectIds using Python 3 and unpickle them using Python 2 -you must use ``protocol <= 2``:: - - Python 3.7.9 (v3.7.9:13c94747c7, Aug 15 2020, 01:31:08) - [Clang 6.0 (clang-600.0.57)] on darwin - Type "help", "copyright", "credits" or "license" for more information. 
- >>> import pickle - >>> from bson.objectid import ObjectId - >>> oid = ObjectId() - >>> oid - ObjectId('4f96f20c430ee6bd06000000') - >>> pickle.dumps(oid, protocol=2) - b'\x80\x02cbson.objectid\nObjectId\nq\x00)\x81q\x01c_codecs\nencode\...' - - Python 2.7.15 (default, Jun 21 2018, 15:00:48) - [GCC 7.3.0] on linux2 - Type "help", "copyright", "credits" or "license" for more information. - >>> import pickle - >>> pickle.loads('\x80\x02cbson.objectid\nObjectId\nq\x00)\x81q\x01c_codecs\nencode\...') - ObjectId('4f96f20c430ee6bd06000000') diff --git a/doc/tools.rst b/doc/tools.rst deleted file mode 100644 index 5a9297ad64..0000000000 --- a/doc/tools.rst +++ /dev/null @@ -1,173 +0,0 @@ -Tools -===== -Many tools have been written for working with **PyMongo**. If you know -of or have created a tool for working with MongoDB from Python please -list it here. - -.. note:: We try to keep this list current. As such, projects that - have not been updated recently or appear to be unmaintained will - occasionally be removed from the list or moved to the back (to keep - the list from becoming too intimidating). - - If a project gets removed that is still being developed or is in active use - please let us know or add it back. - -ORM-like Layers ---------------- -Some people have found that they prefer to work with a layer that -has more features than PyMongo provides. Often, things like models and -validation are desired. To that end, several different ORM-like layers -have been written by various authors. - -It is our recommendation that new users begin by working directly with -PyMongo, as described in the rest of this documentation. Many people -have found that the features of PyMongo are enough for their -needs. Even if you eventually come to the decision to use one of these -layers, the time spent working directly with the driver will have -increased your understanding of how MongoDB actually works. 
- -MongoEngine - `MongoEngine `_ is another ORM-like - layer on top of PyMongo. It allows you to define schemas for - documents and query collections using syntax inspired by the Django - ORM. The code is available on `GitHub - `_; for more information, see - the `tutorial `_. - -MincePy - `MincePy `_ is an - object-document mapper (ODM) designed to make any Python object storable - and queryable in a MongoDB database. It is designed with machine learning - and big-data computational and experimental science applications in mind - but is entirely general and can be useful to anyone looking to organise, - share, or process large amounts data with as little change to their current - workflow as possible. - -Ming - `Ming `_ is a - library that allows you to enforce schemas on a MongoDB database in - your Python application. It was developed by `SourceForge - `_ in the course of their migration to - MongoDB. - -MotorEngine - `MotorEngine `_ is a port of - MongoEngine to Motor, for asynchronous access with Tornado. - It implements the same modeling APIs to be data-portable, meaning that a - model defined in MongoEngine can be read in MotorEngine. The source is - `available on GitHub `_. - -uMongo - `uMongo `_ is a Python MongoDB ODM. - Its inception comes from two needs: the lack of async ODM and the - difficulty to do document (un)serialization with existing ODMs. - Works with multiple drivers: PyMongo, TxMongo, motor_asyncio, and - mongomock. The source `is available on GitHub - `_ - -Django MongoDB Backend - `Django MongoDB Backend `_ is a - database backend library specifically made for Django. The integration takes - advantage of MongoDB's unique document model capabilities, which align - naturally with Django's philosophy of simplified data modeling and - reduced development complexity. The source is available - `on GitHub `_. - -No longer maintained -"""""""""""""""""""" - -PyMODM - `PyMODM `_ is an ORM-like framework on top - of PyMongo. 
PyMODM is maintained by engineers at MongoDB, Inc. and is quick - to adopt new MongoDB features. PyMODM is a "core" ODM, meaning that it - provides simple, extensible functionality that can be leveraged by other - libraries to target platforms like Django. At the same time, PyMODM is - powerful enough to be used for developing applications on its own. Complete - documentation is available on `readthedocs - `_. - -MongoKit - The `MongoKit `_ framework - is an ORM-like layer on top of PyMongo. There is also a MongoKit - `google group `_. - -Minimongo - `minimongo `_ is a lightweight, - pythonic interface to MongoDB. It retains pymongo's query and update API, - and provides a number of additional features, including a simple - document-oriented interface, connection pooling, index management, and - collection & database naming helpers. The `source is on GitHub - `_. - -Manga - `Manga `_ aims to be a simpler ORM-like - layer on top of PyMongo. The syntax for defining schema is inspired by the - Django ORM, but Pymongo's query language is maintained. The source `is on - GitHub `_. - -Humongolus - `Humongolus `_ is a lightweight ORM - framework for Python and MongoDB. The name comes from the combination of - MongoDB and `Homunculus `_ (the - concept of a miniature though fully formed human body). Humongolus allows - you to create models/schemas with robust validation. It attempts to be as - pythonic as possible and exposes the pymongo cursor objects whenever - possible. The code is available for download - `at GitHub `_. Tutorials and usage - examples are also available at GitHub. - -Framework Tools ---------------- -This section lists tools and adapters that have been designed to work with -various Python frameworks and libraries. - -* `Djongo `_ is a connector for using - Django with MongoDB as the database backend. Use the Django Admin GUI to add and - modify documents in MongoDB. 
- The `Djongo Source Code `_ is hosted on GitHub - and the `Djongo package `_ is on pypi. -* `Django MongoDB Engine - `_ is a MongoDB - database backend for Django that completely integrates with its ORM. - For more information `see the tutorial - `_. -* `mango `_ provides MongoDB backends for - Django sessions and authentication (bypassing :mod:`django.db` entirely). -* `Django MongoEngine - `_ is a MongoDB backend for - Django, an `example: - `_. - For more information see ``_ -* `mongodb_beaker `_ is a - project to enable using MongoDB as a backend for `beakers `_ caching / session system. - `The source is on GitHub `_. -* `Log4Mongo `_ is a flexible - Python logging handler that can store logs in MongoDB using normal and capped - collections. -* `MongoLog `_ is a Python logging - handler that stores logs in MongoDB using a capped collection. -* `rod.recipe.mongodb `_ is a - ZC Buildout recipe for downloading and installing MongoDB. -* `mongobox `_ is a tool to run a sandboxed - MongoDB instance from within a python app. -* `Flask-MongoAlchemy `_ Add - Flask support for MongoDB using MongoAlchemy. -* `Flask-MongoKit `_ Flask extension - to better integrate MongoKit into Flask. -* `Flask-PyMongo `_ Flask-PyMongo - bridges Flask and PyMongo. - -Alternative Drivers -------------------- -These are alternatives to PyMongo. - -* `Motor `_ is a full-featured, non-blocking - MongoDB driver for Python Tornado applications. - As of PyMongo v4.13, Motor's features have been merged into PyMongo via the new AsyncMongoClient API. - As a result of this merger, Motor will be officially deprecated on May 14th, 2026. - For more information, see `the official PyMongo docs `_. -* `TxMongo `_ is an asynchronous Twisted - Python driver for MongoDB. -* `MongoMock `_ is a small - library to help testing Python code that interacts with MongoDB via - Pymongo. 
diff --git a/doc/tutorial.rst b/doc/tutorial.rst deleted file mode 100644 index 46bde3035d..0000000000 --- a/doc/tutorial.rst +++ /dev/null @@ -1,413 +0,0 @@ -Tutorial -======== - -.. testsetup:: - - from pymongo import MongoClient - - client = MongoClient() - client.drop_database("test-database") - -This tutorial is intended as an introduction to working with -**MongoDB** and **PyMongo**. - -Prerequisites -------------- -Before we start, make sure that you have the **PyMongo** distribution -:doc:`installed `. In the Python shell, the following -should run without raising an exception: - -.. doctest:: - - >>> import pymongo - -This tutorial also assumes that a MongoDB instance is running on the -default host and port. Assuming you have `downloaded and installed -`_ MongoDB, you -can start it like so: - -.. code-block:: bash - - $ mongod - -Making a Connection with MongoClient ------------------------------------- -The first step when working with **PyMongo** is to create a -:class:`~pymongo.mongo_client.MongoClient` to the running **mongod** -instance. Doing so is easy: - -.. doctest:: - - >>> from pymongo import MongoClient - >>> client = MongoClient() - -The above code will connect on the default host and port. We can also -specify the host and port explicitly, as follows: - -.. doctest:: - - >>> client = MongoClient("localhost", 27017) - -Or use the MongoDB URI format: - -.. doctest:: - - >>> client = MongoClient("mongodb://localhost:27017/") - -Getting a Database ------------------- -A single instance of MongoDB can support multiple independent -`databases `_. When -working with PyMongo you access databases using attribute style access -on :class:`~pymongo.mongo_client.MongoClient` instances: - -.. doctest:: - - >>> db = client.test_database - -If your database name is such that using attribute style access won't -work (like ``test-database``), you can use dictionary style access -instead: - -.. 
doctest:: - - >>> db = client["test-database"] - -Getting a Collection --------------------- -A `collection `_ is a -group of documents stored in MongoDB, and can be thought of as roughly -the equivalent of a table in a relational database. Getting a -collection in PyMongo works the same as getting a database: - -.. doctest:: - - >>> collection = db.test_collection - -or (using dictionary style access): - -.. doctest:: - - >>> collection = db["test-collection"] - -An important note about collections (and databases) in MongoDB is that -they are created lazily - none of the above commands have actually -performed any operations on the MongoDB server. Collections and -databases are created when the first document is inserted into them. - -Documents ---------- -Data in MongoDB is represented (and stored) using JSON-style -documents. In PyMongo we use dictionaries to represent documents. As -an example, the following dictionary might be used to represent a blog -post: - -.. doctest:: - - >>> import datetime - >>> post = { - ... "author": "Mike", - ... "text": "My first blog post!", - ... "tags": ["mongodb", "python", "pymongo"], - ... "date": datetime.datetime.now(tz=datetime.timezone.utc), - ... } - -Note that documents can contain native Python types (like -:class:`datetime.datetime` instances) which will be automatically -converted to and from the appropriate `BSON -`_ types. - -.. todo:: link to table of Python <-> BSON types - -Inserting a Document --------------------- -To insert a document into a collection we can use the -:meth:`~pymongo.collection.Collection.insert_one` method: - -.. doctest:: - - >>> posts = db.posts - >>> post_id = posts.insert_one(post).inserted_id - >>> post_id - ObjectId('...') - -When a document is inserted a special key, ``"_id"``, is automatically -added if the document doesn't already contain an ``"_id"`` key. The value -of ``"_id"`` must be unique across the -collection. 
:meth:`~pymongo.collection.Collection.insert_one` returns an -instance of :class:`~pymongo.results.InsertOneResult`. For more information -on ``"_id"``, see the `documentation on _id -`_. - -After inserting the first document, the *posts* collection has -actually been created on the server. We can verify this by listing all -of the collections in our database: - -.. doctest:: - - >>> db.list_collection_names() - ['posts'] - -Getting a Single Document With :meth:`~pymongo.collection.Collection.find_one` ------------------------------------------------------------------------------- -The most basic type of query that can be performed in MongoDB is -:meth:`~pymongo.collection.Collection.find_one`. This method returns a -single document matching a query (or ``None`` if there are no -matches). It is useful when you know there is only one matching -document, or are only interested in the first match. Here we use -:meth:`~pymongo.collection.Collection.find_one` to get the first -document from the posts collection: - -.. doctest:: - - >>> import pprint - >>> pprint.pprint(posts.find_one()) - {'_id': ObjectId('...'), - 'author': 'Mike', - 'date': datetime.datetime(...), - 'tags': ['mongodb', 'python', 'pymongo'], - 'text': 'My first blog post!'} - -The result is a dictionary matching the one that we inserted previously. - -.. note:: The returned document contains an ``"_id"``, which was - automatically added on insert. - -:meth:`~pymongo.collection.Collection.find_one` also supports querying -on specific elements that the resulting document must match. To limit -our results to a document with author "Mike" we do: - -.. doctest:: - - >>> pprint.pprint(posts.find_one({"author": "Mike"})) - {'_id': ObjectId('...'), - 'author': 'Mike', - 'date': datetime.datetime(...), - 'tags': ['mongodb', 'python', 'pymongo'], - 'text': 'My first blog post!'} - -If we try with a different author, like "Eliot", we'll get no result: - -.. 
doctest:: - - >>> posts.find_one({"author": "Eliot"}) - >>> - -.. _querying-by-objectid: - -Querying By ObjectId --------------------- -We can also find a post by its ``_id``, which in our example is an ObjectId: - -.. doctest:: - - >>> post_id - ObjectId(...) - >>> pprint.pprint(posts.find_one({"_id": post_id})) - {'_id': ObjectId('...'), - 'author': 'Mike', - 'date': datetime.datetime(...), - 'tags': ['mongodb', 'python', 'pymongo'], - 'text': 'My first blog post!'} - -Note that an ObjectId is not the same as its string representation: - -.. doctest:: - - >>> post_id_as_str = str(post_id) - >>> posts.find_one({"_id": post_id_as_str}) # No result - >>> - -A common task in web applications is to get an ObjectId from the -request URL and find the matching document. It's necessary in this -case to **convert the ObjectId from a string** before passing it to -``find_one``:: - - from bson.objectid import ObjectId - - # The web framework gets post_id from the URL and passes it as a string - def get(post_id): - # Convert from string to ObjectId: - document = client.db.collection.find_one({'_id': ObjectId(post_id)}) - -.. seealso:: :ref:`web-application-querying-by-objectid` - -Bulk Inserts ------------- -In order to make querying a little more interesting, let's insert a -few more documents. In addition to inserting a single document, we can -also perform *bulk insert* operations, by passing a list as the -first argument to :meth:`~pymongo.collection.Collection.insert_many`. -This will insert each document in the list, sending only a single -command to the server: - -.. doctest:: - - >>> new_posts = [ - ... { - ... "author": "Mike", - ... "text": "Another post!", - ... "tags": ["bulk", "insert"], - ... "date": datetime.datetime(2009, 11, 12, 11, 14), - ... }, - ... { - ... "author": "Eliot", - ... "title": "MongoDB is fun", - ... "text": "and pretty easy too!", - ... "date": datetime.datetime(2009, 11, 10, 10, 45), - ... }, - ... 
] - >>> result = posts.insert_many(new_posts) - >>> result.inserted_ids - [ObjectId('...'), ObjectId('...')] - -There are a couple of interesting things to note about this example: - - - The result from :meth:`~pymongo.collection.Collection.insert_many` now - returns two :class:`~bson.objectid.ObjectId` instances, one for - each inserted document. - - ``new_posts[1]`` has a different "shape" than the other posts - - there is no ``"tags"`` field and we've added a new field, - ``"title"``. This is what we mean when we say that MongoDB is - *schema-free*. - -Querying for More Than One Document ------------------------------------ -To get more than a single document as the result of a query we use the -:meth:`~pymongo.collection.Collection.find` -method. :meth:`~pymongo.collection.Collection.find` returns a -:class:`~pymongo.cursor.Cursor` instance, which allows us to iterate -over all matching documents. For example, we can iterate over every -document in the ``posts`` collection: - -.. doctest:: - - >>> for post in posts.find(): - ... pprint.pprint(post) - ... - {'_id': ObjectId('...'), - 'author': 'Mike', - 'date': datetime.datetime(...), - 'tags': ['mongodb', 'python', 'pymongo'], - 'text': 'My first blog post!'} - {'_id': ObjectId('...'), - 'author': 'Mike', - 'date': datetime.datetime(...), - 'tags': ['bulk', 'insert'], - 'text': 'Another post!'} - {'_id': ObjectId('...'), - 'author': 'Eliot', - 'date': datetime.datetime(...), - 'text': 'and pretty easy too!', - 'title': 'MongoDB is fun'} - -Just like we did with :meth:`~pymongo.collection.Collection.find_one`, -we can pass a document to :meth:`~pymongo.collection.Collection.find` -to limit the returned results. Here, we get only those documents whose -author is "Mike": - -.. doctest:: - - >>> for post in posts.find({"author": "Mike"}): - ... pprint.pprint(post) - ... 
- {'_id': ObjectId('...'), - 'author': 'Mike', - 'date': datetime.datetime(...), - 'tags': ['mongodb', 'python', 'pymongo'], - 'text': 'My first blog post!'} - {'_id': ObjectId('...'), - 'author': 'Mike', - 'date': datetime.datetime(...), - 'tags': ['bulk', 'insert'], - 'text': 'Another post!'} - -Counting --------- -If we just want to know how many documents match a query we can -perform a :meth:`~pymongo.collection.Collection.count_documents` operation -instead of a full query. We can get a count of all of the documents -in a collection: - -.. doctest:: - - >>> posts.count_documents({}) - 3 - -or just of those documents that match a specific query: - -.. doctest:: - - >>> posts.count_documents({"author": "Mike"}) - 2 - -Range Queries -------------- -MongoDB supports many different types of `advanced queries -`_. As an -example, lets perform a query where we limit results to posts older -than a certain date, but also sort the results by author: - -.. doctest:: - - >>> d = datetime.datetime(2009, 11, 12, 12) - >>> for post in posts.find({"date": {"$lt": d}}).sort("author"): - ... pprint.pprint(post) - ... - {'_id': ObjectId('...'), - 'author': 'Eliot', - 'date': datetime.datetime(...), - 'text': 'and pretty easy too!', - 'title': 'MongoDB is fun'} - {'_id': ObjectId('...'), - 'author': 'Mike', - 'date': datetime.datetime(...), - 'tags': ['bulk', 'insert'], - 'text': 'Another post!'} - -Here we use the special ``"$lt"`` operator to do a range query, and -also call :meth:`~pymongo.cursor.Cursor.sort` to sort the results -by author. - -Indexing --------- - -Adding indexes can help accelerate certain queries and can also add additional -functionality to querying and storing documents. In this example, we'll -demonstrate how to create a `unique index -`_ on a key that rejects -documents whose value for that key already exists in the index. - -First, we'll need to create the index: - -.. 
doctest:: - - >>> result = db.profiles.create_index([("user_id", pymongo.ASCENDING)], unique=True) - >>> sorted(list(db.profiles.index_information())) - ['_id_', 'user_id_1'] - -Notice that we have two indexes now: one is the index on ``_id`` that MongoDB -creates automatically, and the other is the index on ``user_id`` we just -created. - -Now let's set up some user profiles: - -.. doctest:: - - >>> user_profiles = [{"user_id": 211, "name": "Luke"}, {"user_id": 212, "name": "Ziltoid"}] - >>> result = db.profiles.insert_many(user_profiles) - -The index prevents us from inserting a document whose ``user_id`` is already in -the collection: - -.. doctest:: - :options: +IGNORE_EXCEPTION_DETAIL - - >>> new_profile = {"user_id": 213, "name": "Drew"} - >>> duplicate_profile = {"user_id": 212, "name": "Tommy"} - >>> result = db.profiles.insert_one(new_profile) # This is fine. - >>> result = db.profiles.insert_one(duplicate_profile) - Traceback (most recent call last): - DuplicateKeyError: E11000 duplicate key error index: test_database.profiles.$user_id_1 dup key: { : 212 } - -.. seealso:: The MongoDB documentation on `indexes `_ diff --git a/pymongo/__init__.py b/pymongo/__init__.py index 95eabef242..ac540d94db 100644 --- a/pymongo/__init__.py +++ b/pymongo/__init__.py @@ -165,7 +165,7 @@ def timeout(seconds: Optional[float]) -> ContextManager[None]: :raises: :py:class:`ValueError`: When `seconds` is negative. - See :ref:`timeout-example` for more examples. + See `Limit Server Execution Time `_ for more examples. .. versionadded:: 4.2 """ diff --git a/pymongo/asynchronous/collection.py b/pymongo/asynchronous/collection.py index 7fb20b7ab3..f46dd58568 100644 --- a/pymongo/asynchronous/collection.py +++ b/pymongo/asynchronous/collection.py @@ -762,7 +762,7 @@ async def bulk_write( :return: An instance of :class:`~pymongo.results.BulkWriteResult`. - .. seealso:: :ref:`writes-and-ids` + .. seealso:: `Writes and ids `_ .. 
note:: `bypass_document_validation` requires server version **>= 3.2** @@ -867,7 +867,7 @@ async def insert_one( :return: - An instance of :class:`~pymongo.results.InsertOneResult`. - .. seealso:: :ref:`writes-and-ids` + .. seealso:: `Writes and ids `_ .. note:: `bypass_document_validation` requires server version **>= 3.2** @@ -936,7 +936,7 @@ async def insert_many( :return: An instance of :class:`~pymongo.results.InsertManyResult`. - .. seealso:: :ref:`writes-and-ids` + .. seealso:: `Writes and ids `_ .. note:: `bypass_document_validation` requires server version **>= 3.2** @@ -2041,7 +2041,7 @@ async def estimated_document_count(self, comment: Optional[Any] = None, **kwargs .. versionchanged:: 4.2 This method now always uses the `count`_ command. Due to an oversight in versions 5.0.0-5.0.8 of MongoDB, the count command was not included in V1 of the - :ref:`versioned-api-ref`. Users of the Stable API with estimated_document_count are + `versioned API `_. Users of the Stable API with estimated_document_count are recommended to upgrade their server version to 5.0.9+ or set :attr:`pymongo.server_api.ServerApi.strict` to ``False`` to avoid encountering errors. @@ -2916,7 +2916,7 @@ async def aggregate( .. note:: This method does not support the 'explain' option. Please use `PyMongoExplain `_ - instead. An example is included in the :ref:`aggregate-examples` + instead. An example is included in the `aggregation example `_ documentation. .. note:: The :attr:`~pymongo.asynchronous.collection.AsyncCollection.write_concern` of @@ -2977,7 +2977,7 @@ async def aggregate( The :meth:`aggregate` method always returns an AsyncCommandCursor. The pipeline argument must be a list. - .. seealso:: :doc:`/examples/aggregation` + .. seealso:: `Aggregation `_ .. 
_aggregate command: https://mongodb.com/docs/manual/reference/command/aggregate diff --git a/pymongo/asynchronous/database.py b/pymongo/asynchronous/database.py index d0089eb4ee..84448ef4b0 100644 --- a/pymongo/asynchronous/database.py +++ b/pymongo/asynchronous/database.py @@ -893,7 +893,7 @@ async def command( when decoding the command response. .. note:: If this client has been configured to use MongoDB Stable - API (see :ref:`versioned-api-ref`), then :meth:`command` will + API (see `versioned API `_), then :meth:`command` will automatically add API versioning options to the given command. Explicitly adding API versioning options in the command and declaring an API version on the client is not supported. @@ -994,7 +994,7 @@ async def cursor_command( when decoding the command response. .. note:: If this client has been configured to use MongoDB Stable - API (see :ref:`versioned-api-ref`), then :meth:`command` will + API (see `versioned API `_), then :meth:`command` will automatically add API versioning options to the given command. Explicitly adding API versioning options in the command and declaring an API version on the client is not supported. diff --git a/pymongo/asynchronous/encryption.py b/pymongo/asynchronous/encryption.py index 9b0757b1a5..149cb3ac85 100644 --- a/pymongo/asynchronous/encryption.py +++ b/pymongo/asynchronous/encryption.py @@ -579,7 +579,7 @@ def __init__( creating data keys. It does not provide an API to query keys from the key vault collection, as this can be done directly on the AsyncMongoClient. - See :ref:`explicit-client-side-encryption` for an example. + See `explicit client-side encryption `_ for an example. :param kms_providers: Map of KMS provider options. The `kms_providers` map values differ by provider: @@ -608,7 +608,7 @@ def __init__( KMS providers may be specified with an optional name suffix separated by a colon, for example "kmip:name" or "aws:name". - Named KMS providers do not support :ref:`CSFLE on-demand credentials`. 
+ Named KMS providers do not support `CSFLE on-demand credentials `_. :param key_vault_namespace: The namespace for the key vault collection. The key vault collection contains all data keys used for encryption and decryption. Data keys are stored as documents in this MongoDB diff --git a/pymongo/asynchronous/mongo_client.py b/pymongo/asynchronous/mongo_client.py index 3488030166..629fd466d6 100644 --- a/pymongo/asynchronous/mongo_client.py +++ b/pymongo/asynchronous/mongo_client.py @@ -14,7 +14,7 @@ """Tools for connecting to MongoDB. -.. seealso:: :doc:`/examples/high_availability` for examples of connecting +.. seealso:: `Read and Write Settings `_ for examples of connecting to replica sets or sets of mongos servers. To get a :class:`~pymongo.asynchronous.database.AsyncDatabase` instance from a @@ -263,7 +263,7 @@ def __init__( print("Server not available") .. warning:: When using PyMongo in a multiprocessing context, please - read :ref:`multiprocessing` first. + read `PyMongo multiprocessing `_ first. .. note:: Many of the following options can be passed using a MongoDB URI or keyword parameters. If the same option is passed in a URI and @@ -296,7 +296,7 @@ def __init__( return DatetimeMS objects when the underlying datetime is out-of-range and 'datetime_clamp' to clamp to the minimum and maximum possible datetimes. Defaults to 'datetime'. See - :ref:`handling-out-of-range-datetimes` for details. + `handling out of range datetimes `_ for details. - `directConnection` (optional): if ``True``, forces this client to connect directly to the specified MongoDB host as a standalone. If ``false``, the client connects to the entire replica set of @@ -421,7 +421,7 @@ def __init__( package. By default no compression is used. Compression support must also be enabled on the server. MongoDB 3.6+ supports snappy and zlib compression. MongoDB 4.2+ adds support for zstd. - See :ref:`network-compression-example` for details. + See `compress network traffic `_ for details. 
- `zlibCompressionLevel`: (int) The zlib compression level to use when zlib is used as the wire protocol compressor. Supported values are -1 through 9. -1 tells the zlib library to use its default @@ -432,7 +432,7 @@ def __init__( values are the strings: "standard", "pythonLegacy", "javaLegacy", "csharpLegacy", and "unspecified" (the default). New applications should consider setting this to "standard" for cross language - compatibility. See :ref:`handling-uuid-data-example` for details. + compatibility. See `handling UUID data `_ for details. - `unicode_decode_error_handler`: The error handler to apply when a Unicode-related error occurs during BSON decoding that would otherwise raise :exc:`UnicodeDecodeError`. Valid options include @@ -496,7 +496,7 @@ def __init__( is set, it must be a positive integer greater than or equal to 90 seconds. - .. seealso:: :doc:`/examples/server_selection` + .. seealso:: `Customize Server Selection `_ | **Authentication:** @@ -522,7 +522,7 @@ def __init__( To specify the session token for MONGODB-AWS authentication pass ``authMechanismProperties='AWS_SESSION_TOKEN:'``. - .. seealso:: :doc:`/examples/authentication` + .. seealso:: `Authentication `_ | **TLS/SSL configuration:** @@ -585,7 +585,7 @@ def __init__( :class:`~pymongo.encryption_options.AutoEncryptionOpts` which configures this client to automatically encrypt collection commands and automatically decrypt results. See - :ref:`automatic-client-side-encryption` for an example. + `client-side field level encryption `_ for an example. If a :class:`AsyncMongoClient` is configured with ``auto_encryption_opts`` and a non-None ``maxPoolSize``, a separate internal ``AsyncMongoClient`` is created if any of the @@ -601,7 +601,7 @@ def __init__( - `server_api`: A :class:`~pymongo.server_api.ServerApi` which configures this - client to use Stable API. See :ref:`versioned-api-ref` for + client to use Stable API. See `versioned API `_ for details. .. 
seealso:: The MongoDB documentation on `connections `_. @@ -712,15 +712,15 @@ def __init__( reconnect to one of them. In PyMongo 3, the client monitors its network latency to all the mongoses continuously, and distributes operations evenly among those with the lowest latency. See - :ref:`mongos-load-balancing` for more information. + `load balancing `_ for more information. The ``connect`` option is added. The ``start_request``, ``in_request``, and ``end_request`` methods are removed, as well as the ``auto_start_request`` option. - The ``copy_database`` method is removed, see the - :doc:`copy_database examples ` for alternatives. + The ``copy_database`` method is removed, see + `Copy and Clone Databases `_ for alternatives. The :meth:`AsyncMongoClient.disconnect` method is removed; it was a synonym for :meth:`~pymongo.asynchronous.AsyncMongoClient.close`. @@ -2519,9 +2519,9 @@ async def bulk_write( :return: An instance of :class:`~pymongo.results.ClientBulkWriteResult`. - .. seealso:: For more info, see :doc:`/examples/client_bulk`. + .. seealso:: For more info, see `Client Bulk Write `_. - .. seealso:: :ref:`writes-and-ids` + .. seealso:: `Writes and ids `_ .. note:: requires MongoDB server version 8.0+. diff --git a/pymongo/daemon.py b/pymongo/daemon.py index be976decd9..c0a01db16d 100644 --- a/pymongo/daemon.py +++ b/pymongo/daemon.py @@ -16,7 +16,7 @@ PyMongo only attempts to spawn the mongocryptd daemon process when automatic client-side field level encryption is enabled. See -:ref:`automatic-client-side-encryption` for more info. +`Client-side Field Level Encryption `_ for more info. 
""" from __future__ import annotations diff --git a/pymongo/encryption_options.py b/pymongo/encryption_options.py index e9ad1c1e01..0317393c94 100644 --- a/pymongo/encryption_options.py +++ b/pymongo/encryption_options.py @@ -75,7 +75,7 @@ def __init__( encryption and explicit decryption is also supported for all users with the :class:`~pymongo.asynchronous.encryption.AsyncClientEncryption` and :class:`~pymongo.encryption.ClientEncryption` classes. - See :ref:`automatic-client-side-encryption` for an example. + See `client-side field level encryption `_ for an example. :param kms_providers: Map of KMS provider options. The `kms_providers` map values differ by provider: @@ -104,7 +104,7 @@ def __init__( KMS providers may be specified with an optional name suffix separated by a colon, for example "kmip:name" or "aws:name". - Named KMS providers do not support :ref:`CSFLE on-demand credentials`. + Named KMS providers do not support `CSFLE on-demand credentials `_. Named KMS providers enables more than one of each KMS provider type to be configured. For example, to configure multiple local KMS providers:: diff --git a/pymongo/read_preferences.py b/pymongo/read_preferences.py index dae414c37c..35b92c4d01 100644 --- a/pymongo/read_preferences.py +++ b/pymongo/read_preferences.py @@ -551,7 +551,7 @@ class ReadPreference: Nearest(tag_sets=[{"node":"analytics"}]) - See :doc:`/examples/high_availability` for code examples. + See `Read and Write Settings `_ for code examples. A read preference is used in three cases: diff --git a/pymongo/synchronous/collection.py b/pymongo/synchronous/collection.py index 8a71768318..f6c1ffce41 100644 --- a/pymongo/synchronous/collection.py +++ b/pymongo/synchronous/collection.py @@ -761,7 +761,7 @@ def bulk_write( :return: An instance of :class:`~pymongo.results.BulkWriteResult`. - .. seealso:: :ref:`writes-and-ids` + .. seealso:: `Writes and ids `_ .. 
note:: `bypass_document_validation` requires server version **>= 3.2** @@ -866,7 +866,7 @@ def insert_one( :return: - An instance of :class:`~pymongo.results.InsertOneResult`. - .. seealso:: :ref:`writes-and-ids` + .. seealso:: `Writes and ids `_ .. note:: `bypass_document_validation` requires server version **>= 3.2** @@ -935,7 +935,7 @@ def insert_many( :return: An instance of :class:`~pymongo.results.InsertManyResult`. - .. seealso:: :ref:`writes-and-ids` + .. seealso:: `Writes and ids `_ .. note:: `bypass_document_validation` requires server version **>= 3.2** @@ -2040,7 +2040,7 @@ def estimated_document_count(self, comment: Optional[Any] = None, **kwargs: Any) .. versionchanged:: 4.2 This method now always uses the `count`_ command. Due to an oversight in versions 5.0.0-5.0.8 of MongoDB, the count command was not included in V1 of the - :ref:`versioned-api-ref`. Users of the Stable API with estimated_document_count are + `versioned API `_. Users of the Stable API with estimated_document_count are recommended to upgrade their server version to 5.0.9+ or set :attr:`pymongo.server_api.ServerApi.strict` to ``False`` to avoid encountering errors. @@ -2909,7 +2909,7 @@ def aggregate( .. note:: This method does not support the 'explain' option. Please use `PyMongoExplain `_ - instead. An example is included in the :ref:`aggregate-examples` + instead. An example is included in the `aggregation example `_ documentation. .. note:: The :attr:`~pymongo.collection.Collection.write_concern` of @@ -2970,7 +2970,7 @@ def aggregate( The :meth:`aggregate` method always returns a CommandCursor. The pipeline argument must be a list. - .. seealso:: :doc:`/examples/aggregation` + .. seealso:: `Aggregation `_ .. 
_aggregate command: https://mongodb.com/docs/manual/reference/command/aggregate diff --git a/pymongo/synchronous/database.py b/pymongo/synchronous/database.py index a11674b9aa..42ee5c5083 100644 --- a/pymongo/synchronous/database.py +++ b/pymongo/synchronous/database.py @@ -893,7 +893,7 @@ def command( when decoding the command response. .. note:: If this client has been configured to use MongoDB Stable - API (see :ref:`versioned-api-ref`), then :meth:`command` will + API (see `versioned API `_), then :meth:`command` will automatically add API versioning options to the given command. Explicitly adding API versioning options in the command and declaring an API version on the client is not supported. @@ -992,7 +992,7 @@ def cursor_command( when decoding the command response. .. note:: If this client has been configured to use MongoDB Stable - API (see :ref:`versioned-api-ref`), then :meth:`command` will + API (see `versioned API `_), then :meth:`command` will automatically add API versioning options to the given command. Explicitly adding API versioning options in the command and declaring an API version on the client is not supported. diff --git a/pymongo/synchronous/encryption.py b/pymongo/synchronous/encryption.py index 5f9bdac4b7..ba304e7bd3 100644 --- a/pymongo/synchronous/encryption.py +++ b/pymongo/synchronous/encryption.py @@ -576,7 +576,7 @@ def __init__( creating data keys. It does not provide an API to query keys from the key vault collection, as this can be done directly on the MongoClient. - See :ref:`explicit-client-side-encryption` for an example. + See `explicit client-side encryption `_ for an example. :param kms_providers: Map of KMS provider options. The `kms_providers` map values differ by provider: @@ -605,7 +605,7 @@ def __init__( KMS providers may be specified with an optional name suffix separated by a colon, for example "kmip:name" or "aws:name". - Named KMS providers do not support :ref:`CSFLE on-demand credentials`. 
+ Named KMS providers do not support `CSFLE on-demand credentials `_. :param key_vault_namespace: The namespace for the key vault collection. The key vault collection contains all data keys used for encryption and decryption. Data keys are stored as documents in this MongoDB diff --git a/pymongo/synchronous/mongo_client.py b/pymongo/synchronous/mongo_client.py index 1fd506e052..35d51599c6 100644 --- a/pymongo/synchronous/mongo_client.py +++ b/pymongo/synchronous/mongo_client.py @@ -14,7 +14,7 @@ """Tools for connecting to MongoDB. -.. seealso:: :doc:`/examples/high_availability` for examples of connecting +.. seealso:: `Read and Write Settings `_ for examples of connecting to replica sets or sets of mongos servers. To get a :class:`~pymongo.database.Database` instance from a @@ -260,7 +260,7 @@ def __init__( print("Server not available") .. warning:: When using PyMongo in a multiprocessing context, please - read :ref:`multiprocessing` first. + read `PyMongo multiprocessing `_ first. .. note:: Many of the following options can be passed using a MongoDB URI or keyword parameters. If the same option is passed in a URI and @@ -296,7 +296,7 @@ def __init__( return DatetimeMS objects when the underlying datetime is out-of-range and 'datetime_clamp' to clamp to the minimum and maximum possible datetimes. Defaults to 'datetime'. See - :ref:`handling-out-of-range-datetimes` for details. + `handling out of range datetimes `_ for details. - `directConnection` (optional): if ``True``, forces this client to connect directly to the specified MongoDB host as a standalone. If ``false``, the client connects to the entire replica set of @@ -421,7 +421,7 @@ def __init__( package. By default no compression is used. Compression support must also be enabled on the server. MongoDB 3.6+ supports snappy and zlib compression. MongoDB 4.2+ adds support for zstd. - See :ref:`network-compression-example` for details. + See `compress network traffic `_ for details. 
- `zlibCompressionLevel`: (int) The zlib compression level to use when zlib is used as the wire protocol compressor. Supported values are -1 through 9. -1 tells the zlib library to use its default @@ -432,7 +432,7 @@ def __init__( values are the strings: "standard", "pythonLegacy", "javaLegacy", "csharpLegacy", and "unspecified" (the default). New applications should consider setting this to "standard" for cross language - compatibility. See :ref:`handling-uuid-data-example` for details. + compatibility. See `handling UUID data `_ for details. - `unicode_decode_error_handler`: The error handler to apply when a Unicode-related error occurs during BSON decoding that would otherwise raise :exc:`UnicodeDecodeError`. Valid options include @@ -496,7 +496,7 @@ def __init__( is set, it must be a positive integer greater than or equal to 90 seconds. - .. seealso:: :doc:`/examples/server_selection` + .. seealso:: `Customize Server Selection `_ | **Authentication:** @@ -522,7 +522,7 @@ def __init__( To specify the session token for MONGODB-AWS authentication pass ``authMechanismProperties='AWS_SESSION_TOKEN:'``. - .. seealso:: :doc:`/examples/authentication` + .. seealso:: `Authentication `_ | **TLS/SSL configuration:** @@ -585,7 +585,7 @@ def __init__( :class:`~pymongo.encryption_options.AutoEncryptionOpts` which configures this client to automatically encrypt collection commands and automatically decrypt results. See - :ref:`automatic-client-side-encryption` for an example. + `client-side field level encryption `_ for an example. If a :class:`MongoClient` is configured with ``auto_encryption_opts`` and a non-None ``maxPoolSize``, a separate internal ``MongoClient`` is created if any of the @@ -601,7 +601,7 @@ def __init__( - `server_api`: A :class:`~pymongo.server_api.ServerApi` which configures this - client to use Stable API. See :ref:`versioned-api-ref` for + client to use Stable API. See `versioned API `_ for details. .. 
seealso:: The MongoDB documentation on `connections `_. @@ -712,15 +712,15 @@ def __init__( reconnect to one of them. In PyMongo 3, the client monitors its network latency to all the mongoses continuously, and distributes operations evenly among those with the lowest latency. See - :ref:`mongos-load-balancing` for more information. + `load balancing `_ for more information. The ``connect`` option is added. The ``start_request``, ``in_request``, and ``end_request`` methods are removed, as well as the ``auto_start_request`` option. - The ``copy_database`` method is removed, see the - :doc:`copy_database examples ` for alternatives. + The ``copy_database`` method is removed, see + `Copy and Clone Databases `_ for alternatives. The :meth:`MongoClient.disconnect` method is removed; it was a synonym for :meth:`~pymongo.MongoClient.close`. @@ -2509,9 +2509,9 @@ def bulk_write( :return: An instance of :class:`~pymongo.results.ClientBulkWriteResult`. - .. seealso:: For more info, see :doc:`/examples/client_bulk`. + .. seealso:: For more info, see `Client Bulk Write `_. - .. seealso:: :ref:`writes-and-ids` + .. seealso:: `Writes and ids `_ .. note:: requires MongoDB server version 8.0+.