diff --git a/syncstorage/storage/spanner.py b/syncstorage/storage/spanner.py index 00713399..bdb6ba19 100644 --- a/syncstorage/storage/spanner.py +++ b/syncstorage/storage/spanner.py @@ -53,6 +53,9 @@ EPOCH = datetime.datetime.utcfromtimestamp(0) +# a bogus FIND_ITEMS bind param +BOGUS_ID = object() + def dt2ts(dt): """Convert a Python datetime to seconds""" @@ -121,8 +124,18 @@ def __init__(self, sqluri, standard_collections=False, **dbkwds): target_size=int(dbkwds.get("pool_size", 100)) ) self._database = self._instance.database(database_id, pool=pool) + # for debugging: support disabling FORCE_INDEX=BsoLastModified self._force_bsolm_index = dbkwds.get("_force_bsolm_index", True) + # split FIND_ITEMS' query in 2 (first query BsoLastModified + # then query bso with its results) + self._bsolm_index_separate = dbkwds.get( + "_bsolm_index_separate", + False + ) + if self._bsolm_index_separate: + assert self._force_bsolm_index, \ + "_bsolm_index_separate requires _force_bsolm_index" self.standard_collections = standard_collections if self.standard_collections and dbkwds.get("create_tables", False): @@ -358,13 +371,68 @@ def _find_items(self, session, user, collection, **params): bind_types[ts_var] = param_types.TIMESTAMP # Generate the query + fields = params.get("fields") + offset_params = params + + if self._bsolm_index_separate and fields is None: + # Split the get_items query (TODO: get_item_ids could + # reference solely the index) query in 2: + # #1: query BsoLastModified directly for bso ids + # #2: query bso w/ 'id IN UNNEST([])' + + bsolm_params = params.copy() + bsolm_params["fields"] = ["id"] + bsolm_query = FIND_ITEMS(bso, bsolm_params) + bsolm_query = str(bsolm_query.compile()) + bsolm_query = bsolm_query.replace( + "FROM bso", + "FROM bso@{FORCE_INDEX=BsoLastModified}" + ) + bsolm_query = bsolm_query.replace(":", "@") + + result = session.transaction.execute_sql( + bsolm_query, + params=bind, + param_types=bind_types, + ) + ids = [row[0] for row in list(result)] + + # keep the orig. for encode_next_offset + offset_params = params.copy() + + # Setup a 'id IN (:id_1)' bind param for #2 (replacing it + # below) + params["ids"] = [BOGUS_ID] + bind["ids"] = ids + bind_types["ids"] = param_types.Array(param_types.STRING) + + # offset/limit are accomplished in query #1 and don't make + # sense for #2 + if limit is not None: + del params["limit"] + del bind["limit"] + del bind_types["limit"] + if offset is not None: + # restore this later + del params["offset"] + del bind["offset"] + del bind_types["offset"] + # simiarly modified ranges/ttl aren't needed in #2 + for param in ("newer", "newer_eq", "older", "older_eq", "ttl"): + params.pop(param, None) + query = FIND_ITEMS(bso, params) query = str(query.compile()) - if self._force_bsolm_index: + if self._force_bsolm_index and not self._bsolm_index_separate: query = query.replace( "FROM bso", "FROM bso@{FORCE_INDEX=BsoLastModified}" ) + if self._bsolm_index_separate and fields is None: + query = query.replace( + "bso.id IN (:id_1)", + "bso.id IN UNNEST(@ids)" + ) query = query.replace(":", "@") result = session.transaction.execute_sql( @@ -390,7 +458,7 @@ def _find_items(self, session, user, collection, **params): next_offset = None if limit is not None and len(items) > limit: items = items[:-1] - next_offset = self.encode_next_offset(params, items) + next_offset = self.encode_next_offset(offset_params, items) return { "items": items, "next_offset": next_offset, diff --git a/syncstorage/storage/sql/queries_spanner.py b/syncstorage/storage/sql/queries_spanner.py index a2a3deeb..cb2132fa 100644 --- a/syncstorage/storage/sql/queries_spanner.py +++ b/syncstorage/storage/sql/queries_spanner.py @@ -21,7 +21,7 @@ CREATE INDEX BsoTtl ON bso(ttl); -- TODO: modified DESC's ideal for the default sort order but client can -- also specify ASC -CREATE INDEX BsoLastModified ON bso(userid, collection, modified DESC), +CREATE INDEX BsoLastModified ON bso(userid, collection, modified DESC, ttl), INTERLEAVE IN user_collections; CREATE TABLE collections ( @@ -85,7 +85,7 @@ def FIND_ITEMS(bso, params): query = query.where(bso.c.collection == bindparam("collectionid")) # Filter by the various query parameters. if "ids" in params: - # Sadly, we can't use a bindparam in an "IN" expression. + # XXX: could utilize spanner's 'in UNNEST()' query = query.where(bso.c.id.in_(params.get("ids"))) if "newer" in params: query = query.where(bso.c.modified > bindparam("newer"))