Skip to content

Commit 8d5785a

Browse files
feat: support timestamp_precision in table schema (#2333)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #<issue_number_goes_here> 🦕 --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent bfd9d8d commit 8d5785a

File tree

4 files changed

+144
-10
lines changed

4 files changed

+144
-10
lines changed

google/cloud/bigquery/enums.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,3 +480,18 @@ class SourceColumnMatch(str, enum.Enum):
480480
NAME = "NAME"
481481
"""Matches by name. This reads the header row as column names and reorders
482482
columns to match the field names in the schema."""
483+
484+
485+
class TimestampPrecision(enum.Enum):
486+
"""Precision (maximum number of total digits in base 10) for seconds of
487+
TIMESTAMP type."""
488+
489+
MICROSECOND = None
490+
"""
491+
Default, for TIMESTAMP type with microsecond precision.
492+
"""
493+
494+
PICOSECOND = 12
495+
"""
496+
For TIMESTAMP type with picosecond precision.
497+
"""

google/cloud/bigquery/schema.py

Lines changed: 46 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,14 @@ class SchemaField(object):
196196
197197
Only valid for top-level schema fields (not nested fields).
198198
If the type is FOREIGN, this field is required.
199+
200+
timestamp_precision: Optional[enums.TimestampPrecision]
201+
Precision (maximum number of total digits in base 10) for seconds
202+
of TIMESTAMP type.
203+
204+
Defaults to `enums.TimestampPrecision.MICROSECOND` (`None`) for
205+
microsecond precision. Use `enums.TimestampPrecision.PICOSECOND`
206+
(`12`) for picosecond precision.
199207
"""
200208

201209
def __init__(
@@ -213,6 +221,7 @@ def __init__(
213221
range_element_type: Union[FieldElementType, str, None] = None,
214222
rounding_mode: Union[enums.RoundingMode, str, None] = None,
215223
foreign_type_definition: Optional[str] = None,
224+
timestamp_precision: Optional[enums.TimestampPrecision] = None,
216225
):
217226
self._properties: Dict[str, Any] = {
218227
"name": name,
@@ -237,6 +246,13 @@ def __init__(
237246
if isinstance(policy_tags, PolicyTagList)
238247
else None
239248
)
249+
if isinstance(timestamp_precision, enums.TimestampPrecision):
250+
self._properties["timestampPrecision"] = timestamp_precision.value
251+
elif timestamp_precision is not None:
252+
raise ValueError(
253+
"timestamp_precision must be class enums.TimestampPrecision "
254+
f"or None, got {type(timestamp_precision)} instead."
255+
)
240256
if isinstance(range_element_type, str):
241257
self._properties["rangeElementType"] = {"type": range_element_type}
242258
if isinstance(range_element_type, FieldElementType):
@@ -254,15 +270,22 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
254270
"""Return a ``SchemaField`` object deserialized from a dictionary.
255271
256272
Args:
257-
api_repr (Mapping[str, str]): The serialized representation
258-
of the SchemaField, such as what is output by
259-
:meth:`to_api_repr`.
273+
api_repr (dict): The serialized representation of the SchemaField,
274+
such as what is output by :meth:`to_api_repr`.
260275
261276
Returns:
262277
google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object.
263278
"""
264279
placeholder = cls("this_will_be_replaced", "PLACEHOLDER")
265280

281+
# The API returns this value as a string even though we send an integer.
282+
# To make sure a received schema can be resent successfully, we convert
283+
# the string back to an integer for consistency.
284+
try:
285+
api_repr["timestampPrecision"] = int(api_repr["timestampPrecision"])
286+
except (TypeError, KeyError):
287+
pass
288+
266289
# Note: we don't make a copy of api_repr because this can cause
267290
# unnecessary slowdowns, especially on deeply nested STRUCT / RECORD
268291
# fields. See https://github.com/googleapis/python-bigquery/issues/6
@@ -374,6 +397,16 @@ def policy_tags(self):
374397
resource = self._properties.get("policyTags")
375398
return PolicyTagList.from_api_repr(resource) if resource is not None else None
376399

400+
@property
401+
def timestamp_precision(self) -> enums.TimestampPrecision:
402+
"""Precision (maximum number of total digits in base 10) for seconds of
403+
TIMESTAMP type.
404+
405+
Returns:
406+
enums.TimestampPrecision: value of TimestampPrecision.
407+
"""
408+
return enums.TimestampPrecision(self._properties.get("timestampPrecision"))
409+
377410
def to_api_repr(self) -> dict:
378411
"""Return a dictionary representing this schema field.
379412
@@ -408,6 +441,8 @@ def _key(self):
408441
None if self.policy_tags is None else tuple(sorted(self.policy_tags.names))
409442
)
410443

444+
timestamp_precision = self._properties.get("timestampPrecision")
445+
411446
return (
412447
self.name,
413448
field_type,
@@ -417,6 +452,7 @@ def _key(self):
417452
self.description,
418453
self.fields,
419454
policy_tags,
455+
timestamp_precision,
420456
)
421457

422458
def to_standard_sql(self) -> standard_sql.StandardSqlField:
@@ -467,10 +503,9 @@ def __hash__(self):
467503
return hash(self._key())
468504

469505
def __repr__(self):
470-
key = self._key()
471-
policy_tags = key[-1]
506+
*initial_tags, policy_tags, timestamp_precision_tag = self._key()
472507
policy_tags_inst = None if policy_tags is None else PolicyTagList(policy_tags)
473-
adjusted_key = key[:-1] + (policy_tags_inst,)
508+
adjusted_key = (*initial_tags, policy_tags_inst, timestamp_precision_tag)
474509
return f"{self.__class__.__name__}{adjusted_key}"
475510

476511

@@ -530,9 +565,11 @@ def _to_schema_fields(schema):
530565
if isinstance(schema, Sequence):
531566
# Input is a Sequence (e.g. a list): Process and return a list of SchemaFields
532567
return [
533-
field
534-
if isinstance(field, SchemaField)
535-
else SchemaField.from_api_repr(field)
568+
(
569+
field
570+
if isinstance(field, SchemaField)
571+
else SchemaField.from_api_repr(field)
572+
)
536573
for field in schema
537574
]
538575

tests/system/test_client.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,16 @@
7474
bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
7575
bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
7676
]
77+
SCHEMA_PICOSECOND = [
78+
bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
79+
bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
80+
bigquery.SchemaField(
81+
"time_pico",
82+
"TIMESTAMP",
83+
mode="REQUIRED",
84+
timestamp_precision=enums.TimestampPrecision.PICOSECOND,
85+
),
86+
]
7787
CLUSTERING_SCHEMA = [
7888
bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
7989
bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
@@ -631,6 +641,19 @@ def test_create_table_w_time_partitioning_w_clustering_fields(self):
631641
self.assertEqual(time_partitioning.field, "transaction_time")
632642
self.assertEqual(table.clustering_fields, ["user_email", "store_code"])
633643

644+
def test_create_table_w_picosecond_timestamp(self):
645+
dataset = self.temp_dataset(_make_dataset_id("create_table"))
646+
table_id = "test_table"
647+
table_arg = Table(dataset.table(table_id), schema=SCHEMA_PICOSECOND)
648+
self.assertFalse(_table_exists(table_arg))
649+
650+
table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
651+
self.to_delete.insert(0, table)
652+
653+
self.assertTrue(_table_exists(table))
654+
self.assertEqual(table.table_id, table_id)
655+
self.assertEqual(table.schema, SCHEMA_PICOSECOND)
656+
634657
def test_delete_dataset_with_string(self):
635658
dataset_id = _make_dataset_id("delete_table_true_with_string")
636659
project = Config.CLIENT.project

tests/unit/test_schema.py

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ def test_constructor_defaults(self):
5252
self.assertIsNone(field.default_value_expression)
5353
self.assertEqual(field.rounding_mode, None)
5454
self.assertEqual(field.foreign_type_definition, None)
55+
self.assertEqual(
56+
field.timestamp_precision, enums.TimestampPrecision.MICROSECOND
57+
)
5558

5659
def test_constructor_explicit(self):
5760
FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field"
@@ -69,6 +72,7 @@ def test_constructor_explicit(self):
6972
default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION,
7073
rounding_mode=enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED,
7174
foreign_type_definition="INTEGER",
75+
timestamp_precision=enums.TimestampPrecision.PICOSECOND,
7276
)
7377
self.assertEqual(field.name, "test")
7478
self.assertEqual(field.field_type, "STRING")
@@ -87,6 +91,10 @@ def test_constructor_explicit(self):
8791
)
8892
self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED")
8993
self.assertEqual(field.foreign_type_definition, "INTEGER")
94+
self.assertEqual(
95+
field.timestamp_precision,
96+
enums.TimestampPrecision.PICOSECOND,
97+
)
9098

9199
def test_constructor_explicit_none(self):
92100
field = self._make_one("test", "STRING", description=None, policy_tags=None)
@@ -189,6 +197,23 @@ def test_to_api_repr_with_subfield(self):
189197
},
190198
)
191199

200+
def test_to_api_repr_w_timestamp_precision(self):
201+
field = self._make_one(
202+
"foo",
203+
"TIMESTAMP",
204+
"NULLABLE",
205+
timestamp_precision=enums.TimestampPrecision.PICOSECOND,
206+
)
207+
self.assertEqual(
208+
field.to_api_repr(),
209+
{
210+
"mode": "NULLABLE",
211+
"name": "foo",
212+
"type": "TIMESTAMP",
213+
"timestampPrecision": 12,
214+
},
215+
)
216+
192217
def test_from_api_repr(self):
193218
field = self._get_target_class().from_api_repr(
194219
{
@@ -198,6 +223,7 @@ def test_from_api_repr(self):
198223
"name": "foo",
199224
"type": "record",
200225
"roundingMode": "ROUNDING_MODE_UNSPECIFIED",
226+
"timestampPrecision": 12,
201227
}
202228
)
203229
self.assertEqual(field.name, "foo")
@@ -210,6 +236,10 @@ def test_from_api_repr(self):
210236
self.assertEqual(field.fields[0].mode, "NULLABLE")
211237
self.assertEqual(field.range_element_type, None)
212238
self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED")
239+
self.assertEqual(
240+
field.timestamp_precision,
241+
enums.TimestampPrecision.PICOSECOND,
242+
)
213243

214244
def test_from_api_repr_policy(self):
215245
field = self._get_target_class().from_api_repr(
@@ -264,6 +294,17 @@ def test_from_api_repr_defaults(self):
264294
self.assertNotIn("policyTags", field._properties)
265295
self.assertNotIn("rangeElementType", field._properties)
266296

297+
def test_from_api_repr_timestamp_precision_str(self):
298+
# The backend returns the timestampPrecision field as a string, even
299+
# if we send an integer. This test verifies that we convert it back to
300+
# an integer so that resending the schema can succeed.
301+
field = self._get_target_class().from_api_repr(
302+
{
303+
"timestampPrecision": "12",
304+
}
305+
)
306+
self.assertEqual(field._properties["timestampPrecision"], 12)
307+
267308
def test_name_property(self):
268309
name = "lemon-ness"
269310
schema_field = self._make_one(name, "INTEGER")
@@ -323,6 +364,22 @@ def test_foreign_type_definition_property_str(self):
323364
schema_field._properties["foreignTypeDefinition"] = FOREIGN_TYPE_DEFINITION
324365
self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION)
325366

367+
def test_timestamp_precision_unsupported_type(self):
368+
with pytest.raises(ValueError) as e:
369+
self._make_one("test", "TIMESTAMP", timestamp_precision=12)
370+
371+
assert "timestamp_precision must be class enums.TimestampPrecision" in str(
372+
e.value
373+
)
374+
375+
def test_timestamp_precision_property(self):
376+
TIMESTAMP_PRECISION = enums.TimestampPrecision.PICOSECOND
377+
schema_field = self._make_one("test", "TIMESTAMP")
378+
schema_field._properties[
379+
"timestampPrecision"
380+
] = enums.TimestampPrecision.PICOSECOND.value
381+
self.assertEqual(schema_field.timestamp_precision, TIMESTAMP_PRECISION)
382+
326383
def test_to_standard_sql_simple_type(self):
327384
examples = (
328385
# a few legacy types
@@ -637,7 +694,9 @@ def test___hash__not_equals(self):
637694

638695
def test___repr__(self):
639696
field1 = self._make_one("field1", "STRING")
640-
expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, None, (), None)"
697+
expected = (
698+
"SchemaField('field1', 'STRING', 'NULLABLE', None, None, (), None, None)"
699+
)
641700
self.assertEqual(repr(field1), expected)
642701

643702
def test___repr__evaluable_no_policy_tags(self):

0 commit comments

Comments
 (0)