Skip to content

Commit 9220dac

Browse files
authored
Marshal row data correctly in 'Table.insert_data()' (#3426)
* Move '_row{,s}_from_json' next to scalar '_from_json' helpers. * Add converter helpers for row data scalars. * Convert row data using helpers. Closes #2957.
1 parent 3de5f33 commit 9220dac

4 files changed

Lines changed: 115 additions & 54 deletions

File tree

bigquery/google/cloud/bigquery/_helpers.py

Lines changed: 57 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from google.cloud._helpers import UTC
2222
from google.cloud._helpers import _date_from_iso8601_date
2323
from google.cloud._helpers import _datetime_from_microseconds
24+
from google.cloud._helpers import _microseconds_from_datetime
2425
from google.cloud._helpers import _RFC3339_NO_FRACTION
2526
from google.cloud._helpers import _time_from_iso8601_time_naive
2627
from google.cloud._helpers import _to_bytes
@@ -122,6 +123,38 @@ def _record_from_json(value, field):
122123
}
123124

124125

126+
def _row_from_json(row, schema):
    """Convert JSON row data to row with appropriate types.

    Note: ``row['f']`` and ``schema`` are presumed to be of the same length.

    :type row: dict
    :param row: A JSON response row to be converted.

    :type schema: tuple
    :param schema: A tuple of
                   :class:`~google.cloud.bigquery.schema.SchemaField`.

    :rtype: tuple
    :returns: A tuple of data converted to native types.
    """
    def _convert_cell(field, cell):
        # Pick the scalar converter matching this field's declared type.
        converter = _CELLDATA_FROM_JSON[field.field_type]
        if field.mode == 'REPEATED':
            # Repeated cells wrap every item in a {'v': ...} envelope.
            return [converter(item['v'], field) for item in cell['v']]
        return converter(cell['v'], field)

    return tuple(_convert_cell(field, cell)
                 for field, cell in zip(schema, row['f']))
151+
152+
153+
def _rows_from_json(rows, schema):
154+
"""Convert JSON row data to rows with appropriate types."""
155+
return [_row_from_json(row, schema) for row in rows]
156+
157+
125158
def _int_to_json(value):
126159
"""Coerce 'value' to an JSON-compatible representation."""
127160
if isinstance(value, int):
@@ -148,8 +181,11 @@ def _bytes_to_json(value):
148181
return value
149182

150183

151-
def _timestamp_to_json(value):
152-
"""Coerce 'value' to an JSON-compatible representation."""
184+
def _timestamp_to_json_parameter(value):
185+
"""Coerce 'value' to an JSON-compatible representation.
186+
187+
This version returns the string representation used in query parameters.
188+
"""
153189
if isinstance(value, datetime.datetime):
154190
if value.tzinfo not in (None, UTC):
155191
# Convert to UTC and remove the time zone info.
@@ -159,6 +195,16 @@ def _timestamp_to_json(value):
159195
return value
160196

161197

198+
def _timestamp_to_json_row(value):
199+
"""Coerce 'value' to an JSON-compatible representation.
200+
201+
This version returns floating-point seconds value used in row data.
202+
"""
203+
if isinstance(value, datetime.datetime):
204+
value = _microseconds_from_datetime(value) * 1e-6
205+
return value
206+
207+
162208
def _datetime_to_json(value):
163209
"""Coerce 'value' to an JSON-compatible representation."""
164210
if isinstance(value, datetime.datetime):
@@ -180,49 +226,25 @@ def _time_to_json(value):
180226
return value
181227

182228

183-
_SCALAR_VALUE_TO_JSON = {
229+
# Converters used for scalar values marshalled as row data.
# Legacy ('INTEGER', 'FLOAT', 'BOOLEAN') and standard-SQL ('INT64',
# 'FLOAT64', 'BOOL') type names share one converter each; STRING has
# no entry because string values need no conversion.
_SCALAR_VALUE_TO_JSON_ROW = dict(
    INTEGER=_int_to_json,
    INT64=_int_to_json,
    FLOAT=_float_to_json,
    FLOAT64=_float_to_json,
    BOOLEAN=_bool_to_json,
    BOOL=_bool_to_json,
    BYTES=_bytes_to_json,
    TIMESTAMP=_timestamp_to_json_row,
    DATETIME=_datetime_to_json,
    DATE=_date_to_json,
    TIME=_time_to_json,
)
196243

197244

198-
def _row_from_json(row, schema):
199-
"""Convert JSON row data to row with appropriate types.
200-
201-
:type row: dict
202-
:param row: A JSON response row to be converted.
203-
204-
:type schema: tuple
205-
:param schema: A tuple of
206-
:class:`~google.cloud.bigquery.schema.SchemaField`.
207-
208-
:rtype: tuple
209-
:returns: A tuple of data converted to native types.
210-
"""
211-
row_data = []
212-
for field, cell in zip(schema, row['f']):
213-
converter = _CELLDATA_FROM_JSON[field.field_type]
214-
if field.mode == 'REPEATED':
215-
row_data.append([converter(item['v'], field)
216-
for item in cell['v']])
217-
else:
218-
row_data.append(converter(cell['v'], field))
219-
220-
return tuple(row_data)
221-
222-
223-
def _rows_from_json(rows, schema):
224-
"""Convert JSON row data to rows with appropriate types."""
225-
return [_row_from_json(row, schema) for row in rows]
245+
# Converters used for scalar values marshalled as query parameters.
# Identical to the row-data converters except TIMESTAMP, which query
# parameters marshal as a string representation rather than as
# floating-point seconds.
_SCALAR_VALUE_TO_JSON_PARAM = dict(
    _SCALAR_VALUE_TO_JSON_ROW, TIMESTAMP=_timestamp_to_json_parameter)
226248

227249

228250
class _ConfigurationProperty(object):
@@ -420,7 +442,7 @@ def to_api_repr(self):
420442
:returns: JSON mapping
421443
"""
422444
value = self.value
423-
converter = _SCALAR_VALUE_TO_JSON.get(self.type_)
445+
converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_)
424446
if converter is not None:
425447
value = converter(value)
426448
resource = {
@@ -506,7 +528,7 @@ def to_api_repr(self):
506528
a_values = [repr_['parameterValue'] for repr_ in reprs]
507529
else:
508530
a_type = {'type': self.array_type}
509-
converter = _SCALAR_VALUE_TO_JSON.get(self.array_type)
531+
converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.array_type)
510532
if converter is not None:
511533
values = [converter(value) for value in values]
512534
a_values = [{'value': value} for value in values]
@@ -600,7 +622,7 @@ def to_api_repr(self):
600622
values[name] = repr_['parameterValue']
601623
else:
602624
s_types[name] = {'name': name, 'type': {'type': type_}}
603-
converter = _SCALAR_VALUE_TO_JSON.get(type_)
625+
converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_)
604626
if converter is not None:
605627
value = converter(value)
606628
values[name] = {'value': value}

bigquery/google/cloud/bigquery/table.py

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,18 @@
2222
import six
2323

2424
from google.cloud._helpers import _datetime_from_microseconds
25-
from google.cloud._helpers import _microseconds_from_datetime
2625
from google.cloud._helpers import _millis_from_datetime
2726
from google.cloud.exceptions import NotFound
2827
from google.cloud.exceptions import make_exception
28+
from google.cloud.iterator import HTTPIterator
2929
from google.cloud.streaming.exceptions import HttpError
3030
from google.cloud.streaming.http_wrapper import Request
3131
from google.cloud.streaming.http_wrapper import make_api_request
3232
from google.cloud.streaming.transfer import RESUMABLE_UPLOAD
3333
from google.cloud.streaming.transfer import Upload
3434
from google.cloud.bigquery.schema import SchemaField
3535
from google.cloud.bigquery._helpers import _row_from_json
36-
from google.cloud.iterator import HTTPIterator
36+
from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW
3737

3838

3939
_TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'table.reload()'"
@@ -673,6 +673,9 @@ def fetch_data(self, max_results=None, page_token=None, client=None):
673673
(this is distinct from the total number of rows in the
674674
current page: ``iterator.page.num_items``).
675675
"""
676+
if len(self._schema) == 0:
677+
raise ValueError(_TABLE_HAS_NO_SCHEMA)
678+
676679
client = self._require_client(client)
677680
path = '%s/data' % (self.path,)
678681
iterator = HTTPIterator(client=client, path=path,
@@ -741,11 +744,9 @@ def insert_data(self,
741744
row_info = {}
742745

743746
for field, value in zip(self._schema, row):
744-
if field.field_type == 'TIMESTAMP':
745-
# BigQuery stores TIMESTAMP data internally as a
746-
# UNIX timestamp with microsecond precision.
747-
# Specifies the number of seconds since the epoch.
748-
value = _convert_timestamp(value)
747+
converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type)
748+
if converter is not None: # STRING doesn't need converting
749+
value = converter(value)
749750
row_info[field.name] = value
750751

751752
info = {'json': row_info}
@@ -1131,10 +1132,3 @@ class _UrlBuilder(object):
11311132
def __init__(self):
11321133
self.query_params = {}
11331134
self._relative_path = ''
1134-
1135-
1136-
def _convert_timestamp(value):
1137-
"""Helper for :meth:`Table.insert_data`."""
1138-
if isinstance(value, datetime.datetime):
1139-
value = _microseconds_from_datetime(value) * 1e-6
1140-
return value

bigquery/tests/unit/test__helpers.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -561,12 +561,12 @@ def test_w_bytes(self):
561561
self.assertEqual(converted, expected)
562562

563563

564-
class Test_timestamp_to_json(unittest.TestCase):
564+
class Test_timestamp_to_json_parameter(unittest.TestCase):
565565

566566
def _call_fut(self, value):
567-
from google.cloud.bigquery._helpers import _timestamp_to_json
567+
from google.cloud.bigquery._helpers import _timestamp_to_json_parameter
568568

569-
return _timestamp_to_json(value)
569+
return _timestamp_to_json_parameter(value)
570570

571571
def test_w_float(self):
572572
self.assertEqual(self._call_fut(1.234567), 1.234567)
@@ -604,6 +604,29 @@ def test_w_datetime_w_utc_zone(self):
604604
self.assertEqual(self._call_fut(when), ZULU)
605605

606606

607+
class Test_timestamp_to_json_row(unittest.TestCase):
608+
609+
def _call_fut(self, value):
610+
from google.cloud.bigquery._helpers import _timestamp_to_json_row
611+
612+
return _timestamp_to_json_row(value)
613+
614+
def test_w_float(self):
615+
self.assertEqual(self._call_fut(1.234567), 1.234567)
616+
617+
def test_w_string(self):
618+
ZULU = '2016-12-20 15:58:27.339328+00:00'
619+
self.assertEqual(self._call_fut(ZULU), ZULU)
620+
621+
def test_w_datetime(self):
622+
import datetime
623+
from google.cloud._helpers import _microseconds_from_datetime
624+
625+
when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328)
626+
self.assertEqual(
627+
self._call_fut(when), _microseconds_from_datetime(when) / 1e6)
628+
629+
607630
class Test_datetime_to_json(unittest.TestCase):
608631

609632
def _call_fut(self, value):

bigquery/tests/unit/test_table.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,6 +1043,24 @@ def test_delete_w_alternate_client(self):
10431043
self.assertEqual(req['method'], 'DELETE')
10441044
self.assertEqual(req['path'], '/%s' % PATH)
10451045

1046+
def test_fetch_data_wo_schema(self):
1047+
from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA
1048+
1049+
client = _Client(project=self.PROJECT)
1050+
dataset = _Dataset(client)
1051+
table = self._make_one(self.TABLE_NAME, dataset=dataset)
1052+
ROWS = [
1053+
('Phred Phlyntstone', 32),
1054+
('Bharney Rhubble', 33),
1055+
('Wylma Phlyntstone', 29),
1056+
('Bhettye Rhubble', 27),
1057+
]
1058+
1059+
with self.assertRaises(ValueError) as exc:
1060+
table.fetch_data()
1061+
1062+
self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,))
1063+
10461064
def test_fetch_data_w_bound_client(self):
10471065
import datetime
10481066
import six
@@ -1355,7 +1373,7 @@ def _row_data(row):
13551373
if isinstance(row[2], datetime.datetime):
13561374
joined = _microseconds_from_datetime(joined) * 1e-6
13571375
return {'full_name': row[0],
1358-
'age': row[1],
1376+
'age': str(row[1]),
13591377
'joined': joined}
13601378

13611379
SENT = {
@@ -1404,7 +1422,11 @@ def test_insert_data_w_alternate_client(self):
14041422
]
14051423

14061424
def _row_data(row):
1407-
return {'full_name': row[0], 'age': row[1], 'voter': row[2]}
1425+
return {
1426+
'full_name': row[0],
1427+
'age': str(row[1]),
1428+
'voter': row[2] and 'true' or 'false',
1429+
}
14081430

14091431
SENT = {
14101432
'skipInvalidRows': True,

0 commit comments

Comments
 (0)