diff --git a/bigquery/google/cloud/bigquery/table.py b/bigquery/google/cloud/bigquery/table.py
index 1861ad993241..1a28d84bf843 100644
--- a/bigquery/google/cloud/bigquery/table.py
+++ b/bigquery/google/cloud/bigquery/table.py
@@ -917,9 +917,6 @@ def upload_from_file(self,
             'configuration': {
                 'load': {
                     'sourceFormat': source_format,
-                    'schema': {
-                        'fields': _build_schema_resource(self._schema),
-                    },
                     'destinationTable': {
                         'projectId': self._dataset.project,
                         'datasetId': self._dataset.name,
@@ -929,6 +926,12 @@ def upload_from_file(self,
             }
         }
+        if len(self._schema) > 0:
+            load_config = metadata['configuration']['load']
+            load_config['schema'] = {
+                'fields': _build_schema_resource(self._schema)
+            }
+
         _configure_job_metadata(metadata, allow_jagged_rows,
                                 allow_quoted_newlines,
                                 create_disposition,
                                 encoding, field_delimiter,
diff --git a/bigquery/tests/data/colors.avro b/bigquery/tests/data/colors.avro
new file mode 100644
index 000000000000..e0133fd027f4
Binary files /dev/null and b/bigquery/tests/data/colors.avro differ
diff --git a/bigquery/tests/system.py b/bigquery/tests/system.py
index 44e508428fe6..baad4a240507 100644
--- a/bigquery/tests/system.py
+++ b/bigquery/tests/system.py
@@ -390,6 +390,49 @@ def _job_done(instance):
         self.assertEqual(sorted(rows, key=by_age),
                          sorted(ROWS, key=by_age))
 
+    def test_load_table_from_local_avro_file_then_dump_table(self):
+        TABLE_NAME = 'test_table_avro'
+        ROWS = [
+            ("violet", 400),
+            ("indigo", 445),
+            ("blue", 475),
+            ("green", 510),
+            ("yellow", 570),
+            ("orange", 590),
+            ("red", 650)]
+
+        dataset = Config.CLIENT.dataset(
+            _make_dataset_name('load_local_then_dump'))
+
+        retry_403(dataset.create)()
+        self.to_delete.append(dataset)
+
+        table = dataset.table(TABLE_NAME)
+        self.to_delete.insert(0, table)
+
+        with open(os.path.join(WHERE, 'data', 'colors.avro'), 'rb') as avrof:
+            job = table.upload_from_file(
+                avrof,
+                source_format='AVRO',
+                write_disposition='WRITE_TRUNCATE'
+            )
+
+        def _job_done(instance):
+            return instance.state.lower() == 'done'
+
+        # Retry until done.
+        retry = RetryInstanceState(_job_done, max_tries=8)
+        retry(job.reload)()
+
+        self.assertEqual(job.output_rows, len(ROWS))
+
+        # Reload table to get the schema before fetching the rows.
+        table.reload()
+        rows = self._fetch_single_page(table)
+        by_wavelength = operator.itemgetter(1)
+        self.assertEqual(sorted(rows, key=by_wavelength),
+                         sorted(ROWS, key=by_wavelength))
+
     def test_load_table_from_storage_then_dump_table(self):
         import csv
         from google.cloud._testing import _NamedTemporaryFile
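
Note: with this change, upload_from_file only attaches a 'schema' block to the load
configuration when the table has fields defined locally, so self-describing formats
such as Avro can be loaded into a table whose schema has not been set client-side.
A minimal sketch of the resulting calling pattern, mirroring the system test above;
the client construction, dataset/table names, and local file path are illustrative
assumptions, not part of this patch:

    from google.cloud import bigquery

    client = bigquery.Client()          # assumes default credentials (hypothetical setup)
    dataset = client.dataset('colors_demo')   # hypothetical dataset name
    table = dataset.table('colors')            # note: no schema assigned locally

    # Avro files embed their own schema, so with this patch no 'schema' is sent
    # in the load configuration and BigQuery derives it from the file itself.
    with open('colors.avro', 'rb') as avro_file:
        job = table.upload_from_file(
            avro_file,
            source_format='AVRO',
            write_disposition='WRITE_TRUNCATE',
        )

For CSV and other non-self-describing formats, callers that set table.schema before
uploading get the same request body as before, since the conditional re-attaches the
_build_schema_resource output whenever the schema is non-empty.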