|
13 | 13 | # limitations under the License. |
14 | 14 |
|
15 | 15 | import base64 |
| 16 | +import collections |
16 | 17 | import concurrent.futures |
17 | 18 | import csv |
18 | 19 | import datetime |
@@ -634,6 +635,81 @@ def test_load_table_from_local_avro_file_then_dump_table(self): |
634 | 635 | sorted(row_tuples, key=by_wavelength), sorted(ROWS, key=by_wavelength) |
635 | 636 | ) |
636 | 637 |
|
@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
def test_load_table_from_dataframe_w_automatic_schema(self):
    """Load a DataFrame without passing a schema and inspect the result.

    Every column uses a dtype that is expected to map onto a BigQuery
    type, so the load call is made with the DataFrame and the table ID
    only. The destination table's schema and row count are then checked.

    https://github.com/googleapis/google-cloud-python/issues/9044
    """
    # The same three instants feed both the UTC-localized column and the
    # naive datetime column.
    moments = [
        datetime.datetime(2010, 1, 2, 3, 44, 50),
        datetime.datetime(2011, 2, 3, 14, 50, 59),
        datetime.datetime(2012, 3, 14, 15, 16),
    ]

    # Insertion order is the column order that the schema assertion
    # below relies on.
    columns = collections.OrderedDict()
    columns["bool_col"] = pandas.Series([True, False, True], dtype="bool")
    columns["ts_col"] = pandas.Series(
        moments, dtype="datetime64[ns]"
    ).dt.tz_localize(pytz.utc)
    columns["dt_col"] = pandas.Series(moments, dtype="datetime64[ns]")
    columns["float32_col"] = pandas.Series([1.0, 2.0, 3.0], dtype="float32")
    columns["float64_col"] = pandas.Series([4.0, 5.0, 6.0], dtype="float64")
    columns["int8_col"] = pandas.Series([-12, -11, -10], dtype="int8")
    columns["int16_col"] = pandas.Series([-9, -8, -7], dtype="int16")
    columns["int32_col"] = pandas.Series([-6, -5, -4], dtype="int32")
    columns["int64_col"] = pandas.Series([-3, -2, -1], dtype="int64")
    columns["uint8_col"] = pandas.Series([0, 1, 2], dtype="uint8")
    columns["uint16_col"] = pandas.Series([3, 4, 5], dtype="uint16")
    columns["uint32_col"] = pandas.Series([6, 7, 8], dtype="uint32")
    dataframe = pandas.DataFrame(columns, columns=columns.keys())

    dataset_id = _make_dataset_id("bq_load_test")
    self.temp_dataset(dataset_id)
    table_id = "{}.{}.load_table_from_dataframe_w_automatic_schema".format(
        Config.CLIENT.project, dataset_id
    )

    # No job config / schema is supplied; block until the load finishes.
    Config.CLIENT.load_table_from_dataframe(dataframe, table_id).result()

    loaded_table = Config.CLIENT.get_table(table_id)
    expected_schema = tuple(
        bigquery.SchemaField(column_name, field_type)
        for column_name, field_type in (
            ("bool_col", "BOOLEAN"),
            ("ts_col", "TIMESTAMP"),
            ("dt_col", "DATETIME"),
            ("float32_col", "FLOAT"),
            ("float64_col", "FLOAT"),
            ("int8_col", "INTEGER"),
            ("int16_col", "INTEGER"),
            ("int32_col", "INTEGER"),
            ("int64_col", "INTEGER"),
            ("uint8_col", "INTEGER"),
            ("uint16_col", "INTEGER"),
            ("uint32_col", "INTEGER"),
        )
    )
    self.assertEqual(tuple(loaded_table.schema), expected_schema)
    self.assertEqual(loaded_table.num_rows, 3)
| 712 | + |
637 | 713 | @unittest.skipIf(pandas is None, "Requires `pandas`") |
638 | 714 | @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") |
639 | 715 | def test_load_table_from_dataframe_w_nulls(self): |
|
0 commit comments