diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py index 2c93e437fa..5c1db93dce 100644 --- a/bigframes/display/anywidget.py +++ b/bigframes/display/anywidget.py @@ -245,7 +245,7 @@ def _cached_data(self) -> pd.DataFrame: """Combine all cached batches into a single DataFrame.""" if not self._cached_batches: return pd.DataFrame(columns=self._dataframe.columns) - return pd.concat(self._cached_batches, ignore_index=True) + return pd.concat(self._cached_batches) def _reset_batch_cache(self) -> None: """Resets batch caching attributes.""" @@ -294,8 +294,18 @@ def _set_table_html(self) -> None: break # Get the data for the current page - page_data = cached_data.iloc[start:end] - + page_data = cached_data.iloc[start:end].copy() + + # Handle index display + # TODO(b/438181139): Add tests for custom multiindex + if self._dataframe._block.has_index: + index_name = page_data.index.name + page_data.insert( + 0, index_name if index_name is not None else "", page_data.index + ) + else: + # Default index - include as "Row" column + page_data.insert(0, "Row", range(start + 1, start + len(page_data) + 1)) # Handle case where user navigated beyond available data with unknown row count is_unknown_count = self.row_count is None is_beyond_data = self._all_data_loaded and len(page_data) == 0 and self.page > 0 diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index b0d908fc17..0ce286ce64 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -106,17 +106,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "state gender year name number\n", - " AL F 1910 Annie 482\n", - " AL F 1910 Myrtle 104\n", - " AR F 1910 Lillian 56\n", - " CT F 1910 Anne 38\n", - " CT F 1910 Frances 45\n", - " FL F 1910 Margaret 53\n", - " GA F 1910 Mae 73\n", - " GA F 1910 Beatrice 96\n", - " GA F 1910 Lola 47\n", - " IA F 1910 Viola 49\n", + "state gender year name number\n", + " AL F 1910 Lillian 99\n", + " AL F 1910 Ruby 204\n", + " AL F 1910 Helen 76\n", + " AL F 1910 Eunice 41\n", + " AR F 1910 Dora 42\n", + " CA F 1910 Edna 62\n", + " CA F 1910 Helen 239\n", + " CO F 1910 Alice 46\n", + " FL F 1910 Willie 71\n", + " FL F 1910 Thelma 65\n", "...\n", "\n", "[5552452 rows x 5 columns]\n" @@ -196,7 +196,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e2231d99614a4489b2930c24b30f1d34", + "model_id": "775e84ca212c4867bb889266b830ae68", "version_major": 2, "version_minor": 1 }, @@ -232,79 +232,79 @@ " AL\n", " F\n", " 1910\n", - " Hazel\n", - " 51\n", + " Cora\n", + " 61\n", " \n", " \n", " 1\n", " AL\n", " F\n", " 1910\n", - " Lucy\n", - " 76\n", + " Anna\n", + " 74\n", " \n", " \n", " 2\n", " AR\n", " F\n", " 1910\n", - " Nellie\n", - " 39\n", + " Willie\n", + " 132\n", " \n", " \n", " 3\n", - " AR\n", + " CO\n", " F\n", " 1910\n", - " Lena\n", - " 40\n", + " Anna\n", + " 42\n", " \n", " \n", " 4\n", - " CO\n", + " FL\n", " F\n", " 1910\n", - " Thelma\n", - " 36\n", + " Louise\n", + " 70\n", " \n", " \n", " 5\n", - " CO\n", + " GA\n", " F\n", " 1910\n", - " Ruth\n", - " 68\n", + " Catherine\n", + " 57\n", " \n", " \n", " 6\n", - " CT\n", + " IL\n", " F\n", " 1910\n", - " Elizabeth\n", - " 86\n", + " Jessie\n", + " 43\n", " \n", " \n", " 7\n", - " DC\n", + " IN\n", " F\n", " 1910\n", - " Mary\n", - " 80\n", + " Anna\n", + " 100\n", " \n", " \n", " 8\n", - " FL\n", + " IN\n", " F\n", " 1910\n", - " Annie\n", - " 101\n", + " Pauline\n", + " 77\n", " \n", " \n", " 9\n", - " FL\n", + " IN\n", " F\n", " 1910\n", - " Alma\n", + " Beulah\n", " 39\n", " \n", " \n", @@ -314,16 +314,16 @@ ], "text/plain": [ "state gender year name number\n", - " AL F 1910 Hazel 51\n", - " AL F 1910 Lucy 76\n", - " AR F 1910 Nellie 39\n", - " AR F 1910 Lena 40\n", - " CO F 1910 Thelma 36\n", - " CO F 1910 Ruth 68\n", - " CT F 1910 Elizabeth 86\n", - " DC F 1910 Mary 80\n", - " FL F 1910 Annie 101\n", - " FL F 1910 Alma 39\n", + " AL F 1910 Cora 61\n", + " AL F 1910 Anna 74\n", + " AR F 1910 Willie 132\n", + " CO F 1910 Anna 42\n", + " FL F 1910 Louise 70\n", + " GA F 1910 Catherine 57\n", + " IL F 1910 Jessie 43\n", + " IN F 1910 Anna 100\n", + " IN F 1910 Pauline 77\n", + " IN F 1910 Beulah 39\n", "...\n", "\n", "[5552452 rows x 5 columns]" @@ -409,12 +409,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f26e26da0c84469fb7a9c211ab4423b7", + "model_id": "bf4224f8022042aea6d72507ddb5570b", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "" + "" ] }, "execution_count": 7, @@ -523,12 +523,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f1a893516ee04a5f9eb2655d5aaca778", + "model_id": "8d9bfeeba3ca4d11a56dccb28aacde23", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "" + "" ] }, "execution_count": 9, @@ -563,7 +563,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 85.9 kB in 11 seconds of slot time.\n", + " Query processed 85.9 kB in 13 seconds of slot time.\n", " " ], "text/plain": [ @@ -624,7 +624,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d48598e7d34a4fd0a817e4995868395e", + "model_id": "9fce25a077604e4882144d46d0d4ba45", "version_major": 2, "version_minor": 1 }, @@ -671,6 +671,42 @@ " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", " DE\n", + " 29.08.018\n", + " E04H 6/12\n", + " <NA>\n", + " 18157874.1\n", + " 21.02.2018\n", + " 22.02.2017\n", + " Liedtke & Partner Patentanw√§lte\n", + " SHB Hebezeugbau GmbH\n", + " VOLGER, Alexander\n", + " STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER\n", + " EP 3 366 869 A1\n", + " \n", + " \n", + " 1\n", + " {'application_number': None, 'class_internatio...\n", + " gs://gcs-public-data--labeled-patents/espacene...\n", + " EU\n", + " DE\n", + " 03.10.2018\n", + " H05B 6/12\n", + " <NA>\n", + " 18165514.3\n", + " 03.04.2018\n", + " 30.03.2017\n", + " <NA>\n", + " BSH Hausger√§te GmbH\n", + " Acero Acero, Jesus\n", + " VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG\n", + " EP 3 383 141 A2\n", + " \n", + " \n", + " 2\n", + " {'application_number': None, 'class_internatio...\n", + " gs://gcs-public-data--labeled-patents/espacene...\n", + " EU\n", + " DE\n", " 03.10.2018\n", " H01L 21/20\n", " <NA>\n", @@ -684,7 +720,7 @@ " EP 3 382 744 A1\n", " \n", " \n", - " 1\n", + " 3\n", " {'application_number': None, 'class_internatio...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", @@ -702,7 +738,7 @@ " EP 3 382 553 A1\n", " \n", " \n", - " 2\n", + " 4\n", " {'application_number': None, 'class_internatio...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", @@ -719,42 +755,6 @@ " MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E...\n", " EP 3 381 276 A1\n", " \n", - " \n", - " 3\n", - " {'application_number': None, 'class_internatio...\n", - " gs://gcs-public-data--labeled-patents/espacene...\n", - " EU\n", - " DE\n", - " 03.10.2018\n", - " H05B 6/12\n", - " <NA>\n", - " 18165514.3\n", - " 03.04.2018\n", - " 30.03.2017\n", - " <NA>\n", - " BSH Hausger√§te GmbH\n", - " Acero Acero, Jesus\n", - " VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG\n", - " EP 3 383 141 A2\n", - " \n", - " \n", - " 4\n", - " {'application_number': None, 'class_internatio...\n", - " gs://gcs-public-data--labeled-patents/espacene...\n", - " EU\n", - " DE\n", - " 29.08.018\n", - " E04H 6/12\n", - " <NA>\n", - " 18157874.1\n", - " 21.02.2018\n", - " 22.02.2017\n", - " Liedtke & Partner Patentanw√§lte\n", - " SHB Hebezeugbau GmbH\n", - " VOLGER, Alexander\n", - " STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER\n", - " EP 3 366 869 A1\n", - " \n", " \n", "\n", "

5 rows × 15 columns

\n", @@ -776,32 +776,32 @@ "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", "\n", " publication_date class_international class_us application_number \\\n", - "0 03.10.2018 H01L 21/20 18166536.5 \n", - "1 03.10.2018 G06F 11/30 18157347.8 \n", - "2 03.10.2018 A01K 31/00 18171005.4 \n", - "3 03.10.2018 H05B 6/12 18165514.3 \n", - "4 29.08.018 E04H 6/12 18157874.1 \n", + "0 29.08.018 E04H 6/12 18157874.1 \n", + "1 03.10.2018 H05B 6/12 18165514.3 \n", + "2 03.10.2018 H01L 21/20 18166536.5 \n", + "3 03.10.2018 G06F 11/30 18157347.8 \n", + "4 03.10.2018 A01K 31/00 18171005.4 \n", "\n", " filing_date priority_date_eu representative_line_1_eu \\\n", - "0 16.02.2016 Scheider, Sascha et al \n", - "1 19.02.2018 31.03.2017 Hoffmann Eitle \n", - "2 05.02.2015 05.02.2014 Stork Bamberger Patentanwälte \n", - "3 03.04.2018 30.03.2017 \n", - "4 21.02.2018 22.02.2017 Liedtke & Partner Patentanwälte \n", + "0 21.02.2018 22.02.2017 Liedtke & Partner Patentanwälte \n", + "1 03.04.2018 30.03.2017 \n", + "2 16.02.2016 Scheider, Sascha et al \n", + "3 19.02.2018 31.03.2017 Hoffmann Eitle \n", + "4 05.02.2015 05.02.2014 Stork Bamberger Patentanwälte \n", "\n", " applicant_line_1 inventor_line_1 \\\n", - "0 EV Group E. Thallner GmbH Kurz, Florian \n", - "1 FUJITSU LIMITED Kukihara, Kensuke \n", - "2 Linco Food Systems A/S Thrane, Uffe \n", - "3 BSH Hausgeräte GmbH Acero Acero, Jesus \n", - "4 SHB Hebezeugbau GmbH VOLGER, Alexander \n", + "0 SHB Hebezeugbau GmbH VOLGER, Alexander \n", + "1 BSH Hausgeräte GmbH Acero Acero, Jesus \n", + "2 EV Group E. Thallner GmbH Kurz, Florian \n", + "3 FUJITSU LIMITED Kukihara, Kensuke \n", + "4 Linco Food Systems A/S Thrane, Uffe \n", "\n", " title_line_1 number \n", - "0 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", - "1 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", - "2 MASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", - "3 VORRICHTUNG ZUR INDUKTIVEN ENERGIEÜBERTRAGUNG EP 3 383 141 A2 \n", - "4 STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER EP 3 366 869 A1 \n", + "0 STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER EP 3 366 869 A1 \n", + "1 VORRICHTUNG ZUR INDUKTIVEN ENERGIEÜBERTRAGUNG EP 3 383 141 A2 \n", + "2 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", + "3 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", + "4 MASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", "\n", "[5 rows x 15 columns]" ] diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index 260a338bf7..b0eeb4a3c2 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -998,6 +998,140 @@ def test_dataframe_repr_mimebundle_anywidget_with_metadata( assert "colab" in metadata["application/vnd.jupyter.widget-view+json"] -# TODO(b/332316283): Add tests for custom index and multiindex +@pytest.fixture(scope="module") +def custom_index_pandas_df() -> pd.DataFrame: + """Create a DataFrame with a custom named index for testing.""" + test_data = pd.DataFrame( + { + "value_a": [10, 20, 30, 40, 50, 60], + "value_b": ["a", "b", "c", "d", "e", "f"], + } + ) + test_data.index = pd.Index( + ["row_1", "row_2", "row_3", "row_4", "row_5", "row_6"], name="custom_idx" + ) + return test_data + + +@pytest.fixture(scope="module") +def custom_index_bf_df( + session: bf.Session, custom_index_pandas_df: pd.DataFrame +) -> bf.dataframe.DataFrame: + return session.read_pandas(custom_index_pandas_df) + + +@pytest.fixture(scope="module") +def multiindex_pandas_df() -> pd.DataFrame: + """Create a DataFrame with MultiIndex for testing.""" + test_data = pd.DataFrame( + { + "value": [100, 200, 300, 400, 500, 600], + "category": ["X", "Y", "Z", "X", "Y", "Z"], + } + ) + test_data.index = pd.MultiIndex.from_arrays( + [ + ["group_A", "group_A", "group_A", "group_B", "group_B", "group_B"], + [1, 2, 3, 1, 2, 3], + ], + names=["group", "item"], + ) + return test_data + + +@pytest.fixture(scope="module") +def multiindex_bf_df( + session: bf.Session, multiindex_pandas_df: pd.DataFrame +) -> bf.dataframe.DataFrame: + return session.read_pandas(multiindex_pandas_df) + + +def test_widget_with_default_index_should_display_index_column_with_empty_header( + paginated_bf_df: bf.dataframe.DataFrame, +): + """ + Given a DataFrame with a default index, when the TableWidget is rendered, + then an index column should be visible with an empty header. + """ + import re + + from bigframes.display.anywidget import TableWidget + + with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2): + widget = TableWidget(paginated_bf_df) + html = widget.table_html + + # The header for the index should be present but empty, matching the + # internal rendering logic. + thead = html.split("")[1].split("")[0] + # Find the first header cell and check that its content div is empty. + match = re.search(r"]*>]*>([^<]*)", thead) + assert match is not None, "Could not find table header cell in output." + assert ( + match.group(1) == "" + ), f"Expected empty index header, but found: {match.group(1)}" + + +def test_widget_with_custom_index_should_display_index_column( + custom_index_bf_df: bf.dataframe.DataFrame, +): + """ + Given a DataFrame with a custom named index, when rendered, + then the index column and first page of rows should be visible. + """ + from bigframes.display.anywidget import TableWidget + + with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2): + widget = TableWidget(custom_index_bf_df) + html = widget.table_html + + assert "custom_idx" in html + assert "row_1" in html + assert "row_2" in html + assert "row_3" not in html # Verify pagination is working + assert "row_4" not in html + + +def test_widget_with_custom_index_pagination_preserves_index( + custom_index_bf_df: bf.dataframe.DataFrame, +): + """ + Given a DataFrame with a custom index, when navigating to the second page, + then the second page's index values should be visible. + """ + from bigframes.display.anywidget import TableWidget + + with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2): + widget = TableWidget(custom_index_bf_df) + + widget.page = 1 # Navigate to page 2 + html = widget.table_html + + assert "row_3" in html + assert "row_4" in html + assert "row_1" not in html # Verify page 1 content is gone + assert "row_2" not in html + + +def test_widget_with_custom_index_matches_pandas_output( + custom_index_bf_df: bf.dataframe.DataFrame, +): + """ + Given a DataFrame with a custom index and max_rows=3, the widget's HTML + output should contain the first three index values. + """ + from bigframes.display.anywidget import TableWidget + + with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 3): + widget = TableWidget(custom_index_bf_df) + html = widget.table_html + + assert "row_1" in html + assert "row_2" in html + assert "row_3" in html + assert "row_4" not in html # Verify it respects max_rows + + +# TODO(b/438181139): Add tests for custom multiindex # This may not be necessary for the SQL Cell use case but should be # considered for completeness.