IntelPython · densmirn · May 22, 2020 · May 21, 2020
diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py
@@ -227,29 +227,19 @@ def hpat_pandas_df_values_impl(self, numba_common_dtype):
 
 def sdc_pandas_dataframe_append_codegen(df, other, _func_name, ignore_index_value, indexes_comparable, args):
     """
-    Input:
-    df = pd.DataFrame({'A': ['cat', 'dog', np.nan], 'B': [.2, .3, np.nan]})
-    other = pd.DataFrame({'A': ['bird', 'fox', 'mouse'], 'C': ['a', np.nan, '']})
-    ignore_index=True
-
-    Func generated:
+    Example of generated implementation:
     def sdc_pandas_dataframe_append_impl(df, other, ignore_index=False, verify_integrity=False, sort=None):
-        len_df = len(df._data[0])
-        len_other = len(other._data[0])
-        new_col_A_data_df = df._data[0]
-        new_col_A_data_other = other._data[0]
-        new_col_A = init_series(new_col_A_data_df).append(init_series(new_col_A_data_other))._data
-        new_col_B_data_df = df._data[1]
-        new_col_B_data = init_series(new_col_B_data_df)._data
-        new_col_B = fill_array(new_col_B_data, len_df+len_other)
-        new_col_C_data_other = other._data[0]
-        new_col_C_data = init_series(new_col_C_data_other)._data
-        new_col_C = fill_str_array(new_col_C_data, len_df+len_other, push_back=False)
-        return pandas.DataFrame({"A": new_col_A, "B": new_col_B, "C": new_col_C)
+        len_df = len(df._data[0][0])
+        len_other = len(other._data[0][0])
+        new_col_0_data_df = df._data[0][0]
+        new_col_0_data_other = other._data[0][0]
+        new_col_0 = init_series(new_col_0_data_df).append(init_series(new_col_0_data_other))._data
+        new_col_1_data_df = df._data[0][1]
+        new_col_1_data_other = other._data[0][1]
+        new_col_1 = init_series(new_col_1_data_df).append(init_series(new_col_1_data_other))._data
+        return pandas.DataFrame({"A": new_col_0, "B": new_col_1})
     """
     indent = 4 * ' '
-    func_args = ['df', 'other']
-
     func_args = ['df', 'other'] + kwsparams2list(args)
 
     df_columns_indx = {col_name: i for i, col_name in enumerate(df.columns)}
@@ -267,38 +257,43 @@ def sdc_pandas_dataframe_append_impl(df, other, ignore_index=False, verify_integ
     func_text = []
     column_list = []
 
-    func_text.append(f'len_df = len(df._data[0])')
-    func_text.append(f'len_other = len(other._data[0])')
+    func_text.append(f'len_df = len(df._data[0][0])')
+    func_text.append(f'len_other = len(other._data[0][0])')
 
-    for col_name, col_id in df_columns_indx.items():
-        func_text.append(f'new_col_{col_id}_data_{"df"} = {"df"}._data[{col_id}]')
+    for col_name, idx in df_columns_indx.items():
+        col_loc = df.column_loc[col_name]
+        type_id, col_id = col_loc.type_id, col_loc.col_id
+        func_text.append(f'new_col_{idx}_data_df = df._data[{type_id}][{col_id}]')
         if col_name in other_columns_indx:
-            other_col_id = other_columns_indx.get(col_name)
-            func_text.append(f'new_col_{col_id}_data_{"other"} = '
-                             f'{"other"}._data[{other_columns_indx.get(col_name)}]')
-            s1 = f'init_series(new_col_{col_id}_data_{"df"})'
-            s2 = f'init_series(new_col_{col_id}_data_{"other"})'
-            func_text.append(f'new_col_{col_id} = {s1}.append({s2})._data')
+            other_col_loc = other.column_loc[col_name]
+            other_type_id, other_col_id = other_col_loc.type_id, other_col_loc.col_id
+            func_text.append(f'new_col_{idx}_data_other = '
+                             f'other._data[{other_type_id}][{other_col_id}]')
+            s1 = f'init_series(new_col_{idx}_data_df)'
+            s2 = f'init_series(new_col_{idx}_data_other)'
+            func_text.append(f'new_col_{idx} = {s1}.append({s2})._data')
         else:
-            func_text.append(f'new_col_{col_id}_data = init_series(new_col_{col_id}_data_df)._data')
+            func_text.append(f'new_col_{idx}_data = init_series(new_col_{idx}_data_df)._data')
             if col_name in string_type_columns:
-                func_text.append(f'new_col_{col_id} = fill_str_array(new_col_{col_id}_data, len_df+len_other)')
+                func_text.append(f'new_col_{idx} = fill_str_array(new_col_{idx}_data, len_df+len_other)')
             else:
-                func_text.append(f'new_col_{col_id} = fill_array(new_col_{col_id}_data, len_df+len_other)')
-        column_list.append((f'new_col_{col_id}', col_name))
+                func_text.append(f'new_col_{idx} = fill_array(new_col_{idx}_data, len_df+len_other)')
+        column_list.append((f'new_col_{idx}', col_name))
 
-    for col_name, col_id in other_columns_indx.items():
+    for col_name, idx in other_columns_indx.items():
         if col_name not in df_columns_indx:
-            func_text.append(f'new_col_{col_id}_data_{"other"} = {"other"}._data[{col_id}]')
-            func_text.append(f'new_col_{col_id}_data = init_series(new_col_{col_id}_data_other)._data')
+            other_col_loc = other.column_loc[col_name]
+            other_type_id, other_col_id = other_col_loc.type_id, other_col_loc.col_id
+            func_text.append(f'new_col_{idx}_data_other = other._data[{other_type_id}][{other_col_id}]')
+            func_text.append(f'new_col_{idx}_data = init_series(new_col_{idx}_data_other)._data')
             if col_name in string_type_columns:
                 func_text.append(
-                    f'new_col_{col_id}_other = '
-                    f'fill_str_array(new_col_{col_id}_data, len_df+len_other, push_back=False)')
+                    f'new_col_{idx}_other = '
+                    f'fill_str_array(new_col_{idx}_data, len_df+len_other, push_back=False)')
             else:
-                func_text.append(f'new_col_{col_id}_other = '
-                                 f'fill_array(new_col_{col_id}_data, len_df+len_other, push_back=False)')
-            column_list.append((f'new_col_{col_id}_other', col_name))
+                func_text.append(f'new_col_{idx}_other = '
+                                 f'fill_array(new_col_{idx}_data, len_df+len_other, push_back=False)')
+            column_list.append((f'new_col_{idx}_other', col_name))
 
     data = ', '.join(f'"{column_name}": {column}' for column, column_name in column_list)
 

diff --git a/sdc/tests/test_dataframe.py b/sdc/tests/test_dataframe.py
@@ -2057,7 +2057,7 @@ def test_impl(df):
         df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
         pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))
 
-    @dfRefactoringNotImplemented
+    @dfRefactoringNotImplemented  # requires re-implementing DataFrame unboxing
     def test_append_df_same_cols_no_index(self):
         def test_impl(df, df2):
             return df.append(df2, ignore_index=True)
@@ -2069,7 +2069,19 @@ def test_impl(df, df2):
         df2.A[n // 2:] = n
         pd.testing.assert_frame_equal(sdc_func(df, df2), test_impl(df, df2))
 
-    @dfRefactoringNotImplemented
+    @dfRefactoringNotImplemented  # requires re-implementing DataFrame boxing
+    def test_append_df_same_cols_no_index_no_unboxing(self):
+        def test_impl():  # builds both frames itself, so no DataFrame is passed in as an argument
+            n = 11
+            df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
+            df2 = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
+            df2.A[n // 2:] = n  # overwrite column A from position n // 2 onward with n
+            return df.append(df2, ignore_index=True)
+
+        sdc_func = self.jit(test_impl)
+        pd.testing.assert_frame_equal(sdc_func(), test_impl())  # self.jit result vs. plain call
+
+    @dfRefactoringNotImplemented  # requires re-implementing DataFrame unboxing
     def test_append_df_same_cols_index_default(self):
         def test_impl(df, df2):
             return df.append(df2)
@@ -2082,7 +2094,7 @@ def test_impl(df, df2):
 
         pd.testing.assert_frame_equal(sdc_func(df, df2), test_impl(df, df2))
 
-    @dfRefactoringNotImplemented
+    @dfRefactoringNotImplemented  # requires re-implementing DataFrame unboxing
     def test_append_df_diff_cols_index_ignore_false(self):
         def test_impl(df, df2):
             return df.append(df2, ignore_index=False)
@@ -2096,7 +2108,22 @@ def test_impl(df, df2):
 
         pd.testing.assert_frame_equal(sdc_func(df, df2), test_impl(df, df2))
 
-    @dfRefactoringNotImplemented
+    @dfRefactoringNotImplemented  # requires re-implementing DataFrame boxing
+    def test_append_df_diff_cols_index_ignore_false_no_unboxing(self):
+        def test_impl():  # builds both frames itself, so no DataFrame is passed in as an argument
+            n1 = 11
+            n2 = n1 * 2  # df2 has twice as many rows as df
+            df = pd.DataFrame({'A': np.arange(n1), 'B': np.arange(n1) ** 2},
+                              index=np.arange(n1) ** 4)
+            df2 = pd.DataFrame({'C': np.arange(n2), 'D': np.arange(n2) ** 2,
+                                'E S D': np.arange(n2) + 100},  # column name containing spaces
+                               index=np.arange(n2) ** 8)  # NOTE(review): 21**8 exceeds int32 - confirm int64 dtype
+            return df.append(df2, ignore_index=False)  # df and df2 share no column names
+
+        sdc_func = self.jit(test_impl)
+        pd.testing.assert_frame_equal(sdc_func(), test_impl())  # self.jit result vs. plain call
+
+    @dfRefactoringNotImplemented  # requires re-implementing DataFrame unboxing
     def test_append_df_diff_cols_index_ignore_index(self):
         def test_impl(df, df2):
             return df.append(df2, ignore_index=True)
@@ -2110,7 +2137,7 @@ def test_impl(df, df2):
 
         pd.testing.assert_frame_equal(sdc_func(df, df2), test_impl(df, df2))
 
-    @dfRefactoringNotImplemented
+    @dfRefactoringNotImplemented  # requires re-implementing DataFrame unboxing
     def test_append_df_diff_cols_no_index(self):
         def test_impl(df, df2):
             return df.append(df2)
@@ -2123,7 +2150,7 @@ def test_impl(df, df2):
 
         pd.testing.assert_frame_equal(sdc_func(df, df2), test_impl(df, df2))
 
-    @dfRefactoringNotImplemented
+    @dfRefactoringNotImplemented  # requires re-implementing DataFrame unboxing
     def test_append_df_cross_cols_no_index(self):
         def test_impl(df, df2):
             return df.append(df2, ignore_index=True)
@@ -2136,7 +2163,7 @@ def test_impl(df, df2):
 
         pd.testing.assert_frame_equal(sdc_func(df, df2), test_impl(df, df2))
 
-    @dfRefactoringNotImplemented
+    @dfRefactoringNotImplemented  # requires re-implementing DataFrame unboxing
     def test_append_df_exception_incomparable_index_type(self):
         def test_impl(df, df2):
             return df.append(df2, ignore_index=False)
@@ -2157,7 +2184,7 @@ def test_impl(df, df2):
         self.assertIn(msg, str(raises.exception))
 
     @skip_sdc_jit
-    @dfRefactoringNotImplemented
+    @dfRefactoringNotImplemented  # requires re-implementing DataFrame unboxing
     def test_append_df_diff_types_no_index(self):
         def test_impl(df, df2):
             return df.append(df2, ignore_index=True)