|
31 | 31 | cloudpickle = pickle |
32 | 32 |
|
33 | 33 |
|
| 34 | +# ---------------------------------------------------------------------- |
| 35 | +# Set up serialization for numpy with dtype object (primitive types are |
| 36 | +# handled efficiently with Arrow's Tensor facilities, see |
| 37 | +# python_to_arrow.cc) |
| 38 | + |
def _serialize_numpy_array_list(obj):
    """Break a numpy array into a ``(values, dtype_string)`` pair.

    ``values`` is the result of ``obj.tolist()`` (nested Python lists
    holding the array's elements) and ``dtype_string`` is ``obj.dtype.str``,
    the typestring later needed to rebuild the dtype.

    Presumably only reached for object-dtype arrays, per the section
    comment in this file -- confirm against the registration site.
    """
    values = obj.tolist()
    dtype_string = obj.dtype.str
    return values, dtype_string
| 41 | + |
| 42 | + |
def _deserialize_numpy_array_list(data):
    """Rebuild a numpy array from a ``(values, dtype_string)`` payload.

    ``data[0]`` holds the (possibly nested) list of elements and
    ``data[1]`` the dtype typestring; both are read by index.

    NOTE(review): the shape is inferred by ``np.array`` from the nesting
    of ``data[0]``, so an object array whose elements are themselves
    sequences may come back with a different shape -- confirm whether
    callers rely on exact shape round-tripping.
    """
    values = data[0]
    dtype = np.dtype(data[1])
    return np.array(values, dtype=dtype)
| 45 | + |
| 46 | + |
def _serialize_numpy_array_pickle(obj):
    """Pickle ``obj`` and hand the resulting bytes to ``frombuffer``.

    The highest available pickle protocol is used. ``frombuffer`` is
    defined outside this view; presumably it wraps the bytes in an Arrow
    buffer -- confirm against the module's imports.
    """
    return frombuffer(pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL))
| 50 | + |
| 51 | + |
def _deserialize_numpy_array_pickle(data):
    """Unpickle the object stored in the buffer-like ``data``.

    ``data`` is wrapped in a ``memoryview`` before being passed to
    ``pickle.loads``, so it must support the buffer protocol.
    """
    # NOTE: unpickling can execute arbitrary code; only feed this payloads
    # that come from a trusted serializer.
    view = memoryview(data)
    return pickle.loads(view)
| 54 | + |
| 55 | + |
34 | 56 | def register_default_serialization_handlers(serialization_context): |
35 | 57 |
|
36 | 58 | # ---------------------------------------------------------------------- |
@@ -81,22 +103,10 @@ def _deserialize_default_dict(data): |
81 | 103 | custom_serializer=cloudpickle.dumps, |
82 | 104 | custom_deserializer=cloudpickle.loads) |
83 | 105 |
|
84 | | - # ---------------------------------------------------------------------- |
85 | | - # Set up serialization for numpy with dtype object (primitive types are |
86 | | - # handled efficiently with Arrow's Tensor facilities, see |
87 | | - # python_to_arrow.cc) |
88 | | - |
89 | | - def _serialize_numpy_array(obj): |
90 | | - pickled = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL) |
91 | | - return frombuffer(pickled) |
92 | | - |
93 | | - def _deserialize_numpy_array(data): |
94 | | - return pickle.loads(memoryview(data)) |
95 | | - |
96 | 106 | serialization_context.register_type( |
97 | 107 | np.ndarray, 'np.array', |
98 | | - custom_serializer=_serialize_numpy_array, |
99 | | - custom_deserializer=_deserialize_numpy_array) |
| 108 | + custom_serializer=_serialize_numpy_array_list, |
| 109 | + custom_deserializer=_deserialize_numpy_array_list) |
100 | 110 |
|
101 | 111 | # ---------------------------------------------------------------------- |
102 | 112 | # Set up serialization for pandas Series and DataFrame |
@@ -155,3 +165,10 @@ def _deserialize_torch_tensor(data): |
155 | 165 |
|
156 | 166 |
|
# Install the default handlers on the module-level default context.
register_default_serialization_handlers(_default_serialization_context)

# Separate context cloned from the default one, with np.ndarray re-registered
# to use the pickle-based handlers instead of the list-based handlers that
# the default registration installs. Presumably intended for serializing
# pandas data -- confirm against the callers of this context.
pandas_serialization_context = _default_serialization_context.clone()

pandas_serialization_context.register_type(
    np.ndarray, 'np.array',
    custom_serializer=_serialize_numpy_array_pickle,
    custom_deserializer=_deserialize_numpy_array_pickle)
0 commit comments