Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions sdc/datatypes/common_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,9 +219,21 @@ def sdc_join_series_indexes_impl(left, right):
ridx = numpy.empty(est_total_size, numpy.int64)
joined = numpy.empty(est_total_size, numba_common_dtype)

left_nan = []
right_nan = []
for i in range(lsize):
if numpy.isnan(left[i]):
left_nan.append(i)
for i in range(rsize):
if numpy.isnan(right[i]):
right_nan.append(i)

# sort arrays saving the old positions
sorted_left = numpy.argsort(left, kind='mergesort')
sorted_right = numpy.argsort(right, kind='mergesort')
# put the position of the nans in an increasing sequence
sorted_left[lsize-len(left_nan):] = left_nan
sorted_right[rsize-len(right_nan):] = right_nan

i, j, k = 0, 0, 0
while (i < lsize and j < rsize):
Expand All @@ -232,13 +244,13 @@ def sdc_join_series_indexes_impl(left, right):
left_index = left[sorted_left[i]]
right_index = right[sorted_right[j]]

if (left_index < right_index):
if (left_index < right_index) or numpy.isnan(right_index):
joined[k] = left_index
lidx[k] = sorted_left[i]
ridx[k] = -1
i += 1
k += 1
elif (left_index > right_index):
elif (left_index > right_index) or numpy.isnan(left_index):
joined[k] = right_index
lidx[k] = -1
ridx[k] = sorted_right[j]
Expand Down
256 changes: 229 additions & 27 deletions sdc/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2512,31 +2512,151 @@ def hpat_pandas_series_add(self, other, level=None, fill_value=None, axis=0):
"""

_func_name = 'Method add().'
ty_checker = TypeChecker(_func_name)
ty_checker.check(self, SeriesType)
ty_checker = TypeChecker('Operator add().')
self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType)
if not (self_is_series or other_is_series):
return None

if not (isinstance(fill_value, types.Omitted) or fill_value is None):
ty_checker.raise_exc(fill_value, 'None', 'fill_value')
# this overload is not for string series
self_is_string_series = self_is_series and isinstance(self.dtype, types.UnicodeType)
other_is_string_series = other_is_series and isinstance(other.dtype, types.UnicodeType)
if self_is_string_series or other_is_string_series:
return None

if not (isinstance(axis, types.Omitted) or axis == 0):
if not isinstance(self, (SeriesType, types.Number)):
ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

if not isinstance(other, (SeriesType, types.Number)):
ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

operands_are_series = self_is_series and other_is_series
if operands_are_series:
none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other)))
series_indexes_comparable = check_types_comparable(self.index, other.index) or none_or_numeric_indexes
if not series_indexes_comparable:
raise TypingError('{} Not implemented for series with not-comparable indexes. \
Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

series_data_comparable = check_types_comparable(self, other)
if not series_data_comparable:
raise TypingError('{} Not supported for not-comparable operands. \
Given: self={}, other={}'.format(_func_name, self, other))

if not isinstance(level, types.Omitted) and level is not None:
ty_checker.raise_exc(level, 'None', 'level')

if not isinstance(fill_value, (types.Omitted, types.Number, types.NoneType)) and fill_value is not None:
ty_checker.raise_exc(fill_value, 'number', 'fill_value')

if not isinstance(axis, types.Omitted) and axis != 0:
ty_checker.raise_exc(axis, 'int', 'axis')
fill_value_is_none = False
if isinstance(fill_value, (types.NoneType, types.Omitted)) or fill_value is None:
fill_value_is_none = True
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe simplify this to a single assignment?
fill_value_is_none = isinstance(fill_value, (types.NoneType, types.Omitted)) or fill_value is None

# specializations for numeric series only
if not operands_are_series:
def _series_add_scalar_impl(self, other, level=None, fill_value=None, axis=0):
fill_value_is_nan = False
if fill_value is None:
fill_value = numpy.nan
if not fill_value_is_none == True: # noqa
fill_value_is_nan = numpy.isnan(fill_value)
if not (fill_value_is_nan or fill_value_is_none == True): # noqa
numpy_like.fillna(self._data, inplace=True, value=fill_value)

if isinstance(other, SeriesType):
def hpat_pandas_series_add_impl(self, other, level=None, fill_value=None, axis=0):
return pandas.Series(self._data + other._data)
if self_is_series == True: # noqa
result_data = numpy.empty(len(self._data), dtype=numpy.float64)
result_data[:] = self._data + numpy.float64(other)
return pandas.Series(result_data, index=self._index, name=self._name)
else:
result_data = numpy.empty(len(other._data), dtype=numpy.float64)
result_data[:] = numpy.float64(self) + other._data
return pandas.Series(result_data, index=other._index, name=other._name)

return hpat_pandas_series_add_impl
return _series_add_scalar_impl

if isinstance(other, types.Integer) or isinstance(other, types.Float):
def hpat_pandas_series_add_number_impl(self, other, level=None, fill_value=None, axis=0):
if axis != 0:
raise ValueError('Method add(). The object axis\n expected: 0')
else: # both operands are numeric series
# optimization for series with default indexes, that can be aligned differently
if (isinstance(self.index, types.NoneType) and isinstance(other.index, types.NoneType)):
def _series_add_none_indexes_impl(self, other, level=None, fill_value=None, axis=0):
fill_value_is_nan = False
if fill_value is None:
fill_value = numpy.nan
if not fill_value_is_none == True: # noqa
fill_value_is_nan = numpy.isnan(fill_value)
if not (fill_value_is_nan or fill_value_is_none == True): # noqa
numpy_like.fillna(self._data, inplace=True, value=fill_value)
numpy_like.fillna(other._data, inplace=True, value=fill_value)

return pandas.Series(self._data + other)
if (len(self._data) == len(other._data)):
result_data = numpy_like.astype(self._data, numpy.float64)
result_data = result_data + other._data
return pandas.Series(result_data)
else:
left_size, right_size = len(self._data), len(other._data)
min_data_size = min(left_size, right_size)
max_data_size = max(left_size, right_size)
result_data = numpy.empty(max_data_size, dtype=numpy.float64)
if (left_size == min_data_size):
result_data[:min_data_size] = self._data
for i in range(min_data_size, len(result_data)):
result_data[i] = fill_value
result_data = result_data + other._data
else:
result_data[:min_data_size] = other._data
for i in range(min_data_size, len(result_data)):
result_data[i] = fill_value
result_data = self._data + result_data

return hpat_pandas_series_add_number_impl
return pandas.Series(result_data)

ty_checker.raise_exc(other, 'Series, int, float', 'other')
return _series_add_none_indexes_impl
else:
# for numeric indexes find common dtype to be used when creating joined index
if none_or_numeric_indexes:
ty_left_index_dtype = types.int64 if isinstance(self.index, types.NoneType) else self.index.dtype
ty_right_index_dtype = types.int64 if isinstance(other.index, types.NoneType) else other.index.dtype
numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
[ty_left_index_dtype, ty_right_index_dtype], [])

def _series_add_common_impl(self, other, level=None, fill_value=None, axis=0):
left_index, right_index = self.index, other.index
fill_value_is_nan = False
if fill_value is None:
fill_value = numpy.nan
if not fill_value_is_none == True: # noqa
fill_value_is_nan = numpy.isnan(fill_value)
if not (fill_value_is_nan or fill_value_is_none == True): # noqa
numpy_like.fillna(self._data, inplace=True, value=fill_value)
numpy_like.fillna(other._data, inplace=True, value=fill_value)
# check if indexes are equal and series don't have to be aligned
if sdc_check_indexes_equal(left_index, right_index):
result_data = numpy.empty(len(self._data), dtype=numpy.float64)
result_data[:] = self._data + other._data

if none_or_numeric_indexes == True: # noqa
result_index = numpy_like.astype(left_index, numba_index_common_dtype)
else:
result_index = self._index

return pandas.Series(result_data, index=result_index)

# TODO: replace below with core join(how='outer', return_indexers=True) when implemented
joined_index, left_indexer, right_indexer = sdc_join_series_indexes(left_index, right_index)
result_size = len(joined_index)
left_values = numpy.empty(result_size, dtype=numpy.float64)
right_values = numpy.empty(result_size, dtype=numpy.float64)
for i in numba.prange(result_size):
left_pos, right_pos = left_indexer[i], right_indexer[i]
left_values[i] = self._data[left_pos] if left_pos != -1 else fill_value
right_values[i] = other._data[right_pos] if right_pos != -1 else fill_value
result_data = left_values + right_values
return pandas.Series(result_data, joined_index)

return _series_add_common_impl

return None


@sdc_overload_method(SeriesType, 'sub')
Expand Down Expand Up @@ -4007,25 +4127,107 @@ def hpat_pandas_series_lt(self, other, level=None, fill_value=None, axis=0):
if not (isinstance(level, types.Omitted) or level is None):
ty_checker.raise_exc(level, 'None', 'level')

if not (isinstance(fill_value, types.Omitted) or fill_value is None):
ty_checker.raise_exc(fill_value, 'None', 'fill_value')
if not isinstance(fill_value, (types.Omitted, types.Number, types.NoneType)) and fill_value is not None:
ty_checker.raise_exc(fill_value, 'number', 'fill_value')

if not (isinstance(axis, types.Omitted) or axis == 0):
ty_checker.raise_exc(axis, 'int', 'axis')

if isinstance(other, SeriesType):
def hpat_pandas_series_lt_impl(self, other, level=None, fill_value=None, axis=0):
return pandas.Series(self._data < other._data)
self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType)
if not (self_is_series or other_is_series):
return None

return hpat_pandas_series_lt_impl
if not isinstance(self, (SeriesType, types.Number, types.UnicodeType)):
ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

if isinstance(other, types.Number):
def hpat_pandas_series_lt_impl(self, other, level=None, fill_value=None, axis=0):
return pandas.Series(self._data < other)
if not isinstance(other, (SeriesType, types.Number, types.UnicodeType)):
ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

return hpat_pandas_series_lt_impl
operands_are_series = self_is_series and other_is_series
if operands_are_series:
none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other)))
series_indexes_comparable = check_types_comparable(self.index, other.index) or none_or_numeric_indexes
if not series_indexes_comparable:
raise TypingError('{} Not implemented for series with not-comparable indexes. \
Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

ty_checker.raise_exc(other, 'Series, int, float', 'other')
series_data_comparable = check_types_comparable(self, other)
if not series_data_comparable:
raise TypingError('{} Not supported for not-comparable operands. \
Given: self={}, other={}'.format(_func_name, self, other))

fill_value_is_none = False
if isinstance(fill_value, (types.NoneType, types.Omitted)) or fill_value is None:
fill_value_is_none = True
if not operands_are_series:
def _series_lt_scalar_impl(self, other, level=None, fill_value=None, axis=0):
fill_value_is_nan = False
if fill_value is None:
fill_value = numpy.nan
if not fill_value_is_none == True: # noqa
fill_value_is_nan = numpy.isnan(fill_value)
if not (fill_value_is_nan or fill_value_is_none == True): # noqa
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You do not need to capture fill_value_is_none as a compile-time constant, since there is no need to eliminate dead branches here. If all you need is to run fillna when fill_value is neither None nor NaN, you can just write:

            if (fill_value is not None and not numpy.isnan(fill_value)):
                numpy_like.fillna(self._data, inplace=True, value=fill_value)

numpy_like.fillna(self._data, inplace=True, value=fill_value)
if self_is_series == True: # noqa
return pandas.Series(self._data < other, index=self._index, name=self._name)
else:
return pandas.Series(self < other._data, index=other._index, name=other._name)

return _series_lt_scalar_impl

else:

# optimization for series with default indexes, that can be aligned differently
if (isinstance(self.index, types.NoneType) and isinstance(other.index, types.NoneType)):
def _series_lt_none_indexes_impl(self, other, level=None, fill_value=None, axis=0):
fill_value_is_nan = False
if fill_value is None:
fill_value = numpy.nan
if not fill_value_is_none == True: # noqa
fill_value_is_nan = numpy.isnan(fill_value)
if not (fill_value_is_nan or fill_value_is_none == True): # noqa
numpy_like.fillna(self._data, inplace=True, value=fill_value)
numpy_like.fillna(other._data, inplace=True, value=fill_value)
left_size, right_size = len(self._data), len(other._data)
if (left_size == right_size):
return pandas.Series(self._data < other._data)
else:
raise ValueError("Can only compare identically-labeled Series objects")

return _series_lt_none_indexes_impl
else:

if none_or_numeric_indexes:
ty_left_index_dtype = types.int64 if isinstance(self.index, types.NoneType) else self.index.dtype
ty_right_index_dtype = types.int64 if isinstance(other.index, types.NoneType) else other.index.dtype
numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
[ty_left_index_dtype, ty_right_index_dtype], [])

def _series_lt_common_impl(self, other, level=None, fill_value=None, axis=0):
fill_value_is_nan = False
if fill_value is None:
fill_value = numpy.nan
if not fill_value_is_none == True: # noqa
fill_value_is_nan = numpy.isnan(fill_value)
if not (fill_value_is_nan or fill_value_is_none == True): # noqa
numpy_like.fillna(self._data, inplace=True, value=fill_value)
numpy_like.fillna(other._data, inplace=True, value=fill_value)
left_index, right_index = self.index, other.index

if sdc_check_indexes_equal(left_index, right_index):
if none_or_numeric_indexes == True: # noqa
new_index = numpy_like.astype(left_index, numba_index_common_dtype)
else:
new_index = self._index
return pandas.Series(self._data < other._data,
new_index)
else:
raise ValueError("Can only compare identically-labeled Series objects")

return _series_lt_common_impl

return None


@sdc_overload_method(SeriesType, 'gt')
Expand Down
2 changes: 1 addition & 1 deletion sdc/functions/numpy_like.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@ def sdc_fillna_inplace_int_impl(self, inplace=False, value=None):

def sdc_fillna_inplace_float_impl(self, inplace=False, value=None):
length = len(self)
for i in prange(length):
for i in range(length):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are you reverting to a non-scalable implementation (range instead of prange)?

if isnan(self[i]):
self[i] = value
return None
Expand Down
Loading