diff --git a/legate/numpy/array.py b/legate/numpy/array.py index 9a31ec2f1d..9f24e4ddfc 100644 --- a/legate/numpy/array.py +++ b/legate/numpy/array.py @@ -370,13 +370,6 @@ def _convert_key(self, key, stacklevel=2, first=True): def __getitem__(self, key): if key is None: raise KeyError("invalid key passed to legate.numpy.ndarray") - # If we're a scalar, we're our own value - if self.size == 1: - if (self.ndim == 0 and key != () and key != Ellipsis) or ( - key != ((0,) * self.ndim) - ): - raise KeyError("invalid key passed to legate.numpy.ndarray") - return self key = self._convert_key(key) return ndarray( shape=None, thunk=self._thunk.get_item(key, stacklevel=2) @@ -745,11 +738,6 @@ def __setitem__(self, key, value): temp = ndarray(value_array.shape, dtype=self.dtype) temp._thunk.convert(value_array._thunk, stacklevel=2) value_array = temp - if self.size == 1: - if (self.ndim == 0 and key != () and key != Ellipsis) or ( - key != ((0,) * self.ndim) - ): - raise KeyError("invalid key passed to legate.numpy.ndarray") key = self._convert_key(key) self._thunk.set_item(key, value_array._thunk, stacklevel=2) diff --git a/legate/numpy/config.py b/legate/numpy/config.py index 16023bcdb2..f33883da93 100644 --- a/legate/numpy/config.py +++ b/legate/numpy/config.py @@ -189,6 +189,8 @@ class NumPyOpCode(IntEnum): INCLUSIVE_SCAN = legate_numpy.NUMPY_INCLUSIVE_SCAN CONVERT_TO_RECT = legate_numpy.NUMPY_CONVERT_TO_RECT ARANGE = legate_numpy.NUMPY_ARANGE + ZIP = legate_numpy.NUMPY_ZIP + TRANSFORM = legate_numpy.NUMPY_TRANSFORM # Match these to NumPyRedopID in legate_numpy_c.h @@ -317,6 +319,6 @@ class NumPyProjCode(IntEnum): PROJ_RADIX_3D_Z_4_2 = legate_numpy.NUMPY_PROJ_RADIX_3D_Z_4_2 PROJ_RADIX_3D_Z_4_3 = legate_numpy.NUMPY_PROJ_RADIX_3D_Z_4_3 # Flattening - PROJ_ND_1D_C_ORDER = legate_numpy.NUMPY_PROJ_ND_1D_C_ORDER + PROJ_ND_MD_C_ORDER = legate_numpy.NUMPY_PROJ_ND_MD_C_ORDER # Must always be last PROJ_LAST = legate_numpy.NUMPY_PROJ_LAST diff --git a/legate/numpy/deferred.py 
b/legate/numpy/deferred.py index 95a2ea5e2e..efca1c34d6 100644 --- a/legate/numpy/deferred.py +++ b/legate/numpy/deferred.py @@ -23,7 +23,7 @@ from .config import * # noqa F403 from .thunk import NumPyThunk -from .utils import calculate_volume +from .utils import calculate_volume, get_point_dim try: xrange # Python 2 @@ -44,11 +44,10 @@ class DeferredArray(NumPyThunk): :meta private: """ - def __init__(self, runtime, base, shape, dtype, scalar): + def __init__(self, runtime, base, shape, dtype): NumPyThunk.__init__(self, runtime, shape, dtype) assert base is not None self.base = base # Either a RegionField or a Future - self.scalar = scalar @property def storage(self): @@ -62,7 +61,7 @@ def ndim(self): return len(self.shape) def __numpy_array__(self, stacklevel): - if self.scalar: + if self.size == 1: return np.full( self.shape, self.get_scalar_array(stacklevel=(stacklevel + 1)), @@ -310,13 +309,13 @@ def get_scalar_array(self, stacklevel): def _create_indexing_array(self, key, stacklevel): # Convert everything into deferred arrays of int64 if isinstance(key, tuple): - tuple_of_arrays = () + tuple_of_keys = () for k in key: if not isinstance(k, NumPyThunk): raise NotImplementedError( "need support for mixed advanced indexing" ) - tuple_of_arrays += (k,) + tuple_of_keys += (k,) else: assert isinstance(key, NumPyThunk) # Handle the boolean array case @@ -326,29 +325,178 @@ def _create_indexing_array(self, key, stacklevel): "Boolean advanced indexing dimension mismatch" ) # For boolean arrays do the non-zero operation to make - # them into a normal indexing array - tuple_of_arrays = key.nonzero(stacklevel + 1) + # them into a set of integer indexing arrays + tuple_of_keys = key.nonzero(stacklevel + 1) else: - tuple_of_arrays = (key,) - if len(tuple_of_arrays) != self.ndim: + tuple_of_keys = (key,) + if len(tuple_of_keys) != self.ndim: raise TypeError("Advanced indexing dimension mismatch") - if self.ndim > 1: - # Check that all the arrays can be broadcast together - # 
Concatenate all the arrays into a single array - raise NotImplementedError("need support for concatenating arrays") + index_arrays = [] + result_shape = tuple_of_keys[0].shape + for k in tuple_of_keys: + # TODO: Implement broadcasting for advanced indexing arrays + if k.shape != result_shape: + raise NotImplementedError( + "Broadcasting for advanced indexing arrays" + ) + # TODO: Support singleton arrays in advanced indexing + if k.size <= 1: + raise NotImplementedError( + "Singleton arrays in advanced indexing" + ) + arr = self.runtime.to_deferred_array( + k, stacklevel=(stacklevel + 1) + ) + if arr.dtype != np.dtype(np.int64): + converted_array = self.runtime.to_deferred_array( + self.runtime.create_empty_thunk( + shape=arr.shape, + dtype=np.dtype(np.int64), + inputs=(arr,), + ), + stacklevel=(stacklevel + 1), + ) + converted_array.convert( + arr, warn=False, stacklevel=(stacklevel + 1) + ) + arr = converted_array + index_arrays.append(arr) + # Zip index arrays if necessary + if len(index_arrays) == 1: + return index_arrays[0] + # NOTE: We need to instantiate a RegionField of non-primitive dtype, to + # store N-dimensional index points, to be used as the indirection field + # in a copy. Such dtypes are technically not supported, but it should + # be safe to directly create a DeferredArray of that dtype, so long as + # we don't try to convert it to a NumPy array. 
+ result_dtype = np.dtype((np.int64, (len(index_arrays),))) + result_array = DeferredArray( + self.runtime, + self.runtime.allocate_field(result_shape, result_dtype), + shape=result_shape, + dtype=result_dtype, + ) + result = result_array.base + launch_space = result.compute_parallel_launch_space() + # Pack our arguments + argbuf = BufferBuilder() + if launch_space is not None: + ( + result_part, + shardfn, + shardsp, + ) = result.find_or_create_key_partition() + self.pack_shape( + argbuf, result_array.shape, result_part.tile_shape, 0 + ) else: - return self.runtime.to_deferred_array( - tuple_of_arrays[0], stacklevel=(stacklevel + 1) + self.pack_shape(argbuf, result_array.shape) + argbuf.pack_accessor(result.field.field_id, result.transform) + # All arrays are of the same shape, so no need for transform accessors + for arr in index_arrays: + argbuf.pack_accessor(arr.base.field.field_id, arr.base.transform) + task_id = self.runtime.get_zip_task_id(len(index_arrays)) + if launch_space is not None: + # Index task launch case + task = IndexTask( + task_id, + Rect(launch_space), + self.runtime.empty_argmap, + argbuf.get_string(), + argbuf.get_size(), + mapper=self.runtime.mapper_id, + tag=shardfn, ) + if shardsp is not None: + task.set_sharding_space(shardsp) + task.add_write_requirement( + result_part, + result.field.field_id, + 0, + tag=NumPyMappingTag.KEY_REGION_TAG, + ) + + for arr in index_arrays: + task.add_read_requirement( + arr.base.find_or_create_congruent_partition(result_part), + arr.base.field.field_id, + 0, + ) + self.runtime.dispatch(task) + else: + # Single task launch case + shardpt, shardfn, shardsp = result.find_point_sharding() + task = Task( + task_id, + argbuf.get_string(), + argbuf.get_size(), + mapper=self.runtime.mapper_id, + tag=shardfn, + ) + if shardpt is not None: + task.set_point(shardpt) + if shardsp is not None: + task.set_sharding_space(shardsp) + task.add_write_requirement(result.region, result.field.field_id) + for arr in index_arrays: 
+ task.add_read_requirement( + arr.base.region, arr.base.field.field_id + ) + self.runtime.dispatch(task) + return result_array + + # Gets rid of any transforms + def materialize(self, stacklevel): + if isinstance(self.base, Future): + return self + result = DeferredArray( + self.runtime, + self.runtime.allocate_field(self.shape, self.dtype), + shape=self.shape, + dtype=self.dtype, + ) + result.copy(self, stacklevel=(stacklevel + 1), deep=False) + return result + + # Apply a transform to every element in an array of Points + def apply_transform(self, transform, stacklevel): + assert get_point_dim(self.dtype) == transform.N + result_dtype = ( + np.dtype(np.int64) + if transform.M == 1 + else np.dtype((np.int64, (transform.M,))) + ) + argbuf = BufferBuilder() + argbuf.pack_transform(transform) + args = (argbuf.get_string(),) + result = DeferredArray( + self.runtime, + self.runtime.allocate_field(self.shape, result_dtype), + shape=self.shape, + dtype=result_dtype, + ) + result.unary_op( + NumPyOpCode.TRANSFORM, + self.dtype, + self, + True, + args, + stacklevel=(stacklevel + 1), + ) + return result def get_item(self, key, stacklevel, view=None, dim_map=None): - assert self.size > 1 # Check to see if this is advanced indexing or not if self._is_advanced_indexing(key): # Create the indexing array index_array = self._create_indexing_array( key, stacklevel=(stacklevel + 1) ) + # TODO: Support singleton arrays in advanced indexing + if self.size <= 1: + raise NotImplementedError( + "Singleton arrays in advanced indexing" + ) # Create a new array to be the result result_field = self.runtime.allocate_field( index_array.shape, dtype=self.dtype @@ -358,40 +506,43 @@ def get_item(self, key, stacklevel, view=None, dim_map=None): result_field, shape=index_array.shape, dtype=self.dtype, - scalar=False, ) - # Issue the gather copy to the result - index = index_array.base - launch_space = index.compute_parallel_launch_space() - src = self.base + # Prepare regions for the gather 
copy: Get rid of all transforms, + # so that we use the underlying "global" coordinate space directly + # (gather copies can't accept accessors, that are normally used + # for this purpose). + src = self.base.root dst = result_field - if src.transform is not None or index.transform is not None: - raise NotImplementedError("views with advanced indexing") + if self.base.transform is not None: + src_indirect = index_array.apply_transform( + self.base.transform, stacklevel=(stacklevel + 1) + ).base + elif index_array.base.transform is not None: + src_indirect = index_array.materialize( + stacklevel=(stacklevel + 1) + ).base + else: + src_indirect = index_array.base + # Issue the gather copy to the result + ( + src_indirect_part, + shardfn, + shardsp, + ) = src_indirect.find_or_create_key_partition() + launch_space = src_indirect.compute_parallel_launch_space() if launch_space is not None: # Index copy launch - if self.ndim == index_array.ndim: + if len(src.shape) == len(src_indirect.shape): # If we have the same dimensionality then normal # partitioning works - ( - src_part, - shardfn, - shardsp, - ) = src.find_or_create_key_partition() + src_part = src.find_or_create_partition(launch_space) src_proj = 0 # identity - index_part = index.find_or_create_congruent_partition( - src_part - ) else: # Otherwise we need to compute an indirect partition # and functor src_part, src_proj = src.find_or_create_indirect_partition( launch_space ) - ( - index_part, - shardfn, - shardsp, - ) = index.find_or_create_key_partition() copy = IndexCopy( Rect(launch_space), mapper=self.runtime.mapper_id, @@ -401,7 +552,9 @@ def get_item(self, key, stacklevel, view=None, dim_map=None): copy.set_sharding_space(shardsp) # Partition the index array and the destination array # the same way - dst_part = dst.find_or_create_congruent_partition(index_part) + dst_part = dst.find_or_create_congruent_partition( + src_indirect_part + ) # Set this as the key partition dst.set_key_partition(dst_part, 
shardfn, shardsp) copy.add_src_requirement( @@ -414,11 +567,11 @@ def get_item(self, key, stacklevel, view=None, dim_map=None): tag=NumPyMappingTag.KEY_REGION_TAG, ) copy.add_src_indirect_requirement( - index_part, index.field.field_id, 0 + src_indirect_part, src_indirect.field.field_id, 0 ) else: # Single copy launch - shardpt, shardfn, shardsp = index.find_point_sharding() + shardpt, shardfn, shardsp = src_indirect.find_point_sharding() copy = Copy(mapper=self.runtime.mapper_id, tag=shardfn) if shardpt is not None: copy.set_point(shardpt) @@ -427,11 +580,28 @@ def get_item(self, key, stacklevel, view=None, dim_map=None): copy.add_src_requirement(src.region, src.field.field_id) copy.add_dst_requirement(dst.region, dst.field.field_id) copy.add_src_indirect_requirement( - index.region, index.field.field_id + src_indirect.region, src_indirect.field.field_id ) # Issue the copy to the runtime self.runtime.dispatch(copy) + elif self.size == 1: + # Reading from a singleton array, using basic indexing: simulate + # the operation in numpy and see how many values need to be + # returned + sim_result = np.zeros(self.shape)[key] + is_number = type(sim_result) is not np.ndarray + result_size = ( + 1 if is_number else calculate_volume(sim_result.shape) + ) + assert result_size <= 1 + result = DeferredArray( + self.runtime, + base=(self.base if result_size == 1 else Future()), + shape=(() if is_number else sim_result.shape), + dtype=self.dtype, + ) else: + # Reading from a non-singleton array, using basic indexing if view is None or dim_map is None: view, dim_map = self._get_view(key) new_shape = self._get_view_shape(view, dim_map) @@ -476,7 +646,6 @@ def get_item(self, key, stacklevel, view=None, dim_map=None): base=future, shape=(), dtype=self.dtype, - scalar=True, ) if self.runtime.shadow_debug: result.shadow = self.shadow.get_item( @@ -495,48 +664,56 @@ def set_item(self, key, rhs, stacklevel): index_array = self._create_indexing_array( key, stacklevel=(stacklevel + 1) ) + # 
TODO: Support singleton arrays in advanced indexing + if self.size <= 1 or value_array.size <= 1: + raise NotImplementedError( + "Singleton arrays in advanced indexing" + ) if index_array.shape != value_array.shape: raise ValueError( "Advanced indexing array does not match source shape" ) - # Do the scatter copy - index = index_array.base - launch_space = index.compute_parallel_launch_space() - dst = self.base + # Prepare regions for the scatter copy: Get rid of all transforms, + # so that we use the underlying "global" coordinate space directly + # (scatter copies can't accept accessors, that are normally used + # for this purpose). src = value_array.base + if src.transform is not None: + # Waiting for Legion Issue #705 + raise NotImplementedError("views on advanced __set_item__ RHS") + dst = self.base.root + if self.base.transform is not None: + dst_indirect = index_array.apply_transform( + self.base.transform, stacklevel=(stacklevel + 1) + ).base + elif index_array.base.transform is not None: + dst_indirect = index_array.materialize( + stacklevel=(stacklevel + 1) + ).base + else: + dst_indirect = index_array.base if src.field is dst.field: raise NotImplementedError("intra-array advanced coyping") - if ( - src.transform is not None - or index.transform is not None - or dst.transform is not None - ): - raise NotImplementedError("views with advanced indexing") + # Do the scatter copy + ( + dst_indirect_part, + shardfn, + shardsp, + ) = dst_indirect.find_or_create_key_partition() + launch_space = dst_indirect.compute_parallel_launch_space() if launch_space is not None: # Index copy launch - if self.ndim == index_array.ndim: + if len(dst.shape) == len(dst_indirect.shape): # If we have the same dimensionality then normal # partitioning works - ( - dst_part, - shardfn, - shardsp, - ) = dst.find_or_create_key_partition() + dst_part = dst.find_or_create_partition(launch_space) dst_proj = 0 # identity - index_part = index.find_or_create_congruent_partition( - dst_part - ) 
else: # Otherwise we need to compute an indirect partition # and functor dst_part, dst_proj = dst.find_or_create_indirect_partition( launch_space ) - ( - index_part, - shardfn, - shardsp, - ) = index.find_or_create_key_partition() copy = IndexCopy( Rect(launch_space), mapper=self.runtime.mapper_id, @@ -544,36 +721,49 @@ def set_item(self, key, rhs, stacklevel): ) if shardsp is not None: copy.set_sharding_space(shardsp) - src_part = src.find_or_create_congruent_partition(index_part) + src_part = src.find_or_create_congruent_partition( + dst_indirect_part + ) copy.add_src_requirement(src_part, src.field.field_id, 0) copy.add_dst_requirement( dst_part, dst.field.field_id, dst_proj ) copy.add_dst_indirect_requirement( - index_part, index.field.field_id, 0 + dst_indirect_part, dst_indirect.field.field_id, 0 ) else: # Single copy launch - point, shardfn, shardsp = index.find_point_sharding() + shardpt, shardfn, shardsp = dst_indirect.find_point_sharding() copy = Copy(mapper=self.runtime.mapper_id, tag=shardfn) - if point is not None: - copy.set_point(point) + if shardpt is not None: + copy.set_point(shardpt) if shardsp is not None: copy.set_sharding_space(shardsp) copy.add_src_requirement(src.region, src.field.field_id) copy.add_dst_requirement(dst.region, dst.field.field_id) copy.add_dst_indirect_requirement( - index.region, index.field.field_id + dst_indirect.region, dst_indirect.field.field_id ) # Issue the copy to the runtime self.runtime.dispatch(copy) elif self.size == 1: - assert value_array.size == 1 - # Special case of writing a single value - # We can just copy the future because they are immutable - self.base = value_array.base + # Writing to a singleton array, using basic indexing: simulate the + # operation in numpy and see how many values need to be updated. + # If it is a single value we can just copy the future because they + # are immutable. We don't check for exact match of shapes between + # LHS and RHS to capture cases of broadcasting. 
+ updated_part = np.zeros(self.shape)[key] + updated_values = ( + calculate_volume(updated_part.shape) + if type(updated_part) is np.ndarray + else 1 + ) + assert updated_values <= 1 + if updated_values == 1: + assert value_array.size == 1 + self.base = value_array.base else: - # Writing to a view of this array + # Writing to a view of a non-singleton array, using basic indexing view, dim_map = self._get_view(key) # See what the shape of the view is new_shape = self._get_view_shape(view, dim_map) @@ -729,7 +919,7 @@ def squeeze(self, axis, stacklevel): raise TypeError( '"axis" argument for squeeze must be int-like or tuple-like' ) - if not self.scalar: + if self.size != 1: # Make transform for the lost dimensions transform = AffineTransform(self.ndim, len(new_shape), False) child_idx = 0 @@ -745,7 +935,7 @@ def squeeze(self, axis, stacklevel): else: # Easy case of size 1 array, nothing to do result = DeferredArray( - self.runtime, self.base, new_shape, self.dtype, True + self.runtime, self.base, new_shape, self.dtype ) if self.runtime.shadow_debug: result.shadow = self.shadow.squeeze( @@ -2925,7 +3115,7 @@ def nonzero(self, stacklevel, callsite=None): for _ in xrange(self.ndim): result += ( self.runtime.create_empty_thunk( - shape=(0,), dtype=np.dtype(np.uint64), inputs=(self,) + shape=(0,), dtype=np.dtype(np.int64), inputs=(self,) ), ) return result @@ -2934,7 +3124,7 @@ def nonzero(self, stacklevel, callsite=None): # TODO: A better way to create the output array? 
dst_array = self.runtime.to_deferred_array( self.runtime.create_empty_thunk( - shape=out_shape, dtype=np.dtype(np.uint64), inputs=(self,) + shape=out_shape, dtype=np.dtype(np.int64), inputs=(self,) ), stacklevel=(stacklevel + 1), ) @@ -2958,7 +3148,7 @@ def nonzero(self, stacklevel, callsite=None): argbuf, nonzeros_dist_shape, nonzeros_dist_part.tile_shape, - self.runtime.first_proj_id + NumPyProjCode.PROJ_ND_1D_C_ORDER, + self.runtime.first_proj_id + NumPyProjCode.PROJ_ND_MD_C_ORDER, ) argbuf.pack_accessor( nonzeros_dist_field.field.field_id, @@ -2988,7 +3178,7 @@ def nonzero(self, stacklevel, callsite=None): index_task.add_write_requirement( nonzeros_dist_part, nonzeros_dist_field.field.field_id, - self.runtime.first_proj_id + NumPyProjCode.PROJ_ND_1D_C_ORDER, + self.runtime.first_proj_id + NumPyProjCode.PROJ_ND_MD_C_ORDER, tag=NumPyMappingTag.NO_MEMOIZE_TAG, ) self.runtime.dispatch(index_task) @@ -3032,7 +3222,7 @@ def nonzero(self, stacklevel, callsite=None): task = Task( self.runtime.get_unary_task_id( NumPyOpCode.CONVERT_TO_RECT, - result_type=np.dtype(np.uint64), + result_type=nonzeros_ranges_field.field.dtype, argument_type=nonzeros_dist_field.field.dtype, ), argbuf.get_string(), @@ -3076,7 +3266,7 @@ def nonzero(self, stacklevel, callsite=None): index_task = IndexTask( self.runtime.get_unary_task_id( NumPyOpCode.NONZERO, - result_type=np.dtype(np.uint64), + result_type=np.dtype(np.int64), argument_type=self.dtype, ), Rect(launch_space), @@ -3095,7 +3285,7 @@ def nonzero(self, stacklevel, callsite=None): index_task.add_write_requirement( dst_partition, dst.field.field_id, - self.runtime.first_proj_id + NumPyProjCode.PROJ_ND_1D_C_ORDER, + self.runtime.first_proj_id + NumPyProjCode.PROJ_ND_MD_C_ORDER, tag=NumPyMappingTag.NO_MEMOIZE_TAG, ) self.runtime.dispatch(index_task) @@ -3111,8 +3301,8 @@ def nonzero(self, stacklevel, callsite=None): task = Task( self.runtime.get_unary_task_id( NumPyOpCode.NONZERO, + result_type=np.dtype(np.int64), 
argument_type=self.dtype, - result_type=self.dtype, ), argbuf.get_string(), argbuf.get_size(), @@ -5683,11 +5873,8 @@ def add_read_requirement(task, region, tag): # A helper method for attaching arguments def add_arguments(self, task, args): assert args is not None - for numpy_array in args: - assert numpy_array.size == 1 - future = self.runtime.create_future( - numpy_array.data, numpy_array.nbytes - ) + for buf in args: + future = self.runtime.create_future(buf, memoryview(buf).nbytes) task.add_future(future) # A helper method for support for 16 bit arithmetic diff --git a/legate/numpy/runtime.py b/legate/numpy/runtime.py index 8498f78884..9fdca8a138 100644 --- a/legate/numpy/runtime.py +++ b/legate/numpy/runtime.py @@ -58,7 +58,7 @@ from .eager import EagerArray from .lazy import LazyArray from .thunk import NumPyThunk -from .utils import calculate_volume +from .utils import calculate_volume, get_point_dim # Helper method for python 3 support @@ -562,6 +562,13 @@ def set_key_partition(self, part, shardfn=None, shardsp=None): self.shard_function = shardfn self.shard_space = shardsp + @property + def root(self): + res = self + while res.parent is not None: + res = res.parent + return res + def find_or_create_key_partition(self): if self.key_partition is not None: return self.key_partition, self.shard_function, self.shard_space @@ -570,9 +577,7 @@ def find_or_create_key_partition(self): return None, None, None if self.parent is not None: # Figure out how many tiles we overlap with of the root - root = self.parent - while root.parent is not None: - root = root.parent + root = self.root root_key, rootfn, rootsp = root.find_or_create_key_partition() if root_key is None: self.launch_space = () @@ -798,11 +803,21 @@ def find_or_create_partition( ) def find_or_create_indirect_partition(self, launch_space): - assert len(launch_space) != len(self.shape) # If there is a mismatch in the number of dimensions then we need # to compute a partition and projection functor that can 
transform - # the points into a partition that makes sense - raise NotImplementedError("need support for indirect partitioning") + # the points into a partition that makes sense. + assert len(launch_space) != len(self.shape) + # For now cover the common case where we launch over a space with the + # same number of points as the array's key partition. + key_space = self.compute_parallel_launch_space() + if calculate_volume(key_space) != calculate_volume(launch_space): + raise NotImplementedError( + "indirect partitioning over launch space of different size " + "than key partition" + ) + key_part, _, _ = self.find_or_create_key_partition() + proj = self.runtime.first_proj_id + NumPyProjCode.PROJ_ND_MD_C_ORDER + return key_part, proj def attach_numpy_array(self, numpy_array, share=False): assert self.parent is None @@ -1352,9 +1367,7 @@ def create_future(self, data, size, wrap=False, dtype=None, shape=()): # If we have a shape then all the extents should be 1 for now for extent in shape: assert extent == 1 - return DeferredArray( - self, result, shape=shape, dtype=dtype, scalar=True - ) + return DeferredArray(self, result, shape=shape, dtype=dtype) else: return result @@ -1576,7 +1589,6 @@ def find_or_create_view(self, parent, view, dim_map, shape, key): child, shape, parent.field.dtype, - scalar=False, ) # We need to make this subview # If all the slices have strides of one then this is a dense
self, region_field, shape, parent.field.dtype ) else: tile_shape = tuple(map(lambda x, y: ((x - y) + 1), hi, lo)) @@ -1749,7 +1759,7 @@ def find_or_create_view(self, parent, view, dim_map, shape, key): ) parent.subviews.add(region_field) return DeferredArray( - self, region_field, shape, parent.field.dtype, scalar=False + self, region_field, shape, parent.field.dtype ) else: # We need fill in a phased partition operation from Legion @@ -1773,7 +1783,6 @@ def create_transform_view(self, region_field, new_shape, transform): new_region_field, new_shape, region_field.field.dtype, - scalar=False, ) def find_or_create_transform_sharding_functor(self, transform): @@ -1888,9 +1897,7 @@ def get_numpy_thunk(self, obj, stacklevel, share=False, dtype=None): dtype = np.dtype(store.type.to_pandas_dtype()) primitive = store.storage if kind == Future: - return DeferredArray( - self, primitive, shape=(), dtype=dtype, scalar=True - ) + return DeferredArray(self, primitive, shape=(), dtype=dtype) elif kind == FutureMap: raise NotImplementedError("Need support for FutureMap inputs") elif kind == (Region, FieldID): @@ -1903,9 +1910,7 @@ def get_numpy_thunk(self, obj, stacklevel, share=False, dtype=None): ) else: raise TypeError("Unknown LegateStore type") - return DeferredArray( - self, region_field, region_field.shape, dtype, scalar=False - ) + return DeferredArray(self, region_field, region_field.shape, dtype) # See if this is a normal numpy array if not isinstance(obj, np.ndarray): # If it's not, make it into a numpy array @@ -2170,7 +2175,6 @@ def find_or_create_array_thunk( region_field, shape=array.shape, dtype=array.dtype, - scalar=(array.size == 1), ) # If we're doing shadow debug make an EagerArray shadow if self.shadow_debug: @@ -2195,14 +2199,12 @@ def create_empty_thunk(self, shape, dtype, inputs=None): ): if len(shape) == 0: # Empty tuple - result = DeferredArray( - self, Future(), shape=(), dtype=dtype, scalar=True - ) + result = DeferredArray(self, Future(), shape=(), 
dtype=dtype) else: volume = reduce(lambda x, y: x * y, shape) if volume == 1: result = DeferredArray( - self, Future(), shape=shape, dtype=dtype, scalar=True + self, Future(), shape=shape, dtype=dtype ) else: region_field = self.allocate_field(shape, dtype) @@ -2211,7 +2213,6 @@ def create_empty_thunk(self, shape, dtype, inputs=None): region_field, shape=shape, dtype=dtype, - scalar=False, ) # If we're doing shadow debug make an EagerArray shadow if self.shadow_debug: @@ -2324,6 +2325,16 @@ def get_unary_task_id( * NUMPY_MAX_VARIANTS ) + # TRANSFORM's ID is special + if op_code == NumPyOpCode.TRANSFORM: + assert variant_code == NumPyVariantCode.NORMAL + return ( + self.first_task_id + + legate_numpy.NUMPY_TRANSFORM_OFFSET + + get_point_dim(result_type) * (LEGATE_MAX_DIM + 1) + + get_point_dim(argument_type) + ) + # unary tasks distinguish themselves by argument_type return ( self.first_task_id @@ -2388,6 +2399,11 @@ def get_weighted_bincount_task_id(self, dt1, dt2): result += numpy_field_type_offsets[dt2.type] * NUMPY_MAX_VARIANTS return result + def get_zip_task_id(self, num_coord_arrays): + result = self.first_task_id + legate_numpy.NUMPY_ZIP_OFFSET + result += num_coord_arrays + return result + def get_reduction_op_id(self, op, field_dtype): redop_id = numpy_reduction_op_offsets[op] if redop_id < legion.LEGION_REDOP_KIND_TOTAL: diff --git a/legate/numpy/utils.py b/legate/numpy/utils.py index 76792e2085..ffd1b20c68 100644 --- a/legate/numpy/utils.py +++ b/legate/numpy/utils.py @@ -19,6 +19,8 @@ import numpy as np +from legate.core import LEGATE_MAX_DIM + try: reduce # Python 2 except NameError: @@ -100,3 +102,16 @@ def calculate_volume(shape): if shape == (): return 0 return reduce(lambda x, y: x * y, shape) + + +def get_point_dim(dtype): + if dtype == np.int64: + # Special case: allowing coord_t in place of Point<1>; this is fine + # because physical regions are untyped + return 1 + assert ( + dtype.ndim == 1 + and dtype.base == np.int64 + and dtype.shape[0] <= 
LEGATE_MAX_DIM + ) + return dtype.shape[0] diff --git a/src/clip.h b/src/clip.h index dcab04a026..f83aeafab3 100644 --- a/src/clip.h +++ b/src/clip.h @@ -34,24 +34,33 @@ struct ClipOperation { #if defined(LEGATE_USE_CUDA) && defined(__CUDACC__) template -__global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) gpu_clip(const Args args) +__global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) + gpu_clip(const Args args, const bool dense) { const size_t idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx >= args.volume) return; - const Legion::Point point = args.pitches.unflatten(idx, args.rect.lo); ClipOperation func; - args.out[point] = func(args.in[point], args.min, args.max); + if (dense) { + args.outptr[idx] = func(args.inptr[idx], args.min, args.max); + } else { + const Legion::Point point = args.pitches.unflatten(idx, args.rect.lo); + args.out[point] = func(args.in[point], args.min, args.max); + } } template __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) - gpu_clip_inplace(const Args args) + gpu_clip_inplace(const Args args, const bool dense) { const size_t idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx >= args.volume) return; - const Legion::Point point = args.pitches.unflatten(idx, args.rect.lo); ClipOperation func; - args.inout[point] = func(args.inout[point], args.min, args.max); + if (dense) { + args.inoutptr[idx] = func(args.inoutptr[idx], args.min, args.max); + } else { + const Legion::Point point = args.pitches.unflatten(idx, args.rect.lo); + args.inout[point] = func(args.inout[point], args.min, args.max); + } } #endif @@ -145,10 +154,10 @@ class ClipTask : public PointTask> { LegateDeserializer& derez) { DeserializedArgs args; - args.deserialize(derez, task, regions); + const bool dense = args.deserialize(derez, task, regions); if (args.volume == 0) return; const size_t blocks = (args.volume + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - gpu_clip><<>>(args); + gpu_clip><<>>(args, dense); } 
#elif defined(LEGATE_USE_CUDA) template @@ -243,10 +252,10 @@ class ClipInplace : public PointTask> { LegateDeserializer& derez) { DeserializedArgs args; - args.deserialize(derez, task, regions); + const bool dense = args.deserialize(derez, task, regions); if (args.volume == 0) return; const size_t blocks = (args.volume + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - gpu_clip_inplace><<>>(args); + gpu_clip_inplace><<>>(args, dense); } #elif defined(LEGATE_USE_CUDA) template diff --git a/src/fill.h b/src/fill.h index c381626a75..1d382b468e 100644 --- a/src/fill.h +++ b/src/fill.h @@ -24,12 +24,17 @@ namespace numpy { #if defined(LEGATE_USE_CUDA) && defined(__CUDACC__) template -__global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) gpu_fill(const Args args) +__global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) + gpu_fill(const Args args, const bool dense) { const size_t idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx >= args.volume) return; - const Legion::Point point = args.pitches.unflatten(idx, args.rect.lo); - args.out[point] = args.value; + if (dense) { + args.outptr[idx] = args.value; + } else { + const Legion::Point point = args.pitches.unflatten(idx, args.rect.lo); + args.out[point] = args.value; + } } #endif @@ -112,10 +117,10 @@ struct FillTask : PointTask> { LegateDeserializer& derez) { DeserializedArgs args; - args.deserialize(derez, task, regions); + const bool dense = args.deserialize(derez, task, regions); if (args.volume == 0) return; const size_t blocks = (args.volume + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - gpu_fill><<>>(args); + gpu_fill><<>>(args, dense); } #elif defined(LEGATE_USE_CUDA) template diff --git a/src/legate_numpy_c.h b/src/legate_numpy_c.h index 959f8b6e35..3b8cb6ab8c 100644 --- a/src/legate_numpy_c.h +++ b/src/legate_numpy_c.h @@ -107,6 +107,8 @@ enum NumPyOpCode { NUMPY_INCLUSIVE_SCAN = 72, NUMPY_CONVERT_TO_RECT = 73, NUMPY_ARANGE = 74, + NUMPY_ZIP = 75, + NUMPY_TRANSFORM = 76, }; // 
Match these to NumPyRedopCode in legate/numpy/config.py @@ -175,7 +177,7 @@ enum NumPyProjectionCode { NUMPY_PROJ_RADIX_3D_Z_4_2 = 46, NUMPY_PROJ_RADIX_3D_Z_4_3 = 47, // Flattening - NUMPY_PROJ_ND_1D_C_ORDER = 48, + NUMPY_PROJ_ND_MD_C_ORDER = 48, // Must always be last NUMPY_PROJ_LAST = 49, }; @@ -242,8 +244,10 @@ enum NumPyShardingCode { }; enum NumpyTaskOffset { - NUMPY_CONVERT_OFFSET = 100000, - NUMPY_BINCOUNT_OFFSET = 200000, + NUMPY_CONVERT_OFFSET = 100000, + NUMPY_BINCOUNT_OFFSET = 200000, + NUMPY_ZIP_OFFSET = 300000, + NUMPY_TRANSFORM_OFFSET = 400000, }; // Match these to NumPyMappingTag in legate/numpy/config.py diff --git a/src/numpy.mk b/src/numpy.mk index d6c9351706..d062946b05 100644 --- a/src/numpy.mk +++ b/src/numpy.mk @@ -13,7 +13,7 @@ # limitations under the License. # -# List all the application source files that need OpenMP separately +# List all the application source files that need OpenMP separately # since we have to add the -fopenmp flag to CC_FLAGS for them GEN_CPU_SRC += universal_functions/absolute.cc \ universal_functions/add.cc \ @@ -93,6 +93,8 @@ GEN_CPU_SRC += universal_functions/absolute.cc \ tile.cc \ trans.cc \ where.cc \ + zip.cc \ + transform.cc \ numpy.cc # This must always be the last file! 
# It guarantees we do our registration callback # only after all task variants are recorded @@ -169,4 +171,6 @@ GEN_GPU_SRC += universal_functions/absolute.cu \ universal_functions/tanh.cu \ tile.cu \ trans.cu \ - where.cu + where.cu \ + zip.cu \ + transform.cu diff --git a/src/point_task.h b/src/point_task.h index 372e52866f..69c2ddebf3 100644 --- a/src/point_task.h +++ b/src/point_task.h @@ -31,13 +31,13 @@ namespace numpy { template constexpr int task_id = -1; -// nullary tasks themselves by ResultType +// nullary tasks distinguish themselves by ResultType template constexpr int task_id = static_cast(op_code) * NUMPY_TYPE_OFFSET + variant_code + legate_type_code_of* NUMPY_MAX_VARIANTS; -// unary tasks themselves by ArgumentType +// unary tasks distinguish themselves by ArgumentType template constexpr int task_id = static_cast(op_code) * NUMPY_TYPE_OFFSET + variant_code @@ -181,6 +181,14 @@ class CPULoop<1> { for (int x = rect.lo[0]; x <= rect.hi[0]; ++x) inout[x] = func(inout[x], in[x], std::forward(args)...); } + template + static inline void generic_loop(const Legion::Rect<1>& rect, + const Function& func, + const T& out, + const Ts&... in) + { + for (int x = rect.lo[0]; x <= rect.hi[0]; ++x) out[x] = func(in[x]...); + } }; template <> @@ -224,6 +232,15 @@ class CPULoop<2> { for (int y = rect.lo[1]; y <= rect.hi[1]; ++y) inout[x][y] = func(inout[x][y], in[x][y], std::forward(args)...); } + template + static inline void generic_loop(const Legion::Rect<2>& rect, + const Function& func, + const T& out, + const Ts&... in) + { + for (int x = rect.lo[0]; x <= rect.hi[0]; ++x) + for (int y = rect.lo[1]; y <= rect.hi[1]; ++y) out[x][y] = func(in[x][y]...); + } }; template <> @@ -271,6 +288,16 @@ class CPULoop<3> { for (int z = rect.lo[2]; z <= rect.hi[2]; ++z) inout[x][y][z] = func(inout[x][y][z], in[x][y][z], std::forward(args)...); } + template + static inline void generic_loop(const Legion::Rect<3>& rect, + const Function& func, + const T& out, + const Ts&... 
in) + { + for (int x = rect.lo[0]; x <= rect.hi[0]; ++x) + for (int y = rect.lo[1]; y <= rect.hi[1]; ++y) + for (int z = rect.lo[2]; z <= rect.hi[2]; ++z) out[x][y][z] = func(in[x][y][z]...); + } }; template <> @@ -323,6 +350,17 @@ class CPULoop<4> { inout[x][y][z][w] = func(inout[x][y][z][w], in[x][y][z][w], std::forward(args)...); } + template + static inline void generic_loop(const Legion::Rect<4>& rect, + const Function& func, + const T& out, + const Ts&... in) + { + for (int x = rect.lo[0]; x <= rect.hi[0]; ++x) + for (int y = rect.lo[1]; y <= rect.hi[1]; ++y) + for (int z = rect.lo[2]; z <= rect.hi[2]; ++z) + for (int w = rect.lo[3]; w <= rect.hi[3]; ++w) out[x][y][z][w] = func(in[x][y][z][w]...); + } }; #ifdef LEGATE_USE_OPENMP @@ -373,6 +411,15 @@ class OMPLoop<1> { for (int x = rect.lo[0]; x <= rect.hi[0]; ++x) inout[x] = func(inout[x], in[x], std::forward(args)...); } + template + static inline void generic_loop(const Legion::Rect<1>& rect, + const Function& func, + const T& out, + const Ts&... in) + { +#pragma omp parallel for schedule(static) + for (int x = rect.lo[0]; x <= rect.hi[0]; ++x) out[x] = func(in[x]...); + } }; template <> @@ -420,6 +467,16 @@ class OMPLoop<2> { for (int y = rect.lo[1]; y <= rect.hi[1]; ++y) inout[x][y] = func(inout[x][y], in[x][y], std::forward(args)...); } + template + static inline void generic_loop(const Legion::Rect<2>& rect, + const Function& func, + const T& out, + const Ts&... in) + { +#pragma omp parallel for schedule(static), collapse(2) + for (int x = rect.lo[0]; x <= rect.hi[0]; ++x) + for (int y = rect.lo[1]; y <= rect.hi[1]; ++y) out[x][y] = func(in[x][y]...); + } }; template <> @@ -471,6 +528,17 @@ class OMPLoop<3> { for (int z = rect.lo[2]; z <= rect.hi[2]; ++z) inout[x][y][z] = func(inout[x][y][z], in[x][y][z], std::forward(args)...); } + template + static inline void generic_loop(const Legion::Rect<3>& rect, + const Function& func, + const T& out, + const Ts&... 
in) + { +#pragma omp parallel for schedule(static), collapse(3) + for (int x = rect.lo[0]; x <= rect.hi[0]; ++x) + for (int y = rect.lo[1]; y <= rect.hi[1]; ++y) + for (int z = rect.lo[2]; z <= rect.hi[2]; ++z) out[x][y][z] = func(in[x][y][z]...); + } }; template <> @@ -527,6 +595,18 @@ class OMPLoop<4> { inout[x][y][z][w] = func(inout[x][y][z][w], in[x][y][z][w], std::forward(args)...); } + template + static inline void generic_loop(const Legion::Rect<4>& rect, + const Function& func, + const T& out, + const Ts&... in) + { +#pragma omp parallel for schedule(static), collapse(4) + for (int x = rect.lo[0]; x <= rect.hi[0]; ++x) + for (int y = rect.lo[1]; y <= rect.hi[1]; ++y) + for (int z = rect.lo[2]; z <= rect.hi[2]; ++z) + for (int w = rect.lo[3]; w <= rect.hi[3]; ++w) out[x][y][z][w] = func(in[x][y][z][w]...); + } }; #endif diff --git a/src/proj.cc b/src/proj.cc index 7dea6e296d..59607d44c5 100644 --- a/src/proj.cc +++ b/src/proj.cc @@ -29,7 +29,8 @@ LogicalRegion NumPyProjectionFunctor::project(LogicalPartition upper_bound, const DomainPoint& point, const Domain& launch_domain) { - const DomainPoint dp = project_point(point, launch_domain); + Domain color_domain = runtime->get_index_partition_color_space(upper_bound.get_index_partition()); + const DomainPoint dp = project_point(color_domain, point, launch_domain); if (runtime->has_logical_subregion_by_color(upper_bound, dp)) return runtime->get_logical_subregion_by_color(upper_bound, dp); else @@ -62,7 +63,8 @@ NumPyProjectionFunctor_2D_1D::NumPyProjectionFunctor_2D_1D(NumPyProjectionCode c return result; } -DomainPoint NumPyProjectionFunctor_2D_1D::project_point(const DomainPoint& p, +DomainPoint NumPyProjectionFunctor_2D_1D::project_point(const Domain& color_domain, + const DomainPoint& p, const Domain& launch_domain) const { const Point<1> point = transform * Point<2>(p); @@ -110,7 +112,8 @@ NumPyProjectionFunctor_2D_2D::NumPyProjectionFunctor_2D_2D(NumPyProjectionCode c return result; } -DomainPoint 
NumPyProjectionFunctor_2D_2D::project_point(const DomainPoint& p, +DomainPoint NumPyProjectionFunctor_2D_2D::project_point(const Domain& color_domain, + const DomainPoint& p, const Domain& launch_domain) const { const Point<2> point = transform * Point<2>(p); @@ -145,7 +148,8 @@ NumPyProjectionFunctor_1D_2D::NumPyProjectionFunctor_1D_2D(NumPyProjectionCode c return result; } -DomainPoint NumPyProjectionFunctor_1D_2D::project_point(const DomainPoint& p, +DomainPoint NumPyProjectionFunctor_1D_2D::project_point(const Domain& color_domain, + const DomainPoint& p, const Domain& launch_domain) const { const Point<2> point = transform * Point<1>(p); @@ -221,7 +225,8 @@ NumPyProjectionFunctor_3D_2D::NumPyProjectionFunctor_3D_2D(NumPyProjectionCode c return result; } -DomainPoint NumPyProjectionFunctor_3D_2D::project_point(const DomainPoint& p, +DomainPoint NumPyProjectionFunctor_3D_2D::project_point(const Domain& color_domain, + const DomainPoint& p, const Domain& launch_domain) const { const Point<2> point = transform * Point<3>(p); @@ -263,7 +268,8 @@ NumPyProjectionFunctor_3D_1D::NumPyProjectionFunctor_3D_1D(NumPyProjectionCode c return result; } -DomainPoint NumPyProjectionFunctor_3D_1D::project_point(const DomainPoint& p, +DomainPoint NumPyProjectionFunctor_3D_1D::project_point(const Domain& color_domain, + const DomainPoint& p, const Domain& launch_domain) const { const Point<1> point = transform * Point<3>(p); @@ -374,7 +380,8 @@ NumPyProjectionFunctor_3D_3D::NumPyProjectionFunctor_3D_3D(NumPyProjectionCode c return result; } -DomainPoint NumPyProjectionFunctor_3D_3D::project_point(const DomainPoint& p, +DomainPoint NumPyProjectionFunctor_3D_3D::project_point(const Domain& color_domain, + const DomainPoint& p, const Domain& launch_domain) const { const Point<3> point = transform * Point<3>(p); @@ -431,7 +438,8 @@ NumPyProjectionFunctor_2D_3D::NumPyProjectionFunctor_2D_3D(NumPyProjectionCode c return result; } -DomainPoint 
NumPyProjectionFunctor_2D_3D::project_point(const DomainPoint& p, +DomainPoint NumPyProjectionFunctor_2D_3D::project_point(const Domain& color_domain, + const DomainPoint& p, const Domain& launch_domain) const { const Point<3> point = transform * Point<2>(p); @@ -473,31 +481,43 @@ NumPyProjectionFunctor_1D_3D::NumPyProjectionFunctor_1D_3D(NumPyProjectionCode c return result; } -DomainPoint NumPyProjectionFunctor_1D_3D::project_point(const DomainPoint& p, +DomainPoint NumPyProjectionFunctor_1D_3D::project_point(const Domain& color_domain, + const DomainPoint& p, const Domain& launch_domain) const { const Point<3> point = transform * Point<1>(p); return DomainPoint(point); } -NumPyProjectionFunctor_ND_1D_C_ORDER::NumPyProjectionFunctor_ND_1D_C_ORDER(NumPyProjectionCode c, +NumPyProjectionFunctor_ND_MD_C_ORDER::NumPyProjectionFunctor_ND_MD_C_ORDER(NumPyProjectionCode c, Runtime* rt) : NumPyProjectionFunctor(rt), code(c) { } -DomainPoint NumPyProjectionFunctor_ND_1D_C_ORDER::project_point(const DomainPoint& p, +DomainPoint NumPyProjectionFunctor_ND_MD_C_ORDER::project_point(const Domain& color_domain, + const DomainPoint& launch_point, const Domain& launch_domain) const { - auto hi = launch_domain.hi(); - auto lo = launch_domain.lo(); - coord_t index = p[0] - lo[0]; - coord_t partial_volume = hi[0] - lo[0] + 1; - for (int dim = 1; dim < p.get_dim(); ++dim) { - index += p[dim] * partial_volume; - partial_volume *= hi[dim] - lo[dim] + 1; + assert(color_domain.get_volume() == launch_domain.get_volume()); + const DomainPoint launch_hi = launch_domain.hi(); + const DomainPoint launch_lo = launch_domain.lo(); + coord_t index = 0; + coord_t partial_volume = 1; + for (int dim = launch_domain.get_dim() - 1; dim >= 0; --dim) { + index += (launch_point[dim] - launch_lo[dim]) * partial_volume; + partial_volume *= launch_hi[dim] - launch_lo[dim] + 1; } - return index; + const DomainPoint color_hi = color_domain.hi(); + const DomainPoint color_lo = color_domain.lo(); + DomainPoint 
color_point = color_lo; + for (int dim = color_domain.get_dim() - 1; dim >= 0; --dim) { + const coord_t diff = color_hi[dim] - color_lo[dim] + 1; + color_point[dim] += index % diff; + index /= diff; + } + assert(index == 0); + return color_point; } template @@ -507,7 +527,8 @@ NumPyProjectionFunctor_GEMV::NumPyProjectionFunctor_GEMV(NumPyProjectionCo } template -DomainPoint NumPyProjectionFunctor_GEMV::project_point(const DomainPoint& p, +DomainPoint NumPyProjectionFunctor_GEMV::project_point(const Domain& color_domain, + const DomainPoint& p, const Domain& launch_domain) const { const Rect<3> launch_rect = launch_domain; @@ -536,7 +557,7 @@ NumPyProjectionFunctorRadix2D::NumPyProjectionFunctorRadix2D template DomainPoint NumPyProjectionFunctorRadix2D::project_point( - const DomainPoint& p, const Domain& launch_domain) const + const Domain& color_domain, const DomainPoint& p, const Domain& launch_domain) const { const Rect<2> launch_rect = launch_domain; const Point<2> point = p; @@ -559,7 +580,7 @@ NumPyProjectionFunctorRadix3D::NumPyProjectionFunctorRadix3D template DomainPoint NumPyProjectionFunctorRadix3D::project_point( - const DomainPoint& p, const Domain& launch_domain) const + const Domain& color_domain, const DomainPoint& p, const Domain& launch_domain) const { const Rect<3> launch_rect = launch_domain; const Point<3> point = p; @@ -662,7 +683,7 @@ static void register_functor(Runtime* runtime, ProjectionID offset, NumPyProject register_functor>( runtime, offset, NUMPY_PROJ_RADIX_3D_Z_4_3); // Flattening - register_functor(runtime, offset, NUMPY_PROJ_ND_1D_C_ORDER); + register_functor(runtime, offset, NUMPY_PROJ_ND_MD_C_ORDER); } } // namespace numpy diff --git a/src/proj.h b/src/proj.h index e5b27eca35..d1d86c5c4f 100644 --- a/src/proj.h +++ b/src/proj.h @@ -35,7 +35,8 @@ class NumPyProjectionFunctor : public Legion::ProjectionFunctor { const Legion::Domain& launch_domain); public: - virtual Legion::DomainPoint project_point(const Legion::DomainPoint& 
point, + virtual Legion::DomainPoint project_point(const Legion::Domain& color_domain, + const Legion::DomainPoint& point, const Legion::Domain& launch_domain) const = 0; public: @@ -63,7 +64,7 @@ class NumPyProjectionFunctor : public Legion::ProjectionFunctor { } else { NumPyProjectionFunctor* functor = functors[functor_id]; const Legion::Point local = - functor->project_point(task->index_point, task->index_domain); + functor->project_point(rect, task->index_point, task->index_domain); const Legion::Point lower = local * chunk; const Legion::Point upper = lower + chunk - Legion::Point::ONES(); return rect.intersection(Legion::Rect(lower, upper)); @@ -81,7 +82,8 @@ class NumPyProjectionFunctor_2D_1D : public NumPyProjectionFunctor { virtual unsigned get_depth(void) const { return 0; } public: - virtual Legion::DomainPoint project_point(const Legion::DomainPoint& point, + virtual Legion::DomainPoint project_point(const Legion::Domain& color_domain, + const Legion::DomainPoint& point, const Legion::Domain& launch_domain) const; public: @@ -102,7 +104,8 @@ class NumPyProjectionFunctor_2D_2D : public NumPyProjectionFunctor { virtual unsigned get_depth(void) const { return 0; } public: - virtual Legion::DomainPoint project_point(const Legion::DomainPoint& point, + virtual Legion::DomainPoint project_point(const Legion::Domain& color_domain, + const Legion::DomainPoint& point, const Legion::Domain& launch_domain) const; public: @@ -123,7 +126,8 @@ class NumPyProjectionFunctor_1D_2D : public NumPyProjectionFunctor { virtual unsigned get_depth(void) const { return 0; } public: - virtual Legion::DomainPoint project_point(const Legion::DomainPoint& point, + virtual Legion::DomainPoint project_point(const Legion::Domain& color_domain, + const Legion::DomainPoint& point, const Legion::Domain& launch_domain) const; public: @@ -144,7 +148,8 @@ class NumPyProjectionFunctor_3D_2D : public NumPyProjectionFunctor { virtual unsigned get_depth(void) const { return 0; } public: - 
virtual Legion::DomainPoint project_point(const Legion::DomainPoint& point, + virtual Legion::DomainPoint project_point(const Legion::Domain& color_domain, + const Legion::DomainPoint& point, const Legion::Domain& launch_domain) const; public: @@ -165,7 +170,8 @@ class NumPyProjectionFunctor_3D_1D : public NumPyProjectionFunctor { virtual unsigned get_depth(void) const { return 0; } public: - virtual Legion::DomainPoint project_point(const Legion::DomainPoint& point, + virtual Legion::DomainPoint project_point(const Legion::Domain& color_domain, + const Legion::DomainPoint& point, const Legion::Domain& launch_domain) const; public: @@ -186,7 +192,8 @@ class NumPyProjectionFunctor_3D_3D : public NumPyProjectionFunctor { virtual unsigned get_depth(void) const { return 0; } public: - virtual Legion::DomainPoint project_point(const Legion::DomainPoint& point, + virtual Legion::DomainPoint project_point(const Legion::Domain& color_domain, + const Legion::DomainPoint& point, const Legion::Domain& launch_domain) const; public: @@ -207,7 +214,8 @@ class NumPyProjectionFunctor_2D_3D : public NumPyProjectionFunctor { virtual unsigned get_depth(void) const { return 0; } public: - virtual Legion::DomainPoint project_point(const Legion::DomainPoint& point, + virtual Legion::DomainPoint project_point(const Legion::Domain& color_domain, + const Legion::DomainPoint& point, const Legion::Domain& launch_domain) const; public: @@ -228,7 +236,8 @@ class NumPyProjectionFunctor_1D_3D : public NumPyProjectionFunctor { virtual unsigned get_depth(void) const { return 0; } public: - virtual Legion::DomainPoint project_point(const Legion::DomainPoint& point, + virtual Legion::DomainPoint project_point(const Legion::Domain& color_domain, + const Legion::DomainPoint& point, const Legion::Domain& launch_domain) const; public: @@ -250,7 +259,8 @@ class NumPyProjectionFunctor_GEMV : public NumPyProjectionFunctor { virtual unsigned get_depth(void) const { return 0; } public: - virtual 
Legion::DomainPoint project_point(const Legion::DomainPoint& point, + virtual Legion::DomainPoint project_point(const Legion::Domain& color_domain, + const Legion::DomainPoint& point, const Legion::Domain& launch_domain) const; public: @@ -268,7 +278,8 @@ class NumPyProjectionFunctorRadix2D : public NumPyProjectionFunctor { virtual unsigned get_depth(void) const { return 0; } public: - virtual Legion::DomainPoint project_point(const Legion::DomainPoint& point, + virtual Legion::DomainPoint project_point(const Legion::Domain& color_domain, + const Legion::DomainPoint& point, const Legion::Domain& launch_domain) const; public: @@ -286,16 +297,17 @@ class NumPyProjectionFunctorRadix3D : public NumPyProjectionFunctor { virtual unsigned get_depth(void) const { return 0; } public: - virtual Legion::DomainPoint project_point(const Legion::DomainPoint& point, + virtual Legion::DomainPoint project_point(const Legion::Domain& color_domain, + const Legion::DomainPoint& point, const Legion::Domain& launch_domain) const; public: const NumPyProjectionCode code; }; -class NumPyProjectionFunctor_ND_1D_C_ORDER : public NumPyProjectionFunctor { +class NumPyProjectionFunctor_ND_MD_C_ORDER : public NumPyProjectionFunctor { public: - NumPyProjectionFunctor_ND_1D_C_ORDER(NumPyProjectionCode code, Legion::Runtime* runtime); + NumPyProjectionFunctor_ND_MD_C_ORDER(NumPyProjectionCode code, Legion::Runtime* runtime); public: virtual bool is_functional(void) const { return true; } @@ -303,7 +315,8 @@ class NumPyProjectionFunctor_ND_1D_C_ORDER : public NumPyProjectionFunctor { virtual unsigned get_depth(void) const { return 0; } public: - virtual Legion::DomainPoint project_point(const Legion::DomainPoint& point, + virtual Legion::DomainPoint project_point(const Legion::Domain& color_domain, + const Legion::DomainPoint& point, const Legion::Domain& launch_domain) const; public: diff --git a/src/transform.cc b/src/transform.cc new file mode 100644 index 0000000000..661bc010fb --- /dev/null +++ 
b/src/transform.cc @@ -0,0 +1,31 @@ +/* Copyright 2021 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "transform.h" + +using namespace Legion; + +namespace legate { +namespace numpy { + +// Instantiate tasks for applying an AffineTransform to arrays of Points, +// for all combinations of dimensionalities. +#define DIMFUNC(M, N) template class TransformTask; +LEGION_FOREACH_NN(DIMFUNC) +#undef DIMFUNC + +} // namespace numpy +} // namespace legate diff --git a/src/transform.cu b/src/transform.cu new file mode 100644 index 0000000000..9ce64acceb --- /dev/null +++ b/src/transform.cu @@ -0,0 +1,32 @@ +/* Copyright 2021 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "transform.h" + +using namespace Legion; + +namespace legate { +namespace numpy { + +// Instantiate Transform tasks' gpu variants. 
+#define DIMFUNC(M, N) \ + template void PointTask>::gpu_variant( \ + const Task*, const std::vector&, Context, Runtime*); +LEGION_FOREACH_NN(DIMFUNC) +#undef DIMFUNC + +} // namespace numpy +} // namespace legate diff --git a/src/transform.h b/src/transform.h new file mode 100644 index 0000000000..f05b42a944 --- /dev/null +++ b/src/transform.h @@ -0,0 +1,154 @@ +/* Copyright 2021 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#ifndef __NUMPY_TRANSFORM_H__ +#define __NUMPY_TRANSFORM_H__ + +#include "point_task.h" + +namespace legate { +namespace numpy { + +template +struct TransformOperation { + __CUDA_HD__ constexpr Legion::Point operator()( + const Legion::Point& p, const Legion::AffineTransform& transform) const + { + return transform[p]; + } +}; + +#if defined(LEGATE_USE_CUDA) && defined(__CUDACC__) +template +__global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) + gpu_transform(const Args args, const bool dense) +{ + const size_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= args.volume) return; + TransformOperation func; + if (dense) { + args.outptr[idx] = func(args.inptr[idx], args.transform); + } else { + const Legion::Point point = args.pitches.unflatten(idx, args.rect.lo); + args.out[point] = func(args.in[point], args.transform); + } +} +#endif + +template +class TransformTask : public PointTask> { + public: + static const int TASK_ID = NUMPY_TRANSFORM_OFFSET + M * (LEGION_MAX_DIM + 1) + 
N; + + // out_region = op in_region; + static const int REGIONS = 2; + + template + struct DeserializedArgs { + Legion::Rect rect; + AccessorWO, DIM> out; + AccessorRO, DIM> in; + Pitches pitches; + size_t volume; + Legion::AffineTransform transform; + Legion::Point* outptr; + const Legion::Point* inptr; + bool deserialize(LegateDeserializer& derez, + const Legion::Task* task, + const std::vector& regions) + { + rect = NumPyProjectionFunctor::unpack_shape(task, derez); + out = derez.unpack_accessor_WO, DIM>(regions[0], rect); + in = derez.unpack_accessor_RO, DIM>(regions[1], rect); + volume = pitches.flatten(rect); + size_t size; + const void* data = task->futures[0].get_buffer( + Realm::Memory::SYSTEM_MEM, &size, false /*check_extent*/, true /*silence_warnings*/); + transform = LegateDeserializer(data, size).unpack_transform(); +#ifndef LEGION_BOUNDS_CHECKS + // Check to see if this is dense or not + return out.accessor.is_dense_row_major(rect) && in.accessor.is_dense_row_major(rect) && + (outptr = out.ptr(rect)) && (inptr = in.ptr(rect)); +#else + // No dense execution if we're doing bounds checks + return false; +#endif + } + }; + + template + static void dispatch_cpu(const Legion::Task* task, + const std::vector& regions, + LegateDeserializer& derez) + { + DeserializedArgs args; + const bool dense = args.deserialize(derez, task, regions); + if (args.volume == 0) return; + TransformOperation func; + if (dense) { + for (size_t idx = 0; idx < args.volume; ++idx) { + args.outptr[idx] = func(args.inptr[idx], args.transform); + } + } else { + CPULoop::unary_loop(func, args.out, args.in, args.rect, args.transform); + } + } + +#ifdef LEGATE_USE_OPENMP + template + static void dispatch_omp(const Legion::Task* task, + const std::vector& regions, + LegateDeserializer& derez) + { + DeserializedArgs args; + const bool dense = args.deserialize(derez, task, regions); + if (args.volume == 0) return; + TransformOperation func; + if (dense) { +#pragma omp parallel for 
schedule(static) + for (size_t idx = 0; idx < args.volume; ++idx) { + args.outptr[idx] = func(args.inptr[idx], args.transform); + } + } else { + OMPLoop::unary_loop(func, args.out, args.in, args.rect, args.transform); + } + } +#endif // LEGATE_USE_OPENMP + +#if defined(LEGATE_USE_CUDA) && defined(__CUDACC__) + template + static void dispatch_gpu(const Legion::Task* task, + const std::vector& regions, + LegateDeserializer& derez) + { + DeserializedArgs args; + const bool dense = args.deserialize(derez, task, regions); + if (args.volume == 0) return; + const size_t blocks = (args.volume + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + gpu_transform<<>>(args, dense); + } +#elif defined(LEGATE_USE_CUDA) + template + static void dispatch_gpu(const Legion::Task* task, + const std::vector& regions, + LegateDeserializer& derez); +#endif // LEGATE_USE_CUDA +}; + +} // namespace numpy +} // namespace legate + +#endif // __NUMPY_TRANSFORM_H__ diff --git a/src/zip.cc b/src/zip.cc new file mode 100644 index 0000000000..cbae4302f4 --- /dev/null +++ b/src/zip.cc @@ -0,0 +1,29 @@ +/* Copyright 2021 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "zip.h" + +namespace legate { +namespace numpy { + +// Instantiate tasks for zipping multiple arrays of coordinates into one array of points, +// up to our maximum array dimensionality. 
+#define DIMFUNC(N) template class ZipTask; +LEGATE_FOREACH_N(DIMFUNC) +#undef DIMFUNC + +} // namespace numpy +} // namespace legate diff --git a/src/zip.cu b/src/zip.cu new file mode 100644 index 0000000000..082d71abed --- /dev/null +++ b/src/zip.cu @@ -0,0 +1,32 @@ +/* Copyright 2021 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "zip.h" + +using namespace Legion; + +namespace legate { +namespace numpy { + +// Instantiate Zip tasks' gpu variants. +#define DIMFUNC(N) \ + template void PointTask>::gpu_variant( \ + const Task*, const std::vector&, Context, Runtime*); +LEGATE_FOREACH_N(DIMFUNC) +#undef DIMFUNC + +} // namespace numpy +} // namespace legate diff --git a/src/zip.h b/src/zip.h new file mode 100644 index 0000000000..86173a3ac9 --- /dev/null +++ b/src/zip.h @@ -0,0 +1,188 @@ +/* Copyright 2021 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#ifndef __NUMPY_ZIP_H__ +#define __NUMPY_ZIP_H__ + +#include +#include "point_task.h" + +namespace legate { +namespace numpy { + +template +struct point_ctor { +}; +template <> +struct point_ctor<1> { + inline Legion::Point<1> operator()(coord_t x) const { return Legion::Point<1>(x); } +}; +template <> +struct point_ctor<2> { + inline Legion::Point<2> operator()(coord_t x, coord_t y) const { return Legion::Point<2>(x, y); } +}; +template <> +struct point_ctor<3> { + inline Legion::Point<3> operator()(coord_t x, coord_t y, coord_t z) const + { + return Legion::Point<3>(x, y, z); + } +}; +template <> +struct point_ctor<4> { + inline Legion::Point<4> operator()(coord_t x, coord_t y, coord_t z, coord_t w) const + { + return Legion::Point<4>(x, y, z, w); + } +}; + +#if defined(LEGATE_USE_CUDA) && defined(__CUDACC__) +template +__global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) + gpu_zip(const Args args, const bool dense, std::index_sequence) +{ + const size_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= args.volume) return; + if (dense) { + args.outptr[idx] = Legion::Point(args.inptr[Is][idx]...); + } else { + const Legion::Point point = args.pitches.unflatten(idx, args.rect.lo); + args.out[point] = Legion::Point(args.in[Is][point]...); + } +} +#endif + +template +class ZipTask : public PointTask> { + public: + static const int TASK_ID = NumpyTaskOffset::NUMPY_ZIP_OFFSET + N; + + // out_region = op( in_region1, ..., in_regionN ) + static const int REGIONS = N + 1; + + template + struct DeserializedArgs { + Legion::Rect rect; + AccessorWO, DIM> out; + AccessorRO in[N]; + Pitches pitches; + size_t volume; + Legion::Point* outptr; + const coord_t* inptr[N]; + bool deserialize(LegateDeserializer& derez, + const Legion::Task* task, + const std::vector& regions) + { + rect = NumPyProjectionFunctor::unpack_shape(task, derez); + out = derez.unpack_accessor_WO, DIM>(regions[0], rect); + for (int i = 0; i < N; ++i) { + in[i] = 
derez.unpack_accessor_RO(regions[i + 1], rect); + } + volume = pitches.flatten(rect); +#ifndef LEGION_BOUNDS_CHECKS + // Check to see if this is dense or not + if (!out.accessor.is_dense_row_major(rect)) { return false; } + outptr = out.ptr(rect); + for (int i = 0; i < N; ++i) { + if (!in[i].accessor.is_dense_row_major(rect)) { return false; } + inptr[i] = in[i].ptr(rect); + } + return true; +#else + // No dense execution if we're doing bounds checks + return false; +#endif + } + }; + + template + static void dispatch_cpu_impl(const Legion::Task* task, + const std::vector& regions, + LegateDeserializer& derez, + std::index_sequence) + { + DeserializedArgs args; + const bool dense = args.deserialize(derez, task, regions); + if (args.volume == 0) return; + if (dense) { + for (size_t idx = 0; idx < args.volume; ++idx) { + args.outptr[idx] = Legion::Point(args.inptr[Is][idx]...); + } + } else { + CPULoop::generic_loop(args.rect, point_ctor(), args.out, args.in[Is]...); + } + } + + template + static void dispatch_cpu(const Legion::Task* task, + const std::vector& regions, + LegateDeserializer& derez) + { + dispatch_cpu_impl(task, regions, derez, std::make_index_sequence()); + } + +#ifdef LEGATE_USE_OPENMP + template + static void dispatch_omp_impl(const Legion::Task* task, + const std::vector& regions, + LegateDeserializer& derez, + std::index_sequence) + { + DeserializedArgs args; + const bool dense = args.deserialize(derez, task, regions); + if (args.volume == 0) return; + if (dense) { +#pragma omp parallel for schedule(static) + for (size_t idx = 0; idx < args.volume; ++idx) { + args.outptr[idx] = Legion::Point(args.inptr[Is][idx]...); + } + } else { + OMPLoop::generic_loop(args.rect, point_ctor(), args.out, args.in[Is]...); + } + } + + template + static void dispatch_omp(const Legion::Task* task, + const std::vector& regions, + LegateDeserializer& derez) + { + dispatch_omp_impl(task, regions, derez, std::make_index_sequence()); + } +#endif +#if 
defined(LEGATE_USE_CUDA) && defined(__CUDACC__) + template + static void dispatch_gpu(const Legion::Task* task, + const std::vector& regions, + LegateDeserializer& derez) + { + DeserializedArgs args; + const bool dense = args.deserialize(derez, task, regions); + if (args.volume == 0) return; + const size_t blocks = (args.volume + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + gpu_zip<<>>(args, dense, std::make_index_sequence()); + } +#elif defined(LEGATE_USE_CUDA) + template + static void dispatch_gpu(const Legion::Task* task, + const std::vector& regions, + LegateDeserializer& derez); +#endif +}; + +} // namespace numpy +} // namespace legate + +#endif // __NUMPY_ZIP_H__ diff --git a/tests/advanced_slicing.py b/tests/advanced_slicing.py new file mode 100644 index 0000000000..f4e5c86b85 --- /dev/null +++ b/tests/advanced_slicing.py @@ -0,0 +1,230 @@ +# Copyright 2021 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import numpy as np + +import legate.numpy as lg + + +def sequence_2d(): + return lg.array( + [ + [0, 1, 2, 3, 4], + [5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19], + [20, 21, 22, 23, 24], + ] + ) + + +def test(): + + # 1d __getitem__ + + # index arrays can be boolean, of the same shape as the base array + a = lg.arange(10) + assert np.array_equal(a[lg.array([False, True] * 5)], [1, 3, 5, 7, 9]) + assert np.array_equal(a[a > 5], [6, 7, 8, 9]) + + # index arrays can be integer, one per base array dimension + a = lg.arange(10) * 2 + assert np.array_equal(a[lg.array([1, 3, 5, 7, 9])], [2, 6, 10, 14, 18]) + + # output shape follows index array shape + a = lg.arange(10) * 2 + assert np.array_equal( + a[lg.array([[1, 2, 3], [4, 5, 6]])], [[2, 4, 6], [8, 10, 12]] + ) + + # index arrays can be any sequence object + a = lg.arange(10) * 2 + assert np.array_equal(a[[4, 3, 2, 1]], [8, 6, 4, 2]) + + # index arrays are automatically cast to int64 + a = lg.arange(10) + assert np.array_equal(a[lg.arange(5, 10, dtype=np.int16)], [5, 6, 7, 8, 9]) + + # advanced slicing creates copies + a = lg.arange(10) + b = a[range(5)] + b[:] = 0 + assert np.array_equal(a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + + # can read through views + a = np.arange(10) + c = np.arange(9, -1, -1) + assert np.array_equal(a[2:7][c[5:10]], [6, 5, 4, 3, 2]) + + # 1d __setitem__ + + # can write through a single advanced slice + a = lg.arange(10) + b = lg.arange(5) + a[range(5, 10)] = b + assert np.array_equal(a, [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]) + + # can write through views + a = np.arange(10) + b = np.arange(5) # TODO: RHS cannot be a view currently + c = np.arange(9, -1, -1) + a[2:7][c[5:10]] = b + assert np.array_equal(a, [0, 1, 4, 3, 2, 1, 0, 7, 8, 9]) + + a = lg.arange(20) + b = np.arange(10) + a[10:][range(5)] = b[2:7] + assert np.array_equal( + a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 15, 16, 17, 18, 19] + ) + + # can copy within the same array + # TODO: Fix #40 + # a = lg.arange(20) 
+ # a[10:][range(5)] = a[2:7] + # assert np.array_equal( + # a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 15, 16, 17, 18, 19] + # ) + + # source & destination regions can (partially) overlap + # TODO: Fix #40 + # a = lg.arange(20) + # a[10:][range(5)] = a[12:17] + # assert np.array_equal( + # a, + # [0,1,2,3,4,5,6,7,8,9,12,13,14,15,16,15,16,17,18,19], + # ) + + # 2d __getitem__ + + # index arrays can be boolean, of the same shape as the base array + a = sequence_2d() + # TODO: Have to sort the output array, until #50 is fixed + assert np.array_equal(np.sort(a[a > 17]), [18, 19, 20, 21, 22, 23, 24]) + + # index arrays can be integer, one per base array dimension + a = sequence_2d() + assert np.array_equal( + a[lg.array([0, 1, 2, 3]), lg.array([0, 1, 2, 3])], [0, 6, 12, 18] + ) + + # output shape follows index array shape + a = sequence_2d() + assert np.array_equal( + a[lg.array([[1, 1, 1], [2, 2, 2]]), lg.array([[0, 1, 2], [0, 1, 2]])], + [[5, 6, 7], [10, 11, 12]], + ) + + # index arrays can be any sequence object + a = sequence_2d() + assert np.array_equal(a[range(4), range(4)], [0, 6, 12, 18]) + + # index arrays are automatically cast to int64 + a = sequence_2d() + assert np.array_equal(a[range(4), range(4)], [0, 6, 12, 18]) + assert np.array_equal( + a[ + lg.array([0, 1, 2, 3], dtype=np.int16), + lg.array([0, 1, 2, 3], dtype=np.uint32), + ], + [0, 6, 12, 18], + ) + + # advanced slicing creates copies + a = sequence_2d() + b = a[a > 17] + b[:] = -1 + assert a.min() == 0 and a.max() == 24 + + # can read through views + a = sequence_2d() + cx = lg.array([0, 0, 1, 1, 2, 2]) + cy = lg.array([1, 2, 1, 2, 1, 2]) + assert np.array_equal(a[2:, 2:][cx[2:], cy[2:]], [18, 19, 23, 24]) + + # 2d __setitem__ + + # can write through a single advanced slice + a = sequence_2d() + b = lg.zeros(7, dtype=np.int64) + a[a > 17] = b + assert np.array_equal( + a, + [ + [0, 1, 2, 3, 4], + [5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 0, 0], + [0, 0, 0, 0, 0], + ], + ) + + # 
can write through views + a = sequence_2d() + b = lg.array([100, 101, 102, 103]) # TODO: RHS cannot be a view currently + cx = lg.array([0, 0, 1, 1, 2, 2]) + cy = lg.array([1, 2, 1, 2, 1, 2]) + a[2:, 2:][cx[2:], cy[2:]] = b + assert np.array_equal( + a, + [ + [0, 1, 2, 3, 4], + [5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 100, 101], + [20, 21, 22, 102, 103], + ], + ) + + # can copy within the same array + # TODO: Fix #40 + # a = sequence_2d() + # a[2:, :][[[1, 1], [2, 2]], [[1, 2], [1, 2]]] = a[1:3, 3:5] + # assert np.array_equal( + # a, + # [ + # [0, 1, 2, 3, 4], + # [5, 6, 7, 8, 9], + # [10, 11, 12, 13, 14], + # [15, 8, 9, 18, 19], + # [20, 13, 14, 23, 24], + # ], + # ) + + # source & destination regions can (partially) overlap + # TODO: Fix #40 + # a = sequence_2d() + # a[2:, :][[[1, 1], [2, 2]], [[1, 2], [1, 2]]] = a[3:5, 2:4] + # assert np.array_equal( + # a, + # [ + # [0, 1, 2, 3, 4], + # [5, 6, 7, 8, 9], + # [10, 11, 12, 13, 14], + # [15, 17, 18, 18, 19], + # [20, 22, 23, 23, 24], + # ], + # ) + + # TODO: broadcasting in index arrays + # TODO: mixed advanced indexing + # TODO: singleton arrays + # TODO: views as base, index or value array (incl. 
reshape) + + return + + +if __name__ == "__main__": + test() diff --git a/tests/slicing.py b/tests/slicing.py index 7b5f47d77c..71d7ddaace 100644 --- a/tests/slicing.py +++ b/tests/slicing.py @@ -79,10 +79,57 @@ def test(): # print(thirdslicegold) # assert(lg.array_equal(thirdslice, thirdslicegold)) + # can copy within the same array x = lg.arange(10) x[0:5] = x[5:10] assert np.array_equal(x, [5, 6, 7, 8, 9, 5, 6, 7, 8, 9]) + a = lg.arange(25).reshape((5, 5)) + a[3:5:, 1:3] = a[1:3, 3:5] + assert np.array_equal( + a, + [ + [0, 1, 2, 3, 4], + [5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 8, 9, 18, 19], + [20, 13, 14, 23, 24], + ], + ) + + # source & destination regions can (partially) overlap + # TODO: Fix #40 + # a = lg.arange(25).reshape((5, 5)) + # a[3:5:, 1:3] = a[3:5, 2:4] + # assert np.array_equal( + # a, + # [ + # [0, 1, 2, 3, 4], + # [5, 6, 7, 8, 9], + # [10, 11, 12, 13, 14], + # [15, 17, 18, 18, 19], + # [20, 22, 23, 23, 24], + # ], + # ) + + # corner case of singleton base (backed by Futures instead of Regions) + a = lg.array([7]) + assert a[0] == 7 + assert np.array_equal(a[:], [7]) + assert np.array_equal(a[1:], []) + + a = lg.array([7]) + a[0] = 4 + assert np.array_equal(a, [4]) + + a = lg.array([7]) + a[:] = [4] + assert np.array_equal(a, [4]) + + a = lg.array([7]) + a[1:] = 4 + assert np.array_equal(a, [7]) + return