diff --git a/examples/census_synthetic_people.yaml b/examples/census_synthetic_people.yaml new file mode 100644 index 0000000..810b041 --- /dev/null +++ b/examples/census_synthetic_people.yaml @@ -0,0 +1,31 @@ +--- + +metadata: + version: 1 + +sources: + - name: Census Blocks Synthetic People + key: census-synthetic-people + text: Census Blocks Synthetic People + description: Census Blocks Synthetic People + service_types: + - tile + cmap: + - white + geometry_field: null + region_of_interest: + - -13898124.212 + - 2801774.864 + - -7445653.568 + - 6340332.344 + geometry_type: point + shade_how: linear + raster_interpolate: linear + xfield: x + yfield: y + filepath: s3://makepath-synthetic-people-2022-alpha-webm-demo/part.6.parquet + transforms: + - name: load_in_memory + storage_options: + key: your_access_key_id + secret: your_secret_access_key diff --git a/mapshader/core.py b/mapshader/core.py index f8eac0f..3ece0c3 100644 --- a/mapshader/core.py +++ b/mapshader/core.py @@ -132,17 +132,21 @@ def point_aggregation(cvs, data, xfield, yfield, zfield, geometry_field, agg_fun """ if zfield: - if geometry_field: + if xfield and yfield: + return cvs.points(data, xfield, yfield, getattr(ds, agg_func)(zfield)) + elif geometry_field: return cvs.points( data, agg=getattr(ds, agg_func)(zfield), geometry=geometry_field ) else: - return cvs.points(data, xfield, yfield, getattr(ds, agg_func)(zfield)) + raise ValueError('None of xfield, yfield, or geometry_field was provided') else: - if geometry_field: + if xfield and yfield: + return cvs.points(data, xfield, yfield) + elif geometry_field: return cvs.points(data, geometry=geometry_field) else: - return cvs.points(data, xfield, yfield) + raise ValueError('None of xfield, yfield, or geometry_field was provided') def line_aggregation(cvs, data, zfield, agg_func): diff --git a/mapshader/io.py b/mapshader/io.py index 875e632..3602d46 100644 --- a/mapshader/io.py +++ b/mapshader/io.py @@ -1,13 +1,16 @@ from os.path import expanduser, 
splitext -import geopandas as gpd import numpy as np import xarray as xr +import geopandas as gpd +import dask_geopandas +import dask.dataframe from mapshader.multifile import SharedMultiFile def load_raster(file_path, transforms, force_recreate_overviews, + storage_options, geometry, region_of_interest, xmin=None, ymin=None, xmax=None, ymax=None, chunks=None, layername='data'): """ @@ -68,7 +71,14 @@ return arr -def load_vector(filepath: str, transforms, force_recreate_overviews): +def load_vector( + filepath: str, + transforms, + force_recreate_overviews, + storage_options, + geometry, + region_of_interest, +): """ Load vector data. @@ -82,4 +92,24 @@ gpd : geopandas.DataFrame The loaded data. """ - return gpd.read_file(filepath) + + file_extension = splitext(filepath)[1] + + if file_extension == '.parquet': + kwargs = {'storage_options': storage_options} if storage_options is not None else {} + if geometry is not None: + # read data into a dask_geopandas dataframe + df = dask_geopandas.read_parquet(filepath, **kwargs) + else: + # read data into a dask dataframe + df = dask.dataframe.read_parquet(filepath, **kwargs) + else: + # assume a geopandas DataFrame + df = gpd.read_file(filepath) + + if region_of_interest is not None: + # limit data to be within the region of interest + minx, miny, maxx, maxy = region_of_interest + df = df[(df.x >= minx) & (df.x <= maxx) & (df.y >= miny) & (df.y <= maxy)] + + return df diff --git a/mapshader/sources.py b/mapshader/sources.py index 039c3cd..0eca74e 100644 --- a/mapshader/sources.py +++ b/mapshader/sources.py @@ -4,6 +4,7 @@ from os import path import sys +import pandas as pd import geopandas as gpd from mapshader.colors import colors @@ -128,6 +129,8 @@ def __init__(self, # noqa: C901 raster_padding=0, service_types=None, full_extent=None, + storage_options=None, + region_of_interest=None, 
default_extent=None, default_height=256, default_width=256, @@ -199,6 +202,7 @@ def __init__(self, # noqa: C901 self.extras = extras self.service_types = service_types self.transforms = transforms + self.storage_options = storage_options self.default_extent = default_extent self.default_width = default_width self.default_height = default_height @@ -206,6 +210,7 @@ def __init__(self, # noqa: C901 self.geometry_field = geometry_field self.band = band self.force_recreate_overviews = force_recreate_overviews + self.region_of_interest = region_of_interest self.tiling = tiling self.is_loaded = False @@ -243,6 +248,10 @@ def load(self): print('Zipfile Path', file=sys.stdout) data_path = self.filepath + elif self.filepath.startswith('s3://'): + print('S3 Path', file=sys.stdout) + data_path = self.filepath + elif not path.isabs(self.filepath): print('Not Absolute', file=sys.stdout) data_path = path.abspath(path.expanduser(self.filepath)) @@ -251,7 +260,14 @@ def load(self): print('Using Given Filepath unmodified: config{self.config_file}', file=sys.stdout) data_path = self.filepath - data = self.load_func(data_path, self.transforms, self.force_recreate_overviews) + data = self.load_func( + data_path, + self.transforms, + self.force_recreate_overviews, + self.storage_options, + self.geometry_field, + self.region_of_interest, + ) else: data = self.data @@ -359,8 +375,16 @@ def load_func(self): def full_extent(self): if isinstance(self.data, spatialpandas.GeoDataFrame): return self.data.to_geopandas()[self.geometry_field].total_bounds - else: + elif isinstance(self.data, gpd.GeoDataFrame): return self.data[self.geometry_field].total_bounds + elif isinstance(self.data, pd.DataFrame): + minx, miny, maxx, maxy = ( + self.data[self.xfield].min(), + self.data[self.yfield].min(), + self.data[self.xfield].max(), + self.data[self.yfield].max() + ) + return minx, miny, maxx, maxy # ---------------------------------------------------------------------------- diff --git 
a/mapshader/transforms.py b/mapshader/transforms.py index eef4712..91df2b4 100644 --- a/mapshader/transforms.py +++ b/mapshader/transforms.py @@ -422,6 +422,22 @@ def raster_to_categorical_points(arr, cats: dict, dim: str = 'data'): return df +def load_in_memory(df): + """ + Compute dask data frame. + + Parameters + ---------- + df: dask.dataframe or dask_geopandas.GeoDataFrame object + + Returns + ------- + computed_df: pandas.DataFrame or geopandas.GeoDataFrame object + """ + df = df.compute() + return df + + _transforms = { 'reproject_raster': reproject_raster, 'reproject_vector': reproject_vector, @@ -436,7 +452,8 @@ def raster_to_categorical_points(arr, cats: dict, dim: str = 'data'): 'add_projected_buffered_extent': add_projected_buffered_extent, 'select_by_attributes': select_by_attributes, 'polygon_to_line': polygon_to_line, - 'raster_to_categorical_points': raster_to_categorical_points + 'raster_to_categorical_points': raster_to_categorical_points, + 'load_in_memory': load_in_memory, } diff --git a/setup.py b/setup.py index 26d99fe..189a87c 100644 --- a/setup.py +++ b/setup.py @@ -23,9 +23,10 @@ ], install_requires=[ 'bokeh >=2.4.2', - 'xarray-spatial >=0.3.1', + 'xarray-spatial >=0.3.5', 'datashader >=0.13.0', 'geopandas >=0.10.2', + 'dask-geopandas', 'click >=8.0.3', 'click_plugins >=1.1.1', 'jinja2 >=3.0.3',