diff --git a/docs/source/reference/hydrology.rst b/docs/source/reference/hydrology.rst index 44fa1ae2..b7b0769e 100644 --- a/docs/source/reference/hydrology.rst +++ b/docs/source/reference/hydrology.rst @@ -4,164 +4,206 @@ Hydrology ********* -Flow Direction -============== +Flow Direction (D8) +=================== .. autosummary:: :toctree: _autosummary - xrspatial.flow_direction.flow_direction + xrspatial.hydro.flow_direction_d8.flow_direction_d8 Flow Direction (D-infinity) =========================== .. autosummary:: :toctree: _autosummary - xrspatial.flow_direction_dinf.flow_direction_dinf + xrspatial.hydro.flow_direction_dinf.flow_direction_dinf Flow Direction (MFD) ==================== .. autosummary:: :toctree: _autosummary - xrspatial.flow_direction_mfd.flow_direction_mfd + xrspatial.hydro.flow_direction_mfd.flow_direction_mfd -Flow Accumulation -================= +Flow Accumulation (D8) +====================== .. autosummary:: :toctree: _autosummary - xrspatial.flow_accumulation.flow_accumulation + xrspatial.hydro.flow_accumulation_d8.flow_accumulation_d8 Flow Accumulation (D-infinity) =============================== .. autosummary:: :toctree: _autosummary - xrspatial.flow_accumulation_dinf.flow_accumulation_dinf + xrspatial.hydro.flow_accumulation_dinf.flow_accumulation_dinf Flow Accumulation (MFD) ======================= .. autosummary:: :toctree: _autosummary - xrspatial.flow_accumulation_mfd.flow_accumulation_mfd + xrspatial.hydro.flow_accumulation_mfd.flow_accumulation_mfd -Flow Length -=========== +Flow Length (D8) +================ .. autosummary:: :toctree: _autosummary - xrspatial.flow_length.flow_length + xrspatial.hydro.flow_length_d8.flow_length_d8 Flow Length (D-infinity) ======================== .. autosummary:: :toctree: _autosummary - xrspatial.flow_length_dinf.flow_length_dinf + xrspatial.hydro.flow_length_dinf.flow_length_dinf Flow Length (MFD) ================= .. autosummary:: :toctree: _autosummary - xrspatial.flow_length_mfd.flow_length_mfd + xrspatial.hydro.flow_length_mfd.flow_length_mfd + +Flow Path (D8) +============== +.. autosummary:: + :toctree: _autosummary + + xrspatial.hydro.flow_path_d8.flow_path_d8 + +Flow Path (D-infinity) +====================== +.. autosummary:: + :toctree: _autosummary + + xrspatial.hydro.flow_path_dinf.flow_path_dinf -Flow Path -========= +Flow Path (MFD) +=============== .. autosummary:: :toctree: _autosummary - xrspatial.flow_path.flow_path + xrspatial.hydro.flow_path_mfd.flow_path_mfd Fill ==== .. autosummary:: :toctree: _autosummary - xrspatial.fill.fill + xrspatial.hydro.fill_d8.fill_d8 Sink ==== .. autosummary:: :toctree: _autosummary - xrspatial.sink.sink + xrspatial.hydro.sink_d8.sink_d8 Basin ===== .. autosummary:: :toctree: _autosummary - xrspatial.basin.basin + xrspatial.hydro.basin_d8.basin_d8 -Watershed -========= +Watershed (D8) +============== .. autosummary:: :toctree: _autosummary - xrspatial.watershed.watershed - xrspatial.watershed.basins + xrspatial.hydro.watershed_d8.watershed_d8 + xrspatial.hydro.watershed_d8.basins_d8 + +Watershed (D-infinity) +====================== +.. autosummary:: + :toctree: _autosummary + + xrspatial.hydro.watershed_dinf.watershed_dinf + +Watershed (MFD) +=============== +.. autosummary:: + :toctree: _autosummary + + xrspatial.hydro.watershed_mfd.watershed_mfd Snap Pour Point =============== .. autosummary:: :toctree: _autosummary - xrspatial.snap_pour_point.snap_pour_point + xrspatial.hydro.snap_pour_point_d8.snap_pour_point_d8 -Stream Link -=========== +Stream Link (D8) +================ .. autosummary:: :toctree: _autosummary - xrspatial.stream_link.stream_link + xrspatial.hydro.stream_link_d8.stream_link_d8 Stream Link (D-infinity) ======================== .. autosummary:: :toctree: _autosummary - xrspatial.stream_link_dinf.stream_link_dinf + xrspatial.hydro.stream_link_dinf.stream_link_dinf Stream Link (MFD) ================= .. autosummary:: :toctree: _autosummary - xrspatial.stream_link_mfd.stream_link_mfd + xrspatial.hydro.stream_link_mfd.stream_link_mfd -Stream Order -============ +Stream Order (D8) +================= .. autosummary:: :toctree: _autosummary - xrspatial.stream_order.stream_order + xrspatial.hydro.stream_order_d8.stream_order_d8 Stream Order (D-infinity) ========================= .. autosummary:: :toctree: _autosummary - xrspatial.stream_order_dinf.stream_order_dinf + xrspatial.hydro.stream_order_dinf.stream_order_dinf Stream Order (MFD) ================== .. autosummary:: :toctree: _autosummary - xrspatial.stream_order_mfd.stream_order_mfd + xrspatial.hydro.stream_order_mfd.stream_order_mfd + +Height Above Nearest Drainage (D8) +=================================== +.. autosummary:: + :toctree: _autosummary + + xrspatial.hydro.hand_d8.hand_d8 + +Height Above Nearest Drainage (D-infinity) +========================================== +.. autosummary:: + :toctree: _autosummary + + xrspatial.hydro.hand_dinf.hand_dinf -Height Above Nearest Drainage (HAND) -===================================== +Height Above Nearest Drainage (MFD) +==================================== .. autosummary:: :toctree: _autosummary - xrspatial.hand.hand + xrspatial.hydro.hand_mfd.hand_mfd Topographic Wetness Index (TWI) =============================== .. autosummary:: :toctree: _autosummary - xrspatial.twi.twi + xrspatial.hydro.twi_d8.twi_d8 diff --git a/docs/source/reference/multispectral.rst b/docs/source/reference/multispectral.rst index 98ba576e..9d7f02b4 100644 --- a/docs/source/reference/multispectral.rst +++ b/docs/source/reference/multispectral.rst @@ -11,8 +11,8 @@ Atmospherically Resistant Vegetation Index (ARVI) xrspatial.multispectral.arvi -Enhanced Built=Up and Bareness Index (EBBI) -=========================================== +Enhanced Built-Up and Bareness Index (EBBI) +============================================ .. autosummary:: :toctree: _autosummary diff --git a/examples/user_guide/33_Porto_Taxi_Trajectories.ipynb b/examples/user_guide/33_Porto_Taxi_Trajectories.ipynb new file mode 100644 index 00000000..901befa8 --- /dev/null +++ b/examples/user_guide/33_Porto_Taxi_Trajectories.ipynb @@ -0,0 +1,362 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Xarray-Spatial Rasterize: Line rasterization with custom merge\n", + "\n", + "Taxi GPS traces make a good stress test for line rasterization. 200,000\n", + "trajectories from Porto, each with a GPS fix every 15 seconds, give enough\n", + "density to trace the full street network. We use the built-in `sum` merge\n", + "alongside a custom log-sum merge to show how non-linear aggregation changes\n", + "what you can see." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### What you'll build\n", + "\n", + "1. [Download and parse GPS trajectories](#Download-and-parse-GPS-trajectories) from the ECML/PKDD 2015 Porto taxi dataset\n", + "2. [Preview the raw trajectory data](#Preview)\n", + "3. [Rasterize trip counts](#Trip-count) with the built-in `count` merge\n", + "4. [Rasterize total duration](#Total-duration) with the built-in `sum` merge\n", + "5. [Write a custom log-sum merge function](#Custom-merge:-log-duration) that compresses dynamic range\n", + "6. [Compare all three side by side](#Comparison)\n", + "\n", + "\n", + "\n", + "**Jump to a section:**\n", + "[Data](#Download-and-parse-GPS-trajectories) | [Preview](#Preview) | [Trip count](#Trip-count) | [Duration](#Total-duration) | [Custom merge](#Custom-merge:-log-duration) | [Comparison](#Comparison)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Standard imports plus `json` for parsing GPS polylines, `geopandas` and `shapely` for vector geometry, and `ngjit` for compiling the custom merge function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import json\n", + "import numpy as np\n", + "import pandas as pd\n", + "import xarray as xr\n", + "\n", + "import geopandas as gpd\n", + "import matplotlib.pyplot as plt\n", + "from matplotlib.patches import Patch\n", + "from shapely.geometry import LineString\n", + "\n", + "import xrspatial # registers .xrs accessor\n", + "from xrspatial.utils import ngjit" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download and parse GPS trajectories\n", + "\n", + "The [ECML/PKDD 2015 Taxi Trajectory Challenge](https://archive.ics.uci.edu/dataset/339/) dataset has 1.7 million taxi trips from Porto, Portugal. Each trip has a `POLYLINE` column with `[longitude, latitude]` pairs recorded every 15 seconds. We read the first 200,000 trips from the ~509 MB download." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import urllib.request, zipfile, tempfile, os\n", + "\n", + "url = ('https://archive.ics.uci.edu/static/public/339/'\n", + " 'taxi+service+trajectory+prediction+challenge+ecml+pkdd+2015.zip')\n", + "\n", + "print('Downloading Porto taxi data (~509 MB)...')\n", + "tmpfile, _ = urllib.request.urlretrieve(url)\n", + "\n", + "# Nested ZIP: outer contains train.csv.zip, which contains train.csv\n", + "tmpdir = tempfile.mkdtemp()\n", + "with zipfile.ZipFile(tmpfile) as outer:\n", + " outer.extract('train.csv.zip', tmpdir)\n", + "\n", + "with zipfile.ZipFile(os.path.join(tmpdir, 'train.csv.zip')) as inner:\n", + " with inner.open('train.csv') as f:\n", + " df = pd.read_csv(f, nrows=200_000,\n", + " usecols=['POLYLINE', 'MISSING_DATA'])\n", + "\n", + "os.remove(tmpfile)\n", + "\n", + "# Drop trips with missing GPS data\n", + "df = df[df.MISSING_DATA == False].copy()\n", + "\n", + "# Parse polyline JSON, keep trips with at least 2 GPS points\n", + "df['coords'] = df.POLYLINE.apply(json.loads)\n", + "df = df[df.coords.apply(len) >= 2].copy()\n", + "\n", + "# Trip duration in minutes (15 seconds between readings)\n", + "df['duration_min'] = df.coords.apply(lambda c: (len(c) - 1) * 0.25)\n", + "\n", + "# Build LineStrings\n", + "df['geometry'] = df.coords.apply(LineString)\n", + "gdf = gpd.GeoDataFrame(df[['duration_min', 'geometry']],\n", + " geometry='geometry', crs='EPSG:4326')\n", + "\n", + "print(f'{len(gdf):,} trajectories after filtering')\n", + "print(f'Duration: min={gdf.duration_min.min():.1f}, '\n", + " f'median={gdf.duration_min.median():.1f}, '\n", + " f'max={gdf.duration_min.max():.1f} minutes')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each trajectory is a GPS trace through Porto's streets. Durations range from a few seconds to multi-hour shifts, with a median around 10 minutes." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Preview\n", + "\n", + "20,000 trajectories sampled and plotted on a dark background. The street network and major highways are already visible from the GPS traces alone." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(figsize=(10, 7.5))\n", + "ax.set_facecolor('#1a1a2e')\n", + "gdf.sample(min(20_000, len(gdf)), random_state=42).plot(\n", + " ax=ax, linewidth=0.1, alpha=0.3, color='#e94560')\n", + "ax.set_title(f'{len(gdf):,} taxi trajectories in Porto')\n", + "ax.set_axis_off()\n", + "ax.legend(handles=[Patch(facecolor='#e94560', alpha=0.6, label='GPS trace')],\n", + " loc='lower right', fontsize=11, framealpha=0.9)\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Raster grid\n", + "\n", + "A template DataArray covering Porto at roughly 25 meters per pixel. This grid sets the output resolution for all three rasterizations." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bounds = (-8.72, 41.10, -8.52, 41.22)\n", + "width, height = 800, 600\n", + "\n", + "def make_template(w, h, bounds):\n", + " xmin, ymin, xmax, ymax = bounds\n", + " px, py = (xmax - xmin) / w, (ymax - ymin) / h\n", + " x = np.linspace(xmin + px / 2, xmax - px / 2, w)\n", + " y = np.linspace(ymax - py / 2, ymin + py / 2, h)\n", + " return xr.DataArray(np.zeros((h, w)), dims=['y', 'x'],\n", + " coords={'y': y, 'x': x})\n", + "\n", + "template = make_template(width, height, bounds)\n", + "print(f'Grid: {width}x{height}, '\n", + " f'~{111000 * (bounds[3] - bounds[1]) / height:.0f} m/pixel')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Trip count\n", + "\n", + "The simplest rasterization: count how many trajectories pass through each pixel. High-traffic roads light up. Side streets stay dark." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "count_raster = template.xrs.rasterize(gdf, merge='count', fill=0)\n", + "\n", + "fig, ax = plt.subplots(figsize=(10, 7.5))\n", + "ax.set_facecolor('black')\n", + "count_raster.where(count_raster > 0).plot.imshow(\n", + " ax=ax, cmap='hot', add_colorbar=True,\n", + " cbar_kwargs={'label': 'Trip count', 'shrink': 0.7})\n", + "ax.set_title('Trip count per pixel')\n", + "ax.set_axis_off()\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Total duration\n", + "\n", + "Sum up trip durations with `merge='sum'` on the `duration_min` column. Each pixel accumulates the total taxi-minutes of all trips passing through it. Long trips add more weight, so the bright spots shift toward routes with both heavy traffic and longer rides." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dur_raster = template.xrs.rasterize(\n", + " gdf, column='duration_min', merge='sum', fill=0)\n", + "\n", + "fig, ax = plt.subplots(figsize=(10, 7.5))\n", + "ax.set_facecolor('black')\n", + "dur_raster.where(dur_raster > 0).plot.imshow(\n", + " ax=ax, cmap='hot', add_colorbar=True,\n", + " cbar_kwargs={'label': 'Total duration (minutes)', 'shrink': 0.7})\n", + "ax.set_title('Total duration per pixel (sum merge)')\n", + "ax.set_axis_off()\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Custom merge: log-duration\n", + "\n", + "The built-in `sum` merge is linear: a 60-minute airport run contributes 12x more than a 5-minute hop. A few long trips can dominate a pixel's value.\n", + "\n", + "A custom merge function fixes this. We define a `@ngjit`-compiled function that sums `log(1 + duration)` instead of raw duration. The log compresses the 12:1 ratio down to about 2:1 (`log(61) = 4.1` vs `log(6) = 1.8`).\n", + "\n", + "The merge function receives three arguments:\n", + "\n", + "- `pixel`: current pixel value\n", + "- `props`: 1D float64 array of feature properties (`props[0]` is the column value)\n", + "- `is_first`: 1 on first write to this pixel, 0 otherwise" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@ngjit\n", + "def log_duration_sum(pixel, props, is_first):\n", + " \"\"\"Sum log(1 + duration) instead of raw duration.\"\"\"\n", + " val = np.log1p(props[0])\n", + " if is_first:\n", + " return val\n", + " return pixel + val\n", + "\n", + "log_raster = template.xrs.rasterize(\n", + " gdf, column='duration_min', merge=log_duration_sum, fill=0)\n", + "\n", + "fig, ax = plt.subplots(figsize=(10, 7.5))\n", + "ax.set_facecolor('black')\n", + "log_raster.where(log_raster > 0).plot.imshow(\n", + " ax=ax, cmap='hot', add_colorbar=True,\n", + " cbar_kwargs={'label': 'Log-duration sum', 'shrink': 0.7})\n", + "ax.set_title('Log-duration per pixel (custom merge)')\n", + "ax.set_axis_off()\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Comparison\n", + "\n", + "All three rasterizations side by side. Count and duration look similar since busy roads carry both more trips and more total minutes. The log-duration panel looks different: it compresses the thousand-fold range between highways and side streets down to 3-4x, so the full street network becomes readable." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pathlib\n", + "pathlib.Path('images').mkdir(exist_ok=True)\n", + "\n", + "fig, axes = plt.subplots(1, 3, figsize=(20, 8), facecolor='black')\n", + "\n", + "titles = ['Trip count', 'Total duration (sum)', 'Log-duration (custom merge)']\n", + "rasters = [count_raster, dur_raster, log_raster]\n", + "\n", + "for ax, raster, title in zip(axes, rasters, titles):\n", + " ax.set_facecolor('black')\n", + " masked = raster.where(raster > 0)\n", + " masked.plot.imshow(ax=ax, cmap='hot', add_colorbar=False,\n", + " interpolation='nearest')\n", + " ax.set_title(title, color='white', fontsize=14, pad=10)\n", + " ax.set_axis_off()\n", + "\n", + "plt.tight_layout()\n", + "plt.savefig('images/porto_taxi_lines_preview.png',\n", + " bbox_inches='tight', dpi=120, facecolor='black')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The count and duration panels are dominated by the main corridors and the downtown waterfront. Side streets barely register. The log-duration panel pulls those streets into visible range without blowing out the highways. Same data, different merge function.\n", + "\n", + "When to reach for a non-linear merge: when a handful of features dominate the signal and you want to see the rest. Log-sum fits data that spans several orders of magnitude." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "