diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yml
similarity index 100%
rename from .github/workflows/R-CMD-check.yaml
rename to .github/workflows/R-CMD-check.yml
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yml
similarity index 100%
rename from .github/workflows/test-coverage.yaml
rename to .github/workflows/test-coverage.yml
diff --git a/.github/workflows/update-maps.yml b/.github/workflows/update-maps.yml
new file mode 100644
index 0000000..77d20b3
--- /dev/null
+++ b/.github/workflows/update-maps.yml
@@ -0,0 +1,104 @@
+
+name: Check for new shapefiles
+
+on:
+  schedule:
+    # runs at midnight (UTC) on the 1st of March and September
+    - cron: '0 0 1 3,9 *'
+  workflow_dispatch:
+
+jobs:
+  check:
+    runs-on: ubuntu-latest
+    env:
+      PUSHOVER_API_KEY: ${{ secrets.PUSHOVER_API_KEY }}
+      PUSHOVER_USER_KEY: ${{ secrets.PUSHOVER_USER_KEY }}
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      # NOTE(review): `cache: 'pip'` needs a requirements.txt or pyproject.toml
+      # in the repository to compute its cache key -- confirm one exists.
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.9'
+          cache: 'pip'
+
+      # the updater scripts import these third-party packages
+      - name: Install Python dependencies
+        run: python -m pip install requests StrEnum
+
+      # `continue-on-error` keeps the job green when no new shapefiles are
+      # published yet, so the following steps branch on this step's `outcome`
+      # (its result before `continue-on-error` is applied) instead of on
+      # `success()`/`failure()`.
+      - name: Download shapefiles
+        id: download
+        run: python data-raw/scripts/shapefiles.py
+        continue-on-error: true
+
+      - name: Send failure notification
+        if: ${{ steps.download.outcome == 'failure' }}
+        run: python data-raw/scripts/pushover.py "⚠️ usmapdata updater failed to find new shapefiles." "LOW"
+
+      - name: Setup R
+        if: ${{ steps.download.outcome == 'success' }}
+        uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+
+      # `local::.` installs usmapdata itself so `usmapdata:::` is available below
+      - name: Setup usmapdata
+        if: ${{ steps.download.outcome == 'success' }}
+        uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: local::.
+
+      # shapefiles.py extracts into data-raw/shapefiles and exports
+      # `state_shp_path`/`county_shp_path` through $GITHUB_ENV
+      - name: Modify shapefiles
+        if: ${{ steps.download.outcome == 'success' }}
+        env:
+          STATE_SHP_DIR: "data-raw/shapefiles/${{ env.state_shp_path }}"
+          COUNTY_SHP_DIR: "data-raw/shapefiles/${{ env.county_shp_path }}"
+          STATE_OUTPUT: "inst/extdata/us_states.gpkg"
+          COUNTY_OUTPUT: "inst/extdata/us_counties.gpkg"
+        run: |
+          Rscript -e "usmapdata:::create_us_map('states', Sys.getenv('STATE_SHP_DIR'), Sys.getenv('STATE_OUTPUT'))"
+          Rscript -e "usmapdata:::create_us_map('counties', Sys.getenv('COUNTY_SHP_DIR'), Sys.getenv('COUNTY_OUTPUT'))"
+
+      - name: Determine pull request parameters
+        if: ${{ steps.download.outcome == 'success' }}
+        run: |
+          echo "branch_name=data-update/$(date +'%B-%Y')" >> "$GITHUB_ENV"
+          echo "pr_title=Update map data - $(date +'%B %Y')" >> "$GITHUB_ENV"
+
+      - name: Open pull request
+        if: ${{ steps.download.outcome == 'success' }}
+        uses: peter-evans/create-pull-request@v5
+        with:
+          commit-message: Update map data based on latest shapefiles
+          branch: ${{ env.branch_name }}
+          title: ${{ env.pr_title }}
+          body: |
+            Updated map data based on latest shapefiles from
+            the US Census Bureau's [cartographic boundary files][1].
+
+            ### Review Checklist
+            - [ ] Ensure all checks and tests pass
+            - [ ] Load current branch with `devtools::install_github("pdil/usmapdata", ref = "${{ env.branch_name }}")` and test `usmap`
+            - [ ] Perform smoke test of all plotting features to ensure consistency
+            - [ ] Update data file changelog in [`usmap` `README.md`][2]
+
+            [1]: https://www.census.gov/geographies/mapping-files/time-series/geo/cartographic-boundary.html
+            [2]: https://github.com/pdil/usmap/blob/master/README.md
+          assignees: pdil
+          labels: data update
+          reviewers: pdil
+          delete-branch: true
+
+      - name: Send success notification
+        if: ${{ steps.download.outcome == 'success' }}
+        run: python data-raw/scripts/pushover.py "✅ usmapdata has updated its data files, a PR review is needed."
diff --git a/NEWS.md b/NEWS.md
index 0ecb966..3e5fdcf 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -7,6 +7,8 @@
   * Once the upgrade is complete, this parameter will be removed and the new functionality will be the default.
 * The new map files are smaller in size while maintaining the same resolution.
 * The format of the data also allows for easier manipulation in the future using the `sf` package.
+* Add scripts to perform automated map data updates, see [Issue #5](https://github.com/pdil/usmapdata/issues/5).
+  * This is not yet fully functional and will be refined over time independent of `usmapdata` package updates.
 
 # usmapdata 0.1.2
 Released Monday, December 11, 2023.
diff --git a/data-raw/scripts/config.ini b/data-raw/scripts/config.ini
new file mode 100644
index 0000000..1876eab
--- /dev/null
+++ b/data-raw/scripts/config.ini
@@ -0,0 +1,6 @@
+[shapefiles]
+url = https://www2.census.gov/geo/tiger/GENZ{year}/shp/cb_{year}_us_{entity}_{res}.zip
+current_year = 2021
+entities = state,county
+res = 20m
+
diff --git a/data-raw/scripts/pushover.py b/data-raw/scripts/pushover.py
new file mode 100644
index 0000000..43e09d3
--- /dev/null
+++ b/data-raw/scripts/pushover.py
@@ -0,0 +1,81 @@
+
+import os
+import sys
+from typing import Optional
+
+import requests
+from strenum import StrEnum
+
+
+class Pushover:
+    """Minimal client for sending notifications through the Pushover API."""
+
+    MESSAGES_URL = "https://api.pushover.net/1/messages.json"
+
+    class Priority(StrEnum):
+        """Values accepted by the `priority` field of a Pushover message."""
+
+        LOWEST = "-2"
+        LOW = "-1"
+        NORMAL = "0"
+        HIGH = "1"
+        EMERGENCY = "2"
+
+    def __init__(self, token: str, user: str):
+        """Store the application token and user key sent with every message."""
+        self._token = token
+        self._user = user
+
+    def send(self, message: str, attachment_url: Optional[str] = None, priority=Priority.NORMAL):
+        """Send a Pushover notification.
+
+        Args:
+            message: Text of the notification.
+            attachment_url: Optional path of a local file to attach; it is
+                ignored unless it points to an existing file.
+            priority: A `Pushover.Priority` member.
+
+        Returns:
+            The `requests.Response` returned by the Pushover API.
+        """
+        # NOTE: Pushover also requires `retry` and `expire` parameters for
+        # EMERGENCY messages; they are not sent here.
+        data = {
+            "token": self._token,
+            "user": self._user,
+            "message": message,
+            "priority": priority
+        }
+
+        if attachment_url and os.path.isfile(attachment_url):
+            # keep the file open only for the duration of the upload
+            with open(attachment_url, "rb") as attachment:
+                files = {"attachment": ("image.jpg", attachment, "image/jpeg")}
+                return requests.post(self.MESSAGES_URL, data=data, files=files, timeout=30)
+
+        return requests.post(self.MESSAGES_URL, data=data, timeout=30)
+
+
+if __name__ == "__main__":
+    # usage: pushover.py MESSAGE [PRIORITY], e.g. pushover.py "Done" LOW
+    api_key = os.environ["PUSHOVER_API_KEY"]
+    user_key = os.environ["PUSHOVER_USER_KEY"]
+
+    pushover = Pushover(token=api_key, user=user_key)
+    args = sys.argv
+
+    try:
+        message = args[1]
+    except IndexError:
+        raise SystemExit("Required message parameter not supplied")
+
+    if len(args) >= 3:
+        # the optional second argument names a Priority member
+        try:
+            priority = Pushover.Priority[args[2].upper()]
+        except KeyError:
+            raise SystemExit(f"Unknown priority: {args[2]}")
+    else:
+        priority = Pushover.Priority.NORMAL
+
+    pushover.send(message, priority=priority)
diff --git a/data-raw/scripts/shapefiles.py b/data-raw/scripts/shapefiles.py
new file mode 100644
index 0000000..ac98f21
--- /dev/null
+++ b/data-raw/scripts/shapefiles.py
@@ -0,0 +1,103 @@
+
+from configparser import ConfigParser
+import os
+import shutil
+import sys
+from zipfile import ZipFile
+
+import requests
+
+
+class DownloadError(Exception):
+    """Raised when a shapefile archive cannot be downloaded.
+
+    The HTTP status code of the failed response is kept in `code`.
+    """
+
+    def __init__(self, message, code=None):
+        super().__init__(message)
+        self.code = code
+
+
+def _download_and_extract(file_url: str, extract_dir: str) -> None:
+    """Download the zip archive at `file_url` and extract it into `extract_dir`.
+
+    Raises:
+        DownloadError: If the response status code is not 200.
+    """
+    LOCAL_FILE = "download.zip"
+    response = requests.get(file_url, timeout=60)
+
+    if response.status_code != 200:
+        raise DownloadError(f"Failed to download {file_url}.", code=response.status_code)
+
+    with open(LOCAL_FILE, "wb") as f:
+        f.write(response.content)
+    print(f"{LOCAL_FILE} downloaded from {file_url}.")
+
+    try:
+        with ZipFile(LOCAL_FILE, "r") as z:
+            z.extractall(extract_dir)
+        print(f"{LOCAL_FILE} extracted to {extract_dir}.")
+    finally:
+        # remove the temporary archive even if extraction fails
+        os.remove(LOCAL_FILE)
+
+
+def download_shapefiles():
+    """Download the shapefiles for the year after the configured `current_year`.
+
+    Settings are read from config.ini next to this script and the archives
+    are extracted into ../shapefiles. When $GITHUB_ENV is set, the name of
+    each .shp file is exported as `<entity>_shp_path`. After every download
+    has succeeded, the new year is written back to config.ini; if a download
+    fails, the process exits with status 1.
+    """
+    # create output directory
+    script_dir = os.path.abspath(os.path.dirname(__file__))
+    extract_dir = os.path.join(script_dir, "..", "shapefiles")
+
+    if os.path.exists(extract_dir):
+        shutil.rmtree(extract_dir)
+    os.makedirs(extract_dir)
+
+    # get current configuration
+    config_path = os.path.join(script_dir, "config.ini")
+    config = ConfigParser()
+    config.read(config_path)
+    SECTION = "shapefiles"
+
+    url_template = config.get(SECTION, "url")
+    current_year = config.getint(SECTION, "current_year")
+    entities = config.get(SECTION, "entities").split(",")
+    res = config.get(SECTION, "res")
+
+    year = current_year + 1
+
+    try:
+        # attempt shapefile downloads
+        for entity in entities:
+            url = url_template.format(year=year, entity=entity, res=res)
+            _download_and_extract(url, extract_dir)
+
+            if (gh_env := os.getenv("GITHUB_ENV")):
+                with open(gh_env, "a") as f:
+                    # $GITHUB_ENV expects one NAME=value pair per line
+                    f.write(f"{entity}_shp_path=cb_{year}_us_{entity}_{res}.shp\n")
+    except DownloadError as e:
+        if e.code == 404: # i.e. shapefiles not found
+            print(f"The shapefiles for {year} were not found. Better luck next time!")
+        else: # other download errors
+            print(e)
+
+        # exit statuses are truncated to 8 bits, so don't pass the HTTP code through
+        sys.exit(1)
+
+    # update current year, only reached when every download succeeded
+    config.set(SECTION, "current_year", f"{year}")
+    with open(config_path, "w") as f:
+        config.write(f)
+
+
+if __name__ == "__main__":
+    download_shapefiles()