diff --git a/python/pyarrow/util.py b/python/pyarrow/util.py
index 5878d1f90262..a95826e1c005 100644
--- a/python/pyarrow/util.py
+++ b/python/pyarrow/util.py
@@ -231,8 +231,9 @@ def _break_traceback_cycle_from_frame(frame):
 
 
 def _download_urllib(url, out_path):
-    from urllib.request import urlopen
-    with urlopen(url) as response:
+    from urllib.request import urlopen, Request
+    req = Request(url, headers={'User-Agent': 'pyarrow'})
+    with urlopen(req) as response:
         with open(out_path, 'wb') as f:
             f.write(response.read())
 
@@ -264,11 +265,13 @@ def download_tzdata_on_windows():
     # Try to download the files with requests and then fall back to urllib. This
     # works around possible issues in certain older environment (GH-45295)
     try:
-        _download_requests(tzdata_url, tzdata_compressed_path)
-        _download_requests(windows_zones_url, windows_zones_path)
+        import requests  # noqa: F401
+        download_fn = _download_requests
     except ImportError:
-        _download_urllib(tzdata_url, tzdata_compressed_path)
-        _download_urllib(windows_zones_url, windows_zones_path)
+        download_fn = _download_urllib
+
+    download_fn(tzdata_url, tzdata_compressed_path)
+    download_fn(windows_zones_url, windows_zones_path)
 
     assert os.path.exists(tzdata_compressed_path)
     assert os.path.exists(windows_zones_path)