diff --git a/unstract/connectors/src/unstract/connectors/filesystems/minio/exceptions.py b/unstract/connectors/src/unstract/connectors/filesystems/minio/exceptions.py index 2a4149ecaa..dd46e61b16 100644 --- a/unstract/connectors/src/unstract/connectors/filesystems/minio/exceptions.py +++ b/unstract/connectors/src/unstract/connectors/filesystems/minio/exceptions.py @@ -8,20 +8,24 @@ class BucketProbeDisposition(Enum): """Action for a `list_objects_v2` probe failure, per S3 error code.""" - DROP = "drop" # Hide the bucket (no access, or bucket gone). - FAIL_OPEN = "fail_open" # Region mismatch — keep bucket visible. + DROP = "drop" # Hide the bucket (no access, bucket gone, or unreachable). + FAIL_OPEN = "fail_open" # Keep the bucket visible despite the probe error. RETRY_FAIL_OPEN = "retry_fail_open" # Throttled — retry once, then keep. # S3 `Error.Code` → probe disposition. Unlisted codes propagate. # `RequestTimeTooSkewed` is omitted deliberately: it's a system-wide clock # issue, not a bucket outcome — let it surface via `handle_s3fs_exception`. +# `PermanentRedirect` / `IllegalLocationConstraintException` are dropped: the +# connector is pinned to one `endpoint_url`, so cross-region buckets can't +# be browsed through it regardless of IAM — listing them only surfaces a +# confusing `[Errno 78]` on click. BUCKET_PROBE_DISPOSITION: dict[str, BucketProbeDisposition] = { "AccessDenied": BucketProbeDisposition.DROP, "AllAccessDisabled": BucketProbeDisposition.DROP, "NoSuchBucket": BucketProbeDisposition.DROP, - "PermanentRedirect": BucketProbeDisposition.FAIL_OPEN, - "IllegalLocationConstraintException": BucketProbeDisposition.FAIL_OPEN, + "PermanentRedirect": BucketProbeDisposition.DROP, + "IllegalLocationConstraintException": BucketProbeDisposition.DROP, "SlowDown": BucketProbeDisposition.RETRY_FAIL_OPEN, "Throttling": BucketProbeDisposition.RETRY_FAIL_OPEN, "ThrottlingException": BucketProbeDisposition.RETRY_FAIL_OPEN, diff --git a/unstract/connectors/tests/filesystems/test_miniofs.py b/unstract/connectors/tests/filesystems/test_miniofs.py index 3cf9ade1f2..9da9a1dcf0 100644 --- a/unstract/connectors/tests/filesystems/test_miniofs.py +++ b/unstract/connectors/tests/filesystems/test_miniofs.py @@ -104,15 +104,31 @@ def test_no_such_bucket_is_dropped(self) -> None: ): self.assertFalse(asyncio.run(fs._is_bucket_accessible("deleted"))) - def test_permanent_redirect_bucket_is_kept(self) -> None: - # Fail-open: region mismatch keeps the bucket listed. + def test_permanent_redirect_bucket_is_dropped(self) -> None: + # Connector pins one endpoint_url; cross-region buckets are unreachable + # via this connector regardless of IAM, so drop them rather than fail + # later with [Errno 78] on click. fs = self._make_fs() with patch.object( fs, "_call_s3", new=AsyncMock(side_effect=_translated_error("PermanentRedirect")), ): - self.assertTrue(asyncio.run(fs._is_bucket_accessible("other-region"))) + self.assertFalse(asyncio.run(fs._is_bucket_accessible("other-region"))) + + def test_illegal_location_constraint_bucket_is_dropped(self) -> None: + # Companion to PermanentRedirect: same cross-region condition surfaced + # as a 400 instead of a 301. Guards against a future edit reverting + # only one of the two cross-region entries in BUCKET_PROBE_DISPOSITION. + fs = self._make_fs() + with patch.object( + fs, + "_call_s3", + new=AsyncMock( + side_effect=_translated_error("IllegalLocationConstraintException") + ), + ): + self.assertFalse(asyncio.run(fs._is_bucket_accessible("wrong-region"))) def test_throttling_retries_then_fails_open(self) -> None: fs = self._make_fs()