From 1a6b85e9eeead216f2d2d1cf54ae9b89f72f7c4f Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Wed, 11 Mar 2026 19:43:01 +0530 Subject: [PATCH 1/3] UN-3266 fix: replace hardcoded /tmp paths with secure temp dirs in tests Replace hardcoded /tmp/ paths (SonarCloud S5443 security hotspots) with pytest's tmp_path fixture or module-level tempfile.mkdtemp() constants in all affected test files to avoid world-writable directory vulnerabilities. Co-Authored-By: Claude Sonnet 4.6 --- unstract/sdk1/tests/test_execution.py | 7 ++++++- workers/tests/test_legacy_executor_scaffold.py | 7 ++++--- workers/tests/test_phase1_log_streaming.py | 8 ++++---- workers/tests/test_sanity_phase6g.py | 4 ++-- workers/tests/test_sanity_phase6h.py | 6 +++--- workers/tests/test_sanity_phase6j.py | 6 +++--- workers/tests/test_usage.py | 4 ++-- 7 files changed, 24 insertions(+), 18 deletions(-) diff --git a/unstract/sdk1/tests/test_execution.py b/unstract/sdk1/tests/test_execution.py index 3839a01073..08b36f31d4 100644 --- a/unstract/sdk1/tests/test_execution.py +++ b/unstract/sdk1/tests/test_execution.py @@ -2,6 +2,8 @@ import json import logging +import os +import tempfile from typing import Any, Self from unittest.mock import MagicMock @@ -20,6 +22,9 @@ from unstract.sdk1.execution.result import ExecutionResult +_TEST_FILE_PATH = os.path.join(tempfile.mkdtemp(), "test.pdf") + + class TestExecutionContext: """Tests for ExecutionContext serialization and validation.""" @@ -31,7 +36,7 @@ def _make_context(self, **overrides: Any) -> ExecutionContext: "run_id": "run-001", "execution_source": "tool", "organization_id": "org-123", - "executor_params": {"file_path": "/tmp/test.pdf"}, + "executor_params": {"file_path": _TEST_FILE_PATH}, "request_id": "req-abc", } defaults.update(overrides) diff --git a/workers/tests/test_legacy_executor_scaffold.py b/workers/tests/test_legacy_executor_scaffold.py index f2d9935f9b..48789c218d 100644 --- a/workers/tests/test_legacy_executor_scaffold.py +++ b/workers/tests/test_legacy_executor_scaffold.py @@ -203,11 +203,12 @@ def test_chunking_config_zero_raises(self): with pytest.raises(ValueError, match="zero chunks"): ChunkingConfig(chunk_size=0, chunk_overlap=0) - def test_file_info(self): + def test_file_info(self, tmp_path): from executor.executors.dto import FileInfo - fi = FileInfo(file_path="/tmp/test.pdf", file_hash="abc123") - assert fi.file_path == "/tmp/test.pdf" + test_path = str(tmp_path / "test.pdf") + fi = FileInfo(file_path=test_path, file_hash="abc123") + assert fi.file_path == test_path def test_instance_identifiers(self): from executor.executors.dto import InstanceIdentifiers diff --git a/workers/tests/test_phase1_log_streaming.py b/workers/tests/test_phase1_log_streaming.py index 903449d75a..95de9b21bc 100644 --- a/workers/tests/test_phase1_log_streaming.py +++ b/workers/tests/test_phase1_log_streaming.py @@ -313,7 +313,7 @@ class TestLegacyExecutorLogPassthrough: @patch("executor.executors.legacy_executor.X2Text") @patch("executor.executors.legacy_executor.ExecutorToolShim") def test_extract_passes_log_info_to_shim( - self, mock_shim_cls, mock_x2text, mock_fs + self, mock_shim_cls, mock_x2text, mock_fs, tmp_path ): from executor.executors.legacy_executor import LegacyExecutor from unstract.sdk1.execution.registry import ExecutorRegistry @@ -337,7 +337,7 @@ def test_extract_passes_log_info_to_shim( log_events_id="session-abc", executor_params={ "x2text_instance_id": "x2t-1", - "file_path": "/tmp/test.pdf", + "file_path": str(tmp_path / "test.pdf"), "platform_api_key": "sk-test", }, ) @@ -357,7 +357,7 @@ def test_extract_passes_log_info_to_shim( @patch("executor.executors.legacy_executor.X2Text") @patch("executor.executors.legacy_executor.ExecutorToolShim") def test_extract_no_log_info_when_absent( - self, mock_shim_cls, mock_x2text, mock_fs + self, mock_shim_cls, mock_x2text, mock_fs, tmp_path ): from executor.executors.legacy_executor import LegacyExecutor from unstract.sdk1.execution.registry import ExecutorRegistry @@ -380,7 +380,7 @@ def test_extract_no_log_info_when_absent( execution_source="tool", executor_params={ "x2text_instance_id": "x2t-1", - "file_path": "/tmp/test.pdf", + "file_path": str(tmp_path / "test.pdf"), "platform_api_key": "sk-test", }, ) diff --git a/workers/tests/test_sanity_phase6g.py b/workers/tests/test_sanity_phase6g.py index 73bb738911..8b175f8eec 100644 --- a/workers/tests/test_sanity_phase6g.py +++ b/workers/tests/test_sanity_phase6g.py @@ -154,7 +154,7 @@ def test_dispatch_sends_to_sps_queue(self): call_kwargs = mock_app.send_task.call_args assert call_kwargs.kwargs.get("queue") == "celery_executor_simple_prompt_studio" - def test_dispatch_sps_index_to_correct_queue(self): + def test_dispatch_sps_index_to_correct_queue(self, tmp_path): mock_app = MagicMock() mock_result = MagicMock() mock_result.get.return_value = ExecutionResult( @@ -168,7 +168,7 @@ def test_dispatch_sps_index_to_correct_queue(self): operation="sps_index", run_id="run-1", execution_source="tool", - executor_params={"output": {}, "file_path": "/tmp/test.pdf"}, + executor_params={"output": {}, "file_path": str(tmp_path / "test.pdf")}, ) result = dispatcher.dispatch(ctx) diff --git a/workers/tests/test_sanity_phase6h.py b/workers/tests/test_sanity_phase6h.py index 1c43b3d78b..c4249fb05a 100644 --- a/workers/tests/test_sanity_phase6h.py +++ b/workers/tests/test_sanity_phase6h.py @@ -198,7 +198,7 @@ def test_legacy_returns_failure_for_agentic_summarize(self): # --------------------------------------------------------------------------- class TestStructureToolAgenticRouting: - def test_structure_tool_dispatches_agentic_extract(self): + def test_structure_tool_dispatches_agentic_extract(self, tmp_path): """Verify _run_agentic_extraction sends executor_name='agentic'.""" from file_processing.structure_tool_task import _run_agentic_extraction @@ -210,8 +210,8 @@ def test_structure_tool_dispatches_agentic_extract(self): result = _run_agentic_extraction( tool_metadata={"name": "test"}, - input_file_path="/tmp/test.pdf", - output_dir_path="/tmp/output", + input_file_path=str(tmp_path / "test.pdf"), + output_dir_path=str(tmp_path / "output"), tool_instance_metadata={}, dispatcher=mock_dispatcher, shim=MagicMock(), diff --git a/workers/tests/test_sanity_phase6j.py b/workers/tests/test_sanity_phase6j.py index 2336b65d05..c52dcdf490 100644 --- a/workers/tests/test_sanity_phase6j.py +++ b/workers/tests/test_sanity_phase6j.py @@ -382,7 +382,7 @@ def test_highlight_plugin_not_installed_no_error(self, _mock_eps): assert ExecutorPluginLoader.get("highlight-data") is None # No error — graceful degradation - def test_mock_highlight_plugin_shared_across_executors(self): + def test_mock_highlight_plugin_shared_across_executors(self, tmp_path): """Multiple executors can use the same highlight plugin instance.""" from executor.executors.plugins.loader import ExecutorPluginLoader @@ -412,8 +412,8 @@ def get_confidence_data(self): assert cls is FakeHighlight # Both legacy and agentic contexts can create instances - legacy_hl = cls(file_path="/tmp/doc.txt", execution_source="ide") - agentic_hl = cls(file_path="/tmp/other.txt", execution_source="tool") + legacy_hl = cls(file_path=str(tmp_path / "doc.txt"), execution_source="ide") + agentic_hl = cls(file_path=str(tmp_path / "other.txt"), execution_source="tool") assert legacy_hl.get_highlight_data() == {"lines": [1, 2, 3]} assert agentic_hl.get_confidence_data() == {"confidence": 0.95} diff --git a/workers/tests/test_usage.py b/workers/tests/test_usage.py index 2fecc76713..fc08ac825b 100644 --- a/workers/tests/test_usage.py +++ b/workers/tests/test_usage.py @@ -223,7 +223,7 @@ class TestMetricsInResult: ) @patch("executor.executors.legacy_executor.ExecutorToolShim") def test_answer_prompt_returns_metrics( - self, mock_shim_cls, mock_get_deps, _mock_idx + self, mock_shim_cls, mock_get_deps, _mock_idx, tmp_path ): """answer_prompt result includes metrics dict.""" from unstract.sdk1.execution.context import ExecutionContext @@ -298,7 +298,7 @@ def test_answer_prompt_returns_metrics( ], "tool_id": "tool-1", "file_hash": "hash123", - "file_path": "/tmp/test.txt", + "file_path": str(tmp_path / "test.txt"), "file_name": "test.txt", "PLATFORM_SERVICE_API_KEY": "test-key", }, From de2c737e51d369673c9cdc746ec4e8408f65d3a3 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Wed, 11 Mar 2026 20:38:05 +0530 Subject: [PATCH 2/3] UN-3266 fix: resolve ruff linting failures across multiple files - B026: pass url positionally in worker_celery.py to avoid star-arg after keyword - N803: rename MockAsyncResult to mock_async_result in test_tasks.py - E501/I001: fix long line and import sort in llm_whisperer helper - ANN401: replace Any with object|None in dispatcher.py; add noqa in test helpers - F841: remove unused workflow_id and result assignments Co-Authored-By: Claude Sonnet 4.6 --- backend/backend/worker_celery.py | 4 ++-- .../prompt_studio_core_v2/test_tasks.py | 6 +++--- .../adapters/x2text/llm_whisperer_v2/src/helper.py | 5 ++++- .../sdk1/src/unstract/sdk1/execution/dispatcher.py | 8 ++++---- unstract/sdk1/tests/test_execution.py | 12 ++++++------ workers/file_processing/structure_tool_task.py | 4 +--- 6 files changed, 20 insertions(+), 19 deletions(-) diff --git a/backend/backend/worker_celery.py b/backend/backend/worker_celery.py index 018f3d485b..d1158489e0 100644 --- a/backend/backend/worker_celery.py +++ b/backend/backend/worker_celery.py @@ -42,12 +42,12 @@ class _WorkerDispatchCelery(Celery): def connection_for_write(self, url=None, *args, **kwargs): return super().connection_for_write( - url=url or self._explicit_broker, *args, **kwargs + url or self._explicit_broker, *args, **kwargs ) def connection_for_read(self, url=None, *args, **kwargs): return super().connection_for_read( - url=url or self._explicit_broker, *args, **kwargs + url or self._explicit_broker, *args, **kwargs ) diff --git a/backend/prompt_studio/prompt_studio_core_v2/test_tasks.py b/backend/prompt_studio/prompt_studio_core_v2/test_tasks.py index 4efef90987..d068be8743 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/test_tasks.py +++ b/backend/prompt_studio/prompt_studio_core_v2/test_tasks.py @@ -359,7 +359,7 @@ def test_task_status_url_registered(self): assert "" in str(url.pattern) @patch("prompt_studio.prompt_studio_core_v2.views.AsyncResult", create=True) - def test_task_status_processing(self, MockAsyncResult): + def test_task_status_processing(self, mock_async_result): """Verify processing response for unfinished task.""" import inspect @@ -370,7 +370,7 @@ def test_task_status_processing(self, MockAsyncResult): assert '"processing"' in source @patch("prompt_studio.prompt_studio_core_v2.views.AsyncResult", create=True) - def test_task_status_completed(self, MockAsyncResult): + def test_task_status_completed(self, mock_async_result): """Verify completed response structure.""" import inspect @@ -382,7 +382,7 @@ def test_task_status_completed(self, MockAsyncResult): assert "result.result" in source @patch("prompt_studio.prompt_studio_core_v2.views.AsyncResult", create=True) - def test_task_status_failed(self, MockAsyncResult): + def test_task_status_failed(self, mock_async_result): """Verify failed response structure.""" import inspect diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py b/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py index 14790065ae..3a64fdb0e0 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py +++ b/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py @@ -11,6 +11,7 @@ LLMWhispererClientException, LLMWhispererClientV2, ) + from unstract.sdk1.adapters.exceptions import ExtractorError from unstract.sdk1.adapters.utils import AdapterUtils from unstract.sdk1.adapters.x2text.constants import X2TextConstants @@ -225,7 +226,9 @@ def get_whisperer_params( WhispererConfig.WAIT_TIMEOUT, WhispererDefaults.WAIT_TIMEOUT, ), - WhispererConfig.WAIT_FOR_COMPLETION: WhispererDefaults.WAIT_FOR_COMPLETION, + WhispererConfig.WAIT_FOR_COMPLETION: ( + WhispererDefaults.WAIT_FOR_COMPLETION + ), } ) if params[WhispererConfig.MODE] == Modes.LOW_COST.value: diff --git a/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py index 7fc9c5f720..d5d6867361 100644 --- a/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py +++ b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py @@ -72,7 +72,7 @@ class ExecutionDispatcher: ) """ - def __init__(self, celery_app: Any = None) -> None: + def __init__(self, celery_app: object | None = None) -> None: """Initialize the dispatcher. Args: @@ -201,10 +201,10 @@ def dispatch_async( def dispatch_with_callback( self, context: ExecutionContext, - on_success: Any = None, - on_error: Any = None, + on_success: object | None = None, + on_error: object | None = None, task_id: str | None = None, - ) -> Any: + ) -> object: """Fire-and-forget dispatch with Celery link callbacks. Sends the task to the executor queue and returns immediately. diff --git a/unstract/sdk1/tests/test_execution.py b/unstract/sdk1/tests/test_execution.py index 08b36f31d4..62fb04f37a 100644 --- a/unstract/sdk1/tests/test_execution.py +++ b/unstract/sdk1/tests/test_execution.py @@ -8,6 +8,7 @@ from unittest.mock import MagicMock import pytest + from unstract.sdk1.constants import LogLevel, ToolEnv from unstract.sdk1.exceptions import SdkError from unstract.sdk1.execution.context import ( @@ -21,14 +22,13 @@ from unstract.sdk1.execution.registry import ExecutorRegistry from unstract.sdk1.execution.result import ExecutionResult - _TEST_FILE_PATH = os.path.join(tempfile.mkdtemp(), "test.pdf") class TestExecutionContext: """Tests for ExecutionContext serialization and validation.""" - def _make_context(self, **overrides: Any) -> ExecutionContext: + def _make_context(self, **overrides: Any) -> ExecutionContext: # noqa: ANN401 """Create a default ExecutionContext with optional overrides.""" defaults: dict[str, Any] = { "executor_name": "legacy", @@ -491,7 +491,7 @@ def _clean_registry(self: Self) -> None: """Ensure a clean registry for every test.""" ExecutorRegistry.clear() - def _make_context(self, **overrides: Any) -> ExecutionContext: + def _make_context(self, **overrides: Any) -> ExecutionContext: # noqa: ANN401 defaults: dict[str, Any] = { "executor_name": "legacy", "operation": "extract", @@ -587,7 +587,7 @@ def execute(self, context: ExecutionContext) -> ExecutionResult: class TestExecutionDispatcher: """Tests for ExecutionDispatcher (mocked Celery).""" - def _make_context(self, **overrides: Any) -> ExecutionContext: + def _make_context(self, **overrides: Any) -> ExecutionContext: # noqa: ANN401 defaults: dict[str, Any] = { "executor_name": "legacy", "operation": "extract", @@ -918,7 +918,7 @@ def test_dispatch_with_callback_custom_task_id( dispatcher = ExecutionDispatcher(celery_app=mock_app) ctx = self._make_context() - result = dispatcher.dispatch_with_callback(ctx, task_id="pre-gen-id-123") + dispatcher.dispatch_with_callback(ctx, task_id="pre-gen-id-123") call_kwargs = mock_app.send_task.call_args assert call_kwargs[1]["task_id"] == "pre-gen-id-123" @@ -975,7 +975,7 @@ def stream_log( log: str, level: LogLevel = LogLevel.INFO, stage: str = "TOOL_RUN", - **kwargs: Any, + **kwargs: Any, # noqa: ANN401 ) -> None: _level_map = { LogLevel.DEBUG: logging.DEBUG, diff --git a/workers/file_processing/structure_tool_task.py b/workers/file_processing/structure_tool_task.py index 6775a298a4..9fefe2da63 100644 --- a/workers/file_processing/structure_tool_task.py +++ b/workers/file_processing/structure_tool_task.py @@ -24,7 +24,6 @@ from file_processing.worker import app from shared.enums.task_enums import TaskName - from unstract.sdk1.constants import ToolEnv, UsageKwargs from unstract.sdk1.execution.context import ExecutionContext from unstract.sdk1.execution.dispatcher import ExecutionDispatcher @@ -221,7 +220,6 @@ def _execute_structure_tool_impl(params: dict) -> dict: """ # ---- Unpack params ---- organization_id = params["organization_id"] - workflow_id = params.get("workflow_id", "") execution_id = params.get("execution_id", "") file_execution_id = params["file_execution_id"] tool_instance_metadata = params["tool_instance_metadata"] @@ -679,4 +677,4 @@ def _write_tool_result( data=json.dumps(existing, indent=2), ) except Exception as e: - logger.warning("Failed to write tool result to METADATA.json: %s", e) \ No newline at end of file + logger.warning("Failed to write tool result to METADATA.json: %s", e) From 5b3e1069871cf2e95a10e405863f14ed54940ad7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 11 Mar 2026 15:11:35 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- backend/backend/worker_celery.py | 8 ++------ .../sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py | 1 - unstract/sdk1/tests/test_execution.py | 1 - workers/file_processing/structure_tool_task.py | 1 + 4 files changed, 3 insertions(+), 8 deletions(-) diff --git a/backend/backend/worker_celery.py b/backend/backend/worker_celery.py index d1158489e0..956f789ecf 100644 --- a/backend/backend/worker_celery.py +++ b/backend/backend/worker_celery.py @@ -41,14 +41,10 @@ class _WorkerDispatchCelery(Celery): _explicit_broker: str | None = None def connection_for_write(self, url=None, *args, **kwargs): - return super().connection_for_write( - url or self._explicit_broker, *args, **kwargs - ) + return super().connection_for_write(url or self._explicit_broker, *args, **kwargs) def connection_for_read(self, url=None, *args, **kwargs): - return super().connection_for_read( - url or self._explicit_broker, *args, **kwargs - ) + return super().connection_for_read(url or self._explicit_broker, *args, **kwargs) def get_worker_celery_app() -> Celery: diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py b/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py index 3a64fdb0e0..dd63bad72c 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py +++ b/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py @@ -11,7 +11,6 @@ LLMWhispererClientException, LLMWhispererClientV2, ) - from unstract.sdk1.adapters.exceptions import ExtractorError from unstract.sdk1.adapters.utils import AdapterUtils from unstract.sdk1.adapters.x2text.constants import X2TextConstants diff --git a/unstract/sdk1/tests/test_execution.py b/unstract/sdk1/tests/test_execution.py index 62fb04f37a..540072ea0d 100644 --- a/unstract/sdk1/tests/test_execution.py +++ b/unstract/sdk1/tests/test_execution.py @@ -8,7 +8,6 @@ from unittest.mock import MagicMock import pytest - from unstract.sdk1.constants import LogLevel, ToolEnv from unstract.sdk1.exceptions import SdkError from unstract.sdk1.execution.context import ( diff --git a/workers/file_processing/structure_tool_task.py b/workers/file_processing/structure_tool_task.py index 9fefe2da63..e080d1cde8 100644 --- a/workers/file_processing/structure_tool_task.py +++ b/workers/file_processing/structure_tool_task.py @@ -24,6 +24,7 @@ from file_processing.worker import app from shared.enums.task_enums import TaskName + from unstract.sdk1.constants import ToolEnv, UsageKwargs from unstract.sdk1.execution.context import ExecutionContext from unstract.sdk1.execution.dispatcher import ExecutionDispatcher