= ({
- {/* Right side: Workflow selector + Send/Stop buttons */}
+ {/* Right side: Model + Workflow selector + Send/Stop buttons */}
+ {modelSlot}
{workflowSlot}
{isRunActive ? (
diff --git a/components/frontend/src/components/session/MessagesTab.tsx b/components/frontend/src/components/session/MessagesTab.tsx
old mode 100644
new mode 100755
index 89b9ef5c2..c17494324
--- a/components/frontend/src/components/session/MessagesTab.tsx
+++ b/components/frontend/src/components/session/MessagesTab.tsx
@@ -53,11 +53,12 @@ export type MessagesTabProps = {
onAddRepository?: () => void;
onUploadFile?: () => void;
workflowSlot?: React.ReactNode;
+ modelSlot?: React.ReactNode;
projectName?: string;
};
-const MessagesTab: React.FC<MessagesTabProps> = ({ session, streamMessages, chatInput, setChatInput, onSendChat, onSendToolAnswer, onInterrupt, onGoToResults, onContinue, workflowMetadata, onCommandClick, isRunActive = false, showWelcomeExperience, welcomeExperienceComponent, activeWorkflow, userHasInteracted = false, queuedMessages = [], hasRealMessages = false, onCancelQueuedMessage, onUpdateQueuedMessage, onPasteImage, onClearQueue, agentName, onAddRepository, onUploadFile, workflowSlot, projectName }) => {
+const MessagesTab: React.FC<MessagesTabProps> = ({ session, streamMessages, chatInput, setChatInput, onSendChat, onSendToolAnswer, onInterrupt, onGoToResults, onContinue, workflowMetadata, onCommandClick, isRunActive = false, showWelcomeExperience, welcomeExperienceComponent, activeWorkflow, userHasInteracted = false, queuedMessages = [], hasRealMessages = false, onCancelQueuedMessage, onUpdateQueuedMessage, onPasteImage, onClearQueue, agentName, onAddRepository, onUploadFile, workflowSlot, modelSlot, projectName }) => {
const [sendingChat, setSendingChat] = useState(false);
const showSystemMessages = false;
const [waitingDotCount, setWaitingDotCount] = useState(0);
@@ -294,6 +295,7 @@ const MessagesTab: React.FC<MessagesTabProps> = ({ session, streamMessages, chat
onAddRepository={onAddRepository}
onUploadFile={onUploadFile}
workflowSlot={workflowSlot}
+ modelSlot={modelSlot}
projectName={projectName}
/>
diff --git a/components/frontend/src/hooks/agui/event-handlers.ts b/components/frontend/src/hooks/agui/event-handlers.ts
old mode 100644
new mode 100755
index 80996bc5c..590e140f5
--- a/components/frontend/src/hooks/agui/event-handlers.ts
+++ b/components/frontend/src/hooks/agui/event-handlers.ts
@@ -1062,6 +1062,20 @@ function handleCustomEvent(
return { ...state, backgroundTasks: tasks }
}
+ // Model switch confirmation — inject a system message into the conversation
+ if (name === 'ambient:model_switched') {
+ const previousModel = value.previousModel as string
+ const newModel = value.newModel as string
+ const msg: PlatformMessage = {
+ id: `model-switch-${Date.now()}`,
+ role: 'assistant',
+ content: `Model switched from **${previousModel}** to **${newModel}**`,
+ timestamp: new Date().toISOString(),
+ metadata: { isModelSwitch: true },
+ }
+ return { ...state, messages: [...state.messages, msg] }
+ }
+
// Other custom events (hooks) — pass through unchanged
return state
}
diff --git a/components/frontend/src/services/api/sessions.ts b/components/frontend/src/services/api/sessions.ts
index 2cc1fd1cc..e1489c64a 100755
--- a/components/frontend/src/services/api/sessions.ts
+++ b/components/frontend/src/services/api/sessions.ts
@@ -302,6 +302,20 @@ export async function saveToGoogleDrive(
);
}
+/**
+ * Switch the LLM model for a running session
+ */
+export async function switchSessionModel(
+ projectName: string,
+ sessionName: string,
+ model: string
+): Promise<{ message: string; model: string; previousModel?: string }> {
+ return apiClient.post(
+ `/projects/${projectName}/agentic-sessions/${sessionName}/model`,
+ { model }
+ );
+}
+
// --- Capabilities ---
export type CapabilitiesResponse = {
diff --git a/components/frontend/src/services/queries/use-sessions.ts b/components/frontend/src/services/queries/use-sessions.ts
old mode 100644
new mode 100755
index 0a5b89109..19933b0a8
--- a/components/frontend/src/services/queries/use-sessions.ts
+++ b/components/frontend/src/services/queries/use-sessions.ts
@@ -345,6 +345,31 @@ export function useUpdateSessionDisplayName() {
});
}
+/**
+ * Hook to switch the LLM model for a running session
+ */
+export function useSwitchSessionModel() {
+ const queryClient = useQueryClient();
+
+ return useMutation({
+ mutationFn: ({
+ projectName,
+ sessionName,
+ model,
+ }: {
+ projectName: string;
+ sessionName: string;
+ model: string;
+ }) => sessionsApi.switchSessionModel(projectName, sessionName, model),
+ onSuccess: (_data, { projectName, sessionName }) => {
+ queryClient.invalidateQueries({
+ queryKey: sessionKeys.detail(projectName, sessionName),
+ refetchType: 'all',
+ });
+ },
+ });
+}
+
/**
* Hook to fetch session export data (AG-UI events + legacy messages)
*/
diff --git a/components/runners/ambient-runner/ambient_runner/app.py b/components/runners/ambient-runner/ambient_runner/app.py
old mode 100644
new mode 100755
index f73b2d6df..c8a0b464e
--- a/components/runners/ambient-runner/ambient_runner/app.py
+++ b/components/runners/ambient-runner/ambient_runner/app.py
@@ -222,6 +222,10 @@ def add_ambient_endpoints(
app.include_router(interrupt_router)
app.include_router(health_router)
+ from ambient_runner.endpoints.model import router as model_router
+
+ app.include_router(model_router)
+
# Optional platform endpoints
if enable_capabilities:
from ambient_runner.endpoints.capabilities import router as cap_router
diff --git a/components/runners/ambient-runner/ambient_runner/endpoints/model.py b/components/runners/ambient-runner/ambient_runner/endpoints/model.py
new file mode 100644
index 000000000..4ff4a0fd0
--- /dev/null
+++ b/components/runners/ambient-runner/ambient_runner/endpoints/model.py
@@ -0,0 +1,120 @@
+"""POST /model — Switch the LLM model at runtime."""
+
+import asyncio
+import logging
+import os
+
+from fastapi import APIRouter, HTTPException, Request
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter()
+
+# Serialise model changes to prevent concurrent switches
+_model_change_lock = asyncio.Lock()
+
+
+@router.post("/model")
+async def switch_model(request: Request):
+ """Switch the LLM model used by this session.
+
+ The agent must be idle (not mid-generation). If a run is in
+ progress the endpoint returns 422.
+ """
+ bridge = request.app.state.bridge
+ context = bridge.context
+ if not context:
+ raise HTTPException(status_code=503, detail="Context not initialized")
+
+ body = await request.json()
+ new_model = (body.get("model") or "").strip()
+
+ if not new_model:
+ raise HTTPException(status_code=400, detail="model is required")
+
+ previous_model = os.getenv("LLM_MODEL", "")
+
+ if new_model == previous_model:
+ return {
+ "message": "Model unchanged",
+ "model": new_model,
+ }
+
+ # Check if agent is mid-generation.
+ # The session manager holds a per-thread asyncio.Lock during runs.
+ session_manager = getattr(bridge, "_session_manager", None)
+ if session_manager:
+ thread_id = context.session_id if context else ""
+ lock = session_manager.get_lock(thread_id) if thread_id else None
+ if lock and lock.locked():
+ raise HTTPException(
+ status_code=422,
+ detail="Cannot switch model while agent is generating a response. Wait for the current turn to complete.",
+ )
+
+ # Fast-reject if another switch is already in progress.
+ # asyncio is single-threaded, so no yield between locked() and acquire().
+ if _model_change_lock.locked():
+ raise HTTPException(
+ status_code=409,
+ detail="A model switch is already in progress",
+ )
+ async with _model_change_lock:
+ return await _perform_model_switch(bridge, context, new_model, previous_model)
+
+
+async def _perform_model_switch(bridge, context, new_model: str, previous_model: str) -> dict:
+ """Execute the model switch: update env, rebuild adapter, emit event."""
+ logger.info(f"Switching model from '{previous_model}' to '{new_model}'")
+
+ # Update environment variable (read by setup_sdk_authentication on next init)
+ os.environ["LLM_MODEL"] = new_model
+
+ # Also update the Vertex ID mapping if applicable
+ use_vertex = os.getenv("USE_VERTEX", "").strip().lower() in ("1", "true", "yes")
+ if use_vertex:
+ # Clear the manifest override so auth.py re-derives from the new LLM_MODEL
+ os.environ.pop("LLM_MODEL_VERTEX_ID", None)
+
+ # Emit confirmation event BEFORE mark_dirty destroys the session manager
+ _emit_model_switched_event(bridge, context, new_model, previous_model)
+
+ # Signal adapter rebuild — stops current workers, preserves session IDs
+ bridge.mark_dirty()
+
+ logger.info(f"Model switch complete: {previous_model} -> {new_model}")
+
+ return {
+ "message": "Model switched",
+ "model": new_model,
+ "previousModel": previous_model,
+ }
+
+
+def _emit_model_switched_event(bridge, context, new_model: str, previous_model: str):
+ """Push a custom AG-UI event to notify the frontend of the model switch."""
+ try:
+ from ag_ui.core import CustomEvent, EventType
+
+ event = CustomEvent(
+ type=EventType.CUSTOM,
+ name="ambient:model_switched",
+ value={
+ "previousModel": previous_model,
+ "newModel": new_model,
+ },
+ )
+
+ # Route to the between-run event queue so the frontend picks it up
+ session_manager = getattr(bridge, "_session_manager", None)
+ if session_manager:
+ thread_id = context.session_id if context else ""
+ worker = session_manager.get_existing(thread_id)
+ if worker:
+ worker._between_run_queue.put_nowait(event)
+ logger.info("Model switch event emitted to between-run queue")
+ return
+
+ logger.warning("No active worker to emit model switch event")
+ except Exception as e:
+ logger.warning(f"Failed to emit model switch event: {e}")
diff --git a/components/runners/ambient-runner/tests/test_model_endpoint.py b/components/runners/ambient-runner/tests/test_model_endpoint.py
new file mode 100644
index 000000000..3f39e5fc0
--- /dev/null
+++ b/components/runners/ambient-runner/tests/test_model_endpoint.py
@@ -0,0 +1,167 @@
+"""Unit tests for the POST /model endpoint."""
+
+import asyncio
+from unittest.mock import MagicMock, patch
+
+import pytest
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+
+from ambient_runner.endpoints.model import router
+
+
+def _make_mock_bridge(
+ *,
+ session_id="test-session",
+ has_context=True,
+ lock_locked=False,
+ has_worker=True,
+):
+ """Create a mock bridge with configurable session manager and context."""
+ bridge = MagicMock()
+
+ if has_context:
+ bridge.context = MagicMock()
+ bridge.context.session_id = session_id
+ else:
+ bridge.context = None
+
+ # Session manager with per-thread lock
+ lock = asyncio.Lock()
+ if lock_locked:
+ # Simulate a locked lock by acquiring it (non-async safe for test setup)
+ lock._locked = True
+
+ session_manager = MagicMock()
+ session_manager.get_lock.return_value = lock
+
+ if has_worker:
+ worker = MagicMock()
+ worker._between_run_queue = asyncio.Queue()
+ session_manager.get_existing.return_value = worker
+ else:
+ session_manager.get_existing.return_value = None
+
+ bridge._session_manager = session_manager
+
+ return bridge
+
+
+@pytest.fixture(autouse=True)
+def _reset_model_change_lock():
+ """Ensure the module-level _model_change_lock is released between tests."""
+ from ambient_runner.endpoints import model as mod
+
+ # Replace with a fresh lock so no test leaks state
+ mod._model_change_lock = asyncio.Lock()
+ yield
+
+
+@pytest.fixture
+def make_client():
+ """Factory to create a test client with a mock bridge."""
+
+ def _factory(*, env_model="claude-sonnet-4-5", **bridge_kwargs):
+ app = FastAPI()
+ app.state.bridge = _make_mock_bridge(**bridge_kwargs)
+ app.include_router(router)
+ with patch.dict("os.environ", {"LLM_MODEL": env_model}):
+ client = TestClient(app)
+ return client, app.state.bridge
+
+ return _factory
+
+
+class TestModelEndpoint:
+ """Test POST /model request handling."""
+
+ def test_success_switches_model(self, make_client):
+ """POST /model with a valid new model returns 200 with model and previousModel."""
+ with patch.dict("os.environ", {"LLM_MODEL": "claude-sonnet-4-5"}):
+ client, bridge = make_client(env_model="claude-sonnet-4-5")
+ resp = client.post("/model", json={"model": "claude-opus-4"})
+
+ assert resp.status_code == 200
+ data = resp.json()
+ assert data["message"] == "Model switched"
+ assert data["model"] == "claude-opus-4"
+ assert data["previousModel"] == "claude-sonnet-4-5"
+ bridge.mark_dirty.assert_called_once()
+
+ def test_empty_model_returns_400(self, make_client):
+ """POST /model with an empty model string returns 400."""
+ client, _ = make_client()
+ resp = client.post("/model", json={"model": ""})
+
+ assert resp.status_code == 400
+ assert "model is required" in resp.json()["detail"]
+
+ def test_whitespace_only_model_returns_400(self, make_client):
+ """POST /model with whitespace-only model returns 400."""
+ client, _ = make_client()
+ resp = client.post("/model", json={"model": " "})
+
+ assert resp.status_code == 400
+ assert "model is required" in resp.json()["detail"]
+
+ def test_missing_model_field_returns_400(self, make_client):
+ """POST /model with no model field in body returns 400."""
+ client, _ = make_client()
+ resp = client.post("/model", json={})
+
+ assert resp.status_code == 400
+ assert "model is required" in resp.json()["detail"]
+
+ def test_same_model_returns_unchanged(self, make_client):
+ """POST /model with same model as current returns 200 with 'Model unchanged'."""
+ with patch.dict("os.environ", {"LLM_MODEL": "claude-sonnet-4-5"}):
+ client, bridge = make_client(env_model="claude-sonnet-4-5")
+ resp = client.post("/model", json={"model": "claude-sonnet-4-5"})
+
+ assert resp.status_code == 200
+ data = resp.json()
+ assert data["message"] == "Model unchanged"
+ assert data["model"] == "claude-sonnet-4-5"
+ assert "previousModel" not in data
+ bridge.mark_dirty.assert_not_called()
+
+ def test_context_not_initialized_returns_503(self, make_client):
+ """POST /model when bridge.context is None returns 503."""
+ client, _ = make_client(has_context=False)
+ resp = client.post("/model", json={"model": "claude-opus-4"})
+
+ assert resp.status_code == 503
+ assert "Context not initialized" in resp.json()["detail"]
+
+ def test_locked_run_returns_422(self, make_client):
+ """POST /model while agent is mid-generation returns 422."""
+ client, _ = make_client(lock_locked=True)
+ with patch.dict("os.environ", {"LLM_MODEL": "claude-sonnet-4-5"}):
+ resp = client.post("/model", json={"model": "claude-opus-4"})
+
+ assert resp.status_code == 422
+ assert "Cannot switch model" in resp.json()["detail"]
+
+ def test_updates_env_variable(self, make_client):
+ """POST /model updates the LLM_MODEL environment variable."""
+ with patch.dict("os.environ", {"LLM_MODEL": "claude-sonnet-4-5"}):
+ client, _ = make_client(env_model="claude-sonnet-4-5")
+ resp = client.post("/model", json={"model": "claude-opus-4"})
+ import os
+
+ assert resp.status_code == 200
+ assert os.environ["LLM_MODEL"] == "claude-opus-4"
+
+ def test_emits_event_to_worker_queue(self, make_client):
+ """POST /model emits a model_switched event to the between-run queue."""
+ with patch.dict("os.environ", {"LLM_MODEL": "claude-sonnet-4-5"}):
+ client, bridge = make_client(env_model="claude-sonnet-4-5")
+ resp = client.post("/model", json={"model": "claude-opus-4"})
+
+ assert resp.status_code == 200
+ worker = bridge._session_manager.get_existing.return_value
+ assert not worker._between_run_queue.empty()
+ event = worker._between_run_queue.get_nowait()
+ assert event.name == "ambient:model_switched"
+ assert event.value["newModel"] == "claude-opus-4"
+ assert event.value["previousModel"] == "claude-sonnet-4-5"