lightspeed-core · tisnik · Oct 24, 2025 · Oct 24, 2025
diff --git a/src/app/endpoints/query.py b/src/app/endpoints/query.py
@@ -18,6 +18,7 @@
 from llama_stack_client.types.agents.turn_create_params import (
     Toolgroup,
     ToolgroupAgentToolGroupWithArgs,
+    Document,
 )
 from llama_stack_client.types.model_list_response import ModelListResponse
 from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
@@ -692,10 +693,20 @@ async def retrieve_response(  # pylint: disable=too-many-locals,too-many-branche
         if not toolgroups:
             toolgroups = None
 
+    # TODO: LCORE-881 - Drop text/plain remap once Llama Stack supports these mime types
+    documents: list[Document] = [
+        (
+            {"content": doc["content"], "mime_type": "text/plain"}
+            if doc["mime_type"].lower() in ("application/json", "application/xml")
+            else doc
+        )
+        for doc in query_request.get_documents()
+    ]
+
     response = await agent.create_turn(
         messages=[UserMessage(role="user", content=query_request.query)],
         session_id=session_id,
-        documents=query_request.get_documents(),
+        documents=documents,
         stream=False,
         toolgroups=toolgroups,
     )

diff --git a/src/app/endpoints/streaming_query.py b/src/app/endpoints/streaming_query.py
@@ -1,4 +1,4 @@
-"""Handler for REST API call to provide answer to streaming query."""  # pylint: disable=too-many-lines
+"""Handler for REST API call to provide answer to streaming query."""  # pylint: disable=too-many-lines,too-many-locals,fixme
 
 import ast
 import json
@@ -21,6 +21,7 @@
 )
 from llama_stack_client.types.shared import ToolCall
 from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
+from llama_stack_client.types.agents.turn_create_params import Document
 
 from app.database import get_session
 from app.endpoints.query import (
@@ -62,6 +63,7 @@
 from utils.transcripts import store_transcript
 from utils.types import TurnSummary
 
+
 logger = logging.getLogger("app.endpoints.handlers")
 router = APIRouter(tags=["streaming_query"])
 
@@ -1039,10 +1041,20 @@ async def retrieve_response(
         if not toolgroups:
             toolgroups = None
 
+    # TODO: LCORE-881 - Drop text/plain remap once Llama Stack supports these mime types
+    documents: list[Document] = [
+        (
+            {"content": doc["content"], "mime_type": "text/plain"}
+            if doc["mime_type"].lower() in ("application/json", "application/xml")
+            else doc
+        )
+        for doc in query_request.get_documents()
+    ]
+
     response = await agent.create_turn(
         messages=[UserMessage(role="user", content=query_request.query)],
         session_id=session_id,
-        documents=query_request.get_documents(),
+        documents=documents,
         stream=True,
         toolgroups=toolgroups,
     )

diff --git a/tests/e2e/features/query.feature b/tests/e2e/features/query.feature
@@ -113,4 +113,30 @@ Scenario: Check if LLM responds for query request with error for missing query
     {"query": "Say hello"}
     """
      Then The status code of the response is 500
-      And The body of the response contains Unable to connect to Llama Stack
+      And The body of the response contains Unable to connect to Llama Stack
+
+  Scenario: Check if LLM responds properly when XML and JSON attachments are sent
+    Given The system is in default state
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    When I use "query" to ask question with authorization header
+    """
+    {
+      "query": "Say hello",
+      "attachments": [
+        {
+          "attachment_type": "configuration",
+          "content": "<note><to>User</to><from>System</from><message>Hello</message></note>",
+          "content_type": "application/xml"
+        },
+        {
+          "attachment_type": "configuration",
+          "content": "{\"foo\": \"bar\"}",
+          "content_type": "application/json"
+        }
+      ],
+      "model": "{MODEL}",
+      "provider": "{PROVIDER}",
+      "system_prompt": "You are a helpful assistant"
+    }
+    """
+    Then The status code of the response is 200
diff --git a/tests/e2e/features/streaming_query.feature b/tests/e2e/features/streaming_query.feature
@@ -88,4 +88,30 @@ Feature: streaming_query endpoint API tests
     {"query": "Say hello", "model": "{MODEL}"}
     """
      Then The status code of the response is 422
-      And The body of the response contains Value error, Provider must be specified if model is specified
+      And The body of the response contains Value error, Provider must be specified if model is specified
+
+  Scenario: Check if LLM responds properly when XML and JSON attachments are sent
+    Given The system is in default state
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    When I use "streaming_query" to ask question with authorization header
+    """
+    {
+      "query": "Say hello",
+      "attachments": [
+        {
+          "attachment_type": "configuration",
+          "content": "<note><to>User</to><from>System</from><message>Hello</message></note>",
+          "content_type": "application/xml"
+        },
+        {
+          "attachment_type": "configuration",
+          "content": "{\"foo\": \"bar\"}",
+          "content_type": "application/json"
+        }
+      ],
+      "model": "{MODEL}",
+      "provider": "{PROVIDER}",
+      "system_prompt": "You are a helpful assistant"
+    }
+    """
+    Then The status code of the response is 200