Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion src/app/endpoints/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from llama_stack_client.types.agents.turn_create_params import (
Toolgroup,
ToolgroupAgentToolGroupWithArgs,
Document,
)
from llama_stack_client.types.model_list_response import ModelListResponse
from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
Expand Down Expand Up @@ -692,10 +693,20 @@ async def retrieve_response( # pylint: disable=too-many-locals,too-many-branche
if not toolgroups:
toolgroups = None

# TODO: LCORE-881 - Remove this text/plain fallback once Llama Stack accepts application/json and application/xml documents
documents: list[Document] = [
(
{"content": doc["content"], "mime_type": "text/plain"}
if doc["mime_type"].lower() in ("application/json", "application/xml")
else doc
)
for doc in query_request.get_documents()
]

response = await agent.create_turn(
messages=[UserMessage(role="user", content=query_request.query)],
session_id=session_id,
documents=query_request.get_documents(),
documents=documents,
stream=False,
toolgroups=toolgroups,
)
Expand Down
16 changes: 14 additions & 2 deletions src/app/endpoints/streaming_query.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Handler for REST API call to provide answer to streaming query.""" # pylint: disable=too-many-lines
"""Handler for REST API call to provide answer to streaming query.""" # pylint: disable=too-many-lines,too-many-locals,W0511

import ast
import json
Expand All @@ -21,6 +21,7 @@
)
from llama_stack_client.types.shared import ToolCall
from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
from llama_stack_client.types.agents.turn_create_params import Document

from app.database import get_session
from app.endpoints.query import (
Expand Down Expand Up @@ -62,6 +63,7 @@
from utils.transcripts import store_transcript
from utils.types import TurnSummary


logger = logging.getLogger("app.endpoints.handlers")
router = APIRouter(tags=["streaming_query"])

Expand Down Expand Up @@ -1039,10 +1041,20 @@ async def retrieve_response(
if not toolgroups:
toolgroups = None

# TODO: LCORE-881 - Remove this text/plain fallback once Llama Stack accepts application/json and application/xml documents
documents: list[Document] = [
(
{"content": doc["content"], "mime_type": "text/plain"}
if doc["mime_type"].lower() in ("application/json", "application/xml")
else doc
)
for doc in query_request.get_documents()
]

response = await agent.create_turn(
messages=[UserMessage(role="user", content=query_request.query)],
session_id=session_id,
documents=query_request.get_documents(),
documents=documents,
stream=True,
toolgroups=toolgroups,
)
Expand Down
28 changes: 27 additions & 1 deletion tests/e2e/features/query.feature
Original file line number Diff line number Diff line change
Expand Up @@ -113,4 +113,30 @@ Scenario: Check if LLM responds for query request with error for missing query
{"query": "Say hello"}
"""
Then The status code of the response is 500
And The body of the response contains Unable to connect to Llama Stack
And The body of the response contains Unable to connect to Llama Stack

# Sends a "query" request carrying two attachments, one declared as
# application/xml and one as application/json, and checks that the request succeeds.
# NOTE(review): {MODEL} and {PROVIDER} are presumably substituted by the e2e
# harness before the request is sent - confirm against the step definitions.
Scenario: Check if LLM responds properly when XML and JSON attachments are sent
Given The system is in default state
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
When I use "query" to ask question with authorization header
"""
{
"query": "Say hello",
"attachments": [
{
"attachment_type": "configuration",
"content": "<note><to>User</to><from>System</from><message>Hello</message></note>",
"content_type": "application/xml"
},
{
"attachment_type": "configuration",
"content": "{\"foo\": \"bar\"}",
"content_type": "application/json"
}
],
"model": "{MODEL}",
"provider": "{PROVIDER}",
"system_prompt": "You are a helpful assistant"
}
"""
# Only the HTTP status is asserted; the response body is not inspected.
Then The status code of the response is 200
28 changes: 27 additions & 1 deletion tests/e2e/features/streaming_query.feature
Original file line number Diff line number Diff line change
Expand Up @@ -88,4 +88,30 @@ Feature: streaming_query endpoint API tests
{"query": "Say hello", "model": "{MODEL}"}
"""
Then The status code of the response is 422
And The body of the response contains Value error, Provider must be specified if model is specified
And The body of the response contains Value error, Provider must be specified if model is specified

# Sends a "streaming_query" request carrying two attachments, one declared as
# application/xml and one as application/json, and checks that the request succeeds.
# NOTE(review): {MODEL} and {PROVIDER} are presumably substituted by the e2e
# harness before the request is sent - confirm against the step definitions.
Scenario: Check if LLM responds properly when XML and JSON attachments are sent
Given The system is in default state
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
When I use "streaming_query" to ask question with authorization header
"""
{
"query": "Say hello",
"attachments": [
{
"attachment_type": "configuration",
"content": "<note><to>User</to><from>System</from><message>Hello</message></note>",
"content_type": "application/xml"
},
{
"attachment_type": "configuration",
"content": "{\"foo\": \"bar\"}",
"content_type": "application/json"
}
],
"model": "{MODEL}",
"provider": "{PROVIDER}",
"system_prompt": "You are a helpful assistant"
}
"""
# Only the HTTP status is asserted; the streamed response body is not inspected.
Then The status code of the response is 200
Loading