You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboardExpand all lines: docs/openapi.json
+94-18Lines changed: 94 additions & 18 deletions
Original file line number
Diff line number
Diff line change
@@ -1293,11 +1293,11 @@
1293
1293
"/v1/query": {
1294
1294
"post": {
1295
1295
"tags": [
1296
-
"query_v1"
1296
+
"query"
1297
1297
],
1298
-
"summary": "Query Endpoint Handler V1",
1299
-
"description": "Handle request to the /query endpoint using Responses API.\n\nThis is a wrapper around query_endpoint_handler_base that provides\nthe Responses API specific retrieve_response and get_topic_summary functions.\n\nReturns:\n QueryResponse: Contains the conversation ID and the LLM-generated response.",
"description": "Handle request to the /query endpoint using Responses API.\n\nProcesses a POST request to a query endpoint, forwarding the\nuser's query to a selected Llama Stack LLM and returning the generated response.\n\nReturns:\n QueryResponse: Contains the conversation ID and the LLM-generated response.\n\nRaises:\n HTTPException:\n - 401: Unauthorized - Missing or invalid credentials\n - 403: Forbidden - Insufficient permissions or model override not allowed\n - 404: Not Found - Conversation, model, or provider not found\n - 413: Prompt too long - Prompt exceeded model's context window size\n - 422: Unprocessable Entity - Request validation failed\n - 429: Quota limit exceeded - The token quota for model or user has been exceeded\n - 500: Internal Server Error - Configuration not loaded or other server errors\n - 503: Service Unavailable - Unable to connect to Llama Stack backend",
"cause": "The prompt exceeds the maximum allowed length.",
1468
+
"response": "Prompt is too long"
1469
+
}
1470
+
}
1471
+
}
1472
+
}
1473
+
}
1474
+
}
1475
+
},
1456
1476
"422": {
1457
1477
"description": "Request validation failed",
1458
1478
"content": {
@@ -1480,7 +1500,7 @@
1480
1500
"invalid value": {
1481
1501
"value": {
1482
1502
"detail": {
1483
-
"cause": "Invalid attatchment type: must be one of ['text/plain', 'application/json', 'application/yaml', 'application/xml']",
1503
+
"cause": "Invalid attachment type: must be one of ['text/plain', 'application/json', 'application/yaml', 'application/xml']",
1484
1504
"response": "Invalid attribute value"
1485
1505
}
1486
1506
}
@@ -1603,11 +1623,11 @@
1603
1623
"/v1/streaming_query": {
1604
1624
"post": {
1605
1625
"tags": [
1606
-
"streaming_query_v1"
1626
+
"streaming_query"
1607
1627
],
1608
-
"summary": "Streaming Query Endpoint Handler V1",
1609
-
"description": "Handle request to the /streaming_query endpoint using Responses API.\n\nReturns a streaming response using Server-Sent Events (SSE) format with\ncontent type text/event-stream.\n\nReturns:\nStreamingResponse: An HTTP streaming response yielding\nSSE-formatted events for the query lifecycle with content type\n text/event-stream.\n\nRaises:\n HTTPException:\n - 401: Unauthorized - Missing or invalid credentials\n - 403: Forbidden - Insufficient permissions or model override not allowed\n - 404: Not Found - Conversation, model, or provider not found\n - 422: Unprocessable Entity - Request validation failed\n - 429: Too Many Requests - Quota limit exceeded\n - 500: Internal Server Error - Configuration not loaded or other server errors\n - 503: Service Unavailable - Unable to connect to Llama Stack backend",
"description": "Handle request to the /streaming_query endpoint using Responses API.\n\nReturns a streaming response using Server-Sent Events (SSE) format with\ncontent type text/event-stream.\n\nReturns:\n SSE-formatted events for the query lifecycle.\n\nRaises:\n HTTPException:\n - 401: Unauthorized - Missing or invalid credentials\n - 403: Forbidden - Insufficient permissions or model override not allowed\n - 404: Not Found - Conversation, model, or provider not found\n - 413: Prompt too long - Prompt exceeded model's context window size\n - 422: Unprocessable Entity - Request validation failed\n - 429: Quota limit exceeded - The token quota for model or user has been exceeded\n - 500: Internal Server Error - Configuration not loaded or other server errors\n - 503: Service Unavailable - Unable to connect to Llama Stack backend",
"cause": "The prompt exceeds the maximum allowed length.",
1766
+
"response": "Prompt is too long"
1767
+
}
1768
+
}
1769
+
}
1770
+
}
1771
+
}
1772
+
}
1773
+
},
1734
1774
"422": {
1735
1775
"description": "Request validation failed",
1736
1776
"content": {
@@ -1758,7 +1798,7 @@
1758
1798
"invalid value": {
1759
1799
"value": {
1760
1800
"detail": {
1761
-
"cause": "Invalid attatchment type: must be one of ['text/plain', 'application/json', 'application/yaml', 'application/xml']",
1801
+
"cause": "Invalid attachment type: must be one of ['text/plain', 'application/json', 'application/yaml', 'application/xml']",
1762
1802
"response": "Invalid attribute value"
1763
1803
}
1764
1804
}
@@ -3756,7 +3796,7 @@
3756
3796
"invalid value": {
3757
3797
"value": {
3758
3798
"detail": {
3759
-
"cause": "Invalid attatchment type: must be one of ['text/plain', 'application/json', 'application/yaml', 'application/xml']",
3799
+
"cause": "Invalid attachment type: must be one of ['text/plain', 'application/json', 'application/yaml', 'application/xml']",
3760
3800
"response": "Invalid attribute value"
3761
3801
}
3762
3802
}
@@ -4278,7 +4318,7 @@
4278
4318
],
4279
4319
"summary": "Handle A2A Jsonrpc",
4280
4320
"description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response",
4281
-
"operationId": "handle_a2a_jsonrpc_a2a_get",
4321
+
"operationId": "handle_a2a_jsonrpc_a2a_post",
4282
4322
"responses": {
4283
4323
"200": {
4284
4324
"description": "Successful Response",
@@ -4296,7 +4336,7 @@
4296
4336
],
4297
4337
"summary": "Handle A2A Jsonrpc",
4298
4338
"description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response",
4299
-
"operationId": "handle_a2a_jsonrpc_a2a_get",
4339
+
"operationId": "handle_a2a_jsonrpc_a2a_post",
4300
4340
"responses": {
4301
4341
"200": {
4302
4342
"description": "Successful Response",
@@ -6975,14 +7015,16 @@
6975
7015
"url": {
6976
7016
"anyOf": [
6977
7017
{
6978
-
"type": "string"
7018
+
"type": "string",
7019
+
"minLength": 1,
7020
+
"format": "uri"
6979
7021
},
6980
7022
{
6981
7023
"type": "null"
6982
7024
}
6983
7025
],
6984
7026
"title": "Llama Stack URL",
6985
-
"description": "URL to Llama Stack service; used when library mode is disabled"
7027
+
"description": "URL to Llama Stack service; used when library mode is disabled. Must be a valid HTTP or HTTPS URL."
6986
7028
},
6987
7029
"api_key": {
6988
7030
"anyOf": [
@@ -7517,6 +7559,33 @@
7517
7559
"title": "PostgreSQLDatabaseConfiguration",
7518
7560
"description": "PostgreSQL database configuration.\n\nPostgreSQL database is used by Lightspeed Core Stack service for storing\ninformation about conversation IDs. It can also be leveraged to store\nconversation history and information about quota usage.\n\nUseful resources:\n\n- [Psycopg: connection classes](https://www.psycopg.org/psycopg3/docs/api/connections.html)\n- [PostgreSQL connection strings](https://www.connectionstrings.com/postgresql/)\n- [How to Use PostgreSQL in Python](https://www.freecodecamp.org/news/postgresql-in-python/)"
7519
7561
},
7562
+
"PromptTooLongResponse": {
7563
+
"properties": {
7564
+
"status_code": {
7565
+
"type": "integer",
7566
+
"title": "Status Code"
7567
+
},
7568
+
"detail": {
7569
+
"$ref": "#/components/schemas/DetailModel"
7570
+
}
7571
+
},
7572
+
"type": "object",
7573
+
"required": [
7574
+
"status_code",
7575
+
"detail"
7576
+
],
7577
+
"title": "PromptTooLongResponse",
7578
+
"description": "413 Payload Too Large - Prompt is too long.",
7579
+
"examples": [
7580
+
{
7581
+
"detail": {
7582
+
"cause": "The prompt exceeds the maximum allowed length.",
7583
+
"response": "Prompt is too long"
7584
+
},
7585
+
"label": "prompt too long"
7586
+
}
7587
+
]
7588
+
},
7520
7589
"ProviderHealthStatus": {
7521
7590
"properties": {
7522
7591
"provider_id": {
@@ -7965,7 +8034,7 @@
7965
8034
"truncated": {
7966
8035
"type": "boolean",
7967
8036
"title": "Truncated",
7968
-
"description": "Whether conversation history was truncated",
8037
+
"description": "Deprecated:Whether conversation history was truncated",
7969
8038
"default": false,
7970
8039
"examples": [
7971
8040
false,
@@ -9327,7 +9396,7 @@
9327
9396
},
9328
9397
{
9329
9398
"detail": {
9330
-
"cause": "Invalid attatchment type: must be one of ['text/plain', 'application/json', 'application/yaml', 'application/xml']",
9399
+
"cause": "Invalid attachment type: must be one of ['text/plain', 'application/json', 'application/yaml', 'application/xml']",
0 commit comments