Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions model-engine/model_engine_server/api/llms_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,12 +332,14 @@ async def create_completion_sync_task(
metric_metadata,
)
return response
except UpstreamServiceError:
except UpstreamServiceError as exc:
request_id = LoggerTagManager.get(LoggerTagKey.REQUEST_ID)
logger.exception(f"Upstream service error for request {request_id}")
logger.exception(
f"Upstream service error for request {request_id}. Error detail: {str(exc.content)}"
)
raise HTTPException(
status_code=500,
detail=f"Upstream service error for request_id {request_id}.",
detail=f"Upstream service error for request_id {request_id}",
)
except (ObjectNotFoundException, ObjectNotAuthorizedException) as exc:
raise HTTPException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1555,9 +1555,11 @@ async def execute(
),
)
else:
return CompletionSyncV1Response(
request_id=request_id,
output=None,
raise UpstreamServiceError(
status_code=500,
content=predict_result.traceback.encode("utf-8")
if predict_result.traceback is not None
else b"",
)
elif (
endpoint_content.inference_framework == LLMInferenceFramework.TEXT_GENERATION_INFERENCE
Expand Down Expand Up @@ -1589,9 +1591,11 @@ async def execute(
)

if predict_result.status != TaskStatus.SUCCESS or predict_result.result is None:
return CompletionSyncV1Response(
request_id=request_id,
output=None,
raise UpstreamServiceError(
status_code=500,
content=predict_result.traceback.encode("utf-8")
if predict_result.traceback is not None
else b"",
)

output = json.loads(predict_result.result["result"])
Expand Down Expand Up @@ -1628,9 +1632,11 @@ async def execute(
)

if predict_result.status != TaskStatus.SUCCESS or predict_result.result is None:
return CompletionSyncV1Response(
request_id=request_id,
output=None,
raise UpstreamServiceError(
status_code=500,
content=predict_result.traceback.encode("utf-8")
if predict_result.traceback is not None
else b"",
)

output = json.loads(predict_result.result["result"])
Expand Down Expand Up @@ -1670,9 +1676,11 @@ async def execute(
)

if predict_result.status != TaskStatus.SUCCESS or predict_result.result is None:
return CompletionSyncV1Response(
request_id=request_id,
output=None,
raise UpstreamServiceError(
status_code=500,
content=predict_result.traceback.encode("utf-8")
if predict_result.traceback is not None
else b"",
)

output = json.loads(predict_result.result["result"])
Expand Down Expand Up @@ -1706,9 +1714,11 @@ async def execute(
)

if predict_result.status != TaskStatus.SUCCESS or predict_result.result is None:
return CompletionSyncV1Response(
request_id=request_id,
output=None,
raise UpstreamServiceError(
status_code=500,
content=predict_result.traceback.encode("utf-8")
if predict_result.traceback is not None
else b"",
)

output = json.loads(predict_result.result["result"])
Expand Down
33 changes: 11 additions & 22 deletions model-engine/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ boto3==1.28.1
# celery
# kombu
boto3-stubs[essential]==1.26.67
# via -r model-engine/requirements.in
# via
# -r model-engine/requirements.in
# boto3-stubs
botocore==1.31.1
# via
# -r model-engine/requirements.in
Expand All @@ -71,15 +73,15 @@ cachetools==5.3.1
cattrs==23.1.2
# via ddtrace
celery[redis,sqs,tblib]==5.3.1
# via -r model-engine/requirements.in
# via
# -r model-engine/requirements.in
# celery
certifi==2023.7.22
# via
# datadog-api-client
# kubernetes
# kubernetes-asyncio
# requests
cffi==1.15.1
# via cryptography
charset-normalizer==3.2.0
# via
# aiohttp
Expand Down Expand Up @@ -107,8 +109,6 @@ commonmark==0.9.1
# via rich
croniter==1.4.1
# via -r model-engine/requirements.in
cryptography==41.0.3
# via secretstorage
dataclasses-json==0.5.9
# via -r model-engine/requirements.in
datadog==0.47.0
Expand All @@ -127,7 +127,7 @@ docutils==0.20.1
# via readme-renderer
envier==0.4.0
# via ddtrace
exceptiongroup==1.1.3
exceptiongroup==1.2.0
# via
# anyio
# cattrs
Expand Down Expand Up @@ -185,7 +185,7 @@ importlib-metadata==6.8.0
# keyring
# quart
# twine
importlib-resources==6.1.0
importlib-resources==6.1.1
# via
# alembic
# jsonschema
Expand All @@ -195,10 +195,6 @@ itsdangerous==2.1.2
# via quart
jaraco-classes==3.3.0
# via keyring
jeepney==0.8.0
# via
# keyring
# secretstorage
jinja2==3.0.3
# via
# -r model-engine/requirements.in
Expand Down Expand Up @@ -300,8 +296,6 @@ pyasn1==0.5.0
# rsa
pyasn1-modules==0.3.0
# via google-auth
pycparser==2.21
# via cffi
pycurl==7.45.2
# via
# -r model-engine/requirements.in
Expand All @@ -326,7 +320,7 @@ python-dateutil==2.8.2
# pg8000
python-multipart==0.0.6
# via -r model-engine/requirements.in
pyyaml==6.0
pyyaml==6.0.1
# via
# huggingface-hub
# kubeconfig
Expand Down Expand Up @@ -379,8 +373,6 @@ safetensors==0.4.0
# via transformers
scramp==1.4.4
# via pg8000
secretstorage==3.3.3
# via keyring
sentencepiece==0.1.99
# via -r model-engine/requirements.in
sh==1.14.3
Expand Down Expand Up @@ -409,6 +401,7 @@ sqlalchemy[asyncio]==2.0.4
# via
# -r model-engine/requirements.in
# alembic
# sqlalchemy
sse-starlette==1.6.1
# via -r model-engine/requirements.in
sseclient-py==1.7.2
Expand Down Expand Up @@ -525,8 +518,4 @@ zipp==3.16.0
# importlib-resources

# The following packages are considered to be unsafe in a requirements file:
setuptools==68.0.0
# via
# gunicorn
# kubernetes
# kubernetes-asyncio
# setuptools
4 changes: 4 additions & 0 deletions model-engine/tests/unit/api/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def get_test_client(
fake_file_system_gateway_contents=None,
fake_trigger_repository_contents=None,
fake_cron_job_gateway_contents=None,
fake_sync_inference_content=None,
) -> TestClient:
if fake_docker_image_batch_job_gateway_contents is None:
fake_docker_image_batch_job_gateway_contents = {}
Expand All @@ -131,6 +132,8 @@ def get_test_client(
fake_trigger_repository_contents = {}
if fake_cron_job_gateway_contents is None:
fake_cron_job_gateway_contents = {}
if fake_sync_inference_content is None:
fake_sync_inference_content = {}
app.dependency_overrides[get_external_interfaces] = get_repositories_generator_wrapper(
fake_docker_repository_image_always_exists=fake_docker_repository_image_always_exists,
fake_model_bundle_repository_contents=fake_model_bundle_repository_contents,
Expand All @@ -145,6 +148,7 @@ def get_test_client(
fake_file_system_gateway_contents=fake_file_system_gateway_contents,
fake_trigger_repository_contents=fake_trigger_repository_contents,
fake_cron_job_gateway_contents=fake_cron_job_gateway_contents,
fake_sync_inference_content=fake_sync_inference_content,
)
app.dependency_overrides[get_external_interfaces_read_only] = app.dependency_overrides[
get_external_interfaces
Expand Down
19 changes: 18 additions & 1 deletion model-engine/tests/unit/api/test_llms.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import pytest
from model_engine_server.common.dtos.llms import GetLLMModelEndpointV1Response
from model_engine_server.common.dtos.tasks import SyncEndpointPredictV1Response, TaskStatus
from model_engine_server.domain.entities import ModelEndpoint


Expand Down Expand Up @@ -102,14 +103,30 @@ def test_completion_sync_success(
fake_batch_job_record_repository_contents={},
fake_batch_job_progress_gateway_contents={},
fake_docker_image_batch_job_bundle_repository_contents={},
fake_sync_inference_content=SyncEndpointPredictV1Response(
status=TaskStatus.SUCCESS,
result={
"result": """{
"text": "output",
"count_prompt_tokens": 1,
"count_output_tokens": 1
}"""
},
traceback=None,
),
)
response_1 = client.post(
f"/v1/llm/completions-sync?model_endpoint_name={llm_model_endpoint_sync[0].record.name}",
auth=("no_user", ""),
json=completion_sync_request,
)
assert response_1.status_code == 200
assert response_1.json()["output"] is None
assert response_1.json()["output"] == {
"text": "output",
"num_completion_tokens": 1,
"num_prompt_tokens": 1,
"tokens": None,
}
assert response_1.json().keys() == {"output", "request_id"}


Expand Down
Loading