Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ data:
- --port
- "${FORWARDER_PORT}"
- --num-workers
- "${PER_WORKER}"
- "${FORWARDER_WORKER_COUNT}"
- --set
- "forwarder.sync.predict_route=${PREDICT_ROUTE}"
- --set
Expand Down Expand Up @@ -161,7 +161,7 @@ data:
- --port
- "${FORWARDER_PORT}"
- --num-workers
- "${PER_WORKER}"
- "${FORWARDER_WORKER_COUNT}"
- --set
- "forwarder.sync.predict_route=${PREDICT_ROUTE}"
- --set
Expand Down
1 change: 1 addition & 0 deletions model-engine/model_engine_server/common/resource_limits.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
FORWARDER_CPU_USAGE = 1
FORWARDER_MEMORY_USAGE = "2Gi"
FORWARDER_STORAGE_USAGE = "1G"
FORWARDER_WORKER_COUNT = 2

logger = make_logger(filename_wo_ext(__name__))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
FORWARDER_CPU_USAGE,
FORWARDER_MEMORY_USAGE,
FORWARDER_STORAGE_USAGE,
FORWARDER_WORKER_COUNT,
)
from model_engine_server.common.serialization_utils import python_json_to_b64
from model_engine_server.core.config import infra_config
Expand Down Expand Up @@ -136,13 +137,15 @@ class _SyncRunnableImageDeploymentArguments(TypedDict):
"""Keyword-arguments for substituting into sync deployment templates."""

FORWARDER_PORT: int
FORWARDER_WORKER_COUNT: int


class _StreamingDeploymentArguments(TypedDict):
"""Keyword-arguments for substituting into streaming deployment templates."""

FORWARDER_PORT: int
STREAMING_PREDICT_ROUTE: str
FORWARDER_WORKER_COUNT: int


class _RunnableImageDeploymentArguments(_BaseDeploymentArguments):
Expand Down Expand Up @@ -691,6 +694,7 @@ def get_endpoint_resource_arguments_from_request(
USER_CONTAINER_PORT=USER_CONTAINER_PORT,
# Streaming Deployment Arguments
FORWARDER_PORT=FORWARDER_PORT,
FORWARDER_WORKER_COUNT=FORWARDER_WORKER_COUNT,
)
elif endpoint_resource_name == "deployment-runnable-image-streaming-gpu":
assert isinstance(flavor, StreamingEnhancedRunnableImageFlavor)
Expand Down Expand Up @@ -735,6 +739,7 @@ def get_endpoint_resource_arguments_from_request(
USER_CONTAINER_PORT=USER_CONTAINER_PORT,
# Streaming Deployment Arguments
FORWARDER_PORT=FORWARDER_PORT,
FORWARDER_WORKER_COUNT=FORWARDER_WORKER_COUNT,
# GPU Deployment Arguments
GPU_TYPE=build_endpoint_request.gpu_type.value,
GPUS=build_endpoint_request.gpus,
Expand Down Expand Up @@ -780,6 +785,7 @@ def get_endpoint_resource_arguments_from_request(
USER_CONTAINER_PORT=USER_CONTAINER_PORT,
# Sync Deployment Arguments
FORWARDER_PORT=FORWARDER_PORT,
FORWARDER_WORKER_COUNT=FORWARDER_WORKER_COUNT,
)
elif endpoint_resource_name == "deployment-runnable-image-sync-gpu":
assert isinstance(flavor, RunnableImageLike)
Expand Down Expand Up @@ -823,6 +829,7 @@ def get_endpoint_resource_arguments_from_request(
USER_CONTAINER_PORT=USER_CONTAINER_PORT,
# Sync Deployment Arguments
FORWARDER_PORT=FORWARDER_PORT,
FORWARDER_WORKER_COUNT=FORWARDER_WORKER_COUNT,
# GPU Deployment Arguments
GPU_TYPE=build_endpoint_request.gpu_type.value,
GPUS=build_endpoint_request.gpus,
Expand Down Expand Up @@ -982,6 +989,7 @@ def get_endpoint_resource_arguments_from_request(
USER_CONTAINER_PORT=USER_CONTAINER_PORT,
# Sync Deployment Arguments
FORWARDER_PORT=FORWARDER_PORT,
FORWARDER_WORKER_COUNT=FORWARDER_WORKER_COUNT,
# Triton Deployment Arguments
TRITON_MODEL_REPOSITORY=flavor.triton_model_repository,
TRITON_CPUS=str(flavor.triton_num_cpu),
Expand Down Expand Up @@ -1033,6 +1041,7 @@ def get_endpoint_resource_arguments_from_request(
USER_CONTAINER_PORT=USER_CONTAINER_PORT,
# Sync Deployment Arguments
FORWARDER_PORT=FORWARDER_PORT,
FORWARDER_WORKER_COUNT=FORWARDER_WORKER_COUNT,
# GPU Deployment Arguments
GPU_TYPE=build_endpoint_request.gpu_type.value,
GPUS=build_endpoint_request.gpus,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -610,7 +610,7 @@ data:
- --port
- "${FORWARDER_PORT}"
- --num-workers
- "${PER_WORKER}"
- "${FORWARDER_WORKER_COUNT}"
- --set
- "forwarder.sync.predict_route=${PREDICT_ROUTE}"
- --set
Expand Down Expand Up @@ -878,7 +878,7 @@ data:
- --port
- "${FORWARDER_PORT}"
- --num-workers
- "${PER_WORKER}"
- "${FORWARDER_WORKER_COUNT}"
- --set
- "forwarder.sync.predict_route=${PREDICT_ROUTE}"
- --set
Expand Down Expand Up @@ -1102,7 +1102,7 @@ data:
- --port
- "${FORWARDER_PORT}"
- --num-workers
- "${PER_WORKER}"
- "${FORWARDER_WORKER_COUNT}"
- --set
- "forwarder.sync.predict_route=${PREDICT_ROUTE}"
- --set
Expand Down Expand Up @@ -1845,7 +1845,7 @@ data:
- --port
- "${FORWARDER_PORT}"
- --num-workers
- "${PER_WORKER}"
- "${FORWARDER_WORKER_COUNT}"
- --set
- "forwarder.sync.predict_route=${PREDICT_ROUTE}"
- --set
Expand Down Expand Up @@ -2120,7 +2120,7 @@ data:
- --port
- "${FORWARDER_PORT}"
- --num-workers
- "${PER_WORKER}"
- "${FORWARDER_WORKER_COUNT}"
- --set
- "forwarder.sync.predict_route=${PREDICT_ROUTE}"
- --set
Expand Down Expand Up @@ -2351,7 +2351,7 @@ data:
- --port
- "${FORWARDER_PORT}"
- --num-workers
- "${PER_WORKER}"
- "${FORWARDER_WORKER_COUNT}"
- --set
- "forwarder.sync.predict_route=${PREDICT_ROUTE}"
- --set
Expand Down