diff --git a/charts/model-engine/templates/inference_framework_config.yaml b/charts/model-engine/templates/inference_framework_config.yaml
index d81d5be2a..d97d19207 100644
--- a/charts/model-engine/templates/inference_framework_config.yaml
+++ b/charts/model-engine/templates/inference_framework_config.yaml
@@ -1,7 +1,7 @@
 apiVersion: v1
 kind: ConfigMap
 metadata:
-  name: model-engine-inference-framework-latest-config
+  name: {{ include "modelEngine.fullname" . }}-inference-framework-latest-config
   labels:
     product: common
     team: infra
diff --git a/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py b/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py
index 5f68a5bd0..4f0e2fbfd 100644
--- a/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py
+++ b/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py
@@ -251,7 +251,11 @@
 NUM_DOWNSTREAM_REQUEST_RETRIES = 80  # has to be high enough so that the retries take the 5 minutes
 DOWNSTREAM_REQUEST_TIMEOUT_SECONDS = 5 * 60  # 5 minutes
 
-LATEST_INFERENCE_FRAMEWORK_CONFIG_MAP_NAME = "model-engine-inference-framework-latest-config"
+SERVICE_NAME = "model-engine"
+SERVICE_IDENTIFIER = os.getenv("SERVICE_IDENTIFIER")
+if SERVICE_IDENTIFIER:
+    SERVICE_NAME += f"-{SERVICE_IDENTIFIER}"
+LATEST_INFERENCE_FRAMEWORK_CONFIG_MAP_NAME = f"{SERVICE_NAME}-inference-framework-latest-config"
 
 
 def count_tokens(input: str, model_name: str, tokenizer_repository: TokenizerRepository) -> int: