diff --git a/model-engine/model_engine_server/common/dtos/docker_repository.py b/model-engine/model_engine_server/common/dtos/docker_repository.py index 5548eeadc..6e4651d94 100644 --- a/model-engine/model_engine_server/common/dtos/docker_repository.py +++ b/model-engine/model_engine_server/common/dtos/docker_repository.py @@ -17,6 +17,7 @@ class BuildImageRequest(BaseModel): class BuildImageResponse(BaseModel): status: bool logs: str + job_name: str # TODO: We may want to add a DTO for streaming logs from the docker build to users. diff --git a/model-engine/model_engine_server/core/docker/remote_build.py b/model-engine/model_engine_server/core/docker/remote_build.py index 6261334ea..26d58721d 100644 --- a/model-engine/model_engine_server/core/docker/remote_build.py +++ b/model-engine/model_engine_server/core/docker/remote_build.py @@ -48,6 +48,7 @@ class BuildResult: status: bool logs: str + job_name: str def zip_context( @@ -398,13 +399,13 @@ def cleanup_logs_process(): ) elif event["object"].status.phase == "Succeeded": cleanup_logs_process() - return BuildResult(status=True, logs=_read_pod_logs(pod_name)) + return BuildResult(status=True, logs=_read_pod_logs(pod_name), job_name=job_name) elif event["object"].status.phase == "Failed": cleanup_logs_process() - return BuildResult(status=False, logs=_read_pod_logs(pod_name)) + return BuildResult(status=False, logs=_read_pod_logs(pod_name), job_name=job_name) if logs_process is not None: logs_process.kill() - return BuildResult(status=False, logs=_read_pod_logs(pod_name)) + return BuildResult(status=False, logs=_read_pod_logs(pod_name), job_name=job_name) def build_remote_block( diff --git a/model-engine/model_engine_server/infra/repositories/ecr_docker_repository.py b/model-engine/model_engine_server/infra/repositories/ecr_docker_repository.py index 8ca5dd61e..47aeb61c5 100644 --- a/model-engine/model_engine_server/infra/repositories/ecr_docker_repository.py +++ b/model-engine/model_engine_server/infra/repositories/ecr_docker_repository.py @@ -47,4 +47,6 @@ def build_image(self, image_params: BuildImageRequest) -> BuildImageResponse: folders_to_include=folders_to_include, build_args=build_args, ) - return BuildImageResponse(status=build_result.status, logs=build_result.logs) + return BuildImageResponse( + status=build_result.status, logs=build_result.logs, job_name=build_result.job_name + ) diff --git a/model-engine/model_engine_server/infra/services/live_endpoint_builder_service.py b/model-engine/model_engine_server/infra/services/live_endpoint_builder_service.py index 64871730a..7073b39f9 100644 --- a/model-engine/model_engine_server/infra/services/live_endpoint_builder_service.py +++ b/model-engine/model_engine_server/infra/services/live_endpoint_builder_service.py @@ -174,7 +174,7 @@ async def build_endpoint( base_image_params = self.get_base_image_params( build_endpoint_request, logger_adapter ) - logger.info(f"base_image_params: {base_image_params}") + logger_adapter.info(f"base_image_params: {base_image_params}") base_image = await self._build_image( base_image_params, build_endpoint_request, @@ -227,7 +227,9 @@ async def build_endpoint( if os.path.exists(model_bundle_path): os.remove(model_bundle_path) else: - logger.error(f"No bundle object found at {model_bundle_path}!") + logger_adapter.error( + f"No bundle object found at {model_bundle_path}!" + ) except DockerBuildFailedException: log_error("Failed to build base and user docker images") @@ -493,8 +495,8 @@ def get_base_image_params( inference_folder = "model-engine/model_engine_server/inference" base_path: str = os.getenv("WORKSPACE") # type: ignore - logger.info(f"inference_folder: {inference_folder}") - logger.info(f"dockerfile: {inference_folder}/{dockerfile}") + logger_adapter.info(f"inference_folder: {inference_folder}") + logger_adapter.info(f"dockerfile: {inference_folder}/{dockerfile}") return BuildImageRequest( repo="launch/inference", image_tag=resulting_image_tag[:MAX_IMAGE_TAG_LEN], @@ -614,7 +616,7 @@ def _get_inject_bundle_image_params( pass _, model_bundle_path = tempfile.mkstemp(dir=bundle_folder, suffix=".zip") bundle_url = model_bundle.location - logger.info( + logger_adapter.info( f"Downloading bundle from serialized object at location {bundle_url} to local path {model_bundle_path}" ) with open_wrapper(bundle_url, "rb") as bundle_data: # type: ignore @@ -678,6 +680,7 @@ async def _build_image( ) build_result_status = build_result.status build_result_logs: str = build_result.logs + logger_adapter.info(f"Image Build job: {build_result.job_name}") except Exception: # noqa build_result_status = False s3_logs_location: Optional[str] = None @@ -759,8 +762,7 @@ async def _build_image( else: self.monitoring_metrics_gateway.emit_image_build_cache_hit_metric(image_type) logger_adapter.info( - f"Image {image_params.repo}:{image_params.image_tag} already exists, " - f"skipping build for {endpoint_id=}" + f"Image already exists, skipping build. Image={image_params.repo}:{image_params.image_tag}, {endpoint_id=}" ) return self.docker_repository.get_image_url(image_params.image_tag, image_params.repo) diff --git a/model-engine/tests/unit/conftest.py b/model-engine/tests/unit/conftest.py index b55a2b500..b784e5c45 100644 --- a/model-engine/tests/unit/conftest.py +++ b/model-engine/tests/unit/conftest.py @@ -668,7 +668,7 @@ def get_image_url(self, image_tag: str, repository_name: str) -> str: def build_image(self, image_params: BuildImageRequest) -> BuildImageResponse: if self.raises_error: raise Exception("I hope you're handling this!") - return BuildImageResponse(status=True, logs="") + return BuildImageResponse(status=True, logs="", job_name="test-job-name") class FakeModelEndpointCacheRepository(ModelEndpointCacheRepository): diff --git a/model-engine/tests/unit/infra/services/test_live_endpoint_builder_service.py b/model-engine/tests/unit/infra/services/test_live_endpoint_builder_service.py index 31d44b2d4..bf568c9a9 100644 --- a/model-engine/tests/unit/infra/services/test_live_endpoint_builder_service.py +++ b/model-engine/tests/unit/infra/services/test_live_endpoint_builder_service.py @@ -213,7 +213,7 @@ async def test_build_endpoint_build_result_failed_yields_docker_build_failed_exc repo.add_model_endpoint_record(build_endpoint_request_sync_pytorch.model_endpoint_record) endpoint_builder_service_empty_docker_not_built.docker_repository.__setattr__( "build_image", - Mock(return_value=BuildImageResponse(status=False, logs="")), + Mock(return_value=BuildImageResponse(status=False, logs="", job_name="")), ) with pytest.raises(DockerBuildFailedException): await endpoint_builder_service_empty_docker_not_built.build_endpoint(