Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class BuildImageRequest(BaseModel):
class BuildImageResponse(BaseModel):
status: bool
logs: str
job_name: str


# TODO: We may want to add a DTO for streaming logs from the docker build to users.
7 changes: 4 additions & 3 deletions model-engine/model_engine_server/core/docker/remote_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class BuildResult:

status: bool
logs: str
job_name: str


def zip_context(
Expand Down Expand Up @@ -398,13 +399,13 @@ def cleanup_logs_process():
)
elif event["object"].status.phase == "Succeeded":
cleanup_logs_process()
return BuildResult(status=True, logs=_read_pod_logs(pod_name))
return BuildResult(status=True, logs=_read_pod_logs(pod_name), job_name=job_name)
elif event["object"].status.phase == "Failed":
cleanup_logs_process()
return BuildResult(status=False, logs=_read_pod_logs(pod_name))
return BuildResult(status=False, logs=_read_pod_logs(pod_name), job_name=job_name)
if logs_process is not None:
logs_process.kill()
return BuildResult(status=False, logs=_read_pod_logs(pod_name))
return BuildResult(status=False, logs=_read_pod_logs(pod_name), job_name=job_name)


def build_remote_block(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,6 @@ def build_image(self, image_params: BuildImageRequest) -> BuildImageResponse:
folders_to_include=folders_to_include,
build_args=build_args,
)
return BuildImageResponse(status=build_result.status, logs=build_result.logs)
return BuildImageResponse(
status=build_result.status, logs=build_result.logs, job_name=build_result.job_name
)
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ async def build_endpoint(
base_image_params = self.get_base_image_params(
build_endpoint_request, logger_adapter
)
logger.info(f"base_image_params: {base_image_params}")
logger_adapter.info(f"base_image_params: {base_image_params}")
base_image = await self._build_image(
base_image_params,
build_endpoint_request,
Expand Down Expand Up @@ -227,7 +227,9 @@ async def build_endpoint(
if os.path.exists(model_bundle_path):
os.remove(model_bundle_path)
else:
logger.error(f"No bundle object found at {model_bundle_path}!")
logger_adapter.error(
f"No bundle object found at {model_bundle_path}!"
)

except DockerBuildFailedException:
log_error("Failed to build base and user docker images")
Expand Down Expand Up @@ -493,8 +495,8 @@ def get_base_image_params(
inference_folder = "model-engine/model_engine_server/inference"
base_path: str = os.getenv("WORKSPACE") # type: ignore

logger.info(f"inference_folder: {inference_folder}")
logger.info(f"dockerfile: {inference_folder}/{dockerfile}")
logger_adapter.info(f"inference_folder: {inference_folder}")
logger_adapter.info(f"dockerfile: {inference_folder}/{dockerfile}")
return BuildImageRequest(
repo="launch/inference",
image_tag=resulting_image_tag[:MAX_IMAGE_TAG_LEN],
Expand Down Expand Up @@ -614,7 +616,7 @@ def _get_inject_bundle_image_params(
pass
_, model_bundle_path = tempfile.mkstemp(dir=bundle_folder, suffix=".zip")
bundle_url = model_bundle.location
logger.info(
logger_adapter.info(
f"Downloading bundle from serialized object at location {bundle_url} to local path {model_bundle_path}"
)
with open_wrapper(bundle_url, "rb") as bundle_data: # type: ignore
Expand Down Expand Up @@ -678,6 +680,7 @@ async def _build_image(
)
build_result_status = build_result.status
build_result_logs: str = build_result.logs
logger_adapter.info(f"Image Build job: {build_result.job_name}")
except Exception: # noqa
build_result_status = False
s3_logs_location: Optional[str] = None
Expand Down Expand Up @@ -759,8 +762,7 @@ async def _build_image(
else:
self.monitoring_metrics_gateway.emit_image_build_cache_hit_metric(image_type)
logger_adapter.info(
f"Image {image_params.repo}:{image_params.image_tag} already exists, "
f"skipping build for {endpoint_id=}"
f"Image already exists, skipping build. Image={image_params.repo}:{image_params.image_tag}, {endpoint_id=}"
)

return self.docker_repository.get_image_url(image_params.image_tag, image_params.repo)
Expand Down
2 changes: 1 addition & 1 deletion model-engine/tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,7 @@ def get_image_url(self, image_tag: str, repository_name: str) -> str:
def build_image(self, image_params: BuildImageRequest) -> BuildImageResponse:
if self.raises_error:
raise Exception("I hope you're handling this!")
return BuildImageResponse(status=True, logs="")
return BuildImageResponse(status=True, logs="", job_name="test-job-name")


class FakeModelEndpointCacheRepository(ModelEndpointCacheRepository):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ async def test_build_endpoint_build_result_failed_yields_docker_build_failed_exc
repo.add_model_endpoint_record(build_endpoint_request_sync_pytorch.model_endpoint_record)
endpoint_builder_service_empty_docker_not_built.docker_repository.__setattr__(
"build_image",
Mock(return_value=BuildImageResponse(status=False, logs="")),
Mock(return_value=BuildImageResponse(status=False, logs="", job_name="")),
)
with pytest.raises(DockerBuildFailedException):
await endpoint_builder_service_empty_docker_not_built.build_endpoint(
Expand Down