Skip to content

Commit f783857

Browse files
committed
PR feedback
1 parent c216e3b commit f783857

File tree

4 files changed

+72
-18
lines changed

4 files changed

+72
-18
lines changed

dist/index.js

Lines changed: 27 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dist/index.js.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/model/cloud-runner/providers/k8s/kubernetes-pods.ts

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,42 @@ class KubernetesPods {
8787
// The container might have succeeded but status hasn't been updated yet
8888
if (wasKilled && hasPreStopHookFailure && containerExitCode === undefined) {
8989
CloudRunnerLogger.log(
90-
`Pod ${podName} was killed with PreStopHook failure, but container status not yet available. This may be non-fatal if container succeeded.`,
90+
`Pod ${podName} was killed with PreStopHook failure, but container status not yet available. Waiting for container status...`,
91+
);
92+
// Wait a bit for container status to become available (up to 30 seconds)
93+
for (let i = 0; i < 6; i++) {
94+
await new Promise((resolve) => setTimeout(resolve, 5000));
95+
try {
96+
const updatedPod = (
97+
await kubeClient.listNamespacedPod(namespace)
98+
).body.items.find((x) => podName === x.metadata?.name);
99+
if (updatedPod?.status?.containerStatuses && updatedPod.status.containerStatuses.length > 0) {
100+
const updatedContainerStatus = updatedPod.status.containerStatuses[0];
101+
if (updatedContainerStatus.state?.terminated) {
102+
const updatedExitCode = updatedContainerStatus.state.terminated.exitCode;
103+
if (updatedExitCode === 0) {
104+
CloudRunnerLogger.logWarning(
105+
`Pod ${podName} container succeeded (exit code 0) after waiting. PreStopHook failure is non-fatal.`,
106+
);
107+
return false; // Pod is not running, but container succeeded
108+
} else {
109+
CloudRunnerLogger.log(
110+
`Pod ${podName} container failed with exit code ${updatedExitCode} after waiting.`,
111+
);
112+
errorDetails.push(
113+
`Container terminated after wait: exit code ${updatedExitCode}`,
114+
);
115+
break;
116+
}
117+
}
118+
}
119+
} catch (waitError) {
120+
CloudRunnerLogger.log(`Error while waiting for container status: ${waitError}`);
121+
}
122+
}
123+
CloudRunnerLogger.log(
124+
`Container status still not available after waiting. Assuming failure due to PreStopHook issues.`,
91125
);
92-
// Still throw error for now, but with more context
93-
// The task runner will retry and get the actual container status
94126
}
95127

96128
const errorMessage = `K8s pod failed\n${errorDetails.join('\n')}`;

src/model/cloud-runner/remote-client/caching.ts

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,7 @@ export class Caching {
9595

9696
// If disk usage is high (>90%), proactively clean up old cache files
9797
if (diskUsagePercent > 90) {
98-
CloudRunnerLogger.log(
99-
`Disk usage is ${diskUsagePercent}% - cleaning up old cache files before tar operation`,
100-
);
98+
CloudRunnerLogger.log(`Disk usage is ${diskUsagePercent}% - cleaning up old cache files before tar operation`);
10199
try {
102100
const cacheParent = path.dirname(cacheFolder);
103101
if (await fileExists(cacheParent)) {
@@ -106,9 +104,7 @@ export class Caching {
106104
`find ${cacheParent} -name "*.tar*" -type f -mmin +360 -delete 2>/dev/null || true`,
107105
);
108106
// Also try to remove old cache directories
109-
await CloudRunnerSystem.Run(
110-
`find ${cacheParent} -type d -empty -delete 2>/dev/null || true`,
111-
);
107+
await CloudRunnerSystem.Run(`find ${cacheParent} -type d -empty -delete 2>/dev/null || true`);
112108
CloudRunnerLogger.log(`Cleanup completed. Checking disk space again...`);
113109
const diskCheckAfter = await CloudRunnerSystem.Run(`df . 2>/dev/null || df /data 2>/dev/null || true`);
114110
CloudRunnerLogger.log(`Disk space after cleanup: ${diskCheckAfter}`);
@@ -143,9 +139,7 @@ export class Caching {
143139
`find ${cacheParent} -name "*.tar*" -type f -mmin +60 -delete 2>/dev/null || true`,
144140
);
145141
// Remove empty cache directories
146-
await CloudRunnerSystem.Run(
147-
`find ${cacheParent} -type d -empty -delete 2>/dev/null || true`,
148-
);
142+
await CloudRunnerSystem.Run(`find ${cacheParent} -type d -empty -delete 2>/dev/null || true`);
149143
// Also try to clean up the entire cache folder if it's getting too large
150144
const cacheRoot = path.resolve(cacheParent, '..');
151145
if (await fileExists(cacheRoot)) {
@@ -165,7 +159,9 @@ export class Caching {
165159
retrySucceeded = true;
166160
} catch (retryError: any) {
167161
throw new Error(
168-
`Failed to create cache archive after cleanup. Original error: ${errorMessage}. Retry error: ${retryError?.message || retryError}`,
162+
`Failed to create cache archive after cleanup. Original error: ${errorMessage}. Retry error: ${
163+
retryError?.message || retryError
164+
}`,
169165
);
170166
}
171167
// If retry succeeded, don't throw the original error - let execution continue after catch block
@@ -181,7 +177,9 @@ export class Caching {
181177
} catch (cleanupError: any) {
182178
CloudRunnerLogger.log(`Cleanup attempt failed: ${cleanupError}`);
183179
throw new Error(
184-
`Failed to create cache archive due to insufficient disk space. Error: ${errorMessage}. Cleanup failed: ${cleanupError?.message || cleanupError}`,
180+
`Failed to create cache archive due to insufficient disk space. Error: ${errorMessage}. Cleanup failed: ${
181+
cleanupError?.message || cleanupError
182+
}`,
185183
);
186184
}
187185
} else {

0 commit comments

Comments
 (0)