17 changes: 15 additions & 2 deletions openevolve/evaluator.py
@@ -262,7 +262,11 @@ async def evaluate_program(
                     "error_type": "timeout",
                 }

-            return {"error": 0.0, "timeout": True}
+            return {
+                "combined_score": 0.0,
+                "error": "Evaluation timed out",
+                "timeout": True,
+            }

         except Exception as e:
             last_exception = e
@@ -400,7 +404,12 @@ async def run_stage1():
             except asyncio.TimeoutError:
                 logger.warning(f"Stage 1 evaluation timed out after {self.config.timeout}s")
                 return EvaluationResult(
-                    metrics={"stage1_passed": 0.0, "error": 0.0, "timeout": True},
+                    metrics={
+                        "combined_score": 0.0,
+                        "stage1_passed": 0.0,
+                        "error": "Stage 1 evaluation timed out",
+                        "timeout": True,
+                    },
                     artifacts={
                         "failure_stage": "stage1",
                         "timeout": True,
Expand Down Expand Up @@ -447,7 +456,9 @@ async def run_stage2():
"failure_stage": "stage2",
}
)
stage1_eval_result.metrics["combined_score"] = 0.0
stage1_eval_result.metrics["stage2_passed"] = 0.0
stage1_eval_result.metrics["error"] = "Stage 2 evaluation timed out"
stage1_eval_result.metrics["timeout"] = True
return stage1_eval_result
except Exception as e:
@@ -509,7 +520,9 @@ async def run_stage3():
                         "failure_stage": "stage3",
                     }
                 )
+                merged_result.metrics["combined_score"] = 0.0
                 merged_result.metrics["stage3_passed"] = 0.0
+                merged_result.metrics["error"] = "Stage 3 evaluation timed out"
                 merged_result.metrics["timeout"] = True
                 return merged_result
             except Exception as e:
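Reviewer note: the evaluator changes above replace the ambiguous `{"error": 0.0, "timeout": True}` payload, where `error` reads like a score and `timeout` is a boolean, with an explicit `combined_score` of 0.0 and a human-readable `error` string. A minimal sketch of why the old shape skewed selection (a hypothetical fitness helper mirroring the logic in `metrics_utils.py`, not code from this PR):

```python
# Hypothetical fitness helper; a sketch, not code from this PR.
def fitness(metrics: dict) -> float:
    """Prefer an explicit combined_score; otherwise average the numerics."""
    if "combined_score" in metrics:
        return float(metrics["combined_score"])
    numeric = [
        float(v)
        for v in metrics.values()
        if isinstance(v, (int, float)) and not isinstance(v, bool)
    ]
    return sum(numeric) / len(numeric) if numeric else 0.0

old_timeout = {"error": 0.0, "timeout": True}  # old payload
new_timeout = {
    "combined_score": 0.0,
    "error": "Evaluation timed out",
    "timeout": True,
}  # new payload

# Without the bool guard, True averaged in as 1.0 and scored a timed-out
# program 0.5; the new payload pins the score to 0.0 explicitly.
print(fitness(old_timeout))  # 0.0, but only because the bool guard applies
print(fitness(new_timeout))  # 0.0 via the explicit combined_score
```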
18 changes: 14 additions & 4 deletions openevolve/utils/async_utils.py
@@ -42,14 +42,19 @@ async def run_with_timeout(
         coro: Coroutine function to run
         timeout: Timeout in seconds
         *args: Arguments to pass to the coroutine
-        timeout_error_value: Value to return on timeout (default: {"error": 0.0, "timeout": True})
+        timeout_error_value: Value to return on timeout
+            (default: {"combined_score": 0.0, "error": "Evaluation timed out", "timeout": True})
         **kwargs: Keyword arguments to pass to the coroutine

     Returns:
         Result of the coroutine or timeout_error_value on timeout
     """
     if timeout_error_value is None:
-        timeout_error_value = {"error": 0.0, "timeout": True}
+        timeout_error_value = {
+            "combined_score": 0.0,
+            "error": "Evaluation timed out",
+            "timeout": True,
+        }

     try:
         return await asyncio.wait_for(coro(*args, **kwargs), timeout=timeout)
@@ -68,14 +73,19 @@ async def run_sync_with_timeout(
         func: Synchronous function to run
         timeout: Timeout in seconds
         *args: Arguments to pass to the function
-        timeout_error_value: Value to return on timeout (default: {"error": 0.0, "timeout": True})
+        timeout_error_value: Value to return on timeout
+            (default: {"combined_score": 0.0, "error": "Evaluation timed out", "timeout": True})
         **kwargs: Keyword arguments to pass to the function

     Returns:
         Result of the function or timeout_error_value on timeout
     """
     if timeout_error_value is None:
-        timeout_error_value = {"error": 0.0, "timeout": True}
+        timeout_error_value = {
+            "combined_score": 0.0,
+            "error": "Evaluation timed out",
+            "timeout": True,
+        }

     try:
         loop = asyncio.get_event_loop()
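The `async_utils` default now matches the evaluator's timeout payload. A usage sketch, assuming the call signature implied by the docstring above (`coro, timeout, *args, timeout_error_value=None, **kwargs`); `slow_eval` and its argument are illustrative only:

```python
import asyncio

from openevolve.utils.async_utils import run_with_timeout

async def slow_eval(program_id: str) -> dict:
    await asyncio.sleep(60)  # deliberately exceeds the 0.1 s timeout below
    return {"combined_score": 1.0}

async def main() -> None:
    result = await run_with_timeout(slow_eval, 0.1, "prog-123")
    # With no timeout_error_value supplied, the new default is returned:
    assert result == {
        "combined_score": 0.0,
        "error": "Evaluation timed out",
        "timeout": True,
    }

asyncio.run(main())
```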
6 changes: 3 additions & 3 deletions openevolve/utils/metrics_utils.py
@@ -21,7 +21,7 @@ def safe_numeric_average(metrics: Dict[str, Any]) -> float:

     numeric_values = []
     for value in metrics.values():
-        if isinstance(value, (int, float)):
+        if isinstance(value, (int, float)) and not isinstance(value, bool):
             try:
                 # Convert to float and check if it's a valid number
                 float_val = float(value)
@@ -53,7 +53,7 @@ def safe_numeric_sum(metrics: Dict[str, Any]) -> float:

     numeric_sum = 0.0
     for value in metrics.values():
-        if isinstance(value, (int, float)):
+        if isinstance(value, (int, float)) and not isinstance(value, bool):
             try:
                 # Convert to float and check if it's a valid number
                 float_val = float(value)
@@ -99,7 +99,7 @@ def get_fitness_score(
     for key, value in metrics.items():
         # Exclude MAP feature dimensions from fitness calculation
         if key not in feature_dimensions:
-            if isinstance(value, (int, float)):
+            if isinstance(value, (int, float)) and not isinstance(value, bool):
                 try:
                     float_val = float(value)
                     if not (float_val != float_val):  # Check for NaN
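The root cause of the `metrics_utils` changes: `bool` is a subclass of `int` in Python, so boolean flags such as `timeout: True` passed the plain `isinstance(value, (int, float))` check and were averaged in as 1.0 or 0.0. A self-contained demonstration:

```python
# bool subclasses int, so flags slip through a plain numeric isinstance check.
assert isinstance(True, int)
assert float(True) == 1.0

metrics = {"combined_score": 0.0, "timeout": True}

# Old filter: the True flag counts as 1.0, skewing the average to 0.5.
old = [float(v) for v in metrics.values() if isinstance(v, (int, float))]
assert sum(old) / len(old) == 0.5

# New filter: the flag is excluded and the average stays at the real score.
new = [
    float(v)
    for v in metrics.values()
    if isinstance(v, (int, float)) and not isinstance(v, bool)
]
assert sum(new) / len(new) == 0.0
```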
4 changes: 3 additions & 1 deletion tests/test_evaluator_timeout.py
@@ -162,8 +162,10 @@ async def run_test():
         self.assertLess(elapsed_time, 5)

         # Should return timeout result
+        self.assertIn("combined_score", result)
+        self.assertEqual(result["combined_score"], 0.0)
         self.assertIn("error", result)
-        self.assertEqual(result["error"], 0.0)
+        self.assertEqual(result["error"], "Evaluation timed out")
         self.assertIn("timeout", result)
         self.assertTrue(result["timeout"])
28 changes: 28 additions & 0 deletions tests/test_metrics_utils.py
@@ -0,0 +1,28 @@
+import unittest
+
+from openevolve.utils.metrics_utils import get_fitness_score, safe_numeric_average
+
+
+class TestMetricsUtils(unittest.TestCase):
+    def test_safe_numeric_average_excludes_boolean_values(self):
+        metrics = {
+            "combined_score": 0.0,
+            "timeout": True,
+            "stage1_passed": False,
+            "latency_ms": 2.0,
+        }
+
+        self.assertEqual(safe_numeric_average(metrics), 1.0)
+
+    def test_get_fitness_score_excludes_boolean_values_without_combined_score(self):
+        metrics = {
+            "error": 0.0,
+            "timeout": True,
+            "ranking_passed": False,
+        }
+
+        self.assertEqual(get_fitness_score(metrics), 0.0)
+
+
+if __name__ == "__main__":
+    unittest.main()
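Hand-checking the first test above: with booleans excluded, only `combined_score` (0.0) and `latency_ms` (2.0) are averaged, giving 1.0; under the old filter the two flags would also count, as 1.0 and 0.0, giving 0.75 instead.

```python
# Expected value under the new filter vs. what the old filter would compute.
assert (0.0 + 2.0) / 2 == 1.0                # new: booleans skipped
assert (0.0 + 1.0 + 0.0 + 2.0) / 4 == 0.75   # old: True -> 1.0, False -> 0.0
```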