diff --git a/openevolve/evaluator.py b/openevolve/evaluator.py
index b1142ece50..da386577dc 100644
--- a/openevolve/evaluator.py
+++ b/openevolve/evaluator.py
@@ -262,7 +262,11 @@ async def evaluate_program(
                     "error_type": "timeout",
                 }
 
-                return {"error": 0.0, "timeout": True}
+                return {
+                    "combined_score": 0.0,
+                    "error": "Evaluation timed out",
+                    "timeout": True,
+                }
 
             except Exception as e:
                 last_exception = e
@@ -400,7 +404,12 @@ async def run_stage1():
             except asyncio.TimeoutError:
                 logger.warning(f"Stage 1 evaluation timed out after {self.config.timeout}s")
                 return EvaluationResult(
-                    metrics={"stage1_passed": 0.0, "error": 0.0, "timeout": True},
+                    metrics={
+                        "combined_score": 0.0,
+                        "stage1_passed": 0.0,
+                        "error": "Stage 1 evaluation timed out",
+                        "timeout": True,
+                    },
                     artifacts={
                         "failure_stage": "stage1",
                         "timeout": True,
@@ -447,7 +456,9 @@ async def run_stage2():
                         "failure_stage": "stage2",
                     }
                 )
+                stage1_eval_result.metrics["combined_score"] = 0.0
                 stage1_eval_result.metrics["stage2_passed"] = 0.0
+                stage1_eval_result.metrics["error"] = "Stage 2 evaluation timed out"
                 stage1_eval_result.metrics["timeout"] = True
                 return stage1_eval_result
             except Exception as e:
@@ -509,7 +520,9 @@ async def run_stage3():
                         "failure_stage": "stage3",
                     }
                 )
+                merged_result.metrics["combined_score"] = 0.0
                 merged_result.metrics["stage3_passed"] = 0.0
+                merged_result.metrics["error"] = "Stage 3 evaluation timed out"
                 merged_result.metrics["timeout"] = True
                 return merged_result
             except Exception as e:
diff --git a/openevolve/utils/async_utils.py b/openevolve/utils/async_utils.py
index ded1fed657..c0eaa74e37 100644
--- a/openevolve/utils/async_utils.py
+++ b/openevolve/utils/async_utils.py
@@ -42,14 +42,19 @@ async def run_with_timeout(
         coro: Coroutine function to run
         timeout: Timeout in seconds
         *args: Arguments to pass to the coroutine
-        timeout_error_value: Value to return on timeout (default: {"error": 0.0, "timeout": True})
+        timeout_error_value: Value to return on timeout
+            (default: {"combined_score": 0.0, "error": "Evaluation timed out", "timeout": True})
         **kwargs: Keyword arguments to pass to the coroutine
 
     Returns:
         Result of the coroutine or timeout_error_value on timeout
     """
     if timeout_error_value is None:
-        timeout_error_value = {"error": 0.0, "timeout": True}
+        timeout_error_value = {
+            "combined_score": 0.0,
+            "error": "Evaluation timed out",
+            "timeout": True,
+        }
 
     try:
         return await asyncio.wait_for(coro(*args, **kwargs), timeout=timeout)
@@ -68,14 +73,19 @@ async def run_sync_with_timeout(
         func: Synchronous function to run
         timeout: Timeout in seconds
        *args: Arguments to pass to the function
-        timeout_error_value: Value to return on timeout (default: {"error": 0.0, "timeout": True})
+        timeout_error_value: Value to return on timeout
+            (default: {"combined_score": 0.0, "error": "Evaluation timed out", "timeout": True})
         **kwargs: Keyword arguments to pass to the function
 
     Returns:
         Result of the function or timeout_error_value on timeout
     """
     if timeout_error_value is None:
-        timeout_error_value = {"error": 0.0, "timeout": True}
+        timeout_error_value = {
+            "combined_score": 0.0,
+            "error": "Evaluation timed out",
+            "timeout": True,
+        }
 
     try:
         loop = asyncio.get_event_loop()
diff --git a/openevolve/utils/metrics_utils.py b/openevolve/utils/metrics_utils.py
index 3efd18e25b..b7fdd09230 100644
--- a/openevolve/utils/metrics_utils.py
+++ b/openevolve/utils/metrics_utils.py
@@ -21,7 +21,7 @@ def safe_numeric_average(metrics: Dict[str, Any]) -> float:
 
     numeric_values = []
     for value in metrics.values():
-        if isinstance(value, (int, float)):
+        if isinstance(value, (int, float)) and not isinstance(value, bool):
             try:
                 # Convert to float and check if it's a valid number
                 float_val = float(value)
@@ -53,7 +53,7 @@ def safe_numeric_sum(metrics: Dict[str, Any]) -> float:
 
     numeric_sum = 0.0
     for value in metrics.values():
-        if isinstance(value, (int, float)):
+        if isinstance(value, (int, float)) and not isinstance(value, bool):
             try:
                 # Convert to float and check if it's a valid number
                 float_val = float(value)
@@ -99,7 +99,7 @@ def get_fitness_score(
     for key, value in metrics.items():
         # Exclude MAP feature dimensions from fitness calculation
        if key not in feature_dimensions:
-            if isinstance(value, (int, float)):
+            if isinstance(value, (int, float)) and not isinstance(value, bool):
                 try:
                     float_val = float(value)
                     if not (float_val != float_val):  # Check for NaN
diff --git a/tests/test_evaluator_timeout.py b/tests/test_evaluator_timeout.py
index d9053e4a07..70d99eac9c 100644
--- a/tests/test_evaluator_timeout.py
+++ b/tests/test_evaluator_timeout.py
@@ -162,8 +162,10 @@ async def run_test():
             self.assertLess(elapsed_time, 5)
 
             # Should return timeout result
+            self.assertIn("combined_score", result)
+            self.assertEqual(result["combined_score"], 0.0)
             self.assertIn("error", result)
-            self.assertEqual(result["error"], 0.0)
+            self.assertEqual(result["error"], "Evaluation timed out")
             self.assertIn("timeout", result)
             self.assertTrue(result["timeout"])
 
diff --git a/tests/test_metrics_utils.py b/tests/test_metrics_utils.py
new file mode 100644
index 0000000000..bffff49188
--- /dev/null
+++ b/tests/test_metrics_utils.py
@@ -0,0 +1,28 @@
+import unittest
+
+from openevolve.utils.metrics_utils import get_fitness_score, safe_numeric_average
+
+
+class TestMetricsUtils(unittest.TestCase):
+    def test_safe_numeric_average_excludes_boolean_values(self):
+        metrics = {
+            "combined_score": 0.0,
+            "timeout": True,
+            "stage1_passed": False,
+            "latency_ms": 2.0,
+        }
+
+        self.assertEqual(safe_numeric_average(metrics), 1.0)
+
+    def test_get_fitness_score_excludes_boolean_values_without_combined_score(self):
+        metrics = {
+            "error": 0.0,
+            "timeout": True,
+            "ranking_passed": False,
+        }
+
+        self.assertEqual(get_fitness_score(metrics), 0.0)
+
+
+if __name__ == "__main__":
+    unittest.main()