Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmark/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -1155,6 +1155,7 @@ async def run_test_real(
use_enhanced_map=True,
verbose=verbose,
yes_always_commands=True,
max_reflections=0,
),
map_mul_no_files=4,
mcp_manager=None,
Expand Down
2 changes: 1 addition & 1 deletion cecli/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from packaging import version

__version__ = "0.97.1.dev"
__version__ = "0.97.3.dev"
safe_version = __version__

try:
Expand Down
6 changes: 6 additions & 0 deletions cecli/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,12 @@ def get_parser(default_config_files, git_root):
" If unspecified, defaults to the model's max_chat_history_tokens."
),
)
group.add_argument(
"--max-reflections",
type=int,
default=3,
help="Maximum number of retries a model gets on malformed outputs (default: 3)",
)
group.add_argument(
"--file-diffs",
action=argparse.BooleanOptionalAction,
Expand Down
62 changes: 39 additions & 23 deletions cecli/coders/agent_coder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import random
import time
import traceback
from collections import Counter, defaultdict
from collections import defaultdict
from datetime import datetime
from pathlib import Path

Expand Down Expand Up @@ -85,6 +85,7 @@ def __init__(self, *args, **kwargs):
self.allowed_context_blocks = set()
self.context_block_tokens = {}
self.context_blocks_cache = {}
self.hot_reload_enabled = False
self.tokens_calculated = False
self.skip_cli_confirmations = False
self.agent_finished = False
Expand Down Expand Up @@ -119,6 +120,7 @@ def _get_agent_config(self):
config, "skip_cli_confirmations", nested.getter(config, "yolo", [])
)
config["command_timeout"] = nested.getter(config, "command_timeout", 30)
config["hot_reload"] = nested.getter(config, "hot_reload", False)

config["tools_paths"] = nested.getter(config, "tools_paths", [])
config["tools_includelist"] = nested.getter(
Expand Down Expand Up @@ -147,7 +149,7 @@ def _get_agent_config(self):

self.large_file_token_threshold = config["large_file_token_threshold"]
self.skip_cli_confirmations = config["skip_cli_confirmations"]

self.hot_reload_enabled = config["hot_reload"]
self.allowed_context_blocks = config["include_context_blocks"]

for context_block in config["exclude_context_blocks"]:
Expand Down Expand Up @@ -259,6 +261,7 @@ async def _execute_local_tool_calls(self, tool_calls_list):
try:
parsed_args_list.append(json.loads(chunk))
except json.JSONDecodeError as e:
self.model_kwargs = {}
self.io.tool_warning(
f"Could not parse JSON chunk for tool {tool_name}: {chunk}"
)
Expand Down Expand Up @@ -320,6 +323,7 @@ async def _execute_local_tool_calls(self, tool_calls_list):

result_message = "\n\n".join(all_results_content)
except Exception as e:
self.model_kwargs = {}
result_message = f"Error executing {tool_name}: {e}"
self.io.tool_error(f"""Error during {tool_name} execution: {e}
{traceback.format_exc()}""")
Expand Down Expand Up @@ -843,6 +847,10 @@ async def reply_completed(self):
self.files_edited_by_tools = set()
return False

async def hot_reload(self):
    """Refresh skill definitions from disk when hot reload is enabled.

    Called once per message cycle by the base coder's run loop; it is a
    no-op unless the agent config turned on ``hot_reload``.
    """
    if self.hot_reload_enabled:
        # Delegate to the skills manager, which clears its caches and
        # re-discovers skills from the filesystem.
        self.skills_manager.hot_reload()

async def _execute_tool_with_registry(self, norm_tool_name, params):
"""
Execute a tool using the tool registry.
Expand Down Expand Up @@ -879,12 +887,9 @@ def _get_repetitive_tools(self):
"""
Identifies repetitive tool usage patterns from rounds of tool calls.

This method combines count-based and similarity-based detection:
This method uses similarity-based detection:
1. If the last round contained a write tool, it assumes progress and returns no repetitive tools.
2. It checks for any read tool that has been used 2 or more times across rounds.
3. If no tools are repeated, but all tools in the history are read tools,
it flags all of them as potentially repetitive.
4. It checks for similarity-based repetition using cosine similarity on tool call strings.
2. It checks for similarity-based repetition using cosine similarity on tool call strings.

It avoids flagging repetition if a "write" tool was used recently,
as that suggests progress is being made.
Expand All @@ -893,9 +898,6 @@ def _get_repetitive_tools(self):
if history_len < 5:
return set()
similarity_repetitive_tools = self._get_repetitive_tools_by_similarity()
all_tools = []
for round_tools in self.tool_usage_history:
all_tools.extend(round_tools)
if self.last_round_tools:
last_round_has_write = any(
tool.lower() in self.write_tools for tool in self.last_round_tools
Expand All @@ -909,24 +911,14 @@ def _get_repetitive_tools(self):
if tool.lower() in self.read_tools or tool.lower() in self.write_tools
}
return filtered_similarity_tools if len(filtered_similarity_tools) else set()
if all(tool.lower() in self.read_tools for tool in all_tools):
# Only return tools that are in read_tools
return {tool for tool in all_tools if tool.lower() in self.read_tools}
tool_counts = Counter(all_tools)
count_repetitive_tools = {
tool
for tool, count in tool_counts.items()
if count >= 5 and tool.lower() in self.read_tools
}
# Filter similarity_repetitive_tools to only include tools in read_tools or write_tools
filtered_similarity_tools = {
tool
for tool in similarity_repetitive_tools
if tool.lower() in self.read_tools or tool.lower() in self.write_tools
}
repetitive_tools = count_repetitive_tools.union(filtered_similarity_tools)
if repetitive_tools:
return repetitive_tools
if filtered_similarity_tools:
return filtered_similarity_tools
return set()

def _get_repetitive_tools_by_similarity(self):
Expand Down Expand Up @@ -983,6 +975,27 @@ def _generate_tool_context(self, repetitive_tools):

context_parts.append("\n\n")
if repetitive_tools:
if not self.model_kwargs:
self.model_kwargs = {
"temperature": (self.main_model.use_temperature or 1) + 0.1,
"frequency_penalty": 0.2,
"presence_penalty": 0.1,
}
else:
temperature = nested.getter(self.model_kwargs, "temperature")
freq_penalty = nested.getter(self.model_kwargs, "frequency_penalty")
if temperature and freq_penalty:
self.model_kwargs["temperature"] = min(temperature + 0.1, 2)
self.model_kwargs["frequency_penalty"] = min(freq_penalty + 0.1, 1)

if random.random() < 0.25:
self.model_kwargs["temperature"] = max(temperature - 0.2, 1)
self.model_kwargs["frequency_penalty"] = max(freq_penalty - 0.2, 0)

# One tenth of the time, just straight reset the randomness
if random.random() < 0.1:
self.model_kwargs = {}

if self.turn_count - self._last_repetitive_warning_turn > 2:
self._last_repetitive_warning_turn = self.turn_count
self._last_repetitive_warning_severity += 1
Expand Down Expand Up @@ -1040,7 +1053,7 @@ def _generate_tool_context(self, repetitive_tools):

repetition_warning += f"""
### CRITICAL: Execution Loop Detected
You are currently "spinning." To break the logic trap, you must:
You are currently "spinning gears". To break the exploration loop, you must:
1. **Analyze**: Use the `Thinking` tool to summarize exactly what you have found so far and why you were stuck.
2. **Pivot**: Abandon or modify your current exploration strategy. Try focusing on different files or running tests.
3. **Reframe**: To ensure your logic reset, include a 2-sentence story about {animal} {verb} {fruit} in your thoughts.
Expand All @@ -1049,6 +1062,9 @@ def _generate_tool_context(self, repetitive_tools):
"""

context_parts.append(repetition_warning)
else:
self.model_kwargs = {}

context_parts.append("</context>")
return "\n".join(context_parts)

Expand Down
16 changes: 13 additions & 3 deletions cecli/coders/base_coder.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ class Coder:
tool_reflection = False
last_user_message = ""
uuid = ""
model_kwargs = {}

# Task coordination state variables
input_running = False
Expand Down Expand Up @@ -380,6 +381,9 @@ def __init__(

self.context_compaction_max_tokens = context_compaction_max_tokens
self.context_compaction_summary_tokens = context_compaction_summary_tokens
self.max_reflections = (
3 if self.edit_format == "agent" else nested.getter(self.args, "max_reflections", 3)
)

if not fnames:
fnames = []
Expand Down Expand Up @@ -1615,6 +1619,8 @@ async def run_one(self, user_message, preproc):
async for _ in self.send_message(message):
pass

await self.hot_reload()

if not self.reflected_message:
await self.auto_save_session(force=True)
break
Expand Down Expand Up @@ -2750,6 +2756,9 @@ def get_tool_list(self):
async def reply_completed(self):
pass

async def hot_reload(self):
    """Hook invoked after each run; default is a no-op.

    Subclasses (e.g. the agent coder) override this to refresh
    externally-edited resources such as skills between runs.
    """
    pass

async def show_exhausted_error(self):
output_tokens = 0
if self.partial_response_content:
Expand Down Expand Up @@ -2995,6 +3004,7 @@ async def send(self, messages, model=None, functions=None, tools=None):
self.temperature,
# This could include any tools, but for now it is just MCP tools
tools=tools,
override_kwargs=self.model_kwargs,
)
self.chat_completion_call_hashes.append(hash_object.hexdigest())

Expand Down Expand Up @@ -3166,7 +3176,7 @@ async def show_send_output_stream(self, completion):
reasoning_content = None

if reasoning_content:
if nested.getter(self, "args.show_thinking"):
if nested.getter(self.args, "show_thinking"):
if not self.got_reasoning_content:
text += f"<{REASONING_TAG}>\n\n"
text += reasoning_content
Expand Down Expand Up @@ -3202,7 +3212,7 @@ async def show_send_output_stream(self, completion):
self.stream_wrapper(content_to_show, final=False)
elif text:
# Apply reasoning tag formatting for non-pretty output
if nested.getter(self, "args.show_thinking"):
if nested.getter(self.args, "show_thinking"):
text = replace_reasoning_tags(text, self.reasoning_tag_name)
try:
self.stream_wrapper(text, final=False)
Expand Down Expand Up @@ -3405,7 +3415,7 @@ def stream_wrapper(self, content, final):
def live_incremental_response(self, final):
show_resp = self.render_incremental_response(final)
# Apply any reasoning tag formatting
if nested.getter(self, "args.show_thinking"):
if nested.getter(self.args, "show_thinking"):
show_resp = replace_reasoning_tags(show_resp, self.reasoning_tag_name)

# Track streaming state to avoid repetitive output
Expand Down
13 changes: 10 additions & 3 deletions cecli/commands/save_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,12 @@ async def execute(cls, io, coder, args, **kwargs):
@classmethod
def get_completions(cls, io, coder, args) -> List[str]:
    """Get completion options for save-session command."""
    # Offer the names of already-saved sessions for tab completion so the
    # user can see existing names and avoid accidental overwrites.
    from cecli import sessions

    manager = sessions.SessionManager(coder, io)
    names = []
    for info in manager.list_sessions():
        names.append(info["name"])
    return names

@classmethod
def get_help(cls) -> str:
Expand All @@ -40,4 +43,8 @@ def get_help(cls) -> str:
help_text += " /save-session bug-fix # Save session as 'bug-fix'\n"
help_text += "\nSessions are saved in the .cecli/sessions/ directory as JSON files.\n"
help_text += "Use /list-sessions to see saved sessions and /load-session to load them.\n"
help_text += (
"\nNote: Existing session names will be shown for tab completion to help prevent"
" accidental overwrites.\n"
)
return help_text
2 changes: 2 additions & 0 deletions cecli/helpers/conversation/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,6 +750,7 @@ def add_static_context_blocks(cls, coder) -> None:
message_dict={"role": "user", "content": block_content},
tag=MessageTag.STATIC,
hash_key=("static", block_type),
force=True,
)

@classmethod
Expand Down Expand Up @@ -793,6 +794,7 @@ def add_pre_message_context_blocks(cls, coder) -> None:
tag=MessageTag.STATIC, # Use STATIC tag but with different priority
priority=125, # Between REPO (100) and READONLY_FILES (200)
hash_key=("pre_message", block_type),
force=True,
)

@classmethod
Expand Down
15 changes: 15 additions & 0 deletions cecli/helpers/nested.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,18 @@ def getter(
return current

return default


def deep_merge(dict1, dict2):
    """Return a new dict combining *dict1* and *dict2* recursively.

    When a key exists in both inputs and both values are dicts, the
    sub-dicts are merged recursively; for any other collision the value
    from *dict2* wins. Neither input dict is modified.
    """
    result = dict(dict1)  # shallow copy keeps dict1 untouched
    for key, incoming in dict2.items():
        current = result.get(key)
        # Recurse only when both sides hold a dict; otherwise dict2 wins.
        if isinstance(current, dict) and isinstance(incoming, dict):
            result[key] = deep_merge(current, incoming)
        else:
            result[key] = incoming
    return result
5 changes: 5 additions & 0 deletions cecli/helpers/skills.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ def find_skills(self, reload: bool = False) -> List[SkillMetadata]:
self._skills_find_cache = skills
return skills

def hot_reload(self):
    """Discard all cached skill data and re-scan skills from disk.

    Clears both the skill-content and skill-metadata caches so that
    edits made on disk since the last scan are picked up, then forces a
    fresh discovery pass.
    """
    self._skills_cache = {}
    self._skill_metadata_cache = {}
    self.find_skills(reload=True)

def _parse_skill_metadata(self, skill_md_path: Path) -> SkillMetadata:
"""
Parse the metadata from a SKILL.md file.
Expand Down
6 changes: 5 additions & 1 deletion cecli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,9 @@ def get_io(pretty):
input_queue = None
pre_init_io = get_io(args.pretty)
# Check if we're in "send message and exit" mode to skip non-essential initialization
suppress_pre_init = args.message or args.message_file or args.apply_clipboard_edits
suppress_pre_init = (
args.message or args.message_file or args.apply_clipboard_edits or args.terminal_setup
)
supress_tui = True

if not suppress_pre_init:
Expand Down Expand Up @@ -1158,6 +1160,8 @@ def apply_model_overrides(model_name):
await coder.commands.execute("terminal-setup", "dry_run")
else:
await coder.commands.execute("terminal-setup", "")
return await graceful_exit(coder)

if args.lint or args.test or args.commit:
return await graceful_exit(coder)
if args.show_repo_map:
Expand Down
4 changes: 4 additions & 0 deletions cecli/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from cecli.helpers import nested
from cecli.helpers.file_searcher import handle_core_files
from cecli.helpers.model_providers import ModelProviderManager
from cecli.helpers.nested import deep_merge
from cecli.helpers.requests import model_request_parser
from cecli.llm import litellm
from cecli.sendchat import sanity_check_messages
Expand Down Expand Up @@ -955,6 +956,7 @@ async def send_completion(
max_tokens=None,
min_wait=0,
max_wait=2,
override_kwargs={},
):
if os.environ.get("CECLI_SANITY_CHECK_TURNS"):
sanity_check_messages(messages)
Expand Down Expand Up @@ -1053,6 +1055,8 @@ async def send_completion(
if random.random() < 0.25:
await asyncio.sleep(random.uniform(min_wait, max_wait))

if override_kwargs:
kwargs = deep_merge(kwargs, override_kwargs)
res = await litellm.acompletion(**kwargs)
return hash_object, res
except litellm.ContextWindowExceededError as err:
Expand Down
Loading
Loading