Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmark/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -1155,6 +1155,7 @@ async def run_test_real(
use_enhanced_map=True,
verbose=verbose,
yes_always_commands=True,
max_reflections=0,
),
map_mul_no_files=4,
mcp_manager=None,
Expand Down
2 changes: 1 addition & 1 deletion cecli/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from packaging import version

__version__ = "0.97.1.dev"
__version__ = "0.97.3.dev"
safe_version = __version__

try:
Expand Down
6 changes: 6 additions & 0 deletions cecli/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,12 @@ def get_parser(default_config_files, git_root):
" If unspecified, defaults to the model's max_chat_history_tokens."
),
)
group.add_argument(
"--max-reflections",
type=int,
default=3,
help="Maximum number of retries a model gets on malformed outputs (default: 3)",
)
group.add_argument(
"--file-diffs",
action=argparse.BooleanOptionalAction,
Expand Down
62 changes: 39 additions & 23 deletions cecli/coders/agent_coder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import random
import time
import traceback
from collections import Counter, defaultdict
from collections import defaultdict
from datetime import datetime
from pathlib import Path

Expand Down Expand Up @@ -85,6 +85,7 @@ def __init__(self, *args, **kwargs):
self.allowed_context_blocks = set()
self.context_block_tokens = {}
self.context_blocks_cache = {}
self.hot_reload_enabled = False
self.tokens_calculated = False
self.skip_cli_confirmations = False
self.agent_finished = False
Expand Down Expand Up @@ -119,6 +120,7 @@ def _get_agent_config(self):
config, "skip_cli_confirmations", nested.getter(config, "yolo", [])
)
config["command_timeout"] = nested.getter(config, "command_timeout", 30)
config["hot_reload"] = nested.getter(config, "hot_reload", False)

config["tools_paths"] = nested.getter(config, "tools_paths", [])
config["tools_includelist"] = nested.getter(
Expand Down Expand Up @@ -147,7 +149,7 @@ def _get_agent_config(self):

self.large_file_token_threshold = config["large_file_token_threshold"]
self.skip_cli_confirmations = config["skip_cli_confirmations"]

self.hot_reload_enabled = config["hot_reload"]
self.allowed_context_blocks = config["include_context_blocks"]

for context_block in config["exclude_context_blocks"]:
Expand Down Expand Up @@ -259,6 +261,7 @@ async def _execute_local_tool_calls(self, tool_calls_list):
try:
parsed_args_list.append(json.loads(chunk))
except json.JSONDecodeError as e:
self.model_kwargs = {}
self.io.tool_warning(
f"Could not parse JSON chunk for tool {tool_name}: {chunk}"
)
Expand Down Expand Up @@ -320,6 +323,7 @@ async def _execute_local_tool_calls(self, tool_calls_list):

result_message = "\n\n".join(all_results_content)
except Exception as e:
self.model_kwargs = {}
result_message = f"Error executing {tool_name}: {e}"
self.io.tool_error(f"""Error during {tool_name} execution: {e}
{traceback.format_exc()}""")
Expand Down Expand Up @@ -843,6 +847,10 @@ async def reply_completed(self):
self.files_edited_by_tools = set()
return False

async def hot_reload(self):
    """Refresh skill definitions from disk when hot reload is enabled.

    Called once per message cycle by the base coder's run loop; it is a
    no-op unless the agent config turned on ``hot_reload``.
    """
    if self.hot_reload_enabled:
        # Delegate to the skills manager, which clears its caches and
        # re-discovers skills from the filesystem.
        self.skills_manager.hot_reload()

async def _execute_tool_with_registry(self, norm_tool_name, params):
"""
Execute a tool using the tool registry.
Expand Down Expand Up @@ -879,12 +887,9 @@ def _get_repetitive_tools(self):
"""
Identifies repetitive tool usage patterns from rounds of tool calls.

This method combines count-based and similarity-based detection:
This method uses similarity-based detection:
1. If the last round contained a write tool, it assumes progress and returns no repetitive tools.
2. It checks for any read tool that has been used 2 or more times across rounds.
3. If no tools are repeated, but all tools in the history are read tools,
it flags all of them as potentially repetitive.
4. It checks for similarity-based repetition using cosine similarity on tool call strings.
2. It checks for similarity-based repetition using cosine similarity on tool call strings.

It avoids flagging repetition if a "write" tool was used recently,
as that suggests progress is being made.
Expand All @@ -893,9 +898,6 @@ def _get_repetitive_tools(self):
if history_len < 5:
return set()
similarity_repetitive_tools = self._get_repetitive_tools_by_similarity()
all_tools = []
for round_tools in self.tool_usage_history:
all_tools.extend(round_tools)
if self.last_round_tools:
last_round_has_write = any(
tool.lower() in self.write_tools for tool in self.last_round_tools
Expand All @@ -909,24 +911,14 @@ def _get_repetitive_tools(self):
if tool.lower() in self.read_tools or tool.lower() in self.write_tools
}
return filtered_similarity_tools if len(filtered_similarity_tools) else set()
if all(tool.lower() in self.read_tools for tool in all_tools):
# Only return tools that are in read_tools
return {tool for tool in all_tools if tool.lower() in self.read_tools}
tool_counts = Counter(all_tools)
count_repetitive_tools = {
tool
for tool, count in tool_counts.items()
if count >= 5 and tool.lower() in self.read_tools
}
# Filter similarity_repetitive_tools to only include tools in read_tools or write_tools
filtered_similarity_tools = {
tool
for tool in similarity_repetitive_tools
if tool.lower() in self.read_tools or tool.lower() in self.write_tools
}
repetitive_tools = count_repetitive_tools.union(filtered_similarity_tools)
if repetitive_tools:
return repetitive_tools
if filtered_similarity_tools:
return filtered_similarity_tools
return set()

def _get_repetitive_tools_by_similarity(self):
Expand Down Expand Up @@ -983,6 +975,27 @@ def _generate_tool_context(self, repetitive_tools):

context_parts.append("\n\n")
if repetitive_tools:
if not self.model_kwargs:
self.model_kwargs = {
"temperature": (self.main_model.use_temperature or 1) + 0.1,
"frequency_penalty": 0.2,
"presence_penalty": 0.1,
}
else:
temperature = nested.getter(self.model_kwargs, "temperature")
freq_penalty = nested.getter(self.model_kwargs, "frequency_penalty")
if temperature and freq_penalty:
self.model_kwargs["temperature"] = min(temperature + 0.1, 2)
self.model_kwargs["frequency_penalty"] = min(freq_penalty + 0.1, 1)

if random.random() < 0.25:
self.model_kwargs["temperature"] = max(temperature - 0.2, 1)
self.model_kwargs["frequency_penalty"] = max(freq_penalty - 0.2, 0)

# One tenth of the time, just straight reset the randomness
if random.random() < 0.1:
self.model_kwargs = {}

if self.turn_count - self._last_repetitive_warning_turn > 2:
self._last_repetitive_warning_turn = self.turn_count
self._last_repetitive_warning_severity += 1
Expand Down Expand Up @@ -1040,7 +1053,7 @@ def _generate_tool_context(self, repetitive_tools):

repetition_warning += f"""
### CRITICAL: Execution Loop Detected
You are currently "spinning." To break the logic trap, you must:
You are currently "spinning gears". To break the exploration loop, you must:
1. **Analyze**: Use the `Thinking` tool to summarize exactly what you have found so far and why you were stuck.
2. **Pivot**: Abandon or modify your current exploration strategy. Try focusing on different files or running tests.
3. **Reframe**: To ensure your logic reset, include a 2-sentence story about {animal} {verb} {fruit} in your thoughts.
Expand All @@ -1049,6 +1062,9 @@ def _generate_tool_context(self, repetitive_tools):
"""

context_parts.append(repetition_warning)
else:
self.model_kwargs = {}

context_parts.append("</context>")
return "\n".join(context_parts)

Expand Down
16 changes: 13 additions & 3 deletions cecli/coders/base_coder.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ class Coder:
tool_reflection = False
last_user_message = ""
uuid = ""
model_kwargs = {}

# Task coordination state variables
input_running = False
Expand Down Expand Up @@ -380,6 +381,9 @@ def __init__(

self.context_compaction_max_tokens = context_compaction_max_tokens
self.context_compaction_summary_tokens = context_compaction_summary_tokens
self.max_reflections = (
3 if self.edit_format == "agent" else nested.getter(self.args, "max_reflections", 3)
)

if not fnames:
fnames = []
Expand Down Expand Up @@ -1615,6 +1619,8 @@ async def run_one(self, user_message, preproc):
async for _ in self.send_message(message):
pass

await self.hot_reload()

if not self.reflected_message:
await self.auto_save_session(force=True)
break
Expand Down Expand Up @@ -2750,6 +2756,9 @@ def get_tool_list(self):
async def reply_completed(self):
pass

async def hot_reload(self):
    """Hook invoked after each run; default is a no-op.

    Subclasses (e.g. the agent coder) override this to refresh
    externally-edited resources such as skills between runs.
    """
    pass

async def show_exhausted_error(self):
output_tokens = 0
if self.partial_response_content:
Expand Down Expand Up @@ -2995,6 +3004,7 @@ async def send(self, messages, model=None, functions=None, tools=None):
self.temperature,
# This could include any tools, but for now it is just MCP tools
tools=tools,
override_kwargs=self.model_kwargs,
)
self.chat_completion_call_hashes.append(hash_object.hexdigest())

Expand Down Expand Up @@ -3166,7 +3176,7 @@ async def show_send_output_stream(self, completion):
reasoning_content = None

if reasoning_content:
if nested.getter(self, "args.show_thinking"):
if nested.getter(self.args, "show_thinking"):
if not self.got_reasoning_content:
text += f"<{REASONING_TAG}>\n\n"
text += reasoning_content
Expand Down Expand Up @@ -3202,7 +3212,7 @@ async def show_send_output_stream(self, completion):
self.stream_wrapper(content_to_show, final=False)
elif text:
# Apply reasoning tag formatting for non-pretty output
if nested.getter(self, "args.show_thinking"):
if nested.getter(self.args, "show_thinking"):
text = replace_reasoning_tags(text, self.reasoning_tag_name)
try:
self.stream_wrapper(text, final=False)
Expand Down Expand Up @@ -3405,7 +3415,7 @@ def stream_wrapper(self, content, final):
def live_incremental_response(self, final):
show_resp = self.render_incremental_response(final)
# Apply any reasoning tag formatting
if nested.getter(self, "args.show_thinking"):
if nested.getter(self.args, "show_thinking"):
show_resp = replace_reasoning_tags(show_resp, self.reasoning_tag_name)

# Track streaming state to avoid repetitive output
Expand Down
13 changes: 10 additions & 3 deletions cecli/commands/save_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,12 @@ async def execute(cls, io, coder, args, **kwargs):
@classmethod
def get_completions(cls, io, coder, args) -> List[str]:
    """Get completion options for save-session command."""
    # Offer the names of already-saved sessions for tab completion so the
    # user can see existing names and avoid accidental overwrites.
    from cecli import sessions

    manager = sessions.SessionManager(coder, io)
    names = []
    for info in manager.list_sessions():
        names.append(info["name"])
    return names

@classmethod
def get_help(cls) -> str:
Expand All @@ -40,4 +43,8 @@ def get_help(cls) -> str:
help_text += " /save-session bug-fix # Save session as 'bug-fix'\n"
help_text += "\nSessions are saved in the .cecli/sessions/ directory as JSON files.\n"
help_text += "Use /list-sessions to see saved sessions and /load-session to load them.\n"
help_text += (
"\nNote: Existing session names will be shown for tab completion to help prevent"
" accidental overwrites.\n"
)
return help_text
2 changes: 2 additions & 0 deletions cecli/helpers/conversation/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,6 +750,7 @@ def add_static_context_blocks(cls, coder) -> None:
message_dict={"role": "user", "content": block_content},
tag=MessageTag.STATIC,
hash_key=("static", block_type),
force=True,
)

@classmethod
Expand Down Expand Up @@ -793,6 +794,7 @@ def add_pre_message_context_blocks(cls, coder) -> None:
tag=MessageTag.STATIC, # Use STATIC tag but with different priority
priority=125, # Between REPO (100) and READONLY_FILES (200)
hash_key=("pre_message", block_type),
force=True,
)

@classmethod
Expand Down
15 changes: 15 additions & 0 deletions cecli/helpers/nested.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,18 @@ def getter(
return current

return default


def deep_merge(dict1, dict2):
    """Return a new dict combining *dict1* and *dict2* recursively.

    When a key exists in both inputs and both values are dicts, the
    sub-dicts are merged recursively; for any other collision the value
    from *dict2* wins. Neither input dict is modified.
    """
    result = dict(dict1)  # shallow copy keeps dict1 untouched
    for key, incoming in dict2.items():
        current = result.get(key)
        # Recurse only when both sides hold a dict; otherwise dict2 wins.
        if isinstance(current, dict) and isinstance(incoming, dict):
            result[key] = deep_merge(current, incoming)
        else:
            result[key] = incoming
    return result
5 changes: 5 additions & 0 deletions cecli/helpers/skills.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ def find_skills(self, reload: bool = False) -> List[SkillMetadata]:
self._skills_find_cache = skills
return skills

def hot_reload(self):
    """Discard all cached skill data and re-scan skills from disk.

    Clears both the skill-content and skill-metadata caches so that
    edits made on disk since the last scan are picked up, then forces a
    fresh discovery pass.
    """
    self._skills_cache = {}
    self._skill_metadata_cache = {}
    self.find_skills(reload=True)

def _parse_skill_metadata(self, skill_md_path: Path) -> SkillMetadata:
"""
Parse the metadata from a SKILL.md file.
Expand Down
6 changes: 5 additions & 1 deletion cecli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,9 @@ def get_io(pretty):
input_queue = None
pre_init_io = get_io(args.pretty)
# Check if we're in "send message and exit" mode to skip non-essential initialization
suppress_pre_init = args.message or args.message_file or args.apply_clipboard_edits
suppress_pre_init = (
args.message or args.message_file or args.apply_clipboard_edits or args.terminal_setup
)
supress_tui = True

if not suppress_pre_init:
Expand Down Expand Up @@ -1158,6 +1160,8 @@ def apply_model_overrides(model_name):
await coder.commands.execute("terminal-setup", "dry_run")
else:
await coder.commands.execute("terminal-setup", "")
return await graceful_exit(coder)

if args.lint or args.test or args.commit:
return await graceful_exit(coder)
if args.show_repo_map:
Expand Down
4 changes: 4 additions & 0 deletions cecli/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from cecli.helpers import nested
from cecli.helpers.file_searcher import handle_core_files
from cecli.helpers.model_providers import ModelProviderManager
from cecli.helpers.nested import deep_merge
from cecli.helpers.requests import model_request_parser
from cecli.llm import litellm
from cecli.sendchat import sanity_check_messages
Expand Down Expand Up @@ -955,6 +956,7 @@ async def send_completion(
max_tokens=None,
min_wait=0,
max_wait=2,
override_kwargs={},
):
if os.environ.get("CECLI_SANITY_CHECK_TURNS"):
sanity_check_messages(messages)
Expand Down Expand Up @@ -1053,6 +1055,8 @@ async def send_completion(
if random.random() < 0.25:
await asyncio.sleep(random.uniform(min_wait, max_wait))

if override_kwargs:
kwargs = deep_merge(kwargs, override_kwargs)
res = await litellm.acompletion(**kwargs)
return hash_object, res
except litellm.ContextWindowExceededError as err:
Expand Down
Loading
Loading