Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ LLMs are a part of our lives from here on out so join us in learning about and c
* [TUI Configuration](https://github.com/dwash96/cecli/blob/main/cecli/website/docs/config/tui.md)
* [Skills](https://github.com/dwash96/cecli/blob/main/cecli/website/docs/config/skills.md)
* [Session Management](https://github.com/dwash96/cecli/blob/main/cecli/website/docs/sessions.md)
* [Hooks](https://github.com/dwash96/cecli/blob/main/cecli/website/docs/config/hooks.md)
* [Custom Commands](https://github.com/dwash96/cecli/blob/main/cecli/website/docs/config/custom-commands.md)
* [Custom System Prompts](https://github.com/dwash96/cecli/blob/main/cecli/website/docs/config/custom-system-prompts.md)
* [Custom Tools](https://github.com/dwash96/cecli/blob/main/cecli/website/docs/config/agent-mode.md#creating-custom-tools)
Expand Down Expand Up @@ -171,15 +172,15 @@ The current priorities are to improve core capabilities and user experience of t
* [ ] Add visibility into active sub agent calls in TUI

8. **Hooks**
* [ ] Add hooks base class for user-defined Python hooks with an execute method with type and priority settings
* [ ] Add hook manager that can accept user-defined files and command-line commands
* [ ] Integrate hook manager with coder classes with hooks for `start`, `on_message`, `end_message`, `pre_tool`, and `post_tool`
* [x] Add hooks base class for user-defined Python hooks with an execute method with type and priority settings
* [x] Add hook manager that can accept user-defined files and command-line commands
* [x] Integrate hook manager with coder classes with hooks for `start`, `end`, `on_message`, `end_message`, `pre_tool`, and `post_tool`

9. **Efficient File Editing**
* [ ] Explore use of hashline file representation for more targeted file editing
* [ ] Assuming viability, update SEARCH part of SEARCH/REPLACE with hashline identification
* [ ] Update agent mode edit tools to work with hashline identification
* [ ] Update internal file diff representation to support hashline propagation
* [x] Explore use of hashline file representation for more targeted file editing
* [x] Assuming viability, update SEARCH part of SEARCH/REPLACE with hashline identification (Done with new edit format)
* [x] Update agent mode edit tools to work with hashline identification
* [x] Update internal file diff representation to support hashline propagation

10. **Dynamic Context Management**
* [ ] Update compaction to use observational memory sub agent calls to generate decision records that are used as the compaction basis
Expand Down
371 changes: 359 additions & 12 deletions benchmark/benchmark.py

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions benchmark/primary_variations.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
set -e # Exit on error

# Default values
BASE_NAME="cecli-base-hashline-9"
EDIT_FORMAT="hashline"
BASE_NAME="cecli-base-d-big-3"
EDIT_FORMAT="diff"
MAP_TOKENS="512"
THREADS="1"
LANGUAGES="javascript,python,rust,go,java"
HASH_RE="^[15]"
NUM_TESTS="32"
HASH_RE="^.[15ef]"
NUM_TESTS="72"
EXERCISES_DIR="polyglot-benchmark"
OUTPUT_DIR="tmp.benchmarks"
SLEEP_BETWEEN=30 # Seconds to sleep between runs
Expand All @@ -21,15 +21,14 @@ SLEEP_BETWEEN=30 # Seconds to sleep between runs
# "openrouter/minimax/minimax-m2.1"
# "openrouter/qwen/qwen3-vl-235b-a22b-thinking"
MODELS=(
# "openrouter/deepseek/deepseek-v3.2-exp"
"openrouter/moonshotai/kimi-k2.5"
"openrouter/openai/gpt-oss-120b"
"openrouter/openai/gpt-5.2"
"openrouter/google/gemini-3-flash-preview"
"openrouter/deepseek/deepseek-v3.2-exp"
# "openrouter/moonshotai/kimi-k2.5"
# "openrouter/openai/gpt-oss-120b"
# "openrouter/openai/gpt-5.2"
# "openrouter/google/gemini-3-flash-preview"
# "openrouter/google/gemini-3-pro-preview"
# "openrouter/anthropic/claude-haiku-4.5"
# "openrouter/anthropic/claude-sonnet-4.5"
"openrouter/google/gemini-3-pro-preview"
"openrouter/anthropic/claude-haiku-4.5"
"openrouter/anthropic/claude-sonnet-4.5"
)

# Parse command line arguments
Expand Down Expand Up @@ -111,6 +110,7 @@ run_benchmark() {

# Create the benchmark command
./benchmark/benchmark.py "$run_name" \
--new \
--model "$model" \
--edit-format "$EDIT_FORMAT" \
--map-tokens "$MAP_TOKENS" \
Expand Down
6 changes: 6 additions & 0 deletions cecli/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,12 @@ def get_parser(default_config_files, git_root):
help="Specify Agent Mode configuration as a JSON string",
default=None,
)
group.add_argument(
"--hooks",
metavar="HOOKS_CONFIG_JSON",
help="Specify hooks configuration as a JSON string",
default=None,
)
group.add_argument(
"--agent-model",
metavar="AGENT_MODEL",
Expand Down
147 changes: 110 additions & 37 deletions cecli/coders/agent_coder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import locale
import os
import platform
import random
import time
import traceback
from collections import Counter, defaultdict
Expand All @@ -25,6 +26,7 @@
normalize_vector,
)
from cecli.helpers.skills import SkillsManager
from cecli.hooks import HookIntegration
from cecli.llm import litellm
from cecli.mcp import LocalServer, McpServerManager
from cecli.tools.utils.registry import ToolRegistry
Expand All @@ -50,6 +52,8 @@ def __init__(self, *args, **kwargs):
self.tool_similarity_threshold = 0.99
self.max_tool_vector_history = 10
self.read_tools = {
"command",
"commandinteractive",
"viewfilesatglob",
"viewfilesmatching",
"ls",
Expand All @@ -60,8 +64,6 @@ def __init__(self, *args, **kwargs):
"thinking",
}
self.write_tools = {
"command",
"commandinteractive",
"deletetext",
"indenttext",
"inserttext",
Expand All @@ -75,6 +77,7 @@ def __init__(self, *args, **kwargs):
self.args = kwargs.get("args")
self.files_added_in_exploration = set()
self.tool_call_count = 0
self.turn_count = 0
self.max_reflections = 15
self.use_enhanced_context = True
self._last_edited_file = None
Expand Down Expand Up @@ -239,6 +242,17 @@ async def _execute_local_tool_calls(self, tool_calls_list):
try:
args_string = tool_call.function.arguments.strip()
parsed_args_list = []

if not await HookIntegration.call_pre_tool_hooks(self, tool_name, args_string):
tool_responses.append(
{
"role": "tool",
"tool_call_id": tool_call.id,
"content": "Tool Request Aborted.",
}
)
continue

if args_string:
json_chunks = utils.split_concatenated_json(args_string)
for chunk in json_chunks:
Expand Down Expand Up @@ -291,6 +305,19 @@ async def _execute_local_tool_calls(self, tool_calls_list):
if tasks:
task_results = await asyncio.gather(*tasks)
all_results_content.extend(str(res) for res in task_results)

if not await HookIntegration.call_post_tool_hooks(
self, tool_name, args_string, "\n\n".join(all_results_content)
):
tool_responses.append(
{
"role": "tool",
"tool_call_id": tool_call.id,
"content": "Tool Response Redacted.",
}
)
continue

result_message = "\n\n".join(all_results_content)
except Exception as e:
result_message = f"Error executing {tool_name}: {e}"
Expand Down Expand Up @@ -527,6 +554,7 @@ def format_chat_chunks(self):

ConversationChunks.add_readonly_files_messages(self)
ConversationChunks.add_chat_files_messages(self)
ConversationChunks.add_file_context_messages(self)

# Add post-message context blocks (priority 250 - between CUR and REMINDER)
ConversationChunks.add_post_message_context_blocks(self)
Expand Down Expand Up @@ -687,6 +715,8 @@ async def process_tool_calls(self, tool_call_response):

await self.auto_save_session()
self.last_round_tools = []
self.turn_count += 1

if self.partial_response_tool_calls:
for tool_call in self.partial_response_tool_calls:
tool_name = getattr(tool_call.function, "name", None)
Expand Down Expand Up @@ -931,52 +961,94 @@ def _generate_tool_context(self, repetitive_tools):
"""
if not self.tool_usage_history:
return ""

if not hasattr(self, "_last_repetitive_warning_turn"):
self._last_repetitive_warning_turn = 0
self._last_repetitive_warning_severity = 0

context_parts = ['<context name="tool_usage_history" from="agent">']
context_parts.append("## Turn and Tool Call Statistics")
context_parts.append(f"- Current turn: {self.num_reflections + 1}")
context_parts.append(f"- Current turn: {self.turn_count + 1}")
context_parts.append(f"- Total tool calls this turn: {self.num_tool_calls}")
context_parts.append("\n\n")
context_parts.append("## Recent Tool Usage History")

if len(self.tool_usage_history) > 10:
recent_history = self.tool_usage_history[-10:]
context_parts.append("(Showing last 10 tools)")
else:
recent_history = self.tool_usage_history
for i, tool in enumerate(recent_history, 1):
context_parts.append(f"{i}. {tool}")

context_parts.append("\n\n")
if repetitive_tools and len(self.tool_usage_history) >= 8:
context_parts.append("""**Instruction:**
You have used the following tool(s) repeatedly:""")
context_parts.append("### DO NOT USE THE FOLLOWING TOOLS/FUNCTIONS")
for tool in repetitive_tools:
context_parts.append(f"- `{tool}`")
context_parts.append(
"Your exploration appears to be stuck in a loop. Please try a different approach."
" Use the `Thinking` tool to clarify your intentions and new approach to what you"
" are currently attempting to accomplish."
)
context_parts.append("\n")
context_parts.append("**Suggestions for alternative approaches:**")
context_parts.append(
"- If you've been searching for files, try working with the files already in"
" context"
)
context_parts.append(
"- If you've been viewing files, try making actual edits to move forward"
)
context_parts.append("- Consider using different tools that you haven't used recently")
context_parts.append(
"- Focus on making concrete progress rather than gathering more information"
)
context_parts.append(
"- Use the files you've already discovered to implement the requested changes"
)
context_parts.append("\n")
context_parts.append(
"You most likely have enough context for a subset of the necessary changes."
)
context_parts.append("Please prioritize file editing over further exploration.")
if repetitive_tools:
if self.turn_count - self._last_repetitive_warning_turn > 2:
self._last_repetitive_warning_turn = self.turn_count
self._last_repetitive_warning_severity += 1

repetition_warning = f"""
## Repetition Detected: Strategy Adjustment Required
I have detected repetitive usage of the following tools: {', '.join([f'`{t}`' for t in repetitive_tools])}.
**Constraint:** Do not repeat the exact same parameters for these tools in your next turn.
"""

if self._last_repetitive_warning_severity > 2:
self._last_repetitive_warning_severity = 0

fruit = random.choice(
[
"an apple",
"a banana",
"a cantaloupe",
"a cherry",
"a honeydew",
"an orange",
"a mango",
"a pomegranate",
"a watermelon",
]
)
animal = random.choice(
[
"a bird",
"a bear",
"a cat",
"a deer",
"a dog",
"an elephant",
"a fish",
"a fox",
"a monkey",
"a rabbit",
]
)
verb = random.choice(
[
"absorbing",
"becoming",
"creating",
"dreaming of",
"eating",
"fighting with",
"playing with",
"painting",
"smashing",
"writing a song about",
]
)

repetition_warning += f"""
### CRITICAL: Execution Loop Detected
You are currently "spinning." To break the logic trap, you must:
1. **Analyze**: Use the `Thinking` tool to summarize exactly what you have found so far and why you were stuck.
2. **Pivot**: Abandon or modify your current exploration strategy. Try focusing on different files or running tests.
3. **Reframe**: To ensure your logic reset, include a 2-sentence story about {animal} {verb} {fruit} in your thoughts.

Prioritize editing or verification over further exploration.
"""

context_parts.append(repetition_warning)
context_parts.append("</context>")
return "\n".join(context_parts)

Expand Down Expand Up @@ -1061,6 +1133,7 @@ async def preproc_user_input(self, inp):
inp = f'<context name="user_input" from="agent">\n{inp}\n</context>'

self.agent_finished = False
self.turn_count = 0
return inp

def get_directory_structure(self):
Expand Down Expand Up @@ -1139,11 +1212,11 @@ def print_tree(node, prefix="- ", indent=" ", current_path=""):

def get_todo_list(self):
"""
Generate a todo list context block from the .cecli/todo.txt file.
Generate a todo list context block from the todo.txt file.
Returns formatted string with the current todo list or None if empty/not present.
"""
try:
todo_file_path = ".cecli/todo.txt"
todo_file_path = self.local_agent_folder("todo.txt")
abs_path = self.abs_root_path(todo_file_path)
import os

Expand Down
Loading
Loading