diff --git a/README.md b/README.md index 576e50671dd..61d3ada93b1 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ## Why `cecli`? -`cecli` (probably pronounced like "Cecily", aka `aider-ce`) is a community-driven fork of the [Aider](https://aider.chat/) AI pair programming tool. +`cecli` (probably pronounced like "Cecily", aka `aider-ce`) is a community-driven fork of the [Aider](https://cecli.dev/) AI pair programming tool. Aider is a fantastic piece of software with a wonderful community but it has been painfully slow in receiving updates in the quickly evolving AI tooling space. We aim to foster an open, collaborative ecosystem where new features, experiments, and improvements can be developed and shared rapidly. We believe in genuine FOSS principles and actively welcome contributors of all skill levels. @@ -25,7 +25,7 @@ LLMs are a part of our lives from here on out so join us in learning about and c * [Custom System Prompts](https://github.com/dwash96/cecli/blob/main/cecli/website/docs/config/custom-system-prompts.md) * [Custom Tools](https://github.com/dwash96/cecli/blob/main/cecli/website/docs/config/agent-mode.md#creating-custom-tools) * [Advanced Model Configuration](https://github.com/dwash96/cecli/blob/main/cecli/website/docs/config/model-aliases.md#advanced-model-settings) -* [Aider Original Documentation (still mostly applies)](https://aider.chat/) +* [Aider Original Documentation (still mostly applies)](https://cecli.dev/) You can see a selection of the enhancements and updates by comparing the help output: @@ -74,7 +74,7 @@ enable-context-compaction: true context-compaction-max-tokens: 0.8 env-file: .cecli.env show-model-warnings: true -use-enhanced-map: true.p +use-enhanced-map: true watch-files: false tui: true diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index dea012dc552..5cb857ce121 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -873,7 +873,9 @@ async def run_test_real( from cecli import models from 
cecli.coders import Coder + from cecli.helpers.conversation import ConversationFiles, ConversationManager from cecli.io import InputOutput + from cecli.main import SwitchCoderSignal if not os.path.isdir(testdir): if dry: @@ -1053,6 +1055,7 @@ async def run_test_real( use_git=True, auto_commits=False, dirty_commits=False, + auto_lint=False, stream=False, verbose=verbose, # auto_lint=False, # disabled for code-in-json experiments @@ -1088,6 +1091,15 @@ async def run_test_real( dur = 0 test_outcomes = [] + + ConversationManager.initialize( + coder, + reset=True, + reformat=True, + ) + + ConversationFiles.reset() + for i in range(tries): start = time.time() @@ -1103,7 +1115,10 @@ async def run_test_real( await coder.apply_updates() else: - response = await coder.run(with_message=instructions, preproc=False) + try: + response = await coder.run(with_message=instructions, preproc=False) + except SwitchCoderSignal: + pass dur += time.time() - start diff --git a/benchmark/primary_variations.sh b/benchmark/primary_variations.sh index 8e41edc0bd9..31b8a653b6d 100755 --- a/benchmark/primary_variations.sh +++ b/benchmark/primary_variations.sh @@ -5,12 +5,13 @@ set -e # Exit on error # Default values -BASE_NAME="primary-variation" -EDIT_FORMAT="diff" +BASE_NAME="cecli-base-hashline-9" +EDIT_FORMAT="hashline" MAP_TOKENS="512" THREADS="1" -HASH_RE="^4" -NUM_TESTS="16" +LANGUAGES="javascript,python,rust,go,java" +HASH_RE="^[15]" +NUM_TESTS="32" EXERCISES_DIR="polyglot-benchmark" OUTPUT_DIR="tmp.benchmarks" SLEEP_BETWEEN=30 # Seconds to sleep between runs @@ -20,13 +21,11 @@ SLEEP_BETWEEN=30 # Seconds to sleep between runs # "openrouter/minimax/minimax-m2.1" # "openrouter/qwen/qwen3-vl-235b-a22b-thinking" MODELS=( -# "openrouter/deepseek/deepseek-v3.2" + "openrouter/google/gemini-3-flash-preview" + "openrouter/deepseek/deepseek-v3.2-exp" # "openrouter/moonshotai/kimi-k2.5" -# "openrouter/minimax/minimax-m2.1" -# "openrouter/minimax/minimax-m2.1" -# 
"openrouter/qwen/qwen3-vl-235b-a22b-thinking" # "openrouter/openai/gpt-oss-120b" - "openrouter/openai/gpt-5.2" +# "openrouter/openai/gpt-5.2" # "openrouter/google/gemini-3-flash-preview" # "openrouter/google/gemini-3-pro-preview" # "openrouter/anthropic/claude-haiku-4.5" @@ -118,6 +117,7 @@ run_benchmark() { --threads "$THREADS" \ --hash-re "$HASH_RE" \ --num-tests "$NUM_TESTS" \ + --languages "$LANGUAGES" \ --exercises-dir "$EXERCISES_DIR" echo "Benchmark completed: $run_name" diff --git a/cecli/__init__.py b/cecli/__init__.py index 4abb6391fe4..3323e971c76 100644 --- a/cecli/__init__.py +++ b/cecli/__init__.py @@ -1,6 +1,6 @@ from packaging import version -__version__ = "0.97.0.dev" +__version__ = "0.97.1.dev" safe_version = __version__ try: diff --git a/cecli/args.py b/cecli/args.py index 60f04324259..03efe64c8b5 100644 --- a/cecli/args.py +++ b/cecli/args.py @@ -196,6 +196,13 @@ def get_parser(default_config_files, git_root): const="agent", help="Use agent edit format for the main chat (autonomous file management)", ) + group.add_argument( + "--hashline", + action="store_const", + dest="edit_format", + const="hashline", + help="Use hashline edit format for the main chat", + ) group.add_argument( "--auto-accept-architect", action=argparse.BooleanOptionalAction, @@ -247,6 +254,12 @@ def get_parser(default_config_files, git_root): " If unspecified, defaults to the model's max_chat_history_tokens." 
), ) + group.add_argument( + "--file-diffs", + action=argparse.BooleanOptionalAction, + default=True, + help="Whether to store file diffs in context or reload files (default: True)", + ) group.add_argument( "--retries", metavar="RETRIES_JSON", @@ -297,6 +310,12 @@ def get_parser(default_config_files, git_root): help="Specify Agent Mode configuration as a JSON string", default=None, ) + group.add_argument( + "--agent-model", + metavar="AGENT_MODEL", + default=None, + help="Specify the model to use for Agent mode (default depends on --model)", + ) group.add_argument( "--auto-save", action=argparse.BooleanOptionalAction, @@ -1111,7 +1130,10 @@ def main(): shell = sys.argv[2] if shell not in shtab.SUPPORTED_SHELLS: print(f"Error: Unsupported shell '{shell}'.", file=sys.stderr) - print(f"Supported shells are: {', '.join(shtab.SUPPORTED_SHELLS)}", file=sys.stderr) + print( + f"Supported shells are: {', '.join(shtab.SUPPORTED_SHELLS)}", + file=sys.stderr, + ) sys.exit(1) parser = get_parser([], None) parser.prog = "cecli" # Set the program name on the parser @@ -1119,7 +1141,10 @@ def main(): else: print("Error: Please specify a shell for completion.", file=sys.stderr) print(f"Usage: python {sys.argv[0]} completion ", file=sys.stderr) - print(f"Supported shells are: {', '.join(shtab.SUPPORTED_SHELLS)}", file=sys.stderr) + print( + f"Supported shells are: {', '.join(shtab.SUPPORTED_SHELLS)}", + file=sys.stderr, + ) sys.exit(1) else: # Default to YAML for any other unrecognized argument, or if 'yaml' was explicitly passed diff --git a/cecli/coders/__init__.py b/cecli/coders/__init__.py index bbe3e1dd15f..2f5a90ec37f 100644 --- a/cecli/coders/__init__.py +++ b/cecli/coders/__init__.py @@ -9,6 +9,7 @@ from .editor_diff_fenced_coder import EditorDiffFencedCoder from .editor_editblock_coder import EditorEditBlockCoder from .editor_whole_coder import EditorWholeFileCoder +from .hashline_coder import HashLineCoder from .help_coder import HelpCoder from .patch_coder import 
PatchCoder from .udiff_coder import UnifiedDiffCoder @@ -35,4 +36,5 @@ ContextCoder, AgentCoder, CopyPasteCoder, + HashLineCoder, ] diff --git a/cecli/coders/agent_coder.py b/cecli/coders/agent_coder.py index 031e3b9d13d..f6f2583eaa8 100644 --- a/cecli/coders/agent_coder.py +++ b/cecli/coders/agent_coder.py @@ -39,6 +39,7 @@ class AgentCoder(Coder): edit_format = "agent" prompt_format = "agent" context_management_enabled = True + hashlines = True def __init__(self, *args, **kwargs): self.recently_removed = {} @@ -56,6 +57,7 @@ def __init__(self, *args, **kwargs): "grep", "listchanges", "shownumberedcontext", + "thinking", } self.write_tools = { "command", @@ -497,6 +499,9 @@ def format_chat_chunks(self): # Use parent's implementation which may use conversation system if flag is enabled return super().format_chat_chunks() + # Choose appropriate fence based on file content + self.choose_fence() + ConversationChunks.initialize_conversation_system(self) # Decrement mark_for_delete values before adding new messages ConversationManager.decrement_mark_for_delete() @@ -679,7 +684,7 @@ async def process_tool_calls(self, tool_call_response): """ Track tool usage before calling the base implementation. """ - self.agent_finished = False + await self.auto_save_session() self.last_round_tools = [] if self.partial_response_tool_calls: @@ -940,7 +945,7 @@ def _generate_tool_context(self, repetitive_tools): for i, tool in enumerate(recent_history, 1): context_parts.append(f"{i}. 
{tool}") context_parts.append("\n\n") - if repetitive_tools: + if repetitive_tools and len(self.tool_usage_history) >= 8: context_parts.append("""**Instruction:** You have used the following tool(s) repeatedly:""") context_parts.append("### DO NOT USE THE FOLLOWING TOOLS/FUNCTIONS") @@ -1054,6 +1059,8 @@ async def preproc_user_input(self, inp): inp = await super().preproc_user_input(inp) if inp and not inp.startswith(''): inp = f'\n{inp}\n' + + self.agent_finished = False return inp def get_directory_structure(self): diff --git a/cecli/coders/architect_coder.py b/cecli/coders/architect_coder.py index 8ce450b3259..8c378efe252 100644 --- a/cecli/coders/architect_coder.py +++ b/cecli/coders/architect_coder.py @@ -18,13 +18,20 @@ async def reply_completed(self): return tweak_responses = getattr(self.args, "tweak_responses", False) - confirmation = await self.io.confirm_ask("Edit the files?", allow_tweak=tweak_responses) + confirmation = await self.io.confirm_ask( + "Edit the files?", + allow_tweak=tweak_responses, + explicit_yes_required=not self.auto_accept_architect, + ) - if not self.auto_accept_architect and not confirmation: + if not confirmation: return if confirmation == "tweak": - content = self.io.edit_in_editor(content) + if self.tui and self.tui(): + content = self.tui().get_response_from_editor(content) + else: + content = self.io.edit_in_editor(content) await asyncio.sleep(0.1) diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py index 734fb2f2c65..7ca2288c45b 100755 --- a/cecli/coders/base_coder.py +++ b/cecli/coders/base_coder.py @@ -120,6 +120,8 @@ class Coder: num_tool_calls = 0 max_tool_calls = 25 edit_format = None + file_diffs = True + hashlines = False yield_stream = False temperature = None auto_lint = True @@ -587,6 +589,7 @@ def __init__( pass self.custom = customizations + self.file_diffs = nested.getter(self.args, "file_diffs", True) if nested.getter(self.custom, "prompt_map.all", None): prompts = 
PromptRegistry.get_prompt(nested.getter(self.custom, "prompt_map.all")) @@ -660,6 +663,7 @@ def get_announcements(self): # Model main_model = self.main_model weak_model = main_model.weak_model + agent_model = main_model.agent_model if weak_model is not main_model: prefix = "Main model" @@ -698,6 +702,10 @@ def get_announcements(self): output = f"Weak model: {weak_model.name}" lines.append(output) + if agent_model is not main_model: + output = f"Agent model: {agent_model.name}" + lines.append(output) + # Repo if self.repo: rel_repo_dir = self.repo.get_rel_repo_dir() @@ -1608,6 +1616,10 @@ async def run_one(self, user_message, preproc): if self.enable_context_compaction: await self.compact_context_if_needed() + if nested.getter(self, "agent_finished", False): + await self.auto_save_session(force=True) + break + await self.auto_save_session(force=True) def _is_url_allowed(self, url): @@ -1664,7 +1676,7 @@ async def check_for_urls(self, inp: str) -> List[str]: subject=url, group=group, allow_never=True, - explicit_yes_required=self.args.yes_always_commands, + explicit_yes_required=not self.args.yes_always_commands, ): inp += "\n\n" inp += await self.commands.execute("web", url, return_content=True) @@ -1689,13 +1701,16 @@ async def compact_context_if_needed(self, force=False, message=""): # Check if combined messages exceed the token limit, # Get messages from ConversationManager + # Get messages from ConversationManager done_messages = ConversationManager.get_messages_dict(MessageTag.DONE) cur_messages = ConversationManager.get_messages_dict(MessageTag.CUR) + diff_messages = ConversationManager.get_messages_dict(MessageTag.DIFFS) # Exclude first cur_message since that's the user's initial input done_tokens = self.summarizer.count_tokens(done_messages) cur_tokens = self.summarizer.count_tokens(cur_messages[1:] if len(cur_messages) > 1 else []) - combined_tokens = done_tokens + cur_tokens + diff_tokens = self.summarizer.count_tokens(diff_messages) + combined_tokens = 
done_tokens + cur_tokens + diff_tokens if not force and combined_tokens < self.context_compaction_max_tokens: return @@ -1803,6 +1818,14 @@ async def compact_context_if_needed(self, force=False, message=""): self.io.tool_output("...chat history compacted.") self.io.update_spinner(self.io.last_spinner_text) + + # Clear all diff messages + ConversationManager.clear_tag(MessageTag.DIFFS) + # Reset ConversationFiles cache entirely + from cecli.helpers.conversation.files import ConversationFiles + + ConversationFiles.clear_file_cache() + except Exception as e: self.io.tool_warning(f"Context compaction failed: {e}") self.io.tool_warning("Proceeding with full history for now.") @@ -3699,9 +3722,14 @@ async def apply_updates(self): confirmation = await self.io.confirm_ask("Tweak Response?", allow_tweak=True) if confirmation or confirmation == "tweak": - self.partial_response_content = self.io.edit_in_editor( - self.partial_response_content - ) + if self.tui and self.tui(): + self.partial_response_content = self.tui().get_response_from_editor( + self.partial_response_content + ) + else: + self.partial_response_content = self.io.edit_in_editor( + self.partial_response_content + ) await asyncio.sleep(0.1) @@ -3926,6 +3954,11 @@ async def run_shell_commands(self): async def handle_shell_commands(self, commands_str, group): commands = command_parser.split_shell_commands(commands_str) + + # Early return if none of the command strings have length after stripping whitespace + if not any(cmd.strip() for cmd in commands): + return + command_count = sum( 1 for cmd in commands if cmd.strip() and not cmd.strip().startswith("#") ) @@ -3933,7 +3966,7 @@ async def handle_shell_commands(self, commands_str, group): if not await self.io.confirm_ask( prompt, subject="\n".join(commands), - explicit_yes_required=self.args.yes_always_commands, + explicit_yes_required=not self.args.yes_always_commands, group=group, allow_never=True, ): @@ -3950,13 +3983,16 @@ async def handle_shell_commands(self, 
commands_str, group): self.io.tool_output() self.io.tool_output(f"Running {command}") # Add the command to input history - self.io.add_to_input_history(f"/run {command.strip()}") + # self.io.add_to_input_history(f"/run {command.strip()}") exit_status, output = await asyncio.to_thread( run_cmd, command, error_print=self.io.tool_error, cwd=self.root ) + if output: accumulated_output += f"Output from {command}\n{output}\n" + print(accumulated_output) + if accumulated_output.strip() and await self.io.confirm_ask( "Add command output to the chat?", allow_never=True ): diff --git a/cecli/coders/hashline_coder.py b/cecli/coders/hashline_coder.py new file mode 100644 index 00000000000..17d459e1baa --- /dev/null +++ b/cecli/coders/hashline_coder.py @@ -0,0 +1,813 @@ +import difflib +import json +import math +import re +import sys +from difflib import SequenceMatcher +from pathlib import Path + +from cecli import utils +from cecli.coders.base_coder import Coder +from cecli.helpers.hashline import ( + HashlineError, + apply_hashline_operations, + strip_hashline, +) + +from ..dump import dump # noqa: F401 + + +class HashLineCoder(Coder): + """A coder that uses hashlines blocks for code modifications.""" + + edit_format = "hashline" + prompt_format = "hashline" + hashlines = True + + def get_edits(self): + content = self.partial_response_content + + # might raise ValueError for malformed ORIG/UPD blocks + edits = list( + find_original_update_blocks( + content, + self.fence, + self.get_inchat_relative_files(), + ) + ) + + self.shell_commands += [edit[1] for edit in edits if edit[0] is None] + edits = [edit for edit in edits if edit[0] is not None] + + return edits + + def apply_edits_dry_run(self, edits): + return self.apply_edits(edits, dry_run=True) + + def apply_edits(self, edits, dry_run=False): + failed = [] + passed = [] + updated_edits = [] + + # Group hashline edits by file path + hashline_edits_by_file = {} + original_hashline_edits_by_file = {} + regular_edits = [] + + 
for edit in edits: + path, original, updated = edit + + # Check if this is a hashline edit (list with 3 elements) + is_hashline = False + if isinstance(original, list): + # Check if this is the original format: [start_hash, end_hash, operation] + if len(original) == 3 and all(isinstance(item, str) for item in original): + start_hash, end_hash, operation = original + + # Validate operation + if operation in ["replace", "insert", "delete"]: + # Validate hashline format + if (isinstance(start_hash, str) and "|" in start_hash) and ( + operation == "insert" or (isinstance(end_hash, str) and "|" in end_hash) + ): + if path not in hashline_edits_by_file: + hashline_edits_by_file[path] = [] + original_hashline_edits_by_file[path] = [] + + hashline_edits_by_file[path].append( + { + "start_line_hash": start_hash, + "end_line_hash": ( + end_hash if operation != "insert" else start_hash + ), + "operation": operation, + "text": updated if operation in ["replace", "insert"] else None, + } + ) + original_hashline_edits_by_file[path].append(edit) + is_hashline = True + + # Check if this is the transformed format: list of dictionaries (from dry run) + elif ( + not is_hashline + and isinstance(original, list) + and len(original) > 0 + and isinstance(original[0], dict) + and "start_line_hash" in original[0] + ): + if path not in hashline_edits_by_file: + hashline_edits_by_file[path] = [] + original_hashline_edits_by_file[path] = [] + + for op in original: + hashline_edits_by_file[path].append(op) + + # If it's already transformed, we should ideally keep it as is for dry run + # but the issue is that it might have been transformed by a previous dry run + # and we want to avoid double-wrapping or losing the "original" nature. 
+ original_hashline_edits_by_file[path].append(edit) + is_hashline = True + + if not is_hashline: + regular_edits.append(edit) + + # Process hashline edits in batch for each file + for path, operations in hashline_edits_by_file.items(): + full_path = self.abs_root_path(path) + new_content = None + + if Path(full_path).exists(): + try: + content = self.io.read_text(full_path) + # Apply all hashline operations for this file in batch + new_content, _, _ = apply_hashline_operations( + original_content=strip_hashline(content), + operations=operations, + ) + + if dry_run: + # For dry runs, preserve the original edit format + updated_edits.extend(original_hashline_edits_by_file[path]) + else: + updated_edits.append((path, operations, "Batch hashline operations")) + + if new_content: + if not dry_run: + self.io.write_text(full_path, new_content) + passed.append((path, operations, "Batch hashline operations")) + else: + # No changes or failed + failed.append((path, operations, "Batch hashline operations")) + + except (ValueError, HashlineError) as e: + # Record failure + failed.append((path, operations, f"Hashline batch operation failed: {e}")) + continue + else: + # File doesn't exist + failed.append((path, operations, "File not found")) + + # Process regular edits one by one (existing logic) + for edit in regular_edits: + path, original, updated = edit + full_path = self.abs_root_path(path) + new_content = None + + if not isinstance(original, str) or not isinstance(updated, str): + continue + + if Path(full_path).exists(): + content = self.io.read_text(full_path) + new_content = do_replace(full_path, content, original, updated, self.fence) + + # If the edit failed, and + # this is not a "create a new file" with an empty original... 
+ # https://github.com/Aider-AI/aider/issues/2258 + if not new_content and ( + (isinstance(original, str) and original.strip()) + or (isinstance(original, list) and len(original) > 0) + ): + # try patching any of the other files in the chat + for full_path in self.abs_fnames: + content = self.io.read_text(full_path) + new_content = do_replace(full_path, content, original, updated, self.fence) + if new_content: + path = self.get_rel_fname(full_path) + break + + updated_edits.append((path, original, updated)) + + if new_content: + if not dry_run: + self.io.write_text(full_path, new_content) + passed.append(edit) + else: + failed.append(edit) + + if dry_run: + return updated_edits + + if not failed: + return + + blocks = "block" if len(failed) == 1 else "blocks" + + res = f"# {len(failed)} LOCATE/CONTENTS {blocks} failed to match!\n" + for edit in failed: + path, original, updated = edit + + full_path = self.abs_root_path(path) + content = self.io.read_text(full_path) + + res += f""" +## SearchReplaceNoExactMatch: This LOCATE block failed to exactly match lines in {path} +<<<<<<< LOCATE +{original}======= +{updated}>>>>>>> CONTENTS + +Please try this operation again using the hashline range that you want to modify +""" + if isinstance(original, str): + did_you_mean = find_similar_lines(original, content) + if did_you_mean: + res += f"""Did you mean to match some of these actual lines from {path}? + +{self.fence[0]} +{did_you_mean} +{self.fence[1]} + +""" + if updated in content and updated: + res += f"""Are you sure you need this LOCATE/CONTENTS block? +The CONTENTS lines are already in {path}! + +""" + did_you_mean = find_similar_lines(original, content) + if did_you_mean: + res += f"""Did you mean to match some of these actual lines from {path}? + +{self.fence[0]} +{did_you_mean} +{self.fence[1]} + +""" + + if updated in content and updated: + res += f"""Are you sure you need this LOCATE/CONTENTS block? +The CONTENTS lines are already in {path}! 
+ +""" + res += ( + "The search section must be a valid JSON array in the format:\n" + '["{start hashline}", "{end hashline}", "{operation}"]\n' + "Hashline prefixes must have the structure `{line_num}|{hash_fragment}` (e.g., `20|Bv`)" + " and match one found directly in the file" + ) + if passed: + pblocks = "block" if len(passed) == 1 else "blocks" + res += f""" +# The other {len(passed)} LOCATE/CONTENTS {pblocks} were applied successfully. +Don't re-send them. +Just reply with fixed versions of the {blocks} above that failed to match. +""" + raise ValueError(res) + + +def prep(content): + if content and not content.endswith("\n"): + content += "\n" + lines = content.splitlines(keepends=True) + return content, lines + + +def perfect_or_whitespace(whole_lines, part_lines, replace_lines): + # Try for a perfect match + res = perfect_replace(whole_lines, part_lines, replace_lines) + if res: + return res + + # Try being flexible about leading whitespace + res = replace_part_with_missing_leading_whitespace(whole_lines, part_lines, replace_lines) + if res: + return res + + +def perfect_replace(whole_lines, part_lines, replace_lines): + part_tup = tuple(part_lines) + part_len = len(part_lines) + + for i in range(len(whole_lines) - part_len + 1): + whole_tup = tuple(whole_lines[i : i + part_len]) + if part_tup == whole_tup: + res = whole_lines[:i] + replace_lines + whole_lines[i + part_len :] + return "".join(res) + + +def replace_most_similar_chunk(whole, part, replace): + """Best efforts to find the `part` lines in `whole` and replace them with `replace`""" + + whole, whole_lines = prep(whole) + part, part_lines = prep(part) + replace, replace_lines = prep(replace) + + res = perfect_or_whitespace(whole_lines, part_lines, replace_lines) + if res: + return res + + # drop leading empty line, GPT sometimes adds them spuriously (issue #25) + if len(part_lines) > 2 and not part_lines[0].strip(): + skip_blank_line_part_lines = part_lines[1:] + res = 
perfect_or_whitespace(whole_lines, skip_blank_line_part_lines, replace_lines) + if res: + return res + + # Try to handle when it elides code with ... + try: + res = try_dotdotdots(whole, part, replace) + if res: + return res + except ValueError: + pass + + return + # Try fuzzy matching + res = replace_closest_edit_distance(whole_lines, part, part_lines, replace_lines) + if res: + return res + + +def try_dotdotdots(whole, part, replace): + """ + See if the edit block has ... lines. + If not, return none. + + If yes, try and do a perfect edit with the ... chunks. + If there's a mismatch or otherwise imperfect edit, raise ValueError. + + If perfect edit succeeds, return the updated whole. + """ + + dots_re = re.compile(r"(^\s*\.\.\.\n)", re.MULTILINE | re.DOTALL) + + part_pieces = re.split(dots_re, part) + replace_pieces = re.split(dots_re, replace) + + if len(part_pieces) != len(replace_pieces): + raise ValueError("Unpaired ... in LOCATE/CONTENTS block") + + if len(part_pieces) == 1: + # no dots in this edit block, just return None + return + + # Compare odd strings in part_pieces and replace_pieces + all_dots_match = all(part_pieces[i] == replace_pieces[i] for i in range(1, len(part_pieces), 2)) + + if not all_dots_match: + raise ValueError("Unmatched ... in LOCATE/CONTENTS block") + + part_pieces = [part_pieces[i] for i in range(0, len(part_pieces), 2)] + replace_pieces = [replace_pieces[i] for i in range(0, len(replace_pieces), 2)] + + pairs = zip(part_pieces, replace_pieces) + for part, replace in pairs: + if not part and not replace: + continue + + if not part and replace: + if not whole.endswith("\n"): + whole += "\n" + whole += replace + continue + + if whole.count(part) == 0: + raise ValueError + if whole.count(part) > 1: + raise ValueError + + whole = whole.replace(part, replace, 1) + + return whole + + +def replace_part_with_missing_leading_whitespace(whole_lines, part_lines, replace_lines): + # GPT often messes up leading whitespace. 
+ # It usually does it uniformly across the ORIG and UPD blocks. + # Either omitting all leading whitespace, or including only some of it. + + # Outdent everything in part_lines and replace_lines by the max fixed amount possible + leading = [len(p) - len(p.lstrip()) for p in part_lines if p.strip()] + [ + len(p) - len(p.lstrip()) for p in replace_lines if p.strip() + ] + + if leading and min(leading): + num_leading = min(leading) + part_lines = [p[num_leading:] if p.strip() else p for p in part_lines] + replace_lines = [p[num_leading:] if p.strip() else p for p in replace_lines] + + # can we find an exact match not including the leading whitespace + num_part_lines = len(part_lines) + + for i in range(len(whole_lines) - num_part_lines + 1): + add_leading = match_but_for_leading_whitespace( + whole_lines[i : i + num_part_lines], part_lines + ) + + if add_leading is None: + continue + + replace_lines = [add_leading + rline if rline.strip() else rline for rline in replace_lines] + whole_lines = whole_lines[:i] + replace_lines + whole_lines[i + num_part_lines :] + return "".join(whole_lines) + + return None + + +def match_but_for_leading_whitespace(whole_lines, part_lines): + num = len(whole_lines) + + # does the non-whitespace all agree? + if not all(whole_lines[i].lstrip() == part_lines[i].lstrip() for i in range(num)): + return + + # are they all offset the same? 
+ add = set( + whole_lines[i][: len(whole_lines[i]) - len(part_lines[i])] + for i in range(num) + if whole_lines[i].strip() + ) + + if len(add) != 1: + return + + return add.pop() + + +def replace_closest_edit_distance(whole_lines, part, part_lines, replace_lines): + similarity_thresh = 0.8 + + max_similarity = 0 + most_similar_chunk_start = -1 + most_similar_chunk_end = -1 + + scale = 0.1 + min_len = math.floor(len(part_lines) * (1 - scale)) + max_len = math.ceil(len(part_lines) * (1 + scale)) + + for length in range(min_len, max_len): + for i in range(len(whole_lines) - length + 1): + chunk = whole_lines[i : i + length] + chunk = "".join(chunk) + + similarity = SequenceMatcher(None, chunk, part).ratio() + + if similarity > max_similarity and similarity: + max_similarity = similarity + most_similar_chunk_start = i + most_similar_chunk_end = i + length + + if max_similarity < similarity_thresh: + return + + modified_whole = ( + whole_lines[:most_similar_chunk_start] + + replace_lines + + whole_lines[most_similar_chunk_end:] + ) + modified_whole = "".join(modified_whole) + + return modified_whole + + +DEFAULT_FENCE = ("`" * 3, "`" * 3) + + +def strip_quoted_wrapping(res, fname=None, fence=DEFAULT_FENCE): + """ + Given an input string which may have extra "wrapping" around it, remove the wrapping. 
+ For example: + + filename.ext + ``` + We just want this content + Not the filename and triple quotes + ``` + """ + if not res: + return res + + res = res.splitlines() + + if fname and res[0].strip().endswith(Path(fname).name): + res = res[1:] + + if res[0].startswith(fence[0]) and res[-1].startswith(fence[1]): + res = res[1:-1] + + res = "\n".join(res) + if res and res[-1] != "\n": + res += "\n" + + return res + + +def do_replace(fname, content, before_text, after_text, fence=None): + # Check if before_text is a hashline JSON block (list with 3 elements) + if isinstance(before_text, list) and len(before_text) == 3: + pass + + # Original logic for regular edit blocks + before_text = strip_quoted_wrapping(before_text, fname, fence) + after_text = strip_quoted_wrapping(after_text, fname, fence) + fname = Path(fname) + + # does it want to make a new file? + if not fname.exists() and not before_text.strip(): + fname.touch() + content = "" + + if content is None: + return + + if not before_text.strip(): + # append to existing file, or start a new file + new_content = content + after_text + else: + new_content = replace_most_similar_chunk(content, before_text, after_text) + + return new_content + + +HEAD = r"^<{5,9} LOCATE>?\s*$" +DIVIDER = r"^={5,9}\s*$" +UPDATED = r"^>{5,9} CONTENTS\s*$" + +HEAD_ERR = "<<<<<<< LOCATE" +DIVIDER_ERR = "=======" +UPDATED_ERR = ">>>>>>> CONTENTS" + +separators = "|".join([HEAD, DIVIDER, UPDATED]) + +split_re = re.compile(r"^((?:" + separators + r")[ ]*\n)", re.MULTILINE | re.DOTALL) + + +missing_filename_err = ( + "Bad/missing filename. 
The filename must be alone on the line before the opening fence" + " {fence[0]}" +) + +# Always be willing to treat triple-backticks as a fence when searching for filenames +triple_backticks = "`" * 3 + + +def strip_filename(filename, fence): + filename = filename.strip() + + if filename == "...": + return + + start_fence = fence[0] + if filename.startswith(start_fence): + candidate = filename[len(start_fence) :] + if candidate and ("." in candidate or "/" in candidate): + return candidate + return + + if filename.startswith(triple_backticks): + candidate = filename[len(triple_backticks) :] + if candidate and ("." in candidate or "/" in candidate): + return candidate + return + + filename = filename.rstrip(":") + filename = filename.lstrip("#") + filename = filename.strip() + filename = filename.strip("`") + filename = filename.strip("*") + + # https://github.com/Aider-AI/aider/issues/1158 + # filename = filename.replace("\\_", "_") + + return filename + + +def find_original_update_blocks(content, fence=DEFAULT_FENCE, valid_fnames=None): + lines = content.splitlines(keepends=True) + i = 0 + current_filename = None + + head_pattern = re.compile(HEAD) + divider_pattern = re.compile(DIVIDER) + updated_pattern = re.compile(UPDATED) + + while i < len(lines): + line = lines[i] + + # Check for shell code blocks + shell_starts = [ + "```bash", + "```sh", + "```shell", + "```cmd", + "```batch", + "```powershell", + "```ps1", + "```zsh", + "```fish", + "```ksh", + "```csh", + "```tcsh", + ] + + # Check if the next line or the one after that is an editblock + next_is_editblock = ( + i + 1 < len(lines) + and head_pattern.match(lines[i + 1].strip()) + or i + 2 < len(lines) + and head_pattern.match(lines[i + 2].strip()) + ) + + if any(line.strip().startswith(start) for start in shell_starts) and not next_is_editblock: + shell_content = [] + i += 1 + while i < len(lines) and not lines[i].strip().startswith("```"): + shell_content.append(lines[i]) + i += 1 + if i < len(lines) and 
lines[i].strip().startswith("```"): + i += 1 # Skip the closing ``` + + yield None, "".join(shell_content) + continue + + # Check for LOCATE/CONTENTS blocks + if head_pattern.match(line.strip()): + try: + # if next line after HEAD exists and is DIVIDER, it's a new file + if i + 1 < len(lines) and divider_pattern.match(lines[i + 1].strip()): + filename = find_filename(lines[max(0, i - 3) : i], fence, None) + else: + filename = find_filename(lines[max(0, i - 3) : i], fence, valid_fnames) + + if not filename: + if current_filename: + filename = current_filename + else: + raise ValueError(missing_filename_err.format(fence=fence)) + + current_filename = filename + + original_text = [] + i += 1 + while i < len(lines) and not divider_pattern.match(lines[i].strip()): + original_text.append(lines[i]) + i += 1 + + if i >= len(lines) or not divider_pattern.match(lines[i].strip()): + raise ValueError(f"Expected `{DIVIDER_ERR}`") + + updated_text = [] + i += 1 + while i < len(lines) and not ( + updated_pattern.match(lines[i].strip()) + or divider_pattern.match(lines[i].strip()) + ): + updated_text.append(lines[i]) + i += 1 + + if i >= len(lines) or not ( + updated_pattern.match(lines[i].strip()) + or divider_pattern.match(lines[i].strip()) + ): + raise ValueError(f"Expected `{UPDATED_ERR}` or `{DIVIDER_ERR}`") + + original_text_str = "".join(original_text) + updated_text_str = "".join(updated_text) + + # Check if original_text is a hashline JSON block + try: + # Try to parse as JSON + parsed = json.loads(original_text_str.strip()) + # Check if it's a list with 3 elements (start_hash, end_hash, operation) + if isinstance(parsed, list) and len(parsed) == 3: + # Validate the format: all strings + if all(isinstance(item, str) for item in parsed): + # Check if first two items look like hashline format (e.g., "1|ab") + if all("|" in item for item in parsed[:2]): + # Check if operation is valid + if parsed[2] in ["replace", "insert", "delete"]: + # This is a hashline JSON block + 
yield filename, parsed, updated_text_str + continue + except (json.JSONDecodeError, ValueError): + # Not a valid JSON, treat as regular edit block + pass + + yield filename, original_text_str, updated_text_str + + except ValueError as e: + processed = "".join(lines[: i + 1]) + err = e.args[0] + raise ValueError(f"{processed}\n^^^ {err}") + + i += 1 + + +def find_filename(lines, fence, valid_fnames): + """ + Deepseek Coder v2 has been doing this: + + + ```python + word_count.py + ``` + ```python + <<<<<<< LOCATE + ... + + This is a more flexible search back for filenames. + """ + + if valid_fnames is None: + valid_fnames = [] + + # Go back through the 3 preceding lines + lines.reverse() + lines = lines[:3] + + filenames = [] + for line in lines: + # If we find a filename, done + filename = strip_filename(line, fence) + if filename: + filenames.append(filename) + + # Only continue as long as we keep seeing fences + if not line.startswith(fence[0]) and not line.startswith(triple_backticks): + break + + if not filenames: + return + + # pick the *best* filename found + + # Check for exact match first + for fname in filenames: + if fname in valid_fnames: + return fname + + # Check for partial match (basename match) + for fname in filenames: + for vfn in valid_fnames: + if fname == Path(vfn).name: + return vfn + + # Perform fuzzy matching with valid_fnames + for fname in filenames: + close_matches = difflib.get_close_matches(fname, valid_fnames, n=1, cutoff=0.8) + if len(close_matches) == 1: + return close_matches[0] + + # If no fuzzy match, look for a file w/extension + for fname in filenames: + if "." 
in fname: + return fname + + if filenames: + return filenames[0] + + +def find_similar_lines(search_lines, content_lines, threshold=0.6): + search_lines = search_lines.splitlines() + content_lines = content_lines.splitlines() + + best_ratio = 0 + best_match = None + + for i in range(len(content_lines) - len(search_lines) + 1): + chunk = content_lines[i : i + len(search_lines)] + ratio = SequenceMatcher(None, search_lines, chunk).ratio() + if ratio > best_ratio: + best_ratio = ratio + best_match = chunk + best_match_i = i + + if best_ratio < threshold: + return "" + + if best_match[0] == search_lines[0] and best_match[-1] == search_lines[-1]: + return "\n".join(best_match) + + N = 5 + best_match_end = min(len(content_lines), best_match_i + len(search_lines) + N) + best_match_i = max(0, best_match_i - N) + + best = content_lines[best_match_i:best_match_end] + return "\n".join(best) + + +def main(): + history_md = Path(sys.argv[1]).read_text() + if not history_md: + return + + messages = utils.split_chat_history_markdown(history_md) + + for msg in messages: + msg = msg["content"] + edits = list(find_original_update_blocks(msg)) + + for fname, before, after in edits: + # Compute diff + diff = difflib.unified_diff( + before.splitlines(keepends=True), + after.splitlines(keepends=True), + fromfile="before", + tofile="after", + ) + diff = "".join(diff) + dump(before) + dump(after) + dump(diff) + + +if __name__ == "__main__": + main() diff --git a/cecli/commands/__init__.py b/cecli/commands/__init__.py index ef7dff9dad5..3f438898fde 100644 --- a/cecli/commands/__init__.py +++ b/cecli/commands/__init__.py @@ -7,6 +7,7 @@ from .add import AddCommand from .agent import AgentCommand +from .agent_model import AgentModelCommand from .architect import ArchitectCommand from .ask import AskCommand from .clear import ClearCommand @@ -26,6 +27,7 @@ from .editor_model import EditorModelCommand from .exit import ExitCommand from .git import GitCommand +from .hashline import 
HashlineCommand from .help import HelpCommand from .history_search import HistorySearchCommand from .lint import LintCommand @@ -77,6 +79,7 @@ # Register commands CommandRegistry.register(AddCommand) CommandRegistry.register(AgentCommand) +CommandRegistry.register(AgentModelCommand) CommandRegistry.register(ArchitectCommand) CommandRegistry.register(AskCommand) CommandRegistry.register(ClearCommand) @@ -96,6 +99,7 @@ CommandRegistry.register(EditorModelCommand) CommandRegistry.register(ExitCommand) CommandRegistry.register(GitCommand) +CommandRegistry.register(HashlineCommand) CommandRegistry.register(HelpCommand) CommandRegistry.register(HistorySearchCommand) CommandRegistry.register(LintCommand) @@ -136,6 +140,7 @@ __all__ = [ "AddCommand", "AgentCommand", + "AgentModelCommand", "ArchitectCommand", "AskCommand", "BaseCommand", @@ -163,6 +168,7 @@ "get_available_files", "GitCommand", "glob_filtered_to_repo", + "HashlineCommand", "HelpCommand", "HistorySearchCommand", "LintCommand", diff --git a/cecli/commands/agent_model.py b/cecli/commands/agent_model.py new file mode 100644 index 00000000000..64d1e4a807c --- /dev/null +++ b/cecli/commands/agent_model.py @@ -0,0 +1,143 @@ +from typing import List + +import cecli.models as models +from cecli.commands.utils.base_command import BaseCommand +from cecli.commands.utils.helpers import format_command_result +from cecli.helpers.conversation import ConversationManager, MessageTag + + +class AgentModelCommand(BaseCommand): + NORM_NAME = "agent-model" + DESCRIPTION = "Switch the Agent Model to a new LLM" + + @classmethod + async def execute(cls, io, coder, args, **kwargs): + """Execute the agent-model command with given parameters.""" + arg_split = args.split(" ", 1) + model_name = arg_split[0].strip() + if not model_name: + # If no model name provided, show current agent model + current_agent_model = coder.main_model.agent_model.name + io.tool_output(f"Current agent model: {current_agent_model}") + return 
format_command_result( + io, "agent-model", f"Displayed current agent model: {current_agent_model}" + ) + + # Create a new model with the same main model and editor model, but updated agent model + model = models.Model( + coder.main_model.name, + editor_model=coder.main_model.editor_model.name, + weak_model=coder.main_model.weak_model.name, + agent_model=model_name, + io=io, + retries=coder.main_model.retries, + debug=coder.main_model.debug, + ) + await models.sanity_check_models(io, model) + + if len(arg_split) > 1: + # implement architect coder-like generation call for agent model + message = arg_split[1].strip() + + # Store the original model configuration + original_main_model = coder.main_model + original_edit_format = coder.edit_format + + # Create a temporary coder with the new model + from cecli.coders import Coder + + kwargs = dict() + kwargs["main_model"] = model + kwargs["edit_format"] = coder.edit_format # Keep the same edit format + kwargs["suggest_shell_commands"] = False + kwargs["total_cost"] = coder.total_cost + kwargs["num_cache_warming_pings"] = 0 + kwargs["summarize_from_coder"] = False + kwargs["done_messages"] = [] + kwargs["cur_messages"] = [] + + new_kwargs = dict(io=io, from_coder=coder) + new_kwargs.update(kwargs) + + # Save current conversation state + original_coder = coder + + temp_coder = await Coder.create(**new_kwargs) + + # Re-initialize ConversationManager with temp coder + ConversationManager.initialize( + temp_coder, + reset=True, + reformat=True, + preserve_tags=[MessageTag.DONE, MessageTag.CUR], + ) + + verbose = kwargs.get("verbose", False) + if verbose: + temp_coder.show_announcements() + + try: + await temp_coder.generate(user_message=message, preproc=False) + coder.total_cost = temp_coder.total_cost + coder.coder_commit_hashes = temp_coder.coder_commit_hashes + + # Clear manager and restore original state + ConversationManager.initialize( + original_coder, + reset=True, + reformat=True, + preserve_tags=[MessageTag.DONE, 
MessageTag.CUR], + ) + + # Restore the original model configuration + from cecli.commands import SwitchCoderSignal + + raise SwitchCoderSignal( + main_model=original_main_model, edit_format=original_edit_format + ) + except Exception as e: + # If there's an error, still restore the original model + if not isinstance(e, SwitchCoderSignal): + io.tool_error(str(e)) + raise SwitchCoderSignal( + main_model=original_main_model, edit_format=original_edit_format + ) + else: + # Re-raise SwitchCoderSignal if that's what was thrown + raise + else: + from cecli.commands import SwitchCoderSignal + + raise SwitchCoderSignal(main_model=model, edit_format=coder.edit_format) + + @classmethod + def get_completions(cls, io, coder, args) -> List[str]: + """Get completion options for agent-model command.""" + return models.get_chat_model_names() + + @classmethod + def get_help(cls) -> str: + """Get help text for the agent-model command.""" + help_text = super().get_help() + help_text += "\nUsage:\n" + help_text += " /agent-model # Switch to a new agent model\n" + help_text += ( + " /agent-model # Use a specific agent model for a single" + " prompt\n" + ) + help_text += "\nExamples:\n" + help_text += ( + " /agent-model gpt-4o-mini # Switch to GPT-4o Mini as agent model\n" + ) + help_text += ( + " /agent-model claude-3-haiku # Switch to Claude 3 Haiku as agent model\n" + ) + help_text += ' /agent-model o1-mini "review this code" # Use o1-mini to review code\n' + help_text += ( + "\nWhen switching agent models, the main model and editor model remain unchanged.\n" + ) + help_text += ( + "\nIf you provide a prompt after the model name, that agent model will be used\n" + ) + help_text += "just for that prompt, then you'll return to your original agent model.\n" + return help_text diff --git a/cecli/commands/clear.py b/cecli/commands/clear.py index 47a816f0906..64a1c21c45c 100644 --- a/cecli/commands/clear.py +++ b/cecli/commands/clear.py @@ -11,10 +11,17 @@ class ClearCommand(BaseCommand): 
@classmethod async def execute(cls, io, coder, args, **kwargs): # Clear chat history using ConversationManager - from cecli.helpers.conversation import ConversationManager, MessageTag + from cecli.helpers.conversation import ( + ConversationFiles, + ConversationManager, + MessageTag, + ) ConversationManager.clear_tag(MessageTag.CUR) ConversationManager.clear_tag(MessageTag.DONE) + ConversationManager.clear_tag(MessageTag.DIFFS) + + ConversationFiles.reset() # Clear TUI output if available if coder.tui and coder.tui(): diff --git a/cecli/commands/editor_model.py b/cecli/commands/editor_model.py index de3b581cb2d..646604f9463 100644 --- a/cecli/commands/editor_model.py +++ b/cecli/commands/editor_model.py @@ -28,6 +28,7 @@ async def execute(cls, io, coder, args, **kwargs): coder.main_model.name, editor_model=model_name, weak_model=coder.main_model.weak_model.name, + agent_model=coder.main_model.agent_model.name, io=io, retries=coder.main_model.retries, debug=coder.main_model.debug, diff --git a/cecli/commands/hashline.py b/cecli/commands/hashline.py new file mode 100644 index 00000000000..58690a77f75 --- /dev/null +++ b/cecli/commands/hashline.py @@ -0,0 +1,43 @@ +from typing import List + +from cecli.commands.utils.base_command import BaseCommand + + +class HashlineCommand(BaseCommand): + NORM_NAME = "hashline" + DESCRIPTION = ( + "Enter agent mode to autonomously discover and manage relevant files. If no prompt" + " provided, switches to hashline mode." 
+ ) + + @classmethod + async def execute(cls, io, coder, args, **kwargs): + """Execute the agent command with given parameters.""" + + return await cls._generic_chat_command( + io, coder, args, "hashline", placeholder=args.strip() or None + ) + + @classmethod + def get_completions(cls, io, coder, args) -> List[str]: + """Get completion options for agent command.""" + # The original completions_agent raises CommandCompletionException + # This is handled by the completion system + from cecli.io import CommandCompletionException + + raise CommandCompletionException() + + @classmethod + def get_help(cls) -> str: + """Get help text for the agent command.""" + help_text = super().get_help() + help_text += "\nUsage:\n" + help_text += " /hashline # Enter hashline mode\n" + help_text += "\nExamples:\n" + help_text += ( + " /hashline Fix this bug # Use hashline edit format to autonomously fix a bug\n" + ) + help_text += ( + " /hashline Add a new feature # Use hashline edit format to implement a feature\n" + ) + return help_text diff --git a/cecli/commands/run.py b/cecli/commands/run.py index 8225f61f36e..8bd186404c3 100644 --- a/cecli/commands/run.py +++ b/cecli/commands/run.py @@ -72,6 +72,9 @@ async def execute(cls, io, coder, args, **kwargs): elif add and exit_status != 0: io.placeholder = "What's wrong? 
Fix" + if add_on_nonzero_exit and not exit_status: + return "" # No test failures + # Return None if output wasn't added or command succeeded return format_command_result(io, "run", "Command executed successfully") diff --git a/cecli/commands/settings.py b/cecli/commands/settings.py index 2dd7f6010ab..864db4686bf 100644 --- a/cecli/commands/settings.py +++ b/cecli/commands/settings.py @@ -30,6 +30,7 @@ async def execute(cls, io, coder, args, **kwargs): ("Main model", coder.main_model), ("Editor model", getattr(coder.main_model, "editor_model", None)), ("Weak model", getattr(coder.main_model, "weak_model", None)), + ("Agent model", getattr(coder.main_model, "agent_model", None)), ] for label, model in active_models: if not model: diff --git a/cecli/commands/tokens.py b/cecli/commands/tokens.py index 11e785b7d28..49651332bc2 100644 --- a/cecli/commands/tokens.py +++ b/cecli/commands/tokens.py @@ -58,8 +58,10 @@ async def execute(cls, io, coder, args, **kwargs): # chat history msgs_done = ConversationManager.get_messages_dict(tag=MessageTag.DONE) msgs_cur = ConversationManager.get_messages_dict(tag=MessageTag.CUR) + msgs_diffs = ConversationManager.get_messages_dict(tag=MessageTag.DIFFS) tokens_done = 0 tokens_cur = 0 + tokens_diffs = 0 if msgs_done: tokens_done = coder.main_model.token_count(msgs_done) @@ -67,8 +69,15 @@ async def execute(cls, io, coder, args, **kwargs): if msgs_cur: tokens_cur = coder.main_model.token_count(msgs_cur) + if msgs_diffs: + tokens_diffs = coder.main_model.token_count(msgs_diffs) + if tokens_cur + tokens_done: res.append((tokens_cur + tokens_done, "chat history", "use /clear to clear")) + # Add separate line for diffs if they exist + + if tokens_diffs: + res.append((tokens_diffs, "file diffs", "part of chat history")) # repo map if coder.repo_map: @@ -98,18 +107,17 @@ async def execute(cls, io, coder, args, **kwargs): for msg in readonly_msgs: # Extract file name from message content content = msg.get("content", "") - if 
content.startswith("File Contents"): + if content.startswith("Original File Contents For"): # Extract file path from "File Contents {path}:" - lines = content.split("\n", 1) + lines = content.split("\n", 3) if lines: - file_line = lines[0] - if file_line.startswith("File Contents"): - fname = file_line[13:].rstrip(":") - # Calculate tokens for this message - tokens = coder.main_model.token_count([msg]) - if fname not in file_tokens: - file_tokens[fname] = 0 - file_tokens[fname] += tokens + file_line = lines[1] + fname = file_line.strip() + # Calculate tokens for this message + tokens = coder.main_model.token_count([msg]) + if fname not in file_tokens: + file_tokens[fname] = 0 + file_tokens[fname] += tokens elif "image_file" in msg: # Handle image files fname = msg.get("image_file") @@ -134,18 +142,17 @@ async def execute(cls, io, coder, args, **kwargs): for msg in msgs: # Extract file name from message content content = msg.get("content", "") - if content.startswith("File Contents"): + if content.startswith("Original File Contents For"): # Extract file path from "File Contents {path}:" - lines = content.split("\n", 1) + lines = content.split("\n", 3) if lines: - file_line = lines[0] - if file_line.startswith("File Contents"): - fname = file_line[13:].rstrip(":") - # Calculate tokens for this message - tokens = coder.main_model.token_count([msg]) - if fname not in editable_file_tokens: - editable_file_tokens[fname] = 0 - editable_file_tokens[fname] += tokens + file_line = lines[1] + fname = file_line.strip() + # Calculate tokens for this message + tokens = coder.main_model.token_count([msg]) + if fname not in editable_file_tokens: + editable_file_tokens[fname] = 0 + editable_file_tokens[fname] += tokens elif "image_file" in msg: # Handle image files fname = msg.get("image_file") diff --git a/cecli/help.py b/cecli/help.py index d87e6aa7ea2..22ae6b03104 100755 --- a/cecli/help.py +++ b/cecli/help.py @@ -13,6 +13,11 @@ warnings.simplefilter("ignore", 
category=FutureWarning) +os.environ["TQDM_DISABLE"] = "1" +os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1" +os.environ["HF_DATASETS_DISABLE_PROGRESS_BARS"] = "1" +os.environ["TOKENIZERS_PARALLELISM"] = "true" + async def install_help_extra(io): pip_install_cmd = [ @@ -102,10 +107,14 @@ def get_index(): class Help: def __init__(self): + from huggingface_hub.utils import disable_progress_bars from llama_index.core import Settings from llama_index.embeddings.huggingface import HuggingFaceEmbedding + from transformers import logging + + disable_progress_bars() + logging.set_verbosity_error() - os.environ["TOKENIZERS_PARALLELISM"] = "true" Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5") index = get_index() self.retriever = index.as_retriever(similarity_top_k=20) diff --git a/cecli/helpers/conversation/base_message.py b/cecli/helpers/conversation/base_message.py index 65aa519a04b..052259b528e 100644 --- a/cecli/helpers/conversation/base_message.py +++ b/cecli/helpers/conversation/base_message.py @@ -29,11 +29,12 @@ class BaseMessage: timestamp: int = field(default_factory=lambda: time.monotonic_ns()) mark_for_delete: Optional[int] = field(default=None) hash_key: Optional[Tuple[str, ...]] = field(default=None) - message_id: str = field(init=False) + message_id: Optional[str] = field(default=None) def __post_init__(self): """Generate message ID after initialization.""" - self.message_id = self.generate_id() + if self.message_id is None: + self.message_id = self.generate_id() # Validate message structure if "role" not in self.message_dict: diff --git a/cecli/helpers/conversation/files.py b/cecli/helpers/conversation/files.py index 2266ce54a41..b4fe510a6c1 100644 --- a/cecli/helpers/conversation/files.py +++ b/cecli/helpers/conversation/files.py @@ -1,8 +1,8 @@ -import difflib import os import weakref from typing import Any, Dict, Optional +from cecli.helpers.hashline import get_hashline_content_diff, hashline from cecli.repomap import 
RepoMap from .manager import ConversationManager @@ -70,6 +70,8 @@ def add_file( coder = cls.get_coder() try: content = coder.io.read_text(abs_fname) + if coder.hashlines: + content = hashline(content) except Exception: content = "" # Empty content for unreadable files @@ -183,6 +185,8 @@ def generate_diff(cls, fname: str) -> Optional[str]: rel_fname = coder.get_rel_fname(fname) try: current_content = coder.io.read_text(abs_fname) + if coder.hashlines: + current_content = hashline(current_content) except Exception: return None @@ -195,18 +199,14 @@ def generate_diff(cls, fname: str) -> Optional[str]: abs_fname, cls._file_contents_original[abs_fname] ) - # Generate diff between snapshot and current content - diff_lines = difflib.unified_diff( - snapshot_content.splitlines(), - current_content.splitlines(), + # Generate diff between snapshot and current content using hashline helper + diff_text = get_hashline_content_diff( + old_content=snapshot_content, + new_content=current_content, fromfile=f"{rel_fname} (snapshot)", tofile=f"{rel_fname} (current)", - lineterm="", - n=3, ) - diff_text = "\n".join([line for line in list(diff_lines)]) - # If there's a diff, update the last snapshot with current content if diff_text.strip(): cls._file_contents_snapshot[abs_fname] = current_content @@ -240,23 +240,12 @@ def update_file_diff(cls, fname: str) -> Optional[str]: # Add diff message to conversation diff_message = { "role": "user", - "content": ( - f"File {rel_fname} has changed. 
Here is a diff of the changes:\n\n{diff}" - ), + "content": f"File Diff For:\n{rel_fname}\n\n{diff}", } - if coder and hasattr(coder, "abs_fnames"): - tag = ( - MessageTag.EDIT_FILES - if abs_fname in coder.abs_fnames - else MessageTag.CHAT_FILES - ) - else: - tag = MessageTag.CHAT_FILES - ConversationManager.add_message( message_dict=diff_message, - tag=tag, + tag=MessageTag.DIFFS, ) return diff diff --git a/cecli/helpers/conversation/integration.py b/cecli/helpers/conversation/integration.py index 4c6c0d60c8b..ba26651084e 100644 --- a/cecli/helpers/conversation/integration.py +++ b/cecli/helpers/conversation/integration.py @@ -37,7 +37,6 @@ def add_system_messages(cls, coder) -> None: Args: coder: The coder instance """ - # Add system prompt system_prompt = coder.gpt_prompts.main_system if system_prompt: # Apply system_prompt_prefix if set on the model @@ -45,16 +44,20 @@ def add_system_messages(cls, coder) -> None: system_prompt = coder.main_model.system_prompt_prefix + "\n" + system_prompt ConversationManager.add_message( - message_dict={"role": "system", "content": system_prompt}, + message_dict={"role": "system", "content": coder.fmt_system_prompt(system_prompt)}, tag=MessageTag.SYSTEM, + hash_key=("main", "system_prompt"), + force=True, ) # Add examples if available if hasattr(coder.gpt_prompts, "example_messages"): example_messages = coder.gpt_prompts.example_messages for i, msg in enumerate(example_messages): + msg_copy = msg.copy() + msg_copy["content"] = coder.fmt_system_prompt(msg_copy["content"]) ConversationManager.add_message( - message_dict=msg, + message_dict=msg_copy, tag=MessageTag.EXAMPLES, priority=75 + i, # Slight offset for ordering within examples ) @@ -68,6 +71,8 @@ def add_system_messages(cls, coder) -> None: ConversationManager.add_message( message_dict=msg, tag=MessageTag.REMINDER, + hash_key=("main", "system_reminder"), + force=True, ) @classmethod @@ -79,6 +84,50 @@ def cleanup_files(cls, coder) -> None: Args: coder: The coder instance 
""" + + """ + # Check diff message ratio and clear if too many diffs + diff_messages = ConversationManager.get_messages_dict(MessageTag.DIFFS) + read_only_messages = ConversationManager.get_messages_dict(MessageTag.READONLY_FILES) + chat_messages = ConversationManager.get_messages_dict(MessageTag.CHAT_FILES) + edit_messages = ConversationManager.get_messages_dict(MessageTag.EDIT_FILES) + + # Calculate token counts for token-based ratio check + diff_tokens = coder.main_model.token_count(diff_messages) if diff_messages else 0 + + # Calculate tokens for readonly, chat, and edit tag messages + other_tokens = 0 + if read_only_messages: + other_tokens += coder.main_model.token_count(read_only_messages) + if chat_messages: + other_tokens += coder.main_model.token_count(chat_messages) + if edit_messages: + other_tokens += coder.main_model.token_count(edit_messages) + + # Calculate message counts for message-based ratio check + diff_count = len(diff_messages) + other_count = len(read_only_messages) + len(chat_messages) + len(edit_messages) + + # Clear diff messages and file caches if EITHER: + # 1. Diff tokens > 33% of other message tokens (token-based check) + # 2. 
Diff message count ratio > 5:1 (message count-based check for periodic refresh) + should_clear = False + + # Token-based check + if diff_tokens > 0 and other_tokens > 0 and diff_tokens / other_tokens > 0.33: + should_clear = True + + # Message count-based check (for periodic refresh) + if diff_count > 0 and other_count > 0 and diff_count / other_count > 5: + should_clear = True + + if should_clear: + # Clear all diff messages + ConversationManager.clear_tag(MessageTag.DIFFS) + # Clear ConversationFiles caches to force regeneration + ConversationFiles.clear_file_cache() + """ + # Get all tracked files (both regular and image files) tracked_files = ConversationFiles.get_all_tracked_files() @@ -342,6 +391,7 @@ def add_readonly_files_messages(cls, coder) -> List[Dict[str, Any]]: List of read-only file messages """ messages = [] + refresh = not coder.file_diffs # Separate image files from regular files regular_files = [] @@ -363,11 +413,7 @@ def add_readonly_files_messages(cls, coder) -> List[Dict[str, Any]]: # Process regular files for fname in regular_files: # First, add file to cache and check for changes - ConversationFiles.add_file(fname) - - # Check if file has changed and add diff message if needed - if ConversationFiles.has_file_changed(fname): - ConversationFiles.update_file_diff(fname) + ConversationFiles.add_file(fname, force_refresh=refresh) # Get file content (with proper caching and stub generation) content = ConversationFiles.get_file_stub(fname) @@ -375,18 +421,25 @@ def add_readonly_files_messages(cls, coder) -> List[Dict[str, Any]]: # Add user message with file path as hash_key rel_fname = coder.get_rel_fname(fname) + # Create user message + file_preamble = "Original File Contents For:" + file_postamble = "Modifications will be communicated as diff messages.\n\n" + + if refresh: + file_preamble = "Current File Contents For:" + file_postamble = "" + user_msg = { "role": "user", - "content": ( - f"Here are the original file contents for 
{rel_fname}:\n\n{content}" - "\n\nModifications will be communicated as diff messages." - ), + "content": f"{file_preamble}\n{rel_fname}\n\n{content}\n\n{file_postamble}", } ConversationManager.add_message( message_dict=user_msg, tag=MessageTag.READONLY_FILES, hash_key=("file_user", fname), # Use file path as part of hash_key + force=True, + update_timestamp=False, ) messages.append(user_msg) @@ -399,9 +452,15 @@ def add_readonly_files_messages(cls, coder) -> List[Dict[str, Any]]: message_dict=assistant_msg, tag=MessageTag.READONLY_FILES, hash_key=("file_assistant", fname), # Use file path as part of hash_key + force=True, + update_timestamp=False, ) messages.append(assistant_msg) + # Check if file has changed and add diff message if needed + if ConversationFiles.has_file_changed(fname): + ConversationFiles.update_file_diff(fname) + # Handle image files using coder.get_images_message() if image_files: image_messages = coder.get_images_message(image_files) @@ -445,6 +504,7 @@ def add_chat_files_messages(cls, coder) -> Dict[str, Any]: Dictionary with chat_files and edit_files lists """ result = {"chat_files": [], "edit_files": []} + refresh = not coder.file_diffs if not hasattr(coder, "abs_fnames"): return result @@ -463,11 +523,7 @@ def add_chat_files_messages(cls, coder) -> Dict[str, Any]: # Process regular files for fname in regular_files: # First, add file to cache and check for changes - ConversationFiles.add_file(fname) - - # Check if file has changed and add diff message if needed - if ConversationFiles.has_file_changed(fname): - ConversationFiles.update_file_diff(fname) + ConversationFiles.add_file(fname, force_refresh=refresh) # Get file content (with proper caching and stub generation) content = ConversationFiles.get_file_stub(fname) @@ -477,12 +533,16 @@ def add_chat_files_messages(cls, coder) -> Dict[str, Any]: rel_fname = coder.get_rel_fname(fname) # Create user message + file_preamble = "Original File Contents For:" + file_postamble = "Modifications 
will be communicated as diff messages.\n\n" + + if refresh: + file_preamble = "Current File Contents For:" + file_postamble = "" + user_msg = { "role": "user", - "content": ( - f"Here are the original file contents for {rel_fname}:\n\n{content}" - "\n\nModifications will be communicated as diff messages." - ), + "content": f"{file_preamble}\n{rel_fname}\n\n{content}\n\n{file_postamble}", } # Create assistant message @@ -500,6 +560,8 @@ def add_chat_files_messages(cls, coder) -> Dict[str, Any]: message_dict=user_msg, tag=tag, hash_key=("file_user", fname), # Use file path as part of hash_key + force=True, + update_timestamp=False, ) # Add assistant message to ConversationManager with file path as hash_key @@ -507,8 +569,14 @@ def add_chat_files_messages(cls, coder) -> Dict[str, Any]: message_dict=assistant_msg, tag=tag, hash_key=("file_assistant", fname), # Use file path as part of hash_key + force=True, + update_timestamp=False, ) + # Check if file has changed and add diff message if needed + if ConversationFiles.has_file_changed(fname): + ConversationFiles.update_file_diff(fname) + # Handle image files using coder.get_images_message() if image_files: image_messages = coder.get_images_message(image_files) diff --git a/cecli/helpers/conversation/manager.py b/cecli/helpers/conversation/manager.py index 3d1e275afd4..285d8e249f8 100644 --- a/cecli/helpers/conversation/manager.py +++ b/cecli/helpers/conversation/manager.py @@ -89,6 +89,8 @@ def initialize( timestamp=time.monotonic_ns(), # Updated timestamp mark_for_delete=msg.mark_for_delete, force=True, + update_timestamp=True, + message_id=msg.message_id, ) # Enable debug mode if coder has verbose attribute and it's True @@ -119,6 +121,8 @@ def add_message( mark_for_delete: Optional[int] = None, hash_key: Optional[Tuple[str, ...]] = None, force: bool = False, + update_timestamp: bool = True, + message_id: Optional[str] = None, ) -> BaseMessage: """ Idempotently add message if hash not already present. 
@@ -132,6 +136,7 @@ def add_message( mark_for_delete: Countdown for deletion (None = permanent) hash_key: Custom hash key for message identification force: If True, update existing message with same hash + update_timestamp: If True, update timestamp when force=True (default True) Returns: The created or updated BaseMessage instance @@ -158,6 +163,7 @@ def add_message( timestamp=timestamp, mark_for_delete=mark_for_delete, hash_key=hash_key, + message_id=message_id, ) # Check if message already exists @@ -169,7 +175,8 @@ def add_message( existing_message.message_dict = message_dict existing_message.tag = tag.value existing_message.priority = priority - existing_message.timestamp = timestamp + if update_timestamp: + existing_message.timestamp = timestamp existing_message.mark_for_delete = mark_for_delete # Clear cache for this tag and all messages cache since message was updated cls._tag_cache.pop(tag.value, None) diff --git a/cecli/helpers/conversation/tags.py b/cecli/helpers/conversation/tags.py index 15246070c60..9755b198ea7 100644 --- a/cecli/helpers/conversation/tags.py +++ b/cecli/helpers/conversation/tags.py @@ -7,7 +7,7 @@ class MessageTag(str, Enum): Enumeration of message tags matching current chunk types. 
Fixed set of valid tags matching current chunk types: - - SYSTEM, STATIC, EXAMPLES, REPO, READONLY_FILES, CHAT_FILES, EDIT_FILES, CUR, DONE, REMINDER + - SYSTEM, STATIC, EXAMPLES, REPO, READONLY_FILES, CHAT_FILES, EDIT_FILES, DIFFS, CUR, DONE, REMINDER """ SYSTEM = "system" @@ -17,6 +17,7 @@ class MessageTag(str, Enum): READONLY_FILES = "readonly_files" CHAT_FILES = "chat_files" EDIT_FILES = "edit_files" + DIFFS = "diffs" CUR = "cur" DONE = "done" REMINDER = "reminder" @@ -32,6 +33,7 @@ class MessageTag(str, Enum): MessageTag.READONLY_FILES: 200, MessageTag.CHAT_FILES: 200, MessageTag.EDIT_FILES: 200, + MessageTag.DIFFS: 200, MessageTag.DONE: 200, MessageTag.CUR: 200, MessageTag.REMINDER: 300, @@ -48,6 +50,7 @@ class MessageTag(str, Enum): MessageTag.READONLY_FILES: 0, MessageTag.CHAT_FILES: 0, MessageTag.EDIT_FILES: 0, + MessageTag.DIFFS: 0, MessageTag.DONE: 0, MessageTag.CUR: 0, MessageTag.REMINDER: 0, diff --git a/cecli/helpers/hashline.py b/cecli/helpers/hashline.py new file mode 100644 index 00000000000..47c8008618f --- /dev/null +++ b/cecli/helpers/hashline.py @@ -0,0 +1,1229 @@ +import difflib +import re + +import xxhash + +# Regex patterns for hashline parsing +# Format: {line_number}|{hash_fragment}| +HASHLINE_PREFIX_RE = re.compile(r"^(-?\d+)\|([a-zA-Z]{2})\|") +# Format: {line_number}|{hash_fragment} +PARSE_NEW_FORMAT_RE = re.compile(r"^(-?\d+)\|([a-zA-Z]{2})$") +# Format: {hash_fragment}|{line_number} +PARSE_OLD_FORMAT_RE = re.compile(r"^([a-zA-Z]{2})\|(-?\d+)$") + + +class HashlineError(Exception): + """Custom exception for hashline-specific errors.""" + + pass + + +def hashline(text: str, start_line: int = 1) -> str: + """ + Add a hash scheme to each line of text. 
+ + For each line in the input text, returns a string where each line is prefixed with: + "{line number}|{2-digit base52 of xxhash mod 52^2}|{line contents}" + + Args: + text: Input text (most likely representing a file's text) + start_line: Starting line number (default: 1) + + Returns: + String with hash scheme added to each line + """ + lines = text.splitlines(keepends=True) + result_lines = [] + + for i, line in enumerate(lines, start=start_line): + # Calculate xxhash for the line content + hash_value = xxhash.xxh3_64_intdigest(line.encode("utf-8")) + + # Use mod 52^2 (2704) for faster computation + mod_value = hash_value % 2704 # 52^2 = 2704 + + # Convert to 2-digit base52 using helper function + last_two_str = int_to_2digit_52(mod_value) + + # Format the line + formatted_line = f"{i}|{last_two_str}|{line}" + result_lines.append(formatted_line) + + return "".join(result_lines) + + +def int_to_2digit_52(n: int) -> str: + """ + Convert integer to 2-digit base52 with 'a' padding. + + Base52 uses characters: a-z (lowercase) and A-Z (uppercase). + + Args: + n: Integer in range 0-2703 (52^2 - 1) + + Returns: + 2-character base52 string + """ + # Ensure n is in valid range + n = n % 2704 # 52^2 + + # Convert to base52 + if n == 0: + return "aa" + + digits = [] + while n > 0: + n, remainder = divmod(n, 52) + if remainder < 26: + # a-z (lowercase) + digits.append(chr(remainder + ord("a"))) + else: + # A-Z (uppercase) + digits.append(chr(remainder - 26 + ord("A"))) + + # Pad to 2 digits with 'a' + while len(digits) < 2: + digits.append("a") + + # Return in correct order (most significant first) + return "".join(reversed(digits)) + + +def strip_hashline(text: str) -> str: + """ + Remove hashline-like sequences from the start of every line. + + Removes prefixes that match the pattern: "{line number}|{2-digit base52}|" + where line number can be any integer (positive, negative, or zero) and + the 2-digit base52 is exactly 2 characters from the set [a-zA-Z]. 
+ + Args: + text: Input text with hashline prefixes + + Returns: + String with hashline prefixes removed from each line + """ + lines = text.splitlines(keepends=True) + result_lines = [] + for line in lines: + # Remove the hashline prefix if present + stripped_line = HASHLINE_PREFIX_RE.sub("", line, count=1) + result_lines.append(stripped_line) + + return "".join(result_lines) + + +def parse_hashline(hashline_str: str): + """ + Parse a hashline string into hash fragment and line number. + + Args: + hashline_str: Hashline format string: "{line_num}|{hash_fragment}" + + Returns: + tuple: (hash_fragment, line_num_str, line_num) + + Raises: + HashlineError: If format is invalid + """ + if hashline_str is None: + raise HashlineError("Hashline string cannot be None") + + try: + hashline_str = hashline_str.rstrip("|") + + # Try new format first: {line_num}|{hash_fragment} + match = PARSE_NEW_FORMAT_RE.match(hashline_str) + if match: + line_num_str, hash_fragment = match.groups() + return hash_fragment, line_num_str, int(line_num_str) + + # Try old order with new separator: {hash_fragment}|{line_num} + match = PARSE_OLD_FORMAT_RE.match(hashline_str) + if match: + hash_fragment, line_num_str = match.groups() + return hash_fragment, line_num_str, int(line_num_str) + + raise HashlineError(f"Invalid hashline format '{hashline_str}'") + except (ValueError, AttributeError) as e: + raise HashlineError(f"Invalid hashline format '{hashline_str}': {e}") + + +def normalize_hashline(hashline_str: str) -> str: + """ + Normalize a hashline string to the proper "{line_num}|{hash_fragment}" format. + + Accepts hashline strings in either "{hash_fragment}|{line_num}" format or + "{line_num}|{hash_fragment}" format and returns it in the proper format. 
+ + Args: + hashline_str: Hashline string in either format + + Returns: + str: Hashline string in "{line_num}|{hash_fragment}" format + + Raises: + HashlineError: If format is invalid + """ + if hashline_str is None: + raise HashlineError("Hashline string cannot be None") + + # Try to parse as "{line_num}|{hash_fragment}" first (preferred) + match1 = PARSE_NEW_FORMAT_RE.match(hashline_str) + if match1: + return hashline_str + + # Try to parse as "{hash_fragment}|{line_num}" + match2 = PARSE_OLD_FORMAT_RE.match(hashline_str) + if match2: + hash_fragment, line_num_str = match2.groups() + return f"{line_num_str}|{hash_fragment}" + + # If neither pattern matches, raise error + raise HashlineError( + f"Invalid hashline format '{hashline_str}'. " + "Expected either '{line_num}|{hash_fragment}' or '{hash_fragment}|{line_num}' " + "where hash_fragment is exactly 2 letters and line_num is an integer." + ) + + +def find_hashline_by_exact_match(hashed_lines, hash_fragment, line_num_str): + """ + Find a hashline by exact line_num|hash_fragment match. + + Args: + hashed_lines: List of hashed lines + hash_fragment: Hash fragment to match + line_num_str: Line number as string + + Returns: + int: Index of matching line, or None if not found + """ + for i, line in enumerate(hashed_lines): + if line.startswith(f"{line_num_str}|{hash_fragment}|"): + return i + return None + + +def find_hashline_by_fragment(hashed_lines, hash_fragment, target_line_num=None): + """ + Find a hashline by hash fragment only. + + Args: + hashed_lines: List of hashed lines + hash_fragment: Hash fragment to search for + target_line_num: Optional target line number to find closest match + + Returns: + int: Index of line with matching hash fragment, or None if not found. + If target_line_num is provided, returns the match with smallest + absolute distance to target_line_num. 
+ """ + matches = [] + for i, line in enumerate(hashed_lines): + parts = line.split("|", 2) + if len(parts) < 3: + continue + line_hash_fragment = parts[1] + if line_hash_fragment == hash_fragment: + if target_line_num is None: + return i # Return first match for backward compatibility + + # Extract line number from hashline + line_num_part = parts[0] + try: + line_num = int(line_num_part) + distance = abs(line_num - target_line_num) + matches.append((distance, i, line_num)) + except ValueError: + # If line number can't be parsed, treat as distance 0 + matches.append((0, i, 0)) + + if not matches: + return None + + if target_line_num is None: + # Should not reach here if target_line_num is None (returned above) + return matches[0][1] if matches else None + + # Return the match with smallest distance, preferring later instances when distances are equal + matches.sort(key=lambda x: (x[0], -x[2])) + return matches[0][1] + + +def find_hashline_range( + hashed_lines, + start_line_hash, + end_line_hash, + allow_exact_match=True, +): + """ + Find start and end line indices in hashed content. 
+ + Args: + hashed_lines: List of hashed lines + start_line_hash: Hashline format for start line + end_line_hash: Hashline format for end line + allow_exact_match: Whether to try exact match first (default: True) + + Returns: + tuple: (found_start_line, found_end_line) + + Raises: + HashlineError: If range cannot be found or is invalid + """ + # Parse start_line_hash + start_hash_fragment, start_line_num_str, start_line_num = parse_hashline(start_line_hash) + + # Try to find start line + found_start_line = None + if allow_exact_match: + found_start_line = find_hashline_by_exact_match( + hashed_lines, start_hash_fragment, start_line_num_str + ) + + if found_start_line is None: + found_start_line = find_hashline_by_fragment( + hashed_lines, start_hash_fragment, start_line_num + ) + + if found_start_line is None: + raise HashlineError(f"Start line hash fragment '{start_hash_fragment}' not found in file") + + # Parse end_line_hash + end_hash_fragment, end_line_num_str, end_line_num = parse_hashline(end_line_hash) + + # Try to find end line + found_end_line = None + if allow_exact_match: + found_end_line = find_hashline_by_exact_match( + hashed_lines, end_hash_fragment, end_line_num_str + ) + + if found_end_line is None: + # Calculate line distance + line_distance = end_line_num - start_line_num + if line_distance < 0: + raise HashlineError( + f"End line {end_line_num} must be equal to or after start line {start_line_num}" + ) + + # Check if end hash fragment exists at the expected distance + expected_found_end_line = found_start_line + line_distance + if expected_found_end_line >= len(hashed_lines): + raise HashlineError( + f"Start hash fragment found at line {found_start_line + 1}, but " + f"end line {expected_found_end_line + 1} is out of range." 
+ ) + + # Check if end hash fragment matches at the expected position + # If not, use find_hashline_by_fragment() to find the closest match + actual_end_hashed_line = hashed_lines[expected_found_end_line] + actual_end_hash_fragment = actual_end_hashed_line.split(":", 1)[0] + + if actual_end_hash_fragment != end_hash_fragment: + # Instead of raising an error, try to find the closest matching hash fragment + # near where the end line would be based on distance from start line + found_end_line = find_hashline_by_fragment( + hashed_lines, end_hash_fragment, expected_found_end_line + ) + if found_end_line is None: + raise HashlineError( + f"End line hash fragment '{end_hash_fragment}' not found near " + f"expected position {expected_found_end_line + 1}." + ) + else: + found_end_line = expected_found_end_line + + # Verify end line is not before start line + if found_end_line < found_start_line: + raise HashlineError( + f"End line {found_end_line + 1} must be equal to or after start line" + f" {found_start_line + 1}" + ) + + return found_start_line, found_end_line + + +def apply_hashline_operation( + original_content, + start_line_hash, + end_line_hash=None, + operation="replace", + text=None, +): + """ + Apply an operation (replace, insert, delete) using hashline ranges. + + Uses regex/find to locate hashline ranges in the content and applies + the specified operation directly. + + Note: Ranges are inclusive of both start and end boundaries. + For example, a range from line 3 to line 6 includes lines 3, 4, 5, and 6. 
+ + Args: + original_content: Original file content + start_line_hash: Hashline format for start line: "{hash_fragment}:{line_num}" + end_line_hash: Hashline format for end line: "{hash_fragment}:{line_num}" (optional for insert operations) + operation: One of "replace", "insert", or "delete" + text: Text to insert or replace with (required for replace/insert operations) + + Returns: + Modified content after applying the operation + + Raises: + HashlineError: If hashline verification fails or operation is invalid + """ + # Handle empty content as a special case + if original_content == "" or original_content is None: + if operation == "insert" or operation == "replace": + if text is None: + raise HashlineError( + f"Text parameter is required for '{operation}' operation on empty file" + ) + # For empty files, just return the text to insert/replace with + return text if text.endswith("\n") else text + "\n" + elif operation == "delete": + # Deleting from empty file returns empty + return "" + else: + # Should not happen due to validation above, but handle anyway + raise HashlineError(f"Invalid operation '{operation}' for empty file") + + # Validate operation + valid_operations = {"replace", "insert", "delete"} + if operation not in valid_operations: + raise HashlineError( + f"Invalid operation '{operation}'. 
Must be one of: {', '.join(valid_operations)}" + ) + + # Validate text parameter for replace/insert operations + if operation in {"replace", "insert"} and text is None: + raise HashlineError(f"Text parameter is required for '{operation}' operation") + + # Build operation dictionary for apply_hashline_operations + op_dict = { + "start_line_hash": start_line_hash, + "operation": operation, + } + + if end_line_hash is not None: + op_dict["end_line_hash"] = end_line_hash + + if text is not None: + op_dict["text"] = text + + # Call apply_hashline_operations with single operation + modified_content, successful_ops, failed_ops = apply_hashline_operations( + original_content, [op_dict] + ) + + # Check if operation failed + if failed_ops: + raise HashlineError(failed_ops[0]["error"]) + + return modified_content + + +def extract_hashline_range( + original_content, + start_line_hash, + end_line_hash, +): + """ + Extract the content between hashline markers. + + Args: + original_content: Original file content + start_line_hash: Hashline format for start line: "{hash_fragment}:{line_num}" + end_line_hash: Hashline format for end line: "{hash_fragment}:{line_num}" + + Returns: + str: The extracted content between the hashline markers (with hashline prefixes preserved) + + Raises: + HashlineError: If hashline verification fails + """ + # Normalize hashline inputs + start_line_hash = normalize_hashline(start_line_hash) + end_line_hash = normalize_hashline(end_line_hash) + + # Apply hashline to original content to find the range + hashed_original = hashline(original_content) + hashed_lines = hashed_original.splitlines(keepends=True) + + # Use find_hashline_range to locate the range + found_start_line, found_end_line = find_hashline_range( + hashed_lines, + start_line_hash, + end_line_hash, + allow_exact_match=True, + ) + + # Now we have the exact range in the hashed content + # Extract the original content from the range + original_range_lines = hashed_lines[found_start_line : 
found_end_line + 1] + original_range_content = "".join(original_range_lines) + + # Return the hashed content (with hashline prefixes preserved) + return original_range_content + + +def find_best_line(content, target_line_num, content_to_lines, used_lines, hashlines): + """ + Find the best matching line for given content near target_line_num. + + This helper function is used by get_hashline_content_diff to handle duplicate lines. + It finds the line number closest to the target position that hasn't been used yet. + + Args: + content: The content to find + target_line_num: The target line number we're trying to match + content_to_lines: Dictionary mapping content to list of line numbers where it appears + used_lines: Set of line numbers that have already been used + hashlines: List of hashline-prefixed lines + + Returns: + tuple: (best_line_num, best_hashline) or None if not found + """ + if content not in content_to_lines: + return None + + # Get all line numbers where this content appears + line_numbers = content_to_lines[content] + + # Filter out already used lines + available_lines = [ln for ln in line_numbers if ln not in used_lines] + + if not available_lines: + return None + + # Find the line closest to the target line number + # For diffs, we want the line that's in the right position + best_line_num = min(available_lines, key=lambda ln: abs(ln - target_line_num)) + return best_line_num, hashlines[best_line_num - 1] # Convert to 0-based index + + +def get_hashline_diff( + original_content, + start_line_hash, + end_line_hash, + operation, + text=None, +): + """ + Generate a diff for a hashline operation in the format used by the original format_output. + Returns a diff between the original range content and the replacement text. 
+ + Args: + original_content: Original file content + start_line_hash: Hashline format for start line: "{hash_fragment}:{line_num}" + end_line_hash: Hashline format for end line: "{hash_fragment}:{line_num}" + operation: One of "replace", "insert", or "delete" + text: Text to insert or replace with (required for replace/insert operations) + + Returns: + str: A formatted diff snippet showing changes, or empty string if no changes + + Raises: + HashlineError: If hashline verification fails or operation is invalid + """ + + if operation == "insert": + end_line_hash = start_line_hash + + # Extract the original range content using the new helper method + # This now returns the hashed content with hashlines preserved + original_range_content = extract_hashline_range( + original_content=original_content, + start_line_hash=start_line_hash, + end_line_hash=end_line_hash, + ) + + # Parse start_line_hash to get the start line number + try: + _, start_line_num_str, start_line_num = parse_hashline(start_line_hash) + except ValueError as e: + raise HashlineError(f"Invalid start_line_hash format '{start_line_hash}': {e}") + + # For delete operation, we're removing the range + if operation == "delete": + find_text = original_range_content + replace_text = "" + # For insert operation, we're inserting after the range + elif operation == "insert": + find_text = "" + # For insert operations, we need to calculate hashlines for the text to insert + # The text should be hashed starting at the line after the end line + if text: + # Parse end_line_hash to get the end line number + try: + _, end_line_num_str, end_line_num = parse_hashline(end_line_hash) + except ValueError as e: + raise HashlineError(f"Invalid end_line_hash format '{end_line_hash}': {e}") + # Insert after the end line, so start hashline at end_line_num + 1 + replace_text = hashline(text, start_line=end_line_num + 1) + else: + replace_text = "" + # For replace operation, we're replacing the range + elif operation == 
"replace": + find_text = original_range_content + # For replace operations, the replacement text should be hashed starting at the start line + if text: + replace_text = hashline(text, start_line=start_line_num) + else: + replace_text = "" + else: + raise HashlineError( + f"Invalid operation '{operation}'. Must be one of: replace, insert, delete" + ) + + # Generate diff in the same format as original format_output + # Use splitlines(keepends=True) to preserve line endings for accurate hash comparison + find_lines = find_text.splitlines(keepends=True) + replace_lines = replace_text.splitlines(keepends=True) + + # Strip line endings for difflib comparison but keep them in the actual lines + diff = difflib.unified_diff( + [line.rstrip("\r\n") for line in find_lines], + [line.rstrip("\r\n") for line in replace_lines], + lineterm="", + n=1, + ) + + # Skip header lines (first 2 lines) as in original format_output + diff_lines = list(diff)[2:] + + if diff_lines: + return "\n".join([line for line in diff_lines]) + else: + return "" + + +CHUNK_HEADER_RE = re.compile(r"@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? 
@@") + + +def _parse_content_for_diff(content: str): + """Helper to parse hashline content in a single pass.""" + hashlines = [] + content_to_lines = {} + content_only_lines = [] + + for line_num, line in enumerate(content.splitlines(keepends=True), 1): + if "|" in line: + parts = line.split("|", 1) + if len(parts) == 2: + line_content = parts[1].rstrip("\r\n") + hashline_prefixed = line.rstrip("\r\n") + hashlines.append(hashline_prefixed) + content_only_lines.append(line_content) + if line_content not in content_to_lines: + content_to_lines[line_content] = [] + content_to_lines[line_content].append(line_num) + continue + + # Line without hashline prefix or malformed + stripped = line.rstrip("\r\n") + hashlines.append(stripped) + content_only_lines.append(stripped) + if stripped not in content_to_lines: + content_to_lines[stripped] = [] + content_to_lines[stripped].append(line_num) + + return hashlines, content_to_lines, content_only_lines + + +def get_hashline_content_diff( + old_content: str, new_content: str, fromfile: str = "", tofile: str = "", context_lines: int = 1 +) -> str: + """ + Generate a unified diff between two hashline-prefixed contents. + + This function generates a content-only diff first, then uses it as a template + to build a hashline diff that only shows actual content changes. 
+ + Args: + old_content: Old content with hashline prefixes + new_content: New content with hashline prefixes + fromfile: Optional filename for the old content in diff header + tofile: Optional filename for the new content in diff header + + Returns: + str: Unified diff string, or empty string if no changes + """ + if old_content == new_content: + return "" + + # Generate content-only versions by stripping hashline prefixes + # Parse the original hashline content into lists for lookup + old_hashlines, old_content_to_lines, old_content_lines = _parse_content_for_diff(old_content) + new_hashlines, new_content_to_lines, new_content_lines = _parse_content_for_diff(new_content) + + # Generate content-only diff + content_diff = difflib.unified_diff( + old_content_lines, + new_content_lines, + fromfile=fromfile, + tofile=tofile, + lineterm="", + n=context_lines, + ) + content_diff_lines = list(content_diff) + + # If there's no content change, return empty string + if not content_diff_lines: + return "" + + # Build the hashline diff using the content diff as a template + # We need to track which lines have been "used" to handle duplicates + old_used_lines = set() + new_used_lines = set() + hashline_diff_lines = [] + + # Parse the content diff to understand line numbers + current_old_line = 1 + current_new_line = 1 + + for line in content_diff_lines: + if line.startswith("---") or line.startswith("+++") or line.startswith("@@"): + # Keep headers and chunk headers as-is + hashline_diff_lines.append(line) + + # Parse chunk header to update line numbers + if line.startswith("@@"): + match = CHUNK_HEADER_RE.match(line) + if match: + current_old_line = int(match.group(1)) + current_new_line = int(match.group(3)) + elif line.startswith(" ") or line.startswith("-") or line.startswith("+"): + # This is a content line + marker = line[0] + content = line[1:] + + if marker == " ": + # Context line - exists in both + # Try to find matching line in new content first + result = 
find_best_line( + content, current_new_line, new_content_to_lines, new_used_lines, new_hashlines + ) + if result: + best_line_num, best_hashline = result + hashline_diff_lines.append(f" {best_hashline}") + # Mark this line as used + new_used_lines.add(best_line_num) + else: + # Fallback to old content + result = find_best_line( + content, + current_old_line, + old_content_to_lines, + old_used_lines, + old_hashlines, + ) + if result: + best_line_num, best_hashline = result + hashline_diff_lines.append(f" {best_hashline}") + # Mark this line as used + old_used_lines.add(best_line_num) + else: + # Fallback: use the content as-is + hashline_diff_lines.append(line) + + current_old_line += 1 + current_new_line += 1 + + elif marker == "-": + # Line removed - exists in old + result = find_best_line( + content, current_old_line, old_content_to_lines, old_used_lines, old_hashlines + ) + if result: + best_line_num, best_hashline = result + hashline_diff_lines.append(f"-{best_hashline}") + # Mark this line as used + old_used_lines.add(best_line_num) + else: + # Fallback: use the content as-is + hashline_diff_lines.append(line) + + current_old_line += 1 + + elif marker == "+": + # Line added - exists in new + result = find_best_line( + content, current_new_line, new_content_to_lines, new_used_lines, new_hashlines + ) + if result: + best_line_num, best_hashline = result + hashline_diff_lines.append(f"+{best_hashline}") + # Mark this line as used + new_used_lines.add(best_line_num) + else: + # Fallback: use the content as-is + hashline_diff_lines.append(line) + + current_new_line += 1 + else: + # Unknown line type, keep as-is + hashline_diff_lines.append(line) + + diff_text = "\n".join(hashline_diff_lines) + return diff_text if diff_text.strip() else "" + + +def _apply_start_stitching( + hashed_lines, + start_idx, + end_idx, + replacement_lines, + resolved_ops, + current_resolved, + max_overlap_check=3, +): + """ + Check for overlapping lines BEFORE the replacement range and 
adjust start_idx and replacement_lines. + + This handles cases where the replacement text contains lines that already exist before the target range. + It "stitches" the replacement at the matching line to prevent duplicate code structures. + + Args: + hashed_lines: List of hashed lines from the file + start_idx: Current start index of the replacement range + end_idx: Current end index of the replacement range + replacement_lines: List of replacement lines to insert + resolved_ops: List of all resolved operations + current_resolved: The current operation being processed + max_overlap_check: Maximum number of lines to check for overlap (default: 3) + + Returns: + tuple: (new_start_idx, new_replacement_lines) - adjusted start index and replacement lines + """ + if start_idx > 0: + # Get the lines before the replacement range (up to max_overlap_check lines) + lines_before_range = hashed_lines[max(0, start_idx - max_overlap_check) : start_idx] + + # Strip hashlines from lines_before_range for comparison + lines_before_range_stripped = [strip_hashline(line) for line in lines_before_range] + + # Normalize newlines for comparison + lines_before_range_normalized = [] + for line in lines_before_range_stripped: + if line.endswith("\n"): + lines_before_range_normalized.append(line) + else: + lines_before_range_normalized.append(line + "\n") + + # Check for overlapping lines from the beginning of replacement_lines + # We check each line from the beginning of replacement_lines to see if it exists + # in lines_before_range, starting from the END (closest to replacement range) + for i in range(min(max_overlap_check, len(replacement_lines))): + # Check line from the beginning of replacement_lines + line_idx = i + + # Get the line and strip hashline + replacement_line = replacement_lines[line_idx] + replacement_line_stripped = strip_hashline(replacement_line) + + # Normalize newline for comparison + if not replacement_line_stripped.endswith("\n"): + replacement_line_stripped += "\n" 
+ + # Skip stitching for empty lines only + # Empty lines are too common and don't indicate meaningful duplication + trimmed_line = replacement_line_stripped.strip() + if not trimmed_line: + continue + + # Check if this line exists in lines_before_range_normalized + # We need to find the LAST occurrence (closest to replacement range) + # by searching from the end of the list + match_index = -1 + for j in range(len(lines_before_range_normalized) - 1, -1, -1): + if lines_before_range_normalized[j] == replacement_line_stripped: + match_index = j + break + if match_index != -1: + # Check if the replacement line also matches the line at start_idx + # If it does, we shouldn't stitch to a line in lines_before_range + # because we're replacing that line, not inserting before it + line_at_start_idx = hashed_lines[start_idx] if start_idx < len(hashed_lines) else "" + line_at_start_idx_stripped = strip_hashline(line_at_start_idx) + if not line_at_start_idx_stripped.endswith("\n"): + line_at_start_idx_stripped += "\n" + + if replacement_line_stripped == line_at_start_idx_stripped: + # The replacement line matches the line being replaced + # Don't stitch to a line in lines_before_range + continue + # Found a line that already exists before the range! 
+ # This is a non-contiguous match - we need to "stitch" the replacement + # at this exact content match to prevent duplicate code structures + + # Truncate replacement_lines to exclude this line and any lines before it + new_replacement_lines = replacement_lines[line_idx + 1 :] + + # Move the start_idx backward to include lines AFTER the matching line + # match_index is 0-based in lines_before_range_normalized + # lines_before_range ends at start_idx - 1 + # We want to include lines from (match_index + 1) onward + # So we need to move start_idx back by (lines_before_count - match_index - 1) + # This includes lines AFTER the matching line, not including the matching line itself + lines_before_count = len(lines_before_range) + backward_extension = lines_before_count - match_index - 1 + + # If backward_extension is negative (shouldn't happen), set to 0 + if backward_extension < 0: + backward_extension = 0 + + new_start_idx = start_idx - backward_extension + + # Check if extending backward would overlap with any other operation's range + # We need to check all other resolved operations + would_overlap = False + for other_resolved in resolved_ops: + # Skip ourselves + if other_resolved["index"] == current_resolved["index"]: + continue + + other_start = other_resolved["start_idx"] + other_end = other_resolved["end_idx"] + + # Check if our new range would overlap with this other operation's range + # Overlap occurs if: new_start_idx <= other_end AND end_idx >= other_start + if new_start_idx <= other_end and end_idx >= other_start: + would_overlap = True + break + + # Only extend if it wouldn't create an overlap + if not would_overlap: + start_idx = new_start_idx + replacement_lines = new_replacement_lines + else: + # Can't extend backward due to overlap, but we can still truncate + # the replacement text to avoid duplication + replacement_lines = new_replacement_lines + + # We've found our stitching point, break out of the loop + break + # If no match found for this 
line, continue checking next line + # (implicit continue - no else block needed) + + return start_idx, replacement_lines + + +def _apply_end_stitching( + hashed_lines, + start_idx, + end_idx, + replacement_lines, + max_overlap_check=3, +): + """ + Check for overlapping lines AFTER the replacement range and adjust end_idx and replacement_lines. + + This handles cases where the replacement text contains lines that already exist after the target range. + It "stitches" the replacement at the matching line to prevent duplicate code structures. + + Args: + hashed_lines: List of hashed lines from the file + start_idx: Current start index of the replacement range + end_idx: Current end index of the replacement range + replacement_lines: List of replacement lines to insert + max_overlap_check: Maximum number of lines to check for overlap (default: 3) + + Returns: + tuple: (new_end_idx, new_replacement_lines) - adjusted end index and replacement lines + """ + if end_idx + 1 < len(hashed_lines): + # Get the lines after the replacement range (up to max_overlap_check lines) + lines_after_range = hashed_lines[end_idx + 1 : end_idx + 1 + max_overlap_check] + + # Strip hashlines from lines_after_range for comparison + lines_after_range_stripped = [strip_hashline(line) for line in lines_after_range] + + # Normalize newlines for comparison + # Some lines might not have newlines (e.g., last line of file) + lines_after_range_normalized = [] + for line in lines_after_range_stripped: + if line.endswith("\n"): + lines_after_range_normalized.append(line) + else: + lines_after_range_normalized.append(line + "\n") + + # Check for non-contiguous overlap from the end of replacement_lines + # We check each line from the end of replacement_lines to see if it exists + # anywhere in lines_after_range (not just at the beginning) + # This prevents duplication of lines that already exist after the range + for i in range(min(max_overlap_check, len(replacement_lines))): + # Check line from the end of 
replacement_lines + line_idx = len(replacement_lines) - 1 - i + if line_idx < 0: + break + + # Get the line and strip hashline + replacement_line = replacement_lines[line_idx] + replacement_line_stripped = strip_hashline(replacement_line) + + # Normalize newline for comparison + if not replacement_line_stripped.endswith("\n"): + replacement_line_stripped += "\n" + + # Skip stitching for empty lines only + # Empty lines are too common and don't indicate meaningful duplication + trimmed_line = replacement_line_stripped.strip() + if not trimmed_line: + continue + + # Check if this line exists anywhere in lines_after_range_normalized + try: + match_index = lines_after_range_normalized.index(replacement_line_stripped) + # Found a line that already exists after the range! + # This is a non-contiguous match - we need to "stitch" the replacement + # at this exact content match to prevent duplicate code structures + + # Truncate replacement_lines to exclude this line and any lines after it + new_replacement_lines = replacement_lines[:line_idx] + + # Extend the replacement range to include the matching line + # match_index is 0-based in lines_after_range_normalized + # lines_after_range starts at end_idx + 1 + # So we need to extend end_idx by match_index to include + # all lines up to but NOT including the matching line + # (we stitch AT the matching line, not THROUGH it) + extension = match_index + end_idx = end_idx + extension + + replacement_lines = new_replacement_lines + + # We've found our stitching point, break out of the loop + break + except ValueError: + # Line not found in lines_after_range_normalized, continue checking + pass + + return end_idx, replacement_lines + + +def apply_hashline_operations( + original_content: str, + operations: list, +) -> tuple[str, list, list]: + """ + Apply multiple hashline operations sequentially. 
+ + This function hashes the content once, resolves all operations to line indices, + and applies them in reverse order (bottom-to-top) to avoid line number shifts. + + Args: + original_content: Original file content + operations: List of operation dictionaries + + Returns: + tuple: (modified_content, successful_operations, failed_operations) + - modified_content: Modified content after applying all operations + - successful_operations: List of successfully applied operation indices + - failed_operations: List of dictionaries with failed operation info + Each dict contains: {"index": int, "error": str, "operation": dict} + """ + # Normalize hashline inputs in operations + normalized_operations = [] + failed_ops = [] + for i, op in enumerate(operations): + try: + normalized_op = op.copy() + normalized_op["start_line_hash"] = normalize_hashline(op["start_line_hash"]) + if "end_line_hash" in op: + normalized_op["end_line_hash"] = normalize_hashline(op["end_line_hash"]) + normalized_operations.append(normalized_op) + except Exception as e: + failed_ops.append({"index": i, "error": str(e), "operation": op}) + + if not normalized_operations: + return original_content, [], failed_ops + + # Apply hashline to original content once + hashed_content = hashline(original_content) + hashed_lines = hashed_content.splitlines(keepends=True) + + # Resolve all operations to indices first + resolved_ops = [] + for i, op in enumerate(normalized_operations): + try: + if op["operation"] == "insert": + start_hash_fragment, start_line_num_str, start_line_num = parse_hashline( + op["start_line_hash"] + ) + + # Try exact match first for insert operations + found_start = find_hashline_by_exact_match( + hashed_lines, start_hash_fragment, start_line_num_str + ) + + if found_start is None: + found_start = find_hashline_by_fragment( + hashed_lines, start_hash_fragment, start_line_num + ) + + if found_start is None: + raise HashlineError( + f"Start line hash fragment '{start_hash_fragment}' not 
found in file" + ) + + resolved_ops.append( + {"index": i, "start_idx": found_start, "end_idx": found_start, "op": op} + ) + else: + # Use find_hashline_range for replace/delete to leverage its robust logic + # which handles exact matches (including line numbers) and relative offsets + found_start, found_end = find_hashline_range( + hashed_lines, op["start_line_hash"], op["end_line_hash"], allow_exact_match=True + ) + + resolved_ops.append( + {"index": i, "start_idx": found_start, "end_idx": found_end, "op": op} + ) + except Exception as e: + failed_ops.append({"index": i, "error": str(e), "operation": op}) + + # Deduplicate: if multiple operations start on the same line, keep only the latest one + # This handles cases where a model might generate multiple operations for the same line while "thinking" + deduplicated_ops = [] + # Group operations by start_idx + start_idx_to_ops = {} + for op in resolved_ops: + start_idx = op["start_idx"] + if start_idx not in start_idx_to_ops: + start_idx_to_ops[start_idx] = [] + start_idx_to_ops[start_idx].append(op) + + # For each start_idx, keep only the operation with the highest original index (latest in the list) + for start_idx, ops in start_idx_to_ops.items(): + # Sort by original index descending and take the first one + ops.sort(key=lambda x: x["index"], reverse=True) + deduplicated_ops.append(ops[0]) + + # Replace resolved_ops with deduplicated version + resolved_ops = deduplicated_ops + + # Optimize: discard inner ranges that are completely contained within outer ranges + + # Optimize: discard inner ranges that are completely contained within outer ranges + # This prevents redundant operations and potential errors + optimized_ops = [] + for i, op_a in enumerate(resolved_ops): + keep_op = True + + # Check if this operation is contained within any other operation + for j, op_b in enumerate(resolved_ops): + if i == j: + continue + + # Check if op_a is completely inside op_b + # op_a is inside op_b if: + # op_b.start_idx <= 
op_a.start_idx and op_a.end_idx <= op_b.end_idx + if op_b["start_idx"] <= op_a["start_idx"] and op_a["end_idx"] <= op_b["end_idx"]: + # Special case: operations with the same indices but different types + # should both be kept (e.g., replace and insert at same line) + if ( + op_a["start_idx"] == op_b["start_idx"] + and op_a["end_idx"] == op_b["end_idx"] + and op_a["op"]["operation"] != op_b["op"]["operation"] + ): + # Keep both operations if they have different types + continue + # op_a is inside op_b, discard op_a + keep_op = False + break + + if keep_op: + optimized_ops.append(op_a) + + # Replace resolved_ops with optimized version + resolved_ops = optimized_ops + + # Sort by start_idx descending to apply from bottom to top + # When operations have same start_idx, apply in order: insert, replace, delete + # This ensures correct behavior when multiple operations target the same line + def sort_key(op): + start_idx = op["start_idx"] + # Operation type priority: insert (0), replace (1), delete (2) + # Lower priority number means applied first + op_type = op["op"]["operation"] + if op_type == "insert": + priority = 0 + elif op_type == "replace": + priority = 1 + else: # delete + priority = 2 + # Sort by start_idx descending, then priority ascending + return (-start_idx, priority) + + resolved_ops.sort(key=sort_key) + + successful_ops = [] + for resolved in resolved_ops: + try: + op = resolved["op"] + start_idx = resolved["start_idx"] + end_idx = resolved["end_idx"] + + if op["operation"] == "insert": + text = op["text"] + if text and not text.endswith("\n"): + text += "\n" + if not hashed_lines[start_idx].endswith("\n"): + hashed_lines[start_idx] += "\n" + hashed_lines.insert(start_idx + 1, text) + elif op["operation"] == "delete": + del hashed_lines[start_idx : end_idx + 1] + elif op["operation"] == "replace": + text = op["text"] + if text: + # Split text into lines, preserving trailing newline behavior + # If text doesn't end with newline, we add one to ensure 
proper line separation + if not text.endswith("\n"): + text += "\n" + # Split into lines and replace the range + replacement_lines = text.splitlines(keepends=True) + + # Check for overlapping lines to prevent duplication + # This handles cases where the model underspecifies the range and + # the replacement text includes lines that already exist after the range + max_overlap_check = 2 # Check up to 2 lines for overlap + + # Check for overlapping lines BEFORE the range (bidirectional stitching) + start_idx, replacement_lines = _apply_start_stitching( + hashed_lines, + start_idx, + end_idx, + replacement_lines, + resolved_ops, + resolved, + max_overlap_check, + ) + + # Now check for overlapping lines AFTER the range + end_idx, replacement_lines = _apply_end_stitching( + hashed_lines, start_idx, end_idx, replacement_lines, max_overlap_check + ) + + hashed_lines[start_idx : end_idx + 1] = replacement_lines + else: + # Empty text - replace with nothing (delete) + hashed_lines[start_idx : end_idx + 1] = [] + + successful_ops.append(resolved["index"]) + except Exception as e: + failed_ops.append( + {"index": resolved["index"], "error": str(e), "operation": resolved["op"]} + ) + + # Join and strip hashlines + result_with_hashes = "".join(hashed_lines) + result = strip_hashline(result_with_hashes) + + # Respect original trailing newline + if not original_content.endswith("\n") and result.endswith("\n"): + result = result[:-1] + + return result, successful_ops, failed_ops diff --git a/cecli/linter.py b/cecli/linter.py index 360655bf650..065cdaad638 100644 --- a/cecli/linter.py +++ b/cecli/linter.py @@ -108,7 +108,7 @@ def lint(self, fname, cmd=None): if not lintres: return - res = "# Fix any errors below, if possible.\n\n" + res = "# Fix any linting errors below, if possible.\n\n" res += lintres.text res += "\n" res += tree_context(rel_fname, code, lintres.lines) diff --git a/cecli/main.py b/cecli/main.py index 33310f01d09..2390f5cb02d 100644 --- a/cecli/main.py +++ 
b/cecli/main.py @@ -805,9 +805,10 @@ def get_io(pretty): def apply_model_overrides(model_name): """Return (effective_model_name, override_kwargs) for a given model_name. - If model_name exactly matches a configured "base:suffix" override, we - switch to the base model and apply that override dict. Otherwise we - leave the name unchanged and return empty overrides. + If model_name ends with ":suffix" where suffix is configured for the + prefix (everything before the last colon), we switch to the prefix model + and apply that override dict. Otherwise we leave the name unchanged + and return empty overrides. """ if not model_name: return model_name, {} @@ -815,17 +816,33 @@ def apply_model_overrides(model_name): if model_name.startswith(models.COPY_PASTE_PREFIX): prefix = models.COPY_PASTE_PREFIX model_name = model_name[len(prefix) :] - entry = override_index.get(model_name) - if not entry: - model_name = prefix + model_name - return model_name, {} - base_model, cfg = entry - model_name = prefix + base_model - return model_name, cfg.copy() + + # Try to find a matching override by checking all possible suffix matches. + # We iterate from right to left splitting on colons to handle cases where + # the base model name itself contains colons (e.g. 
"provider/model:tag:alias") + parts = model_name.split(":") + # We need at least one split to have a base and a suffix + for i in range(len(parts) - 1, 0, -1): + potential_base = ":".join(parts[:i]) + potential_suffix = ":".join(parts[i:]) + + # Check if this base has the suffix configured + if potential_base in model_overrides: + suffixes = model_overrides[potential_base] + if isinstance(suffixes, dict) and potential_suffix in suffixes: + cfg = suffixes[potential_suffix] + if isinstance(cfg, dict): + model_name = prefix + potential_base + return model_name, cfg.copy() + + # No match found + model_name = prefix + model_name + return model_name, {} main_model_name, main_model_overrides = apply_model_overrides(args.model) weak_model_name, weak_model_overrides = apply_model_overrides(args.weak_model) editor_model_name, editor_model_overrides = apply_model_overrides(args.editor_model) + agent_model_name, agent_model_overrides = apply_model_overrides(args.agent_model) weak_model_obj = None if weak_model_name: weak_model_obj = models.Model( @@ -848,6 +865,18 @@ def apply_model_overrides(model_name): retries=args.retries, debug=args.debug, ) + agent_model_obj = None + if agent_model_name: + agent_model_obj = models.Model( + agent_model_name, + agent_model=False, + verbose=args.verbose, + io=io, + override_kwargs=agent_model_overrides, + retries=args.retries, + debug=args.debug, + ) + if main_model_name.startswith("openrouter/") and not os.environ.get("OPENROUTER_API_KEY"): io.tool_warning( f"The specified model '{main_model_name}' requires an OpenRouter API key, which was not" @@ -873,6 +902,7 @@ def apply_model_overrides(model_name): main_model_name, weak_model=weak_model_obj, editor_model=editor_model_obj, + agent_model=agent_model_obj, editor_edit_format=args.editor_edit_format, verbose=args.verbose, io=io, diff --git a/cecli/models.py b/cecli/models.py index b96ce661628..418b7188101 100644 --- a/cecli/models.py +++ b/cecli/models.py @@ -105,6 +105,7 @@ class 
ModelSettings: name: str edit_format: str = "diff" weak_model_name: Optional[str] = None + agent_model_name: Optional[str] = None use_repo_map: bool = False send_undo_reply: bool = False lazy: bool = False @@ -314,6 +315,7 @@ def __init__( model, weak_model=None, editor_model=None, + agent_model=None, editor_edit_format=None, verbose=False, io=None, @@ -341,6 +343,7 @@ def __init__( self.max_chat_history_tokens = 1024 self.weak_model = None self.editor_model = None + self.agent_model = None self.extra_model_settings = next( (ms for ms in MODEL_SETTINGS if ms.name == "cecli/extra_params"), None ) @@ -354,6 +357,7 @@ def __init__( self.configure_model_settings(model) self._apply_provider_defaults() self.get_weak_model(weak_model) + self.get_agent_model(agent_model) self.retries = retries self.debug = debug @@ -409,8 +413,23 @@ def configure_model_settings(self, model): if self.override_kwargs: if not self.extra_params: self.extra_params = {} + + valid_model_settings_fields = {f.name for f in fields(ModelSettings)} + for key, value in self.override_kwargs.items(): - if isinstance(value, dict) and isinstance(self.extra_params.get(key), dict): + if key == "model_settings": + if not isinstance(value, dict): + raise ValueError( + f"override_kwargs 'model_settings' must be a dict, got {type(value)}" + ) + for setting_key, setting_value in value.items(): + if setting_key not in valid_model_settings_fields: + raise ValueError( + f"Invalid model_settings key '{setting_key}'. 
" + f"Must be one of: {sorted(valid_model_settings_fields)}" + ) + setattr(self, setting_key, setting_value) + elif isinstance(value, dict) and isinstance(self.extra_params.get(key), dict): self.extra_params[key] = {**self.extra_params[key], **value} else: self.extra_params[key] = value @@ -580,6 +599,30 @@ def get_weak_model(self, provided_weak_model): self.weak_model = Model(self.weak_model_name, weak_model=False, io=self.io) return self.weak_model + def get_agent_model(self, provided_weak_model): + if provided_weak_model is False: + self.agent_model = self + self.agent_model_name = None + return + if self.copy_paste_transport == "clipboard": + self.agent_model = self + self.agent_model_name = None + return + if isinstance(provided_weak_model, Model): + self.agent_model = provided_weak_model + self.agent_model_name = provided_weak_model.name + return + if provided_weak_model: + self.agent_model_name = provided_weak_model + if not self.agent_model_name: + self.agent_model = self + return + if self.agent_model_name == self.name: + self.agent_model = self + return + self.agent_model = Model(self.agent_model_name, agent_model=False, io=self.io) + return self.agent_model + def commit_message_models(self): return [self.weak_model, self] diff --git a/cecli/prompts/agent.yml b/cecli/prompts/agent.yml index b4deab401b1..2faf1508824 100644 --- a/cecli/prompts/agent.yml +++ b/cecli/prompts/agent.yml @@ -19,17 +19,16 @@ files_no_full_files_with_repo_map_reply: | main_system: | ## Core Directives - - **Role**: Act as an expert software engineer. + - **Role**: Act as an expert software engineer. - **Act Proactively**: Autonomously use file discovery and context management tools (`ViewFilesAtGlob`, `ViewFilesMatching`, `Ls`, `ContextManager`) to gather information and fulfill the user's request. Chain tool calls across multiple turns to continue exploration. - **Be Decisive**: Trust that your initial findings are valid. 
Refrain from asking the same question or searching for the same term in multiple similar ways. - - **Be Concise**: Keep all responses brief and direct (1-3 sentences). Avoid preamble, postamble, and unnecessary explanations. Do not repeat yourself. - **Be Efficient**: Some tools allow you to perform multiple actions at a time, use them to work quickly and effectively. Respect their usage limits ## Core Workflow 1. **Plan**: Determine the necessary changes. Use the `UpdateTodoList` tool to manage your plan. Always begin by updating the todo list. 2. **Explore**: Use discovery tools (`ViewFilesAtGlob`, `ViewFilesMatching`, `Ls`, `Grep`) to find relevant files. These tools add files to context as read-only. Use `Grep` first for broad searches to avoid context clutter. Concisely describe your search strategy with the `Thinking` tool. - 3. **Think**: Given the contents of your exploration, concisely reason through the edits with the `Thinking` tool that need to be made to accomplish the goal. For complex edits, briefly outline your plan for the user. + 3. **Think**: Given the contents of your exploration, concisely reason through the edits with the `Thinking` tool that need to be made to accomplish the goal. For complex edits, briefly outline your plan for the user. Do not chain multiple `Thinking` calls in a row 4. **Execute**: Use the appropriate editing tool. Remember to mark a file as editable with `ContextManager` before modifying it. Do not attempt large contiguous edits (those greater than 100 lines). Break them into multiple smaller steps. Proactively use skills if they are available 5. **Verify & Recover**: After every edit, check the resulting diff snippet. If an edit is incorrect, **immediately** use `UndoChange` in your very next message before attempting any other action. 6. 
**Finished**: Use the `Finished` tool when all tasks and changes needed to accomplish the goal are finished @@ -38,9 +37,8 @@ main_system: | - **Plan Steps**: Create a todo list at the start of complex tasks to track your progress through multiple exploration rounds. - **Stay Organized**: Update the todo list as you complete steps every 3-10 tool calls to maintain context across multiple tool calls. ### Editing Tools - Use these for precision and safety. - - **Text/Block Manipulation**: `ReplaceText` - - **Line-Based Edits**: `InsertText`, `DeleteText`, `IndentText` + Use these for precision and safety. Files are provided with hashline prefixes in the format `{{line_num}}|{{hash_fragment}}` (e.g., `20|Bv`) and separated from the content by a pipe (|). + - **Line-Based Edits**: `ReplaceText`, `InsertText`, `DeleteText`, `IndentText` - **Refactoring & History**: `ListChanges`, `UndoChange` - **Skill Management**: `LoadSkill`, `RemoveSkill` **MANDATORY Safety Protocol for Line-Based Tools:** Line numbers are fragile. You **MUST** use a two-turn process: @@ -63,7 +61,7 @@ system_reminder: | - Stay on task. Do not pursue goals the user did not ask for. - Any tool call automatically continues to the next turn. Provide no tool calls in your final answer. - Use the .cecli/workspace directory for temporary and test files you make to verify functionality - - Do not neglect spaces and indentation, they are EXTREMELY important to preserve. + - Do not neglect spaces and indentation, they are EXTREMELY important to preserve. Fix indentation errors with the `IndentText` tool. - Remove files from the context when you no longer need them with the `ContextManager` tool. 
It is fine to re-add them later, if they are needed again
   - Remove skills if they are not helpful for your current task with `RemoveSkill`
   {lazy_prompt}
diff --git a/cecli/prompts/base.yml b/cecli/prompts/base.yml
index d67260bdb05..ec9f1b79f29 100644
--- a/cecli/prompts/base.yml
+++ b/cecli/prompts/base.yml
@@ -84,17 +84,14 @@ rename_with_shell: ""
 go_ahead_tip: ""
 
 compaction_prompt: |
-  You are an expert at summarizing conversations. The user is going to provide you with a conversation.
-  This conversation is getting too long to fit in the context window of a language model.
+  This conversation is getting too long to fit in the context window of a large language model.
   You need to summarize the conversation to reduce its length, while retaining all the important information.
   Prioritize the latest instructions and don't include conflicting information from earlier instructions.
 
-  The summary should contain four parts:
+  The summary should contain three parts:
   - Overall Goal: What is the user trying to achieve with this conversation?
+  - Event Log: Keep information most important to prevent having to search for it again
+    This should be quite specific (e.g. the list of actions taken so far in a bulleted list so the next round maintains history)
   - Next Steps: What are the next steps for the language model to take to help the user? Describe the current investigation path and intention.
-  - Key Findings: Keep information most important to prevent having to search for it again
-    This should be quite specific (e/g. relevant files, method names, relevant lines of code, and code structure)
-  - Active files: What files are currently most relevant to the discussion?
 
-  Be confident in proceeding with any in progress edits.
Here is the conversation so far: diff --git a/cecli/prompts/hashline.yml b/cecli/prompts/hashline.yml new file mode 100644 index 00000000000..6e18957c4d8 --- /dev/null +++ b/cecli/prompts/hashline.yml @@ -0,0 +1,145 @@ +# Hashline prompts - inherits from base.yaml +# Overrides specific prompts for hashline format +_inherits: [base] + +main_system: | + Act as an expert software developer. Plan carefully, explain your logic briefly, and execute via LOCATE/CONTENTS blocks. + + ### 1. SOURCE FORMAT + Files are provided in "Hashline" format. Each line starts with the line number and a 2-character hash, separated and followed by pipes. + + **Example Input Format:** + 1|Hm|#!/usr/bin/env python3 + 2|eU| + 3|mL|def example_method(): + 4|bk| return "example" + 5|eU| + + ### 2. FILE ACCESS & WORKFLOW + - If you need to edit files NOT yet in the chat, list their full paths and ask the user to add them. + - You may create NEW files immediately without asking. + - Explain your plan concisely, then provide the LOCATE/CONTENTS blocks. + + ### 3. EDITING PROTOCOL (LOCATE/CONTENTS) + You must use this exact structure for every edit: + + [File Path] + {fence[0]}[language] + <<<<<<< LOCATE + ["start_hashline", "end_hashline", "operation"] + ======= + [replacement code] + >>>>>>> CONTENTS + {fence[1]} + + **Strict LOCATE Rules:** + - **JSON ONLY:** The area between `<<<<<<< LOCATE` and `=======` must contain ONLY the JSON array (e.g., `["3|mL", "4|bk", "replace"]`). Never include source code here. + - **Operations:** `replace` (overwrites range) or `delete` (removes range). + - **Inclusion:** Ranges are inclusive of the start and end hashlines. + - **New Files:** To create a file, use the "Genesis" anchor: `["0|aa", "0|aa", "replace"]`. + + ### 4. QUALITY STANDARDS + - Respect existing conventions and libraries. + - Include full method/function bodies in edits to ensure syntactical correctness. + - Verify changes mentally for edge cases before outputting blocks. 
+ + {shell_cmd_prompt} + {final_reminders} + +example_messages: + - role: user + content: Change get_factorial() to use math.factorial + - role: assistant + content: | + To make this change we need to modify `mathweb/flask/app.py` to: + 1. Import the math package. + 2. Remove the existing factorial() function. + 3. Update get_factorial() to call math.factorial instead. + Here are the *LOCATE/CONTENTS* blocks: + mathweb/flask/app.py + {fence[0]}python + <<<<<<< LOCATE + ["1|aB", "1|aB", "replace"] + ======= + import math + from flask import Flask + >>>>>>> CONTENTS + {fence[1]} + mathweb/flask/app.py + {fence[0]}python + <<<<<<< LOCATE + ["10|cD", "15|eF", "delete"] + ======= + >>>>>>> CONTENTS + {fence[1]} + mathweb/flask/app.py + {fence[0]}python + <<<<<<< LOCATE + ["20|gH", "20|gH", "replace"] + ======= + return str(math.factorial(n)) + >>>>>>> CONTENTS + {fence[1]} + - role: user + content: Refactor hello() into its own file. + - role: assistant + content: | + To make this change we need to modify `main.py` and make a new file `hello.py`: + 1. Make a new hello.py file with hello() in it. + 2. Remove hello() from main.py and replace it with an import. + Here are the *LOCATE/CONTENTS* blocks: + hello.py + {fence[0]}python + <<<<<<< LOCATE + ["0|aa", "0|aa", "replace"] + ======= + def hello(): + "print a greeting" + print("hello") + >>>>>>> CONTENTS + {fence[1]} + main.py + {fence[0]}python + <<<<<<< LOCATE + ["5|iJ", "8|kL", "replace"] + ======= + from hello import hello + >>>>>>> CONTENTS + {fence[1]} + + +system_reminder: | + # CRITICAL FORMATTING RULES: + 1. **Path Accuracy:** The filename must be on its own line above the code fence, exactly as shown in the chat. + 2. **JSON Only:** The area between `<<<<<<< LOCATE` and `=======` must be a valid JSON array with format: ["start_hashline", "end_hashline", "operation"]. + 3. **No Overlaps:** Ensure blocks target unique ranges. 
If multiple blocks share the same starting hashline, only the final block provided for that line will be processed. + 4. **Moving Code:** Use one `delete` block at the source and one `replace` block at the destination. + 5. **Empty Deletes:** For `delete` operations, the area between `=======` and `>>>>>>> CONTENTS` must be empty. + + # NEW FILE TEMPLATE: + To create a file, use `["0|aa", "0|aa", "replace"]`. + + {quad_backtick_reminder} + {rename_with_shell}{go_ahead_tip}{final_reminders} + {shell_cmd_reminder} + +rename_with_shell: | + To rename files which have been added to the chat, use shell commands at the end of your response. + +go_ahead_tip: | + If the user just says something like "ok" or "go ahead" or "do that" they probably want you to make LOCATE/CONTENTS blocks for the code changes you just proposed. + The user will say when they've applied your edits. If they haven't explicitly confirmed the edits have been applied, they probably want proper LOCATE/CONTENTS blocks. + +shell_cmd_prompt: | + You can run shell commands with: + ```bash + + ``` + Put shell commands inside a ```bash code block. + +shell_cmd_reminder: | + You can run shell commands with: + ```bash + + ``` + Put shell commands inside a ```bash code block. diff --git a/cecli/queries/tree-sitter-languages/README.md b/cecli/queries/tree-sitter-languages/README.md index 59e7219cacb..6a7a56b4251 100644 --- a/cecli/queries/tree-sitter-languages/README.md +++ b/cecli/queries/tree-sitter-languages/README.md @@ -1,7 +1,7 @@ # Credits -Aider uses modified versions of the tags.scm files from these open source +Cecli uses modified versions of the tags.scm files from these open source tree-sitter language implementations: * [https://github.com/tree-sitter/tree-sitter-c](https://github.com/tree-sitter/tree-sitter-c) — licensed under the MIT License. 
diff --git a/cecli/sessions.py b/cecli/sessions.py index 18c13d4b9a5..2c5b633db6e 100644 --- a/cecli/sessions.py +++ b/cecli/sessions.py @@ -152,6 +152,7 @@ def _build_session_data(self, session_name) -> Dict: "model": self.coder.main_model.name, "weak_model": self.coder.main_model.weak_model.name, "editor_model": self.coder.main_model.editor_model.name, + "agent_model": self.coder.main_model.agent_model.name, "editor_edit_format": self.coder.main_model.editor_edit_format, "edit_format": self.coder.edit_format, "chat_history": { diff --git a/cecli/tools/delete_text.py b/cecli/tools/delete_text.py index e3bde91764b..2b563929369 100644 --- a/cecli/tools/delete_text.py +++ b/cecli/tools/delete_text.py @@ -1,8 +1,8 @@ +from cecli.helpers.hashline import HashlineError, apply_hashline_operation from cecli.tools.utils.base_tool import BaseTool from cecli.tools.utils.helpers import ( ToolError, apply_change, - determine_line_range, format_tool_result, handle_tool_error, validate_file_for_edit, @@ -15,17 +15,29 @@ class Tool(BaseTool): "type": "function", "function": { "name": "DeleteText", - "description": "Delete a block of lines from a file.", + "description": ( + "Delete a block of lines from a file using hashline markers. " + 'Uses start_line and end_line parameters with format "{line_num}|{hash_fragment}" ' + "to specify the range to delete." 
+ ), "parameters": { "type": "object", "properties": { "file_path": {"type": "string"}, - "line_number": {"type": "integer"}, - "line_count": {"type": "integer"}, + "start_line": { + "type": "string", + "description": ( + 'Hashline format for start line: "{line_num}|{hash_fragment}"' + ), + }, + "end_line": { + "type": "string", + "description": 'Hashline format for end line: "{line_num}|{hash_fragment}"', + }, "change_id": {"type": "string"}, "dry_run": {"type": "boolean", "default": False}, }, - "required": ["file_path", "line_number"], + "required": ["file_path", "start_line", "end_line"], }, }, } @@ -35,54 +47,41 @@ def execute( cls, coder, file_path, - line_number, - line_count=None, + start_line, + end_line, change_id=None, dry_run=False, **kwargs, ): """ - Delete a block of text based on line numbers. + Delete a block of text using hashline markers. """ tool_name = "DeleteText" try: # 1. Validate file and get content abs_path, rel_path, original_content = validate_file_for_edit(coder, file_path) - lines = original_content.splitlines() - # 2. Determine the range - start_line_idx = line_number - 1 - pattern_desc = f"line {line_number}" - - start_line, end_line = determine_line_range( - coder=coder, - file_path=rel_path, - lines=lines, - start_pattern_line_index=start_line_idx, - end_pattern=None, - line_count=line_count, - target_symbol=None, - pattern_desc=pattern_desc, - ) - - # 4. Prepare the deletion - deleted_lines = lines[start_line : end_line + 1] - new_lines = lines[:start_line] + lines[end_line + 1 :] - new_content = "\n".join(new_lines) + # 2. 
Apply hashline operation for deletion + try: + new_content = apply_hashline_operation( + original_content=original_content, + start_line_hash=start_line, + end_line_hash=end_line, + operation="delete", + text=None, + ) + except (ToolError, HashlineError) as e: + raise ToolError(f"Hashline deletion failed: {str(e)}") + # Check if any changes were made if original_content == new_content: coder.io.tool_warning("No changes made: deletion would not change file") return "Warning: No changes made (deletion would not change file)" - # 5. Generate diff for feedback - num_deleted = end_line - start_line + 1 - basis_desc = f"line {line_number}" - - # 6. Handle dry run + # 3. Handle dry run if dry_run: dry_run_message = ( - f"Dry run: Would delete {num_deleted} lines ({start_line + 1}-{end_line + 1})" - f" based on {basis_desc} in {file_path}." + f"Dry run: Would delete lines {start_line} to {end_line} in {file_path}." ) return format_tool_result( coder, @@ -92,13 +91,10 @@ def execute( dry_run_message=dry_run_message, ) - # 7. Apply Change (Not dry run) + # 4. Apply Change (Not dry run) metadata = { - "start_line": start_line + 1, - "end_line": end_line + 1, - "line_number": line_number, - "line_count": line_count, - "deleted_content": "\n".join(deleted_lines), + "start_line": start_line, + "end_line": end_line, } final_change_id = apply_change( coder, @@ -112,11 +108,8 @@ def execute( ) coder.files_edited_by_tools.add(rel_path) - # 8. Format and return result, adding line range to success message - success_message = ( - f"Deleted {num_deleted} lines ({start_line + 1}-{end_line + 1}) (from" - f" {basis_desc}) in {file_path}" - ) + # 5. 
Format and return result + success_message = f"Deleted lines {start_line} to {end_line} in {file_path}" return format_tool_result( coder, tool_name, diff --git a/cecli/tools/indent_text.py b/cecli/tools/indent_text.py index 81b1f225518..f3f9af21400 100644 --- a/cecli/tools/indent_text.py +++ b/cecli/tools/indent_text.py @@ -1,8 +1,12 @@ +from cecli.helpers.hashline import ( + HashlineError, + apply_hashline_operation, + extract_hashline_range, +) from cecli.tools.utils.base_tool import BaseTool from cecli.tools.utils.helpers import ( ToolError, apply_change, - determine_line_range, format_tool_result, handle_tool_error, validate_file_for_edit, @@ -20,13 +24,21 @@ class Tool(BaseTool): "type": "object", "properties": { "file_path": {"type": "string"}, - "line_number": {"type": "integer"}, - "line_count": {"type": "integer"}, + "start_line": { + "type": "string", + "description": ( + 'Hashline format for start line: "{line_num}|{hash_fragment}"' + ), + }, + "end_line": { + "type": "string", + "description": 'Hashline format for end line: "{line_num}|{hash_fragment}"', + }, "indent_levels": {"type": "integer", "default": 1}, "change_id": {"type": "string"}, "dry_run": {"type": "boolean", "default": False}, }, - "required": ["file_path", "line_number"], + "required": ["file_path", "start_line", "end_line"], }, }, } @@ -36,21 +48,21 @@ def execute( cls, coder, file_path, - line_number, - line_count=None, + start_line, + end_line, indent_levels=1, change_id=None, dry_run=False, **kwargs, ): """ - Indent or unindent a block of lines in a file. + Indent or unindent a block of lines in a file using hashline markers. 
Parameters: - coder: The Coder instance - file_path: Path to the file to modify - - line_number: Line number to start indenting from (1-based) - - line_count: Optional number of lines to indent + - start_line: Hashline format for start line: "{line_num}|{hash_fragment}" + - end_line: Hashline format for end line: "{line_num}|{hash_fragment}" - indent_levels: Number of levels to indent (positive) or unindent (negative) - change_id: Optional ID for tracking the change - dry_run: If True, simulate the change without modifying the file @@ -61,24 +73,8 @@ def execute( try: # 1. Validate file and get content abs_path, rel_path, original_content = validate_file_for_edit(coder, file_path) - lines = original_content.splitlines() - - # 2. Determine the range - start_line_idx = line_number - 1 - pattern_desc = f"line {line_number}" - - start_line, end_line = determine_line_range( - coder=coder, - file_path=rel_path, - lines=lines, - start_pattern_line_index=start_line_idx, - end_pattern=None, - line_count=line_count, - target_symbol=None, - pattern_desc=pattern_desc, - ) - # 4. Validate and prepare indentation + # 2. Validate indent_levels parameter try: indent_levels = int(indent_levels) except ValueError: @@ -86,40 +82,72 @@ def execute( f"Invalid indent_levels value: '{indent_levels}'. Must be an integer." ) + # 3. Extract the range content using hashline + try: + range_content = extract_hashline_range( + original_content=original_content, + start_line_hash=start_line, + end_line_hash=end_line, + ) + except HashlineError as e: + raise ToolError(f"Hashline range extraction failed: {str(e)}") + + # 4. 
Apply indentation to the extracted range + # Strip hashline prefixes to get original content + from cecli.helpers.hashline import strip_hashline + + original_range_content = strip_hashline(range_content) + + # Split into lines and apply indentation + range_lines = original_range_content.splitlines(keepends=True) indent_str = " " * 4 # Assume 4 spaces per level - modified_lines = list(lines) + modified_range_lines = [] - # Apply indentation logic (core logic remains) - for i in range(start_line, end_line + 1): + for line in range_lines: if indent_levels > 0: - modified_lines[i] = (indent_str * indent_levels) + modified_lines[i] + # Indent: add spaces + modified_line = (indent_str * indent_levels) + line elif indent_levels < 0: + # Unindent: remove spaces spaces_to_remove = abs(indent_levels) * len(indent_str) - current_leading_spaces = len(modified_lines[i]) - len( - modified_lines[i].lstrip(" ") - ) + current_leading_spaces = len(line) - len(line.lstrip(" ")) actual_remove = min(spaces_to_remove, current_leading_spaces) if actual_remove > 0: - modified_lines[i] = modified_lines[i][actual_remove:] - - new_content = "\n".join(modified_lines) - - if original_content == new_content: + modified_line = line[actual_remove:] + else: + modified_line = line + else: + # indent_levels == 0, no change + modified_line = line + modified_range_lines.append(modified_line) + + # Join back into text + indented_range_content = "".join(modified_range_lines) + + # 5. Check if any changes were made + if original_range_content == indented_range_content: coder.io.tool_warning("No changes made: indentation would not change file") return "Warning: No changes made (indentation would not change file)" - # 5. Generate diff for feedback - action = "indent" if indent_levels > 0 else "unindent" - levels = abs(indent_levels) - level_text = "level" if levels == 1 else "levels" - num_lines = end_line - start_line + 1 - basis_desc = f"line {line_number}" - # 6. 
Handle dry run if dry_run: + # Parse line numbers for display + try: + start_line_num_str, _ = start_line.split("|", 1) + end_line_num_str, _ = end_line.split("|", 1) + start_line_num = int(start_line_num_str) + end_line_num = int(end_line_num_str) + num_lines = end_line_num - start_line_num + 1 + except (ValueError, IndexError): + num_lines = "unknown" + + action = "indent" if indent_levels > 0 else "unindent" + levels = abs(indent_levels) + level_text = "level" if levels == 1 else "levels" + dry_run_message = ( - f"Dry run: Would {action} {num_lines} lines ({start_line + 1}-{end_line + 1})" - f" by {levels} {level_text} (based on {basis_desc}) in {file_path}." + f"Dry run: Would {action} {num_lines} lines ({start_line} to {end_line})" + f" by {levels} {level_text} in {file_path}." ) return format_tool_result( coder, @@ -129,12 +157,22 @@ def execute( dry_run_message=dry_run_message, ) - # 7. Apply Change (Not dry run) + # 7. Apply Change (Not dry run) using replace operation + try: + new_content = apply_hashline_operation( + original_content=original_content, + start_line_hash=start_line, + end_line_hash=end_line, + operation="replace", + text=indented_range_content, + ) + except (ToolError, HashlineError) as e: + raise ToolError(f"Hashline replacement failed: {str(e)}") + + # 8. Apply the change metadata = { - "start_line": start_line + 1, - "end_line": end_line + 1, - "line_number": line_number, - "line_count": line_count, + "start_line": start_line, + "end_line": end_line, + "indent_levels": indent_levels, } final_change_id = apply_change( @@ -150,11 +188,24 @@ def execute( coder.files_edited_by_tools.add(rel_path) - # 8. Format and return result + # 9.
Format and return result + # Parse line numbers for display + try: + start_line_num_str, _ = start_line.split("|", 1) + end_line_num_str, _ = end_line.split("|", 1) + start_line_num = int(start_line_num_str) + end_line_num = int(end_line_num_str) + num_lines = end_line_num - start_line_num + 1 + except (ValueError, IndexError): + num_lines = "unknown" + action_past = "Indented" if indent_levels > 0 else "Unindented" + levels = abs(indent_levels) + level_text = "level" if levels == 1 else "levels" + success_message = ( - f"{action_past} {num_lines} lines by {levels} {level_text} (from" - f" {basis_desc}) in {file_path}" + f"{action_past} {num_lines} lines ({start_line} to {end_line})" + f" by {levels} {level_text} in {file_path}" ) return format_tool_result( coder, @@ -162,6 +213,7 @@ def execute( success_message, change_id=final_change_id, ) + except ToolError as e: # Handle errors raised by utility functions (expected errors) return handle_tool_error(coder, tool_name, e, add_traceback=False) diff --git a/cecli/tools/insert_text.py b/cecli/tools/insert_text.py index c29ef09e449..762b4d6885a 100644 --- a/cecli/tools/insert_text.py +++ b/cecli/tools/insert_text.py @@ -1,13 +1,12 @@ -import re import traceback +from cecli.helpers.hashline import HashlineError, apply_hashline_operation from cecli.tools.utils.base_tool import BaseTool from cecli.tools.utils.helpers import ( ToolError, apply_change, format_tool_result, handle_tool_error, - is_provided, validate_file_for_edit, ) from cecli.tools.utils.output import tool_body_unwrapped, tool_footer, tool_header @@ -20,21 +19,26 @@ class Tool(BaseTool): "function": { "name": "InsertText", "description": ( - "Insert a content into a file. Mutually Exclusive Parameters:" - " position, line_number." + "Insert content into a file using hashline markers. " + 'Uses start_line parameter with format "{line_num}|{hash_fragment}" ' + "to specify where to insert content. For empty files, " + 'use "0|aa" as the hashline reference.'
), "parameters": { "type": "object", "properties": { "file_path": {"type": "string"}, "content": {"type": "string"}, - "line_number": {"type": "integer"}, + "start_line": { + "type": "string", + "description": ( + 'Hashline format for insertion point: "{line_num}|{hash_fragment}"' + ), + }, "change_id": {"type": "string"}, "dry_run": {"type": "boolean", "default": False}, - "position": {"type": "string", "enum": ["top", "bottom", ""]}, - "auto_indent": {"type": "boolean", "default": True}, }, - "required": ["file_path", "content"], + "required": ["file_path", "content", "start_line"], }, }, } @@ -45,132 +49,47 @@ def execute( coder, file_path, content, - line_number=None, + start_line, change_id=None, dry_run=False, - position=None, - auto_indent=True, **kwargs, ): """ - Insert a block of text at a line number or special position. + Insert content into a file using hashline markers. Args: coder: The coder instance file_path: Path to the file to modify content: The content to insert - line_number: Line number to insert at (1-based, mutually exclusive with position) + start_line: Hashline format for insertion point: "{line_num}|{hash_fragment}" change_id: Optional ID for tracking changes dry_run: If True, only simulate the change - position: Special position like "top" or "bottom" (mutually exclusive with line_number) - auto_indent: If True, automatically adjust indentation of inserted content """ tool_name = "InsertText" try: - # 1. Validate parameters - if sum(is_provided(x) for x in [position, line_number]) != 1: - # Check if file is empty or contains only whitespace - abs_path, rel_path, original_content = validate_file_for_edit(coder, file_path) - if not original_content.strip(): - # File is empty or contains only whitespace, default to inserting at beginning - position = "top" - else: - raise ToolError("Must specify exactly one of: position or line_number") - - # 2. Validate file and get content + # 1. 
Validate file and get content abs_path, rel_path, original_content = validate_file_for_edit(coder, file_path) - lines = original_content.splitlines() - - # Handle empty files - if not lines: - lines = [""] - - # 3. Determine insertion point - insertion_line_idx = 0 - pattern_type = "" - - if position: - # Handle special positions - if position == "start_of_file" or position == "top": - insertion_line_idx = 0 - pattern_type = "at start of" - elif position == "end_of_file" or position == "bottom": - insertion_line_idx = len(lines) - pattern_type = "at end of" - else: - raise ToolError( - f"Invalid position: '{position}'. Valid values are 'start_of_file' or" - " 'end_of_file'" - ) - else: - # Handle line number insertion (1-based) - if line_number < 1: - insertion_line_idx = 0 - elif line_number > len(lines) + 1: - insertion_line_idx = len(lines) - else: - insertion_line_idx = line_number - 1 - pattern_type = "at line" - - # 4. Handle indentation if requested - content_lines = content.splitlines() - - if auto_indent and content_lines: - # Determine base indentation level - base_indent = "" - if insertion_line_idx > 0 and lines: - # Use indentation from the line before insertion point - reference_line_idx = min(insertion_line_idx - 1, len(lines) - 1) - reference_line = lines[reference_line_idx] - base_indent = re.match(r"^(\s*)", reference_line).group(1) - - # Apply indentation to content lines, preserving relative indentation - if content_lines: - # Find minimum indentation in content to preserve relative indentation - content_indents = [ - len(re.match(r"^(\s*)", line).group(1)) - for line in content_lines - if line.strip() - ] - min_content_indent = min(content_indents) if content_indents else 0 - # Apply base indentation while preserving relative indentation - indented_content_lines = [] - for line in content_lines: - if not line.strip(): # Empty or whitespace-only line - indented_content_lines.append("") - else: - # Remove existing indentation and add new base 
indentation - stripped_line = ( - line[min_content_indent:] - if min_content_indent <= len(line) - else line - ) - indented_content_lines.append(base_indent + stripped_line) - - content_lines = indented_content_lines - - # 5. Prepare the insertion - new_lines = lines[:insertion_line_idx] + content_lines + lines[insertion_line_idx:] - new_content = "\n".join(new_lines) - - # Restore trailing newline if original file had one - if original_content.endswith("\n"): - new_content += "\n" + # 2. Apply hashline operation for insertion + try: + new_content = apply_hashline_operation( + original_content=original_content, + start_line_hash=start_line, + end_line_hash=start_line, # For insert, end_line is same as start_line + operation="insert", + text=content, + ) + except (ToolError, HashlineError) as e: + raise ToolError(f"Hashline insertion failed: {str(e)}") + # Check if any changes were made if original_content == new_content: coder.io.tool_warning("No changes made: insertion would not change file") return "Warning: No changes made (insertion would not change file)" - # 6. Handle dry run + # 3. Handle dry run if dry_run: - if position: - dry_run_message = f"Dry run: Would insert block {pattern_type} {file_path}." - else: - dry_run_message = ( - f"Dry run: Would insert block {pattern_type} {line_number} in {file_path}." - ) - + dry_run_message = f"Dry run: Would insert content at {start_line} in {file_path}." return format_tool_result( coder, tool_name, @@ -179,13 +98,10 @@ def execute( dry_run_message=dry_run_message, ) - # 7. Apply Change (Not dry run) + # 4. Apply Change (Not dry run) metadata = { - "insertion_line_idx": insertion_line_idx, - "line_number": line_number, - "position": position, + "start_line": start_line, "content": content, - "auto_indent": auto_indent, } final_change_id = apply_change( coder, @@ -200,12 +116,8 @@ def execute( coder.files_edited_by_tools.add(rel_path) - # 8. 
Format and return result - if position: - success_message = f"Inserted block {pattern_type} {file_path}" - else: - success_message = f"Inserted block {pattern_type} {line_number} in {file_path}" - + # 5. Format and return result + success_message = f"Inserted content at {start_line} in {file_path}" return format_tool_result( coder, tool_name, diff --git a/cecli/tools/replace_text.py b/cecli/tools/replace_text.py index 96486dd0e9b..ccb85af154a 100644 --- a/cecli/tools/replace_text.py +++ b/cecli/tools/replace_text.py @@ -1,6 +1,10 @@ -import difflib import json +from cecli.helpers.hashline import ( + HashlineError, + apply_hashline_operations, + get_hashline_diff, +) from cecli.tools.utils.base_tool import BaseTool from cecli.tools.utils.helpers import ( ToolError, @@ -20,7 +24,10 @@ class Tool(BaseTool): "name": "ReplaceText", "description": ( "Replace text in one or more files. Can handle an array of up to 10 edits across" - " multiple files. Each edit must include its own file_path." + " multiple files. Each edit must include its own file_path. Use hashline ranges" + " with the start_line and end_line parameters with format" + ' "{line_num}|{hash_fragment}". For empty files, use "0|aa" as the hashline' + " reference." 
), "parameters": { "type": "object", @@ -34,13 +41,22 @@ class Tool(BaseTool): "type": "string", "description": "Required file path for this specific edit.", }, - "find_text": {"type": "string"}, "replace_text": {"type": "string"}, - "line_number": {"type": "integer"}, - "occurrence": {"type": "integer", "default": 1}, - "replace_all": {"type": "boolean", "default": False}, + "start_line": { + "type": "string", + "description": ( + "Hashline format for start line:" + ' "{line_num}|{hash_fragment}"' + ), + }, + "end_line": { + "type": "string", + "description": ( + 'Hashline format for end line: "{line_num}|{hash_fragment}"' + ), + }, }, - "required": ["file_path", "find_text", "replace_text"], + "required": ["file_path", "replace_text", "start_line", "end_line"], }, "description": "Array of edits to apply.", }, @@ -98,51 +114,43 @@ def execute( coder, file_path_key ) - # Process all edits for this file - current_content = original_content + # Process all edits for this file using batch operations + operations = [] file_metadata = [] file_successful_edits = 0 file_failed_edits = [] for edit_index, edit in file_edits: try: - edit_find_text = edit.get("find_text") edit_replace_text = edit.get("replace_text") - edit_line_number = edit.get("line_number") - edit_occurrence = edit.get("occurrence", 1) - edit_replace_all = edit.get("replace_all", False) + edit_start_line = edit.get("start_line") + edit_end_line = edit.get("end_line") - if edit_find_text is None or edit_replace_text is None: + if edit_replace_text is None: raise ToolError( - f"Edit {edit_index + 1} missing find_text or replace_text" + f"Edit {edit_index + 1} missing required replace_text parameter" ) - # Process this edit - new_content, metadata = cls._process_single_edit( - coder, - file_path_key, - edit_find_text, - edit_replace_text, - edit_line_number, - edit_occurrence, - current_content, - rel_path, - abs_path, - edit_replace_all, + # Add operation to batch + operations.append( + { + 
"start_line_hash": edit_start_line, + "end_line_hash": edit_end_line, + "operation": "replace", + "text": edit_replace_text, + } ) - if metadata is not None: # Edit made a change - current_content = new_content - file_metadata.append(metadata) - file_successful_edits += 1 - else: - # Edit didn't change anything (identical replacement) - file_failed_edits.append( - f"Edit {edit_index + 1}: No change (replacement identical to" - " original)" - ) + # Create metadata for this edit + metadata = { + "start_line": edit_start_line, + "end_line": edit_end_line, + "replace_text": edit_replace_text, + } + file_metadata.append(metadata) + file_successful_edits += 1 - except ToolError as e: + except (ToolError, HashlineError) as e: # Record failed edit but continue with others file_failed_edits.append(f"Edit {edit_index + 1}: {str(e)}") continue @@ -152,7 +160,17 @@ def execute( all_failed_edits.extend(file_failed_edits) continue - new_content = current_content + # Apply all operations in batch + try: + new_content, _, _ = apply_hashline_operations( + original_content=original_content, + operations=operations, + ) + except (ToolError, HashlineError) as e: + # If batch operation fails, mark all operations as failed + for edit_index, _ in file_edits: + all_failed_edits.append(f"Edit {edit_index + 1}: {str(e)}") + continue # Check if any changes were made for this file if original_content == new_content: @@ -181,6 +199,7 @@ def execute( "failed_edits": file_failed_edits if file_failed_edits else None, } + # Apply the change (common path for both hashline and non-hashline cases) final_change_id = apply_change( coder, abs_path, @@ -235,6 +254,11 @@ def execute( ) # 6. 
Format and return result + # Log failed edit messages to console for visibility + if all_failed_edits: + for failed_msg in all_failed_edits: + coder.io.tool_error(failed_msg) + if files_processed == 1: # Single file case for backward compatibility result = all_results[0] @@ -243,6 +267,8 @@ def execute( ) if result["failed_edits"]: success_message += f" ({len(result['failed_edits'])} failed)" + # Include failed edit details in message to LLM + success_message += "\nFailed edits:\n" + "\n".join(result["failed_edits"]) change_id_to_return = result.get("change_id") else: # Multiple files case @@ -251,6 +277,8 @@ def execute( ) if all_failed_edits: success_message += f" ({len(all_failed_edits)} failed)" + # Include failed edit details in message to LLM + success_message += "\nFailed edits:\n" + "\n".join(all_failed_edits) change_id_to_return = None # Multiple change IDs, can't return single one return format_tool_result( @@ -292,168 +320,44 @@ def format_output(cls, coder, mcp_server, tool_response): coder.io.tool_output("") for edit_index, edit in file_edits: - # Show diff for this edit - diff = difflib.unified_diff( - edit.get("find_text", "").splitlines(), - edit.get("replace_text", "").splitlines(), - lineterm="", - n=float("inf"), - ) - diff_lines = list(diff)[2:] # Skip header lines - if diff_lines: + # Show diff for this edit using hashline diff + replace_text = edit.get("replace_text", "") + start_line = edit.get("start_line") + end_line = edit.get("end_line") + + # Try to read the file to get original content for diff + diff_output = "" + + if file_path_key and start_line and end_line: + try: + # Try to read the file + abs_path = coder.abs_root_path(file_path_key) + original_content = coder.io.read_text(abs_path) + + if original_content is not None: + # Generate diff using get_hashline_diff + diff_output = get_hashline_diff( + original_content=original_content, + start_line_hash=start_line, + end_line_hash=end_line, + operation="replace", + text=replace_text, 
+ ) + except HashlineError as e: + # If hashline verification fails, show the error + diff_output = f"Hashline verification failed: {str(e)}" + except Exception: + # If we can't read the file or generate diff, continue without it + pass + + # Only show diff section if we have diff output + if diff_output: if len(params["edits"]) > 1: coder.io.tool_output(f"{color_start}diff_{edit_index + 1}:{color_end}") else: coder.io.tool_output(f"{color_start}diff:{color_end}") - coder.io.tool_output("\n".join([line for line in diff_lines])) + coder.io.tool_output(diff_output) coder.io.tool_output("") tool_footer(coder=coder, tool_response=tool_response) - - @classmethod - def _process_single_edit( - cls, - coder, - file_path, - find_text, - replace_text, - line_number=None, - occurrence=1, - original_content=None, - rel_path=None, - abs_path=None, - replace_all=False, - ): - """ - Process a single edit and return the modified content and metadata. - """ - # Find all occurrences of the text in the file - occurrence_indices = coder._find_occurrences(original_content, find_text, None) - - if not occurrence_indices: - err_msg = f"Text '{find_text}' not found in file '{file_path}'." 
- raise ToolError(err_msg) - - # Handle replace_all case - if replace_all: - # Replace all occurrences - new_content = original_content - replaced_count = 0 - - # Need to process from end to beginning to maintain correct indices - for idx in reversed(occurrence_indices): - new_content = new_content[:idx] + replace_text + new_content[idx + len(find_text) :] - replaced_count += 1 - - if original_content == new_content: - return original_content, None # No change - - metadata = { - "start_index": occurrence_indices[0] if occurrence_indices else None, - "find_text": find_text, - "replace_text": replace_text, - "line_number": line_number, - "occurrence": -1, # Special value indicating all occurrences - "replaced_count": replaced_count, - } - - return new_content, metadata - - # Original logic for single occurrence replacement - # If line_number is provided, find the occurrence closest to that line - if line_number is not None: - try: - line_number = int(line_number) - # Validate line number is within file bounds - lines = original_content.splitlines(keepends=True) - if line_number < 1 or line_number > len(lines): - raise ToolError( - f"Line number {line_number} is out of range. File has {len(lines)} lines." 
- ) - - # Calculate which line each occurrence is on - occurrence_lines = [] - for occ_idx in occurrence_indices: - # Count newlines before this occurrence to determine line number - lines_before = original_content[:occ_idx].count("\n") - line_num = lines_before + 1 # Convert to 1-based line numbering - occurrence_lines.append((occ_idx, line_num)) - - # Find the occurrence on or after the specified line number - # If none found, use the last occurrence before the line number - target_idx = None - min_distance_after = float("inf") - last_before_idx = None - last_before_distance = float("inf") - - for i, (occ_idx, occ_line) in enumerate(occurrence_lines): - distance = occ_line - line_number - - if distance >= 0: # On or after the line number - if distance < min_distance_after: - min_distance_after = distance - target_idx = i - else: # Before the line number - if abs(distance) < last_before_distance: - last_before_distance = abs(distance) - last_before_idx = i - - # If no occurrence on or after, use the closest before - if target_idx is None and last_before_idx is not None: - target_idx = last_before_idx - - if target_idx is None: - raise ToolError(f"No occurrence of '{find_text}' found in file '{file_path}'.") - - selected_occurrence = ( - 1 # We're selecting based on line_number, so occurrence is always 1 - ) - - except ValueError: - raise ToolError(f"Invalid line number: '{line_number}'. Must be an integer.") - else: - # No line_number specified, use the occurrence parameter - num_occurrences = len(occurrence_indices) - try: - occurrence = int(occurrence) - if occurrence == -1: - if num_occurrences == 0: - raise ToolError( - f"Text '{find_text}' not found, cannot select last occurrence." 
- ) - target_idx = num_occurrences - 1 - selected_occurrence = occurrence - elif 1 <= occurrence <= num_occurrences: - target_idx = occurrence - 1 # Convert 1-based to 0-based - selected_occurrence = occurrence - else: - err_msg = ( - f"Occurrence number {occurrence} is out of range. Found" - f" {num_occurrences} occurrences of '{find_text}' in '{file_path}'." - ) - raise ToolError(err_msg) - except ValueError: - raise ToolError(f"Invalid occurrence value: '{occurrence}'. Must be an integer.") - - start_index = occurrence_indices[target_idx] - - # Perform the replacement - new_content = ( - original_content[:start_index] - + replace_text - + original_content[start_index + len(find_text) :] - ) - - if original_content == new_content: - return original_content, None # No change - - metadata = { - "start_index": start_index, - "find_text": find_text, - "replace_text": replace_text, - "line_number": line_number, - "occurrence": selected_occurrence, - } - - return new_content, metadata diff --git a/cecli/tools/show_numbered_context.py b/cecli/tools/show_numbered_context.py index 3663552119a..2d47b568fb6 100644 --- a/cecli/tools/show_numbered_context.py +++ b/cecli/tools/show_numbered_context.py @@ -1,5 +1,6 @@ import os +from cecli.helpers.hashline import hashline from cecli.tools.utils.base_tool import BaseTool from cecli.tools.utils.helpers import ( ToolError, @@ -176,11 +177,17 @@ def execute(cls, coder, show, **kwargs): # 6. 
Format output for this operation # Use rel_path for user-facing messages output_lines = [f"Displaying context around {found_by} in {rel_path}:"] - max_line_num_width = len(str(end_line_idx + 1)) # Width for padding + + # Generate hashline for the entire file + hashed_content = hashline(content) + hashed_lines = hashed_content.splitlines() + + # Extract the context window from hashed lines + context_hashed_lines = hashed_lines[start_line_idx : end_line_idx + 1] for i in range(start_line_idx, end_line_idx + 1): - line_num_str = str(i + 1).rjust(max_line_num_width) - output_lines.append(f"{line_num_str} | {lines[i]}") + hashed_line = context_hashed_lines[i - start_line_idx] + output_lines.append(hashed_line) # Add separator between multiple show operations if show_index > 0: diff --git a/cecli/tools/thinking.py b/cecli/tools/thinking.py index 151032f026e..9e40c3fe311 100644 --- a/cecli/tools/thinking.py +++ b/cecli/tools/thinking.py @@ -35,7 +35,7 @@ def execute(cls, coder, content, **kwargs): iterates over tools to ideally help it guide itself to a proper solution """ coder.io.tool_output("🧠 Thoughts recorded in context") - return "🧠 Thoughts recorded in context" + return "🧠 Thoughts recorded in context. Please proceed with your task" @classmethod def format_output(cls, coder, mcp_server, tool_response): diff --git a/cecli/tui/app.py b/cecli/tui/app.py index 5b012759488..7423cc41205 100644 --- a/cecli/tui/app.py +++ b/cecli/tui/app.py @@ -738,6 +738,28 @@ def _open_editor_suspended(self, initial_content=""): else: input_area.focus() + return edited_text + + def get_response_from_editor(self, initial_content=""): + """Open an external editor with proper TUI suspension. 
+ + Args: + initial_content: Initial text to populate the editor with + + Returns: + Edited text + """ + # Get editor from coder's commands or default + editor = getattr(self.worker.coder.commands, "editor", None) + + # Suspend TUI and open editor + input_area = self.query_one("#input", InputArea) + edited_text = "" + edited_text = self.run_obstructive(pipe_editor, initial_content, suffix="md", editor=editor) + input_area.focus() + + return edited_text.rstrip() + def _encode_keys(self, key): key = key.replace("shift+enter", "ctrl+j") diff --git a/cecli/website/HISTORY.md b/cecli/website/HISTORY.md deleted file mode 100644 index c31856be365..00000000000 --- a/cecli/website/HISTORY.md +++ /dev/null @@ -1,1536 +0,0 @@ ---- -title: Release history -nav_order: 925 -highlight_image: /assets/blame.jpg -description: Release notes and stats on aider writing its own code. ---- - -# Release history -*Note: This release history is a carry-over from before this project was forked into cecli.* - -Aider writes most of its own code, usually about 70-80% of the new code in each release. -These -[statistics are based on the git commit history](/docs/faq.html#how-are-the-aider-wrote-xx-of-code-stats-computed) -of the aider repo. - -{% include blame.md %} - -## Release notes - - - - -### Aider v0.86.0 - -- Expanded GPT-5 model support across family variants and providers (OpenAI, Azure, OpenRouter), including dated and chat/mini/nano variants. -- Aider wrote 88% of the code in this release. - -### Aider v0.85.5 - -- Enforced diff edit format for GPT-5 models. -- Added support for the reasoning_effort setting for GPT-5 models. -- Fixed model detection to correctly apply GPT-5 settings to versioned names (gpt-5 and gpt-5-2025-08-07). - -### Aider v0.85.4 - -- Added support for openai/gpt-5 -- Fixed analytics to support the latest PostHog SDK event-capture API. -- Disabled temperature when using GPT-5 models for more deterministic outputs. 
- -### Aider v0.85.3 - -- Bumped dependencies to pick up latest litellm==1.75.0. - -### Aider v0.85.2 - -- Added support for Grok-4 via `xai/grok-4` and `openrouter/x-ai/grok-4` model names. -- Added support for `gemini/gemini-2.5-flash-lite-preview-06-17` model, by Tamir Zahavi-Brunner. -- `/clear` now prints “All chat history cleared.” so you know it worked, by Zexin Yuan. -- `/undo` output now shows only the first line of each commit message, making it easier to read. -- Fixed an issue where new settings for an existing model didn't replace the old ones, by Andrew Grigorev. -- Added support for `openrouter/moonshotai/kimi-k2` model, by Jack Harrington. - -### Aider v0.85.1 - -- Display model announcements with no-arg `/model` command. - -### Aider v0.85.0 - -- Support for Responses API models like o1-pro, o3-pro. -- Updated pricing for o3. -- Added support for new Gemini models including `gemini-2.5-pro`, `gemini-2.5-flash`, and `gemini-2.5-pro-preview-06-05` with thinking tokens support. -- Updated model aliases: `flash` now points to `gemini-2.5-flash` and `gemini` now points to `gemini-2.5-pro`. -- Added `--add-gitignore-files` flag to enable adding files listed in .gitignore to Aider's editing scope, by omarcinkonis. -- Added `--commit-language` option to specify the language for commit messages, by Kyosuke Takayama. -- Enhanced thinking tokens support: can now be disabled by setting to 0, and improved help text with examples. -- Added MATLAB language support for repository maps, by Matthew Tofano. -- Added support for OpenAI o3-pro model across multiple providers. -- Improved GitHub Copilot token handling with better validation and error messages, by Vincent Taverna and Sebastian Estrella. -- Fixed encoding issues in git diff output and LLM history logging. -- Enhanced commit message generation to use system prompt prefixes, by Luke Reeves. -- Improved inline code rendering in Rich markdown output, by Vamsi Talupula. 
-- Fixed Vertex AI model name prefixes in settings, by Wietse Venema. -- Improved `/read-only` command to resolve literal paths correctly, by Matteo Landi. -- Skip expensive file tracking operations when `--skip-sanity-check-repo` is enabled for better performance, by Makar Ivashko. -- Ensure pip is available before package installation. -- Auto-create parent directories for chat history files to prevent startup errors, by contributor. -- Fixed search block regex to accept optional closing tags when working with HTML content, by Mathis Beer. -- Co-authored-by attribution is now enabled by default for commit messages. -- Added Clojure language support for repository maps, by Garrett Hopper. -- Added custom PostHog analytics configuration options with `--analytics-posthog-host` and `--analytics-posthog-project-api-key` flags, by Vasil Markoukin. -- Optimized chat history summarization performance, by jayeshthk. -- Improved kebab-case identifier recognition in repository maps for better code analysis. -- Increased max tokens for Deepseek models to 65536 for better performance. -- Aider wrote 21% of the code in this release. - -### Aider v0.84.0 - -- Added support for new Claude models including the Sonnet 4 and Opus 4 series (e.g., `claude-sonnet-4-20250514`, -`claude-opus-4-20250514`) across various providers. The default `sonnet` and `opus` aliases were updated to these newer -versions. -- Added support for the `vertex_ai/gemini-2.5-flash-preview-05-20` model. -- Fixed OpenRouter token cost calculation for improved accuracy. -- Updated default OpenRouter models during onboarding to `deepseek/deepseek-r1:free` for the free tier and -`anthropic/claude-sonnet-4` for paid tiers. -- Automatically refresh GitHub Copilot tokens when used as OpenAI API keys, by Lih Chen. -- Aider wrote 79% of the code in this release. - -### Aider v0.83.2 - -- Bumped configargparse to 1.7.1 as 1.7 was pulled. 
-- Added shell tab completion for file path arguments (by saviour) and for `--edit-format`/`--editor-edit-format` options. -- Improved OpenRouter model metadata handling by introducing a local cache, increasing reliability and performance. -- The `/settings` command now displays detailed metadata for active main, editor, and weak models. -- Fixed an issue where files explicitly added via the command line were not correctly ignored if listed in `.gitignore`. -- Improved automatic commit messages by providing more context during their generation, by wangboxue. - -### Aider v0.83.1 - -- Improved user language detection by correctly normalizing hyphenated language codes (e.g., `en-US` to `en`) and enhancing the validation of locale results. -- Prevented Aider from instructing the LLM to reply in 'C' or 'POSIX' when these are detected as the system locale. -- Displayed a spinner with the model name when generating commit messages. - -### Aider v0.83.0 - -- Added support for `gemini-2.5-pro-preview-05-06` models. -- Added support for `qwen3-235b` models. -- Added repo-map support for OCaml and OCaml interface files, by Andrey Popp. -- Added a spinner animation while waiting for the LLM to start streaming its response. -- Updated the spinner animation to a Knight Rider style. -- Introduced `--attribute-co-authored-by` option to add co-author trailer to commit messages, by Andrew Grigorev. -- Updated Gemini model aliases (e.g., `gemini`, `gemini-2.5-pro`) to point to the `05-06` preview versions. -- Marked Gemini 2.5 Pro preview models as `overeager` by default. -- Commit message prompt specifies the user's language. -- Updated the default weak model for Gemini 2.5 Pro models to `gemini/gemini-2.5-flash-preview-04-17`. -- Corrected `gemini-2.5-pro-exp-03-25` model settings to reflect its lack of support for `thinking_budget`. -- Ensured model-specific system prompt prefixes are placed on a new line before the main system prompt. 
-- Added tracking of total tokens sent and received, now included in benchmark statistics. -- Automatically fetch model parameters (context window, pricing) for OpenRouter models directly from their website, by Stefan Hladnik. -- Enabled support for `thinking_tokens` and `reasoning_effort` parameters for OpenRouter models. -- Improved cost calculation using `litellm.completion_cost` where available. -- Added model settings for `openrouter/google/gemini-2.5-pro-preview-03-25`. -- Added `--disable-playwright` flag to prevent Playwright installation prompts and usage, by Andrew Grigorev. -- The `aider scrape` command-line tool will now use Playwright for web scraping if it is available, by Jon Keys. -- Fixed linter command execution on Windows by adopting `oslex` for argument quoting, by Titusz Pan. -- Improved cross-platform display of shell commands by using `oslex` for robust argument quoting, by Titusz Pan. -- Improved `/ask` mode to instruct the LLM to elide unchanging code in its responses. -- Ensured web scraping in the GUI also respects Playwright availability and the `--disable-playwright` flag. -- Improved display of filenames in the prompt header using rich Text formatting. -- Enabled `reasoning_effort` for Gemini 2.5 Flash models. -- Added a `--shell-completions` argument to generate shell completion scripts (e.g., for bash, zsh). -- Explicit `--attribute-author` or `--attribute-committer` flags now override the default behavior when `--attribute-co-authored-by` is used, allowing finer control over commit attribution, by Andrew Grigorev. -- Fixed an issue where read-only status of files might not be preserved correctly by some commands (e.g. `/drop` after adding a read-only file). -- The `aider-args` utility (or `python -m aider.args`) now defaults to printing a sample YAML configuration if no arguments are provided. -- Displayed token count progress and the name of the file or identifier being processed during repo map updates. 
-- Extended the waiting spinner to also show for non-streaming responses and further enhanced its animation with console width clipping, cursor hiding, and a more continuous appearance. -- Dropped support for Python 3.9. -- Aider wrote 55% of the code in this release. - -### Aider v0.82.3 - -- Add support for `gemini-2.5-flash-preview-04-17` models. -- Improved robustness of edit block parsing when filenames start with backticks or fences. -- Add new `udiff-simple` edit format, for Gemini 2.5 Pro. -- Update default weak/editor models for Gemini 2.5 Pro models to use `gemini-2.5-flash-preview-04-17`. -- Instruct models to reply in the user's detected system language. -- Fix parsing of diffs for newly created files (`--- /dev/null`). -- Add markdown syntax highlighting support when editing multi-line commit messages via `/commit`, by Kay Gosho. -- Set Gemini 2.5 Pro models to use the `overeager` prompt setting by default. -- Add common file types (`.svg`, `.pdf`) to the default list of ignored files for AI comment scanning (`--watch`). -- Skip scanning files larger than 1MB for AI comments (`--watch`). - -### Aider v0.82.2 - -- Fix editing shell files with diff-fenced, by zjy1412. -- Improve robustness of patch application by allowing multiple update/delete actions for the same file within a single response. -- Update prompts to instruct LLMs to consolidate all edits for a given file into a single block within the patch. - -### Aider v0.82.1 - -- Added support for `o3` and `o4-mini` including provider-specific versions for OpenAI, OpenRouter, and Azure. -- Added support for Azure specific `gpt-4.1` and `gpt-4.1-mini` models. -- Disabled streaming for `o3` models since you need identity verification to stream. -- Fixed handling of file paths in unified diffs, especially those generated by git. - -### Aider v0.82.0 - -- Support for GPT 4.1, mini and nano. -- Added new `patch` edit format for OpenAI's GPT-4.1 model. 
-- Improved support for using architect mode with Gemini 2.5 Pro. -- Added new `editor-diff`, `editor-whole`, and `editor-diff-fenced` edit formats. -- Bugfix for automatically selecting the best edit format to use in architect mode. -- Added support for `grok-3-fast-beta` and `grok-3-mini-fast-beta` models. -- Aider wrote 92% of the code in this release. - -### Aider v0.81.3 - -- Commit messages generated by aider are no longer forced to be entirely lowercase, by Peter Hadlaw. -- Updated default settings for Grok models. - -### Aider v0.81.2 - -- Add support for `xai/grok-3-beta`, `xai/grok-3-mini-beta`, `openrouter/x-ai/grok-3-beta`, `openrouter/x-ai/grok-3-mini-beta`, and `openrouter/openrouter/optimus-alpha` models. -- Add alias "grok3" for `xai/grok-3-beta`. -- Add alias "optimus" for `openrouter/openrouter/optimus-alpha`. -- Fix URL extraction from error messages. -- Allow adding files by full path even if a file with the same basename is already in the chat. -- Fix quoting of values containing '#' in the sample `aider.conf.yml`. -- Add support for Fireworks AI model 'deepseek-v3-0324', by Felix Lisczyk. -- Commit messages generated by aider are now lowercase, by Anton Ödman. - -### Aider v0.81.1 - -- Added support for the `gemini/gemini-2.5-pro-preview-03-25` model. -- Updated the `gemini` alias to point to `gemini/gemini-2.5-pro-preview-03-25`. -- Added the `gemini-exp` alias for `gemini/gemini-2.5-pro-exp-03-25`. - -### Aider v0.81.0 - -- Added support for the `openrouter/openrouter/quasar-alpha` model. - - Run with `aider --model quasar` -- Offer OpenRouter OAuth authentication if an OpenRouter model is specified but the API key is missing. -- Prevent retrying API calls when the provider reports insufficient credits. -- Improve URL detection to exclude trailing double quotes. -- Aider wrote 86% of the code in this release. - -### Aider v0.80.4 - -- Bumped deps to pickup litellm change to properly display the root cause of OpenRouter "choices" errors. 
- -### Aider v0.80.3 - -- Improve error message for OpenRouter API connection issues to mention potential rate limiting or upstream provider issues. -- Configure weak models (`gemini/gemini-2.0-flash` and `openrouter/google/gemini-2.0-flash-exp:free`) for Gemini 2.5 Pro models. -- Add model metadata for `openrouter/google/gemini-2.0-flash-exp:free`. - -### Aider v0.80.2 - -- Bumped deps. - -### Aider v0.80.1 - -- Updated deps for yanked fsspec and aiohttp packages #3699 -- Removed redundant dependency check during OpenRouter OAuth flow, by Claudia Pellegrino. - -### Aider v0.80.0 - -- OpenRouter OAuth integration: - - Offer to OAuth against OpenRouter if no model and keys are provided. - - Select OpenRouter default model based on free/paid tier status if `OPENROUTER_API_KEY` is set and no model is specified. -- Prioritize `gemini/gemini-2.5-pro-exp-03-25` if `GEMINI_API_KEY` is set, and `vertex_ai/gemini-2.5-pro-exp-03-25` if `VERTEXAI_PROJECT` is set, when no model is specified. -- Validate user-configured color settings on startup and warn/disable invalid ones. -- Warn at startup if `--stream` and `--cache-prompts` are used together, as cost estimates may be inaccurate. -- Boost repomap ranking for files whose path components match identifiers mentioned in the chat. -- Change web scraping timeout from an error to a warning, allowing scraping to continue with potentially incomplete content. -- Left-align markdown headings in the terminal output, by Peter Schilling. -- Update edit format to the new model's default when switching models with `/model`, if the user was using the old model's default format. -- Add `Ctrl-X Ctrl-E` keybinding to edit the current input buffer in an external editor, by Matteo Landi. -- Fix linting errors for filepaths containing shell metacharacters, by Mir Adnan ALI. -- Add the `openrouter/deepseek-chat-v3-0324:free` model. -- Add repomap support for the Scala language, by Vasil Markoukin. 
-- Fixed bug in `/run` that was preventing auto-testing. -- Fix bug preventing `UnboundLocalError` during git tree traversal. -- Handle `GitCommandNotFound` error if git is not installed or not in PATH. -- Handle `FileNotFoundError` if the current working directory is deleted while aider is running. -- Fix completion menu current item color styling, by Andrey Ivanov. -- Aider wrote 87% of the code in this release. - -### Aider v0.79.2 - -- Added 'gemini' alias for gemini-2.5-pro model. -- Updated Gemini 2.5 Pro max output tokens to 64k. -- Added support for Lisp-style semicolon comments in file watcher, by Matteo Landi. -- Added OpenRouter API error detection and retries. -- Added openrouter/deepseek-chat-v3-0324 model. -- Aider wrote 93% of the code in this release. - -### Aider v0.79.1 - -- Improved model listing to include all models in fuzzy matching, including those provided by aider (not litellm). - -### Aider v0.79.0 - -- Added support for Gemini 2.5 Pro models. -- Added support for DeepSeek V3 0324 model. -- Added a new `/context` command that automatically identifies which files need to be edited for a given request. -- Added `/edit` as an alias for the `/editor` command. -- Added "overeager" mode for Claude 3.7 Sonnet models to try and keep it working within the requested scope. -- Aider wrote 65% of the code in this release. - -### Aider v0.78.0 - -- Added support for thinking tokens for OpenRouter Sonnet 3.7. -- Added commands to switch between model types: `/editor-model` for Editor Model, and `/weak-model` for Weak Model, by csala. -- Added model setting validation to ignore `--reasoning-effort` and `--thinking-tokens` if the model doesn't support them. -- Added `--check-model-accepts-settings` flag (default: true) to force unsupported model settings. -- Annotated which models support reasoning_effort and thinking_tokens settings in the model settings data. -- Improved code block rendering in markdown output with better padding using NoInsetMarkdown. 
-- Added `--git-commit-verify` flag (default: False) to control whether git commit hooks are bypassed. -- Fixed autocompletion for `/ask`, `/code`, and `/architect` commands, by shladnik. -- Added vi-like behavior when pressing enter in multiline-mode while in vi normal/navigation-mode, by Marco Mayer. -- Added AWS_PROFILE support for Bedrock models, allowing use of AWS profiles instead of explicit credentials, by lentil32. -- Enhanced `--aiderignore` argument to resolve both absolute and relative paths, by mopemope. -- Improved platform information handling to gracefully handle retrieval errors. -- Aider wrote 92% of the code in this release. - -### Aider v0.77.1 - -- Bumped dependencies to pickup litellm fix for Ollama. -- Added support for `openrouter/google/gemma-3-27b-it` model. -- Updated exclude patterns for help documentation. - -### Aider v0.77.0 - -- Big upgrade in [programming languages supported](https://aider.chat/docs/languages.html) by adopting [tree-sitter-language-pack](https://github.com/Goldziher/tree-sitter-language-pack/). - - 130 new languages with linter support. - - 20 new languages with repo-map support. -- Added `/think-tokens` command to set thinking token budget with support for human-readable formats (8k, 10.5k, 0.5M). -- Added `/reasoning-effort` command to control model reasoning level. -- The `/think-tokens` and `/reasoning-effort` commands display current settings when called without arguments. -- Display of thinking token budget and reasoning effort in model information. -- Changed `--thinking-tokens` argument to accept string values with human-readable formats. -- Added `--auto-accept-architect` flag (default: true) to automatically accept changes from architect coder format without confirmation. -- Added support for `cohere_chat/command-a-03-2025` and `gemini/gemma-3-27b-it` -- The bare `/drop` command now preserves original read-only files provided via args.read. 
-- Fixed a bug where default model would be set by deprecated `--shortcut` switches even when already specified in the command line. -- Improved AutoCompleter to require 3 characters for autocompletion to reduce noise. -- Aider wrote 72% of the code in this release. - -### Aider v0.76.2 - -- Fixed handling of JSONDecodeError when loading model cache file. -- Fixed handling of GitCommandError when retrieving git user configuration. -- Aider wrote 75% of the code in this release. - -### Aider v0.76.1 - -- Added ignore_permission_denied option to file watcher to prevent errors when accessing restricted files, by Yutaka Matsubara. -- Aider wrote 0% of the code in this release. - -### Aider v0.76.0 - -- Improved support for thinking/reasoningmodels: - - Added `--thinking-tokens` CLI option to control token budget for models that support thinking. - - Display thinking/reasoning content from LLMs which return it. - - Enhanced handling of reasoning tags to better clean up model responses. - - Added deprecation warning for `remove_reasoning` setting, now replaced by `reasoning_tag`. -- Aider will notify you when it's completed the last request and needs your input: - - Added [notifications when LLM responses are ready](https://aider.chat/docs/usage/notifications.html) with `--notifications` flag. - - Specify desktop notification command with `--notifications-command`. -- Added support for QWQ 32B. -- Switch to `tree-sitter-language-pack` for tree sitter support. -- Improved error handling for EOF (Ctrl+D) in user input prompts. -- Added helper function to ensure hex color values have a # prefix. -- Fixed handling of Git errors when reading staged files. -- Improved SSL verification control for model information requests. -- Improved empty LLM response handling with clearer warning messages. -- Fixed Git identity retrieval to respect global configuration, by Akira Komamura. -- Offer to install dependencies for Bedrock and Vertex AI models. 
-- Deprecated model shortcut args (like --4o, --opus) in favor of the --model flag. -- Aider wrote 85% of the code in this release. - -### Aider v0.75.3 - -- Support for V3 free on OpenRouter: `--model openrouter/deepseek/deepseek-chat:free`. - -### Aider v0.75.2 - -- Added support for Claude 3.7 Sonnet models on OpenRouter, Bedrock and Vertex AI. -- Updated default model to Claude 3.7 Sonnet on OpenRouter. -- Added support for GPT-4.5-preview model. -- Added support for Claude 3.7 Sonnet:beta on OpenRouter. -- Fixed weak_model_name patterns to match main model name patterns for some models. - -### Aider v0.75.1 - -- Added support for `openrouter/anthropic/claude-3.7-sonnet` - -### Aider v0.75.0 - -- Basic support for Claude 3.7 Sonnet - - Use `--model sonnet` to use the new 3.7 - - Thinking support coming soon. -- Bugfix to `/editor` command. -- Aider wrote 46% of the code in this release. - -### Aider v0.74.3 - -- Downgrade streamlit dependency to avoid threading bug. -- Added support for tree-sitter language pack. -- Added openrouter/o3-mini-high model configuration. -- Added build.gradle.kts to special files for Kotlin project support, by Lucas Shadler. - -### Aider v0.74.2 - -- Prevent more than one cache warming thread from becoming active. -- Fixed continuation prompt ". " for multiline input. -- Added HCL (Terraform) syntax support, by Warren Krewenki. - -### Aider v0.74.1 - -- Have o1 & o3-mini generate markdown by sending the magic "Formatting re-enabled." string. -- Bugfix for multi-line inputs, which should not include the ". " continuation prompt. - -### Aider v0.74.0 - -- Dynamically changes the Ollama context window to hold the current chat. -- Better support for o3-mini, DeepSeek V3 & R1, o1-mini, o1 especially via third-party API providers. -- Remove `` tags from R1 responses for commit messages (and other weak model uses). -- Can now specify `use_temperature: ` in model settings, not just true/false. 
-- The full docker container now includes `boto3` for Bedrock. -- Docker containers now set `HOME=/app` which is the normal project mount-point, to persist `~/.aider`. -- Bugfix to prevent creating incorrect filenames like `python`, `php`, etc. -- Bugfix for `--timeout` -- Bugfix so that `/model` now correctly reports that the weak model is not changed. -- Bugfix so that multi-line mode persists through ^C at confirmation prompts. -- Watch files now fully ignores top-level directories named in ignore files, to reduce the chance of hitting OS watch limits. Helpful to ignore giant subtrees like `node_modules`. -- Fast startup with more providers and when model metadata provided in local files. -- Improved .gitignore handling: - - Honor ignores already in effect regardless of how they've been configured. - - Check for .env only when the file exists. -- Yes/No prompts now accept All/Skip as alias for Y/N even when not processing a group of confirmations. -- Aider wrote 77% of the code in this release. - -### Aider v0.73.0 - -- Full support for o3-mini: `aider --model o3-mini` -- New `--reasoning-effort` argument: low, medium, high. -- Improved handling of context window size limits, with better messaging and Ollama-specific guidance. -- Added support for removing model-specific reasoning tags from responses with `remove_reasoning: tagname` model setting. -- Auto-create parent directories when creating new files, by xqyz. -- Support for R1 free on OpenRouter: `--model openrouter/deepseek/deepseek-r1:free` -- Aider wrote 69% of the code in this release. - -### Aider v0.72.3 - -- Enforce user/assistant turn order to avoid R1 errors, by miradnanali. -- Case-insensitive model name matching while preserving original case. - -### Aider v0.72.2 -- Harden against user/assistant turn order problems which cause R1 errors. - -### Aider v0.72.1 -- Fix model metadata for `openrouter/deepseek/deepseek-r1` - -### Aider v0.72.0 -- Support for DeepSeek R1. 
- - Use shortcut: `--model r1` - - Also via OpenRouter: `--model openrouter/deepseek/deepseek-r1` -- Added Kotlin syntax support to repo map, by Paul Walker. -- Added `--line-endings` for file writing, by Titusz Pan. -- Added examples_as_sys_msg=True for GPT-4o models, improves benchmark scores. -- Bumped all dependencies, to pick up litellm support for o1 system messages. -- Bugfix for turn taking when reflecting lint/test errors. -- Aider wrote 52% of the code in this release. - -### Aider v0.71.1 - -- Fix permissions issue in Docker images. -- Added read-only file announcements. -- Bugfix: ASCII fallback for unicode errors. -- Bugfix: integer indices for list slicing in repomap calculations. - -### Aider v0.71.0 - -- Prompts to help DeepSeek work better when alternating between `/ask` and `/code`. -- Streaming pretty LLM responses is smoother and faster for long replies. -- Streaming automatically turns of for model that don't support it - - Can now switch to/from `/model o1` and a streaming model -- Pretty output remains enabled even when editing files with triple-backtick fences -- Bare `/ask`, `/code` and `/architect` commands now switch the chat mode. -- Increased default size of the repomap. -- Increased max chat history tokens limit from 4k to 8k. -- Turn off fancy input and watch files if terminal is dumb. -- Added support for custom voice format and input device settings. -- Disabled Streamlit email prompt, by apaz-cli. -- Docker container runs as non-root user. -- Fixed lint command handling of nested spaced strings, by Aaron Weisberg. -- Added token count feedback when adding command output to chat. -- Improved error handling for large audio files with automatic format conversion. -- Improved handling of git repo index errors, by Krazer. -- Improved unicode handling in console output with ASCII fallback. -- Added AssertionError, AttributeError to git error handling. -- Aider wrote 60% of the code in this release. 
- -### Aider v0.70.0 - -- Full support for o1 models. -- Watch files now honors `--subtree-only`, and only watches that subtree. -- Improved prompting for watch files, to work more reliably with more models. -- New install methods via uv, including one-liners. -- Support for openrouter/deepseek/deepseek-chat model. -- Better error handling when interactive commands are attempted via `/load` or `--load`. -- Display read-only files with abs path if its shorter than rel path. -- Ask 10% of users to opt-in to analytics. -- Bugfix for auto-suggest. -- Gracefully handle unicode errors in git path names. -- Aider wrote 74% of the code in this release. - -### Aider v0.69.1 - -- Fix for gemini model names in model metadata. -- Show hints about AI! and AI? when user makes AI comments. -- Support for running without git installed. -- Improved environment variable setup messages on Windows. - -### Aider v0.69.0 - -- [Watch files](https://aider.chat/docs/usage/watch.html) improvements: - - Use `# ... AI?` comments to trigger aider and ask questions about your code. - - Now watches *all* files, not just certain source files. - - Use `# AI comments`, `// AI comments`, or `-- AI comments` to give aider instructions in any text file. -- Full support for Gemini Flash 2.0 Exp: - - `aider --model flash` or `aider --model gemini/gemini-2.0-flash-exp` -- [New `--multiline` flag and `/multiline-mode` command](https://aider.chat/docs/usage/commands.html#entering-multi-line-chat-messages) makes ENTER a soft newline and META-ENTER send the message, by @miradnanali. -- `/copy-context ` now takes optional "instructions" when [copying code context to the clipboard](https://aider.chat/docs/usage/copypaste.html#copy-aiders-code-context-to-your-clipboard-paste-into-the-web-ui). -- Improved clipboard error handling with helpful requirements install info. -- Ask 5% of users if they want to opt-in to analytics. -- `/voice` now lets you edit the transcribed text before sending. 
-- Disabled auto-complete in Y/N prompts. -- Aider wrote 68% of the code in this release. - -### Aider v0.68.0 - -- [Aider works with LLM web chat UIs](https://aider.chat/docs/usage/copypaste.html). - - New `--copy-paste` mode. - - New `/copy-context` command. -- [Set API keys and other environment variables for all providers from command line or YAML conf file](https://aider.chat/docs/config/aider_conf.html#storing-llm-keys). - - New `--api-key provider=key` setting. - - New `--set-env VAR=value` setting. -- Added bash and zsh support to `--watch-files`. -- Better error messages when missing dependencies for Gemini and Bedrock models. -- Control-D now properly exits the program. -- Don't count token costs when API provider returns a hard error. -- Bugfix so watch files works with files that don't have tree-sitter support. -- Bugfix so o1 models can be used as weak model. -- Updated shell command prompt. -- Added docstrings for all Coders. -- Reorganized command line arguments with improved help messages and grouping. -- Use the exact `sys.python` for self-upgrades. -- Added experimental Gemini models. -- Aider wrote 71% of the code in this release. - -### Aider v0.67.0 - -- [Use aider in your IDE or editor](https://aider.chat/docs/usage/watch.html). - - Run `aider --watch-files` and it will watch for instructions you add to your source files. - - One-liner `# ...` or `// ...` comments that start or end with "AI" are instructions to aider. - - When aider sees "AI!" it reads and follows all the instructions in AI comments. -- Support for new Amazon Bedrock Nova models. -- When `/run` or `/test` have non-zero exit codes, pre-fill "Fix that" into the next message prompt. -- `/diff` now invokes `git diff` to use your preferred diff tool. -- Added Ctrl-Z support for process suspension. -- Spinner now falls back to ASCII art if fancy symbols throw unicode errors. -- `--read` now expands `~` home dirs. -- Enabled exception capture in analytics. 
-- [Aider wrote 61% of the code in this release.](https://aider.chat/HISTORY.html) - -### Aider v0.66.0 - -- PDF support for Sonnet and Gemini models. -- Added `--voice-input-device` to select audio input device for voice recording, by @preynal. -- Added `--timeout` option to configure API call timeouts. -- Set cwd to repo root when running shell commands. -- Added Ctrl-Up/Down keyboard shortcuts for per-message history navigation. -- Improved error handling for failed .gitignore file operations. -- Improved error handling for input history file permissions. -- Improved error handling for analytics file access. -- Removed spurious warning about disabling pretty in VSCode. -- Removed broken support for Dart. -- Bugfix when scraping URLs found in chat messages. -- Better handling of __version__ import errors. -- Improved `/drop` command to support substring matching for non-glob patterns. -- Aider wrote 82% of the code in this release. - -### Aider v0.65.1 - -- Bugfix to `--alias`. - -### Aider v0.65.0 - -- Added `--alias` config to define [custom model aliases](https://aider.chat/docs/config/model-aliases.html). -- Added `--[no-]detect-urls` flag to disable detecting and offering to scrape URLs found in the chat. -- Ollama models now default to an 8k context window. -- Added [RepoMap support for Dart language](https://aider.chat/docs/languages.html) by @malkoG. -- Ask 2.5% of users if they want to opt-in to [analytics](https://aider.chat/docs/more/analytics.html). -- Skip suggesting files that share names with files already in chat. -- `/editor` returns and prefill the file content into the prompt, so you can use `/editor` to compose messages that start with `/commands`, etc. -- Enhanced error handling for analytics. -- Improved handling of UnknownEditFormat exceptions with helpful documentation links. -- Bumped dependencies to pick up grep-ast 0.4.0 for Dart language support. -- Aider wrote 81% of the code in this release. 
- -### Aider v0.64.1 - -- Disable streaming for o1 on OpenRouter. - -### Aider v0.64.0 - -- Added [`/editor` command](https://aider.chat/docs/usage/commands.html) to open system editor for writing prompts, by @thehunmonkgroup. -- Full support for `gpt-4o-2024-11-20`. -- Stream o1 models by default. -- `/run` and suggested shell commands are less mysterious and now confirm that they "Added XX lines of output to the chat." -- Ask 1% of users if they want to opt-in to [analytics](https://aider.chat/docs/more/analytics.html). -- Added support for [optional multiline input tags](https://aider.chat/docs/usage/commands.html#entering-multi-line-chat-messages) with matching closing tags. -- Improved [model settings configuration](https://aider.chat/docs/config/adv-model-settings.html#global-extra-params) with support for global `extra_params` for `litellm.completion()`. -- Architect mode now asks to add files suggested by the LLM. -- Fixed bug in fuzzy model name matching. -- Added Timeout exception to handle API provider timeouts. -- Added `--show-release-notes` to control release notes display on first run of new version. -- Save empty dict to cache file on model metadata download failure, to delay retry. -- Improved error handling and code formatting. -- Aider wrote 74% of the code in this release. - -### Aider v0.63.2 - -- Fixed bug in fuzzy model name matching when litellm provider info is missing. -- Modified model metadata file loading to allow override of resource file. -- Allow recursive loading of dirs using `--read`. -- Updated dependency versions to pick up litellm fix for ollama models. -- Added exponential backoff retry when writing files to handle editor file locks. -- Updated Qwen 2.5 Coder 32B model configuration. - -### Aider v0.63.1 - -- Fixed bug in git ignored file handling. -- Improved error handling for git operations. - -### Aider v0.63.0 - -- Support for Qwen 2.5 Coder 32B. 
-- `/web` command just adds the page to the chat, without triggering an LLM response. -- Improved prompting for the user's preferred chat language. -- Improved handling of LiteLLM exceptions. -- Bugfix for double-counting tokens when reporting cache stats. -- Bugfix for the LLM creating new files. -- Other small bug fixes. -- Aider wrote 55% of the code in this release. - -### Aider v0.62.0 - -- Full support for Claude 3.5 Haiku - - Scored 75% on [aider's code editing leaderboard](https://aider.chat/docs/leaderboards/). - - Almost as good as Sonnet at much lower cost. - - Launch with `--haiku` to use it. -- Easily apply file edits from ChatGPT, Claude or other web apps - - Chat with ChatGPT or Claude via their web app. - - Give it your source files and ask for the changes you want. - - Use the web app's "copy response" button to copy the entire reply from the LLM. - - Run `aider --apply-clipboard-edits file-to-edit.js`. - - Aider will edit your file with the LLM's changes. -- Bugfix for creating new files. -- Aider wrote 84% of the code in this release. - -### Aider v0.61.0 - -- Load and save aider slash-commands to files: - - `/save ` command will make a file of `/add` and `/read-only` commands that recreate the current file context in the chat. - - `/load ` will replay the commands in the file. - - You can use `/load` to run any arbitrary set of slash-commands, not just `/add` and `/read-only`. - - Use `--load ` to run a list of commands on launch, before the interactive chat begins. -- Anonymous, opt-in [analytics](https://aider.chat/docs/more/analytics.html) with no personal data sharing. -- Aider follows litellm's `supports_vision` attribute to enable image support for models. -- Bugfix for when diff mode flexibly handles the model using the wrong filename. -- Displays filenames in sorted order for `/add` and `/read-only`. -- New `--no-fancy-input` switch disables prompt toolkit input, now still available with `--no-pretty`. 
-- Override browser config with `--no-browser` or `--no-gui`. -- Offer to open documentation URLs when errors occur. -- Properly support all o1 models, regardless of provider. -- Improved layout of filenames above input prompt. -- Better handle corrupted repomap tags cache. -- Improved handling of API errors, especially when accessing the weak model. -- Aider wrote 68% of the code in this release. - -### Aider v0.60.1 - -- Enable image support for Sonnet 10/22. -- Display filenames in sorted order. - -### Aider v0.60.0 - -- Full support for Sonnet 10/22, the new SOTA model on aider's code editing benchmark. - - Aider uses Sonnet 10/22 by default. -- Improved formatting of added and read-only files above chat prompt, by @jbellis. -- Improved support for o1 models by more flexibly parsing their nonconforming code edit replies. -- Corrected diff edit format prompt that only the first match is replaced. -- Stronger whole edit format prompt asking for clean file names. -- Now offers to add `.env` to the `.gitignore` file. -- Ships with a small model metadata json file to handle models not yet updated in litellm. -- Model settings for o1 models on azure. -- Bugfix to properly include URLs in `/help` RAG results. -- Aider wrote 49% of the code in this release. - -### Aider v0.59.1 - -- Check for obsolete `yes: true` in YAML config, show helpful error. -- Model settings for openrouter/anthropic/claude-3.5-sonnet:beta - -### Aider v0.59.0 - -- Improvements to `/read-only`: - - Now supports shell-style auto-complete of the full file system. - - Still auto-completes the full paths of the repo files like `/add`. - - Now supports globs like `src/**/*.py` -- Renamed `--yes` to `--yes-always`. - - Now uses `CECLI_YES_ALWAYS` env var and `yes-always:` YAML key. - - Existing YAML and .env files will need to be updated. - - Can still abbreviate to `--yes` on the command line. -- Config file now uses standard YAML list syntax with ` - list entries`, one per line. 
-- `/settings` now includes the same announcement lines that would print at launch. -- Sanity checks the `--editor-model` on launch now, same as main and weak models. -- Added `--skip-sanity-check-repo` switch to speedup launch in large repos. -- Bugfix so architect mode handles Control-C properly. -- Repo-map is deterministic now, with improved caching logic. -- Improved commit message prompt. -- Aider wrote 77% of the code in this release. - -### Aider v0.58.1 - -- Fixed bug where cache warming pings caused subsequent user messages to trigger a tight loop of LLM requests. - -### Aider v0.58.0 - -- [Use a pair of Architect/Editor models for improved coding](https://aider.chat/2024/09/26/architect.html) - - Use a strong reasoning model like o1-preview as your Architect. - - Use a cheaper, faster model like gpt-4o as your Editor. -- New `--o1-preview` and `--o1-mini` shortcuts. -- Support for new Gemini 002 models. -- Better support for Qwen 2.5 models. -- Many confirmation questions can be skipped for the rest of the session with "(D)on't ask again" response. -- Autocomplete for `/read-only` supports the entire filesystem. -- New settings for completion menu colors. -- New `/copy` command to copy the last LLM response to the clipboard. -- Renamed `/clipboard` to `/paste`. -- Will now follow HTTP redirects when scraping urls. -- New `--voice-format` switch to send voice audio as wav/mp3/webm, by @mbailey. -- ModelSettings takes `extra_params` dict to specify any extras to pass to `litellm.completion()`. -- Support for cursor shapes when in vim mode. -- Numerous bug fixes. -- Aider wrote 53% of the code in this release. - -### Aider v0.57.1 - -- Fixed dependency conflict between aider-chat[help] and [playwright]. - -### Aider v0.57.0 - -- Support for OpenAI o1 models: - - o1-preview now works well with diff edit format. - - o1-preview with diff now matches SOTA leaderboard result with whole edit format. 
- - `aider --model o1-mini` - - `aider --model o1-preview` -- On Windows, `/run` correctly uses PowerShell or cmd.exe. -- Support for new 08-2024 Cohere models, by @jalammar. -- Can now recursively add directories with `/read-only`. -- User input prompts now fall back to simple `input()` if `--no-pretty` or a Windows console is not available. -- Improved sanity check of git repo on startup. -- Improvements to prompt cache chunking strategy. -- Removed "No changes made to git tracked files". -- Numerous bug fixes for corner case crashes. -- Updated all dependency versions. -- Aider wrote 70% of the code in this release. - -### Aider v0.56.0 - -- Enables prompt caching for Sonnet via OpenRouter by @fry69 -- Enables 8k output tokens for Sonnet via VertexAI and DeepSeek V2.5. -- New `/report` command to open your browser with a pre-populated GitHub Issue. -- New `--chat-language` switch to set the spoken language. -- Now `--[no-]suggest-shell-commands` controls both prompting for and offering to execute shell commands. -- Check key imports on launch, provide helpful error message if dependencies aren't available. -- Renamed `--models` to `--list-models` by @fry69. -- Numerous bug fixes for corner case crashes. -- Aider wrote 56% of the code in this release. - -### Aider v0.55.0 - -- Only print the pip command when self updating on Windows, without running it. -- Converted many error messages to warning messages. -- Added `--tool-warning-color` setting. -- Blanket catch and handle git errors in any `/command`. -- Catch and handle glob errors in `/add`, errors writing files. -- Disabled built in linter for typescript. -- Catch and handle terminals which don't support pretty output. -- Catch and handle playwright and pandoc errors. -- Catch `/voice` transcription exceptions, show the WAV file so the user can recover it. -- Aider wrote 53% of the code in this release. - -### Aider v0.54.12 - -- Switched to `vX.Y.Z.dev` version naming. 
- -### Aider v0.54.11 - -- Improved printed pip command output on Windows. - -### Aider v0.54.10 - -- Bugfix to test command in platform info. - -### Aider v0.54.9 - -- Include important devops files in the repomap. -- Print quoted pip install commands to the user. -- Adopt setuptools_scm to provide dev versions with git hashes. -- Share active test and lint commands with the LLM. -- Catch and handle most errors creating new files, reading existing files. -- Catch and handle most git errors. -- Added --verbose debug output for shell commands. - -### Aider v0.54.8 - -- Startup QOL improvements: - - Sanity check the git repo and exit gracefully on problems. - - Pause for confirmation after model sanity check to allow user to review warnings. -- Bug fix for shell commands on Windows. -- Do not fuzzy match filenames when LLM is creating a new file, by @ozapinq -- Numerous corner case bug fixes submitted via new crash report -> GitHub Issue feature. -- Crash reports now include python version, OS, etc. - -### Aider v0.54.7 - -- Offer to submit a GitHub issue pre-filled with uncaught exception info. -- Bugfix for infinite output. - -### Aider v0.54.6 - -- New `/settings` command to show active settings. -- Only show cache warming status update if `--verbose`. - -### Aider v0.54.5 - -- Bugfix for shell commands on Windows. -- Refuse to make git repo in $HOME, warn user. -- Don't ask again in current session about a file the user has said not to add to the chat. -- Added `--update` as an alias for `--upgrade`. - -### Aider v0.54.4 - -- Bugfix to completions for `/model` command. -- Bugfix: revert home dir special case. - -### Aider v0.54.3 - -- Dependency `watchdog<5` for docker image. - -### Aider v0.54.2 - -- When users launch aider in their home dir, help them find/create a repo in a subdir. -- Added missing `pexpect` dependency. - -### Aider v0.54.0 - -- Added model settings for `gemini/gemini-1.5-pro-exp-0827` and `gemini/gemini-1.5-flash-exp-0827`. 
-- Shell and `/run` commands can now be interactive in environments where a pty is available. -- Optionally share output of suggested shell commands back to the LLM. -- New `--[no-]suggest-shell-commands` switch to configure shell commands. -- Performance improvements for autocomplete in large/mono repos. -- New `--upgrade` switch to install latest version of aider from pypi. -- Bugfix to `--show-prompt`. -- Disabled automatic reply to the LLM on `/undo` for all models. -- Removed pager from `/web` output. -- Aider wrote 64% of the code in this release. - -### Aider v0.53.0 - -- [Keep your prompt cache from expiring](https://aider.chat/docs/usage/caching.html#preventing-cache-expiration) with `--cache-keepalive-pings`. - - Pings the API every 5min to keep the cache warm. -- You can now bulk accept/reject a series of add url and run shell confirmations. -- Improved matching of filenames from S/R blocks with files in chat. -- Stronger prompting for Sonnet to make edits in code chat mode. -- Stronger prompting for the LLM to specify full file paths. -- Improved shell command prompting. -- Weak model now uses `extra_headers`, to support Anthropic beta features. -- New `--install-main-branch` to update to the latest dev version of aider. -- Improved error messages on attempt to add not-git subdir to chat. -- Show model metadata info on `--verbose`. -- Improved warnings when LLMs env variables aren't set. -- Bugfix to windows filenames which contain `\_`. -- Aider wrote 59% of the code in this release. - -### Aider v0.52.1 - -- Bugfix for NameError when applying edits. - -### Aider v0.52.0 - -- Aider now offers to run shell commands: - - Launch a browser to view updated html/css/js. - - Install new dependencies. - - Run DB migrations. - - Run the program to exercise changes. - - Run new test cases. -- `/read` and `/drop` now expand `~` to the home dir. -- Show the active chat mode at aider prompt. -- New `/reset` command to `/drop` files and `/clear` chat history. 
-- New `--map-multiplier-no-files` to control repo map size multiplier when no files are in the chat. - - Reduced default multiplier to 2. -- Bugfixes and improvements to auto commit sequencing. -- Improved formatting of token reports and confirmation dialogs. -- Default OpenAI model is now `gpt-4o-2024-08-06`. -- Bumped dependencies to pickup litellm bugfixes. -- Aider wrote 68% of the code in this release. - -### Aider v0.51.0 - -- Prompt caching for Anthropic models with `--cache-prompts`. - - Caches the system prompt, repo map and `/read-only` files. -- Repo map recomputes less often in large/mono repos or when caching enabled. - - Use `--map-refresh ` to configure. -- Improved cost estimate logic for caching. -- Improved editing performance on Jupyter Notebook `.ipynb` files. -- Show which config YAML file is loaded with `--verbose`. -- Bumped dependency versions. -- Bugfix: properly load `.aider.models.metadata.json` data. -- Bugfix: Using `--msg /ask ...` caused an exception. -- Bugfix: litellm tokenizer bug for images. -- Aider wrote 56% of the code in this release. - -### Aider v0.50.1 - -- Bugfix for provider API exceptions. - -### Aider v0.50.0 - -- Infinite output for DeepSeek Coder, Mistral models in addition to Anthropic's models. -- New `--deepseek` switch to use DeepSeek Coder. -- DeepSeek Coder uses 8k token output. -- New `--chat-mode ` switch to launch in ask/help/code modes. -- New `/code ` command request a code edit while in `ask` mode. -- Web scraper is more robust if page never idles. -- Improved token and cost reporting for infinite output. -- Improvements and bug fixes for `/read` only files. -- Switched from `setup.py` to `pyproject.toml`, by @branchvincent. -- Bug fix to persist files added during `/ask`. -- Bug fix for chat history size in `/tokens`. -- Aider wrote 66% of the code in this release. - -### Aider v0.49.1 - -- Bugfix to `/help`. 
- -### Aider v0.49.0 - -- Add read-only files to the chat context with `/read` and `--read`, including from outside the git repo. -- `/diff` now shows diffs of all changes resulting from your request, including lint and test fixes. -- New `/clipboard` command to paste images or text from the clipboard, replaces `/add-clipboard-image`. -- Now shows the markdown scraped when you add a url with `/web`. -- When [scripting aider](https://aider.chat/docs/scripting.html) messages can now contain in-chat `/` commands. -- Aider in docker image now suggests the correct command to update to latest version. -- Improved retries on API errors (was easy to test during Sonnet outage). -- Added `--mini` for `gpt-4o-mini`. -- Bugfix to keep session cost accurate when using `/ask` and `/help`. -- Performance improvements for repo map calculation. -- `/tokens` now shows the active model. -- Enhanced commit message attribution options: - - New `--attribute-commit-message-author` to prefix commit messages with 'aider: ' if aider authored the changes, replaces `--attribute-commit-message`. - - New `--attribute-commit-message-committer` to prefix all commit messages with 'aider: '. -- Aider wrote 61% of the code in this release. - -### Aider v0.48.1 - -- Added `openai/gpt-4o-2024-08-06`. -- Worked around litellm bug that removes OpenRouter app headers when using `extra_headers`. -- Improved progress indication during repo map processing. -- Corrected instructions for upgrading the docker container to latest aider version. -- Removed obsolete 16k token limit on commit diffs, use per-model limits. - -### Aider v0.48.0 - -- Performance improvements for large/mono repos. -- Added `--subtree-only` to limit aider to current directory subtree. - - Should help with large/mono repo performance. -- New `/add-clipboard-image` to add images to the chat from your clipboard. -- Use `--map-tokens 1024` to use repo map with any model. -- Support for Sonnet's 8k output window. 
- - [Aider already supported infinite output from Sonnet.](https://aider.chat/2024/07/01/sonnet-not-lazy.html) -- Workaround litellm bug for retrying API server errors. -- Upgraded dependencies, to pick up litellm bug fixes. -- Aider wrote 44% of the code in this release. - -### Aider v0.47.1 - -- Improvements to conventional commits prompting. - -### Aider v0.47.0 - -- [Commit message](https://aider.chat/docs/git.html#commit-messages) improvements: - - Added Conventional Commits guidelines to commit message prompt. - - Added `--commit-prompt` to customize the commit message prompt. - - Added strong model as a fallback for commit messages (and chat summaries). -- [Linting](https://aider.chat/docs/usage/lint-test.html) improvements: - - Ask before fixing lint errors. - - Improved performance of `--lint` on all dirty files in repo. - - Improved lint flow, now doing code edit auto-commit before linting. - - Bugfix to properly handle subprocess encodings (also for `/run`). -- Improved [docker support](https://aider.chat/docs/install/docker.html): - - Resolved permission issues when using `docker run --user xxx`. - - New `paulgauthier/aider-full` docker image, which includes all extras. -- Switching to code and ask mode no longer summarizes the chat history. -- Added graph of aider's contribution to each release. -- Generic auto-completions are provided for `/commands` without a completion override. -- Fixed broken OCaml tags file. -- Bugfix in `/run` add to chat approval logic. -- Aider wrote 58% of the code in this release. - -### Aider v0.46.1 - -- Downgraded stray numpy dependency back to 1.26.4. - -### Aider v0.46.0 - -- New `/ask ` command to ask about your code, without making any edits. -- New `/chat-mode ` command to switch chat modes: - - ask: Ask questions about your code without making any changes. - - code: Ask for changes to your code (using the best edit format). - - help: Get help about using aider (usage, config, troubleshoot). 
-- Add `file: CONVENTIONS.md` to `.aider.conf.yml` to always load a specific file. - - Or `file: [file1, file2, file3]` to always load multiple files. -- Enhanced token usage and cost reporting. Now works when streaming too. -- Filename auto-complete for `/add` and `/drop` is now case-insensitive. -- Commit message improvements: - - Updated commit message prompt to use imperative tense. - - Fall back to main model if weak model is unable to generate a commit message. -- Stop aider from asking to add the same url to the chat multiple times. -- Updates and fixes to `--no-verify-ssl`: - - Fixed regression that broke it in v0.42.0. - - Disables SSL certificate verification when `/web` scrapes websites. -- Improved error handling and reporting in `/web` scraping functionality -- Fixed syntax error in Elm's tree-sitter scm file (by @cjoach). -- Handle UnicodeEncodeError when streaming text to the terminal. -- Updated dependencies to latest versions. -- Aider wrote 45% of the code in this release. - -### Aider v0.45.1 - -- Use 4o-mini as the weak model wherever 3.5-turbo was used. - -### Aider v0.45.0 - -- GPT-4o mini scores similar to the original GPT 3.5, using whole edit format. -- Aider is better at offering to add files to the chat on Windows. -- Bugfix corner cases for `/undo` with new files or new repos. -- Now shows last 4 characters of API keys in `--verbose` output. -- Bugfix to precedence of multiple `.env` files. -- Bugfix to gracefully handle HTTP errors when installing pandoc. -- Aider wrote 42% of the code in this release. - -### Aider v0.44.0 - -- Default pip install size reduced by 3-12x. -- Added 3 package extras, which aider will offer to install when needed: - - `aider-chat[help]` - - `aider-chat[browser]` - - `aider-chat[playwright]` -- Improved regex for detecting URLs in user chat messages. -- Bugfix to globbing logic when absolute paths are included in `/add`. -- Simplified output of `--models`. 
-- The `--check-update` switch was renamed to `--just-check-updated`. -- The `--skip-check-update` switch was renamed to `--[no-]check-update`. -- Aider wrote 29% of the code in this release (157/547 lines). - -### Aider v0.43.4 - -- Added scipy back to main requirements.txt. - -### Aider v0.43.3 - -- Added build-essentials back to main Dockerfile. - -### Aider v0.43.2 - -- Moved HuggingFace embeddings deps into [hf-embed] extra. -- Added [dev] extra. - -### Aider v0.43.1 - -- Replace the torch requirement with the CPU only version, because the GPU versions are huge. - -### Aider v0.43.0 - -- Use `/help ` to [ask for help about using aider](https://aider.chat/docs/troubleshooting/support.html), customizing settings, troubleshooting, using LLMs, etc. -- Allow multiple use of `/undo`. -- All config/env/yml/json files now load from home, git root, cwd and named command line switch. -- New `$HOME/.aider/caches` dir for app-wide expendable caches. -- Default `--model-settings-file` is now `.aider.model.settings.yml`. -- Default `--model-metadata-file` is now `.aider.model.metadata.json`. -- Bugfix affecting launch with `--no-git`. -- Aider wrote 9% of the 424 lines edited in this release. - -### Aider v0.42.0 - -- Performance release: - - 5X faster launch! - - Faster auto-complete in large git repos (users report ~100X speedup)! - -### Aider v0.41.0 - -- [Allow Claude 3.5 Sonnet to stream back >4k tokens!](https://aider.chat/2024/07/01/sonnet-not-lazy.html) - - It is the first model capable of writing such large coherent, useful code edits. - - Do large refactors or generate multiple files of new code in one go. -- Aider now uses `claude-3-5-sonnet-20240620` by default if `ANTHROPIC_API_KEY` is set in the environment. -- [Enabled image support](https://aider.chat/docs/usage/images-urls.html) for 3.5 Sonnet and for GPT-4o & 3.5 Sonnet via OpenRouter (by @yamitzky). -- Added `--attribute-commit-message` to prefix aider's commit messages with "aider:". 
-- Fixed regression in quality of one-line commit messages. -- Automatically retry on Anthropic `overloaded_error`. -- Bumped dependency versions. - -### Aider v0.40.6 - -- Fixed `/undo` so it works regardless of `--attribute` settings. - -### Aider v0.40.5 - -- Bump versions to pickup latest litellm to fix streaming issue with Gemini - - https://github.com/BerriAI/litellm/issues/4408 - -### Aider v0.40.1 - -- Improved context awareness of repomap. -- Restored proper `--help` functionality. - -### Aider v0.40.0 - -- Improved prompting to discourage Sonnet from wasting tokens emitting unchanging code (#705). -- Improved error info for token limit errors. -- Options to suppress adding "(aider)" to the [git author and committer names](https://aider.chat/docs/git.html#commit-attribution). -- Use `--model-settings-file` to customize per-model settings, like use of repo-map (by @caseymcc). -- Improved invocation of flake8 linter for python code. - - -### Aider v0.39.0 - -- Use `--sonnet` for Claude 3.5 Sonnet, which is the top model on [aider's LLM code editing leaderboard](https://aider.chat/docs/leaderboards/#claude-35-sonnet-takes-the-top-spot). -- All `CECLI_xxx` environment variables can now be set in `.env` (by @jpshack-at-palomar). -- Use `--llm-history-file` to log raw messages sent to the LLM (by @daniel-vainsencher). -- Commit messages are no longer prefixed with "aider:". Instead the git author and committer names have "(aider)" added. - -### Aider v0.38.0 - -- Use `--vim` for [vim keybindings](https://aider.chat/docs/usage/commands.html#vi) in the chat. -- [Add LLM metadata](https://aider.chat/docs/llms/warnings.html#specifying-context-window-size-and-token-costs) via `.aider.models.json` file (by @caseymcc). -- More detailed [error messages on token limit errors](https://aider.chat/docs/troubleshooting/token-limits.html). -- Single line commit messages, without the recent chat messages. -- Ensure `--commit --dry-run` does nothing. 
-- Have playwright wait for idle network to better scrape js sites. -- Documentation updates, moved into website/ subdir. -- Moved tests/ into aider/tests/. - -### Aider v0.37.0 - -- Repo map is now optimized based on text of chat history as well as files added to chat. -- Improved prompts when no files have been added to chat to solicit LLM file suggestions. -- Aider will notice if you paste a URL into the chat, and offer to scrape it. -- Performance improvements the repo map, especially in large repos. -- Aider will not offer to add bare filenames like `make` or `run` which may just be words. -- Properly override `GIT_EDITOR` env for commits if it is already set. -- Detect supported audio sample rates for `/voice`. -- Other small bug fixes. - -### Aider v0.36.0 - -- [Aider can now lint your code and fix any errors](https://aider.chat/2024/05/22/linting.html). - - Aider automatically lints and fixes after every LLM edit. - - You can manually lint-and-fix files with `/lint` in the chat or `--lint` on the command line. - - Aider includes built in basic linters for all supported tree-sitter languages. - - You can also configure aider to use your preferred linter with `--lint-cmd`. -- Aider has additional support for running tests and fixing problems. - - Configure your testing command with `--test-cmd`. - - Run tests with `/test` or from the command line with `--test`. - - Aider will automatically attempt to fix any test failures. - - -### Aider v0.35.0 - -- Aider now uses GPT-4o by default. - - GPT-4o tops the [aider LLM code editing leaderboard](https://aider.chat/docs/leaderboards/) at 72.9%, versus 68.4% for Opus. - - GPT-4o takes second on [aider's refactoring leaderboard](https://aider.chat/docs/leaderboards/#code-refactoring-leaderboard) with 62.9%, versus Opus at 72.3%. -- Added `--restore-chat-history` to restore prior chat history on launch, so you can continue the last conversation. -- Improved reflection feedback to LLMs using the diff edit format. 
-- Improved retries on `httpx` errors. - -### Aider v0.34.0 - -- Updated prompting to use more natural phrasing about files, the git repo, etc. Removed reliance on read-write/read-only terminology. -- Refactored prompting to unify some phrasing across edit formats. -- Enhanced the canned assistant responses used in prompts. -- Added explicit model settings for `openrouter/anthropic/claude-3-opus`, `gpt-3.5-turbo` -- Added `--show-prompts` debug switch. -- Bugfix: catch and retry on all litellm exceptions. - - -### Aider v0.33.0 - -- Added native support for [Deepseek models](https://aider.chat/docs/llms.html#deepseek) using `DEEPSEEK_API_KEY` and `deepseek/deepseek-chat`, etc rather than as a generic OpenAI compatible API. - -### Aider v0.32.0 - -- [Aider LLM code editing leaderboards](https://aider.chat/docs/leaderboards/) that rank popular models according to their ability to edit code. - - Leaderboards include GPT-3.5/4 Turbo, Opus, Sonnet, Gemini 1.5 Pro, Llama 3, Deepseek Coder & Command-R+. -- Gemini 1.5 Pro now defaults to a new diff-style edit format (diff-fenced), enabling it to work better with larger code bases. -- Support for Deepseek-V2, via more a flexible config of system messages in the diff edit format. -- Improved retry handling on errors from model APIs. -- Benchmark outputs results in YAML, compatible with leaderboard. - -### Aider v0.31.0 - -- [Aider is now also AI pair programming in your browser!](https://aider.chat/2024/05/02/browser.html) Use the `--browser` switch to launch an experimental browser based version of aider. -- Switch models during the chat with `/model ` and search the list of available models with `/models `. - -### Aider v0.30.1 - -- Adding missing `google-generativeai` dependency - -### Aider v0.30.0 - -- Added [Gemini 1.5 Pro](https://aider.chat/docs/llms.html#free-models) as a recommended free model. -- Allow repo map for "whole" edit format. -- Added `--models ` to search the available models. 
-- Added `--no-show-model-warnings` to silence model warnings. - -### Aider v0.29.2 - -- Improved [model warnings](https://aider.chat/docs/llms.html#model-warnings) for unknown or unfamiliar models - -### Aider v0.29.1 - -- Added better support for groq/llama3-70b-8192 - -### Aider v0.29.0 - -- Added support for [directly connecting to Anthropic, Cohere, Gemini and many other LLM providers](https://aider.chat/docs/llms.html). -- Added `--weak-model ` which allows you to specify which model to use for commit messages and chat history summarization. -- New command line switches for working with popular models: - - `--4-turbo-vision` - - `--opus` - - `--sonnet` - - `--anthropic-api-key` -- Improved "whole" and "diff" backends to better support [Cohere's free to use Command-R+ model](https://aider.chat/docs/llms.html#cohere). -- Allow `/add` of images from anywhere in the filesystem. -- Fixed crash when operating in a repo in a detached HEAD state. -- Fix: Use the same default model in CLI and python scripting. - -### Aider v0.28.0 - -- Added support for new `gpt-4-turbo-2024-04-09` and `gpt-4-turbo` models. - - Benchmarked at 61.7% on Exercism benchmark, comparable to `gpt-4-0613` and worse than the `gpt-4-preview-XXXX` models. See [recent Exercism benchmark results](https://aider.chat/2024/03/08/claude-3.html). - - Benchmarked at 34.1% on the refactoring/laziness benchmark, significantly worse than the `gpt-4-preview-XXXX` models. See [recent refactor bencmark results](https://aider.chat/2024/01/25/benchmarks-0125.html). - - Aider continues to default to `gpt-4-1106-preview` as it performs best on both benchmarks, and significantly better on the refactoring/laziness benchmark. - -### Aider v0.27.0 - -- Improved repomap support for typescript, by @ryanfreckleton. -- Bugfix: Only /undo the files which were part of the last commit, don't stomp other dirty files -- Bugfix: Show clear error message when OpenAI API key is not set. 
-- Bugfix: Catch error for obscure languages without tags.scm file. - -### Aider v0.26.1 - -- Fixed bug affecting parsing of git config in some environments. - -### Aider v0.26.0 - -- Use GPT-4 Turbo by default. -- Added `-3` and `-4` switches to use GPT 3.5 or GPT-4 (non-Turbo). -- Bug fix to avoid reflecting local git errors back to GPT. -- Improved logic for opening git repo on launch. - -### Aider v0.25.0 - -- Issue a warning if user adds too much code to the chat. - - https://aider.chat/docs/faq.html#how-can-i-add-all-the-files-to-the-chat -- Vocally refuse to add files to the chat that match `.aiderignore` - - Prevents bug where subsequent git commit of those files will fail. -- Added `--openai-organization-id` argument. -- Show the user a FAQ link if edits fail to apply. -- Made past articles part of https://aider.chat/blog/ - -### Aider v0.24.1 - -- Fixed bug with cost computations when --no-steam in effect - -### Aider v0.24.0 - -- New `/web ` command which scrapes the url, turns it into fairly clean markdown and adds it to the chat. -- Updated all OpenAI model names, pricing info -- Default GPT 3.5 model is now `gpt-3.5-turbo-0125`. -- Bugfix to the `!` alias for `/run`. - -### Aider v0.23.0 - -- Added support for `--model gpt-4-0125-preview` and OpenAI's alias `--model gpt-4-turbo-preview`. The `--4turbo` switch remains an alias for `--model gpt-4-1106-preview` at this time. -- New `/test` command that runs a command and adds the output to the chat on non-zero exit status. -- Improved streaming of markdown to the terminal. -- Added `/quit` as alias for `/exit`. -- Added `--skip-check-update` to skip checking for the update on launch. -- Added `--openrouter` as a shortcut for `--openai-api-base https://openrouter.ai/api/v1` -- Fixed bug preventing use of env vars `OPENAI_API_BASE, OPENAI_API_TYPE, OPENAI_API_VERSION, OPENAI_API_DEPLOYMENT_ID`. - -### Aider v0.22.0 - -- Improvements for unified diff editing format. -- Added ! as an alias for /run. 
-- Autocomplete for /add and /drop now properly quotes filenames with spaces. -- The /undo command asks GPT not to just retry reverted edit. - -### Aider v0.21.1 - -- Bugfix for unified diff editing format. -- Added --4turbo and --4 aliases for --4-turbo. - -### Aider v0.21.0 - -- Support for python 3.12. -- Improvements to unified diff editing format. -- New `--check-update` arg to check if updates are available and exit with status code. - -### Aider v0.20.0 - -- Add images to the chat to automatically use GPT-4 Vision, by @joshuavial - -- Bugfixes: - - Improved unicode encoding for `/run` command output, by @ctoth - - Prevent false auto-commits on Windows, by @ctoth - -### Aider v0.19.1 - -- Removed stray debug output. - -### Aider v0.19.0 - -- [Significantly reduced "lazy" coding from GPT-4 Turbo due to new unified diff edit format](https://aider.chat/docs/unified-diffs.html) - - Score improves from 20% to 61% on new "laziness benchmark". - - Aider now uses unified diffs by default for `gpt-4-1106-preview`. -- New `--4-turbo` command line switch as a shortcut for `--model gpt-4-1106-preview`. - -### Aider v0.18.1 - -- Upgraded to new openai python client v1.3.7. - -### Aider v0.18.0 - -- Improved prompting for both GPT-4 and GPT-4 Turbo. - - Far fewer edit errors from GPT-4 Turbo (`gpt-4-1106-preview`). - - Significantly better benchmark results from the June GPT-4 (`gpt-4-0613`). Performance leaps from 47%/64% up to 51%/71%. -- Fixed bug where in-chat files were marked as both read-only and ready-write, sometimes confusing GPT. -- Fixed bug to properly handle repos with submodules. 
- -### Aider v0.17.0 - -- Support for OpenAI's new 11/06 models: - - gpt-4-1106-preview with 128k context window - - gpt-3.5-turbo-1106 with 16k context window -- [Benchmarks for OpenAI's new 11/06 models](https://aider.chat/docs/benchmarks-1106.html) -- Streamlined [API for scripting aider, added docs](https://aider.chat/docs/faq.html#can-i-script-aider) -- Ask for more concise SEARCH/REPLACE blocks. [Benchmarked](https://aider.chat/docs/benchmarks.html) at 63.9%, no regression. -- Improved repo-map support for elisp. -- Fixed crash bug when `/add` used on file matching `.gitignore` -- Fixed misc bugs to catch and handle unicode decoding errors. - -### Aider v0.16.3 - -- Fixed repo-map support for C#. - -### Aider v0.16.2 - -- Fixed docker image. - -### Aider v0.16.1 - -- Updated tree-sitter dependencies to streamline the pip install process - -### Aider v0.16.0 - -- [Improved repository map using tree-sitter](https://aider.chat/docs/repomap.html) -- Switched from "edit block" to "search/replace block", which reduced malformed edit blocks. [Benchmarked](https://aider.chat/docs/benchmarks.html) at 66.2%, no regression. -- Improved handling of malformed edit blocks targeting multiple edits to the same file. [Benchmarked](https://aider.chat/docs/benchmarks.html) at 65.4%, no regression. -- Bugfix to properly handle malformed `/add` wildcards. - - -### Aider v0.15.0 - -- Added support for `.aiderignore` file, which instructs aider to ignore parts of the git repo. -- New `--commit` cmd line arg, which just commits all pending changes with a sensible commit message generated by gpt-3.5. -- Added universal ctags and multiple architectures to the [aider docker image](https://aider.chat/docs/install/docker.html) -- `/run` and `/git` now accept full shell commands, like: `/run (cd subdir; ls)` -- Restored missing `--encoding` cmd line switch. 
- -### Aider v0.14.2 - -- Easily [run aider from a docker image](https://aider.chat/docs/install/docker.html) -- Fixed bug with chat history summarization. -- Fixed bug if `soundfile` package not available. - -### Aider v0.14.1 - -- /add and /drop handle absolute filenames and quoted filenames -- /add checks to be sure files are within the git repo (or root) -- If needed, warn users that in-chat file paths are all relative to the git repo -- Fixed /add bug in when aider launched in repo subdir -- Show models supported by api/key if requested model isn't available - -### Aider v0.14.0 - -- [Support for Claude2 and other LLMs via OpenRouter](https://aider.chat/docs/faq.html#accessing-other-llms-with-openrouter) by @joshuavial -- Documentation for [running the aider benchmarking suite](https://github.com/Aider-AI/aider/tree/main/benchmark) -- Aider now requires Python >= 3.9 - - -### Aider v0.13.0 - -- [Only git commit dirty files that GPT tries to edit](https://aider.chat/docs/faq.html#how-did-v0130-change-git-usage) -- Send chat history as prompt/context for Whisper voice transcription -- Added `--voice-language` switch to constrain `/voice` to transcribe to a specific language -- Late-bind importing `sounddevice`, as it was slowing down aider startup -- Improved --foo/--no-foo switch handling for command line and yml config settings - -### Aider v0.12.0 - -- [Voice-to-code](https://aider.chat/docs/usage/voice.html) support, which allows you to code with your voice. -- Fixed bug where /diff was causing crash. -- Improved prompting for gpt-4, refactor of editblock coder. -- [Benchmarked](https://aider.chat/docs/benchmarks.html) at 63.2% for gpt-4/diff, no regression. - -### Aider v0.11.1 - -- Added a progress bar when initially creating a repo map. -- Fixed bad commit message when adding new file to empty repo. -- Fixed corner case of pending chat history summarization when dirty committing. -- Fixed corner case of undefined `text` when using `--no-pretty`. 
-- Fixed /commit bug from repo refactor, added test coverage. -- [Benchmarked](https://aider.chat/docs/benchmarks.html) at 53.4% for gpt-3.5/whole (no regression). - -### Aider v0.11.0 - -- Automatically summarize chat history to avoid exhausting context window. -- More detail on dollar costs when running with `--no-stream` -- Stronger GPT-3.5 prompt against skipping/eliding code in replies (51.9% [benchmark](https://aider.chat/docs/benchmarks.html), no regression) -- Defend against GPT-3.5 or non-OpenAI models suggesting filenames surrounded by asterisks. -- Refactored GitRepo code out of the Coder class. - -### Aider v0.10.1 - -- /add and /drop always use paths relative to the git root -- Encourage GPT to use language like "add files to the chat" to ask users for permission to edit them. - -### Aider v0.10.0 - -- Added `/git` command to run git from inside aider chats. -- Use Meta-ENTER (Esc+ENTER in some environments) to enter multiline chat messages. -- Create a `.gitignore` with `.aider*` to prevent users from accidentally adding aider files to git. -- Check pypi for newer versions and notify user. -- Updated keyboard interrupt logic so that 2 ^C in 2 seconds always forces aider to exit. -- Provide GPT with detailed error if it makes a bad edit block, ask for a retry. -- Force `--no-pretty` if aider detects it is running inside a VSCode terminal. 
-- [Benchmarked](https://aider.chat/docs/benchmarks.html) at 64.7% for gpt-4/diff (no regression) - - -### Aider v0.9.0 - -- Support for the OpenAI models in [Azure](https://aider.chat/docs/faq.html#azure) -- Added `--show-repo-map` -- Improved output when retrying connections to the OpenAI API -- Redacted api key from `--verbose` output -- Bugfix: recognize and add files in subdirectories mentioned by user or GPT -- [Benchmarked](https://aider.chat/docs/benchmarks.html) at 53.8% for gpt-3.5-turbo/whole (no regression) - -### Aider v0.8.3 - -- Added `--dark-mode` and `--light-mode` to select colors optimized for terminal background -- Install docs link to [NeoVim plugin](https://github.com/joshuavial/aider.nvim) by @joshuavial -- Reorganized the `--help` output -- Bugfix/improvement to whole edit format, may improve coding editing for GPT-3.5 -- Bugfix and tests around git filenames with unicode characters -- Bugfix so that aider throws an exception when OpenAI returns InvalidRequest -- Bugfix/improvement to /add and /drop to recurse selected directories -- Bugfix for live diff output when using "whole" edit format - -### Aider v0.8.2 - -- Disabled general availability of gpt-4 (it's rolling out, not 100% available yet) - -### Aider v0.8.1 - -- Ask to create a git repo if none found, to better track GPT's code changes -- Glob wildcards are now supported in `/add` and `/drop` commands -- Pass `--encoding` into ctags, require it to return `utf-8` -- More robust handling of filepaths, to avoid 8.3 windows filenames -- Added [FAQ](https://aider.chat/docs/faq.html) -- Marked GPT-4 as generally available -- Bugfix for live diffs of whole coder with missing filenames -- Bugfix for chats with multiple files -- Bugfix in editblock coder prompt - -### Aider v0.8.0 - -- [Benchmark comparing code editing in GPT-3.5 and GPT-4](https://aider.chat/docs/benchmarks.html) -- Improved Windows support: - - Fixed bugs related to path separators in Windows - - Added a CI step to run all 
tests on Windows -- Improved handling of Unicode encoding/decoding - - Explicitly read/write text files with utf-8 encoding by default (mainly benefits Windows) - - Added `--encoding` switch to specify another encoding - - Gracefully handle decoding errors -- Added `--code-theme` switch to control the pygments styling of code blocks (by @kwmiebach) -- Better status messages explaining the reason when ctags is disabled - -### Aider v0.7.2: - -- Fixed a bug to allow aider to edit files that contain triple backtick fences. - -### Aider v0.7.1: - -- Fixed a bug in the display of streaming diffs in GPT-3.5 chats - -### Aider v0.7.0: - -- Graceful handling of context window exhaustion, including helpful tips. -- Added `--message` to give GPT that one instruction and then exit after it replies and any edits are performed. -- Added `--no-stream` to disable streaming GPT responses. - - Non-streaming responses include token usage info. - - Enables display of cost info based on OpenAI advertised pricing. -- Coding competence benchmarking tool against suite of programming tasks based on Execism's python repo. - - https://github.com/exercism/python -- Major refactor in preparation for supporting new function calls api. -- Initial implementation of a function based code editing backend for 3.5. - - Initial experiments show that using functions makes 3.5 less competent at coding. -- Limit automatic retries when GPT returns a malformed edit response. - -### Aider v0.6.2 - -* Support for `gpt-3.5-turbo-16k`, and all OpenAI chat models -* Improved ability to correct when gpt-4 omits leading whitespace in code edits -* Added `--openai-api-base` to support API proxies, etc. - -### Aider v0.5.0 - -- Added support for `gpt-3.5-turbo` and `gpt-4-32k`. -- Added `--map-tokens` to set a token budget for the repo map, along with a PageRank based algorithm for prioritizing which files and identifiers to include in the map. 
-- Added in-chat command `/tokens` to report on context window token usage. -- Added in-chat command `/clear` to clear the conversation history. - diff --git a/cecli/website/_config.yml b/cecli/website/_config.yml index 6ce81506b44..df729e63b62 100644 --- a/cecli/website/_config.yml +++ b/cecli/website/_config.yml @@ -11,7 +11,7 @@ defaults: path: "README.md" type: "pages" values: - description: "aider is AI pair programming in your terminal" + description: "cecli speaks fluent terminal" exclude: - "tmp*" diff --git a/cecli/website/_includes/help-tip.md b/cecli/website/_includes/help-tip.md index 7947e1e5621..f8f333f34e2 100644 --- a/cecli/website/_includes/help-tip.md +++ b/cecli/website/_includes/help-tip.md @@ -1,5 +1,5 @@ {: .tip } Use `/help ` to -[ask for help about using aider](/docs/troubleshooting/support.html), +[ask for help about using cecli](/docs/troubleshooting/support.html), customizing settings, troubleshooting, using LLMs, etc. diff --git a/cecli/website/_includes/replit-pipx.md b/cecli/website/_includes/replit-pipx.md deleted file mode 100644 index 317489089b7..00000000000 --- a/cecli/website/_includes/replit-pipx.md +++ /dev/null @@ -1,9 +0,0 @@ -To use aider with pipx on replit, you can run these commands in the replit shell: - -```bash -pip install pipx -pipx run aider-chat ...normal aider args... -``` - -If you install aider with pipx on replit and try and run it as just `aider` it will crash with a missing `libstdc++.so.6` library. 
- diff --git a/cecli/website/_posts/2023-05-25-ctags.md b/cecli/website/_posts/2023-05-25-ctags.md deleted file mode 120000 index eb8637641c5..00000000000 --- a/cecli/website/_posts/2023-05-25-ctags.md +++ /dev/null @@ -1 +0,0 @@ -../docs/ctags.md \ No newline at end of file diff --git a/cecli/website/_posts/2023-07-02-benchmarks.md b/cecli/website/_posts/2023-07-02-benchmarks.md deleted file mode 120000 index a02a2275499..00000000000 --- a/cecli/website/_posts/2023-07-02-benchmarks.md +++ /dev/null @@ -1 +0,0 @@ -../docs/benchmarks.md \ No newline at end of file diff --git a/cecli/website/_posts/2023-10-22-repomap.md b/cecli/website/_posts/2023-10-22-repomap.md deleted file mode 100644 index 0c09d00eb80..00000000000 --- a/cecli/website/_posts/2023-10-22-repomap.md +++ /dev/null @@ -1,268 +0,0 @@ ---- -title: Building a better repository map with tree sitter -excerpt: Tree-sitter allows aider to build a repo map that better summarizes large code bases. -highlight_image: /assets/robot-ast.png -nav_exclude: true ---- -{% if page.date %} - -{% endif %} - -# Building a better repository map with tree sitter - -![robot flowchat](/assets/robot-ast.png) - -GPT-4 is extremely useful for "self-contained" coding tasks, -like generating or modifying a simple function -that has no dependencies. Tools like GitHub CoPilot serve -these simple coding tasks well. - -But making complex changes in a larger, pre-existing codebase -is much more difficult, for both humans and AIs. -To do this successfully, you need to: - -1. Find the code that needs to be changed. -2. Understand how that code relates to the rest of the codebase. -3. Make the correct code change to accomplish the task. - -GPT-4 is actually great at making the code changes (3), -once you tell it which files need to be changed (1) -and show it how they fit into the rest of the codebase (2). - -This article is going to focus on step (2), providing "code context": - - - We need to help GPT understand the overall codebase. 
- - This will help it understand the code it needs to change, which may depend on other parts of the codebase. - - It will also help GPT write new code and modify the existing code in a way -that respects and utilizes existing libraries, modules and abstractions -found elsewhere in the codebase. - - We must convey all of this "code context" to GPT in an -efficient manner that fits within the limited context window. - -To address these issues, aider -sends GPT a **concise map of your whole git repository** -that includes -the most important classes and functions along with their types and call signatures. - -This **repository map** is now built automatically using -[tree-sitter](https://tree-sitter.github.io/tree-sitter/) -to extract symbol definitions from source files. -Tree-sitter is used by many IDEs, editors and LSP servers to -help humans search and navigate large codebases. -Aider now uses it to help GPT better comprehend, navigate -and edit code in larger repos. - -*To code with GPT-4 using the techniques discussed here, just install [aider](https://aider.chat/docs/install.html).* - - -## The problem: code context - -GPT-4 is great at "self contained" coding tasks, like writing or -modifying a pure function with no external dependencies. -GPT can easily handle requests like "write a -Fibonacci function" or "rewrite this loop using list -comprehensions", because they require no context beyond the code -being discussed. - -Most real code is not pure and self-contained, it is intertwined with -and depends on code from many different files in a repo. -If you ask GPT to "switch all the print statements in class Foo to -use the BarLog logging system", it needs to see and -modify the code in the Foo class, but it also needs to understand -how to use -the project's BarLog -subsystem. - -A simple solution is to **send the entire codebase** to GPT along with -each change request. Now GPT has all the context! 
But this won't work -for even moderately -sized repos, because they won't fit into the context window. - -A better approach is to be selective, -and **hand pick which files to send**. -For the example above, you could send the file that -contains the Foo class -and the file that contains the BarLog logging subsystem. -This works pretty well, and is supported by aider -- you -can manually specify which files to "add to the chat" you are having with GPT. - -But sending whole files is a bulky way to send code context, -wasting the precious context window. -GPT doesn't need to see the entire implementation of BarLog, -it just needs to understand it well enough to use it. -You may quickly run out of context window by sending -full files of code -just to convey context. - -Aider also strives to reduce the manual work involved in -coding with AI. -So in an ideal world, we'd like aider to automatically -identify and provide the needed code context. - -## Using a repo map to provide context - -Aider sends a **repo map** to GPT along with -each request from the user to make a code change. -The map contains a list of the files in the -repo, along with the key symbols which are defined in each file. -It shows how each of these symbols are defined in the -source code, by including the critical lines of code for each definition. - -Here's a -sample of the map of the aider repo, just showing the maps of -[base_coder.py](https://github.com/Aider-AI/aider/blob/main/aider/coders/base_coder.py) -and -[commands.py](https://github.com/Aider-AI/aider/blob/main/aider/commands.py) -: - -``` -aider/coders/base_coder.py: -⋮... -│class Coder: -│ abs_fnames = None -⋮... -│ @classmethod -│ def create( -│ self, -│ main_model, -│ edit_format, -│ io, -│ skip_model_availabily_check=False, -│ **kwargs, -⋮... -│ def abs_root_path(self, path): -⋮... -│ def run(self, with_message=None): -⋮... - -aider/commands.py: -⋮... -│class Commands: -│ voice = None -│ -⋮... -│ def get_commands(self): -⋮... 
-│ def get_command_completions(self, cmd_name, partial): -⋮... -│ def run(self, inp): -⋮... -``` - -Mapping out the repo like this provides some key benefits: - - - GPT can see classes, methods and function signatures from everywhere in the repo. This alone may give it enough context to solve many tasks. For example, it can probably figure out how to use the API exported from a module just based on the details shown in the map. - - If it needs to see more code, GPT can use the map to figure out by itself which files it needs to look at in more detail. GPT will then ask to see these specific files, and aider will automatically add them to the chat context. - -## Optimizing the map - -Of course, for large repositories even just the repo map might be too large -for GPT's context window. -Aider solves this problem by sending just the **most relevant** -portions of the repo map. -It does this by analyzing the full repo map using -a graph ranking algorithm, computed on a graph -where each source file is a node and edges connect -files which have dependencies. -Aider optimizes the repo map by -selecting the most important parts of the codebase -which will -fit into the token budget assigned by the user -(via the `--map-tokens` switch, which defaults to 1k tokens). - -The sample map shown above doesn't contain *every* class, method and function from those -files. -It only includes the most important identifiers, -the ones which are most often referenced by other portions of the code. -These are the key pieces of context that GPT needs to know to understand -the overall codebase. - - -## Using tree-sitter to make the map - -Under the hood, aider uses -[tree sitter](https://tree-sitter.github.io/tree-sitter/) -to build the -map. 
-It specifically uses the -[py-tree-sitter-languages](https://github.com/grantjenks/py-tree-sitter-languages) -python module, -which provides simple, pip-installable binary wheels for -[most popular programming languages](https://github.com/Aider-AI/grep-ast/blob/main/grep_ast/parsers.py). - -Tree-sitter parses source code into an Abstract Syntax Tree (AST) based -on the syntax of the programming language. -Using the AST, we can identify where functions, classes, variables, types and -other definitions occur in the source code. -We can also identify where else in the code these things are used or referenced. - -Aider uses all of these definitions and references to -determine which are the most important identifiers in the repository, -and to produce the repo map that shows just those key -lines from the codebase. - -## What about ctags? - -The tree-sitter repository map replaces the -[ctags based map](https://aider.chat/docs/ctags.html) -that aider originally used. -Switching from ctags to tree-sitter provides a bunch of benefits: - -- The map is richer, showing full function call signatures and other details straight from the source files. -- Thanks to `py-tree-sitter-languages`, we get full support for many programming languages via a python package that's automatically installed as part of the normal `python -m pip install -U aider-chat`. -- We remove the requirement for users to manually install `universal-ctags` via some external tool or package manager (brew, apt, choco, etc). -- Tree-sitter integration is a key enabler for future work and capabilities for aider. - -## Future work - -You'll recall that we identified the 3 key steps -required to use GPT -to complete a coding task within a large, pre-existing codebase: - -1. Find the code that needs to be changed. -2. Understand how that code relates to the rest of the codebase. -3. Make the correct code change to accomplish the task. 
- -We're now using tree-sitter to help solve the code context problem (2), -but it's also an important foundation -for future work on automatically finding all the code which -will need to be changed (1). - -Right now, aider relies on the user to specify which source files -will need to be modified to complete their request. -Users manually "add files to the chat" using aider's `/add` command, -which makes those files available for GPT to modify. - -This works well, but a key piece of future work is to harness the -power of GPT and tree-sitter to automatically identify -which parts of the code will need changes. - -## Try it out - -To code with GPT-4 using the techniques discussed here, -just install [aider](https://aider.chat/docs/install.html). - -## Credits - -Aider uses -[modified versions of the tags.scm files](https://github.com/Aider-AI/aider/tree/main/aider/queries) -from these -open source tree-sitter language implementations: - -* [https://github.com/tree-sitter/tree-sitter-c](https://github.com/tree-sitter/tree-sitter-c) — licensed under the MIT License. -* [https://github.com/tree-sitter/tree-sitter-c-sharp](https://github.com/tree-sitter/tree-sitter-c-sharp) — licensed under the MIT License. -* [https://github.com/tree-sitter/tree-sitter-cpp](https://github.com/tree-sitter/tree-sitter-cpp) — licensed under the MIT License. -* [https://github.com/Wilfred/tree-sitter-elisp](https://github.com/Wilfred/tree-sitter-elisp) — licensed under the MIT License. -* [https://github.com/elixir-lang/tree-sitter-elixir](https://github.com/elixir-lang/tree-sitter-elixir) — licensed under the Apache License, Version 2.0. -* [https://github.com/elm-tooling/tree-sitter-elm](https://github.com/elm-tooling/tree-sitter-elm) — licensed under the MIT License. -* [https://github.com/tree-sitter/tree-sitter-go](https://github.com/tree-sitter/tree-sitter-go) — licensed under the MIT License. 
-* [https://github.com/tree-sitter/tree-sitter-java](https://github.com/tree-sitter/tree-sitter-java) — licensed under the MIT License. -* [https://github.com/tree-sitter/tree-sitter-javascript](https://github.com/tree-sitter/tree-sitter-javascript) — licensed under the MIT License. -* [https://github.com/tree-sitter/tree-sitter-ocaml](https://github.com/tree-sitter/tree-sitter-ocaml) — licensed under the MIT License. -* [https://github.com/tree-sitter/tree-sitter-php](https://github.com/tree-sitter/tree-sitter-php) — licensed under the MIT License. -* [https://github.com/tree-sitter/tree-sitter-python](https://github.com/tree-sitter/tree-sitter-python) — licensed under the MIT License. -* [https://github.com/tree-sitter/tree-sitter-ql](https://github.com/tree-sitter/tree-sitter-ql) — licensed under the MIT License. -* [https://github.com/r-lib/tree-sitter-r](https://github.com/r-lib/tree-sitter-r) — licensed under the MIT License. -* [https://github.com/tree-sitter/tree-sitter-ruby](https://github.com/tree-sitter/tree-sitter-ruby) — licensed under the MIT License. -* [https://github.com/tree-sitter/tree-sitter-rust](https://github.com/tree-sitter/tree-sitter-rust) — licensed under the MIT License. -* [https://github.com/tree-sitter/tree-sitter-typescript](https://github.com/tree-sitter/tree-sitter-typescript) — licensed under the MIT License. 
diff --git a/cecli/website/_posts/2023-11-06-benchmarks-1106.md b/cecli/website/_posts/2023-11-06-benchmarks-1106.md deleted file mode 120000 index ffae92178ee..00000000000 --- a/cecli/website/_posts/2023-11-06-benchmarks-1106.md +++ /dev/null @@ -1 +0,0 @@ -../docs/benchmarks-1106.md \ No newline at end of file diff --git a/cecli/website/_posts/2023-11-06-benchmarks-speed-1106.md b/cecli/website/_posts/2023-11-06-benchmarks-speed-1106.md deleted file mode 120000 index 75c682ca387..00000000000 --- a/cecli/website/_posts/2023-11-06-benchmarks-speed-1106.md +++ /dev/null @@ -1 +0,0 @@ -../docs/benchmarks-speed-1106.md \ No newline at end of file diff --git a/cecli/website/_posts/2023-12-21-unified-diffs.md b/cecli/website/_posts/2023-12-21-unified-diffs.md deleted file mode 120000 index 700b88ac37a..00000000000 --- a/cecli/website/_posts/2023-12-21-unified-diffs.md +++ /dev/null @@ -1 +0,0 @@ -../docs/unified-diffs.md \ No newline at end of file diff --git a/cecli/website/_posts/2024-01-25-benchmarks-0125.md b/cecli/website/_posts/2024-01-25-benchmarks-0125.md deleted file mode 120000 index d89e2518969..00000000000 --- a/cecli/website/_posts/2024-01-25-benchmarks-0125.md +++ /dev/null @@ -1 +0,0 @@ -../docs/benchmarks-0125.md \ No newline at end of file diff --git a/cecli/website/_posts/2024-03-08-claude-3.md b/cecli/website/_posts/2024-03-08-claude-3.md deleted file mode 100644 index 84b6ff35585..00000000000 --- a/cecli/website/_posts/2024-03-08-claude-3.md +++ /dev/null @@ -1,93 +0,0 @@ ---- -title: Claude 3 beats GPT-4 on Aider's code editing benchmark -excerpt: Claude 3 Opus outperforms all of OpenAI's models on Aider's code editing benchmark, making it the best available model for pair programming with AI. 
-highlight_image: /assets/2024-03-07-claude-3.jpg -nav_exclude: true ---- -{% if page.date %} - -{% endif %} - -# Claude 3 beats GPT-4 on Aider's code editing benchmark - -[![benchmark results](/assets/2024-03-07-claude-3.svg)](https://aider.chat/assets/2024-03-07-claude-3.svg) - -[Anthropic just released their new Claude 3 models](https://www.anthropic.com/news/claude-3-family) -with evals showing better performance on coding tasks. -With that in mind, I've been benchmarking the new models -using Aider's code editing benchmark suite. - -Claude 3 Opus outperforms all of OpenAI's models, -making it the best available model for pair programming with AI. - -To use Claude 3 Opus with aider: - -``` -python -m pip install -U aider-chat -export ANTHROPIC_API_KEY=sk-... -aider --opus -``` - -## Aider's code editing benchmark - -[Aider](https://github.com/Aider-AI/aider) -is an open source command line chat tool that lets you -pair program with AI on code in your local git repo. - -Aider relies on a -[code editing benchmark](https://aider.chat/docs/benchmarks.html) -to quantitatively evaluate how well -an LLM can make changes to existing code. -The benchmark uses aider to try and complete -[133 Exercism Python coding exercises](https://github.com/exercism/python). -For each exercise, -Exercism provides a starting python file with stubs for the needed functions, -a natural language description of the problem to solve -and a test suite to evaluate whether the coder has correctly solved the problem. - -The LLM gets two tries to solve each problem: - -1. On the first try, it gets the initial stub code and the English description of the coding task. If the tests all pass, we are done. -2. If any tests failed, aider sends the LLM the failing test output and gives it a second try to complete the task. - -## Benchmark results - -### Claude 3 Opus - -- The new `claude-3-opus-20240229` model got the highest score ever on this benchmark, completing 68.4% of the tasks with two tries. 
-- Its single-try performance was comparable to the latest GPT-4 Turbo model `gpt-4-0125-preview`, at 54.1%. -- While Opus got the highest score, it was only a few points higher than the GPT-4 Turbo results. Given the extra costs of Opus and the slower response times, it remains to be seen which is the most practical model for daily coding use. - -### Claude 3 Sonnet - -- The new `claude-3-sonnet-20240229` model performed similarly to OpenAI's GPT-3.5 Turbo models with an overall score of 54.9% and a first-try score of 43.6%. - -## Code editing - -It's highly desirable to have the LLM send back code edits as -some form of diffs, rather than having it send back an updated copy of the -entire source code. - -Weaker models like GPT-3.5 are unable to use diffs, and are stuck sending back -updated copies of entire source files. -Aider uses more efficient -[search/replace blocks](https://aider.chat/2023/07/02/benchmarks.html#diff) -with the original GPT-4 -and -[unified diffs](https://aider.chat/2023/12/21/unified-diffs.html#unified-diff-editing-format) -with the newer GPT-4 Turbo models. - -Claude 3 Opus works best with the search/replace blocks, allowing it to send back -code changes efficiently. -Unfortunately, the Sonnet model was only able to work reliably with whole files, -which limits it to editing smaller source files and uses more tokens, money and time. - -## Other observations - -There are a few other things worth noting: - -- Claude 3 Opus and Sonnet are both slower and more expensive than OpenAI's models. You can get almost the same coding skill faster and cheaper with OpenAI's models. -- Claude 3 has a 2X larger context window than the latest GPT-4 Turbo, which may be an advantage when working with larger code bases. -- The Claude models refused to perform a number of coding tasks and returned the error "Output blocked by content filtering policy". 
They refused to code up the [beer song](https://exercism.org/tracks/python/exercises/beer-song) program, which makes some sort of superficial sense. But they also refused to work in some larger open source code bases, for unclear reasons. -- The Claude APIs seem somewhat unstable, returning HTTP 5xx errors of various sorts. Aider automatically recovers from these errors with exponential backoff retries, but it's a sign that Anthropic made be struggling under surging demand. - diff --git a/cecli/website/_posts/2024-04-09-gpt-4-turbo.md b/cecli/website/_posts/2024-04-09-gpt-4-turbo.md deleted file mode 100644 index c055b7dac13..00000000000 --- a/cecli/website/_posts/2024-04-09-gpt-4-turbo.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -title: GPT-4 Turbo with Vision is a step backwards for coding -excerpt: OpenAI's GPT-4 Turbo with Vision model scores worse on aider's code editing benchmarks than all the previous GPT-4 models. In particular, it seems much more prone to "lazy coding" than the existing GPT-4 Turbo "preview" models. -highlight_image: /assets/2024-04-09-gpt-4-turbo-laziness.jpg -nav_exclude: true ---- -{% if page.date %} - -{% endif %} - -# GPT-4 Turbo with Vision is a step backwards for coding - -[OpenAI just released GPT-4 Turbo with Vision](https://twitter.com/OpenAIDevs/status/1777769463258988634) -and it performs worse on aider's coding benchmark suites than all the previous GPT-4 models. -In particular, it seems much more prone to "lazy coding" than the -existing GPT-4 Turbo "preview" models. - -## Code editing skill - -[![benchmark results](/assets/2024-04-09-gpt-4-turbo.svg)](https://aider.chat/assets/2024-04-09-gpt-4-turbo.svg) - -Aider relies on a -[code editing benchmark](https://aider.chat/docs/benchmarks.html#the-benchmark) -to quantitatively evaluate how well -an LLM can make changes to existing code. -The benchmark uses aider to try and complete -[133 Exercism Python coding exercises](https://github.com/exercism/python). 
- -For each exercise, the LLM gets two tries to solve each problem: - -1. On the first try, it gets initial stub code and the English description of the coding task. If the tests all pass, we are done. -2. If any tests failed, aider sends the LLM the failing test output and gives it a second try to complete the task. - -**GPT-4 Turbo with Vision -scores only 62% on this benchmark, -the lowest score of any of the existing GPT-4 models.** -The other models scored 63-66%, so this represents only a small -regression, and is likely statistically insignificant when compared -against `gpt-4-0613`. - -## Lazy coding - -[![benchmark results](/assets/2024-04-09-gpt-4-turbo-laziness.svg)](https://aider.chat/assets/2024-04-09-gpt-4-turbo-laziness.svg) - -The GPT-4 Turbo "preview" models have been widely criticized for being "lazy" -when coding. -They often omit needed code -and instead leave comments with homework assignments like "implement method here". - -``` -def some_complex_method(foo, bar): - # ... implement method here ... -``` - -Aider uses a ["laziness" benchmark suite](https://github.com/Aider-AI/refactor-benchmark) -which is designed to both provoke and quantify lazy coding. -It consists of -89 python refactoring tasks -which tend to make GPT-4 Turbo code in that lazy manner. - -**The new GPT-4 Turbo with Vision model scores only 34% on aider's -refactoring benchmark, making it the laziest coder of all the GPT-4 Turbo models -by a significant margin.** - -# Conclusions - -Aider has full support for the new GPT-4 Turbo with Vision -model, which you can access using the switch `--model gpt-4-turbo-2024-04-09`. -But aider will continue to use `gpt-4-1106-preview` by default, -as it is by far the strongest coder of the GPT-4 models. 
- - - - diff --git a/cecli/website/_posts/2024-05-02-browser.md b/cecli/website/_posts/2024-05-02-browser.md deleted file mode 100644 index f48d363daeb..00000000000 --- a/cecli/website/_posts/2024-05-02-browser.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: Aider in your browser -excerpt: Aider has an experimental browser UI, allowing you to collaborate with LLMs on code in your local git repo. -highlight_image: /assets/browser.jpg ---- -{% if page.date %} - -{% endif %} - -# Aider in your browser - - - - - -Use aider's new experimental browser UI to collaborate with LLMs -to edit code in your local git repo. -Aider will directly edit the code in your local source files, -and [git commit the changes](https://aider.chat/docs/git.html) -with sensible commit messages. -You can start a new project or work with an existing git repo. -{% include works-best.md %} - -Use the `--browser` switch to launch the browser version of aider: - -``` -python -m pip install -U aider-chat - -export OPENAI_API_KEY= # Mac/Linux -setx OPENAI_API_KEY # Windows, restart shell after setx - -aider --browser -``` diff --git a/cecli/website/_posts/2024-05-13-models-over-time.md b/cecli/website/_posts/2024-05-13-models-over-time.md deleted file mode 100644 index 57cc08f89c0..00000000000 --- a/cecli/website/_posts/2024-05-13-models-over-time.md +++ /dev/null @@ -1,327 +0,0 @@ ---- -title: Drawing graphs with aider, GPT-4o and matplotlib -excerpt: Use GPT-4o to draw graphs with matplotlib, including adjusting styles and making visual changes. You get the graph, but you also get the code in your repo. -highlight_image: /assets/models-over-time.png -nav_exclude: true ---- -{% if page.date %} - -{% endif %} - -![LLM coding skill over time](/assets/models-over-time.svg) - -# Drawing graphs with aider, GPT-4o and matplotlib - -I recently wanted to draw a graph showing how LLM code editing skill has been -changing over time as new models have been released by OpenAI, Anthropic and others. 
-I have all the -[data in a YAML file](https://github.com/Aider-AI/aider/blob/main/website/_data/edit_leaderboard.yml) that is used to render -[aider's LLM leaderboards](https://aider.chat/docs/leaderboards/). - -Below is the aider chat transcript, which shows: - -- I launch aider with the YAML file, a file with other plots I've done recently (so GPT can crib the style) and an empty file called `over_time.py`. -- Then I ask GPT to draw the scatterplot I want. -- I run the resulting script and share the error output with GPT so it can fix a small bug. -- I ask it to color the points for GPT-4 and GPT-3.5 family models differently, to better see trends within those model families. -- I add a PNG of the graph to the chat, so GPT can see the current graph and understand why I want it to move the point labels so they don't overlap the points. -- I work through a series of other small style changes, like changing fonts and the graph border. - -In the end I have the graph, but I also have the python code in my repo. -So I can update this graph easily whenever I add new entries to the YAML data file. - - -## Aider chat transcript - -
- -> Aider v0.35.1-dev -> Models: gpt-4o with diff edit format, weak model gpt-3.5-turbo -> Added _data/edit_leaderboard.yml to the chat. -> Added benchmark/over_time.py to the chat. -> Added benchmark/plots.py to the chat. - -#### in over_time.py, draw a graph for each of the yaml entries with a `released` date. plot date on x, `pass_rate_2` on y and label with the `model` name. do a scatter plot. use colors/styles similar to the other graphs. - -To draw a scatter plot for each of the YAML entries with a `released` date, we need to: - -1. Read the YAML file. -2. Extract the `released` date and `pass_rate_2` for each entry. -3. Plot the data using matplotlib. - -Here are the changes: - -benchmark/over_time.py -```python -<<<<<<< SEARCH -======= -import matplotlib.pyplot as plt -import yaml -from datetime import datetime - -def plot_over_time(yaml_file): - with open(yaml_file, 'r') as file: - data = yaml.safe_load(file) - - dates = [] - pass_rates = [] - models = [] - - for entry in data: - if 'released' in entry and 'pass_rate_2' in entry: - dates.append(datetime.strptime(entry['released'], '%Y-%m-%d')) - pass_rates.append(entry['pass_rate_2']) - models.append(entry['model']) - - plt.figure(figsize=(10, 6)) - plt.scatter(dates, pass_rates, c='blue', alpha=0.5) - - for i, model in enumerate(models): - plt.annotate(model, (dates[i], pass_rates[i]), fontsize=8, alpha=0.75) - - plt.xlabel('Release Date') - plt.ylabel('Pass Rate 2') - plt.title('Model Performance Over Time') - plt.grid(True) - plt.tight_layout() - plt.show() - -# Example usage -plot_over_time('_data/edit_leaderboard.yml') ->>>>>>> REPLACE -``` - -> Applied edit to benchmark/over_time.py -> Commit 49ac9ca aider: Added scatter plot functionality for YAML entries with a `released` date and `pass_rate_2`. - -
-
- -#### /run python benchmark/over_time.py -> Traceback (most recent call last): - File "/Users/gauthier/Projects/aider/benchmark/over_time.py", line 33, in - plot_over_time('_data/edit_leaderboard.yml') - File "/Users/gauthier/Projects/aider/benchmark/over_time.py", line 15, in plot_over_time - dates.append(datetime.strptime(entry['released'], '%Y-%m-%d')) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -TypeError: strptime() argument 1 must be str, not datetime.date -> Add the output to the chat? y -> Traceback (most recent call last): -> File "/Users/gauthier/Projects/aider/benchmark/over_time.py", line 33, in -> plot_over_time('_data/edit_leaderboard.yml') -> File "/Users/gauthier/Projects/aider/benchmark/over_time.py", line 15, in plot_over_time -> dates.append(datetime.strptime(entry['released'], '%Y-%m-%d')) -> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -> TypeError: strptime() argument 1 must be str, not datetime.date - -The error indicates that the `released` field in the YAML file is already a `datetime.date` object, so we don't need to parse it with `datetime.strptime`. We can directly use it in the plot. - -Here are the changes: - -benchmark/over_time.py -```python -<<<<<<< SEARCH - dates.append(datetime.strptime(entry['released'], '%Y-%m-%d')) -======= - dates.append(entry['released']) ->>>>>>> REPLACE -``` - -> Applied edit to benchmark/over_time.py -> Commit af2f4b7 aider: Use the `released` field directly in the plot instead of parsing it with `datetime.strptime`. - -
-
- -#### color the "gpt-4" models differently - -To color the "gpt-4" models differently, we need to modify the plotting code in `benchmark/over_time.py` to check for "gpt-4" in the model name and assign a different color. - -Here are the changes: - -benchmark/over_time.py -```python -<<<<<<< SEARCH - plt.scatter(dates, pass_rates, c='blue', alpha=0.5) -======= - colors = ['red' if 'gpt-4' in model else 'blue' for model in models] - plt.scatter(dates, pass_rates, c=colors, alpha=0.5) ->>>>>>> REPLACE -``` - -This change will color the points red if the model name contains "gpt-4", otherwise, it will color them blue. - -> Applied edit to benchmark/over_time.py -> Commit 0d11357 aider: Color the "gpt-4" models differently. - -
-
- -#### color the gpt-3.5 models a new color - -To color the "gpt-3.5" models a new color, we need to modify the plotting code in `benchmark/over_time.py` to check for "gpt-3.5" in the model name and assign a different color. - -Here are the changes: - -benchmark/over_time.py -```python -<<<<<<< SEARCH - colors = ['red' if 'gpt-4' in model else 'blue' for model in models] -======= - colors = ['red' if 'gpt-4' in model else 'green' if 'gpt-3.5' in model else 'blue' for model in models] ->>>>>>> REPLACE -``` - -> Applied edit to benchmark/over_time.py -> Commit 85d9d84 aider: Color the "gpt-3.5" models green in the plotting code. - -
-
- -#### use the colorscheme and fonts from the other graphs in plots.py - -To use the colorscheme and fonts from the graphs in `plots.py`, we need to: - -1. Import the necessary settings from `plots.py`. -2. Apply these settings to the graph in `over_time.py`. - -Here are the changes: - -benchmark/over_time.py -```python -<<<<<<< SEARCH -import yaml -from datetime import datetime -======= -import yaml -from datetime import datetime -from matplotlib import rc ->>>>>>> REPLACE -``` - -benchmark/over_time.py -```python -<<<<<<< SEARCH - plt.figure(figsize=(10, 6)) -======= - plt.rcParams["hatch.linewidth"] = 0.5 - plt.rcParams["hatch.color"] = "#444444" - - rc("font", **{"family": "sans-serif", "sans-serif": ["Helvetica"], "size": 10}) - - fig, ax = plt.subplots(figsize=(6, 4)) - ax.grid(axis="y", zorder=0, lw=0.2) ->>>>>>> REPLACE -``` - -benchmark/over_time.py -```python -<<<<<<< SEARCH - plt.scatter(dates, pass_rates, c=colors, alpha=0.5) -======= - ax.scatter(dates, pass_rates, c=colors, alpha=0.5) ->>>>>>> REPLACE -``` - -benchmark/over_time.py -```python -<<<<<<< SEARCH - plt.annotate(model, (dates[i], pass_rates[i]), fontsize=8, alpha=0.75) - - plt.xlabel('Release Date') - plt.ylabel('Pass Rate 2') - plt.title('Model Performance Over Time') - plt.grid(True) - plt.tight_layout() - plt.show() -======= - ax.annotate(model, (dates[i], pass_rates[i]), fontsize=8, alpha=0.75) - - ax.set_xlabel('Release Date') - ax.set_ylabel('Pass Rate 2') - ax.set_title('Model Performance Over Time') - plt.tight_layout() - plt.savefig("tmp_over_time.svg") - imgcat(fig) ->>>>>>> REPLACE -``` - -> Applied edit to benchmark/over_time.py -> Commit 8d670f1 aider: Used the colorscheme and fonts from the graphs in `plots.py` in `over_time.py`. - - -
-
- -#### /add tmp_over_time.png -> Added /Users/gauthier/Projects/aider/tmp_over_time.png to the chat - -#### make the labels not overlap the dots - -To make the labels not overlap the dots, we can adjust the position of the labels slightly using the `xytext` parameter in the `annotate` function. - -Here are the changes: - -benchmark/over_time.py -```python -<<<<<<< SEARCH - ax.annotate(model, (dates[i], pass_rates[i]), fontsize=8, alpha=0.75) -======= - ax.annotate(model, (dates[i], pass_rates[i]), fontsize=8, alpha=0.75, - xytext=(5, 5), textcoords='offset points') ->>>>>>> REPLACE -``` - -> Committing benchmark/over_time.py before applying edits. -> Commit 49eb72c Updated image file format from SVG to PNG and added imgcat library for displaying the plot. -> Applied edit to benchmark/over_time.py -> Commit 75fa061 aider: Adjust the position of labels to prevent overlap with dots. - - -
-
- -#### make the dots bigger - -To make the dots bigger, we can adjust the `s` parameter in the `scatter` function, which controls the size of the dots. - -Here are the changes: - -benchmark/over_time.py -```python -<<<<<<< SEARCH - ax.scatter(dates, pass_rates, c=colors, alpha=0.5) -======= - ax.scatter(dates, pass_rates, c=colors, alpha=0.5, s=100) ->>>>>>> REPLACE -``` - -> Applied edit to benchmark/over_time.py -> Commit a018fdf aider: Make the dots bigger. - -
-
- -#### make the border of the graph thinner and lighter - -To make the border of the graph thinner and lighter, we can adjust the `spines` properties of the axes. - -Here are the changes: - -benchmark/over_time.py -```python -<<<<<<< SEARCH - colors = ['red' if 'gpt-4' in model else 'green' if 'gpt-3.5' in model else 'blue' for model in models] -======= - for spine in ax.spines.values(): - spine.set_edgecolor('#DDDDDD') - spine.set_linewidth(0.5) - - colors = ['red' if 'gpt-4' in model else 'green' if 'gpt-3.5' in model else 'blue' for model in models] ->>>>>>> REPLACE -``` - -> Applied edit to benchmark/over_time.py -> Commit 77ba518 aider: Made the border of the graph thinner and lighter by adjusting the `spines` properties of the axes. - -
diff --git a/cecli/website/_posts/2024-05-22-draft.md b/cecli/website/_posts/2024-05-22-draft.md deleted file mode 100644 index 729b638a506..00000000000 --- a/cecli/website/_posts/2024-05-22-draft.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -title: A draft post. -excerpt: With a draft summary. -highlight_image: /assets/linting.jpg -draft: true -nav_exclude: true ---- -{% if page.date %} - -{% endif %} - -# A draft post - -Content TBD. diff --git a/cecli/website/_posts/2024-05-22-linting.md b/cecli/website/_posts/2024-05-22-linting.md deleted file mode 100644 index 14442a52d26..00000000000 --- a/cecli/website/_posts/2024-05-22-linting.md +++ /dev/null @@ -1,149 +0,0 @@ ---- -title: Linting code for LLMs with tree-sitter -excerpt: Aider now lints code after every LLM edit and automatically fixes errors, using tree-sitter and AST-aware code context. -highlight_image: /assets/linting.jpg -nav_exclude: true ---- -{% if page.date %} - -{% endif %} - -[![Linting code](/assets/linting.jpg)](https://aider.chat/assets/linting.jpg) - -# Linting code for LLMs with tree-sitter - -Aider now lints your code after every LLM edit, and offers to automatically fix -any linting errors. -You can also use aider's lint-and-fix functionality on your source files any time -you like, to speedily resolve issues with code written by humans. - -Aider shows linting errors to the LLM in a novel format, -using tree-sitter -to help display relevant code context for each -error. -This increases the ability of the LLM to understand the problem and -make the correct changes to resolve it. - -Aider ships with basic linters built with tree-sitter that support -[most popular programming languages](https://github.com/Aider-AI/grep-ast/blob/main/grep_ast/parsers.py). -These built in linters will detect syntax errors and other fatal problems with the code. - -You can also configure aider to use your preferred linters. 
-This allows aider to check for a larger class of problems, keep the code style -aligned with the rest of your team, etc. - -## Linting and fixing your code - -Aider now lints each source file after it applies the edits -suggested by an LLM. -If problems are found, aider will ask if you'd like it to -attempt to fix the errors. -If so, aider will send the LLM a report of the lint errors -and request changes to fix them. This process may iterate a few times -as the LLM works to fully resolve all the issues. - -You can also lint and fix files any time, on demand from within the aider chat or via the -command line: - -- The in-chat `/lint` command will lint and fix all the files which have -been added to the chat by default. Or you can name any files -in your git repo as arguments. -- From the command line, you can run `aider --lint` to lint and fix -all the dirty files in the repo. -Or you can specify specific filenames on the command line. - - -## An LLM-friendly lint report - -Most linting tools produce terse and cryptic output, -which is one reason many engineers appreciate IDEs that highlight -linting errors. -LLM's don't have the luxury of using an IDE, so aider sends -the linting errors in an LLM friendly format. - -Here's an example of raw output of the `flake8` python linter: - -``` -app.py:23:36: F821 undefined name 'num' -app.py:41:16: F541 f-string is missing placeholders -``` - -This sort of output depends on the user to reference line numbers to find and fix -each reported error. -LLMs are quite bad at working with source code line numbers, often -making off-by-one errors and other mistakes even when provided with -a fully numbered code listing. - -Aider augments the raw linter by -displaying and -highlighting the lines that have errors within their -containing functions, methods, classes. -To do this, aider uses tree-sitter to obtain the code's AST and analyzes it -in light of the linting errors. 
-LLMs are more effective at editing code that's provided -with context like this. - -``` -app.py:23:36: F821 undefined name 'num' -app.py:41:16: F541 f-string is missing placeholders - -app.py: -...⋮... - 6│class LongNum: - 7│ def __init__(self, num): - 8│ """ - 9│ Initialize the number. - 10│ """ -...⋮... - 19│ def __str__(self): - 20│ """ - 21│ Render the number as a string. - 22│ """ - 23█ return str(num) - 24│ - 25│ - 26│@app.route('/subtract//') -...⋮... - 38│@app.route('/divide//') - 39│def divide(x, y): - 40│ if y == 0: - 41█ return f"Error: Cannot divide by zero" - 42│ else: - 43│ result = x / y - 44│ return str(result) - 45│ -...⋮... -``` - -## Basic linters for most popular languages - -Aider comes batteries-included with built in linters for -[most popular programming languages](https://aider.chat/docs/languages.html). -This provides wide support for linting without requiring -users to manually install a linter and configure it to work with aider. - -Aider's built in language-agnostic linter uses tree-sitter to parse -the AST of each file. -When tree-sitter encounters a syntax error or other fatal issue -parsing a source file, it inserts an AST node with type `ERROR`. -Aider simply uses these `ERROR` nodes to identify all the lines -with syntax or other types of fatal error, and displays -them in the LLM friendly format described above. - -## Configuring your preferred linters - -You can optionally configure aider to use -your preferred linters with the `--lint-cmd` switch. - -``` -# To lint javascript with jslint -aider --lint-cmd javascript:jslint - -# To lint python with flake8 using some specific args: -aider --lint-cmd "python:flake8 --select=E9,F821,F823..." -``` - -You can provide multiple `--lint-cmd` switches -to set linters for various languages. -You can also durably set linters in your `.aider.conf.yml` file. 
- diff --git a/cecli/website/_posts/2024-05-22-swe-bench-lite.md b/cecli/website/_posts/2024-05-22-swe-bench-lite.md deleted file mode 100644 index 72ccf74d60f..00000000000 --- a/cecli/website/_posts/2024-05-22-swe-bench-lite.md +++ /dev/null @@ -1,454 +0,0 @@ ---- -title: How aider scored SOTA 26.3% on SWE Bench Lite -excerpt: Aider achieved this result mainly through its existing features that focus on static code analysis, reliable LLM code editing, and pragmatic UX for AI pair programming. -highlight_image: /assets/swe_bench_lite.jpg -nav_exclude: true ---- -{% if page.date %} - -{% endif %} - -# How aider scored SOTA 26.3% on SWE Bench Lite - -[Aider scored 26.3%](https://github.com/swe-bench/experiments/pull/7) -on the -[SWE Bench Lite benchmark](https://www.swebench.com), -achieving a state-of-the-art result. -The previous top leaderboard entry was 20.3% -from Amazon Q Developer Agent. - -See also [aider's SOTA result on the main SWE Bench](https://aider.chat/2024/06/02/main-swe-bench.html). - -[![SWE Bench Lite results](/assets/swe_bench_lite.svg)](https://aider.chat/assets/swe_bench_lite.svg) - -**All of aider's results reported here are pass@1 results, -obtained without using the SWE Bench `hints_text`.** -All results in the above chart are unhinted pass@1 results. -Please see the [references](#references) -for details on the data presented in this chart. -It was corrected on 5/30/24 to reflect apples-to-apples comparisons, -using pass@1 results from AutoCodeRover -and results from OpenDevin that don't use hints. -The [official SWE Bench Lite leaderboard](https://www.swebench.com) -only accepts pass@1 results that do not use hints. - -## Interactive, not agentic - -Aider achieved this result mainly through its existing features that focus on static code analysis, reliable LLM code editing, and pragmatic UX for AI pair programming. 
-Aider intentionally has quite limited and narrow "agentic behavior" -to avoid long delays, high token costs -and the need for users to repeatedly code review incorrect solutions. -It's also worth noting that aider currently does not use -RAG, vector search, tools or give the LLM access to search the web -or unilaterally execute code. - -Aider is first and foremost an interactive tool for engineers to get real work done in -real code bases using a chat interface. -Aider provides a pair programming UX where users can ask for a change -and see the edits performed in real-time. -Aider can also offer additional help like fixing lint or test errors, -but the user is always in full interactive control. -This lets them quickly steer misunderstandings back on course and -avoid wasting time and token costs. - - -## Benchmark methodology - -For the benchmark, -aider was launched in each problem's git repository -with the problem statement -submitted as the opening chat message from "the user." -After that aider runs as normal, with the following modifications: - -- Aider's suggestions were always accepted without user approval. -- A simple harness was used to retry the SWE Bench problem if aider produced code that wasn't *plausibly correct*. -Plausibly correct means that aider reported that it had successfully edited the repo -without causing syntax errors or breaking any *pre-existing* tests. -- If the solution isn't plausible, the harness launches aider to try again from scratch, -alternating between using aider with GPT-4o and Opus. -- If no plausible solution is found after six tries, the harness picks the solution -with the fewest edit/lint/test problems. - -It's important to be clear that -*aider and the benchmark harness -only had access to the pre-existing tests in each problem's repo*. -The held out "acceptance tests" were *only* used -after benchmarking to compute statistics on which problems aider -correctly resolved. 
- -The [full harness to run aider on SWE Bench Lite is available on GitHub](https://github.com/Aider-AI/aider-swe-bench). - -The benchmarking process was similar to how a developer might use aider to -resolve a GitHub issue: - -- They could launch aider in their repo with the command below, which -tells aider they want to accept every suggestion -and to use pytest to run tests. - - `aider --yes --test-cmd pytest` -- They could start the chat by pasting in the URL or text of a GitHub issue. -Aider will pull in the URL's content and then try and solve the issue. -- If aider doesn't produce code that lints and tests clean, the user might decide to revert the changes and try again, maybe using aider with a different LLM this time. -[Aider is tightly integrated with git](https://aider.chat/docs/git.html), -so it's always easy to revert AI changes that don't pan out. - -Outside a benchmark setting, it's probably -unwise or at least highly inefficient -to let *any* AI agent run unsupervised on your code base. -The reason aider is intended to be used interactively -is so that the user can participate and direct aider's work and approve suggestions. -This way the user can offer immediate feedback or corrections if their initial -instructions turn out to be ambiguous, -or if the AI starts going down a wrong path. - -## Aider with GPT-4o alone was SOTA - -Running the benchmark harness -only using aider with GPT-4o to find plausible solutions -achieved a score of 25.0%. -This was itself matching the state-of-the-art, before being surpassed by the main -result being reported here -that used aider with both GPT-4o & Opus. - -As noted below, a single attempt using Aider with GPT-4o tied -the current top entry on the leaderboard. - -## Aider with GPT-4o & Opus - -The benchmark harness alternated between running aider with GPT-4o and Opus. 
-The harness proceeded in a fixed order, always starting with GPT-4o and -then alternating with Opus until a plausible solution was found for each -problem. - -The table below breaks down the plausible solutions that -were found for the 300 problems. -It also provides details on the 79 that were ultimately -verified as correctly resolving their issue. -Some noteworthy observations: - -- *Just the first attempt* of Aider with GPT-4o resolved 20.3% of the problems, which ties the Amazon Q Developer Agent currently atop the official leaderboard. -- Including the second attempt, Aider with GPT-4o and Opus scored 23.6% on the benchmark. -These first two attempts obtained ~75% of all plausible and ~90% of all resolved solutions. -- A long tail of solutions continued to be found using both models including one correctly resolved solution on the final, sixth attempt of that problem. - - -| Attempt | Agent |Number of
plausible
solutions|Percent of
plausible
solutions| Number of
correctly
resolved
solutions | Percent of
correctly
resolved
solutions | Score on
SWE Bench
Lite | -|:--------:|------------|---------:|---------:|----:|---:|--:| -| 1 | Aider with GPT-4o | 208 | 69.3% | 61 | 77.2% | 20.3% | -| 2 | Aider with Opus | 49 | 16.3% | 10 | 12.7% | 3.3% | -| 3 | Aider with GPT-4o | 20 | 6.7% | 3 | 3.8% | 1.0% | -| 4 | Aider with Opus | 9 | 3.0% | 2 | 2.5% | 0.7% | -| 5 | Aider with GPT-4o | 11 | 3.7% | 2 | 2.5% | 0.7% | -| 6 | Aider with Opus | 3 | 1.0% | 1 | 1.3% | 0.3% | -| **Total** | | **300** | **100%** | **79** | **100%** | **26.3%** | - - -If we break down the solutions solely by model, -we can see that aider with GPT-4o outperforms Opus. -This isn't a fair and direct comparison, because GPT-4o always took the first -turn and therefore got first crack at all the "easiest" problems. -Aider with Opus only ever saw problems that GPT-4o failed to -find plausible solutions for on its first try. - -Aider with GPT-4o was producing higher quality plausible solutions, -with a greater chance of going on to be accepted as resolving the issue. -Again, this is biased by the turn ordering. -But other anecdotal evidence from earlier runs of the benchmark -also supports the observation that aider with GPT-4o is significantly stronger than Opus -for this benchmark. - - -| Agent | Number of
plausible
solutions | Number of
correctly
resolved
solutions | Percent of
plausible
which
correctly
resolved
| -|------------|---------:|---------:|---:| -| Aider with GPT-4o | 239 | 66 |27.6% | -| Aider with Opus | 61 | 13 |21.3% | -| **Total** | **300** | **79** |**26.3%** | - -## Repository map, not RAG - -The crucial first step in solving a SWE Bench problem is figuring out -which parts of the repo are relevant and which files need to be edited. -Most coding agents use some combination of RAG, vector search -and providing the LLM with -tools to interactively explore the code base. - -Aider instead uses a -[repository map](https://aider.chat/2023/10/22/repomap.html) -to help the LLM understand the -layout, code structure, and content of a git repo. -The repo map is created through static analysis of the code's -abstract syntax tree and call graph -to provide a compact and powerful summary of the entire code base. -The map is constantly -tailored to show -repo context that is relevant to the current state of the chat conversation. -This is done by performing a graph optimization on the code's call graph. - -When the user asks for a change to their code, the LLM can use the repo map -to decide which files to edit. -The LLM simply returns a normal text response explaining which files -it needs to edit and why. -Aider notices when the LLM mentions filenames from the repo, -and asks the user if they should be added to the chat. -Adding a file to the chat allows the LLM to see the full contents -of the file and edit it. - -
- -#### Please add a new /factorial/N endpoint. - -To add a new /factorial/N endpoint, the most likely file that needs to be edited is app.py. -Please add app.py to the chat so I can proceed with the changes. - -> app.py -> Add these files to the chat? yes - -
- -This is a convenient and natural workflow for interactive chat, -and it worked well for the SWE Bench problems. -Aider successfully identified the correct file to edit -in 70.3% of the benchmark tasks. - -We can determine which file needs to be edited using the "gold" patch -which is associated with each SWE Bench task. -This patch was created by a human developer -to solve the issue, and therefore reveals a file which can -be edited to solve the problem. -Of course aider is not able to see or use the gold patch -or the file names it contains in any way. -This information was only used to compute -statistics outside the benchmarking process. - - -## Reliable code editing - -Once files have been selected for editing, -the next step is of course to edit the source code to fix the problem. - -Aider goes to great lengths to ensure that LLMs can not just write code, -but reliably *edit* code. -Aider has a collection of prompting strategies and code editing backends which have -been honed through -[extensive benchmarking](https://aider.chat/docs/leaderboards/). -These foundational capabilities help ensure that aider can -properly integrate code from LLMs into an existing code base and source files. - -The repository map helps here too, making sure that the LLM -can see relevant classes, functions and variables from the entire repo. -This helps ensure that the project's existing APIs and conventions are -respected and utilized when new code is added. - -Regardless, there are still cases where aider may be unable to cleanly -complete the edits specified by the LLM. -This is usually because the LLM has failed to conform to the editing -instructions in its system prompt. -When aider completes, it returns an editing outcome that indicates -whether it was able to successfully apply all edits. -The benchmark harness uses this editing status as -one criteria to determine if aider has -created a plausible solution. 
- -## Linting and fixing - -Another key criteria for a plausible solution is that it passes basic -linting, which means that the code has no syntax -or other fatal errors. -[Aider lints code](https://aider.chat/2024/05/22/linting.html) -after every LLM edit and offers to automatically fix -any problems. - -Aider ships with built-in linters based on tree-sitter -which work with most popular programming languages. -Aider shows linting errors to the LLM in a novel format, -using the abstract syntax tree to display relevant code context for each -error. -This context helps LLMs understand the problem and -make the correct changes to resolve it. - -
- -``` -app.py:23:36: F821 undefined name 'num' - -app.py: -...⋮... - 6│class LongNum: -...⋮... - 19│ def expound(self, threshold): - 20│ number = self.basis - 21│ while number < threshold: - 22│ number *= self.factor - 23█ return num - 24│ - 25│ -...⋮... -``` - -> Attempt to fix lint errors? yes - -
- -In the benchmark, these linting suggestions are always accepted. -At completion, -aider reports a linting outcome that -indicates if it was able to produce -code without any outstanding linting errors. -The benchmark harness uses this status as -one of the criteria to determine if aider has -created a plausible solution. - -## Testing and fixing - -The final crtieria for a plausible solution is that -all tests must be passing. -Aider can be configured with the command to run tests for a repo, -and will automatically attempt to fix any test failures. - -A user working on a python project might configure testing -by launching aider like this: - -``` -aider --test-cmd pytest -``` - -For the benchmark, aider is configured with a test command that will run the -tests that already exist in each problem's repository. -SWE Bench problems are based on repositories from large open -source projects with extensive existing test suites. -This means that -testing will fail if aider has broken any of these -pre-existing tests or if any new -tests that it created aren't passing. - -As with editing and linting, aider reports a testing outcome -that indicates if it completed with any outstanding failing tests. -The benchmark harness uses this status when deciding if aider -has produced a plausible solution. - -To be clear, *aider cannot run or even see the held out "acceptance tests"* that -are used to judge if a proposed solution correctly -resolves the problem. -Those tests are only run outside of aider and the benchmark harness, -to compute the final benchmark statistics. - -## Finding a plausible solution - -Each time aider executes, it reports -the outcome of the editing, linting, and testing -steps. -Each of these steps may complete successfully or -return a status that indicates that there were outstanding -problems that remain unresolved. - -The benchmark harness uses these outcomes to determine if -aider has produced a plausible -solution to the current SWE Bench task. 
-A plausible solution is one where aider -returns saying that it -edited the repo with no outstanding -edit, lint, or test errors. -In this case, aider's changes are recorded -as the SWE Bench `model_patch` to be evaluated later with the -acceptance tests. - -If the solution is not plausible, another -instance of aider is launched again from scratch on the same problem. -The harness alternates launching aider with GPT-4o and Opus to solve the problem, -and gives each model three attempts -- for a total of six attempts. -As soon as a plausible solution is found, it is accepted and the -harness moves on to the next SWE Bench instance. - -It's worth noting that repositories may have lint or test errors -present before aider even starts to edit them. -Whether unresolved errors were caused by aider or were pre-existing, -there will be instances where -no plausible solution is -found after six tries. - -If all six attempts fail to produce a plausible solution, -then the "best" solution available is selected as the -`model_patch`. -Which of the non-plausible solutions to use is determined -by ignoring the testing outcome -and prioritizing solutions in the following order: - - - Pick a solution where editing and linting were completed successfully. - - Pick a solution where editing was at least partially successful and linting succeeded. - - Pick a solution where editing was successful. - - Pick a solution where editing was at least partially successful. - -## Computing the benchmark score - -The benchmark harness produced a plausible solution for each of the 300 -SWE Bench Lite instances and saved it as the `model_patch`. - -A separate evaluation script was used to -test each of these solutions with the full test suite, -including the held out acceptance tests. -For this final acceptance testing, any edits that aider made to tests -are discarded. -This ensures that the correct, -unmodified test suite is used for acceptance testing. 
-The evaluation script compares the test results -with results from testing -the "gold" patch that was developed by a human to correctly solve the issue. -If they match, the candidate solution has correctly resolved the issue. - -These acceptance tests are only ever run outside of aider -and the benchmark harness, and only to compute the number of -correctly resolved instances. -They are never run, used, or even visible during aider's attempts to solve the problems. - -Aider correctly resolved 79 out of 300 SWE Bench Lite instances, or 26.3%. - -## Acknowledgments - -Much thanks to the team behind the -[SWE Bench](https://www.swebench.com) -family of AI coding benchmarks. -Also thanks to Albert Örwall who has -[dockerized the SWE Bench evaluation scripts](https://github.com/aorwall/SWE-bench-docker) -making it faster, easier, and more reliable to run the acceptance tests. - - -## References - -All of aider's results reported here are pass@1 results, -obtained without using the SWE Bench `hints_text`. - -The "aider agent" internally makes multiple "attempts" at solving the problem, -but it picks and returns one single candidate solution. -Only that one candidate solution is evaluated with the acceptance tests -and contributes to the benchmark score. -Thus it is a pass@1 result. - -This is contrast to a pass@N result for N>1, where N attempts are made -and all N solutions are evaluated by the acceptance tests. -If *any* of the N solution pass, that counts as a pass@N success. - -Below are the references for the other pass@1 unhinted SWE-Bench results -displayed in the graph at the beginning of this article. 
- -- [20.3% Amazon Q Developer Agent (v20240430-dev)](https://www.swebench.com) -- [19.0% AutoCodeRover](https://www.swebench.com/) -- [18.0% SWE-Agent + GPT-4](https://www.swebench.com) -- [16.7% OpenDevin](https://github.com/OpenDevin/OpenDevin/issues/2149) -- [11.7% SWE-Agent + Opus](https://www.swebench.com) - -Note, the graph was corrected on 5/30/24 as follows. - -The graph now contains AutoCodeRover's average pass@1 results. -Previously it displayed pass@3 results, which are -not comparable -to the pass@1 results for aider being reported here. -The [AutoCodeRover GitHub page](https://github.com/nus-apr/auto-code-rover) -features pass@3 results -without being clearly labeled. - -The graph now contains the best OpenDevin results obtained without using -the SWE Bench `hints_text` to provide hints to the agent. -The previous graph contained their hinted result, -which is not comparable -to the unhinted aider results being reported here. -[OpenDevin reported hinted results](https://x.com/gneubig/status/1791498953709752405) -without noting that hints were used. diff --git a/cecli/website/_posts/2024-05-24-self-assembly.md b/cecli/website/_posts/2024-05-24-self-assembly.md deleted file mode 100644 index 31e2ec745d3..00000000000 --- a/cecli/website/_posts/2024-05-24-self-assembly.md +++ /dev/null @@ -1,77 +0,0 @@ ---- -title: Aider has written 7% of its own code (outdated, now 70%) -excerpt: This article is quite out dated. Aider is currently writing about 70% of the new code in each release. -highlight_image: /assets/self-assembly.jpg -nav_exclude: true ---- -{% if page.date %} - -{% endif %} - -# Aider has written 7% of its own code (outdated, now 70%) - -[![self assembly](/assets/self-assembly.jpg)](https://aider.chat/assets/self-assembly.jpg) - -{: .note } -This article is quite old and outdated. -Aider is currently writing about 70% of the new code -in each release. -See -[aider's release history](/HISTORY.html) for the latest statistics. 
- -The -[aider git repo](https://github.com/Aider-AI/aider) -currently contains about 4K commits and 14K lines of code. - -Aider made 15% of the commits, inserting 4.8K and deleting 1.5K lines of code. - -About 7% of the code now in the repo is attributable to an aider commit -using `git blame`. -This number is probably a significant undercount, because periodic reformatting -by `black` is likely obscuring aider's authorship of many lines. - -Here's the breakdown of the code aider wrote in the current code base -according to `git blame`. - -| File | Lines | Percent | -|---|---:|---:| -|aider/args.py| 6 of 449 | 1.3% | -|aider/coders/base_coder.py| 37 of 1354 | 2.7% | -|aider/coders/editblock_coder.py| 14 of 507 | 2.8% | -|aider/coders/editblock_func_coder.py| 6 of 141 | 4.3% | -|aider/coders/udiff_coder.py| 2 of 421 | 0.5% | -|aider/coders/wholefile_coder.py| 5 of 146 | 3.4% | -|aider/coders/wholefile_func_coder.py| 4 of 134 | 3.0% | -|aider/commands.py| 67 of 703 | 9.5% | -|aider/diffs.py| 15 of 129 | 11.6% | -|aider/gui.py| 2 of 533 | 0.4% | -|aider/history.py| 19 of 124 | 15.3% | -|aider/io.py| 55 of 368 | 14.9% | -|aider/linter.py| 30 of 240 | 12.5% | -|aider/main.py| 30 of 466 | 6.4% | -|aider/mdstream.py| 3 of 122 | 2.5% | -|aider/models.py| 22 of 549 | 4.0% | -|aider/repo.py| 19 of 266 | 7.1% | -|aider/repomap.py| 17 of 518 | 3.3% | -|aider/scrape.py| 12 of 199 | 6.0% | -|aider/versioncheck.py| 10 of 37 | 27.0% | -|aider/voice.py| 9 of 104 | 8.7% | -|benchmark/benchmark.py| 33 of 730 | 4.5% | -|benchmark/over_time.py| 32 of 60 | 53.3% | -|benchmark/swe_bench_lite.py| 40 of 71 | 56.3% | -|scripts/blame.py| 55 of 212 | 25.9% | -|scripts/versionbump.py| 96 of 123 | 78.0% | -|setup.py| 11 of 47 | 23.4% | -|tests/test_coder.py| 48 of 612 | 7.8% | -|tests/test_commands.py| 135 of 588 | 23.0% | -|tests/test_editblock.py| 23 of 403 | 5.7% | -|tests/test_io.py| 30 of 65 | 46.2% | -|tests/test_main.py| 13 of 239 | 5.4% | -|tests/test_models.py| 6 of 28 | 21.4% | 
-|tests/test_repo.py| 2 of 296 | 0.7% | -|tests/test_repomap.py| 70 of 217 | 32.3% | -|tests/test_udiff.py| 7 of 119 | 5.9% | -|tests/test_wholefile.py| 37 of 321 | 11.5% | -| **Total** | **1022 of 14219** | 7.2% | - - diff --git a/cecli/website/_posts/2024-06-02-main-swe-bench.md b/cecli/website/_posts/2024-06-02-main-swe-bench.md deleted file mode 100644 index 72c7b2faa55..00000000000 --- a/cecli/website/_posts/2024-06-02-main-swe-bench.md +++ /dev/null @@ -1,267 +0,0 @@ ---- -title: Aider is SOTA for both SWE Bench and SWE Bench Lite -excerpt: Aider sets SOTA for the main SWE Bench, after recently setting SOTA for the Lite version. -highlight_image: /assets/swe_bench.jpg -nav_exclude: true ---- -{% if page.date %} - -{% endif %} - -# Aider is SOTA for both SWE Bench and SWE Bench Lite - -Aider scored 18.9% -on the main -[SWE Bench benchmark](https://www.swebench.com), -achieving a state-of-the-art result. -The current top leaderboard entry is 13.8% -from Amazon Q Developer Agent. -The best result reported elsewhere seems to be -[13.9% from Devin](https://www.cognition.ai/post/swe-bench-technical-report). - -This result on the main SWE Bench builds on -[aider's recent SOTA result on the easier SWE Bench Lite](https://aider.chat/2024/05/22/swe-bench-lite.html). - -[![SWE Bench results](/assets/swe_bench.svg)](https://aider.chat/assets/swe_bench.svg) - -**All of aider's results reported here are pass@1 results, -obtained without using the SWE Bench `hints_text`.** -Aider was benchmarked on the same -[570 randomly selected SWE Bench problems](https://github.com/CognitionAI/devin-swebench-results/tree/main/output_diffs) -that were used in the -[Devin evaluation](https://www.cognition.ai/post/swe-bench-technical-report). -See the [references](#references) -for more details on the data presented in this chart. 
- -## Interactive, not agentic - -Aider achieved this result mainly through its existing features that focus on static -code analysis, reliable LLM code editing, and pragmatic UX for automatically -fixing linting and testing errors. -Aider intentionally has quite limited and narrow "agentic behavior" -to avoid long delays, high token costs -and the need for users to repeatedly code review incorrect solutions. -It's also worth noting that aider currently does not use -RAG, vector search, tools or give the LLM access to search the web -or unilaterally execute code. - -Aider is first and foremost an interactive tool for engineers to get real work done in -real code bases using a chat interface. -Aider provides a pair programming UX where users can ask for a change -and see code edits performed in real-time. -Aider can also offer additional help like fixing lint or test errors, -but the user is always in full interactive control. -This allows them to quickly steer misunderstandings back on course and -avoid wasting time and token costs. - - -## Benchmark methodology - -Benchmarking was conducted as follows: - -- Aider with GPT-4o was launched in each problem's git repository -with the problem statement -submitted as the opening chat message from "the user". -- After that aider ran as normal, except all of aider's -suggestions were always accepted without user approval. -- A [simple harness](https://github.com/Aider-AI/aider-swe-bench#the-aider-agent) was used to retry the SWE Bench problem if aider produced code that wasn't *plausibly correct*. -Plausibly correct means that aider reported that it had successfully edited the repo -without causing syntax errors or breaking any *pre-existing* tests. -- If the solution from aider with GPT-4o wasn't plausible, the harness launched aider to try again from scratch using Claude 3 Opus. 
-- If no plausible solution was found after those two tries, the harness picked the "most plausible" solution with the fewest edit/lint/test problems. - -It's important to be clear that -*aider and the benchmark harness -only had access to the pre-existing tests in each problem's repo*. -The held out "acceptance tests" were *only* used -after benchmarking to compute statistics on which problems aider -correctly resolved. - -This is the same approach -that was used for -[aider's recent SOTA result on SWE Bench Lite](https://aider.chat/2024/05/22/swe-bench-lite.html). -For the Lite benchmark, -aider alternated between GPT-4o and Opus for up to six total attempts. -To manage the cost of running the main SWE Bench benchmark, -aider was limited to two total attempts: -one with GPT-4o and one with Opus. - -For a detailed discussion of the benchmark -methodology, see the -[article about aider's SWE Bench Lite results](https://aider.chat/2024/05/22/swe-bench-lite.html). -Also, the -[aider SWE Bench repository on GitHub](https://github.com/Aider-AI/aider-swe-bench) -contains the harness and statistics code used for the benchmarks. - -The benchmarking process was similar to how a developer might use aider to -resolve a GitHub issue: - -- They could launch aider in their repo with the command below, which -tells aider they want to accept every suggestion -and to use pytest to run tests. - - `aider --yes --test-cmd pytest` -- They could start the chat by pasting in the URL or text of a GitHub issue. -Aider will pull in the URL's content and then try and resolve the issue. -- If aider doesn't produce code that lints and tests clean, the user might decide to -[use git to revert the changes](https://aider.chat/docs/git.html), -and try again with `aider --opus`. - -## Aider with GPT-4o alone was SOTA - -Using aider with GPT-4o to make a single attempt at resolving each problem -achieved a score of 17.0%. 
-This was itself a state-of-the-art result, before being surpassed by the main -result being reported here -that used aider with both GPT-4o & Opus. - -## Aider with GPT-4o & Opus - -The benchmark harness started by using aider with GPT-4o to try -and resolve each problem. -For problems where this didn't produce a plausible solution, -the harness tried again using aider with Opus. -So at most, two attempts were made for each problem. - -The table below breaks down the proposed solutions that -were found from each attempt at the 570 problems. -A proposed solution is either: - -- A plausible solution where -aider reported no outstanding errors from editing, linting and testing. -- Or, the "most plausible" solution generated by either attempt, with the -[fewest outstanding editing, linting or testing errors](https://aider.chat/2024/05/22/swe-bench-lite.html#finding-a-plausible-solution). - -The table also provides details on the 108 solutions that were ultimately -verified as correctly resolving their issue. - -| Attempt | Agent |Number of
proposed
solutions|Percent of
proposed
solutions| Number of
correctly
resolved
solutions | Percent of
correctly
resolved
solutions | Score on
SWE Bench
Lite | -|:--------:|------------|---------:|---------:|----:|---:|--:| -| 1 | Aider with GPT-4o | 419 | 73.5% | 87 | 80.6% | 15.3% | -| 2 | Aider with Opus | 151 | 26.5% | 21 | 19.4% | 3.7% | -| **Total** | | **570** | **100%** | **108** | **100%** | **18.9%** | - -## Non-plausible but correct solutions? - -A solution doesn't actually have to be plausible in order to correctly resolve the issue. -Recall that plausible is simply defined as aider -reporting that it successfully completed all file edits, -repaired and resolved any linting errors -and resolved any test failures. -But there are many reasons why aider might fail to do those things -and yet still produce a solution that will pass -acceptance testing: - -- There may have been pre-existing failing tests in the repo, -before aider even started working on the SWE Bench problem. -Aider may not have resolved such issues, and yet they may not be -relevant to the acceptance testing. -The SWE Bench acceptance testing just confirms that tests pass or fail -in the same pattern as the "gold patch" developed by a human to resolve the -problem. -Some tests may fail during acceptance testing, -and that's ok as long as they failed for the gold -patch too. -- There may have been pre-existing linting problems in the repo. -If lingering linting issues affected code paths that are not well tested, -they may not impact acceptance testing. -- Aider may have reported file editing errors because it thought the LLM -specified edits that it wasn't able to successfully apply. -This can only happen when the LLM specified edits in -a way that doesn't comply with the editing instructions in the system prompt. -Given that the LLM isn't complying with the system prompt, -it may have become confused and -asked for redundant or otherwise irrelevant edits. -Such outstanding edit errors might not be fatal for acceptance testing. -- Etc. 
- -Keeping all this in mind, we can understand why -GPT-4o accounts for 15.3% of the benchmark score in the table above, -but benchmarking with just one attempt of aider with GPT-4o scored 17.0%. -When an Opus attempt is allowed after GPT-4o, -it may propose some *incorrect* solutions which -are "more plausible" than some of GPT-4o's non-plausible solutions. -These more plausible, incorrect solutions can -eclipse some of -the earlier non-plausible correct solutions that GPT-4o generated. -This is why GPT-4o's score in the table -showing the combined GPT-4o & Opus results (15.3%) -is lower than the result from just one try using aider with GPT-4o (17.0%). - -For these reasons, adding additional attempts is not guaranteed to monotonically -increase the number of resolved problems. -New solutions may resolve some new problems but they may also -eclipse and discard some of the previous non-plausible correct solutions. - -Luckily, the net effect of additional attempts -usually increases or at least maintains the -number of resolved solutions. -This was the case for all the attempts made in both this main SWE Bench result and the -earlier Lite result. - -## Computing the benchmark score - -The benchmark harness produced one proposed solution for each of -the 570 SWE Bench problems. - -A separate evaluation script was used to -test each of these solutions with the full test suite, -including the held out acceptance tests. -For this final acceptance testing, any edits that aider made to tests -were discarded. -This ensured that the correct, -unmodified test suite was used for acceptance testing. -The evaluation script compared each proposed solution's test results -with results from testing -the "gold" patch that was developed by a human to correctly resolve the issue. -If they matched, the proposed solution correctly resolved the issue. 
- -These acceptance tests were only ever run outside of aider -and the benchmark harness, and only to compute statistics about the -correctly resolved instances. -They were never run, used, or even visible during aider's attempts to resolve the problems. - -Aider correctly resolved 108 out of 570 SWE Bench instances that were benchmarked, -or 18.9%. - -## Acknowledgments - -Much thanks to the team behind the -[SWE Bench](https://www.swebench.com) -family of AI coding benchmarks. -Also thanks to Albert Örwall who has -[dockerized the SWE Bench evaluation scripts](https://github.com/aorwall/SWE-bench-docker) -making it faster, easier, and more reliable to run the acceptance tests. - - -## References - -All of aider's results reported here are pass@1 results, -obtained without using the SWE Bench `hints_text`. - -The "aider agent" internally makes multiple "attempts" at solving the problem, -but it picks and returns one single candidate solution. -Only that one candidate solution is evaluated with the acceptance tests -and contributes to the benchmark score. -Thus it is a pass@1 result. - -This is contrast to a pass@N result for N>1, where N attempts are made -and all N solutions are evaluated by the acceptance tests. -If *any* of the N solution pass, that counts as a pass@N success. - -Below are the references for the other pass@1 unhinted SWE-Bench results -displayed in the graph at the beginning of this article. - -- [13.9% Devin, benchmarked on 570 instances.](https://www.cognition.ai/post/swe-bench-technical-report) -- [13.8% Amazon Q Developer Agent, benchmarked on 2,294 instances.](https://www.swebench.com) -- [12.5% SWE- Agent + GPT-4, benchmarked on 2,294 instances.](https://www.swebench.com) -- [10.6% AutoCode Rover, benchmarked on 2,294 instances.](https://arxiv.org/pdf/2404.05427v2) -- [10.5% SWE- Agent + Opus, benchmarked on 2,294 instances.](https://www.swebench.com) - -The graph contains average pass@1 results for AutoCodeRover. 
-The [AutoCodeRover GitHub page](https://github.com/nus-apr/auto-code-rover) -features their pass@3 results -without being clearly labeled. -Table 2 of their -[paper](https://arxiv.org/pdf/2404.05427v2) -reports an `ACR-avg` result of 10.59% which is an average pass@1 result. - diff --git a/cecli/website/_posts/2024-07-01-sonnet-not-lazy.md b/cecli/website/_posts/2024-07-01-sonnet-not-lazy.md deleted file mode 100644 index 5cb7050e056..00000000000 --- a/cecli/website/_posts/2024-07-01-sonnet-not-lazy.md +++ /dev/null @@ -1,126 +0,0 @@ ---- -title: Sonnet is the opposite of lazy -excerpt: Claude 3.5 Sonnet can easily write more good code than fits in one 4k token API response. -highlight_image: /assets/sonnet-not-lazy.jpg -nav_exclude: true ---- - -[![sonnet is the opposite of lazy](/assets/sonnet-not-lazy.jpg)](https://aider.chat/assets/sonnet-not-lazy.jpg) - -{% if page.date %} - -{% endif %} - -# Sonnet is the opposite of lazy - -Claude 3.5 Sonnet represents a step change -in AI coding. -It is incredibly industrious, diligent and hard working. -Unexpectedly, -this presented a challenge: -Sonnet -was often writing so much code that -it was hitting the 4k output token limit, -truncating its coding in mid-stream. - -Aider now works -around this 4k limit and allows Sonnet to produce -as much code as it wants. -The result is surprisingly powerful. -Sonnet's score on -[aider's refactoring benchmark](https://aider.chat/docs/leaderboards/#code-refactoring-leaderboard) -jumped from 55.1% up to 64.0%. -This moved Sonnet into second place, ahead of GPT-4o and -behind only Opus. - -Users who tested Sonnet with a preview of -[aider's latest release](https://aider.chat/HISTORY.html#aider-v0410) -were thrilled: - -- *Works like a charm. It is a monster. It refactors files of any size like it is nothing. The continue trick with Sonnet is truly the holy grail. Aider beats [other tools] hands down. 
I'm going to cancel both subscriptions.* -- [Emasoft](https://github.com/Aider-AI/aider/issues/705#issuecomment-2200338971) -- *Thanks heaps for this feature - it's a real game changer. I can be more ambitious when asking Claude for larger features.* -- [cngarrison](https://github.com/Aider-AI/aider/issues/705#issuecomment-2196026656) -- *Fantastic...! It's such an improvement not being constrained by output token length issues. [I refactored] a single JavaScript file into seven smaller files using a single Aider request.* -- [John Galt](https://discord.com/channels/1131200896827654144/1253492379336441907/1256250487934554143) - -## Hitting the 4k token output limit - -All LLMs have various token limits, the most familiar being their -context window size. -But they also have a limit on how many tokens they can output -in response to a single request. -Sonnet and the majority of other -models are limited to returning 4k tokens. - -Sonnet's amazing work ethic caused it to -regularly hit this 4k output token -limit for a few reasons: - -1. Sonnet is capable of outputting a very large amount of correct, -complete new code in one response. -2. Similarly, Sonnet can specify long sequences of edits in one go, -like changing a majority of lines while refactoring a large file. -3. Sonnet tends to quote large chunks of a -file when performing a SEARCH & REPLACE edits. -Beyond token limits, this is very wasteful. - -## Good problems - -Problems (1) and (2) are "good problems" -in the sense that Sonnet is -able to write more high quality code than any other model! -We just don't want it to be interrupted prematurely -by the 4k output limit. - -Aider now allows Sonnet to return code in multiple 4k token -responses. -Aider seamlessly combines them so that Sonnet can return arbitrarily -long responses. -This gets all the upsides of Sonnet's prolific coding skills, -without being constrained by the 4k output token limit. 
- - -## Wasting tokens - -Problem (3) is more complicated, as Sonnet isn't just -being stopped early -- it's actually wasting a lot -of tokens, time and money. - -Faced with a few small changes spread far apart in -a source file, -Sonnet would often prefer to do one giant SEARCH/REPLACE -operation of almost the entire file. -It would be far faster and less expensive to instead -do a few surgical edits. - -Aider now prompts Sonnet to discourage these long-winded -SEARCH/REPLACE operations -and promotes much more concise edits. - - -## Aider with Sonnet - -[The latest release of aider](https://aider.chat/HISTORY.html#aider-v0410) -has specialized support for Claude 3.5 Sonnet: - -- Aider allows Sonnet to produce as much code as it wants, -by automatically and seamlessly spreading the response -out over a sequence of 4k token API responses. -- Aider carefully prompts Sonnet to be concise when proposing -code edits. -This reduces Sonnet's tendency to waste time, tokens and money -returning large chunks of unchanging code. -- Aider now uses Claude 3.5 Sonnet by default if the `ANTHROPIC_API_KEY` is set in the environment. - -See -[aider's install instructions](https://aider.chat/docs/install.html) -for more details, but -you can get started quickly with aider and Sonnet like this: - -``` -$ python -m pip install -U aider-chat - -$ export ANTHROPIC_API_KEY= # Mac/Linux -$ setx ANTHROPIC_API_KEY # Windows, restart shell after setx - -$ aider -``` - diff --git a/cecli/website/_posts/2024-07-25-new-models.md b/cecli/website/_posts/2024-07-25-new-models.md deleted file mode 100644 index 67ffa45c526..00000000000 --- a/cecli/website/_posts/2024-07-25-new-models.md +++ /dev/null @@ -1,103 +0,0 @@ ---- -title: Coding with Llama 3.1, new DeepSeek Coder & Mistral Large -excerpt: Summary of code editing skill for the new models, with Sonnet and GPT-3.5 for scale. 
-highlight_image: /assets/2024-07-new-models.jpg -nav_exclude: true ---- -{% if page.date %} - -{% endif %} - -# Coding with Llama 3.1, new DeepSeek Coder & Mistral Large - -![Summary of code editing skill for the new models, with Sonnet and GPT-3.5 for scale.](/assets/2024-07-new-models.jpg) - -Five noteworthy models have been released in the last few days, -with a wide range of code editing capabilities. -Here are their results from -[aider's code editing leaderboard](https://aider.chat/docs/leaderboards/) -with Claude 3.5 Sonnet and the best GPT-3.5 model -included for scale. - -- **77% claude-3.5-sonnet** -- 73% DeepSeek Coder V2 0724 -- 66% llama-3.1-405b-instruct -- 60% Mistral Large 2 (2407) -- 59% llama-3.1-70b-instruct -- **58% gpt-3.5-turbo-0301** -- 38% llama-3.1-8b-instruct - -You can code with all of these models using aider like this: - -``` -$ python -m pip install -U aider-chat - -# Change directory into a git repo to work on -$ cd /to/your/git/repo - -$ export DEEPSEEK_API_KEY=your-key-goes-here -$ aider --model deepseek/deepseek-coder - -$ export MISTRAL_API_KEY=your-key-goes-here -$ aider --model mistral/mistral-large-2407 - -$ export OPENROUTER_API_KEY=your-key-goes-here -$ aider --model openrouter/meta-llama/llama-3.1-405b-instruct -$ aider --model openrouter/meta-llama/llama-3.1-70b-instruct -$ aider --model openrouter/meta-llama/llama-3.1-8b-instruct -``` - -See the -[installation instructions](https://aider.chat/docs/install.html) -and other -[documentation](https://aider.chat/docs/usage.html) -for more details. - -## DeepSeek Coder V2 0724 - -DeepSeek Coder V2 0724 was by far the biggest surprise -and strongest code editing model, coming in 2nd on the leaderboard. -It can -efficiently edit code with SEARCH/REPLACE, unlike -the prior DeepSeek Coder version. -This unlocks the ability to edit large files. - -This new Coder version got 73% on the benchmark, -very -close to Sonnet's 77% but 20-50X less expensive! 
- -## LLama 3.1 - -Meta released the -Llama 3.1 family of models, -which have performed well on many evals. - -The flagship Llama 3.1 405B instruct only -secured #7 on aider's leaderboard, -well behind frontier models like -Claude 3.5 Sonnet & GPT-4o. - -The 405B model can use SEARCH/REPLACE to efficiently -edit code, but with a decrease in the benchmark score. -When using this "diff" editing format, its score dropped -from 66% to 64%. - -The smaller 70B model was competitive with GPT-3.5, while -the 8B model lags far behind. -Both seem unable to reliably use SEARCH/REPLACE to edit files. -This limits them to editing smaller files that can -fit into their output token limit. - -## Mistral Large 2 (2407) - -Mistral Large 2 (2407) scored only 60% on aider's code editing -benchmark. -This puts it just ahead of the best GPT-3.5 model. -It -doesn't seem able to reliably use SEARCH/REPLACE to efficiently edit -code, -which limits its use to small source files. - - - - diff --git a/cecli/website/_posts/2024-08-14-code-in-json.md b/cecli/website/_posts/2024-08-14-code-in-json.md deleted file mode 100644 index a6e4d495235..00000000000 --- a/cecli/website/_posts/2024-08-14-code-in-json.md +++ /dev/null @@ -1,254 +0,0 @@ ---- -title: LLMs are bad at returning code in JSON -excerpt: LLMs write worse code if you ask them to return the code wrapped in JSON via a tool function call. -highlight_image: /assets/code-in-json.jpg -nav_exclude: true ---- -{% if page.date %} - -{% endif %} - -# LLMs are bad at returning code in JSON - - -LLMs produce lower quality code if they’re asked to return it as part of a structured JSON response. This seems to be true for many top models, including those with specialized support for JSON. Benchmarks show that models struggle with syntax errors in the code -they write, related to quoting and escaping it into JSON. -The benchmark results also imply a decreased capacity for solving coding problems due to the burden of JSON formatting. 
- -{% include code-in-json-benchmark.js %} - -> Figure 1: Aider coding benchmark scores of models using either plain markdown text or JSON to return code. -> Pass rate (%) averaged over 5 runs. -> Models produce better code when they return it as markdown text, -> as compared to returning code in a structured JSON response. - - -## Background - -People often ask why aider uses a plain text format for LLMs to specify code edits (below), -rather than relying on LLM tools and structured JSON responses. - -```python -greeting.py -<<<<<<< SEARCH -def greeting(): - print("Hello") -======= -def greeting(): - print("Goodbye") ->>>>>>> REPLACE -``` - -People expect that it would be easier and more reliable to use tool calls, -which would involve a structured JSON response more like this: - -```json -{ - "filename": "greeting.py", - "search": "def greeting():\n print(\"Hello\")\n" - "replace": "def greeting():\n print(\"Goodbye\")\n" -} -``` - -This question becomes increasingly relevant as LLM providers -continue to improve their tooling for reliably generating JSON. -For example, -[OpenAI recently announced](https://openai.com/index/introducing-structured-outputs-in-the-api/) -the ability to -strictly enforce that JSON responses will be syntactically correct -and conform to a specified schema. - -But just producing valid JSON is not sufficient for AI code generation -- -the code inside the JSON matters too. -It has to be high quality code that solves the assigned coding task without errors or bugs. -Unfortunately, -LLMs write worse code when they're asked to -wrap it in JSON. - -In some sense this shouldn't be surprising. -Just look at the very simple -JSON example above, with the escaped -quotes `\"` and -newlines `\n` -mixed into the code. -Imagine the additional -complexity -if the code itself contained quoted strings -with their -own escape sequences. - -Would *you* write better code by -typing it out normally -or typing it as a properly escaped -JSON string? 
- - -## Quantifying the benefits of plain text - -Previous [aider benchmark results](/2023/07/02/benchmarks.html) -showed -the superiority of returning code -as plain text compared to JSON-wrapped function calls. -Those results were obtained -over a year ago, against models far less capable than those available today. -OpenAI's newly announced support for "strict" JSON -suggests the possibility that modern models might be able -to return quality code inside a structured JSON response. - -The results presented here are based on -the -[aider "code editing" benchmark](/2023/07/02/benchmarks.html#the-benchmark) -of 133 practice exercises from the Exercism python repository. -The benchmark was simplified somewhat to focus on the differences between -plain text and JSON responses. -In particular, models were -restricted to a single attempt to solve each task -without a second try to fix errors. - -The performance of each model was compared across different strategies for returning code: - -- **Markdown** -- the model returned the whole source code file in standard markdown triple-backtick fences. -- **JSON** -- the model used a tool function call to return the whole source code file in a structured JSON response. -- **JSON (strict)** -- the same as the "JSON" strategy, but with `strict=True`. Only gpt-4o-2024-08-06 supported this setting. - -The markdown strategy was the same as -aider's "whole" edit format, where the -LLM returns an entire updated copy of the source file like this: - -```` -Here is the program you asked for which prints "Hello": - -greeting.py -``` -def greeting(): - print("Hello") -``` -```` - -Both JSON strategies required the LLM to call the `write_file` function with -an explanation/plan and -the entire updated copy of the source file. -The LLM didn't have to specify the filename, -since the benchmark operates on one source file at a time. 
- -```json -{ - "explanation": "Here is the program you asked for which prints \"Hello\"", - "content": "def greeting():\n print(\"Hello\")\n" -} -``` - -This experimental setup was designed to quantify -the effects of JSON-wrapping on the LLMs ability to write code to solve a task. - -## Results - -Four of the strongest code editing models were benchmarked -to assess the impact of JSON-wrapping code: - -- claude-3-5-sonnet-20240620 -- deepseek-coder (V2 0724) -- gpt-4o-2024-05-13 -- gpt-4o-2024-08-06 - -Each combination of model and code wrapping strategy was benchmarked 5 times -on all 133 problems. - -### Overall coding skill - -As shown in Figure 1, -all of the models did worse on the benchmark when asked to -return code in a structured JSON response. -Most did significantly worse, performing well below -their result with the markdown strategy. - -Some noteworthy observations: - -- OpenAI's gpt-4o-2024-05-13 was the only model where the markdown and JSON results were -close. Using JSON only dropped the score by 0.4 percent, a difference which is -within the margin of error for 5 trials. -- The use of OpenAI's new strict mode offered no improvement -as compared to non-strict JSON. -Both JSON results were well below the markdown result. -- The results from Sonnet and DeepSeek Coder suffered the worst harm from JSON wrapping. - -### Syntax errors - -Models tend to make more syntax errors *in the code they write* -when asked to wrap it in JSON. -The models can reliably -produce valid JSON, but code inside is more prone to syntax errors. - -Figure 2 shows the number of syntax errors found in the code produced by each -model and code wrapping strategy. -It totals up the `SyntaxError` and `IndentationError` errors from all 5 runs, -for each model and strategy combination. - -Below is an example of a `SyntaxError` created by gpt-4o-2024-05-13 using the -JSON code wrapping strategy. 
-It appears that the model got confused about escaping and quoting while trying -to format the JSON response. - -```python -Traceback (most recent call last): - ... - File "bottle-song/bottle_song.py", line 9 - lyrics.append(f'There'll be {i - 1} green bottles hanging on the wall.') - ^ -SyntaxError: unterminated string literal (detected at line 9) -``` - -The problematic line of code contains a single-quoted string which also -contains a single-quote character. -It should have been output as the following chunk of JSON, with -a double slash in `There\\'ll`. -That is needed to JSON-escape the `\` so that it survives -JSON-decoding to -produce `There\'ll` in the resulting code. -That would correctly escape the single-quote inside the single-quoted string. - -``` -...lyrics.append(f'There\\'ll be {i - 1} green bottles hanging on the wall.')\n... -``` - - - -{% include code-in-json-syntax.js %} - -> Figure 2: Number of `SyntaxError` and `IndentationError` errors found in model generated code, -> totaled from 5 runs. -> Models tend to make more syntax and formatting errors when asked to wrap code in JSON. - -### Beyond syntax errors - -Sonnet's results seems to indicate that the negative effects of JSON-wrapping -go beyond just syntactic difficulties. -Sonnet avoided syntax errors regardless of the code wrapping strategy, -but its benchmark scores in Figure 1 were nonetheless lower with JSON. -This implies that JSON-wrapping may distract or challenge models in a way that -reduces their ability to reason about solving coding problems. - - - -## Conclusions - -While the specific results differ from the similar -[July 2023 experiments](/2023/07/02/benchmarks.html), -the conclusion remains unchanged: LLMs are bad at returning code in -structured JSON responses. - -OpenAI appears to be making progress in allowing LLMs to -return JSON-wrapped code -without harming the code quality. 
-But it seems premature to consider switching from plain text -to JSON-wrapped code at this time. - ---------- - -#### Notes on the aider leaderboard - -*The results presented here are not directly comparable to results -from the main -[aider LLM leaderboard](https://aider.chat/docs/leaderboards/). -A number of settings were changed to simplify the benchmark -in order to focus on comparing plain text and JSON-wrapped code.* diff --git a/cecli/website/_posts/2024-08-26-sonnet-seems-fine.md b/cecli/website/_posts/2024-08-26-sonnet-seems-fine.md deleted file mode 100644 index 850aa392948..00000000000 --- a/cecli/website/_posts/2024-08-26-sonnet-seems-fine.md +++ /dev/null @@ -1,145 +0,0 @@ ---- -title: Sonnet seems as good as ever -excerpt: Sonnet's score on the aider code editing benchmark has been stable since it launched. -highlight_image: /assets/sonnet-seems-fine.jpg ---- -{% if page.date %} - -{% endif %} - -# Sonnet seems as good as ever - -Recently there has been a lot of speculation that Sonnet has been -dumbed-down, nerfed or is otherwise performing worse. -Sonnet seems as good as ever, when performing the -[aider code editing benchmark](/docs/benchmarks.html#the-benchmark) -via the API. - -Below is a graph showing the performance of Claude 3.5 Sonnet over time. -It shows every clean, comparable benchmark run performed since Sonnet launched. -Benchmarks were performed for various reasons, usually -to evaluate the effects of small changes to aider's system prompts. - -The graph shows variance, but no indication of a noteworthy -degradation. -There is always some variance in benchmark results, typically +/- 2% -between runs with identical prompts. - -It's worth noting that these results would not capture any changes -made to Anthropic web chat's use of Sonnet. - -
- -
- - - - - - -> This graph shows the performance of Claude 3.5 Sonnet on -[aider's code editing benchmark](/docs/benchmarks.html#the-benchmark) -> over time. 'Pass Rate 1' represents the initial success rate, while 'Pass Rate 2' shows the success rate after a second attempt with a chance to fix testing errors. -> The -> [aider LLM code editing leaderboard](https://aider.chat/docs/leaderboards/) -> ranks models based on Pass Rate 2. - diff --git a/cecli/website/_posts/2024-09-12-o1.md b/cecli/website/_posts/2024-09-12-o1.md deleted file mode 100644 index 7b44aa67939..00000000000 --- a/cecli/website/_posts/2024-09-12-o1.md +++ /dev/null @@ -1,116 +0,0 @@ ---- -title: o1-preview is SOTA on the aider leaderboard -excerpt: Preliminary benchmark results for the new OpenAI o1 models. -nav_exclude: true ---- -{% if page.date %} - -{% endif %} - -# OpenAI o1-preview is SOTA on the aider leaderboard - - - -{% assign edit_sorted = site.data.o1_results | sort: 'pass_rate_2' | reverse %} -{% include leaderboard_graph.html - chart_id="editChart" - data=edit_sorted - row_prefix="edit-row" - pass_rate_key="pass_rate_2" -%} - - -## o1-preview - -OpenAI o1-preview scored 79.7% on aider's code editing benchmark, -a state of the art result. -It achieved this result with the -["whole" edit format](/docs/leaderboards/#notes-on-the-edit-format), -where the LLM returns a full copy of the source code file with changes. - -It is much more practical to use aider's -["diff" edit format](/docs/leaderboards/#notes-on-the-edit-format), -which allows the LLM to return search/replace blocks to -efficiently edit the source code. -This saves significant time and token costs. - -Using the diff edit format the o1-preview model had a strong -benchmark score of 75.2%. -This likely places o1-preview between Sonnet and GPT-4o for practical use, -but at significantly higher cost. - -## o1-mini - -OpenAI o1-mini is priced similarly to GPT-4o and Claude 3.5 Sonnet, -but scored below those models. 
-It also works best with the whole edit format. - - -## Future work - -The o1-preview model had trouble conforming to aider's diff edit format. -The o1-mini model had trouble conforming to both the whole and diff edit formats. -Aider is extremely permissive and tries hard to accept anything close -to the correct formats. - -It is surprising that such strong models had trouble with -the syntactic requirements of simple text output formats. -It seems likely that aider could optimize its prompts and edit formats to -better harness the o1 models. - - -## Using aider with o1 - -OpenAI's new o1 models are supported in v0.57.0 of aider: - -``` -aider --model o1-mini -aider --model o1-preview -``` - -{: .note } -> These are initial benchmark results for the o1 models, -> based on aider v0.56.1-dev. -> See the [aider leaderboards](/docs/leaderboards/) for up-to-date results -> based on the latest aider releases. - - - - - - - - - - - - - - {% for row in edit_sorted %} - - - - - - - - {% endfor %} - -
ModelPercent completed correctlyPercent using correct edit formatCommandEdit format
{{ row.model }}{{ row.pass_rate_2 }}%{{ row.percent_cases_well_formed }}%{{ row.command }}{{ row.edit_format }}
- - - diff --git a/cecli/website/_posts/2024-09-26-architect.md b/cecli/website/_posts/2024-09-26-architect.md deleted file mode 100644 index ddd3ceab3c7..00000000000 --- a/cecli/website/_posts/2024-09-26-architect.md +++ /dev/null @@ -1,418 +0,0 @@ ---- -title: Separating code reasoning and editing -excerpt: An Architect model describes how to solve the coding problem, and an Editor model translates that into file edits. This Architect/Editor approach produces SOTA benchmark results. -highlight_image: /assets/architect.jpg -draft: false -nav_exclude: true ---- -{% if page.date %} - -{% endif %} - -# Separating code reasoning and editing - -Aider now has experimental support for using two models to complete each coding task: - -- An Architect model is asked to describe how to solve the coding problem. -- An Editor model is given the Architect's solution and asked to produce specific code editing instructions to apply those changes to existing source files. - -Splitting up "code reasoning" and "code editing" in this manner -has produced SOTA results on -[aider's code editing benchmark](/docs/benchmarks.html#the-benchmark). -Using o1-preview as the Architect with either DeepSeek or o1-mini as the -Editor produced the SOTA score of 85%. -Using the Architect/Editor approach -also significantly improved the benchmark scores of many -models, compared to their previous "solo" baseline scores (striped bars). - - - - - - -{% assign sorted_data = site.data.architect | sort: "pass_rate_2" | reverse %} - - - -## Motivation - -This approach was motivated by the release of OpenAI's o1 models. -They are strong at reasoning, but often fail to output properly formatted -code editing instructions. -It helps to instead let them describe the solution -however they prefer and then pass that output to a more traditional LLM. -This second Editor LLM can then interpret the solution description and -produce the code editing instructions needed to update -the existing source code. 
- -This approach has recently become attractive for aider due to -rapid improvements in the speed and costs of frontier models. -In particular, chaining older LLMs would have been quite slow and -incompatible with aider's goal of providing an interactive, -pair programming AI coding experience. - -## Code reasoning and code editing - -Normally aider asks the model to solve a coding problem in one prompt, -asking the LLM to explain the solution and return -a well formatted series of file edits. -All of [aider's editing formats](/docs/more/edit-formats.html) -require the LLM to return source code edits in a specific text -format, so that aider can process the edits and apply them to the local source files. - -Because this all happens in a single prompt/response round trip to the LLM, -the model has to split its attention between -solving the coding problem and conforming to the edit format. - -The Architect/Editor approach splits this into two inference steps, possibly -using two different LLMs: - -1. Solve the coding problem (Architect). -2. Turn the proposed solution into a series of well formed code edits (Editor). - -The Architect/Editor approach allows the Architect to focus on solving the coding problem -and *describe the solution however comes naturally to it*. -Similarly, the Editor can focus all of its attention on properly formatting the edits -without needing to reason much about how to solve the coding problem. - -We can assign the Architect and Editor roles to LLMs which are well suited to their needs. -Strong reasoning model like o1-preview make excellent Architects, while -the Editor role can be assigned to an appropriate model based on cost, speed -and code editing skill. - -## Results - -The graph above and the table below show the -[aider's code editing benchmark](/docs/benchmarks.html#the-benchmark) -score for various combinations of Architect and Editor models. 
- - -Some noteworthy observations: - -- Pairing o1-preview as Architect with either Deepseek or o1-mini as Editor sets a SOTA significantly above the previous best score. This result is obtained with the "whole" editing format, requiring the Editor to output a full update copy of each edited source file. Both of these steps are therefore quite slow, so probably not practical for interactive use with aider. -- Pairing OpenAI's o1-preview with Anthropic's Sonnet as the Editor produces the second best result. This is an entirely practical configuration for users able to work with both providers. -- Pairing many models with themselves in the Architect/Editor configuration can provide -significant benefits. -Sonnet, GPT-4o and GPT-4o-mini all scored higher when used as an Architect/Editor pair. -- Deepseek is surprisingly effective as an Editor model. It seems remarkably capable at turning proposed coding solutions into new, updated versions of the source files. Using the efficient "diff" editing format, Deepseek helps all the Architect models except for Sonnet. - -## Try it! - -The development version of aider -has built in defaults to support Architect/Editor coding with -o1-preview, o1-mini, GPT-4o and Claude 3.5 Sonnet. -Run aider with `--architect` or get started quickly like this: - -``` -pip install -U aider-chat - -# Change directory into a git repo -cd /to/your/git/repo - -# Work with Claude 3.5 Sonnet as the Architect and Editor -export ANTHROPIC_API_KEY=your-key-goes-here -aider --sonnet --architect - -# Work with OpenAI models, using gpt-4o as the Editor -export OPENAI_API_KEY=your-key-goes-here -aider --4o --architect -aider --o1-mini --architect -aider --o1-preview --architect -``` - -## More info - -Aider has a number of "chat modes", and "architect" is available as a new chat mode. -The `--architect` switch is a shortcut for `--chat-mode architect`. -For more details, see documentation on -[aider's chat modes](/docs/usage/modes.html). 
- - -## Full results - -Below are the benchmark results using various models as the Architect, paired with -various models as the Editor. -Each section includes a "baseline" result, -where the model works -by itself in aider's normal "code" editing mode -(not as part of an Architect/Editor configuration). -This "solo" baseline represents the performance previously available when using -this model with aider. - -
- - - - - - - - - - - {% for group in grouped_data %} - {% assign group_class = forloop.index | modulo: 2 | plus: 1 %} - {% for item in group.items %} - - - - - - - {% endfor %} - {% endfor %} - -
ArchitectEditorEdit FormatPass Rate
{{ item.model }}{% if item.editor_model %}{{ item.editor_model }}{% else %}Baseline{% endif %}{{ item.editor_edit_format | default: item.edit_format }}{{ item.pass_rate_2 }}%
-
diff --git a/cecli/website/_posts/2024-11-21-quantization.md b/cecli/website/_posts/2024-11-21-quantization.md deleted file mode 100644 index 33677b5d833..00000000000 --- a/cecli/website/_posts/2024-11-21-quantization.md +++ /dev/null @@ -1,194 +0,0 @@ ---- -title: Details matter with open source models -excerpt: Open source LLMs are becoming very powerful, but pay attention to how you (or your provider) are serving the model. It can affect code editing skill. -highlight_image: /assets/quantization.jpg -draft: false -nav_exclude: true ---- -{% if page.date %} - -{% endif %} - -# Details matter with open source models -{: .no_toc } - - - -Open source models like Qwen 2.5 32B Instruct are performing very well on -aider's code editing benchmark, rivaling closed source frontier models. - -But pay attention to how your model is being served and quantized, -as it can impact code editing skill. -Open source models are often available at a variety of quantizations, -and can be served with different token limits. -These details matter when working with code. - -The graph above and table below compares different versions of the Qwen 2.5 Coder 32B Instruct model, -served both locally and from a variety of cloud providers. - -- The [HuggingFace BF16 weights](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) served via [glhf.chat](https://glhf.chat). -- [4bit and 8bit quants for mlx](https://t.co/cwX3DYX35D). -- The results from [OpenRouter's mix of providers](https://openrouter.ai/qwen/qwen-2.5-coder-32b-instruct/providers) which serve the model with different levels of quantization. -- Results from OpenRouter's providers, both served via OpenRouter and directly to their own APIs. -- Ollama locally serving different quantizations from the [Ollama model library](https://ollama.com/library/qwen2.5-coder:32b-instruct-q4_K_M) with 8k+ -context windows. -- An Ollama fp16 quantization served with Ollama's default 2k context window. 
- -### Pitfalls and details - -This benchmarking effort highlighted a number of pitfalls and details specific to open source -models which -can have a significant impact on their ability to correctly edit code: - -- **Quantization** -- Open source models are often available at dozens of different quantizations. -Most seem to only modestly decrease code editing skill, but stronger quantizations -do have a real impact. -- **Context window** -- Cloud providers can decide how large a context window to accept, -and they often choose differently. Ollama's local API server -defaults to a tiny 2k context window, -and silently discards data that exceeds it. Such a small window has -catastrophic effects on performance, without throwing obvious hard errors. -- **Output token limits** -- Open source models are often served with wildly -differing output token limits. This has a direct impact on how much code the -model can write or edit in a response. -- **Buggy cloud providers** -- While benchmarking Qwen 2.5 Coder 32B Instruct -and DeepSeek V2.5, I discovered -multiple cloud providers with broken or buggy API endpoints. -They seemed -to be returning results different from expected based on the advertised -quantization and context sizes. -The harm caused to the code editing benchmark varied from serious -to catastrophic. -One provider scored 0.5% on the benchmark with DeepSeek V2.5, a highly capable model. - -Closed source, proprietary models don't typically have these issues. -They are owned and operated by the organization that created them, -and typically served with specific, predictable context window and output token limits. -Their quantization level is usually unknown, but fixed and unchanging for all users. - -### Conclusions - -The best versions of the Qwen model rival GPT-4o, while the worst performing -quantization is more like the older GPT-4 Turbo when served competently. 
-Even an otherwise excellent fp16 quantization falls to GPT-3.5 Turbo levels of performance -if run with Ollama's default 2k context window. - -### Sections -{: .no_toc } - -- TOC -{:toc} - -## Benchmark results - -{: .note :} -These are results from single benchmark runs, so expect normal variance of +/- 1-2%. - - - - - - - - - - - - - - - - - - {% assign quant_sorted = site.data.quant | sort: 'pass_rate_2' | reverse %} - {% for row in quant_sorted %} - - - - - - - - {% endfor %} - -
ModelPercent completed correctlyPercent using correct edit formatCommandEdit format
{{ row.model }}{{ row.pass_rate_2 }}%{{ row.percent_cases_well_formed }}%{{ row.command }}{{ row.edit_format }}
- - - - - -## Setting Ollama's context window size - -[Ollama uses a 2k context window by default](https://github.com/ollama/ollama/blob/main/docs/faq.md#how-can-i-specify-the-context-window-size), -which is very small for working with aider. -Unlike most other LLM servers, Ollama does not throw an error if you submit -a request that exceeds the context window. -Instead, it just silently truncates the request by discarding the "oldest" messages -in the chat to make it fit within the context window. - -Except for the single 2k context result, -all of the Ollama results above were collected with at least an 8k context window. -An 8k window is large enough to attempt all the coding problems in the benchmark. -Aider sets Ollama's context window to 8k by default, starting in aider v0.65.0. - -You can change the Ollama server's context window with a -[`.aider.model.settings.yml` file](https://aider.chat/docs/config/adv-model-settings.html#model-settings) -like this: - -``` -- name: ollama/qwen2.5-coder:32b-instruct-fp16 - extra_params: - num_ctx: 8192 -``` - -## Choosing providers with OpenRouter - -OpenRouter allows you to ignore specific providers in your -[preferences](https://openrouter.ai/settings/preferences). -This can be used to limit your OpenRouter requests to be -served by only your preferred providers. - -## Notes - -This article went through many revisions as I received feedback from -numerous members of the community. -Here are some of the noteworthy learnings and changes: - -- The first version of this article included incorrect Ollama models. -- Earlier Ollama results used the too small default 2k context window, -artificially harming the benchmark results. -- The benchmark results appear to have uncovered a problem in the way -OpenRouter was communicating with Hyperbolic. -They fixed the issue 11/24/24, shortly after it was pointed out. 
diff --git a/cecli/website/_posts/2024-12-03-qwq.md b/cecli/website/_posts/2024-12-03-qwq.md deleted file mode 100644 index a10ce53e26f..00000000000 --- a/cecli/website/_posts/2024-12-03-qwq.md +++ /dev/null @@ -1,140 +0,0 @@ ---- -title: QwQ is a code architect, not an editor -excerpt: QwQ is reasoning model like o1, and needs to be used as an architect with another model as editor. -highlight_image: /assets/qwq.jpg -draft: false -nav_exclude: true ---- -{% if page.date %} - -{% endif %} - -# QwQ is a code architect, not an editor -{: .no_toc } - - - -QwQ 32B Preview is a "reasoning" model, which spends a lot of tokens thinking before -rendering a final response. -This is similar to OpenAI's o1 models, which are most effective with aider -[when paired as an architect with a traditional LLM as an editor](https://aider.chat/2024/09/26/architect.html). -In this mode, the reasoning model acts as an "architect" to propose a solution to the -coding problem without regard for how to actually make edits to the source files. -The "editor" model receives that proposal, and focuses solely on how to -edit the existing source code to implement it. - -Used alone without being paired with an editor, -QwQ was unable to comply with even the simplest -[editing format](https://aider.chat/docs/more/edit-formats.html). -It was not able to reliably edit source code files. -As a result, QwQ's solo score on the benchmark was quite underwhelming -(and far worse than the o1 models performing solo). - -QwQ is based on -Qwen 2.5 Coder 32B Instruct, -and does better when paired with it as an architect + editor combo. -Though this provided only a modest benchmark improvement over just using Qwen alone, -and comes with a fairly high cost in terms of latency. -Each request must wait for QwQ to return all its thinking text -and the final solution proposal. -And then one must wait for Qwen to turn that large -response into actual file edits. 
- -Pairing QwQ with other sensible editor models performed the same or worse than -just using Qwen 2.5 Coder 32B Instruct alone. - -QwQ+Qwen seems to be the best way to use QwQ, achieving a score of 74%. -That is well below the -SOTA results for this benchmark: Sonnet alone scores 84%, and -o1-preview + o1-mini as architect + editor scores 85%. - - -## QwQ specific editing formats - -I spent some time experimenting with a variety of custom editing formats -for QwQ. -In particular, I tried to parse the QwQ response and discard the long -sections of "thinking" and retain only the "final" solution. -None of this custom work seemed to translate -into any significant improvement in the benchmark results. - - -## Results - - - - - - - - - - - - - - - - {% assign qwq_sorted = site.data.qwq | sort: 'pass_rate_2' | reverse %} - {% for row in qwq_sorted %} - - - - - - - - {% endfor %} - -
ModelPercent completed correctlyPercent using correct edit formatCommandEdit format
{{ row.model }}{{ row.pass_rate_2 }}%{{ row.percent_cases_well_formed }}%{{ row.command }}{{ row.edit_format }}
- - - - - -## Open source model caveats - -As discussed in a recent blog post, -[details matter with open source models](https://aider.chat/2024/11/21/quantization.html). -For clarity, new benchmark runs for this article were -performed against OpenRouter's endpoints for -QwQ 32B Preview and Qwen 2.5 Coder 32B Instruct. -For the other models, the benchmark was direct to their providers' APIs. - -Having recently done extensive testing of OpenRouter's Qwen 2.5 Coder 32B Instruct endpoint, -it seems reliable. -The provider Mancer was blocked due to the small context window it provides. - -For QwQ 32B Preview, Fireworks was blocked because of its small context window. diff --git a/cecli/website/_posts/2024-12-21-polyglot.md b/cecli/website/_posts/2024-12-21-polyglot.md deleted file mode 100644 index 4b2f9bdc7b2..00000000000 --- a/cecli/website/_posts/2024-12-21-polyglot.md +++ /dev/null @@ -1,216 +0,0 @@ ---- -title: o1 tops aider's new polyglot leaderboard -excerpt: o1 scores the top result on aider's new multi-language, more challenging coding benchmark. -highlight_image: /assets/o1-polyglot.jpg -draft: false -nav_exclude: true ---- -{% if page.date %} - -{% endif %} - -# o1 tops aider's new polyglot leaderboard -{: .no_toc } - - - -OpenAI's new o1 model with "high" reasoning effort -gets the top score on the -new -[aider polyglot leaderboard](/docs/leaderboards/), significantly ahead of -other top LLMs. -The new polyglot benchmark uses many popular coding languages -and was designed to be -*much more challenging* than aider's original -[code editing benchmark](/docs/leaderboards/edit.html). -This more clearly distinguishes -the performance of -today's strongest coding models and -leaves headroom for future LLMs. - -{: .note :} -See the main -[aider leaderboard](https://aider.chat/docs/leaderboards/) -for benchmark results from more models. -This article only contains a snapshot -of results at the time of publication. 
- -## The polyglot benchmark - -Like aider's original code editing benchmark, -the new polyglot benchmark is based on Exercism -coding exercises. - -The new polyglot benchmark: - -- Contains coding problems in C++, Go, Java, JavaScript, Python and Rust. -The old benchmark was solely based on Python exercises. -- Focuses on the *most difficult* 225 exercises out of the 697 that -Exercism provides for those languages. -The old benchmark simply included all 133 Python exercises, -regardless of difficulty. - -## Motivation and goals - -Aider's original code editing benchmark was -saturating as the top scores approached and then surpassed 80%. -Sonnet's score of 84.2% was based on solving 112 of the 133 -exercises, leaving only 21 unsolved exercises. -New champions were advancing the top score by -solving just 1-2 more problems than the previous record. -This made it hard to clearly -measure the -difference in code editing skill between these top models. - -Part of the problem is that many of the original -133 Python problems are very easy -and provide -little challenge to today's frontier LLMs. -Models as old as GPT 3.5 Turbo were able to solve half of the -133 problems. -Such easy problems simply inflate the benchmark scores -of modern LLMs without -providing any data about which models are better or worse. - -The main goal for a new benchmark -was to re-calibrate the scale so that -today's top coding LLMs -would occupy a wide range of scores between about 5% and 50%. -This should leave headroom for future LLMs and -make it possible to -more clearly compare the relative performance of top models. - -## Designing the polyglot benchmark - -The new benchmark: - -- Tests LLMs with more coding languages, to increase diversity and source a larger pool of problems. -- Includes just the most challenging coding problems and excludes easy problems that are solvable by most of today's top coding LLMs. 
-- Includes more total coding problems, to enable more granularity of comparison. - -The new benchmark is based on Exercism coding problems -from 6 of the most popular programming languages: - -- C++ -- Go -- Java -- JavaScript -- Python -- Rust - -Exercism provides a total of 697 coding problems in those 6 languages. -A set of 7 of today's top coding models each attempted all 697 of -the Exercism problems: - -- Sonnet -- Haiku -- o1 Mini -- DeepSeek -- GPT-4o -- Qwen 32B Coder Instruct -- GPT-4o Mini - -Depending on the difficulty of the problems, -a different number of solutions were found by the collection of -7 models: - -| Solutions
found | Number of
problems | Cumulative number
of problems | -|--------|-----------|------------| -| 0 | 66 | 66 | -| 1 | 61 | 127 | -| 2 | 50 | 177 | -| 3 | 48 | 225 | -| 4 | 53 | 278 | -| 5 | 71 | 349 | -| 6 | 90 | 439 | -| 7 | 258 | 697 | - -In the table above, you can see that 258 of the problems were solved -by all 7 LLMs. -These problems are far too easy, and wouldn't be good choices for the new benchmark. -Instead, we need hard problems like the -66 that none of the 7 models were able to solve. - -The new benchmark uses -the 225 problems that were solved by 3 or fewer models. -This achieves a balance between hard and moderate problems, -and provides a large but not excessive total pool of problems. -It also represents a good diversity of coding languages: - -| Language | Problems | -|-------------|----------| -| C++ | 26 | -| Go | 39 | -| Java | 47 | -| JavaScript | 49 | -| Python | 34 | -| Rust | 30 | -| **Total** | **225** | - -## o1 - -OpenAI's new o1 model established a very strong -top score of 62% on the new benchmark. -This still leaves 86 problems of headroom for future models -to solve. -Given the incredible pace of recent advancements, it -will be interesting to see -how long it will take for this new benchmark to saturate. - -## Benchmark problems - -The 225 coding problems are available in the -[aider polyglot benchmark repo](https://github.com/Aider-AI/polyglot-benchmark) -on GitHub. - - - -## Results - - - - - - - - - - - - - {% assign edit_sorted = site.data.o1_polyglot_leaderboard | sort: 'pass_rate_2' | reverse %} - {% for row in edit_sorted %} - - - - - - - - {% endfor %} - -
ModelPercent completed correctlyPercent using correct edit formatCommandEdit format
{{ row.model }}{{ row.pass_rate_2 }}%{{ row.percent_cases_well_formed }}%{{ row.command }}{{ row.edit_format }}
- - - - - diff --git a/cecli/website/_posts/2025-01-15-uv.md b/cecli/website/_posts/2025-01-15-uv.md deleted file mode 100644 index 0ec36f22bfd..00000000000 --- a/cecli/website/_posts/2025-01-15-uv.md +++ /dev/null @@ -1,102 +0,0 @@ ---- -title: Using uv as an installer -excerpt: Reliably packaging & distributing python CLI tools is hard. Aider uses uv in novel ways to make it easy to install the aider CLI, its dependencies and python 3.12. All in an isolated env. -draft: false -nav_exclude: true ---- -{% if page.date %} - -{% endif %} - -# Using uv as an installer -{: .no_toc } - -It's hard to reliably -package and distribute python command line tools -to end users. -Users frequently encounter challenges: -dependency version conflicts, virtual environment management, -needing to install python or a specific version of python, etc. - -Aider employs [uv](https://github.com/astral-sh/uv) -in a couple of novel ways to streamline the installation process: - -1. Install aider with -`curl https://aider.chat/install.sh | sh` even if python isn't already installed. - -2. Users who have python 3.8+ installed can `pip install aider-install && aider-install`. - -Both methods use uv to **globally** install the `aider` command line program, -with all of its dependencies in an **isolated environment**. -They ensure that aider will run with **python 3.12**, and install that version -if it is not already available. - -These uv install methods are especially helpful for aider, because it -has a large set of very specific dependencies. -Since not all of aider's dependencies are available on all python versions, -it requires python 3.10-3.14. - -Most users don't want to worry about these details -- -they just want a quick way to install and run aider. 
- - -## One-liners - -Users can install aider with a shell one-liner, without even having python previously installed: - -```bash -curl -LsSf https://aider.chat/install.sh | sh -``` - -This installs uv, then uses it to install python 3.12, -install the `aider` command line tool -and update the user's shell path. -Under the hood, it is simply a copy of -uv's own install script `https://astral.sh/uv/install.sh` -with [one line added](https://github.com/Aider-AI/aider/blob/4251e976b3aa52c2a3af08da4b203d4d524c8e92/aider/website/install.sh#L1181), to install aider as a tool: - -``` -ensure "${_install_dir}/uv" tool install --force --python python3.12 aider-chat@latest -``` - - -## aider-install - -The aider-install python package allows quick global installation of aider -for users who already have python 3.8+ installed. -It simply provides the `aider-install` command line program, -which users just need to run once. - -```bash -pip install aider-install -aider-install -``` - -The `pip install aider-install` installs only two packages: -aider-install and the [uv python package](https://pypi.org/project/uv/). -This ensures that uv is available -in the user's environment. -Everything else is installed in a stand-alone environment created by uv. - -When the user runs `aider-install`, it runs uv -to install aider as a tool and update the user's shell path if needed: - -```bash -uv tool install --force --python python3.12 aider-chat -uv tool update-shell -``` - - -## Benefits - -These uv install methods have been popular with users, -providing a hassle free way to install aider and quickly get started. -Installs are also extremely fast, much faster than pip or pipx installs -even when uv is also installing python 3.12! - -There are also a number of benefits from the perspective of the tool developer/publisher. -Since providing these install methods, far fewer users report dependency problems and -version conflicts as compared to users who `pip install aider-chat`. 
-There is also less pressure to rapidly support the newest python versions, -since aider always installs with python 3.12. - diff --git a/cecli/website/_posts/2025-01-24-r1-sonnet.md b/cecli/website/_posts/2025-01-24-r1-sonnet.md deleted file mode 100644 index 909f69c232d..00000000000 --- a/cecli/website/_posts/2025-01-24-r1-sonnet.md +++ /dev/null @@ -1,118 +0,0 @@ ---- -title: R1+Sonnet set SOTA on aider's polyglot benchmark -excerpt: R1+Sonnet has set a new SOTA on the aider polyglot benchmark. At 14X less cost compared to o1. -highlight_image: /assets/r1-sonnet-sota.jpg -draft: false -nav_exclude: true ---- -{% if page.date %} - -{% endif %} - -# R1+Sonnet set SOTA on aider's polyglot benchmark -{: .no_toc } - - - -Aider supports [using a pair of models for coding](https://aider.chat/2024/09/26/architect.html): - -- An Architect model is asked to describe how to solve the coding problem. Thinking/reasoning models often work well in this role. -- An Editor model is given the Architect's solution and asked to produce specific code editing instructions to apply those changes to existing source files. - -**R1 as architect with Sonnet as editor has set a new SOTA of 64.0%** on the -[aider polyglot benchmark](/2024/12/21/polyglot.html). -They achieve this at **14X less cost** compared to the previous o1 SOTA result. - -o1 paired with Sonnet didn't produce better results than just using o1 alone. -Using various other models as editor didn't seem to improve o1 or R1 versus their solo scores. -This is in contrast to the first wave of thinking models like o1-preview and o1-mini, -which improved when paired with many different editor models. - -o1 was set with reasoning effort high for these tests. 
- -## Try it - -Once you [install aider](https://aider.chat/docs/install.html), -you can use aider, R1 and Sonnet like this: - -```bash -export DEEPSEEK_API_KEY= -export ANTHROPIC_API_KEY= - -aider --architect --model r1 --editor-model sonnet -``` - -Or if you have an [OpenRouter](https://openrouter.ai) account: - -```bash -export OPENROUTER_API_KEY= - -aider --architect --model openrouter/deepseek/deepseek-r1 --editor-model openrouter/anthropic/claude-3.5-sonnet -``` - -## Thinking output - -There has been -[some recent discussion](https://github.com/Aider-AI/aider/pull/2973) -about extracting the `` tokens from R1's responses -and feeding them to Sonnet. -That was an interesting experiment, for sure. - -To be clear, the results above are *not* using R1's thinking tokens, just the normal -final output. -R1 is configured in aider's standard architect role with Sonnet as editor. -The benchmark results that used the thinking tokens appear to be worse than -the architect/editor results shared here. - -## Results - - - - - - - - - - - - - - {% assign edit_sorted = site.data.r1_architect | sort: 'pass_rate_2' | reverse %} - {% for row in edit_sorted %} - - - - - - - - - {% endfor %} - -
ModelPercent completed correctlyPercent using correct edit formatCommandEdit formatTotal Cost
{{ row.model }}{{ row.pass_rate_2 }}%{{ row.percent_cases_well_formed }}%{{ row.command }}{{ row.edit_format }}{% if row.total_cost == 0 %}?{% else %}${{ row.total_cost | times: 1.0 | round: 2 }}{% endif %}
- - - - - diff --git a/cecli/website/_posts/2025-01-28-deepseek-down.md b/cecli/website/_posts/2025-01-28-deepseek-down.md deleted file mode 100644 index b3145ce9392..00000000000 --- a/cecli/website/_posts/2025-01-28-deepseek-down.md +++ /dev/null @@ -1,257 +0,0 @@ ---- -title: Alternative DeepSeek V3 providers -excerpt: DeepSeek's API has been experiencing reliability issues. Here are alternative providers you can use. -#highlight_image: /assets/deepseek-down.jpg -draft: false -nav_exclude: true ---- -{% if page.date %} - -{% endif %} - -# Alternative DeepSeek V3 providers -{: .no_toc } - - - -DeepSeek's API has been experiencing significant reliability issues for the past 24-48+ hours, with many users reporting downtime and overload problems. -Their [status page](https://status.deepseek.com) notes an ongoing incident. - -If you're affected by these issues, several alternative providers offer access to DeepSeek V3. This article compares their performance on aider's polyglot benchmark to help you choose a reliable alternative. - -## Providers -{: .no_toc } - -* TOC -{:toc} - -## OpenRouter - -[OpenRouter offers many DeepSeek providers](https://openrouter.ai/deepseek/deepseek-chat/providers) -through their unified API. -You can use aider with OpenRouter like this: - -```bash -# Set your API key using environment variables -export OPENROUTER_API_KEY= -aider --model openrouter/deepseek/deepseek-chat - -# Or use the --api-key command line option -aider --model openrouter/deepseek/deepseek-chat --api-key openrouter= - -# Or add it to .aider.conf.yml in your home directory or project root: -api-key: - - openrouter= -``` - -OpenRouter automatically monitors their providers and routes requests to stable -APIs and away from those experiencing unreliable performance. - -But not all providers serve the same version of open source models, and not -all have the same privacy guarantees. 
-You can control which OpenRouter providers are used to serve the model via -[aider's model settings](https://aider.chat/docs/config/adv-model-settings.html#model-settings). -Create a `.aider.model.settings.yml` file in your home directory or git project root with settings like this: - -```yaml -- name: openrouter/deepseek/deepseek-chat - extra_params: - extra_body: - provider: - # Only use these providers, in this order - order: ["Novita"] - # Don't fall back to other providers - allow_fallbacks: false -``` - -See [OpenRouter's provider routing docs](https://openrouter.ai/docs/provider-routing) for more details. - - -## Fireworks - -```bash -# Set your API key using environment variables -export FIREWORKS_API_KEY= -aider --model fireworks_ai/accounts/fireworks/models/deepseek-chat - -# Or use the --api-key command line option -aider --model fireworks_ai/accounts/fireworks/models/deepseek-chat --api-key fireworks= - -# Or add it to .aider.conf.yml in your home directory or project root: -api-key: - - fireworks= -``` - -Create a `.aider.model.settings.yml` file in your home directory or git project root with settings like this: - -```yaml -- name: fireworks_ai/accounts/fireworks/models/deepseek-chat - edit_format: diff - weak_model_name: null - use_repo_map: true - send_undo_reply: false - lazy: false - reminder: sys - examples_as_sys_msg: true - extra_params: - max_tokens: 8192 - cache_control: false - caches_by_default: true - use_system_prompt: true - use_temperature: true - streaming: true -``` - - -## Hyperbolic - -You can use [Hyperbolic's API](https://hyperbolic.xyz) as an OpenAI-compatible provider: - -```bash -# Set your API key using environment variables -export OPENAI_API_BASE=https://api.hyperbolic.xyz/v1/ -export OPENAI_API_KEY= -aider --model openai/deepseek-ai/DeepSeek-V3 - -# Or use the --api-key command line option -aider --model openai/deepseek-ai/DeepSeek-V3 --api-key openai= - -# Or add it to .aider.conf.yml in your home directory or project 
root: -api-key: - - openai= -``` - -Create a `.aider.model.settings.yml` file in your home directory or git project root with settings like this: - -```yaml -- name: openai/deepseek-ai/DeepSeek-V3 - edit_format: diff - weak_model_name: null - use_repo_map: true - send_undo_reply: false - lazy: false - reminder: sys - examples_as_sys_msg: true - cache_control: false - caches_by_default: true - use_system_prompt: true - use_temperature: true - streaming: true - editor_model_name: null - editor_edit_format: null - extra_params: - max_tokens: 65536 -``` - -## Ollama - -You can run [DeepSeek V3 via Ollama](https://ollama.com/library/deepseek-v3). - -```bash -# Pull the model -ollama pull deepseek-v3 - -# Start your ollama server -ollama serve - -# In another terminal window... -export OLLAMA_API_BASE=http://127.0.0.1:11434 # Mac/Linux -setx OLLAMA_API_BASE http://127.0.0.1:11434 # Windows, restart shell after setx - -aider --model ollama/deepseek-v3 -``` - -It's important to provide model settings, especially the `num_ctx` parameter to -set the context window. -Ollama uses a 2k context window by default, which is very small for working with aider. -Larger context windows will allow you to work with larger amounts of code, -but will use memory and increase latency. - -Unlike most other LLM servers, Ollama does not throw an error if you submit a request that exceeds the context window. Instead, it just silently truncates the request by discarding the “oldest” messages in the chat to make it fit within the context window. - -So if your context window is too small, you won’t get an explicit error. The biggest symptom will be that aider says it can’t see (some of) the files you added to the chat. That’s because ollama is silently discarding them because they exceed the context window. 
- -Create a `.aider.model.settings.yml` file in your home directory or git project root with settings like this: - -```yaml -- name: ollama/deepseek-v3 - edit_format: diff - weak_model_name: null - use_repo_map: true - send_undo_reply: false - lazy: false - reminder: sys - examples_as_sys_msg: true - cache_control: false - caches_by_default: true - use_system_prompt: true - use_temperature: true - streaming: true - extra_params: - num_ctx: 8192 # How large a context window? -``` - -## Other providers - -You will need to properly configure aider to work with DeepSeek V3 when served -via other providers: - -- Determine the `--model` name to use. -- Provide your API key to aider. -- Add model settings to `.aider.model.settings.yml`. - - -Adapt the `.aider.model.settings.yml` shown above for Fireworks. You will need to change the `name` field to match you chosen provider's model naming scheme. - -See [Advanced model settings](https://aider.chat/docs/config/adv-model-settings.html#model-settings) for details about all aider model settings - -## Results - - - - - - - - - - - - - - {% assign edit_sorted = site.data.deepseek-down | sort: 'pass_rate_2' | reverse %} - {% for row in edit_sorted %} - - - - - - - - {% endfor %} - -
ModelPercent completed correctlyPercent using correct edit formatCommandEdit format
{{ row.model }}{{ row.pass_rate_2 }}%{{ row.percent_cases_well_formed }}%{{ row.command }}{{ row.edit_format }}
- - - - - diff --git a/cecli/website/_posts/2025-05-07-gemini-cost.md b/cecli/website/_posts/2025-05-07-gemini-cost.md deleted file mode 100644 index 32c9d90414e..00000000000 --- a/cecli/website/_posts/2025-05-07-gemini-cost.md +++ /dev/null @@ -1,114 +0,0 @@ ---- -title: Gemini 2.5 Pro Preview 03-25 benchmark cost -excerpt: The $6.32 benchmark cost reported for Gemini 2.5 Pro Preview 03-25 was incorrect. -draft: false -nav_exclude: true ---- -{% if page.date %} - -{% endif %} - -# Gemini 2.5 Pro Preview 03-25 benchmark cost - -## Summary -The $6.32 cost reported to run the aider polyglot benchmark on -Gemini 2.5 Pro Preview 03-25 was incorrect. -The true cost was higher, possibly significantly so. -The incorrect cost has been removed from the leaderboard. - -An investigation determined the primary cause was that the litellm -package (used by aider for LLM API connections) was not properly including reasoning tokens in -the token counts it reported. -While an incorrect price-per-token entry for the model also existed in litellm's cost -database at that time, this was found not to be a contributing factor. -Aider's own internal, correct pricing data was utilized during the benchmark. - -## Resolution - -Litellm began correctly including reasoning tokens in the reported counts -on April 21, 2025 in -commit [a7db0df](https://github.com/BerriAI/litellm/commit/a7db0df0434bfbac2b68ebe1c343b77955becb4b). -This change was released in litellm v1.67.1. -Aider picked up this change April 28, 2025 when it upgraded its litellm dependency -from v1.65.7 to v1.67.4.post1 -in commit [9351f37](https://github.com/Aider-AI/aider/commit/9351f37). -That dependency change shipped on May 5, 2025 in aider v0.82.3. - -Unfortunately the 03-25 version of Gemini 2.5 Pro Preview is no longer available, -so it is not possible to re-run the benchmark to obtain an accurate cost. 
-As a possibly relevant comparison, the newer 05-06 version of Gemini 2.5 Pro Preview -completed the benchmark at a cost of about $37. - -## Investigation detail - -The version of litellm available at that time of the benchmark appears to have been -excluding reasoning tokens from the token counts it reported. -So even though aider had correct per-token pricing, it did not have the correct token counts -used during the benchmark. -This resulted in an underestimate of the benchmark costs. - -The incorrect litellm database entry does not appear to have affected the aider benchmark costs. -Aider maintains and uses its own database of costs for some models, and it contained -the correct pricing at the time of the benchmark. -Aider appears to have -loaded the correct cost data from its database and made use of it during the benchmark. - -Every aider benchmark report contains the git commit hash of the aider repository state used to -run the benchmark. -The -[benchmark run in question](https://github.com/Aider-AI/aider/blob/edbfec0ce4e1fe86735c915cb425b0d8636edc32/aider/website/_data/polyglot_leaderboard.yml#L814) -was built from -commit [0282574](https://github.com/Aider-AI/aider/commit/0282574). - -Additional runs of the benchmark from that build verified that the error in litellm's -model cost database appears not to have been a factor: - -- Aider's internal model database correctly overrides the litellm database, which contained an incorrect token cost at the time. -- The correct pricing is loaded from aider's internal model database and produces similar (incorrect) costs as the original run. -- Updating aider's internal model database with an absurdly high token cost resulted in an appropriately high benchmark cost report, demonstrating that the internal database costs were in effect. 
- -This specific build of aider was then updated with various versions of litellm using `git biset` -to identify the first litellm commit where reasoning tokens counts were correctly reported. - - - -## Timeline - -Below is the full timeline of git commits related to this issue in the aider and litellm repositories. -Each entry has a UTC timestamp, followed by the original literal timestamp obtained from the -relevant source. - -- 2025-04-04 19:54:45 UTC (Sat Apr 5 08:54:45 2025 +1300) - - Correct value `"output_cost_per_token": 0.000010` for `gemini/gemini-2.5-pro-preview-03-25` added to `aider/resources/model-metadata.json` - - Commit [eda796d](https://github.com/Aider-AI/aider/commit/eda796d) in aider. - -- 2025-04-05 16:20:01 UTC (Sun Apr 6 00:20:01 2025 +0800) - - First litellm commit of `gemini/gemini-2.5-pro-preview-03-25` metadata, with incorrect price `"output_cost_per_token": 0.0000010` - - Commit [cd0a1e6](https://github.com/BerriAI/litellm/commit/cd0a1e6) in litellm. - -- 2025-04-10 01:48:43 UTC (Wed Apr 9 18:48:43 2025 -0700) - - litellm commit updates `gemini/gemini-2.5-pro-preview-03-25` metadata, but not price - - Commit [ac4f32f](https://github.com/BerriAI/litellm/commit/ac4f32f) in litellm. - -- 2025-04-12 04:55:50 UTC (2025-04-12-04-55-50 UTC) - - Benchmark performed. - - Aider repo hash [0282574 recorded in benchmark results](https://github.com/Aider-AI/aider/blob/7fbeafa1cfd4ad83f7499417837cdfa6b16fe7a1/aider/website/_data/polyglot_leaderboard.yml#L814), without a "dirty" annotation, indicating that the benchmark was run on a clean checkout of the aider repo at commit [0282574](https://github.com/Aider-AI/aider/commit/0282574). - - Correct value `"output_cost_per_token": 0.000010` is in `aider/resources/model-metadata.json` at this commit [0282574](https://github.com/Aider-AI/aider/blob/0282574/aider/resources/model-metadata.json#L357). - -- 2025-04-12 15:06:39 UTC (Apr 12 08:06:39 2025 -0700) - - Benchmark results added to aider repo. 
- - Commit [7fbeafa](https://github.com/Aider-AI/aider/commit/7fbeafa) in aider. - -- 2025-04-12 15:20:04 UTC (Sat Apr 12 19:20:04 2025 +0400) - - litellm commit fixes `gemini/gemini-2.5-pro-preview-03-25` price metadata to `"output_cost_per_token": 0.00001` - - Commit [93037ea](https://github.com/BerriAI/litellm/commit/93037ea) in litellm. - -- 2025-04-22 05:48:00 UTC (Mon Apr 21 22:48:00 2025 -0700) - - Litellm started including reasoning tokens in token count reporting. - - Commit [a7db0df](https://github.com/BerriAI/litellm/commit/a7db0df0434bfbac2b68ebe1c343b77955becb4b) in litellm. - - This fix was released in litellm v1.67.1. - -- 2025-04-28 14:53:20 UTC (Mon Apr 28 07:53:20 2025 -0700) - - Aider upgraded its litellm dependency from v1.65.7 to v1.67.4.post1, which included the reasoning token count fix. - - Commit [9351f37](https://github.com/Aider-AI/aider/commit/9351f37) in aider. - - This dependency change shipped on May 5, 2025 in aider v0.82.3. diff --git a/cecli/website/_posts/2025-05-08-qwen3.md b/cecli/website/_posts/2025-05-08-qwen3.md deleted file mode 100644 index 80b580d1c2b..00000000000 --- a/cecli/website/_posts/2025-05-08-qwen3.md +++ /dev/null @@ -1,365 +0,0 @@ ---- -layout: post -title: Qwen3 benchmark results -excerpt: "Benchmark results for Qwen3 models using the Aider polyglot coding benchmark." -highlight_image: /assets/2025-05-08-qwen3.jpg -date: 2025-05-08 ---- - -# Qwen3 results on the aider polyglot benchmark - -As [previously discussed when Qwen2.5 was released](/2024/11/21/quantization.html), -details matter when working with open source models for AI coding. -Proprietary models are served by their creators or trusted providers with stable inference settings. -Open source models are wonderful because anyone can serve them, -but API providers can use very different inference settings, quantizations, etc. - -Below are collection of aider polyglot benchmark results for the new Qwen3 models. 
-Results are presented using both "diff" and "whole" -[edit formats](https://aider.chat/docs/more/edit-formats.html), -with various models settings, against various API providers. - -See details on the -[model settings](https://aider.chat/docs/config/adv-model-settings.html#model-settings) -used after the results table. - -{: .note } -This article is being updated as new results become available. -Also, some results were submitted by aider users and have not been verified. - -

Qwen3 results on the aider polyglot benchmark

- -
- -
- - - -
- -
- - - - - - - - - - - - - - - {% assign max_cost = 0 %} - {% for row in site.data.qwen3_leaderboard %} - {% if row.total_cost > max_cost %} - {% assign max_cost = row.total_cost %} - {% endif %} - {% endfor %} - {% if max_cost == 0 %}{% assign max_cost = 1 %}{% endif %} - {% assign edit_sorted = site.data.qwen3_leaderboard | sort: 'pass_rate_2' | reverse %} - {% for row in edit_sorted %} {% comment %} Add loop index for unique IDs {% endcomment %} - {% assign row_index = forloop.index0 %} - - - - - - - - - - - - - {% endfor %} - -
- - ModelPercent correctCostCommandCorrect edit formatEdit Format
- - - {{ row.model }} -
- {{ row.pass_rate_2 }}% -
- {% if row.total_cost > 0 %} -
- {% endif %} - {% assign rounded_cost = row.total_cost | times: 1.0 | round: 2 %} - {% if row.total_cost == 0 or rounded_cost == 0.00 %}{% else %}${{ rounded_cost }}{% endif %} -
{{ row.command }}{{ row.percent_cases_well_formed }}%{{ row.edit_format }}
- - - - - - -## No think, via official Alibaba API - -These results were obtained running against `https://dashscope.aliyuncs.com/compatible-mode/v1` -with no thinking. - -```bash -export OPENAI_API_BASE=https://dashscope.aliyuncs.com/compatible-mode/v1 -export OPENAI_API_KEY= -``` - -```yaml -- name: openai/qwen3-235b-a22b - use_temperature: 0.7 - streaming: false - extra_params: - stream: false - max_tokens: 16384 - top_p: 0.8 - top_k: 20 - temperature: 0.7 - enable_thinking: false - extra_body: - enable_thinking: false -``` - -## OpenRouter only TogetherAI, recommended /no_think settings - -These results were obtained with the -[recommended](https://huggingface.co/Qwen/Qwen3-235B-A22B#best-practices) -non-thinking model settings in `.aider.model.settings.yml`: - -```yaml -- name: openrouter/qwen/qwen3-235b-a22b - system_prompt_prefix: "/no_think" - use_temperature: 0.7 - extra_params: - max_tokens: 24000 - top_p: 0.8 - top_k: 20 - min_p: 0.0 - temperature: 0.7 - extra_body: - provider: - order: ["Together"] -``` - -And then running aider: - -```bash -aider --model openrouter/qwen/qwen3-235b-a22b -``` - - -## OpenRouter, all providers, default settings (thinking) - -These results were obtained by simply running aider as shown below, without any model specific settings. -This should have enabled thinking, assuming upstream API providers honor that convention for Qwen3. 
- -```bash -aider --model openrouter/qwen/qwen3-xxx -``` - -## VLLM, bfloat16, recommended /no_think - -These [benchmarks results were obtained by GitHub user AlongWY](https://github.com/Aider-AI/aider/pull/3908) -with the -[recommended](https://huggingface.co/Qwen/Qwen3-235B-A22B#best-practices) -non-thinking model settings in `.aider.model.settings.yml`: - -```yaml -- name: openai/ - system_prompt_prefix: "/no_think" - use_temperature: 0.7 - extra_params: - max_tokens: 24000 - top_p: 0.8 - top_k: 20 - min_p: 0.0 - temperature: 0.7 -``` - -And then running aider: - -```bash -aider --model openai/ --openai-api-base -``` diff --git a/cecli/website/assets/sample.aider.conf.yml b/cecli/website/assets/sample.cecli.conf.yml similarity index 99% rename from cecli/website/assets/sample.aider.conf.yml rename to cecli/website/assets/sample.cecli.conf.yml index f79b13dfb08..a0275eee566 100644 --- a/cecli/website/assets/sample.aider.conf.yml +++ b/cecli/website/assets/sample.cecli.conf.yml @@ -1,5 +1,5 @@ ########################################################## -# Sample .aider.conf.yml +# Sample .cecli.conf.yml # This file lists *all* the valid configuration entries. # Place in your home dir, or at the root of your git repo. ########################################################## @@ -413,7 +413,7 @@ ## Line endings to use when writing files (default: platform) #line-endings: platform -## Specify the config file (default: search for .aider.conf.yml in git root, cwd or home directory) +## Specify the config file (default: search for .cecli.conf.yml in git root, cwd or home directory) #config: xxx ## Specify the .env file to load (default: .env in git root) diff --git a/cecli/website/docs/config.md b/cecli/website/docs/config.md index c838fad9dd7..55b983ffa47 100644 --- a/cecli/website/docs/config.md +++ b/cecli/website/docs/config.md @@ -8,7 +8,7 @@ description: Information on all of cecli's settings and how to use them. 
cecli has many options which can be set with command line switches. -Most options can also be set in an `.aider.conf.yml` file +Most options can also be set in an `.cecli.conf.yml` file which can be placed in your home directory or at the root of your git repo. Or by setting environment variables like `CECLI_xxx` @@ -22,7 +22,7 @@ With a command line switch: $ cecli --dark-mode ``` -Using a `.aider.conf.yml` file: +Using a `.cecli.conf.yml` file: ```yaml dark-mode: true @@ -51,7 +51,7 @@ The `retries` option is a JSON object that can be configured with the following - `retry-backoff-factor`: The backoff factor to use between retries. - `retry-on-unavailable`: Whether to retry on 503 Service Unavailable errors. -Example usage in `.aider.conf.yml`: +Example usage in `.cecli.conf.yml`: ```yaml retries: diff --git a/cecli/website/docs/config/adv-model-settings.md b/cecli/website/docs/config/adv-model-settings.md index 85494e8dcaa..a96d8b80cbd 100644 --- a/cecli/website/docs/config/adv-model-settings.md +++ b/cecli/website/docs/config/adv-model-settings.md @@ -19,7 +19,7 @@ configure cecli with the proper token limits for unusual models. But, you can register context window limits and costs for models that aren't known -to cecli. Create a `.aider.model.metadata.json` file in one of these locations: +to cecli. Create a `.cecli.model.metadata.json` file in one of these locations: - Your home directory. - The root if your git repo. @@ -48,7 +48,7 @@ The json file should be a dictionary with an entry for each model, as follows: {: .tip } Use a fully qualified model name with a `provider/` at the front -in the `.aider.model.metadata.json` file. +in the `.cecli.model.metadata.json` file. For example, use `deepseek/deepseek-chat`, not just `deepseek-chat`. That prefix should match the `litellm_provider` field. @@ -71,7 +71,7 @@ a model that cecli doesn't know about. 
### Configuration file locations -You can override or add settings for any model by creating a `.aider.model.settings.yml` file in one of these locations: +You can override or add settings for any model by creating a `.cecli.model.settings.yml` file in one of these locations: - Your home directory. - The root of your git repo. diff --git a/cecli/website/docs/config/agent-mode.md b/cecli/website/docs/config/agent-mode.md index 3e714afc14d..5d47d9f42dc 100644 --- a/cecli/website/docs/config/agent-mode.md +++ b/cecli/website/docs/config/agent-mode.md @@ -276,7 +276,7 @@ use-enhanced-map: true #### Complete Configuration Example -Complete configuration example in YAML configuration file (`.aider.conf.yml` or `~/.aider.conf.yml`): +Complete configuration example in YAML configuration file (`.cecli.conf.yml` or `~/.cecli.conf.yml`): ```yaml # Enable Agent Mode diff --git a/cecli/website/docs/config/aider_conf.md b/cecli/website/docs/config/aider_conf.md index e1df3c1526d..3354be9a602 100644 --- a/cecli/website/docs/config/aider_conf.md +++ b/cecli/website/docs/config/aider_conf.md @@ -6,7 +6,7 @@ description: How to configure cecli with a YAML config file. # YAML config file -Most of cecli's options can be set in an `.aider.conf.yml` file. +Most of cecli's options can be set in an `.cecli.conf.yml` file. cecli will look for a this file in these locations: - Your home directory. @@ -40,20 +40,20 @@ read: [CONVENTIONS.md, anotherfile.txt, thirdfile.py] Below is a sample of the YAML config file, which you can also -[download from GitHub](https://github.com/dwash96/cecli/blob/main/cecli/website/assets/sample.aider.conf.yml). +[download from GitHub](https://github.com/dwash96/cecli/blob/main/cecli/website/assets/sample.cecli.conf.yml). ``` ########################################################## -# Sample .aider.conf.yml +# Sample .cecli.conf.yml # This file lists *all* the valid configuration entries. # Place in your home dir, or at the root of your git repo. 
########################################################## @@ -121,10 +121,10 @@ cog.outl("```") #list-models: xxx ## Specify a file with cecli model settings for unknown models -#model-settings-file: .aider.model.settings.yml +#model-settings-file: .cecli.model.settings.yml ## Specify a file with context window and costs for unknown models -#model-metadata-file: .aider.model.metadata.json +#model-metadata-file: .cecli.model.metadata.json ## Add a model alias (can be used multiple times) #alias: xxx @@ -209,16 +209,16 @@ cog.outl("```") ################ # History Files: -## Specify the chat input history file (default: .aider.input.history) -#input-history-file: .aider.input.history +## Specify the chat input history file (default: .cecli.input.history) +#input-history-file: .cecli.input.history -## Specify the chat history file (default: .aider.dev.history.md) -#chat-history-file: .aider.dev.history.md +## Specify the chat history file (default: .cecli.dev.history.md) +#chat-history-file: .cecli.dev.history.md ## Restore the previous chat history messages (default: False) #restore-chat-history: false -## Log the conversation with the LLM to this file (for example, .aider.llm.history) +## Log the conversation with the LLM to this file (for example, .cecli.llm.history) #llm-history-file: xxx ################## @@ -275,14 +275,14 @@ cog.outl("```") ## Enable/disable looking for a git repo (default: True) #git: true -## Enable/disable adding .aider* to .gitignore (default: True) +## Enable/disable adding .cecli* to .gitignore (default: True) #gitignore: true ## Enable/disable the addition of files listed in .gitignore to cecli's editing scope. 
#add-gitignore-files: false -## Specify the cecli ignore file (default: .aiderignore in git root) -#cecliignore: .aiderignore +## Specify the cecli ignore file (default: .cecli.ignore in git root) +#cecliignore: .cecli.ignore ## Only consider files in the current subtree of the git repository #subtree-only: false @@ -479,7 +479,7 @@ cog.outl("```") ## Line endings to use when writing files (default: platform) #line-endings: platform -## Specify the config file (default: search for .aider.conf.yml in git root, cwd or home directory) +## Specify the config file (default: search for .cecli.conf.yml in git root, cwd or home directory) #config: xxx ## Specify the .env file to load (default: .env in git root) diff --git a/cecli/website/docs/config/api-keys.md b/cecli/website/docs/config/api-keys.md index ce27fb0bc7c..5759bc96ca3 100644 --- a/cecli/website/docs/config/api-keys.md +++ b/cecli/website/docs/config/api-keys.md @@ -11,7 +11,7 @@ cecli lets you specify API keys in a few ways: - On the command line - As environment variables - In a `.env` file -- In your `.aider.conf.yml` config file +- In your `.cecli.conf.yml` config file --- @@ -78,7 +78,7 @@ DEEPSEEK_API_KEY=baz You can also set API keys in the -[`.aider.conf.yml` file](/docs/config/cecli_conf.html) +[`.cecli.conf.yml` file](/docs/config/cecli_conf.html) via the `api-key` entry: ``` diff --git a/cecli/website/docs/config/custom-commands.md b/cecli/website/docs/config/custom-commands.md index 6bad17a040b..781b4c4e56d 100644 --- a/cecli/website/docs/config/custom-commands.md +++ b/cecli/website/docs/config/custom-commands.md @@ -23,7 +23,7 @@ Custom commands can be configured using the `command-paths` configuration option ```yaml custom: - command-paths: [".aider/custom/commands", "~/my-commands/", "./special_command.py"] + command-paths: [".cecli/custom/commands", "~/my-commands/", "./special_command.py"] ``` The `command-paths` configuration option allows you to specify directories or files containing custom 
commands to load. @@ -144,7 +144,7 @@ class CustomCommand(BaseCommand): #### Complete Configuration Example -Complete configuration example in YAML configuration file (`.aider.conf.yml` or `~/.aider.conf.yml`): +Complete configuration example in YAML configuration file (`.cecli.conf.yml` or `~/.cecli.conf.yml`): ```yaml # Model configuration @@ -153,7 +153,7 @@ weak-model: gemini/gemini-3-flash-preview # Custom commands configuration custom: - command-paths: [".aider/custom/commands"] + command-paths: [".cecli/custom/commands"] # Other cecli options ... diff --git a/cecli/website/docs/config/custom-system-prompts.md b/cecli/website/docs/config/custom-system-prompts.md index d1ef3ae3219..0fb2082caa1 100644 --- a/cecli/website/docs/config/custom-system-prompts.md +++ b/cecli/website/docs/config/custom-system-prompts.md @@ -24,9 +24,9 @@ Custom system prompts can be configured using the `prompt_map` configuration opt ```yaml custom: prompt_map: - agent: .aider/custom/prompts/agent.yml - base: .aider/custom/prompts/base.yml - all: .aider/custom/prompts/all.yml + agent: .cecli/custom/prompts/agent.yml + base: .cecli/custom/prompts/base.yml + all: .cecli/custom/prompts/all.yml ``` The `prompt_map` configuration option allows you to specify which custom prompt files to use for different prompt types. 
@@ -74,7 +74,7 @@ main_system: | Here's a complete example of a custom agent prompt that changes the language and adds specific directives: ```yaml -# .aider/custom/prompts/agent.yml +# .cecli/custom/prompts/agent.yml # Agent prompts - inherits from base.yaml # Overrides specific prompts _inherits: [agent, base] @@ -93,7 +93,7 @@ main_system: | ### Complete Configuration Example -Complete configuration example in YAML configuration file (`.aider.conf.yml` or `~/.aider.conf.yml`): +Complete configuration example in YAML configuration file (`.cecli.conf.yml` or `~/.cecli.conf.yml`): ```yaml # Model configuration @@ -103,12 +103,12 @@ weak-model: gemini/gemini-3-flash-preview # Custom prompts configuration custom: prompt_map: - agent: .aider/custom/prompts/agent.yml - base: .aider/custom/prompts/my-base.yml + agent: .cecli/custom/prompts/agent.yml + base: .cecli/custom/prompts/my-base.yml # Custom commands configuration custom: - command-paths: [".aider/custom/commands/"] + command-paths: [".cecli/custom/commands/"] # Other cecli options agent: true diff --git a/cecli/website/docs/config/dotenv.md b/cecli/website/docs/config/dotenv.md index 9b7e5607e22..07595ddb4ca 100644 --- a/cecli/website/docs/config/dotenv.md +++ b/cecli/website/docs/config/dotenv.md @@ -101,10 +101,10 @@ cog.outl("```") #CECLI_LIST_MODELS= ## Specify a file with cecli model settings for unknown models -#CECLI_MODEL_SETTINGS_FILE=.aider.model.settings.yml +#CECLI_MODEL_SETTINGS_FILE=.cecli.model.settings.yml ## Specify a file with context window and costs for unknown models -#CECLI_MODEL_METADATA_FILE=.aider.model.metadata.json +#CECLI_MODEL_METADATA_FILE=.cecli.model.metadata.json ## Add a model alias (can be used multiple times) #CECLI_ALIAS= @@ -175,16 +175,16 @@ cog.outl("```") ################ # History Files: -## Specify the chat input history file (default: .aider.input.history) -#CECLI_INPUT_HISTORY_FILE=.aider.input.history +## Specify the chat input history file (default: 
.cecli.input.history) +#CECLI_INPUT_HISTORY_FILE=.cecli.input.history -## Specify the chat history file (default: .aider.dev.history.md) -#CECLI_CHAT_HISTORY_FILE=.aider.dev.history.md +## Specify the chat history file (default: .cecli.dev.history.md) +#CECLI_CHAT_HISTORY_FILE=.cecli.dev.history.md ## Restore the previous chat history messages (default: False) #CECLI_RESTORE_CHAT_HISTORY=false -## Log the conversation with the LLM to this file (for example, .aider.llm.history) +## Log the conversation with the LLM to this file (for example, .cecli.llm.history) #CECLI_LLM_HISTORY_FILE= ################## @@ -241,14 +241,14 @@ cog.outl("```") ## Enable/disable looking for a git repo (default: True) #CECLI_GIT=true -## Enable/disable adding .aider* to .gitignore (default: True) +## Enable/disable adding .cecli* to .gitignore (default: True) #CECLI_GITIGNORE=true ## Enable/disable the addition of files listed in .gitignore to cecli's editing scope. #CECLI_ADD_GITIGNORE_FILES=false -## Specify the cecli ignore file (default: .aiderignore in git root) -#CECLI_cecliIGNORE=.aiderignore +## Specify the cecli ignore file (default: .cecli.ignore in git root) +#CECLI_IGNORE=.cecli.ignore ## Only consider files in the current subtree of the git repository #CECLI_SUBTREE_ONLY=false diff --git a/cecli/website/docs/config/mcp.md b/cecli/website/docs/config/mcp.md index 19bd8e8420c..eec92c4acaf 100644 --- a/cecli/website/docs/config/mcp.md +++ b/cecli/website/docs/config/mcp.md @@ -22,7 +22,7 @@ You have two ways of sharing your MCP server configuration with cecli. 
### Config Files -You can configure MCP servers in your `.aider.conf.yml` file using either JSON or YAML format: +You can configure MCP servers in your `.cecli.conf.yml` file using either JSON or YAML format: #### JSON Configuration diff --git a/cecli/website/docs/config/model-aliases.md b/cecli/website/docs/config/model-aliases.md index cbd717e36fd..df726caca31 100644 --- a/cecli/website/docs/config/model-aliases.md +++ b/cecli/website/docs/config/model-aliases.md @@ -21,7 +21,7 @@ Multiple aliases can be defined by using the `--alias` option multiple times. Ea ## Configuration File Of course, -you can also define aliases in your [`.aider.conf.yml` file](https://cecli.dev/docs/config/cecli_conf.html): +you can also define aliases in your [`.cecli.conf.yml` file](https://cecli.dev/docs/config/cecli_conf.html): ```yaml alias: @@ -101,7 +101,7 @@ CECLI/Cecli supports model names with colon-separated suffixes (e.g., `gpt-5:hig ### Configuration File -Add a structure like the following to your config.yml file or create a `.aider.model.overrides.yml` file (or specify a different file with `--model-overrides-file` if there are global defaults you want): +Add a structure like the following to your config.yml file or create a `.cecli.model.overrides.yml` file (or specify a different file with `--model-overrides-file` if there are global defaults you want): ```yaml model-overrides: @@ -133,7 +133,7 @@ You can use these suffixes with any model argument: ```bash # Main model with high reasoning effort (using file) -cecli --model gpt-5:high --model-overrides-file .aider.model.overrides.yml +cecli --model gpt-5:high --model-overrides-file .cecli.model.overrides.yml # Main model with high reasoning effort (using direct JSON/YAML) cecli --model gpt-5:high --model-overrides '{"gpt-5": {"high": {"temperature": 0.8, "top_p": 0.9, "extra_body": {"reasoning_effort": "high"}}}}' diff --git a/cecli/website/docs/config/options.md b/cecli/website/docs/config/options.md index 
d9079030c1d..50b572e59ab 100644 --- a/cecli/website/docs/config/options.md +++ b/cecli/website/docs/config/options.md @@ -161,12 +161,12 @@ Aliases: ### `--model-settings-file MODEL_SETTINGS_FILE` Specify a file with cecli model settings for unknown models -Default: .aider.model.settings.yml +Default: .cecli.model.settings.yml Environment variable: `CECLI_MODEL_SETTINGS_FILE` ### `--model-metadata-file MODEL_METADATA_FILE` Specify a file with context window and costs for unknown models -Default: .aider.model.metadata.json +Default: .cecli.model.metadata.json Environment variable: `CECLI_MODEL_METADATA_FILE` ### `--alias ALIAS:MODEL` @@ -302,13 +302,13 @@ Environment variable: `CECLI_MAP_MAX_LINE_LENGTH` ## History Files: ### `--input-history-file INPUT_HISTORY_FILE` -Specify the chat input history file (default: .aider.input.history) -Default: .aider.input.history +Specify the chat input history file (default: .cecli.input.history) +Default: .cecli.input.history Environment variable: `CECLI_INPUT_HISTORY_FILE` ### `--chat-history-file CHAT_HISTORY_FILE` -Specify the chat history file (default: .aider.dev.history.md) -Default: .aider.dev.history.md +Specify the chat history file (default: .cecli.dev.history.md) +Default: .cecli.dev.history.md Environment variable: `CECLI_CHAT_HISTORY_FILE` ### `--restore-chat-history` @@ -320,7 +320,7 @@ Aliases: - `--no-restore-chat-history` ### `--llm-history-file LLM_HISTORY_FILE` -Log the conversation with the LLM to this file (for example, .aider.llm.history) +Log the conversation with the LLM to this file (for example, .cecli.llm.history) Environment variable: `CECLI_LLM_HISTORY_FILE` ## Output settings: @@ -412,7 +412,7 @@ Aliases: - `--no-git` ### `--gitignore` -Enable/disable adding .aider* to .gitignore (default: True) +Enable/disable adding .cecli* to .gitignore (default: True) Default: True Environment variable: `CECLI_GITIGNORE` Aliases: @@ -427,10 +427,10 @@ Aliases: - `--add-gitignore-files` - 
`--no-add-gitignore-files` -### `--aiderignore AIDERIGNORE` -Specify the cecli ignore file (default: .aiderignore in git root) -Default: .aiderignore -Environment variable: `CECLI_AIDERIGNORE` +### `--cecli-ignore CECLI_IGNORE` +Specify the cecli ignore file (default: .cecli.ignore in git root) +Default: .cecli.ignore +Environment variable: `CECLI_IGNORE` ### `--subtree-only` Only consider files in the current subtree of the git repository @@ -761,7 +761,7 @@ Default: platform Environment variable: `CECLI_LINE_ENDINGS` ### `--config CONFIG_FILE` -Specify the config file (default: search for .aider.conf.yml in git root, cwd or home directory) +Specify the config file (default: search for .cecli.conf.yml in git root, cwd or home directory) Aliases: - `-c CONFIG_FILE` - `--config CONFIG_FILE` @@ -823,68 +823,4 @@ Environment variable: `CECLI_EDITOR` Print shell completion script for the specified SHELL and exit. Supported shells: bash, tcsh, zsh. Example: cecli --shell-completions bash Environment variable: `CECLI_SHELL_COMPLETIONS` -## Deprecated model settings: - -### `--opus` -Use claude-3-opus-20240229 model for the main chat (deprecated, use --model) -Default: False -Environment variable: `CECLI_OPUS` - -### `--sonnet` -Use anthropic/claude-3-7-sonnet-20250219 model for the main chat (deprecated, use --model) -Default: False -Environment variable: `CECLI_SONNET` - -### `--haiku` -Use claude-3-5-haiku-20241022 model for the main chat (deprecated, use --model) -Default: False -Environment variable: `CECLI_HAIKU` - -### `--4` -Use gpt-4-0613 model for the main chat (deprecated, use --model) -Default: False -Environment variable: `CECLI_4` -Aliases: - - `--4` - - `-4` - -### `--4o` -Use gpt-4o model for the main chat (deprecated, use --model) -Default: False -Environment variable: `CECLI_4O` - -### `--mini` -Use gpt-4o-mini model for the main chat (deprecated, use --model) -Default: False -Environment variable: `CECLI_MINI` - -### `--4-turbo` -Use gpt-4-1106-preview 
model for the main chat (deprecated, use --model) -Default: False -Environment variable: `CECLI_4_TURBO` - -### `--35turbo` -Use gpt-3.5-turbo model for the main chat (deprecated, use --model) -Default: False -Environment variable: `CECLI_35TURBO` -Aliases: - - `--35turbo` - - `--35-turbo` - - `--3` - - `-3` - -### `--deepseek` -Use deepseek/deepseek-chat model for the main chat (deprecated, use --model) -Default: False -Environment variable: `CECLI_DEEPSEEK` - -### `--o1-mini` -Use o1-mini model for the main chat (deprecated, use --model) -Default: False -Environment variable: `CECLI_O1_MINI` - -### `--o1-preview` -Use o1-preview model for the main chat (deprecated, use --model) -Default: False -Environment variable: `CECLI_O1_PREVIEW` diff --git a/cecli/website/docs/config/reasoning.md b/cecli/website/docs/config/reasoning.md index 53046d5f2fa..b12ecd80944 100644 --- a/cecli/website/docs/config/reasoning.md +++ b/cecli/website/docs/config/reasoning.md @@ -156,7 +156,7 @@ If you're using a model through a different provider (like Azure or custom deplo you may need to [configure model settings](/docs/config/adv-model-settings.html) if you see errors related to temperature or system prompt. -Include settings for your new provider in `.aider.model.settings.yml` file +Include settings for your new provider in `.cecli.model.settings.yml` file at the root of your project or in your home directory. 
### Temperature, streaming and system prompt diff --git a/cecli/website/docs/config/security.md b/cecli/website/docs/config/security.md index b3d9f9e1749..6030f463603 100644 --- a/cecli/website/docs/config/security.md +++ b/cecli/website/docs/config/security.md @@ -93,7 +93,7 @@ cecli --security-config '{"allowed-domains": ["github.com", "docs.python.org", " ### YAML Configuration File Examples -In `.aider.conf.yml` or `~/.aider.conf.yml`: +In `.cecli.conf.yml` or `~/.cecli.conf.yml`: ```yaml # Restrict to GitHub and Python documentation diff --git a/cecli/website/docs/config/skills.md b/cecli/website/docs/config/skills.md index 9f1f8a15b58..9d0a6261eaa 100644 --- a/cecli/website/docs/config/skills.md +++ b/cecli/website/docs/config/skills.md @@ -74,7 +74,7 @@ Skills are configured through the `agent-config` parameter in the YAML configura - **`skills_includelist`**: Array of skill names to include (whitelist) - **`skills_excludelist`**: Array of skill names to exclude (blacklist) -Complete configuration example in YAML configuration file (`.aider.conf.yml` or `~/.aider.conf.yml`): +Complete configuration example in YAML configuration file (`.cecli.conf.yml` or `~/.cecli.conf.yml`): ```yaml # Enable Agent Mode diff --git a/cecli/website/docs/config/tui.md b/cecli/website/docs/config/tui.md index b9143584c88..3962766fd40 100644 --- a/cecli/website/docs/config/tui.md +++ b/cecli/website/docs/config/tui.md @@ -30,7 +30,7 @@ tui: true ### Complete Configuration Example -Complete configuration example in YAML configuration file (`.aider.conf.yml` or `~/.aider.conf.yml`). The base theme is pretty nice but if you want different colors and key bindings, do you thing: +Complete configuration example in YAML configuration file (`.cecli.conf.yml` or `~/.cecli.conf.yml`). 
The base theme is pretty nice but if you want different colors and key bindings, do your thing: ```yaml tui: true diff --git a/cecli/website/docs/faq.md b/cecli/website/docs/faq.md index 7dbd751aad8..71590935d44 100644 --- a/cecli/website/docs/faq.md +++ b/cecli/website/docs/faq.md @@ -57,11 +57,11 @@ code you want to work on and use the `--subtree-only` switch. This will tell cecli to ignore the repo outside of the directory you start in. -You can also create a `.aiderignore` file to tell cecli +You can also create a `.cecli.ignore` file to tell cecli to ignore parts of the repo that aren't relevant to your task. This file conforms to `.gitignore` syntax and conventions. For example, to focus only on specific directories in a monorepo, -you could create a `.aiderignore` file with: +you could create a `.cecli.ignore` file with: ``` # Ignore everything @@ -78,7 +78,7 @@ you could create a `.aiderignore` file with: !baz/** ``` -You can use `--aiderignore ` to name a specific file +You can use `--cecli-ignore ` to name a specific file to use for ignore patterns. You might have a few of these handy for when you want to work on frontend, backend, etc portions of your repo. @@ -335,7 +335,7 @@ But the LLM may not comply. Yes, you can now share cecli chat logs in a pretty way. -1. Copy the markdown logs you want to share from `.aider.dev.history.md` and make a github gist. Or publish the raw markdown logs on the web any way you'd like. +1. Copy the markdown logs you want to share from `.cecli.dev.history.md` and make a github gist. Or publish the raw markdown logs on the web any way you'd like. 
``` https://gist.github.com/cecli-AI/2087ab8b64034a078c0a209440ac8be0 diff --git a/cecli/website/docs/install/docker.md b/cecli/website/docs/install/docker.md index 837f35f7444..0e410a07ff2 100644 --- a/cecli/website/docs/install/docker.md +++ b/cecli/website/docs/install/docker.md @@ -17,10 +17,10 @@ docker run \ -it \ --user $(id -u):$(id -g) \ --volume $(pwd):/app dustinwashington/cecli \ - --volume $(pwd)/.aider.conf.yml:/.aider.conf.yml \ - --volume $(pwd)/.aider.env:/.aider/.env \ + --volume $(pwd)/.cecli.conf.yml:/.cecli.conf.yml \ + --volume $(pwd)/.cecli.env:/.cecli/.env \ [...other args...] \ - --config /app/.aider.conf.yml + --config /app/.cecli.conf.yml ``` ## How to use it diff --git a/cecli/website/docs/llms/anthropic.md b/cecli/website/docs/llms/anthropic.md index a88a55c559b..ebcfa132c17 100644 --- a/cecli/website/docs/llms/anthropic.md +++ b/cecli/website/docs/llms/anthropic.md @@ -51,7 +51,7 @@ cecli can work with Sonnet 3.7's new thinking tokens, but does not ask Sonnet to thinking tokens by default. Enabling thinking currently requires manual configuration. -You need to add the following to your `.aider.model.settings.yml` +You need to add the following to your `.cecli.model.settings.yml` [model settings file](/docs/config/adv-model-settings.html#model-settings). Adjust the `budget_tokens` value to change the target number of thinking tokens. 
diff --git a/cecli/website/docs/llms/github.md b/cecli/website/docs/llms/github.md index 2d2ae972406..96f859ff1ac 100644 --- a/cecli/website/docs/llms/github.md +++ b/cecli/website/docs/llms/github.md @@ -89,7 +89,7 @@ cecli --model openai/gpt-4o --- -## Optional config file (`~/.aider.conf.yml`) +## Optional config file (`~/.cecli.conf.yml`) ```yaml openai-api-base: https://api.githubcopilot.com diff --git a/cecli/website/docs/llms/ollama.md b/cecli/website/docs/llms/ollama.md index d2ba5e8071d..fbbe22917b2 100644 --- a/cecli/website/docs/llms/ollama.md +++ b/cecli/website/docs/llms/ollama.md @@ -64,7 +64,7 @@ This ensures data isn't silently discarded by Ollama. If you'd like you can configure a fixed sized context window instead with an -[`.aider.model.settings.yml` file](https://cecli.dev/docs/config/adv-model-settings.html#model-settings) +[`.cecli.model.settings.yml` file](https://cecli.dev/docs/config/adv-model-settings.html#model-settings) like this: ``` diff --git a/cecli/website/docs/llms/openrouter.md b/cecli/website/docs/llms/openrouter.md index c33b4f80f1a..f70fed8bb0d 100644 --- a/cecli/website/docs/llms/openrouter.md +++ b/cecli/website/docs/llms/openrouter.md @@ -49,7 +49,7 @@ You can control which OpenRouter providers are used for your requests in two way [OpenRouter account settings](https://openrouter.ai/settings/preferences). This disables those named providers across all the models that you access via OpenRouter. -2. By configuring "provider routing" in a `.aider.model.settings.yml` file. +2. By configuring "provider routing" in a `.cecli.model.settings.yml` file. 
Place that file in your home directory or the root of your git project, with entries like this: diff --git a/cecli/website/docs/llms/vertex.md b/cecli/website/docs/llms/vertex.md index a70d8e090b4..fb79f81b325 100644 --- a/cecli/website/docs/llms/vertex.md +++ b/cecli/website/docs/llms/vertex.md @@ -43,7 +43,7 @@ cecli --model vertex_ai/claude-3-5-sonnet@20240620 Or you can use the [YAML config](/docs/config/cecli_conf.html) to set the model to any of the models supported by Vertex AI. -Example `.aider.conf.yml` file: +Example `.cecli.conf.yml` file: ```yaml model: vertex_ai/claude-3-5-sonnet@20240620 diff --git a/cecli/website/docs/sessions.md b/cecli/website/docs/sessions.md index 9d077784db9..bf185146335 100644 --- a/cecli/website/docs/sessions.md +++ b/cecli/website/docs/sessions.md @@ -9,7 +9,7 @@ cecli provides session management commands that allow you to save, load, and man ## Session Commands ### `/save-session ` -Save the current chat session to a named file in `.aider/sessions/`. +Save the current chat session to a named file in `.cecli/sessions/`. ### Auto-Save and Auto-Load cecli can automatically save and load sessions using command line options: @@ -42,7 +42,7 @@ When `--auto-save` is enabled, cecli will automatically save your session as 'au - All files in the chat (editable, read-only, and read-only stubs) - Current model and edit format settings - Auto-commit, auto-lint, and auto-test settings -- Todo list content from `.aider.todo.txt` +- Todo list content from `.cecli.todo.txt` - Session metadata (timestamp, version) ### `/load-session ` @@ -60,7 +60,7 @@ Load a previously saved session by name or file path. - Restores the todo list content saved in the session ### `/list-sessions` -List all available saved sessions in `.aider/sessions/`. +List all available saved sessions in `.cecli/sessions/`. **Usage:** ``` @@ -76,7 +76,7 @@ List all available saved sessions in `.aider/sessions/`. 
## How Sessions Work ### Session Storage -Sessions are stored as JSON files in the `.aider/sessions/` directory within your project. Each session file contains: +Sessions are stored as JSON files in the `.cecli/sessions/` directory within your project. Each session file contains: ```json { @@ -148,16 +148,16 @@ Sessions are stored as JSON files in the `.aider/sessions/` directory within you - Session files include all file paths, so they work best when project structure is stable - External files (outside the project root) are stored with absolute paths - Missing files are skipped with warnings during loading -- The todo list file (`.aider.todo.txt`) is cleared on startup; it is restored when you load a session or when you update it during a run +- The todo list file (`.cecli.todo.txt`) is cleared on startup; it is restored when you load a session or when you update it during a run ### Version Control -- Consider adding `.aider/sessions/` to your `.gitignore` if sessions contain sensitive information +- Consider adding `.cecli/sessions/` to your `.gitignore` if sessions contain sensitive information ## Troubleshooting ### Session Not Found If `/load-session` reports "Session not found": -- Check that the session file exists in `.aider/sessions/` +- Check that the session file exists in `.cecli/sessions/` - Verify the session name matches exactly - Use `/list-sessions` to see available sessions @@ -166,7 +166,7 @@ If files are reported as missing during loading: - The files may have been moved or deleted - Session files store relative paths, so directory structure changes can affect this - External files must exist at their original locations -- The todo list (`.aider.todo.txt`) is cleared on startup unless restored from a loaded session +- The todo list (`.cecli.todo.txt`) is cleared on startup unless restored from a loaded session ### Corrupted Sessions If a session fails to load: diff --git a/cecli/website/docs/usage/conventions.md 
b/cecli/website/docs/usage/conventions.md index 8ac334ce74f..b348195823c 100644 --- a/cecli/website/docs/usage/conventions.md +++ b/cecli/website/docs/usage/conventions.md @@ -36,7 +36,7 @@ to find or contribute conventions files. ## Always load conventions You can also configure cecli to always load your conventions file -in the [`.aider.conf.yml` config file](https://cecli.dev/docs/config/cecli_conf.html): +in the [`.cecli.conf.yml` config file](https://cecli.dev/docs/config/cecli_conf.html): ```yaml diff --git a/cecli/website/examples/README.md b/cecli/website/examples/README.md index 80843df1d21..0c51f80212b 100644 --- a/cecli/website/examples/README.md +++ b/cecli/website/examples/README.md @@ -10,17 +10,17 @@ has_toc: false Below are some chat transcripts showing what it's like to code with aider. In the chats, you'll see a variety of coding tasks like generating new code, editing existing code, debugging, exploring unfamiliar code, etc. -* [**Hello World Flask App**](https://aider.chat/examples/hello-world-flask.html): Start from scratch and have aider create a simple Flask app with various endpoints, such as adding two numbers and calculating the Fibonacci sequence. +* [**Hello World Flask App**](https://cecli.dev/examples/hello-world-flask.html): Start from scratch and have aider create a simple Flask app with various endpoints, such as adding two numbers and calculating the Fibonacci sequence. -* [**Javascript Game Modification**](https://aider.chat/examples/2048-game.html): Dive into an existing open-source repo, and get aider's help to understand it and make modifications. +* [**Javascript Game Modification**](https://cecli.dev/examples/2048-game.html): Dive into an existing open-source repo, and get aider's help to understand it and make modifications. 
-* [**Complex Multi-file Change with Debugging**](https://aider.chat/examples/complex-change.html): Aider makes a complex code change that is coordinated across multiple source files, and resolves bugs by reviewing error output and doc snippets. +* [**Complex Multi-file Change with Debugging**](https://cecli.dev/examples/complex-change.html): Aider makes a complex code change that is coordinated across multiple source files, and resolves bugs by reviewing error output and doc snippets. -* [**Create a Black Box Test Case**](https://aider.chat/examples/add-test.html): Aider creates a "black box" test case without access to the source of the method being tested, using only a [high level map of the repository based on ctags](https://aider.chat/docs/ctags.html). +* [**Create a Black Box Test Case**](https://cecli.dev/examples/add-test.html): Aider creates a "black box" test case without access to the source of the method being tested, using only a [high level map of the repository based on ctags](https://cecli.dev/docs/ctags.html). -* [**Honor the NO_COLOR env var**](https://aider.chat/examples/no-color.html): The user pastes the NO_COLOR spec from no-color.org into the chat, and aider modifies the application to conform. +* [**Honor the NO_COLOR env var**](https://cecli.dev/examples/no-color.html): The user pastes the NO_COLOR spec from no-color.org into the chat, and aider modifies the application to conform. -* [**Download, analyze and plot US Census data**](https://aider.chat/examples/census.html): Aider downloads census data, suggests some hypotheses to test, tests one and then summarizes and plots a graph of the results. +* [**Download, analyze and plot US Census data**](https://cecli.dev/examples/census.html): Aider downloads census data, suggests some hypotheses to test, tests one and then summarizes and plots a graph of the results. 
* [**Semantic Search & Replace**](semantic-search-replace.md): Updating a collection of function calls, which requires dealing with various formatting and semantic differences in the various function call sites. diff --git a/cecli/website/examples/add-test.md b/cecli/website/examples/add-test.md index 4aae2d88930..3db295d4248 100644 --- a/cecli/website/examples/add-test.md +++ b/cecli/website/examples/add-test.md @@ -11,7 +11,7 @@ access to the source code of the function being tested or any of the other code in the repo.** Instead, aider is operating entirely off a -[high level map of the repository based on ctags](https://aider.chat/docs/ctags.html), +[high level map of the repository based on ctags](https://cecli.dev/docs/ctags.html), which provides details on all the symbols and function signatures in the repo. Using only the meta-data in the map, the LLM is able to figure out how to call the method to be tested, as well as how to instantiate multiple class objects that are required to prepare for the test. diff --git a/cecli/website/examples/update-docs.md b/cecli/website/examples/update-docs.md index b580ed528f8..9616ceb6c32 100644 --- a/cecli/website/examples/update-docs.md +++ b/cecli/website/examples/update-docs.md @@ -16,15 +16,15 @@ In this chat transcript, the user asks to automatically update the Usage docs ba ```markdown README.md <<<<<<< ORIGINAL -- `--history-file HISTORY_FILE`: Specify the chat input history file (default: .aider.history). Override the default with the environment variable `CECLI_HISTORY_FILE`. +- `--history-file HISTORY_FILE`: Specify the chat input history file (default: .cecli.history). Override the default with the environment variable `CECLI_HISTORY_FILE`. - `--model MODEL`: Specify the model to use for the main chat (default: gpt-4). Override the default with the environment variable `CECLI_MODEL`. - `-3`: Use gpt-3.5-turbo model for the main chat (basically won't work). No environment variable override. 
- `--no-pretty`: Disable pretty, colorized output. Override the default with the environment variable `CECLI_PRETTY` (default: 1 for enabled, 0 for disabled). - `--show-diffs`: Show diffs when committing changes (default: False). Override the default with the environment variable `CECLI_SHOW_DIFFS` (default: 0 for False, 1 for True). - `--no-auto-commits`: Disable auto commit of changes. Override the default with the environment variable `CECLI_AUTO_COMMITS` (default: 1 for enabled, 0 for disabled). ======= -- `--input-history-file INPUT_HISTORY_FILE`: Specify the chat input history file (default: .aider.input.history). Override the default with the environment variable `CECLI_INPUT_HISTORY_FILE`. -- `--chat-history-file CHAT_HISTORY_FILE`: Specify the chat history file (default: .aider.chat.history.md). Override the default with the environment variable `CECLI_CHAT_HISTORY_FILE`. +- `--input-history-file INPUT_HISTORY_FILE`: Specify the chat input history file (default: .cecli.input.history). Override the default with the environment variable `CECLI_INPUT_HISTORY_FILE`. +- `--chat-history-file CHAT_HISTORY_FILE`: Specify the chat history file (default: .cecli.chat.history.md). Override the default with the environment variable `CECLI_CHAT_HISTORY_FILE`. - `--model MODEL`: Specify the model to use for the main chat (default: gpt-4). Override the default with the environment variable `CECLI_MODEL`. - `-3`: Use gpt-3.5-turbo model for the main chat (basically won't work). No environment variable override. - `--no-pretty`: Disable pretty, colorized output. Override the default with the environment variable `CECLI_PRETTY` (default: 1 for enabled, 0 for disabled). diff --git a/cecli/website/index.html b/cecli/website/index.html index 15abbf95904..9095b5e0f43 100644 --- a/cecli/website/index.html +++ b/cecli/website/index.html @@ -235,7 +235,7 @@

Community & Resources

@@ -254,7 +254,7 @@

Community & Resources

// Create new image element const logoImg = new Image(); logoImg.src = '/assets/cecli-temp-logo.svg'; - logoImg.alt = 'Aider Logo'; + logoImg.alt = 'Cecli Logo'; logoImg.style.height = '3rem'; logoImg.style.verticalAlign = 'middle'; diff --git a/tests/basic/test_coder.py b/tests/basic/test_coder.py index 8d568f64a89..031440b8476 100644 --- a/tests/basic/test_coder.py +++ b/tests/basic/test_coder.py @@ -866,7 +866,7 @@ async def test_skip_gitignored_files_on_init(self): async def test_check_for_urls(self): io = InputOutput(yes=True) mock_args = MagicMock() - mock_args.yes_always_commands = False + mock_args.yes_always_commands = True mock_args.disable_scraping = False coder = await Coder.create(self.GPT35, None, io=io, args=mock_args) @@ -1030,7 +1030,7 @@ async def test_detect_urls_enabled(self): with GitTemporaryDirectory(): io = InputOutput(yes=True) mock_args = MagicMock() - mock_args.yes_always_commands = False + mock_args.yes_always_commands = True mock_args.disable_scraping = False coder = await Coder.create(self.GPT35, "diff", io=io, detect_urls=True, args=mock_args) @@ -1387,7 +1387,7 @@ async def test_get_user_language(self): async def test_architect_coder_auto_accept_true(self): with GitTemporaryDirectory(): io = InputOutput(yes=True) - io.confirm_ask = AsyncMock(return_value=False) + io.confirm_ask = AsyncMock(return_value=True) coder = await Coder.create(self.GPT35, edit_format="architect", io=io) coder.auto_accept_architect = True @@ -1405,8 +1405,9 @@ async def test_architect_coder_auto_accept_true(self): ): with pytest.raises(SwitchCoderSignal): await coder.reply_completed() - - io.confirm_ask.assert_called_once_with("Edit the files?", allow_tweak=False) + io.confirm_ask.assert_called_once_with( + "Edit the files?", allow_tweak=False, explicit_yes_required=False + ) mock_editor.generate.assert_called_once() async def test_architect_coder_auto_accept_false_confirmed(self): @@ -1431,7 +1432,9 @@ async def 
test_architect_coder_auto_accept_false_confirmed(self): with pytest.raises(SwitchCoderSignal): await coder.reply_completed() - io.confirm_ask.assert_called_once_with("Edit the files?", allow_tweak=False) + io.confirm_ask.assert_called_once_with( + "Edit the files?", allow_tweak=False, explicit_yes_required=True + ) mock_editor.generate.assert_called_once() async def test_architect_coder_auto_accept_false_rejected(self): @@ -1451,7 +1454,9 @@ async def test_architect_coder_auto_accept_false_rejected(self): result = await coder.reply_completed() assert result is None - io.confirm_ask.assert_called_once_with("Edit the files?", allow_tweak=False) + io.confirm_ask.assert_called_once_with( + "Edit the files?", allow_tweak=False, explicit_yes_required=True + ) mock_create.assert_not_called() async def test_process_tool_calls_none_response(self): diff --git a/tests/basic/test_hashline.py b/tests/basic/test_hashline.py new file mode 100644 index 00000000000..30c5c27b7ac --- /dev/null +++ b/tests/basic/test_hashline.py @@ -0,0 +1,911 @@ +"""Tests for hashline.py functions.""" + +import pytest + +from cecli.helpers.hashline import ( + HashlineError, + apply_hashline_operation, + extract_hashline_range, + find_hashline_by_exact_match, + find_hashline_by_fragment, + find_hashline_range, + get_hashline_content_diff, + get_hashline_diff, + hashline, + int_to_2digit_52, + normalize_hashline, + parse_hashline, + strip_hashline, +) + + +def test_int_to_2digit_52_basic(): + """Test basic integer to 2-digit base52 conversion.""" + assert int_to_2digit_52(0) == "aa" + assert int_to_2digit_52(1) == "ab" + assert int_to_2digit_52(25) == "az" + assert int_to_2digit_52(26) == "aA" + assert int_to_2digit_52(51) == "aZ" + assert int_to_2digit_52(52) == "ba" + assert int_to_2digit_52(2703) == "ZZ" # 52^2 - 1 + + +def test_int_to_2digit_52_wraparound(): + """Test that values wrap around modulo 2704.""" + assert int_to_2digit_52(2704) == "aa" # wraps around + assert int_to_2digit_52(2705) == 
"ab" + assert int_to_2digit_52(5408) == "aa" # 2 * 2704 + + +def test_hashline_basic(): + """Test basic hashline functionality.""" + text = "Hello\nWorld\nTest" + result = hashline(text) + + # Check that we have 3 lines + lines = result.splitlines() + assert len(lines) == 3 + + # Check each line has the format "line_number|hash|content" (new format) + for i, line in enumerate(lines, start=1): + assert "|" in line + parts = line.split("|", 2) + assert len(parts) == 3 + # Check line number matches expected + assert parts[0] == str(i) + # Check hash is 2 characters + hash_part = parts[1] + assert len(hash_part) == 2 + # Check all hash characters are valid base52 + for char in hash_part: + assert char in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + + +def test_hashline_with_start_line(): + """Test hashline with custom start line.""" + text = "Line 1\nLine 2" + result = hashline(text, start_line=10) + + lines = result.splitlines() + assert len(lines) == 2 + # Check format is line_number|hash|content (new format) + assert "10|" in lines[0] + assert "11|" in lines[1] + # Extract hash fragments to verify they're valid + hash1 = lines[0].split("|")[1] + hash2 = lines[1].split("|")[1] + assert len(hash1) == 2 + assert len(hash2) == 2 + for char in hash1 + hash2: + assert char in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + + +def test_hashline_empty_string(): + """Test hashline with empty string.""" + result = hashline("") + assert result == "" + + +def test_hashline_single_line(): + """Test hashline with single line.""" + text = "Single line" + result = hashline(text) + lines = result.splitlines() + assert len(lines) == 1 + # Check format is line_number|hash|content (new format) + assert "1|" in lines[0] + assert lines[0].endswith("|Single line") + # Extract hash fragment to verify it's valid + hash_part = lines[0].split("|")[1] + for char in hash_part: + assert char in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + + +def 
test_hashline_preserves_newlines(): + """Test that hashline preserves newline characters.""" + text = "Line 1\nLine 2\n" + result = hashline(text) + # Should end with newline since input ended with newline + assert result.endswith("\n") + lines = result.splitlines(keepends=True) + # splitlines(keepends=True) doesn't preserve trailing empty lines + # So we should have 2 lines, both ending with newline + assert len(lines) == 2 + assert lines[0].endswith("\n") + assert lines[1].endswith("\n") + + +def test_strip_hashline_basic(): + """Test basic strip_hashline functionality.""" + # Create a hashline-formatted text with correct format: line_number|hash|content + text = "1|ab|Hello\n2|cd|World\n3|ef|Test" + stripped = strip_hashline(text) + assert stripped == "Hello\nWorld\nTest" + + +def test_strip_hashline_with_negative_line_numbers(): + """Test strip_hashline with negative line numbers.""" + # Note: Negative line numbers are no longer supported since line numbers in files are always positive + # But the regex still handles them if they appear + text = "-1|ab|Hello\n0|cd|World\n1|ef|Test" + stripped = strip_hashline(text) + assert stripped == "Hello\nWorld\nTest" + + +def test_strip_hashline_mixed_lines(): + """Test strip_hashline with mixed hashline and non-hashline lines.""" + text = "1|ab|Hello\nPlain line\n3|cd|World" + stripped = strip_hashline(text) + assert stripped == "Hello\nPlain line\nWorld" + + +def test_strip_hashline_preserves_newlines(): + """Test that strip_hashline preserves newline characters.""" + text = "1|ab|Line 1\n2|cd|Line 2\n" + stripped = strip_hashline(text) + assert stripped == "Line 1\nLine 2\n" + + +def test_strip_hashline_empty_string(): + """Test strip_hashline with empty string.""" + assert strip_hashline("") == "" + + +def test_round_trip(): + """Test that strip_hashline can reverse hashline.""" + original = "Hello\nWorld\nTest\nMulti\nLine\nText" + hashed = hashline(original) + stripped = strip_hashline(hashed) + assert stripped == 
original + + +def test_hashline_deterministic(): + """Test that hashline produces the same output for the same input.""" + text = "Hello World" + result1 = hashline(text) + result2 = hashline(text) + assert result1 == result2 + + +def test_hashline_different_inputs(): + """Test that different inputs produce different hashes.""" + text1 = "Hello" + text2 = "World" + result1 = hashline(text1) + result2 = hashline(text2) + + # Extract hashes (hash is second part in new format: line_num|hash|content) + hash1 = result1.split("|")[1] + hash2 = result2.split("|")[1] + + # Hashes should be different (very high probability) + assert hash1 != hash2 + + +def test_parse_hashline(): + """Test parse_hashline function.""" + # Test basic parsing (new format: line_num|hash) + hash_fragment, line_num_str, line_num = parse_hashline("10|ab") + assert hash_fragment == "ab" + assert line_num_str == "10" + assert line_num == 10 + + # Test with trailing pipe + hash_fragment, line_num_str, line_num = parse_hashline("5|cd|") + assert hash_fragment == "cd" + assert line_num_str == "5" + assert line_num == 5 + + # Test with old order but new separator (hash|line_num) + hash_fragment, line_num_str, line_num = parse_hashline("ef|3") + assert hash_fragment == "ef" + assert line_num_str == "3" + assert line_num == 3 + + # Test invalid format + with pytest.raises(HashlineError, match="Invalid hashline format"): + parse_hashline("invalid") + + with pytest.raises(HashlineError, match="Invalid hashline format"): + parse_hashline("ab") # Missing line number + + # Test that colons are no longer supported + with pytest.raises(HashlineError, match="Invalid hashline format"): + parse_hashline("10:ab") + + +def test_normalize_hashline(): + """Test normalize_hashline function.""" + # Test new format (should return unchanged) + assert normalize_hashline("10|ab") == "10|ab" + + # Test old order with new separator (should normalize to new order) + assert normalize_hashline("ab|10") == "10|ab" + + # Test that 
colons are no longer supported + with pytest.raises(HashlineError, match="Invalid hashline format"): + normalize_hashline("10:ab") + + +def test_find_hashline_by_exact_match(): + """Test find_hashline_by_exact_match function.""" + hashed_lines = [ + "1|ab|Hello", + "2|cd|World", + "3|ef|Test", + ] + + # Test exact match found + index = find_hashline_by_exact_match(hashed_lines, "cd", "2") + assert index == 1 + + # Test exact match not found + index = find_hashline_by_exact_match(hashed_lines, "wrong", "2") + assert index is None + + # Test line number doesn't match + index = find_hashline_by_exact_match(hashed_lines, "cd", "5") + assert index is None + + +def test_find_hashline_by_fragment(): + """Test find_hashline_by_fragment function.""" + hashed_lines = [ + "1|ab|Hello", + "2|cd|World", + "3|ab|Test", # Same hash fragment as line 1 + "4|ef|Another", + ] + + # Test fragment found + index = find_hashline_by_fragment(hashed_lines, "cd") + assert index == 1 + + # Test fragment found (first occurrence) + index = find_hashline_by_fragment(hashed_lines, "ab") + assert index == 0 # Should return first occurrence + + # Test fragment not found + index = find_hashline_by_fragment(hashed_lines, "zz") + assert index is None + + +def test_find_hashline_range(): + """Test find_hashline_range function.""" + # Create hashed content + original = "Line 1\nLine 2\nLine 3\nLine 4\nLine 5" + hashed = hashline(original) + hashed_lines = hashed.splitlines(keepends=True) + + # Get hash fragments for testing (hash is first part before colon) + # Get hash fragments for testing (hash is second part in new format) + line1_hash = hashed_lines[0].split("|")[1] + line3_hash = hashed_lines[2].split("|")[1] + line5_hash = hashed_lines[4].split("|")[1] + + # Test exact match + start_idx, end_idx = find_hashline_range( + hashed_lines, + f"1|{line1_hash}", + f"3|{line3_hash}", + allow_exact_match=True, + ) + assert start_idx == 0 + assert end_idx == 2 + + # Test fragment match (no exact match) + 
start_idx, end_idx = find_hashline_range( + hashed_lines, + f"99|{line1_hash}", # Wrong line number + f"101|{line3_hash}", # Wrong line number + allow_exact_match=True, + ) + assert start_idx == 0 # Should find by fragment + assert end_idx == 2 # Should calculate distance + + # Test with allow_exact_match=False + start_idx, end_idx = find_hashline_range( + hashed_lines, + f"1|{line1_hash}", + f"5|{line5_hash}", + allow_exact_match=False, + ) + assert start_idx == 0 + assert end_idx == 4 + + # Test error cases + with pytest.raises(HashlineError, match="Start line hash fragment 'zz' not found in file"): + find_hashline_range(hashed_lines, "1|zz", "3|zz") + + +def test_apply_hashline_operation_insert(): + """Test apply_hashline_operation with insert operation.""" + original = "Line 1\nLine 2\nLine 3" + hashed = hashline(original) + + # Get hash fragment for line 2 (hash is second part in new format) + hashed_lines = hashed.splitlines() + line2_hash = hashed_lines[1].split("|")[1] + + # Insert after line 2 + new_content = apply_hashline_operation( + original, + f"2|{line2_hash}", + operation="insert", + text="Inserted line", + ) + + expected = "Line 1\nLine 2\nInserted line\nLine 3" + assert new_content == expected + + +def test_apply_hashline_operation_delete(): + """Test apply_hashline_operation with delete operation.""" + original = "Line 1\nLine 2\nLine 3\nLine 4\nLine 5" + hashed = hashline(original) + + # Get hash fragments (hash is second part in new format) + hashed_lines = hashed.splitlines() + line2_hash = hashed_lines[1].split("|")[1] + line4_hash = hashed_lines[3].split("|")[1] + + # Delete lines 2-4 + new_content = apply_hashline_operation( + original, + f"2|{line2_hash}", + f"4|{line4_hash}", + operation="delete", + ) + + expected = "Line 1\nLine 5" + assert new_content == expected + + +def test_extract_hashline_range(): + """Test extract_hashline_range function.""" + original = "Line 1\nLine 2\nLine 3\nLine 4\nLine 5" + hashed = hashline(original) + + # 
Get hash fragments (hash is second part in new format) + hashed_lines = hashed.splitlines() + line2_hash = hashed_lines[1].split("|")[1] + line4_hash = hashed_lines[3].split("|")[1] + + # Extract lines 2-4 + extracted = extract_hashline_range( + original, + f"2|{line2_hash}", + f"4|{line4_hash}", + ) + + # Extract should return hashed content + expected_hashed_range = "\n".join(hashed_lines[1:4]) + "\n" + assert extracted == expected_hashed_range + + +def test_get_hashline_diff(): + """Test get_hashline_diff function.""" + original = "Line 1\nLine 2\nLine 3\nLine 4\nLine 5" + hashed = hashline(original) + + # Get hash fragments (hash is second part in new format) + hashed_lines = hashed.splitlines() + line2_hash = hashed_lines[1].split("|")[1] + line4_hash = hashed_lines[3].split("|")[1] + + # Get diff for replace operation + diff = get_hashline_diff( + original, + f"2|{line2_hash}", + f"4|{line4_hash}", + operation="replace", + text="New line 2\nNew line 3\nNew line 4", + ) + + # Diff should not be empty + assert diff != "" + # Diff should contain the changed lines + assert "Line 2" in diff or "New line 2" in diff + + +def test_get_hashline_content_diff(): + """Test get_hashline_content_diff function.""" + old_content = "1|ab|Hello\n2|cd|World\n3|ef|Test" + new_content = "1|ab|Hello\n2|cd|Changed\n3|ef|Test" + + diff = get_hashline_content_diff(old_content, new_content) + + # Diff should not be empty + assert diff != "" + # Diff should show the change + assert "World" in diff or "Changed" in diff + + # Test with identical content + diff = get_hashline_content_diff(old_content, old_content) + assert diff == "" + + +def test_apply_hashline_operations_complex_sequence(): + """Test 1: Sequence of 5+ mixed operations on 20+ lines.""" + from cecli.helpers.hashline import apply_hashline_operations + + original = "\n".join([f"Line {i + 1}" for i in range(25)]) + print(f"\nTest: Complex sequence\nOriginal (first 10 lines): {original.splitlines()[:10]}") + hashed = 
hashline(original) + h_lines = hashed.splitlines() + + # Get hashes for lines 2, 5, 10, 15, 20 + h2 = h_lines[1].split("|")[1] + h5 = h_lines[4].split("|")[1] + h10 = h_lines[9].split("|")[1] + h15 = h_lines[14].split("|")[1] + h20 = h_lines[19].split("|")[1] + + ops = [ + { + "operation": "replace", + "start_line_hash": f"2|{h2}", + "end_line_hash": f"2|{h2}", + "text": "New Line 2", + }, + {"operation": "insert", "start_line_hash": f"5|{h5}", "text": "Inserted after 5"}, + {"operation": "delete", "start_line_hash": f"10|{h10}", "end_line_hash": f"10|{h10}"}, + { + "operation": "replace", + "start_line_hash": f"15|{h15}", + "end_line_hash": f"15|{h15}", + "text": "New Line 15", + }, + {"operation": "insert", "start_line_hash": f"20|{h20}", "text": "Inserted after 20"}, + ] + + print(f"Operations: {ops}") + + modified, success, failed = apply_hashline_operations(original, ops) + + print(f"Success indices: {success}") + print(f"Failed: {len(failed)}") + print(f"Modified (first 15 lines): {modified.splitlines()[:15]}") + + assert len(success) == 5 + assert len(failed) == 0 + mod_lines = modified.splitlines() + assert "New Line 2" in mod_lines + assert "Inserted after 5" in mod_lines + assert "Line 10" not in mod_lines + assert "New Line 15" in mod_lines + assert "Inserted after 20" in mod_lines + + +def test_apply_hashline_operations_overlapping(): + """Test 2: Overlapping ranges.""" + from cecli.helpers.hashline import apply_hashline_operations + + original = "\n".join([f"Line {i + 1}" for i in range(20)]) + print(f"\nTest: Overlapping ranges\nOriginal (first 15 lines): {original.splitlines()[:15]}") + hashed = hashline(original) + h_lines = hashed.splitlines() + + h5 = h_lines[4].split("|")[1] + h10 = h_lines[9].split("|")[1] + h15 = h_lines[14].split("|")[1] + + # Op 1: Replace 5-15 + # Op 2: Replace 8-12 (inside Op 1) + # Since it applies bottom-to-top, we need to see how it handles it. 
+ # Actually, apply_hashline_operations resolves indices on the ORIGINAL hashed content. + ops = [ + { + "operation": "replace", + "start_line_hash": f"5|{h5}", + "end_line_hash": f"15|{h15}", + "text": "Big Replace", + }, + { + "operation": "replace", + "start_line_hash": f"10|{h10}", + "end_line_hash": f"10|{h10}", + "text": "Small Replace", + }, + ] + + print(f"Operations: {ops}") + + modified, success, failed = apply_hashline_operations(original, ops) + + print(f"Success indices: {success}") + print(f"Failed: {len(failed)}") + print(f"Modified lines: {modified.splitlines()}") + + # Bottom-to-top application: + # 1. Small Replace at index 9 + # 2. Big Replace at indices 4-14 + # The Big Replace will overwrite the Small Replace if they are applied in that order on the same string. + # However, the implementation applies them sequentially to the content. + mod_lines = modified.splitlines() + assert "Big Replace" in mod_lines + # If Op 1 is applied after Op 2 (reverse order), Op 1 replaces the range that included Op 2's result. 
+ assert "Small Replace" not in mod_lines + + +def test_apply_hashline_operations_duplicate_hashes(): + """Test 3: Duplicate hash values resolution with empty lines and content.""" + from cecli.helpers.hashline import apply_hashline_operations + + original = "Same\n\nNormal Content 1\nSame\n\nNormal Content 2\nSame\n\nNormal Content 3\nSame" + print(f"\nTest: Duplicate hashes\nOriginal: {original.splitlines()}") + hashed = hashline(original) + h_lines = hashed.splitlines() + + # Get actual hashes for each "Same" line + h_val_2 = h_lines[3].split("|")[1] + h_val_4 = h_lines[9].split("|")[1] + + # Target the 2nd (line 4) and 4th (line 10) "Same" using their specific hashes + ops = [ + { + "operation": "replace", + "start_line_hash": f"4|{h_val_2}", + "end_line_hash": f"4|{h_val_2}", + "text": "Changed 2", + }, + { + "operation": "replace", + "start_line_hash": f"10|{h_val_4}", + "end_line_hash": f"10|{h_val_4}", + "text": "Changed 4", + }, + ] + + print(f"Operations: {ops}") + + modified, success, failed = apply_hashline_operations(original, ops) + + print(f"Success indices: {success}") + print(f"Failed: {len(failed)}") + print(f"Modified: {modified.splitlines()}") + + mod_lines = modified.splitlines() + assert mod_lines[3] == "Changed 2" + assert mod_lines[9] == "Changed 4" + assert mod_lines[0] == "Same" + assert mod_lines[6] == "Same" + + +def test_apply_hashline_operations_empty_lines_duplicates(): + """Test 6: Complex empty lines and duplicate hashes with multiple operations.""" + from cecli.helpers.hashline import apply_hashline_operations + + original = "Header\n\nBlock 1\n\nContent\n\nBlock 2\n\nFooter" + print(f"\nTest: Empty lines duplicates\nOriginal: {original.splitlines()}") + # In this case, all empty lines will likely have the same hash fragment + # because they have the same content (empty string). 
+ hashed = hashline(original) + h_lines = hashed.splitlines() + + # Find hash for an empty line (e.g., line 2) + empty_hash = h_lines[1].split("|")[1] + print(f"Empty line hash: {empty_hash}") + + # Operations targeting specific empty lines by their line number + ops = [ + { + "operation": "replace", + "start_line_hash": f"2|{empty_hash}", + "end_line_hash": f"2|{empty_hash}", + "text": "# Comment 1", + }, + { + "operation": "replace", + "start_line_hash": f"6|{empty_hash}", + "end_line_hash": f"6|{empty_hash}", + "text": "# Comment 2", + }, + { + "operation": "insert", + "start_line_hash": f"8|{empty_hash}", + "text": "# Inserted after empty line 8", + }, + ] + + print(f"Operations: {ops}") + + modified, success, failed = apply_hashline_operations(original, ops) + + print(f"Success indices: {success}") + print(f"Failed: {len(failed)}") + print(f"Modified: {modified.splitlines()}") + + assert len(success) == 3 + assert len(failed) == 0 + + mod_lines = modified.splitlines() + # Line 2 (index 1) should be replaced + assert mod_lines[1] == "# Comment 1" + # Line 4 (index 3) should still be empty + assert mod_lines[3] == "" + # Line 6 (index 5) should be replaced + assert mod_lines[5] == "# Comment 2" + # Line 8 (index 7) should still be empty, followed by insertion + assert mod_lines[7] == "" + assert mod_lines[8] == "# Inserted after empty line 8" + + +def test_apply_hashline_operations_multiline_non_contiguous(): + """Test 7: Non-contiguous multiline replaces on a 40+ line file with duplicates.""" + from cecli.helpers.hashline import apply_hashline_operations + + # Create a 45-line file with interspersed duplicates + lines = [] + for i in range(1, 46): + if i % 10 == 0: + lines.append("Duplicate Block") + lines.append("Common Content") + else: + lines.append(f"Unique Line {i}") + original = "\n".join(lines) + + print( + f"\nTest: Multiline non-contiguous\nOriginal (first 20 lines): {original.splitlines()[:20]}" + ) + + hashed = hashline(original) + h_lines = 
hashed.splitlines() + + # We want to perform three non-contiguous multiline replacements + # Op 1: Lines 5-8 (Unique Line 5 to Unique Line 8) + # Op 2: Lines 16-22 (Unique Line 15 to Common Content) + # Op 3: Lines 35-42 (Unique Line 32 to Unique Line 39) + + def get_h(ln): + return h_lines[ln - 1].split("|")[1] + + ops = [ + { + "operation": "replace", + "start_line_hash": f"5|{get_h(5)}", + "end_line_hash": f"8|{get_h(8)}", + "text": "Replacement Alpha", + }, + { + "operation": "replace", + "start_line_hash": f"16|{get_h(16)}", + "end_line_hash": f"22|{get_h(22)}", + "text": "Replacement Beta\nMore Beta", + }, + { + "operation": "replace", + "start_line_hash": f"35|{get_h(35)}", + "end_line_hash": f"42|{get_h(42)}", + "text": "Replacement Gamma", + }, + ] + + print(f"Operations: {ops}") + + modified, success, failed = apply_hashline_operations(original, ops) + + print(f"Success indices: {success}") + print(f"Failed: {len(failed)}") + print(f"Modified (first 25 lines): {modified.splitlines()[:25]}") + + assert len(success) == 3 + assert len(failed) == 0 + + mod_lines = modified.splitlines() + + # Verify Alpha + assert "Replacement Alpha" in mod_lines + assert "Unique Line 4" in mod_lines + assert "Unique Line 9" in mod_lines + + # Verify Beta + assert "Replacement Beta" in mod_lines + assert "More Beta" in mod_lines + # Line 15 (Unique Line 14) should be there, line 23 (Unique Line 21) should be there + assert "Unique Line 14" in mod_lines + assert "Unique Line 21" in mod_lines + + # Verify Gamma + assert "Replacement Gamma" in mod_lines + assert "Unique Line 31" in mod_lines + assert "Unique Line 41" in mod_lines + + # Verify a duplicate block that wasn't touched (the one at line 10-11) + assert "Duplicate Block" in mod_lines + assert "Common Content" in mod_lines + """Test 4: Operations at file boundaries.""" + from cecli.helpers.hashline import apply_hashline_operations + + original = "First\nMiddle\nLast" + hashed = hashline(original) + h_lines = 
hashed.splitlines() + h_first = h_lines[0].split("|")[1] + h_last = h_lines[2].split("|")[1] + + ops = [ + {"operation": "insert", "start_line_hash": f"1|{h_first}", "text": "Before First"}, + {"operation": "insert", "start_line_hash": f"3|{h_last}", "text": "After Last"}, + ] + + modified, success, failed = apply_hashline_operations(original, ops) + mod_lines = modified.splitlines() + assert mod_lines[0] == "First" + assert mod_lines[1] == "Before First" + assert mod_lines[2] == "Middle" + assert mod_lines[3] == "Last" + assert mod_lines[4] == "After Last" + + +def test_apply_hashline_operations_mixed_success(): + """Test 5: Mix of successful and failing operations.""" + from cecli.helpers.hashline import apply_hashline_operations + + original = "Line 1\nLine 2\nLine 3" + print(f"\nTest: Mixed success\nOriginal: {original.splitlines()}") + hashed = hashline(original) + h_lines = hashed.splitlines() + h1 = h_lines[0].split("|")[1] + + ops = [ + { + "operation": "replace", + "start_line_hash": f"1|{h1}", + "end_line_hash": f"1|{h1}", + "text": "New 1", + }, + { + "operation": "replace", + "start_line_hash": "99|zz", + "end_line_hash": "99|zz", + "text": "Fail", + }, + ] + + print(f"Operations: {ops}") + + modified, success, failed = apply_hashline_operations(original, ops) + + print(f"Success indices: {success}") + print(f"Failed: {len(failed)}") + for f in failed: + print(f" Failed op {f['index']}: {f['error'][:50]}...") + print(f"Modified: {modified.splitlines()}") + + assert len(success) == 1 + assert len(failed) == 1 + assert "New 1" in modified + assert "Fail" not in modified + assert failed[0]["index"] == 1 + assert "not found" in failed[0]["error"] + + +def test_apply_hashline_operations_bidirectional_stitching(): + """Test bidirectional non-contiguous stitching. + + Tests that the algorithm correctly stitches at both start and end + when replacement text contains lines that exist before and after + the replacement range. 
+ + Based on user's test case: + Original Contents: + A + B + A + B + B + C + D + E + E + F + G + H + I + H + I + J + K + L + + Replacement lines 7-10 (D through F) with: + B + C + M + N + H + I + + Expected Result: + A + B + A + B + B + C + M + N + H + I + H + I + J + K + L + """ + from cecli.helpers.hashline import apply_hashline_operations, hashline + + original_content = """A +B +A +B +B +C +D +E +E +F +G +H +I +H +I +J +K +L""" + + # Generate hashlines for the content + hashed_content = hashline(original_content) + hashed_lines = hashed_content.splitlines(keepends=True) + + # Find hash fragments for lines 7-10 (D through F) + # Lines are 0-indexed, so: + # Line 7 (D) is index 6 + # Line 10 (F) is index 9 + line_7_hash = hashed_lines[6].split("|", 2)[1] + line_10_hash = hashed_lines[9].split("|", 2)[1] + + # Replacement text + replacement_text = """B +C +M +N +H +I""" + + operations = [ + { + "start_line_hash": f"7|{line_7_hash}", # Line 7 (1-indexed) - D + "end_line_hash": f"10|{line_10_hash}", # Line 10 (1-indexed) - F + "operation": "replace", + "text": replacement_text, + } + ] + + # Expected result from user + expected_result = """A +B +A +B +B +C +M +N +H +I +H +I +J +K +L""" + + # Apply the operation + result, resolved_ops, errors = apply_hashline_operations(original_content, operations) + + # Check for errors + assert not errors, f"Errors occurred: {errors}" + + # Check if result matches expected + assert ( + result == expected_result + ), f"Result doesn't match expected.\nExpected:\n{expected_result}\nGot:\n{result}" diff --git a/tests/tools/test_insert_block.py b/tests/tools/test_insert_block.py index 00d46906f73..e4456f15b1b 100644 --- a/tests/tools/test_insert_block.py +++ b/tests/tools/test_insert_block.py @@ -4,6 +4,7 @@ import pytest +from cecli.helpers.hashline import hashline from cecli.tools import insert_text @@ -73,41 +74,64 @@ def coder_with_file(tmp_path): def test_position_top_succeeds_with_no_patterns(coder_with_file): coder, file_path = 
coder_with_file + # Calculate hashline for line 1 + content = file_path.read_text() + hashed_content = hashline(content) + lines = hashed_content.splitlines() + line1_hashline = lines[0] # Index 0 is line 1 + parts = line1_hashline.split("|") + line_num = parts[0] # Should be "1" + hash_fragment = parts[1] # The hash fragment + start_line = f"{line_num}|{hash_fragment}" + result = insert_text.Tool.execute( coder, file_path="example.txt", content="inserted line", - position="top", + start_line=start_line, ) assert result.startswith("Successfully executed InsertText.") - assert file_path.read_text().splitlines()[0] == "inserted line" + lines = file_path.read_text().splitlines() + assert lines[0] == "first line" # Original first line remains first + assert lines[1] == "inserted line" # Inserted line comes after line 1 coder.io.tool_error.assert_not_called() def test_mutually_exclusive_parameters_raise(coder_with_file): coder, file_path = coder_with_file + # Test with invalid hashline format (missing pipe) result = insert_text.Tool.execute( coder, file_path="example.txt", content="new line", - position="top", - line_number=1, + start_line="invalid_hashline", ) - assert result.startswith("Error: Must specify exactly one of") + assert result.startswith("Error:") + assert "Hashline insertion failed" in result assert file_path.read_text().startswith("first line") coder.io.tool_error.assert_called() def test_trailing_newline_preservation(coder_with_file): coder, file_path = coder_with_file + # Calculate hashline for line 1 + content = file_path.read_text() + hashed_content = hashline(content) + lines = hashed_content.splitlines() + line1_hashline = lines[0] # Index 0 is line 1 + parts = line1_hashline.split("|") + line_num = parts[0] # Should be "1" + hash_fragment = parts[1] # The hash fragment + start_line = f"{line_num}|{hash_fragment}" + insert_text.Tool.execute( coder, file_path="example.txt", content="inserted line", - position="top", + start_line=start_line, ) 
content = file_path.read_text() @@ -121,15 +145,27 @@ def test_no_trailing_newline_preservation(coder_with_file): content_without_trailing_newline = "first line\nsecond line" file_path.write_text(content_without_trailing_newline) + # Calculate hashline for line 1 + content = file_path.read_text() + hashed_content = hashline(content) + lines = hashed_content.splitlines() + line1_hashline = lines[0] # Index 0 is line 1 + parts = line1_hashline.split("|") + line_num = parts[0] # Should be "1" + hash_fragment = parts[1] # The hash fragment + start_line = f"{line_num}|{hash_fragment}" + insert_text.Tool.execute( coder, file_path="example.txt", content="inserted line", - position="top", + start_line=start_line, ) content = file_path.read_text() - assert not content.endswith("\n"), "File should preserve lack of trailing newline" + # Note: hashline implementation respects original trailing newline + # If original doesn't have trailing newline, result won't have one either + assert not content.endswith("\n"), "Hashline implementation respects original trailing newline" coder.io.tool_error.assert_not_called() @@ -137,11 +173,24 @@ def test_line_number_beyond_file_length_appends(coder_with_file): coder, file_path = coder_with_file # file_path has "first line\nsecond line\n" (2 lines) + # Calculate hashline for line 2 + content = file_path.read_text() + hashed_content = hashline(content) + # Extract hash fragment for line 2 + # hashline format is "{line_num}|{hash_fragment}|{line_content}" + lines = hashed_content.splitlines() + line2_hashline = lines[1] # Index 1 is line 2 (0-indexed) + # Split by | to get line_num|hash_fragment|content + parts = line2_hashline.split("|") + line_num = parts[0] # Should be "2" + hash_fragment = parts[1] # The hash fragment + start_line = f"{line_num}|{hash_fragment}" + result = insert_text.Tool.execute( coder, file_path="example.txt", content="appended line", - line_number=10, + start_line=start_line, ) assert result.startswith("Successfully 
executed InsertText.") @@ -154,16 +203,28 @@ def test_line_number_beyond_file_length_appends_no_trailing_newline(coder_with_f coder, file_path = coder_with_file file_path.write_text("first line\nsecond line") # No trailing newline + # Calculate hashline for line 2 (without trailing newline) + content = file_path.read_text() + hashed_content = hashline(content) + # Extract hash fragment for line 2 + lines = hashed_content.splitlines() + line2_hashline = lines[1] # Index 1 is line 2 (0-indexed) + # Split by | to get line_num|hash_fragment|content + parts = line2_hashline.split("|") + line_num = parts[0] # Should be "2" + hash_fragment = parts[1] # The hash fragment + start_line = f"{line_num}|{hash_fragment}" + result = insert_text.Tool.execute( coder, file_path="example.txt", content="appended line", - line_number=10, + start_line=start_line, ) assert result.startswith("Successfully executed InsertText.") content = file_path.read_text() - # Current implementation joins with \n, so it should result in: - # "first line\nsecond line\nappended line" + # Current implementation joins with \n, but respects original trailing newline + # Original doesn't have trailing newline, so result won't have one either assert content == "first line\nsecond line\nappended line" coder.io.tool_error.assert_not_called() diff --git a/tests/tools/test_show_numbered_context.py b/tests/tools/test_show_numbered_context.py index a0a8e054a47..0ed3b338da4 100644 --- a/tests/tools/test_show_numbered_context.py +++ b/tests/tools/test_show_numbered_context.py @@ -4,6 +4,7 @@ import pytest +from cecli.helpers.hashline import hashline from cecli.tools import show_numbered_context @@ -67,6 +68,16 @@ def test_pattern_with_zero_line_number_is_allowed(coder_with_file): def test_empty_pattern_uses_line_number(coder_with_file): coder, file_path = coder_with_file + # Calculate expected hashline for line 2 + content = file_path.read_text() + hashed_content = hashline(content) + # Extract hashline for line 2 + 
lines = hashed_content.splitlines() + line2_hashline = lines[1] # Index 1 is line 2 (0-indexed) + # hashline format is "{hash_fragment}:{line_num}|{line_content}" + # We need the full hashline (e.g., "BP:2|beta") + expected_hashline = line2_hashline + result = show_numbered_context.Tool.execute( coder, show=[ @@ -79,7 +90,7 @@ def test_empty_pattern_uses_line_number(coder_with_file): ], ) - assert "2 | beta" in result + assert expected_hashline in result coder.io.tool_error.assert_not_called()