Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
!/.pre-commit-config.yaml
!/CHANGELOG.md
!/CNAME
!/CONTRIBUTING.metadata
!/CONTRIBUTING.md
!/HISTORY.md
!/LICENSE.txt
!/MANIFEST.in
Expand Down
12 changes: 12 additions & 0 deletions cecli/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,18 @@ def get_parser(default_config_files, git_root):
default=False,
)

##########
group = parser.add_argument_group("Security Settings")
group.add_argument(
"--security-config",
metavar="SECURITY_CONFIG_JSON",
help=(
'Specify Security configuration as a JSON string (e.g., \'{"allowed-domains":'
' ["github.com"]}\')'
),
default=None,
)

##########
group = parser.add_argument_group("Context Compaction")
group.add_argument(
Expand Down
18 changes: 13 additions & 5 deletions cecli/coders/agent_coder.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,14 @@ def __init__(self, *args, **kwargs):
"viewfileswithsymbol",
"grep",
"listchanges",
"extractlines",
"shownumberedcontext",
}
self.write_tools = {
"command",
"commandinteractive",
"insertblock",
"replaceblock",
"replaceall",
"deletetext",
"indenttext",
"inserttext",
"replacetext",
"undochange",
}
Expand Down Expand Up @@ -245,10 +244,19 @@ async def _execute_local_tool_calls(self, tool_calls_list):
for chunk in json_chunks:
try:
parsed_args_list.append(json.loads(chunk))
except json.JSONDecodeError:
except json.JSONDecodeError as e:
self.io.tool_warning(
f"Could not parse JSON chunk for tool {tool_name}: {chunk}"
)
tool_responses.append(
{
"role": "tool",
"tool_call_id": tool_call.id,
"content": (
f"Could not parse JSON chunk for tool {tool_name}: {str(e)}"
),
}
)
continue
if not parsed_args_list and not args_string:
parsed_args_list.append({})
Expand Down
123 changes: 69 additions & 54 deletions cecli/coders/base_coder.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from json.decoder import JSONDecodeError
from pathlib import Path
from typing import List
from urllib.parse import urlparse

import httpx
from litellm import experimental_mcp_client
Expand Down Expand Up @@ -330,6 +331,7 @@ def __init__(
map_cache_dir=".",
repomap_in_memory=False,
linear_output=False,
security_config=None,
):
# initialize from args.map_cache_dir
self.map_cache_dir = map_cache_dir
Expand All @@ -342,6 +344,7 @@ def __init__(
self.abs_root_path_cache = {}

self.auto_copy_context = auto_copy_context
self.security_config = security_config or {}
self.auto_accept_architect = auto_accept_architect

self.ignore_mentions = ignore_mentions
Expand Down Expand Up @@ -1607,6 +1610,22 @@ async def run_one(self, user_message, preproc):

await self.auto_save_session(force=True)

def _is_url_allowed(self, url):
allowed_domains = self.security_config.get("allowed-domains")
if not allowed_domains:
return True

parsed_url = urlparse(url)
domain = parsed_url.netloc.lower()
if not domain:
return False

for allowed in allowed_domains:
allowed = allowed.lower()
if domain == allowed or domain.endswith("." + allowed):
return True
return False

async def check_and_open_urls(self, exc, friendly_msg=None):
"""Check exception for URLs, offer to open in a browser, with user-friendly error msgs."""
text = str(exc)
Expand All @@ -1623,7 +1642,8 @@ async def check_and_open_urls(self, exc, friendly_msg=None):
urls = list(set(url_pattern.findall(text)))
for url in urls:
url = url.rstrip(".',\"}") # Added } to the characters to strip
await self.io.offer_url(url)
if self._is_url_allowed(url):
await self.io.offer_url(url)
return urls

async def check_for_urls(self, inp: str) -> List[str]:
Expand All @@ -1637,7 +1657,7 @@ async def check_for_urls(self, inp: str) -> List[str]:
urls = list(set(url_pattern.findall(inp)))
group = ConfirmGroup(urls)
for url in urls:
if url not in self.rejected_urls:
if url not in self.rejected_urls and self._is_url_allowed(url):
url = url.rstrip(".',\"")
if await self.io.confirm_ask(
"Add URL to the chat?",
Expand Down Expand Up @@ -2419,12 +2439,17 @@ def _print_tool_call_info(self, server_tool_calls):

for server, tool_calls in server_tool_calls.items():
for tool_call in tool_calls:
if ToolRegistry.get_tool(tool_call.function.name.lower()):
ToolRegistry.get_tool(tool_call.function.name.lower()).format_output(
coder=self, mcp_server=server, tool_response=tool_call
)
else:
print_tool_response(coder=self, mcp_server=server, tool_response=tool_call)
try:
if ToolRegistry.get_tool(tool_call.function.name.lower()):
ToolRegistry.get_tool(tool_call.function.name.lower()).format_output(
coder=self, mcp_server=server, tool_response=tool_call
)
else:
print_tool_response(coder=self, mcp_server=server, tool_response=tool_call)
except Exception:
self.io.tool_output(f"Tool Output Error: {tool_call.function.name.lower()}")
self.io.tool_error(traceback.format_exc())
pass

def _gather_server_tool_calls(self, tool_calls):
"""Collect all tool calls grouped by server.
Expand Down Expand Up @@ -2784,73 +2809,57 @@ def add_assistant_reply_to_cur_messages(self):
)

def get_file_mentions(self, content, ignore_current=False):
# Get file-like words from content (contiguous strings containing slashes or periods)
# 1. Extract words once: O(N)
words = set()
for word in content.split():
# Strip quotes and punctuation
word = word.strip("\"'`*_,.!;:?")
if re.search(r"[\\\/._-]", word):
words.add(word)

# Also check basenames of file-like words
basename_words = set()
for word in words:
basename = os.path.basename(word)
if basename and basename != word: # Only add if basename is different
basename_words.add(basename)

# Combine all words to check
basename_words = {os.path.basename(w) for w in words if os.path.basename(w) != w}
all_words = words | basename_words

if ignore_current:
files_to_check = self.get_all_relative_files()
existing_basenames = set()
else:
files_to_check = self.get_addable_relative_files()
# Get basenames of files already in chat or read-only
# Pre-normalize for O(1) lookups: O(W)
normalized_words = {w.replace("\\", "/") for w in all_words}

# 2. Get files and filter ignored once: O(F)
raw_files = (
self.get_all_relative_files() if ignore_current else self.get_addable_relative_files()
)

# Filter ignored files once to avoid repeated expensive calls
files_to_check = [f for f in raw_files if not (self.repo and self.repo.git_ignored_file(f))]

# 3. Existing basenames setup
existing_basenames = set()

if not ignore_current:
existing_basenames = {os.path.basename(f) for f in self.get_inchat_relative_files()} | {
os.path.basename(self.get_rel_fname(f))
for f in self.abs_read_only_fnames | self.abs_read_only_stubs_fnames
}

# Build map of basenames to files for uniqueness check
# Only consider basenames that look like filenames (contain /, \, ., _, or -)
# to avoid false matches on common words like "run" or "make"
# 4. Build map: O(F)
basename_to_files = {}
for rel_fname in files_to_check:
# Skip git-ignored files
if self.repo and self.repo.git_ignored_file(rel_fname):
continue

basename = os.path.basename(rel_fname)
# Only include basenames that look like filenames
if re.search(r"[\\\/._-]", basename):
if basename not in basename_to_files:
basename_to_files[basename] = []
basename_to_files[basename].append(rel_fname)
bn = os.path.basename(rel_fname)
if re.search(r"[\\\/._-]", bn):
basename_to_files.setdefault(bn, []).append(rel_fname)

# 5. Final selection: O(F)
mentioned_rel_fnames = set()

for rel_fname in files_to_check:
# Skip git-ignored files
if self.repo and self.repo.git_ignored_file(rel_fname):
continue

# Check if full path matches
normalized_fname = rel_fname.replace("\\", "/")
normalized_words = {w.replace("\\", "/") for w in all_words}

if normalized_fname in normalized_words:
# Full path match
if rel_fname.replace("\\", "/") in normalized_words:
mentioned_rel_fnames.add(rel_fname)
continue

# Check basename - only add if unique among addable files and not already in chat
basename = os.path.basename(rel_fname)
# Basename match logic
bn = os.path.basename(rel_fname)
if (
basename in all_words
and basename not in existing_basenames
and len(basename_to_files.get(basename, [])) == 1
and basename_to_files[basename][0] == rel_fname
bn in all_words
and bn not in existing_basenames
and len(basename_to_files.get(bn, [])) == 1
):
mentioned_rel_fnames.add(rel_fname)

Expand Down Expand Up @@ -3010,6 +3019,7 @@ def show_send_output(self, completion):

async def show_send_output_stream(self, completion):
received_content = False
chunk_index = 0

async for chunk in completion:
if self.args.debug:
Expand Down Expand Up @@ -3105,6 +3115,8 @@ async def show_send_output_stream(self, completion):

self.partial_response_content += text

chunk_index += 1
chunk._hidden_params["created_at"] = chunk_index
self.partial_response_chunks.append(chunk)

if self.show_pretty():
Expand Down Expand Up @@ -3475,7 +3487,10 @@ def get_all_relative_files(self):
# Continue to get tracked files normally

if self.repo:
files = self.repo.get_tracked_files()
if not self.repo.cecli_ignore_file or not self.repo.cecli_ignore_file.is_file():
files = self.repo.get_tracked_files()
else:
files = self.repo.get_non_ignored_files_from_root()
else:
files = self.get_inchat_relative_files()

Expand Down
6 changes: 6 additions & 0 deletions cecli/commands/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,16 @@ def get_help(cls) -> str:
help_text = super().get_help()
help_text += "\nUsage:\n"
help_text += " /models <partial-name> # Search for models matching the partial name\n"
help_text += " /models <glob-pattern> # Search using glob patterns (*, ?, [])\n"
help_text += "\nExamples:\n"
help_text += " /models gpt-4 # Search for GPT-4 models\n"
help_text += " /models claude # Search for Claude models\n"
help_text += " /models o1 # Search for o1 models\n"
help_text += " /models gemini/* # Search for all Gemini models\n"
help_text += " /models gpt-4* # Search for models starting with 'gpt-4'\n"
help_text += " /models *gpt* # Search for models containing 'gpt'\n"
help_text += "\nThis command searches through the available LLM models and displays\n"
help_text += "matching models with their details including cost and capabilities.\n"
help_text += "Supports glob patterns: * (any characters), ? (single character),\n"
help_text += "[] (character class).\n"
return help_text
4 changes: 3 additions & 1 deletion cecli/helpers/conversation/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,9 @@ def update_file_diff(cls, fname: str) -> Optional[str]:
# Add diff message to conversation
diff_message = {
"role": "user",
"content": f"File {rel_fname} has changed:\n\n{diff}",
"content": (
f"File {rel_fname} has changed. Here is a diff of the changes:\n\n{diff}"
),
}

if coder and hasattr(coder, "abs_fnames"):
Expand Down
15 changes: 11 additions & 4 deletions cecli/helpers/conversation/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,8 +377,12 @@ def add_readonly_files_messages(cls, coder) -> List[Dict[str, Any]]:

user_msg = {
"role": "user",
"content": f"File Contents {rel_fname}:\n\n{content}",
"content": (
f"Here are the original file contents for {rel_fname}:\n\n{content}"
"\n\nModifications will be communicated as diff messages."
),
}

ConversationManager.add_message(
message_dict=user_msg,
tag=MessageTag.READONLY_FILES,
Expand All @@ -389,7 +393,7 @@ def add_readonly_files_messages(cls, coder) -> List[Dict[str, Any]]:
# Add assistant message with file path as hash_key
assistant_msg = {
"role": "assistant",
"content": "Ok, I will view and/or modify this file as is necessary.",
"content": "I understand, thank you for sharing the file contents.",
}
ConversationManager.add_message(
message_dict=assistant_msg,
Expand Down Expand Up @@ -475,13 +479,16 @@ def add_chat_files_messages(cls, coder) -> Dict[str, Any]:
# Create user message
user_msg = {
"role": "user",
"content": f"File Contents {rel_fname}:\n\n{content}",
"content": (
f"Here are the original file contents for {rel_fname}:\n\n{content}"
"\n\nModifications will be communicated as diff messages."
),
}

# Create assistant message
assistant_msg = {
"role": "assistant",
"content": "Ok, I will modify this file as is necessary.",
"content": "I understand, thank you for sharing the file contents.",
}

# Determine tag based on editability
Expand Down
21 changes: 21 additions & 0 deletions cecli/helpers/responses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import re


def preprocess_json(response: str) -> str:
    """Normalize backslash runs in *response* so it parses as JSON.

    LLM output often contains inconsistently escaped backslashes. This
    heuristic rewrites every run of backslashes plus its trailing
    character: a recognized JSON escape (``\\n``, ``\\u0041``, ...) is
    collapsed to exactly one backslash, while anything else (e.g. the
    ``\\t`` in ``C:\\temp`` when ``t`` is absent — i.e. a stray literal
    backslash) is doubled so it survives ``json.loads``.
    """
    # A run of one or more backslashes, optionally followed by a
    # unicode escape body, a JSON escape character, or any single char.
    run_pattern = re.compile(r'(\\+)(u[0-9a-fA-F]{4}|["\\\/bfnrt]|.)?')
    # The set of suffixes that form a legal JSON escape sequence.
    valid_escape = re.compile(r'^(u[0-9a-fA-F]{4}|["\\\/bfnrt])$')

    def fix_run(match):
        tail = match.group(2) or ""
        if valid_escape.match(tail):
            # Legal escape: force exactly one leading backslash.
            return "\\" + tail
        # Literal backslash: force exactly two so JSON keeps one.
        return "\\\\" + tail

    return run_pattern.sub(fix_run, response)
3 changes: 3 additions & 0 deletions cecli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,8 @@ async def main_async(argv=None, input=None, output=None, force_git_root=None, re
args.mcp_servers = convert_yaml_to_json_string(args.mcp_servers)
if hasattr(args, "custom") and args.custom is not None:
args.custom = convert_yaml_to_json_string(args.custom)
if hasattr(args, "security_config") and args.security_config is not None:
args.security_config = convert_yaml_to_json_string(args.security_config)
if hasattr(args, "retries") and args.retries is not None:
args.retries = convert_yaml_to_json_string(args.retries)
if args.debug:
Expand Down Expand Up @@ -1042,6 +1044,7 @@ def apply_model_overrides(model_name):
map_cache_dir=args.map_cache_dir,
repomap_in_memory=args.map_memory_cache,
linear_output=args.linear_output,
security_config=args.security_config,
)
if args.show_model_warnings and not suppress_pre_init:
problem = await models.sanity_check_models(pre_init_io, main_model)
Expand Down
Loading
Loading