Skip to content

Commit 37a0b0d

Browse files
sinelaw and claude committed
Fix backspace/delete to handle multi-byte UTF-8 characters
Fixes #225.

Backspace and delete operations were removing single bytes instead of entire UTF-8 characters, corrupting multi-byte characters such as emojis, Norwegian letters (æøå), and currency symbols (€).

Changed DeleteBackward to use prev_char_boundary() and DeleteForward to use next_char_boundary() so that the deleted range starts and ends on character boundaries.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent abdde9f commit 37a0b0d

File tree

2 files changed

+222
-2
lines changed

2 files changed

+222
-2
lines changed

src/input/actions.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1318,9 +1318,11 @@ pub fn action_to_events(
13181318
if let Some(range) = cursor.selection_range() {
13191319
Some((*cursor_id, range))
13201320
} else if cursor.position > 0 {
1321-
let delete_from = cursor.position.saturating_sub(1);
1321+
// Use prev_char_boundary to properly handle multi-byte UTF-8 characters
1322+
let delete_from = state.buffer.prev_char_boundary(cursor.position);
13221323

13231324
// Check for auto-pair deletion when auto_indent is enabled
1325+
// Note: Auto-pairs are ASCII-only, so we can safely check single bytes
13241326
if auto_indent && cursor.position < state.buffer.len() {
13251327
let char_before = state
13261328
.buffer
@@ -1377,7 +1379,9 @@ pub fn action_to_events(
13771379
if let Some(range) = cursor.selection_range() {
13781380
Some((*cursor_id, range))
13791381
} else if cursor.position < buffer_len {
1380-
Some((*cursor_id, cursor.position..(cursor.position + 1)))
1382+
// Use next_char_boundary to properly handle multi-byte UTF-8 characters
1383+
let delete_to = state.buffer.next_char_boundary(cursor.position);
1384+
Some((*cursor_id, cursor.position..delete_to))
13811385
} else {
13821386
None
13831387
}

tests/e2e/unicode_cursor.rs

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::common::harness::EditorTestHarness;
22
use crossterm::event::{KeyCode, KeyModifiers};
3+
use tempfile::TempDir;
34

45
/// Test that cursor position stays in sync when editing lines with non-ASCII characters
56
/// This reproduces the bug where visual cursor position drifts from actual position
@@ -155,3 +156,218 @@ fn test_mouse_click_on_non_ascii_text() {
155156

156157
// This test may need adjustment based on actual gutter rendering
157158
}
159+
160+
/// Test that backspace properly deletes entire UTF-8 characters, not just bytes
161+
/// This reproduces the bug where backspace removes only the last byte of a multi-byte character
162+
#[test]
163+
fn test_backspace_deletes_entire_utf8_character() {
164+
let mut harness = EditorTestHarness::new(80, 24).unwrap();
165+
166+
// Test 1: Euro sign (3 bytes: 0xE2 0x82 0xAC)
167+
harness.type_text("€").unwrap();
168+
harness.assert_buffer_content("€");
169+
170+
// Backspace should delete the entire euro sign, not just one byte
171+
harness
172+
.send_key(KeyCode::Backspace, KeyModifiers::NONE)
173+
.unwrap();
174+
harness.assert_buffer_content("");
175+
176+
// Test 2: Norwegian characters (2 bytes each: æ=0xC3 0xA6, ø=0xC3 0xB8, å=0xC3 0xA5)
177+
harness.type_text("æøå").unwrap();
178+
harness.assert_buffer_content("æøå");
179+
180+
// Backspace should delete 'å' entirely
181+
harness
182+
.send_key(KeyCode::Backspace, KeyModifiers::NONE)
183+
.unwrap();
184+
harness.assert_buffer_content("æø");
185+
186+
// Another backspace should delete 'ø' entirely
187+
harness
188+
.send_key(KeyCode::Backspace, KeyModifiers::NONE)
189+
.unwrap();
190+
harness.assert_buffer_content("æ");
191+
192+
// Another backspace should delete 'æ' entirely
193+
harness
194+
.send_key(KeyCode::Backspace, KeyModifiers::NONE)
195+
.unwrap();
196+
harness.assert_buffer_content("");
197+
198+
// Test 3: Emoji (4 bytes: 😀 = U+1F600)
199+
harness.type_text("a😀b").unwrap();
200+
harness.assert_buffer_content("a😀b");
201+
202+
// Backspace should delete 'b'
203+
harness
204+
.send_key(KeyCode::Backspace, KeyModifiers::NONE)
205+
.unwrap();
206+
harness.assert_buffer_content("a😀");
207+
208+
// Backspace should delete the entire emoji (4 bytes), not just one byte
209+
harness
210+
.send_key(KeyCode::Backspace, KeyModifiers::NONE)
211+
.unwrap();
212+
harness.assert_buffer_content("a");
213+
}
214+
215+
/// Test that delete (forward) properly removes entire UTF-8 characters
216+
#[test]
217+
fn test_delete_forward_removes_entire_utf8_character() {
218+
let mut harness = EditorTestHarness::new(80, 24).unwrap();
219+
220+
// Type text with multi-byte characters
221+
harness.type_text("a€b").unwrap();
222+
harness.assert_buffer_content("a€b");
223+
224+
// Move to beginning
225+
harness.send_key(KeyCode::Home, KeyModifiers::NONE).unwrap();
226+
227+
// Delete 'a' - this should work fine (ASCII)
228+
harness
229+
.send_key(KeyCode::Delete, KeyModifiers::NONE)
230+
.unwrap();
231+
harness.assert_buffer_content("€b");
232+
233+
// Delete '€' - should delete entire 3-byte euro sign, not just one byte
234+
harness
235+
.send_key(KeyCode::Delete, KeyModifiers::NONE)
236+
.unwrap();
237+
harness.assert_buffer_content("b");
238+
}
239+
240+
/// Test that selecting multi-byte UTF-8 characters and deleting the selection works correctly
241+
#[test]
242+
fn test_selection_delete_with_utf8_characters() {
243+
let mut harness = EditorTestHarness::new(80, 24).unwrap();
244+
245+
// Type text with multi-byte characters: a + æ(2) + ø(2) + å(2) + b
246+
harness.type_text("aæøåb").unwrap();
247+
harness.assert_buffer_content("aæøåb");
248+
249+
// Move to beginning
250+
harness.send_key(KeyCode::Home, KeyModifiers::NONE).unwrap();
251+
252+
// Move right once (past 'a')
253+
harness
254+
.send_key(KeyCode::Right, KeyModifiers::NONE)
255+
.unwrap();
256+
257+
// Select the three Norwegian characters by shift+right 3 times
258+
harness
259+
.send_key(KeyCode::Right, KeyModifiers::SHIFT)
260+
.unwrap();
261+
harness
262+
.send_key(KeyCode::Right, KeyModifiers::SHIFT)
263+
.unwrap();
264+
harness
265+
.send_key(KeyCode::Right, KeyModifiers::SHIFT)
266+
.unwrap();
267+
268+
// Delete the selection with backspace
269+
harness
270+
.send_key(KeyCode::Backspace, KeyModifiers::NONE)
271+
.unwrap();
272+
harness.assert_buffer_content("ab");
273+
}
274+
275+
/// Test that selecting and replacing UTF-8 characters works correctly
276+
#[test]
277+
fn test_selection_replace_with_utf8_characters() {
278+
let mut harness = EditorTestHarness::new(80, 24).unwrap();
279+
280+
// Type text with emoji
281+
harness.type_text("hello😀world").unwrap();
282+
harness.assert_buffer_content("hello😀world");
283+
284+
// Move to beginning
285+
harness.send_key(KeyCode::Home, KeyModifiers::NONE).unwrap();
286+
287+
// Move right 5 times (past "hello")
288+
for _ in 0..5 {
289+
harness
290+
.send_key(KeyCode::Right, KeyModifiers::NONE)
291+
.unwrap();
292+
}
293+
294+
// Select the emoji (1 character, 4 bytes)
295+
harness
296+
.send_key(KeyCode::Right, KeyModifiers::SHIFT)
297+
.unwrap();
298+
299+
// Replace with a different character
300+
harness.type_text("X").unwrap();
301+
harness.assert_buffer_content("helloXworld");
302+
}
303+
304+
/// Test loading a file with UTF-8 characters, backspacing, saving, and verifying file content
305+
/// This reproduces the exact bug where backspace removes only a byte, corrupting the file on save
306+
#[test]
307+
fn test_backspace_utf8_file_save_roundtrip() {
308+
let temp_dir = TempDir::new().unwrap();
309+
310+
// Test 1: Euro sign (3 bytes: 0xE2 0x82 0xAC)
311+
let euro_path = temp_dir.path().join("euro.txt");
312+
std::fs::write(&euro_path, "€\n").unwrap();
313+
314+
let mut harness = EditorTestHarness::new(80, 24).unwrap();
315+
harness.open_file(&euro_path).unwrap();
316+
harness.render().unwrap();
317+
318+
// Move to end of line (after €, before newline)
319+
harness.send_key(KeyCode::End, KeyModifiers::NONE).unwrap();
320+
321+
// Backspace should delete the entire euro sign
322+
harness
323+
.send_key(KeyCode::Backspace, KeyModifiers::NONE)
324+
.unwrap();
325+
326+
// Save with Ctrl+S
327+
harness
328+
.send_key(KeyCode::Char('s'), KeyModifiers::CONTROL)
329+
.unwrap();
330+
harness.render().unwrap();
331+
332+
// Verify the file contains only a newline (euro sign fully deleted)
333+
let saved = std::fs::read(&euro_path).unwrap();
334+
assert_eq!(
335+
saved,
336+
b"\n",
337+
"Euro sign should be fully deleted, file should contain only newline. Got: {:?}",
338+
saved
339+
);
340+
341+
// Test 2: Norwegian characters (æøå)
342+
let norwegian_path = temp_dir.path().join("norwegian.txt");
343+
std::fs::write(&norwegian_path, "æøå\n").unwrap();
344+
345+
let mut harness2 = EditorTestHarness::new(80, 24).unwrap();
346+
harness2.open_file(&norwegian_path).unwrap();
347+
harness2.render().unwrap();
348+
349+
// Move to end of line
350+
harness2
351+
.send_key(KeyCode::End, KeyModifiers::NONE)
352+
.unwrap();
353+
354+
// Backspace should delete 'å' entirely (2 bytes)
355+
harness2
356+
.send_key(KeyCode::Backspace, KeyModifiers::NONE)
357+
.unwrap();
358+
359+
// Save
360+
harness2
361+
.send_key(KeyCode::Char('s'), KeyModifiers::CONTROL)
362+
.unwrap();
363+
harness2.render().unwrap();
364+
365+
// Verify
366+
let saved2 = std::fs::read(&norwegian_path).unwrap();
367+
assert_eq!(
368+
saved2,
369+
"æø\n".as_bytes(),
370+
"Only 'å' should be deleted, leaving 'æø'. Got: {:?}",
371+
String::from_utf8_lossy(&saved2)
372+
);
373+
}

0 commit comments

Comments
 (0)