Skip to content

Commit 04d88c7

Browse files
committed
fix(gmail): RFC 2047 encode non-ASCII display names in address headers
Add encode_address_header(), which parses a mailbox list, RFC 2047-encodes only the display-name portion of each mailbox whose display name contains non-ASCII characters, and leaves email addresses untouched. It is applied to all 4 address headers (To, From, Cc, Bcc) in MessageBuilder::build(). Previously, only Subject got RFC 2047 encoding, while address headers got only CRLF sanitization, causing mojibake for non-ASCII names. Supersedes #405, #458, #469. Closes #404.
1 parent 47afe5f commit 04d88c7

2 files changed

Lines changed: 159 additions & 4 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
"@googleworkspace/cli": patch
3+
---
4+
5+
fix(gmail): RFC 2047 encode non-ASCII display names in To/From/Cc/Bcc headers
6+
7+
Fixes mojibake when sending emails to recipients with non-ASCII display names (e.g. Japanese, Spanish accented characters). The new `encode_address_header()` function parses mailbox lists, encodes only the display-name portion via RFC 2047 Base64, and leaves email addresses untouched.

src/helpers/gmail/mod.rs

Lines changed: 152 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,43 @@ pub(super) fn encode_header_value(value: &str) -> String {
448448
encoded_words.join("\r\n ")
449449
}
450450

451+
/// RFC 2047 encode non-ASCII display names in an address header value.
452+
///
453+
/// Parses a comma-separated mailbox list (e.g. `"José <j@ex.com>, alice@ex.com"`),
454+
/// encodes only the display-name portion of each mailbox if it contains
455+
/// non-ASCII characters, and leaves email addresses untouched.
456+
///
457+
/// Examples:
458+
/// - `"alice@example.com"` → `"alice@example.com"` (bare email, unchanged)
459+
/// - `"José García <jose@example.com>"` → `"=?UTF-8?B?...?= <jose@example.com>"`
460+
/// - `"Alice <a@ex.com>, José <j@ex.com>"` → `"Alice <a@ex.com>, =?UTF-8?B?...?= <j@ex.com>"`
461+
pub(super) fn encode_address_header(value: &str) -> String {
462+
let mailboxes = split_mailbox_list(value);
463+
let encoded: Vec<String> = mailboxes
464+
.into_iter()
465+
.map(|mailbox| {
466+
let email = extract_email(mailbox);
467+
let display = extract_display_name(mailbox);
468+
469+
// Bare email address — no display name to encode
470+
if email == display {
471+
return mailbox.to_string();
472+
}
473+
474+
// Display name is ASCII — no encoding needed
475+
if display.is_ascii() {
476+
return mailbox.to_string();
477+
}
478+
479+
// Non-ASCII display name — RFC 2047 encode it
480+
let encoded_name = encode_header_value(display);
481+
format!("{} <{}>", encoded_name, email)
482+
})
483+
.collect();
484+
485+
encoded.join(", ")
486+
}
487+
451488
/// In-Reply-To and References values for threading a reply or forward.
452489
#[derive(Clone, Copy)]
453490
pub(super) struct ThreadingHeaders<'a> {
@@ -482,7 +519,7 @@ impl MessageBuilder<'_> {
482519

483520
let mut headers = format!(
484521
"To: {}\r\nSubject: {}",
485-
sanitize_header_value(self.to),
522+
encode_address_header(&sanitize_header_value(self.to)),
486523
// Sanitize first: stripping CRLF before encoding prevents injection
487524
// in encoded-words.
488525
encode_header_value(&sanitize_header_value(self.subject)),
@@ -506,17 +543,26 @@ impl MessageBuilder<'_> {
506543
));
507544

508545
if let Some(from) = self.from {
509-
headers.push_str(&format!("\r\nFrom: {}", sanitize_header_value(from)));
546+
headers.push_str(&format!(
547+
"\r\nFrom: {}",
548+
encode_address_header(&sanitize_header_value(from))
549+
));
510550
}
511551

512552
if let Some(cc) = self.cc {
513-
headers.push_str(&format!("\r\nCc: {}", sanitize_header_value(cc)));
553+
headers.push_str(&format!(
554+
"\r\nCc: {}",
555+
encode_address_header(&sanitize_header_value(cc))
556+
));
514557
}
515558

516559
// The Gmail API reads the Bcc header to route to those recipients,
517560
// then strips it before delivery.
518561
if let Some(bcc) = self.bcc {
519-
headers.push_str(&format!("\r\nBcc: {}", sanitize_header_value(bcc)));
562+
headers.push_str(&format!(
563+
"\r\nBcc: {}",
564+
encode_address_header(&sanitize_header_value(bcc))
565+
));
520566
}
521567

522568
format!("{}\r\n\r\n{}", headers, body)
@@ -1249,6 +1295,108 @@ mod tests {
12491295
}
12501296
}
12511297

1298+
#[test]
fn test_encode_address_header_bare_email() {
    // A bare address has no display name, so it must come back unchanged.
    let output = encode_address_header("alice@example.com");
    assert_eq!(output, "alice@example.com");
}
1305+
1306+
#[test]
fn test_encode_address_header_ascii_display_name() {
    // An ASCII-only display name must be passed through verbatim.
    let mailbox = "Alice Smith <alice@example.com>";
    let output = encode_address_header(mailbox);
    assert_eq!(output, mailbox);
}
1311+
1312+
#[test]
fn test_encode_address_header_non_ascii_display_name() {
    let encoded = encode_address_header("José García <jose@example.com>");

    // The display name must have been turned into a Base64 encoded-word.
    let has_encoded_word = encoded.contains("=?UTF-8?B?");
    assert!(has_encoded_word, "Should contain encoded-word: {encoded}");

    // The address part must survive untouched.
    let keeps_email = encoded.contains("<jose@example.com>");
    assert!(keeps_email, "Email should be preserved: {encoded}");

    // No raw non-ASCII text may remain in the header value.
    let leaks_raw_name = encoded.contains("José");
    assert!(!leaks_raw_name, "Raw non-ASCII should not appear: {encoded}");
}
1328+
1329+
#[test]
fn test_encode_address_header_multiple_mixed() {
    // One ASCII mailbox followed by one with a non-ASCII display name.
    let encoded = encode_address_header("Alice <alice@example.com>, José <jose@example.com>");

    // The ASCII mailbox comes first and is left exactly as written.
    let ascii_untouched = encoded.starts_with("Alice <alice@example.com>, ");
    assert!(
        ascii_untouched,
        "ASCII address should be unchanged: {encoded}"
    );

    // The second mailbox's display name is encoded.
    let has_encoded_word = encoded.contains("=?UTF-8?B?");
    assert!(
        has_encoded_word,
        "Non-ASCII name should be encoded: {encoded}"
    );

    // Its address is still present verbatim.
    let keeps_email = encoded.contains("<jose@example.com>");
    assert!(keeps_email, "Email should be preserved: {encoded}");
}
1346+
1347+
#[test]
fn test_encode_address_header_quoted_non_ascii() {
    // The display name is wrapped in double quotes in the input.
    let encoded = encode_address_header("\"下野祐太\" <shimono@example.com>");

    let has_encoded_word = encoded.contains("=?UTF-8?B?");
    assert!(has_encoded_word, "Should contain encoded-word: {encoded}");

    let keeps_email = encoded.contains("<shimono@example.com>");
    assert!(keeps_email, "Email should be preserved: {encoded}");
}
1359+
1360+
#[test]
fn test_message_builder_non_ascii_address_headers() {
    // Every address field carries a non-ASCII display name so that each of
    // the To/From/Cc/Bcc headers is exercised in a single built message.
    let raw = MessageBuilder {
        to: "José <jose@example.com>",
        subject: "Test",
        from: Some("田中太郎 <tanaka@example.com>"),
        cc: Some("Ñoño <nono@example.com>"),
        bcc: Some("Ünsal <unsal@example.com>"),
        threading: None,
        html: false,
    }
    .build("body");

    // Each address header must start with an encoded-word display name.
    let to_encoded = raw.contains("To: =?UTF-8?B?");
    assert!(to_encoded, "To should be RFC 2047 encoded: {raw}");

    let from_encoded = raw.contains("From: =?UTF-8?B?");
    assert!(from_encoded, "From should be RFC 2047 encoded: {raw}");

    let cc_encoded = raw.contains("Cc: =?UTF-8?B?");
    assert!(cc_encoded, "Cc should be RFC 2047 encoded: {raw}");

    let bcc_encoded = raw.contains("Bcc: =?UTF-8?B?");
    assert!(bcc_encoded, "Bcc should be RFC 2047 encoded: {raw}");

    // The angle-bracketed addresses must appear verbatim in the output.
    assert!(raw.contains("<jose@example.com>"));
    assert!(raw.contains("<tanaka@example.com>"));
    assert!(raw.contains("<nono@example.com>"));
    assert!(raw.contains("<unsal@example.com>"));
}
1399+
12521400
#[test]
12531401
fn test_message_builder_basic() {
12541402
let raw = MessageBuilder {

0 commit comments

Comments
 (0)