FMXExpress · FMXExpress · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026
diff --git a/src/pkg/platform/PasClaw.Platform.pas b/src/pkg/platform/PasClaw.Platform.pas
@@ -183,6 +183,11 @@ implementation
 function DecodeShellOutputBytes(const Bytes: TBytes;
                                 ByteCount: Integer;
                                 Codepage: UInt32): string;
+{$IFDEF MSWINDOWS}
+const
+  CP_UTF8_LOCAL              = 65001;
+  MB_ERR_INVALID_CHARS_LOCAL = $00000008;
+{$ENDIF}
 var
   Len: Integer;
   {$IFDEF MSWINDOWS}
@@ -203,23 +208,38 @@ function DecodeShellOutputBytes(const Bytes: TBytes;
     CP := Codepage
   else
   begin
-    { Prefer the ACTIVE console output codepage over the system OEM
-      default. cmd.exe writes its stdout in whichever codepage is
-      currently set on the console (the OEM default initially, but
-      operators can switch it -- `chcp 65001` puts the console in
-      UTF-8, and PowerShell sessions inherit the host process's
-      OutputEncoding). Pinning to GetOEMCP would silently re-mojibake
-      output in those environments by decoding UTF-8 bytes as if they
-      were CP437. GetConsoleOutputCP returns the active output CP for
-      the console attached to this process; it returns 0 when the
-      process isn't attached to a console (gateway / serve daemons
-      launched from a service manager, headless CI), in which case
-      we fall back to GetOEMCP -- a long-running headless daemon
-      doesn't have a "currently active" console CP to consult, but
-      its spawned cmd.exe children still default to the OEM CP.
-      Codex P2 on PR #237. }
-    CP := GetConsoleOutputCP;
-    if CP = 0 then CP := GetOEMCP;
+    { Auto-detect between UTF-8 and OEM. PR #237's first attempt
+      pinned GetConsoleOutputCP (wrong: returns OUR console's CP,
+      not the spawned cmd.exe's piped-output CP). The revert to
+      unconditional GetOEMCP was right for cmd.exe (which pipes
+      OEM regardless of chcp) but wrong for pwsh -- PowerShell 6+
+      defaults to UTF-8 stdout, so a `Write-Output 'résumé'` from
+      execute_code's pwsh branch produces UTF-8 bytes that GetOEMCP
+      would mojibake as CP437. Codex P2 on PR #239.
+
+      Heuristic: try strict UTF-8 (MB_ERR_INVALID_CHARS) first.
+      Valid UTF-8 -> use it (pwsh / chcp 65001 / Wine on Linux).
+      Invalid sequence anywhere -> fall back to OEM (cmd.exe's
+      piped output).
+
+      This is robust because:
+        - Pure ASCII (most output) is valid UTF-8 -> taken either way.
+        - cmd's CP437 non-ASCII bytes (0x80-0xFF) are typically
+          invalid UTF-8 lead bytes -- e.g. 0x82 (é in CP437) has
+          binary 10000010 which is a UTF-8 continuation marker, not
+          a lead byte, so it fails MB_ERR_INVALID_CHARS.
+        - pwsh UTF-8 output (multi-byte sequences for é/résumé/etc.)
+          parses cleanly.
+
+      Edge case: OEM bytes that happen to coincide with a valid
+      UTF-8 sequence (e.g. exactly a 2-byte CP437 pair that maps
+      to a real Unicode codepoint via UTF-8 decoding). This is
+      vanishingly unlikely for filename / dir output. }
+    if MultiByteToWideChar(CP_UTF8_LOCAL, MB_ERR_INVALID_CHARS_LOCAL,
+                            PAnsiChar(@Bytes[0]), Len, nil, 0) > 0 then
+      CP := CP_UTF8_LOCAL
+    else
+      CP := GetOEMCP;
   end;
   { Pass 1: discover the wide-char buffer size we need. }
   WideLen := MultiByteToWideChar(CP, 0, PAnsiChar(@Bytes[0]), Len, nil, 0);

diff --git a/src/tests/shell_output_decode_tests.pas b/src/tests/shell_output_decode_tests.pas
@@ -241,6 +241,33 @@ procedure TestUTF8InputThroughExplicitCP65001;
               'UTF-8 input round-trips through codepage 65001');
 end;
 
+procedure TestAutoDetectPrefersUTF8ForValidSequences;
+(* Regression test for Codex P2 on PR #239.
+
+   Purpose: when DecodeShellOutputBytes is called without an explicit
+   codepage (Codepage = 0 -> auto-detect) and the input is well-formed
+   UTF-8, the bytes must be decoded as UTF-8 rather than routed through
+   the OEM codepage. PowerShell 6+ (pwsh) defaults to UTF-8 stdout, so
+   output captured by execute_code / shell_exec from pwsh arrives as
+   UTF-8 and would be mojibaked by an unconditional GetOEMCP decode.
+
+   NOTE(review): the POSIX branch reportedly decodes Codepage = 0 input
+   via TEncoding.UTF8.GetString, so the same bytes should yield the same
+   result on Linux CI -- confirm against the implementation.
+
+   Failure is reported through AssertEqStr; there is no return value. *)
+var
+  B: TBytes;
+  Got: string;
+begin
+  { "résumé" encoded as UTF-8: r(0x72), é(0xC3 0xA9), s(0x73), u(0x75),
+    m(0x6D), é(0xC3 0xA9) -- 6 characters, 8 bytes. }
+  SetLength(B, 8);
+  B[0] := $72;
+  B[1] := $C3; B[2] := $A9;
+  B[3] := $73;
+  B[4] := $75;
+  B[5] := $6D;
+  B[6] := $C3; B[7] := $A9;
+  Got := DecodeShellOutputBytes(B);   { Codepage = 0 -> auto-detect }
+  { CP437 maps 0xC3 -> "├" and 0xA9 -> "⌐", so a forced CP437 decode
+    turns every "é" into the two-glyph pair "├⌐". }
+  AssertEqStr(Got, 'résumé',
+              'auto-detect: valid UTF-8 input passes through verbatim ' +
+              '(would be mojibake like "r├⌐sum├⌐" if CP437 was forced)');
+end;
+
 begin
   TestEmptyInputEmptyOutput;
   WriteLn('  ok: empty input -> empty output');
@@ -262,5 +289,7 @@ procedure TestUTF8InputThroughExplicitCP65001;
   WriteLn('  ok: CP437 0xC4 -> ─ (3-byte UTF-8)');
   TestUTF8InputThroughExplicitCP65001;
   WriteLn('  ok: codepage 65001 = pass-through UTF-8');
+  TestAutoDetectPrefersUTF8ForValidSequences;
+  WriteLn('  ok: auto-detect picks UTF-8 for valid UTF-8 input (pwsh case)');
   WriteLn('PASS');
 end.