Skip to content
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Text;

namespace Microsoft.Extensions.Logging.Console
{
internal static class ConsoleControlCharacterSanitizer
{
public static string? Sanitize(string? value)
{
if (string.IsNullOrEmpty(value))
{
return value;
}

int firstEscapedCharacterIndex = GetFirstEscapedCharacterIndex(value);
if (firstEscapedCharacterIndex < 0)
{
return value;
}

var sanitized = new ValueStringBuilder(stackalloc char[256]);
sanitized.Append(value.AsSpan(0, firstEscapedCharacterIndex));

for (int i = firstEscapedCharacterIndex; i < value.Length; i++)
{
char current = value[i];
if (ShouldEscape(current))
{
sanitized.Append('\\');
sanitized.Append('u');
int codePoint = current;
Span<char> hex = sanitized.AppendSpan(4);
hex[0] = HexConverter.ToCharUpper(current >> 12);
hex[1] = HexConverter.ToCharUpper(current >> 8);
hex[2] = HexConverter.ToCharUpper(current >> 4);
hex[3] = HexConverter.ToCharUpper(current);
}
Comment on lines +30 to +40
else
{
sanitized.Append(current);
}
}

return sanitized.ToString();
}

private static char ToHexChar(int value) =>
(char)(value < 10 ? '0' + value : 'A' + value - 10);

private static int GetFirstEscapedCharacterIndex(string value)
{
for (int i = 0; i < value.Length; i++)
{
if (ShouldEscape(value[i]))
{
return i;
}
}

return -1;
}

private static bool ShouldEscape(char c)
{
return c switch
{
'\u0000' => true, // NUL
'\u0001' => true, // SOH
'\u0002' => true, // STX
'\u0003' => true, // ETX
'\u0004' => true, // EOT
'\u0005' => true, // ENQ
'\u0006' => true, // ACK
'\u0007' => true, // BEL
'\u0008' => true, // BS
// '\u0009' HT (tab) - preserved for log formatting
// '\u000A' LF (newline) - preserved for log formatting
'\u000B' => true, // VT
'\u000C' => true, // FF
// '\u000D' CR (carriage return) - preserved for log formatting
'\u000E' => true, // SO
'\u000F' => true, // SI
Comment thread
rosebyte marked this conversation as resolved.
'\u0010' => true, // DLE
'\u0011' => true, // DC1
'\u0012' => true, // DC2
'\u0013' => true, // DC3
'\u0014' => true, // DC4
'\u0015' => true, // NAK
'\u0016' => true, // SYN
'\u0017' => true, // ETB
'\u0018' => true, // CAN
'\u0019' => true, // EM
'\u001A' => true, // SUB
'\u001B' => true, // ESC
'\u001C' => true, // FS
'\u001D' => true, // GS
'\u001E' => true, // RS
'\u001F' => true, // US
'\u007F' => true, // DEL
>= '\u0080' and <= '\u009F' => true, // C1 control range
>= '\u200B' and <= '\u200F' => true, // zero-width and directional marks
>= '\u202A' and <= '\u202E' => true, // bidi embedding/override
>= '\u2066' and <= '\u2069' => true, // bidi isolates
_ => false,

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am curious how did we choose regions? There are many other questionable ranges. For example https://en.wikipedia.org/wiki/Tags_(Unicode_block). I think the more proper way would be to decide based on https://learn.microsoft.com/en-us/dotnet/api/system.char.getunicodecategory?view=net-10.0 but did not check for any edge cases using this approach. I also recommend reading https://learn.microsoft.com/en-us/dotnet/standard/base-types/character-encoding-introduction which list many interesting and possibly error prone parts.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree. Can we leverage GetUnicodeCategory without giving up perf using SearchValues? At least for the fast non-sanitizing path? E.g:

// Built once. GetUnicodeCategory cost is paid here, not per log call.
private static readonly SearchValues<char> s_charsToEscape = SearchValues.Create(BuildEscapeSet());

private static char[] BuildEscapeSet()
{
    var chars = new List<char>(256);
    for (int c = 0; c <= 0xFFFF; c++)
    {
        if (c is '\t' or '\r' or '\n')
            continue;
        if (char.GetUnicodeCategory((char)c) is UnicodeCategory.Control or UnicodeCategory.Format)
            chars.Add((char)c);
    }
    return chars.ToArray();
}

public static string Sanitize(string value)
{
    int idx = value.AsSpan().IndexOfAny(s_charsToEscape);   // vectorized

    if (idx < 0)
        return value;

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there no problematic code points outside the BMP (i.e. those that would be composed of two chars)?

};
Comment thread
rosebyte marked this conversation as resolved.
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
<Compile Include="$(CommonPath)Extensions\Logging\NullScope.cs" Link="Common\src\Extensions\Logging\NullScope.cs" />
<Compile Include="$(CommonPath)System\Net\ArrayBuffer.cs" Link="Common\System\Net\ArrayBuffer.cs" />
<Compile Include="$(CommonPath)System\Text\Json\PooledByteBufferWriter.cs" Link="Common\System\Text\Json\PooledByteBufferWriter.cs" />
<Compile Include="$(CommonPath)System\Text\ValueStringBuilder.cs" Link="Common\System\Text\ValueStringBuilder.cs" />
</ItemGroup>

<ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ public override void Write<TState>(in LogEntry<TState> logEntry, IExternalScopeP
private void WriteInternal(IExternalScopeProvider? scopeProvider, TextWriter textWriter, string message, LogLevel logLevel,
int eventId, string? exception, string category, DateTimeOffset stamp)
{
message = ConsoleControlCharacterSanitizer.Sanitize(message)!;
exception = ConsoleControlCharacterSanitizer.Sanitize(exception);
category = ConsoleControlCharacterSanitizer.Sanitize(category)!;
Comment thread
rosebyte marked this conversation as resolved.
Comment thread
rosebyte marked this conversation as resolved.

Comment thread
rosebyte marked this conversation as resolved.
Comment thread
rosebyte marked this conversation as resolved.
ConsoleColors logLevelColors = GetLogLevelConsoleColors(logLevel);
string logLevelString = GetLogLevelString(logLevel);

Expand Down Expand Up @@ -215,7 +219,8 @@ private void WriteScopeInformation(TextWriter textWriter, IExternalScopeProvider
{
state.Write(" => ");
}
state.Write(scope);
string? scopeMessage = ConsoleControlCharacterSanitizer.Sanitize(scope?.ToString());
state.Write(scopeMessage);
}, textWriter);

if (!paddingNeeded && !singleLine)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ public override void Write<TState>(in LogEntry<TState> logEntry, IExternalScopeP
private void WriteInternal(IExternalScopeProvider? scopeProvider, TextWriter textWriter, string message, LogLevel logLevel, string category,
int eventId, string? exception, DateTimeOffset stamp)
{
message = ConsoleControlCharacterSanitizer.Sanitize(message)!;
exception = ConsoleControlCharacterSanitizer.Sanitize(exception);
category = ConsoleControlCharacterSanitizer.Sanitize(category)!;
Comment thread
rosebyte marked this conversation as resolved.

Comment thread
rosebyte marked this conversation as resolved.
// systemd reads messages from standard out line-by-line in a '<pri>message' format.
// newline characters are treated as message delimiters, so we must replace them.
// Messages longer than the journal LineMax setting (default: 48KB) are cropped.
Expand Down Expand Up @@ -139,7 +143,8 @@ private void WriteScopeInformation(TextWriter textWriter, IExternalScopeProvider
scopeProvider.ForEachScope((scope, state) =>
{
state.Write(" => ");
state.Write(scope);
string? scopeMessage = ConsoleControlCharacterSanitizer.Sanitize(scope?.ToString());
state.Write(scopeMessage);
}, textWriter);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,68 @@ public void NullFormatterName_Throws()
Assert.Throws<ArgumentNullException>(() => new NullNameConsoleFormatter());
}

[ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsMultithreadingSupported))]
[MemberData(nameof(NonJsonFormatterNames))]
public void Log_DangerousControlCharacters_AreSanitized(string formatterName)
{
using var t = SetUp(
new ConsoleLoggerOptions { FormatterName = formatterName },
new SimpleConsoleFormatterOptions { ColorBehavior = LoggerColorBehavior.Disabled },
new ConsoleFormatterOptions(),
new JsonConsoleFormatterOptions());
Comment thread
rosebyte marked this conversation as resolved.
var logger = (ILogger)t.Logger;
Comment thread
rosebyte marked this conversation as resolved.
var sink = t.Sink;

logger.LogInformation("Payload: {Value}", "prefix\u001b[31mtext\u0008\u202E\r\n\tsuffix");

string output = GetMessage(sink.Writes);
Assert.DoesNotContain('\u001b', output);
Assert.DoesNotContain('\u0008', output);
Assert.DoesNotContain('\u202E', output);
Assert.Contains("\\u001B", output);
Assert.Contains("\\u0008", output);
Assert.Contains("\\u202E", output);
}
Comment thread
rosebyte marked this conversation as resolved.

[ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsMultithreadingSupported))]
[MemberData(nameof(NonJsonFormatterNames))]
public void Log_SafeWhitespace_IsNotEscaped(string formatterName)
{
using var t = SetUp(
new ConsoleLoggerOptions { FormatterName = formatterName },
new SimpleConsoleFormatterOptions { ColorBehavior = LoggerColorBehavior.Disabled },
new ConsoleFormatterOptions(),
new JsonConsoleFormatterOptions());
var logger = (ILogger)t.Logger;
Comment thread
rosebyte marked this conversation as resolved.
var sink = t.Sink;

logger.LogInformation("Line1\nLine2\tIndented");

string output = GetMessage(sink.Writes);
Assert.DoesNotContain("\\u000A", output);
Assert.DoesNotContain("\\u0009", output);
}

[ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsMultithreadingSupported))]
[InlineData(ConsoleFormatterNames.Json)]
public void Log_Json_ControlCharacters_EscapedByWriter(string formatterName)
{
using var t = SetUp(
new ConsoleLoggerOptions { FormatterName = formatterName },
new SimpleConsoleFormatterOptions(),
new ConsoleFormatterOptions(),
new JsonConsoleFormatterOptions());
var logger = (ILogger)t.Logger;
var sink = t.Sink;

logger.LogInformation("Payload: {Value}", "prefix\u001b[31mtext\u0008\u202Esuffix");

string output = GetMessage(sink.Writes);
Assert.DoesNotContain('\u001b', output);
Assert.DoesNotContain('\u0008', output);
Assert.DoesNotContain('\u202E', output);
}
Comment on lines +234 to +252

private class NullNameConsoleFormatter : ConsoleFormatter
{
public NullNameConsoleFormatter() : base(null) { }
Expand Down Expand Up @@ -226,6 +288,17 @@ public static TheoryData<string> FormatterNames
}
}

public static TheoryData<string> NonJsonFormatterNames
{
get
{
var data = new TheoryData<string>();
data.Add(ConsoleFormatterNames.Simple);
data.Add(ConsoleFormatterNames.Systemd);
return data;
}
}

public static TheoryData<LogLevel> Levels
{
get
Expand Down
Loading