-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathStringTableBenchmark.MultiKeySort.cs
More file actions
132 lines (118 loc) · 3.96 KB
/
StringTableBenchmark.MultiKeySort.cs
File metadata and controls
132 lines (118 loc) · 3.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
using System.Buffers;
using System.Text;
using BenchmarkDotNet.Attributes;
using Microsoft.Diagnostics.Tracing.Parsers.Clr;
public partial class StringTableBenchmarks
{
public ulong MultiKeySortByteSize;
[Benchmark]
public void MultiKeySort()
{
// Copy the input
Span<string> inputSpan = inputData.ToArray();
// Output data
var stringTable = new ArrayBufferWriter<byte>();
var stringToOffset = new Dictionary<string, int>(inputSpan.Length);
// Pre-sort the string based on their matching suffix
MultiKeySort(inputSpan, 0);
// Compose the final string table
string? lastSymbol = null;
for (int i = 0; i < inputSpan.Length; i++)
{
var str = inputSpan[i];
if (lastSymbol != null && lastSymbol.EndsWith(str, StringComparison.Ordinal))
{
// Suffix matches the last symbol
stringToOffset.Add(str, stringTable.WrittenCount - Encoding.UTF8.GetByteCount(str) - 1);
}
else
{
lastSymbol = str;
stringToOffset.Add(str, stringTable.WrittenCount);
int utf8Length = Encoding.UTF8.GetByteCount(str.AsSpan());
var utf8Span = stringTable.GetSpan(utf8Length + 1);
Encoding.UTF8.GetBytes(str, utf8Span);
utf8Span[utf8Length] = 0;
stringTable.Advance(utf8Length + 1);
}
}
MultiKeySortByteSize = (ulong)stringTable.WrittenCount;
static char TailCharacter(string str, int pos)
{
int index = str.Length - pos - 1;
if ((uint)index < str.Length)
return str[index];
return '\0';
}
static void MultiKeySort(Span<string> input, int pos)
{
if (!MultiKeySortSmallInput(input, pos))
{
MultiKeySortLargeInput(input, pos);
}
}
static void MultiKeySortLargeInput(Span<string> input, int pos)
{
tailcall:
char pivot = TailCharacter(input[0], pos);
int l = 0, h = input.Length;
for (int i = 1; i < h;)
{
char c = TailCharacter(input[i], pos);
if (c > pivot)
{
(input[l], input[i]) = (input[i], input[l]);
l++; i++;
}
else if (c < pivot)
{
h--;
(input[h], input[i]) = (input[i], input[h]);
}
else
{
i++;
}
}
MultiKeySort(input.Slice(0, l), pos);
MultiKeySort(input.Slice(h), pos);
if (pivot != '\0')
{
// Use a loop as a poor man's tailcall
// MultiKeySort(input.Slice(l, h - l), pos + 1);
pos++;
input = input.Slice(l, h - l);
if (!MultiKeySortSmallInput(input, pos))
{
goto tailcall;
}
}
}
static bool MultiKeySortSmallInput(Span<string> input, int pos)
{
if (input.Length <= 1)
return true;
// Optimize comparing two strings
if (input.Length == 2)
{
while (true)
{
char c0 = TailCharacter(input[0], pos);
char c1 = TailCharacter(input[1], pos);
if (c0 < c1)
{
(input[0], input[1]) = (input[1], input[0]);
break;
}
else if (c0 > c1 || c0 == (char)0)
{
break;
}
pos++;
}
return true;
}
return false;
}
}
}