Adding benchmarks for JsonDocument.Parse, GetProperty, and Enumerates.#852
Adding benchmarks for JsonDocument.Parse, GetProperty, and Enumerates.#852adamsitnik merged 14 commits intodotnet:masterfrom
Conversation
Benchmark results.Perf_DocumentParse
Perf_EnumerateArray
Perf_EnumerateObject
Perf_ParseThenWrite
|
|
@adamsitnik is there a way to not need to create/update xlf files for resx files? |
|
@jozkee I found these benchmarks really useful thanks. Is this PR abandoned? I would love to see |
Serialization/Deserialization benchmarks already exist which measure the This PR is specifically for measuring the performance of the Document Object Model (DOM) APIs which include |
| public enum TestCaseType | ||
| { | ||
| HelloWorld, | ||
| BasicJson, |
There was a problem hiding this comment.
nit: spacing
| BasicJson, | |
| BasicJson, |
|
@jozkee -- is this abandoned? |
adamsitnik
left a comment
There was a problem hiding this comment.
Overall looks good to me, but I would prefer to fix a few things before we merge it.
Thank you @jozkee and @steveharter !
| private static string ReadJson400B(JsonElement elem) | ||
| { | ||
| var sb = new StringBuilder(); | ||
| for (int i = 0; i < elem.GetArrayLength(); i++) | ||
| { | ||
| sb.Append(elem[i].GetProperty("_id").GetString()); | ||
| sb.Append(elem[i].GetProperty("index").GetInt32()); | ||
| sb.Append(elem[i].GetProperty("isActive").GetBoolean()); | ||
| sb.Append(elem[i].GetProperty("balance").GetString()); | ||
| sb.Append(elem[i].GetProperty("picture").GetString()); | ||
| sb.Append(elem[i].GetProperty("age").GetInt32()); | ||
| sb.Append(elem[i].GetProperty("email").GetString()); | ||
| sb.Append(elem[i].GetProperty("phone").GetString()); | ||
| sb.Append(elem[i].GetProperty("address").GetString()); | ||
| sb.Append(elem[i].GetProperty("registered").GetString()); | ||
| sb.Append(elem[i].GetProperty("latitude").GetDouble()); | ||
| sb.Append(elem[i].GetProperty("longitude").GetDouble()); | ||
| } | ||
| return sb.ToString(); | ||
| } |
There was a problem hiding this comment.
I am afraid that creating a big string builder and appending a lot of content to it might dominate the reported execution time. Instead of this, we can just create an int variable and add the values/lengths to it.
| private static string ReadJson400B(JsonElement elem) | |
| { | |
| var sb = new StringBuilder(); | |
| for (int i = 0; i < elem.GetArrayLength(); i++) | |
| { | |
| sb.Append(elem[i].GetProperty("_id").GetString()); | |
| sb.Append(elem[i].GetProperty("index").GetInt32()); | |
| sb.Append(elem[i].GetProperty("isActive").GetBoolean()); | |
| sb.Append(elem[i].GetProperty("balance").GetString()); | |
| sb.Append(elem[i].GetProperty("picture").GetString()); | |
| sb.Append(elem[i].GetProperty("age").GetInt32()); | |
| sb.Append(elem[i].GetProperty("email").GetString()); | |
| sb.Append(elem[i].GetProperty("phone").GetString()); | |
| sb.Append(elem[i].GetProperty("address").GetString()); | |
| sb.Append(elem[i].GetProperty("registered").GetString()); | |
| sb.Append(elem[i].GetProperty("latitude").GetDouble()); | |
| sb.Append(elem[i].GetProperty("longitude").GetDouble()); | |
| } | |
| return sb.ToString(); | |
| } | |
| private static int ReadJson400B(JsonElement elem) | |
| { | |
| int result = 0; | |
| for (int i = 0; i < elem.GetArrayLength(); i++) | |
| { | |
| result += elem[i].GetProperty("_id").GetString().Length; | |
| result += elem[i].GetProperty("index").GetInt32()); | |
| result += (int)elem[i].GetProperty("isActive").GetBoolean(); | |
| result += elem[i].GetProperty("balance").GetString().Length; | |
| result += elem[i].GetProperty("picture").GetString().Length; | |
| result += elem[i].GetProperty("age").GetInt32(); | |
| result += elem[i].GetProperty("email").GetString().Length; | |
| result += elem[i].GetProperty("phone").GetString().Length; | |
| result += elem[i].GetProperty("address").GetString().Length; | |
| result += elem[i].GetProperty("registered").GetString().Length; | |
| result += (int)elem[i].GetProperty("latitude").GetDouble(); | |
| result += (int)elem[i].GetProperty("longitude").GetDouble(); | |
| } | |
| return result; | |
| } |
| public bool IsDataCompact; | ||
|
|
||
| [Params(false, true)] | ||
| public bool TestRandomAccess; |
There was a problem hiding this comment.
I am not sure whether this name describes what it does. To me, it should rather be called AccessProperties
There was a problem hiding this comment.
The "random access" terminology comes from trying to access the JSON properties from a DOM in potentially out-of-order fashion.
The reader gives access forward only, linearly. A tree structure allows for random access.
https://en.wikipedia.org/wiki/Random_access
Random access (more precisely and more generally called direct access) is the ability to access an arbitrary element of a sequence in equal time or any datum from a population of addressable elements roughly as easily and efficiently as any other, no matter how many elements may be in the set. In computer science it is typically contrasted to sequential access which requires data to be retrieved in the order it was stored.
There was a problem hiding this comment.
The reader gives access forward only, linearly. A tree structure allows for random access.
@ahsonkhan thanks for the explanation. I was not aware of this implementation detail and it was not obvious for me when looking at the benchmarks
| sb.Append(address.GetProperty("city").GetString()); | ||
| sb.Append(address.GetProperty("zip").GetInt32()); | ||
| return sb.ToString(); | ||
| } |
There was a problem hiding this comment.
same as above, we should use a cheaper way of consuming the result
| sb.Append(elem[i].GetProperty("favoriteFruit").GetString()); | ||
| } | ||
| return sb.ToString(); | ||
| } |
| [Benchmark] | ||
| public void ParseAndEnumerateArray() | ||
| { | ||
| JsonDocument obj = JsonDocument.Parse(_dataUtf8); | ||
| JsonElement elem = obj.RootElement; | ||
|
|
||
| for (int j = 0; j < IterationCount; j++) | ||
| { | ||
| foreach (JsonElement withinArray in elem.EnumerateArray()) | ||
| { | ||
| } | ||
| } | ||
|
|
||
| obj.Dispose(); | ||
| } | ||
|
|
||
| [Benchmark] | ||
| public void ParseAndIterateUsingIndex() | ||
| { | ||
| JsonDocument obj = JsonDocument.Parse(_dataUtf8); | ||
| JsonElement elem = obj.RootElement; | ||
| int arrayLength = elem.GetArrayLength(); | ||
|
|
||
| for (int j = 0; j < IterationCount; j++) | ||
| { | ||
| for (int i = 0; i < arrayLength; i++) | ||
| { | ||
| JsonElement withinArray = elem[i]; | ||
| } | ||
| } | ||
|
|
||
| obj.Dispose(); | ||
| } | ||
|
|
||
| [Benchmark] | ||
| public void ParseAndIterateUsingIndexReverse() | ||
| { | ||
| JsonDocument obj = JsonDocument.Parse(_dataUtf8); | ||
| JsonElement elem = obj.RootElement; | ||
| int arrayLength = elem.GetArrayLength(); | ||
|
|
||
| for (int j = 0; j < IterationCount; j++) | ||
| { | ||
| for (int i = arrayLength - 1; i >= 0; i--) | ||
| { | ||
| JsonElement withinArray = obj.RootElement[i]; | ||
| } | ||
| } | ||
|
|
||
| obj.Dispose(); | ||
| } | ||
|
|
||
| [Benchmark] | ||
| public void ParseAndGetFirst() | ||
| { | ||
| JsonDocument obj = JsonDocument.Parse(_dataUtf8); | ||
| JsonElement elem = obj.RootElement; | ||
| int arrayLength = elem.GetArrayLength(); | ||
|
|
||
| for (int j = 0; j < IterationCount; j++) | ||
| { | ||
| for (int i = 0; i < arrayLength; i++) | ||
| { | ||
| JsonElement withinArray = obj.RootElement[0]; | ||
| } | ||
| } | ||
|
|
||
| obj.Dispose(); | ||
| } | ||
|
|
||
| [Benchmark] | ||
| public void ParseAndGetMiddle() | ||
| { | ||
| JsonDocument obj = JsonDocument.Parse(_dataUtf8); | ||
| JsonElement elem = obj.RootElement; | ||
| int arrayLength = elem.GetArrayLength(); | ||
|
|
||
| for (int j = 0; j < IterationCount; j++) | ||
| { | ||
| for (int i = 0; i < arrayLength; i++) | ||
| { | ||
| JsonElement withinArray = elem[arrayLength / 2]; | ||
| } | ||
| } | ||
|
|
||
| obj.Dispose(); | ||
| } | ||
| } |
There was a problem hiding this comment.
I've tried to wrap my head around these benchmarks and I think that the best approach here would be to have:
- 1 benchmark just for parsing
- 1 benchmark for just
EnumerateArray over a pre-parsed document (in the setup) - 1 benchmark for enumerating a pre-parsed document (in the setup) using an indexer
This would reduce the number of benchmarks: fewer benchmarks to track for devs and less time to run the benchmarks. And also allow for easier identification of the regression: if a benchmark that parses and iterates over an array of ints regressed, what has regressed: parsing or iterating?
| [Benchmark] | |
| public void ParseAndEnumerateArray() | |
| { | |
| JsonDocument obj = JsonDocument.Parse(_dataUtf8); | |
| JsonElement elem = obj.RootElement; | |
| for (int j = 0; j < IterationCount; j++) | |
| { | |
| foreach (JsonElement withinArray in elem.EnumerateArray()) | |
| { | |
| } | |
| } | |
| obj.Dispose(); | |
| } | |
| [Benchmark] | |
| public void ParseAndIterateUsingIndex() | |
| { | |
| JsonDocument obj = JsonDocument.Parse(_dataUtf8); | |
| JsonElement elem = obj.RootElement; | |
| int arrayLength = elem.GetArrayLength(); | |
| for (int j = 0; j < IterationCount; j++) | |
| { | |
| for (int i = 0; i < arrayLength; i++) | |
| { | |
| JsonElement withinArray = elem[i]; | |
| } | |
| } | |
| obj.Dispose(); | |
| } | |
| [Benchmark] | |
| public void ParseAndIterateUsingIndexReverse() | |
| { | |
| JsonDocument obj = JsonDocument.Parse(_dataUtf8); | |
| JsonElement elem = obj.RootElement; | |
| int arrayLength = elem.GetArrayLength(); | |
| for (int j = 0; j < IterationCount; j++) | |
| { | |
| for (int i = arrayLength - 1; i >= 0; i--) | |
| { | |
| JsonElement withinArray = obj.RootElement[i]; | |
| } | |
| } | |
| obj.Dispose(); | |
| } | |
| [Benchmark] | |
| public void ParseAndGetFirst() | |
| { | |
| JsonDocument obj = JsonDocument.Parse(_dataUtf8); | |
| JsonElement elem = obj.RootElement; | |
| int arrayLength = elem.GetArrayLength(); | |
| for (int j = 0; j < IterationCount; j++) | |
| { | |
| for (int i = 0; i < arrayLength; i++) | |
| { | |
| JsonElement withinArray = obj.RootElement[0]; | |
| } | |
| } | |
| obj.Dispose(); | |
| } | |
| [Benchmark] | |
| public void ParseAndGetMiddle() | |
| { | |
| JsonDocument obj = JsonDocument.Parse(_dataUtf8); | |
| JsonElement elem = obj.RootElement; | |
| int arrayLength = elem.GetArrayLength(); | |
| for (int j = 0; j < IterationCount; j++) | |
| { | |
| for (int i = 0; i < arrayLength; i++) | |
| { | |
| JsonElement withinArray = elem[arrayLength / 2]; | |
| } | |
| } | |
| obj.Dispose(); | |
| } | |
| } | |
| [Benchmark] | |
| public void Parse() | |
| { | |
| JsonDocument obj = JsonDocument.Parse(_dataUtf8); | |
| JsonElement elem = obj.RootElement; | |
| obj.Dispose(); | |
| } | |
| [Benchmark] | |
| public int EnumerateArray() | |
| { | |
| int count = 0; | |
| foreach (JsonElement withinArray in _elem.EnumerateArray()) | |
| { | |
| count++; | |
| } | |
| return count; | |
| } | |
| [Benchmark] | |
| public void IterateUsingIndexer() | |
| { | |
| int arrayLength = _elem.GetArrayLength(); | |
| for (int j = 0; j < arrayLength; j++) | |
| { | |
| _ = elem[j]; | |
| } | |
| } | |
| } |
There was a problem hiding this comment.
I agree with the separation. However, we should keep the reverse indexer and possibly even the repeatedly accessing length/2 element separate test cases.
There was a problem hiding this comment.
However, we should keep the reverse indexer and possibly even the repeatedly accessing length/2 element separate test cases.
Is it possible that we could introduce a regression to the product that would be detected by only one of these benchmarks? If not, we should not have that many benchmarks
There was a problem hiding this comment.
Is it possible that we could introduce a regression to the product that would be detected by only one of these benchmarks?
From what I recall, yes. The iteration logic of the JsonElement has some complexity due to the way it is optimized which can regress some cases if modified (it is not a simple dictionary look up which is almost always O(1)).
That said, someone should confirm and validate that, because I could be mistaken :)
There was a problem hiding this comment.
done. I removed the reverse test since it is by index, not an enumerator, and I don't see how that would regress.
|
|
||
| obj.Dispose(); | ||
| } | ||
| } |
There was a problem hiding this comment.
similar to the previous type with benchmarks, I would refactor it into two benchmarks:
- 1 for parsing only
- 1 for
EnumerateObjectover a pre-parsed element
| // Remove all formatting/indentation | ||
| using (var jsonReader = new JsonTextReader(new StringReader(jsonString))) | ||
| using (var stringWriter = new StringWriter()) | ||
| using (var jsonWriter = new JsonTextWriter(stringWriter)) | ||
| { | ||
| JToken obj = JToken.ReadFrom(jsonReader); | ||
| obj.WriteTo(jsonWriter); | ||
| jsonString = stringWriter.ToString(); | ||
| } |
There was a problem hiding this comment.
this logic exists in all 4 classes, we should consider refactoring it into a helper type to avoid code duplication and possible bugs
| public TestCaseType TestCase; | ||
|
|
||
| [Params(true, false)] | ||
| public bool IsDataCompact; |
There was a problem hiding this comment.
would Indented be a better and more consistent name?
There was a problem hiding this comment.
Maybe. Indented is the opposite of IsDataCompact, so we would have to flip the boolean if we choose to change it. I think IsDataCompact is reasonably self-describing.
How is Indented more consistent?
There was a problem hiding this comment.
How is Indented more consistent?
In the setup we have the following usage of this property: new JsonWriterOptions { Indented = !IsDataCompact }
So we have two names that describe the same thing
There was a problem hiding this comment.
I see what you mean.
That setup is for a different test (Perf.ParseThenWrite.cs) and using Indented is a little strange for a parsing-only test like Perf.DocumentParse.cs .
The way I thought about it was:
- IsDataCompact is about the characteristic of the data provided by the benchmark setup which being read/parsed to build a DOM.
- The Indented option is to control how to write the JSON back in the benchmark itself.
But you do bring up a good point about terminology. Maybe, rename it to IsDataIndented to avoid having multiple terms for the same concept?
| public bool IsDataCompact; | |
| public bool IsDataIndented; |
| public void ParseThenWrite() | ||
| { | ||
|
|
||
| var arrayBufferWriter = new ArrayBufferWriter<byte>(); |
There was a problem hiding this comment.
should we allocate a new instance of the ArrayBufferWriter for every benchmark invocation or move it to a setup method and reset after every benchmark invocation? what do our users typically do when they use this API?
https://github.com/dotnet/performance/blob/master/docs/microbenchmark-design-guidelines.md#setup
There was a problem hiding this comment.
Good point. I think if someone wanted to optimize their code, they would re-use the instance. So I'll change it to clear.
The issue previously was that it was not cleared and continued to grow, plus I found a case where that could cause a hang which I am still investigating but unable to repro yet outside of the benchmarks.
| _arrayBufferWriter.Clear(); | ||
|
|
||
| using (JsonDocument document = JsonDocument.Parse(_dataUtf8)) | ||
| using (Utf8JsonWriter writer = new Utf8JsonWriter(_arrayBufferWriter, new JsonWriterOptions { Indented = !IsDataCompact })) | ||
| { | ||
| document.WriteTo(writer); | ||
| } |
There was a problem hiding this comment.
You could go one step further and cache the writer as a field and re-use that, calling writer.Reset() on it, instead of explicitly clearing the ABW.
Reset clears the writer state including the underlying ABW. And then dispose the writer on the global cleanup of the benchmark.
| [Benchmark] | ||
| public void ParseThenWrite() | ||
| { | ||
| _arrayBufferWriter.Clear(); |
There was a problem hiding this comment.
Make sure to dispose this ABW on global cleanup of the benchmark set to avoid exhausting the arraypool between runs, that might impact perf of other benchmarks that might be running in parallel.
There was a problem hiding this comment.
ABW does not use the pool
| <DebugType>portable</DebugType> | ||
| <DebugSymbols>true</DebugSymbols> | ||
| <AllowUnsafeBlocks>true</AllowUnsafeBlocks> | ||
| <EnableXlfLocalization>false</EnableXlfLocalization> |
There was a problem hiding this comment.
@adamsitnik can this be set? It appears to solve my local issues with having to manually generate the XLF files and then ignore during commit.
Do we have localized resources?
There was a problem hiding this comment.
@steveharter as long as it makes your dev experience better and the CI is green, it's perfectly fine
Do we have localized resources?
afaik not. All resources were added to avoid having big strings defined as const string?
| return Encoding.UTF8.GetBytes(jsonString); | ||
| } | ||
| } | ||
| } No newline at end of file |
| result += elem.GetProperty("first").GetString().Length; | ||
| result += elem.GetProperty("last").GetString().Length; | ||
|
|
||
| JsonElement phoneNumbers = elem.GetProperty("phoneNumbers"); |
There was a problem hiding this comment.
| JsonElement phoneNumbers = elem.GetProperty("phoneNumbers"); | |
| JsonElement phoneNumbers = elem.GetProperty("phoneNumbers"); |
| string jsonString = JsonStrings.ResourceManager.GetString(TestCase.ToString()); | ||
| _dataUtf8 = DocumentHelpers.RemoveFormatting(jsonString); | ||
|
|
||
| JsonDocument document = JsonDocument.Parse(_dataUtf8); |
There was a problem hiding this comment.
Store this as a field and dispose it at global cleanup to avoid exhausting the underlying pool. JsonDocument is IDisposable.
| int arrayLength = _element.GetArrayLength(); | ||
| for (int j = 0; j < arrayLength; j++) | ||
| { | ||
| _ = _element[j]; |
There was a problem hiding this comment.
Will BDN optimize this indexer access out? Do we need to aggregate the result somehow and return it?
There was a problem hiding this comment.
I don't think so, since calling element[j] could have side-effects.
However, we did all of the work prior to this to change from string\StringBuffer to int\Count() apparently out of the same concern.
@adamsitnik do benchmarks have to worry about code being optimized out? If not, we should also remove the int\Count() work that was done earlier (originally StringBuffer).
| string jsonString = JsonStrings.ResourceManager.GetString(TestCase.ToString()); | ||
| _dataUtf8 = DocumentHelpers.RemoveFormatting(jsonString); | ||
|
|
||
| JsonDocument document = JsonDocument.Parse(_dataUtf8); |
There was a problem hiding this comment.
Same here/elsewhere. Remember to dispose.
There was a problem hiding this comment.
Yep. Thanks. Probably wouldn't matter for the benchmark numbers (perf not affected; memory not exhausted), but we should definitely do it.
| [Benchmark] | ||
| public void EnumerateProperties() | ||
| { | ||
| foreach (JsonProperty withinArray in _element.EnumerateObject()) { } |
There was a problem hiding this comment.
Similarly, will this actually go through the loop while measuring?
There was a problem hiding this comment.
Yes. I verified this and a couple other areas that the jitter won't remove. So I also removed the int-based "Count" logic.
| if (TestCase == TestCaseType.HelloWorld) | ||
| { | ||
| ReadHelloWorld(document.RootElement); | ||
| } | ||
| else if (TestCase == TestCaseType.Json400B) | ||
| { | ||
| ReadJson400B(document.RootElement); | ||
| } | ||
| else if (TestCase == TestCaseType.BasicJson) | ||
| { | ||
| ReadJsonBasic(document.RootElement); | ||
| } | ||
| else if (TestCase == TestCaseType.Json400KB) | ||
| { | ||
| ReadJson400KB(document.RootElement); | ||
| } |
adamsitnik
left a comment
There was a problem hiding this comment.
LGTM, thank you @steveharter !
| <DebugType>portable</DebugType> | ||
| <DebugSymbols>true</DebugSymbols> | ||
| <AllowUnsafeBlocks>true</AllowUnsafeBlocks> | ||
| <EnableXlfLocalization>false</EnableXlfLocalization> |
There was a problem hiding this comment.
@steveharter as long as it makes your dev experience better and the CI is green, it's perfectly fine
Do we have localized resources?
afaik not. All resources were added to avoid having big strings defined as const string?
src/benchmarks/micro/libraries/System.Text.Json/Document/EnumerateObject.cs
Outdated
Show resolved
Hide resolved
src/benchmarks/micro/libraries/System.Text.Json/Document/EnumerateArray.cs
Outdated
Show resolved
Hide resolved
|
the CI failures are not related (this PR added microbenchmarks, did not touch scenarios) so I am merging |
Fixes #792