Skip to content

Commit df02d00

Browse files
ChALkeR and gurgunday
authored and committed
src: improve StringBytes::Encode perf on UTF8
Co-authored-by: Gürgün Dayıoğlu <[email protected]>
PR-URL: #61131
Reviewed-By: Gürgün Dayıoğlu <[email protected]>
Reviewed-By: Stephen Belanger <[email protected]>
Reviewed-By: Matteo Collina <[email protected]>
Reviewed-By: Daniel Lemire <[email protected]>
Reviewed-By: Anna Henningsen <[email protected]>
Reviewed-By: Rafael Gonzaga <[email protected]>
1 parent 2939950 commit df02d00

File tree

2 files changed

+21
-0
lines changed

2 files changed

+21
-0
lines changed

src/encoding_binding.cc

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -379,6 +379,8 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
379379
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
380380
env->isolate(), "The encoded data was not valid for encoding utf-8");
381381
}
382+
383+
// TODO(chalker): save on utf8 validity recheck in StringBytes::Encode()
382384
}
383385

384386
if (length == 0) return args.GetReturnValue().SetEmptyString();

src/string_bytes.cc

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -539,6 +539,25 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
539539
return ExternOneByteString::NewFromCopy(isolate, buf, buflen);
540540
}
541541

542+
if (buflen >= 32 && simdutf::validate_utf8(buf, buflen)) {
543+
// We know that we are non-ASCII (and are unlikely Latin1), use 2-byte
544+
// In the most likely case of valid UTF-8, we can use this fast impl
545+
// For very short input, it is slower, so we limit min size
546+
size_t u16size = simdutf::utf16_length_from_utf8(buf, buflen);
547+
if (u16size > static_cast<size_t>(v8::String::kMaxLength)) {
548+
isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
549+
return MaybeLocal<Value>();
550+
}
551+
uint16_t* dst = node::UncheckedMalloc<uint16_t>(u16size);
552+
if (u16size != 0 && dst == nullptr) {
553+
THROW_ERR_MEMORY_ALLOCATION_FAILED(isolate);
554+
return MaybeLocal<Value>();
555+
}
556+
size_t utf16len = simdutf::convert_valid_utf8_to_utf16(
557+
buf, buflen, reinterpret_cast<char16_t*>(dst));
558+
return ExternTwoByteString::New(isolate, dst, utf16len);
559+
}
560+
542561
val =
543562
String::NewFromUtf8(isolate, buf, v8::NewStringType::kNormal, buflen);
544563
Local<String> str;

0 commit comments

Comments
 (0)