diff --git a/src/lang/text/include/sourcemeta/core/text.h b/src/lang/text/include/sourcemeta/core/text.h
index 9c5766bcf..5bc34b678 100644
--- a/src/lang/text/include/sourcemeta/core/text.h
+++ b/src/lang/text/include/sourcemeta/core/text.h
@@ -317,6 +317,23 @@ auto join_to(std::ostream &stream, const Range &items,
   }
 }
 
+/// @ingroup text
+///
+/// Decode a hexadecimal string into its raw bytes, returning no value when
+/// the input has an odd length or contains a character outside the
+/// hexadecimal alphabet. Both letter cases are accepted. For example:
+///
+/// ```cpp
+/// #include <sourcemeta/core/text.h>
+/// #include <cassert>
+///
+/// const auto bytes{sourcemeta::core::hex_to_bytes("666f6f")};
+/// assert(bytes.has_value());
+/// assert(bytes.value() == "foo");
+/// ```
+SOURCEMETA_CORE_TEXT_EXPORT
+auto hex_to_bytes(const std::string_view input) -> std::optional<std::string>;
+
 /// @ingroup text
 ///
 /// Return `input` with `suffix` removed from the end under ASCII
diff --git a/src/lang/text/text.cc b/src/lang/text/text.cc
index e7e95addf..86ce21d2c 100644
--- a/src/lang/text/text.cc
+++ b/src/lang/text/text.cc
@@ -1,6 +1,7 @@
 #include <sourcemeta/core/text.h>
 
 #include <cstddef>    // std::size_t
+#include <cstdint>    // std::int8_t
 #include <filesystem> // std::filesystem::path
 #include <optional>   // std::optional, std::nullopt
 #include <string>     // std::string
@@ -25,6 +26,18 @@ auto to_ascii_uppercase(const char character) noexcept -> char {
              : character;
 }
 
+auto hex_digit_value(const char character) noexcept -> std::int8_t {
+  if (character >= '0' && character <= '9') {
+    return static_cast<std::int8_t>(character - '0');
+  } else if (character >= 'a' && character <= 'f') {
+    return static_cast<std::int8_t>(character - 'a' + 10);
+  } else if (character >= 'A' && character <= 'F') {
+    return static_cast<std::int8_t>(character - 'A' + 10);
+  } else {
+    return -1;
+  }
+}
+
 } // namespace
 
 namespace sourcemeta::core {
@@ -153,4 +166,24 @@ auto remove_suffix_ignore_case(const std::string_view input,
   return result;
 }
 
+auto hex_to_bytes(const std::string_view input) -> std::optional<std::string> {
+  if (input.size() % 2 != 0) {
+    return std::nullopt;
+  }
+
+  std::string result;
+  result.reserve(input.size() / 2);
+  for (std::size_t index{0}; index < input.size(); index += 2) {
+    const auto high{hex_digit_value(input[index])};
+    const auto low{hex_digit_value(input[index + 1])};
+    if (high < 0 || low < 0) {
+      return std::nullopt;
+    }
+
+    result.push_back(static_cast<char>((high << 4) | low));
+  }
+
+  return result;
+}
+
 } // namespace sourcemeta::core
diff --git a/test/text/CMakeLists.txt b/test/text/CMakeLists.txt
index 73f6fa84c..a711a3937 100644
--- a/test/text/CMakeLists.txt
+++ b/test/text/CMakeLists.txt
@@ -3,7 +3,8 @@ sourcemeta_googletest(NAMESPACE sourcemeta PROJECT core NAME text
     text_is_lowercase_test.cc text_truncate_test.cc
     text_remove_suffix_ignore_case_test.cc text_trim_test.cc
     text_take_until_test.cc text_split_once_test.cc
-    text_split_test.cc text_join_to_test.cc)
+    text_split_test.cc text_join_to_test.cc
+    text_hex_to_bytes_test.cc)
 
 target_link_libraries(sourcemeta_core_text_unit
   PRIVATE sourcemeta::core::text)
diff --git a/test/text/text_hex_to_bytes_test.cc b/test/text/text_hex_to_bytes_test.cc
new file mode 100644
index 000000000..5f8f256c2
--- /dev/null
+++ b/test/text/text_hex_to_bytes_test.cc
@@ -0,0 +1,75 @@
+#include <gtest/gtest.h>
+
+#include <sourcemeta/core/text.h>
+
+#include <string> // std::string
+
+TEST(Text_hex_to_bytes, empty_input) {
+  const auto result{sourcemeta::core::hex_to_bytes("")};
+  EXPECT_TRUE(result.has_value());
+  EXPECT_EQ(result.value(), "");
+}
+
+TEST(Text_hex_to_bytes, single_byte_lowercase) {
+  const auto result{sourcemeta::core::hex_to_bytes("ff")};
+  EXPECT_TRUE(result.has_value());
+  EXPECT_EQ(result.value(), "\xFF");
+}
+
+TEST(Text_hex_to_bytes, single_byte_uppercase) {
+  const auto result{sourcemeta::core::hex_to_bytes("FF")};
+  EXPECT_TRUE(result.has_value());
+  EXPECT_EQ(result.value(), "\xFF");
+}
+
+TEST(Text_hex_to_bytes, mixed_case) {
+  const auto result{sourcemeta::core::hex_to_bytes("DeadBeef")};
+  EXPECT_TRUE(result.has_value());
+  EXPECT_EQ(result.value(), "\xDE\xAD\xBE\xEF");
+}
+
+TEST(Text_hex_to_bytes, ascii_word) {
+  const auto result{sourcemeta::core::hex_to_bytes("666f6f626172")};
+  EXPECT_TRUE(result.has_value());
+  EXPECT_EQ(result.value(), "foobar");
+}
+
+TEST(Text_hex_to_bytes, nul_byte) {
+  const auto result{sourcemeta::core::hex_to_bytes("00")};
+  EXPECT_TRUE(result.has_value());
+  const std::string expected("\x00", 1);
+  EXPECT_EQ(result.value(), expected);
+}
+
+TEST(Text_hex_to_bytes, leading_zeros) {
+  const auto result{sourcemeta::core::hex_to_bytes("0001")};
+  EXPECT_TRUE(result.has_value());
+  const std::string expected("\x00\x01", 2);
+  EXPECT_EQ(result.value(), expected);
+}
+
+TEST(Text_hex_to_bytes, all_digit_pairs) {
+  const auto result{sourcemeta::core::hex_to_bytes("0123456789abcdef")};
+  EXPECT_TRUE(result.has_value());
+  EXPECT_EQ(result.value(), "\x01\x23\x45\x67\x89\xAB\xCD\xEF");
+}
+
+TEST(Text_hex_to_bytes, rejects_odd_length) {
+  EXPECT_FALSE(sourcemeta::core::hex_to_bytes("abc").has_value());
+}
+
+TEST(Text_hex_to_bytes, rejects_single_character) {
+  EXPECT_FALSE(sourcemeta::core::hex_to_bytes("a").has_value());
+}
+
+TEST(Text_hex_to_bytes, rejects_non_hexadecimal_letter) {
+  EXPECT_FALSE(sourcemeta::core::hex_to_bytes("zz").has_value());
+}
+
+TEST(Text_hex_to_bytes, rejects_interior_space) {
+  EXPECT_FALSE(sourcemeta::core::hex_to_bytes("ab c").has_value());
+}
+
+TEST(Text_hex_to_bytes, rejects_prefix_notation) {
+  EXPECT_FALSE(sourcemeta::core::hex_to_bytes("0xff").has_value());
+}