Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions src/lang/text/include/sourcemeta/core/text.h
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,23 @@ auto join_to(std::ostream &stream, const Range &items,
}
}

/// @ingroup text
///
/// Decode a hexadecimal string into its raw bytes. Every pair of input
/// characters produces exactly one output byte, with the first character of
/// the pair supplying the high four bits and the second the low four bits.
/// Both letter cases are accepted, and an empty input decodes to an empty
/// string. No value is returned when the input has an odd length or contains
/// a character outside the hexadecimal alphabet, which includes whitespace
/// and the `x` of a `0x` prefix. For example:
///
/// ```cpp
/// #include <sourcemeta/core/text.h>
/// #include <cassert>
///
/// const auto bytes{sourcemeta::core::hex_to_bytes("666f6f")};
/// assert(bytes.has_value());
/// assert(bytes.value() == "foo");
/// ```
SOURCEMETA_CORE_TEXT_EXPORT
auto hex_to_bytes(const std::string_view input) -> std::optional<std::string>;

/// @ingroup text
///
/// Return `input` with `suffix` removed from the end under ASCII
Expand Down
33 changes: 33 additions & 0 deletions src/lang/text/text.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <sourcemeta/core/text.h>

#include <cstddef> // std::size_t
#include <cstdint> // std::int8_t
#include <filesystem> // std::filesystem::path
#include <optional> // std::optional, std::nullopt
#include <string> // std::string
Expand All @@ -25,6 +26,18 @@ auto to_ascii_uppercase(const char character) noexcept -> char {
: character;
}

// Translate one hexadecimal digit into its numeric value (0 to 15). Letters
// are accepted in either case. Any other character yields -1.
auto hex_digit_value(const char character) noexcept -> std::int8_t {
  constexpr std::int8_t not_a_hex_digit{-1};
  // Numeric value of the first letter digit, `a` / `A`
  constexpr int first_letter_value{10};

  if ('0' <= character && character <= '9') {
    return static_cast<std::int8_t>(character - '0');
  }

  if ('a' <= character && character <= 'f') {
    return static_cast<std::int8_t>(first_letter_value + (character - 'a'));
  }

  if ('A' <= character && character <= 'F') {
    return static_cast<std::int8_t>(first_letter_value + (character - 'A'));
  }

  return not_a_hex_digit;
}

} // namespace

namespace sourcemeta::core {
Expand Down Expand Up @@ -153,4 +166,24 @@ auto remove_suffix_ignore_case(const std::string_view input,
return result;
}

// Decode `input` two characters at a time into one byte each. Yields no value
// if the input cannot be split into whole pairs or if any character is not a
// hexadecimal digit.
auto hex_to_bytes(const std::string_view input) -> std::optional<std::string> {
  const bool has_dangling_digit{(input.size() & 1U) != 0};
  if (has_dangling_digit) {
    return std::nullopt;
  }

  std::string decoded;
  decoded.reserve(input.size() / 2);

  // The length is even at this point, so a non-empty remainder always holds
  // at least two characters
  std::string_view remaining{input};
  while (!remaining.empty()) {
    const auto upper_nibble{hex_digit_value(remaining[0])};
    const auto lower_nibble{hex_digit_value(remaining[1])};
    // A negative value marks a character outside the hexadecimal alphabet
    if (upper_nibble < 0 || lower_nibble < 0) {
      return std::nullopt;
    }

    decoded.push_back(static_cast<char>((upper_nibble << 4) | lower_nibble));
    remaining.remove_prefix(2);
  }

  return decoded;
}

} // namespace sourcemeta::core
3 changes: 2 additions & 1 deletion test/text/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ sourcemeta_googletest(NAMESPACE sourcemeta PROJECT core NAME text
text_is_lowercase_test.cc
text_truncate_test.cc text_remove_suffix_ignore_case_test.cc
text_trim_test.cc text_take_until_test.cc text_split_once_test.cc
text_split_test.cc text_join_to_test.cc)
text_split_test.cc text_join_to_test.cc
text_hex_to_bytes_test.cc)

# Link the text unit test target against the text library it exercises
target_link_libraries(sourcemeta_core_text_unit
PRIVATE sourcemeta::core::text)
75 changes: 75 additions & 0 deletions test/text/text_hex_to_bytes_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#include <gtest/gtest.h>

#include <sourcemeta/core/text.h>

#include <string> // std::string

// An empty input is valid and decodes to an empty string
TEST(Text_hex_to_bytes, empty_input) {
  const auto result{sourcemeta::core::hex_to_bytes("")};
  // Fatal assertion: reading the value of an empty optional would throw
  ASSERT_TRUE(result.has_value());
  EXPECT_EQ(result.value(), "");
}

// Lowercase letter digits decode to the expected byte
TEST(Text_hex_to_bytes, single_byte_lowercase) {
  const auto result{sourcemeta::core::hex_to_bytes("ff")};
  // Fatal assertion: reading the value of an empty optional would throw
  ASSERT_TRUE(result.has_value());
  EXPECT_EQ(result.value(), "\xFF");
}

// Uppercase letter digits decode to the same byte as their lowercase form
TEST(Text_hex_to_bytes, single_byte_uppercase) {
  const auto result{sourcemeta::core::hex_to_bytes("FF")};
  // Fatal assertion: reading the value of an empty optional would throw
  ASSERT_TRUE(result.has_value());
  EXPECT_EQ(result.value(), "\xFF");
}

// Letter case may vary freely within a single input
TEST(Text_hex_to_bytes, mixed_case) {
  const auto result{sourcemeta::core::hex_to_bytes("DeadBeef")};
  // Fatal assertion: reading the value of an empty optional would throw
  ASSERT_TRUE(result.has_value());
  EXPECT_EQ(result.value(), "\xDE\xAD\xBE\xEF");
}

// A multi-byte input decodes to printable ASCII text
TEST(Text_hex_to_bytes, ascii_word) {
  const auto result{sourcemeta::core::hex_to_bytes("666f6f626172")};
  // Fatal assertion: reading the value of an empty optional would throw
  ASSERT_TRUE(result.has_value());
  EXPECT_EQ(result.value(), "foobar");
}

// A zero byte is kept in the output rather than treated as a terminator
TEST(Text_hex_to_bytes, nul_byte) {
  const auto result{sourcemeta::core::hex_to_bytes("00")};
  // Fatal assertion: reading the value of an empty optional would throw
  ASSERT_TRUE(result.has_value());
  // The explicit length is required, as constructing from the literal alone
  // would stop at the NUL byte and produce an empty string
  const std::string expected("\x00", 1);
  EXPECT_EQ(result.value(), expected);
}

// A leading zero byte is kept and followed by the remaining bytes
TEST(Text_hex_to_bytes, leading_zeros) {
  const auto result{sourcemeta::core::hex_to_bytes("0001")};
  // Fatal assertion: reading the value of an empty optional would throw
  ASSERT_TRUE(result.has_value());
  // The explicit length keeps the leading NUL byte in the expected string
  const std::string expected("\x00\x01", 2);
  EXPECT_EQ(result.value(), expected);
}

// Every hexadecimal digit appears once, in both nibble positions overall
TEST(Text_hex_to_bytes, all_digit_pairs) {
  const auto result{sourcemeta::core::hex_to_bytes("0123456789abcdef")};
  // Fatal assertion: reading the value of an empty optional would throw
  ASSERT_TRUE(result.has_value());
  EXPECT_EQ(result.value(), "\x01\x23\x45\x67\x89\xAB\xCD\xEF");
}

// Three digits cannot be split into whole two-digit pairs
TEST(Text_hex_to_bytes, rejects_odd_length) {
  const auto result{sourcemeta::core::hex_to_bytes("abc")};
  EXPECT_FALSE(result.has_value());
}

// A lone digit is the smallest odd-length input
TEST(Text_hex_to_bytes, rejects_single_character) {
  const auto result{sourcemeta::core::hex_to_bytes("a")};
  EXPECT_FALSE(result.has_value());
}

// Letters beyond `f` are not hexadecimal digits
TEST(Text_hex_to_bytes, rejects_non_hexadecimal_letter) {
  const auto result{sourcemeta::core::hex_to_bytes("zz")};
  EXPECT_FALSE(result.has_value());
}

// The length is even here, so the rejection comes from the space itself
TEST(Text_hex_to_bytes, rejects_interior_space) {
  const auto result{sourcemeta::core::hex_to_bytes("ab c")};
  EXPECT_FALSE(result.has_value());
}

// The `x` of a `0x` prefix is not a hexadecimal digit
TEST(Text_hex_to_bytes, rejects_prefix_notation) {
  const auto result{sourcemeta::core::hex_to_bytes("0xff")};
  EXPECT_FALSE(result.has_value());
}
Loading