Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/core/crypto/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME crypto
PRIVATE_HEADERS sha256.h sha1.h fnv128.h uuid.h crc32.h
PRIVATE_HEADERS sha256.h sha1.h fnv128.h uuid.h crc32.h base64.h
SOURCES crypto_sha256.cc crypto_sha1.cc crypto_fnv128.cc
crypto_uuid.cc crypto_crc32.cc)
crypto_uuid.cc crypto_crc32.cc crypto_base64.cc)

if(SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL)
target_compile_definitions(sourcemeta_core_crypto
Expand Down
194 changes: 194 additions & 0 deletions src/core/crypto/crypto_base64.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
#include <sourcemeta/core/crypto_base64.h>

#include <array> // std::array
#include <cstddef> // std::size_t
#include <cstdint> // std::uint8_t, std::uint32_t
#include <optional> // std::optional, std::nullopt
#include <ostream> // std::ostream
#include <string> // std::string
#include <string_view> // std::string_view

namespace {

// RFC 4648 Section 4, Table 1: the standard Base 64 alphabet. The position of
// a character within the string is the 6-bit value that it represents
constexpr std::string_view BASE64_ALPHABET{"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                           "abcdefghijklmnopqrstuvwxyz"
                                           "0123456789+/"};

// RFC 4648 Section 5, Table 2: the "URL and Filename safe" Base 64 alphabet,
// which only differs from the standard one in its last two characters
constexpr std::string_view BASE64URL_ALPHABET{"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                              "abcdefghijklmnopqrstuvwxyz"
                                              "0123456789-_"};

// Marker stored in the decode tables for bytes that are not part of an
// alphabet. Real sextets only span 0 to 63, so it cannot collide with one
constexpr std::uint8_t INVALID_SEXTET = 0xFF;

// Invert an alphabet into a lookup table indexed by byte value. The entry for
// each character of `alphabet` holds the position of that character, and
// every other entry holds `INVALID_SEXTET`
constexpr auto build_decode_table(const std::string_view alphabet) noexcept
    -> std::array<std::uint8_t, 256> {
  std::array<std::uint8_t, 256> lookup{};
  lookup.fill(INVALID_SEXTET);

  std::uint8_t sextet{0};
  for (const char character : alphabet) {
    // Index through an unsigned byte so that the subscript is never negative
    // where `char` is signed
    lookup[static_cast<std::uint8_t>(character)] = sextet;
    ++sextet;
  }

  return lookup;
}

// Byte-to-sextet lookup for the standard alphabet, evaluated at compile time
constexpr auto BASE64_DECODE_TABLE = build_decode_table(BASE64_ALPHABET);
// Byte-to-sextet lookup for the URL-safe alphabet, evaluated at compile time
constexpr auto BASE64URL_DECODE_TABLE = build_decode_table(BASE64URL_ALPHABET);

// Append the Base64 form of `input` to `output`, translating every 6-bit
// group through `alphabet`. A final group of one or two bytes is completed
// with '=' characters only when `padding` is set. Note that `output` is
// appended to and never cleared
auto encode(const std::string_view input, const std::string_view alphabet,
            const bool padding, std::string &output) -> void {
  // Go through `std::uint8_t` before widening so that bytes above 0x7F do not
  // sign-extend where `char` is signed
  const auto byte_at = [&input](const std::size_t position) -> std::uint32_t {
    return static_cast<std::uint8_t>(input[position]);
  };

  const std::size_t leftover{input.size() % 3};
  const std::size_t whole_end{input.size() - leftover};

  // Each complete 24-bit quantum maps to exactly four characters
  for (std::size_t offset = 0; offset < whole_end; offset += 3) {
    const std::uint32_t quantum{(byte_at(offset) << 16u) |
                                (byte_at(offset + 1) << 8u) |
                                byte_at(offset + 2)};
    output.push_back(alphabet[(quantum >> 18u) & 0x3Fu]);
    output.push_back(alphabet[(quantum >> 12u) & 0x3Fu]);
    output.push_back(alphabet[(quantum >> 6u) & 0x3Fu]);
    output.push_back(alphabet[quantum & 0x3Fu]);
  }

  switch (leftover) {
    case 1: {
      // 8 bits of data: two characters, the second with 4 zero bits appended
      const std::uint32_t only{byte_at(whole_end)};
      output.push_back(alphabet[only >> 2u]);
      output.push_back(alphabet[(only & 0x03u) << 4u]);
      if (padding) {
        output.append(2, '=');
      }

      break;
    }
    case 2: {
      // 16 bits of data: three characters, the third with 2 zero bits appended
      const std::uint32_t head{byte_at(whole_end)};
      const std::uint32_t tail{byte_at(whole_end + 1)};
      output.push_back(alphabet[head >> 2u]);
      output.push_back(alphabet[((head & 0x03u) << 4u) | (tail >> 4u)]);
      output.push_back(alphabet[(tail & 0x0Fu) << 2u]);
      if (padding) {
        output.push_back('=');
      }

      break;
    }
    default:
      break;
  }
}

// Decode `input` by translating every character through `table`, where
// entries equal to `INVALID_SEXTET` mark characters outside of the alphabet.
// When `padding` is set, the input must be a multiple of four characters long
// and up to two trailing '=' are stripped before decoding. Returns no value if
// the length is impossible, if any remaining character is not in the
// alphabet, or if the unused bits of the final character are not zero
auto decode(const std::string_view input,
            const std::array<std::uint8_t, 256> &table, const bool padding)
    -> std::optional<std::string> {
  std::string_view payload{input};

  if (padding) {
    // RFC 4648 Section 4 always completes the final encoding quantum, so a
    // padded encoding is necessarily a multiple of four characters long
    if (payload.size() % 4 != 0) {
      return std::nullopt;
    }

    // The final quantum carries at most two pad characters. Any '=' left
    // after this is not in the table and gets rejected further down
    for (unsigned int stripped = 0;
         stripped < 2 && !payload.empty() && payload.back() == '=';
         ++stripped) {
      payload.remove_suffix(1);
    }
  }

  // A single trailing character only carries 6 bits, which cannot form a byte
  const std::size_t leftover{payload.size() % 4};
  if (leftover == 1) {
    return std::nullopt;
  }

  const auto sextet_at =
      [&table, &payload](const std::size_t position) -> std::uint32_t {
    return table[static_cast<std::uint8_t>(payload[position])];
  };

  std::string output;
  output.reserve(((payload.size() / 4) * 3) + 2);

  // Complete groups: four sextets are accumulated into 24 bits and then split
  // into three bytes
  const std::size_t whole_end{payload.size() - leftover};
  for (std::size_t offset = 0; offset < whole_end; offset += 4) {
    std::uint32_t group{0};
    for (std::size_t lane = 0; lane < 4; ++lane) {
      const std::uint32_t sextet{sextet_at(offset + lane)};
      if (sextet == INVALID_SEXTET) {
        return std::nullopt;
      }

      group = (group << 6u) | sextet;
    }

    output.push_back(static_cast<char>((group >> 16u) & 0xFFu));
    output.push_back(static_cast<char>((group >> 8u) & 0xFFu));
    output.push_back(static_cast<char>(group & 0xFFu));
  }

  // RFC 4648 Section 3.5 permits rejecting encodings whose pad bits are not
  // zero. We do so in order for every value to have a single accepted encoding
  if (leftover != 0) {
    const std::uint32_t first{sextet_at(whole_end)};
    const std::uint32_t second{sextet_at(whole_end + 1)};
    if (first == INVALID_SEXTET || second == INVALID_SEXTET) {
      return std::nullopt;
    }

    if (leftover == 2) {
      // 12 bits for one byte: the low 4 bits of the second sextet are unused
      if ((second & 0x0Fu) != 0) {
        return std::nullopt;
      }

      output.push_back(static_cast<char>((first << 2u) | (second >> 4u)));
    } else {
      // 18 bits for two bytes: the low 2 bits of the third sextet are unused
      const std::uint32_t third{sextet_at(whole_end + 2)};
      if (third == INVALID_SEXTET || (third & 0x03u) != 0) {
        return std::nullopt;
      }

      output.push_back(static_cast<char>((first << 2u) | (second >> 4u)));
      output.push_back(
          static_cast<char>(((second & 0x0Fu) << 4u) | (third >> 2u)));
    }
  }

  return output;
}

} // namespace

namespace sourcemeta::core {

auto base64_encode(const std::string_view input, std::ostream &output) -> void {
output << base64_encode(input);
}

auto base64_encode(const std::string_view input) -> std::string {
std::string result;
result.reserve(((input.size() + 2) / 3) * 4);
encode(input, BASE64_ALPHABET, true, result);
return result;
}

// Decode padded Base64 that uses the standard alphabet (RFC 4648 Section 4)
auto base64_decode(const std::string_view input) -> std::optional<std::string> {
  constexpr bool expect_padding{true};
  return decode(input, BASE64_DECODE_TABLE, expect_padding);
}

auto base64url_encode(const std::string_view input, std::ostream &output)
-> void {
output << base64url_encode(input);
}

auto base64url_encode(const std::string_view input) -> std::string {
std::string result;
result.reserve(((input.size() + 2) / 3) * 4);
encode(input, BASE64URL_ALPHABET, false, result);
return result;
}

// Decode unpadded Base64url that uses the URL-safe alphabet (RFC 4648
// Section 5)
auto base64url_decode(const std::string_view input)
    -> std::optional<std::string> {
  constexpr bool expect_padding{false};
  return decode(input, BASE64URL_DECODE_TABLE, expect_padding);
}

} // namespace sourcemeta::core
3 changes: 2 additions & 1 deletion src/core/crypto/include/sourcemeta/core/crypto.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
#define SOURCEMETA_CORE_CRYPTO_H_

/// @defgroup crypto Crypto
/// @brief Cryptographic hash functions and UUID generation.
/// @brief Cryptographic hash functions, UUID generation, and Base64 codecs.
///
/// This functionality is included as follows:
///
/// ```cpp
/// #include <sourcemeta/core/crypto.h>
/// ```

#include <sourcemeta/core/crypto_base64.h>
#include <sourcemeta/core/crypto_crc32.h>
#include <sourcemeta/core/crypto_fnv128.h>
#include <sourcemeta/core/crypto_sha1.h>
Expand Down
104 changes: 104 additions & 0 deletions src/core/crypto/include/sourcemeta/core/crypto_base64.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#ifndef SOURCEMETA_CORE_CRYPTO_BASE64_H_
#define SOURCEMETA_CORE_CRYPTO_BASE64_H_

#ifndef SOURCEMETA_CORE_CRYPTO_EXPORT
#include <sourcemeta/core/crypto_export.h>
#endif

#include <optional> // std::optional
#include <ostream> // std::ostream
#include <string> // std::string
#include <string_view> // std::string_view

namespace sourcemeta::core {

/// @ingroup crypto
/// Encode a byte sequence using Base64 (RFC 4648 Section 4) into a stream.
/// The result uses the standard alphabet and is padded with `=` up to a
/// multiple of four characters. For example:
///
/// ```cpp
/// #include <sourcemeta/core/crypto.h>
/// #include <sstream>
/// #include <cassert>
///
/// std::ostringstream result;
/// sourcemeta::core::base64_encode("foobar", result);
/// assert(result.str() == "Zm9vYmFy");
/// ```
auto SOURCEMETA_CORE_CRYPTO_EXPORT base64_encode(const std::string_view input,
std::ostream &output) -> void;

/// @ingroup crypto
/// Encode a byte sequence using Base64 (RFC 4648 Section 4). The result uses
/// the standard alphabet and is padded with `=` up to a multiple of four
/// characters. An empty input results in an empty string. For example:
///
/// ```cpp
/// #include <sourcemeta/core/crypto.h>
/// #include <cassert>
///
/// assert(sourcemeta::core::base64_encode("foobar") == "Zm9vYmFy");
/// ```
auto SOURCEMETA_CORE_CRYPTO_EXPORT base64_encode(const std::string_view input)
-> std::string;

/// @ingroup crypto
/// Decode a Base64 string (RFC 4648 Section 4), returning no value unless the
/// input is a canonical padded encoding. That is, the input length must be a
/// multiple of four, `=` may only appear as up to two trailing characters,
/// every other character must belong to the standard alphabet, and the unused
/// bits of the last character must be zero. An empty input decodes to an
/// empty string. For example:
///
/// ```cpp
/// #include <sourcemeta/core/crypto.h>
/// #include <cassert>
///
/// const auto result{sourcemeta::core::base64_decode("Zm9vYmFy")};
/// assert(result.has_value());
/// assert(result.value() == "foobar");
/// ```
auto SOURCEMETA_CORE_CRYPTO_EXPORT base64_decode(const std::string_view input)
-> std::optional<std::string>;

/// @ingroup crypto
/// Encode a byte sequence using unpadded Base64url (RFC 4648 Section 5) into a
/// stream. The result uses the URL and filename safe alphabet (`-` and `_`
/// instead of `+` and `/`) and never contains `=`. For example:
///
/// ```cpp
/// #include <sourcemeta/core/crypto.h>
/// #include <sstream>
/// #include <cassert>
///
/// std::ostringstream result;
/// sourcemeta::core::base64url_encode("fo", result);
/// assert(result.str() == "Zm8");
/// ```
auto SOURCEMETA_CORE_CRYPTO_EXPORT
base64url_encode(const std::string_view input, std::ostream &output) -> void;

/// @ingroup crypto
/// Encode a byte sequence using unpadded Base64url (RFC 4648 Section 5). The
/// result uses the URL and filename safe alphabet (`-` and `_` instead of `+`
/// and `/`) and never contains `=`. An empty input results in an empty
/// string. For example:
///
/// ```cpp
/// #include <sourcemeta/core/crypto.h>
/// #include <cassert>
///
/// assert(sourcemeta::core::base64url_encode("fo") == "Zm8");
/// ```
auto SOURCEMETA_CORE_CRYPTO_EXPORT
base64url_encode(const std::string_view input) -> std::string;

/// @ingroup crypto
/// Decode an unpadded Base64url string (RFC 4648 Section 5), returning no
/// value unless the input is a canonical encoding. That is, every character
/// must belong to the URL and filename safe alphabet (so `=` padding is
/// rejected), the input length must not leave a single character after the
/// last full group of four, and the unused bits of the last character must be
/// zero. An empty input decodes to an empty string. For example:
///
/// ```cpp
/// #include <sourcemeta/core/crypto.h>
/// #include <cassert>
///
/// const auto result{sourcemeta::core::base64url_decode("Zm8")};
/// assert(result.has_value());
/// assert(result.value() == "fo");
/// ```
auto SOURCEMETA_CORE_CRYPTO_EXPORT
base64url_decode(const std::string_view input) -> std::optional<std::string>;

} // namespace sourcemeta::core

#endif
4 changes: 3 additions & 1 deletion test/crypto/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ sourcemeta_googletest(NAMESPACE sourcemeta PROJECT core NAME crypto
crypto_sha1_test.cc
crypto_fnv128_test.cc
crypto_uuid_test.cc
crypto_crc32_test.cc)
crypto_crc32_test.cc
crypto_base64_test.cc
crypto_base64url_test.cc)

target_link_libraries(sourcemeta_core_crypto_unit
PRIVATE sourcemeta::core::crypto)
Expand Down
Loading
Loading