Skip to content

Commit f0db4e4

Browse files
committed
polyval: Add runtime PCLMULQDQ detection
When the `std` feature is enabled (which it is now by default), this adds runtime detection for PCLMULQDQ support on x86/x86_64 architectures. The detection happens once at the time `Polyval` is instantiated. The `polyval::field::Element` type has been changed into an enum which remembers the detection result, and its API changed to operate on bytestring representations of POLYVAL field elements. This appears to have a negligible performance impact.
1 parent fd534c8 commit f0db4e4

9 files changed

Lines changed: 140 additions & 97 deletions

File tree

.travis.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,10 @@ matrix:
3434
script: cd polyval && cargo test --release --tests
3535

3636
# no_std build
37-
- name: "Rust: stable (thumbv7em-none-eabihf)"
37+
- name: "Rust: stable (thumbv7em-none-eabi)"
3838
rust: stable
39-
install: rustup target add thumbv7em-none-eabihf
40-
script: cargo build --all --target thumbv7em-none-eabihf --release
39+
install: rustup target add thumbv7em-none-eabi
40+
script: ./build_nostd.sh
4141
- name "Rust: nightly (benches)"
4242
rust: nightly
4343
script: cargo build --all-features --benches

build_nostd.sh

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/bin/bash
2+
3+
set -eux
4+
5+
# Due to the fact that cargo does not disable default features when we use
6+
# cargo build --all --no-default-features we have to explicitly iterate over
7+
# all crates (see https://github.com/rust-lang/cargo/issues/4753 )
8+
DIRS=`ls -d */`
9+
TARGET="thumbv7em-none-eabi"
10+
cargo clean
11+
12+
for dir in $DIRS; do
13+
if [ $dir = "target/" ]
14+
then
15+
continue
16+
fi
17+
18+
pushd $dir
19+
cargo build --no-default-features --verbose --target $TARGET || {
20+
echo $dir failed
21+
exit 1
22+
}
23+
popd
24+
done

ghash/Cargo.toml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,16 @@ keywords = ["aes-gcm", "crypto", "universal-hashing"]
1414
categories = ["cryptography", "no-std"]
1515
edition = "2018"
1616

17-
[dependencies]
18-
polyval = { version = "0.0.1", path = "../polyval" }
17+
[dependencies.polyval]
18+
version = "0.0.1"
19+
default-features = false
20+
path = "../polyval"
1921

2022
[dev-dependencies]
2123
hex-literal = "0.1"
2224

2325
[features]
26+
default = ["std"]
2427
std = ["polyval/std"]
2528

2629
[badges]

polyval/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ zeroize = { version = "0.10", optional = true, default-features = false }
2222
hex-literal = "0.1"
2323

2424
[features]
25+
default = ["std"]
2526
std = ["universal-hash/std"]
2627

2728
[badges]

polyval/src/field.rs

Lines changed: 93 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,23 @@
1414
//!
1515
//! [RFC 8452 Section 3]: https://tools.ietf.org/html/rfc8452#section-3
1616
17-
pub mod backend;
17+
#[cfg(all(
18+
target_feature = "pclmulqdq",
19+
target_feature = "sse2",
20+
target_feature = "sse4.1",
21+
any(target_arch = "x86", target_arch = "x86_64")
22+
))]
23+
mod pclmulqdq;
24+
mod soft;
1825

19-
use self::backend::Backend;
20-
use core::ops::{Add, Mul};
26+
#[cfg(all(
27+
target_feature = "pclmulqdq",
28+
target_feature = "sse2",
29+
target_feature = "sse4.1",
30+
any(target_arch = "x86", target_arch = "x86_64")
31+
))]
32+
use self::pclmulqdq::M128i;
33+
use self::soft::U64x2;
2134

2235
/// Size of GF(2^128) in bytes (16-bytes).
2336
pub const FIELD_SIZE: usize = 16;
@@ -27,29 +40,64 @@ pub type Block = [u8; FIELD_SIZE];
2740

2841
/// POLYVAL field element.
2942
#[derive(Copy, Clone)]
30-
pub struct Element<B: Backend>(B);
43+
pub enum Element {
44+
#[cfg(all(
45+
target_feature = "pclmulqdq",
46+
target_feature = "sse2",
47+
target_feature = "sse4.1",
48+
any(target_arch = "x86", target_arch = "x86_64")
49+
))]
50+
/// (P)CLMUL(QDQ)-accelerated backend on supported x86 architectures
51+
Clmul(M128i),
3152

32-
impl<B: Backend> Element<B> {
53+
/// Portable software fallback
54+
Soft(U64x2),
55+
}
56+
57+
impl Element {
3358
/// Load a `FieldElement` from its bytestring representation.
59+
#[cfg(all(
60+
target_feature = "pclmulqdq",
61+
target_feature = "sse2",
62+
target_feature = "sse4.1",
63+
any(target_arch = "x86", target_arch = "x86_64")
64+
))]
3465
pub fn from_bytes(bytes: Block) -> Self {
35-
Element(bytes.into())
66+
if cfg!(feature = "std") {
67+
if is_x86_feature_detected!("pclmulqdq") {
68+
Element::Clmul(bytes.into())
69+
} else {
70+
Element::Soft(bytes.into())
71+
}
72+
} else {
73+
Element::Clmul(bytes.into())
74+
}
3675
}
3776

38-
/// Serialize this `FieldElement` as a bytestring.
39-
pub fn to_bytes(self) -> Block {
40-
self.0.into()
77+
/// Load a `FieldElement` from its bytestring representation.
78+
#[cfg(not(all(
79+
target_feature = "pclmulqdq",
80+
target_feature = "sse2",
81+
target_feature = "sse4.1",
82+
any(target_arch = "x86", target_arch = "x86_64")
83+
)))]
84+
pub fn from_bytes(bytes: Block) -> Self {
85+
Element::Soft(bytes.into())
4186
}
42-
}
4387

44-
impl<B: Backend> Default for Element<B> {
45-
fn default() -> Self {
46-
Self::from_bytes(Block::default())
88+
/// Serialize this `FieldElement` as a bytestring.
89+
pub fn to_bytes(self) -> Block {
90+
match self {
91+
#[cfg(all(
92+
target_feature = "pclmulqdq",
93+
target_feature = "sse2",
94+
target_feature = "sse4.1",
95+
any(target_arch = "x86", target_arch = "x86_64")
96+
))]
97+
Element::Clmul(m128i) => m128i.into(),
98+
Element::Soft(u64x2) => u64x2.into(),
99+
}
47100
}
48-
}
49-
50-
#[allow(clippy::suspicious_arithmetic_impl)]
51-
impl<B: Backend> Add for Element<B> {
52-
type Output = Self;
53101

54102
/// Adds two POLYVAL field elements.
55103
///
@@ -58,16 +106,21 @@ impl<B: Backend> Add for Element<B> {
58106
/// > "The sum of any two elements in the field is the result of XORing them."
59107
///
60108
/// [RFC 8452 Section 3]: https://tools.ietf.org/html/rfc8452#section-3
61-
fn add(self, rhs: Self) -> Self {
62-
Element(self.0 + rhs.0)
109+
#[allow(clippy::should_implement_trait)]
110+
pub fn add(self, other: Block) -> Self {
111+
match self {
112+
#[cfg(all(
113+
target_feature = "pclmulqdq",
114+
target_feature = "sse2",
115+
target_feature = "sse4.1",
116+
any(target_arch = "x86", target_arch = "x86_64")
117+
))]
118+
Element::Clmul(m128i) => Element::Clmul(m128i + M128i::from(other)),
119+
Element::Soft(u64x2) => Element::Soft(u64x2 + U64x2::from(other)),
120+
}
63121
}
64-
}
65-
66-
#[allow(clippy::suspicious_arithmetic_impl)]
67-
impl<B: Backend> Mul for Element<B> {
68-
type Output = Self;
69122

70-
/// Computes POLYVAL multiplication over GF(2^128).
123+
/// Computes carryless POLYVAL multiplication over GF(2^128).
71124
///
72125
/// From [RFC 8452 Section 3]:
73126
///
@@ -76,13 +129,22 @@ impl<B: Backend> Mul for Element<B> {
76129
/// > irreducible polynomial."
77130
///
78131
/// [RFC 8452 Section 3]: https://tools.ietf.org/html/rfc8452#section-3
79-
fn mul(self, rhs: Self) -> Self {
80-
Element(self.0 * rhs.0)
132+
pub fn clmul(self, other: Block) -> Self {
133+
match self {
134+
#[cfg(all(
135+
target_feature = "pclmulqdq",
136+
target_feature = "sse2",
137+
target_feature = "sse4.1",
138+
any(target_arch = "x86", target_arch = "x86_64")
139+
))]
140+
Element::Clmul(m128i) => Element::Clmul(m128i * M128i::from(other)),
141+
Element::Soft(u64x2) => Element::Soft(u64x2 * U64x2::from(other)),
142+
}
81143
}
82144
}
83145

84-
impl<B: Backend> From<B> for Element<B> {
85-
fn from(element: B) -> Element<B> {
86-
Element(element)
146+
impl Default for Element {
147+
fn default() -> Self {
148+
Self::from_bytes(Block::default())
87149
}
88150
}

polyval/src/field/backend.rs

Lines changed: 0 additions & 40 deletions
This file was deleted.
Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ use core::arch::x86::*;
66
#[cfg(target_arch = "x86_64")]
77
use core::arch::x86_64::*;
88

9-
use super::Backend;
109
use crate::field::Block;
1110
use core::ops::{Add, Mul};
1211

@@ -15,8 +14,6 @@ use core::ops::{Add, Mul};
1514
#[derive(Copy, Clone)]
1615
pub struct M128i(__m128i);
1716

18-
impl Backend for M128i {}
19-
2017
impl From<Block> for M128i {
2118
// `_mm_loadu_si128` performs an unaligned load
2219
#[allow(clippy::cast_ptr_alignment)]
Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
//!
66
//! Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
77
8-
use super::Backend;
98
use crate::field::Block;
109
use core::{
1110
convert::TryInto,
@@ -16,8 +15,6 @@ use core::{
1615
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
1716
pub struct U64x2(u64, u64);
1817

19-
impl Backend for U64x2 {}
20-
2118
impl From<Block> for U64x2 {
2219
fn from(bytes: Block) -> U64x2 {
2320
U64x2(
@@ -29,14 +26,7 @@ impl From<Block> for U64x2 {
2926

3027
impl From<U64x2> for Block {
3128
fn from(u64x2: U64x2) -> Block {
32-
let x: u128 = u64x2.into();
33-
x.to_le_bytes()
34-
}
35-
}
36-
37-
impl From<U64x2> for u128 {
38-
fn from(u64x2: U64x2) -> u128 {
39-
u128::from(u64x2.0) | (u128::from(u64x2.1) << 64)
29+
(u128::from(u64x2.0) | (u128::from(u64x2.1) << 64)).to_le_bytes()
4030
}
4131
}
4232

polyval/src/lib.rs

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,26 +46,33 @@
4646
#![doc(html_logo_url = "https://github.com/RustCrypto/meta/master/logo_small.png")]
4747
#![warn(missing_docs, rust_2018_idioms)]
4848

49+
#[cfg(all(
50+
feature = "std",
51+
target_feature = "pclmulqdq",
52+
target_feature = "sse2",
53+
target_feature = "sse4.1",
54+
any(target_arch = "x86", target_arch = "x86_64")
55+
))]
56+
#[macro_use]
57+
extern crate std;
58+
4959
pub mod field;
5060

5161
pub use universal_hash;
5262

5363
use universal_hash::generic_array::{typenum::U16, GenericArray};
5464
use universal_hash::{Output, UniversalHash};
5565

56-
// TODO(tarcieri): runtime selection of CLMUL vs soft backend when both are available
57-
use field::backend::M128i;
58-
5966
/// **POLYVAL**: GHASH-like universal hash over GF(2^128).
6067
#[allow(non_snake_case)]
6168
#[derive(Clone)]
6269
#[repr(align(16))]
6370
pub struct Polyval {
6471
/// GF(2^128) field element input blocks are multiplied by
65-
H: field::Element<M128i>,
72+
H: field::Element,
6673

6774
/// Field element representing the computed universal hash
68-
S: field::Element<M128i>,
75+
S: field::Element,
6976
}
7077

7178
impl UniversalHash for Polyval {
@@ -82,8 +89,7 @@ impl UniversalHash for Polyval {
8289

8390
/// Input a field element `X` to be authenticated
8491
fn update_block(&mut self, x: &GenericArray<u8, U16>) {
85-
let x = field::Element::from_bytes(x.clone().into());
86-
self.S = (self.S + x) * self.H;
92+
self.S = self.H.clmul(self.S.add(x.clone().into()).to_bytes());
8793
}
8894

8995
/// Reset internal state

0 commit comments

Comments
 (0)