1- // Source: https://crates.io/crates/base32 v0.4.0
2- // License: MIT or Apache-2.0
3- // Copyright (c) 2015 The base32 Developers
4- // Permission is hereby granted, free of charge, to any person obtaining a copy
5- // of this software and associated documentation files (the "Software"), to deal
6- // in the Software without restriction, including without limitation the rights
7- // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8- // copies of the Software, and to permit persons to whom the Software is
9- // furnished to do so, subject to the following conditions:
10-
11- // The above copyright notice and this permission notice shall be included in all
12- // copies or substantial portions of the Software.
13-
14- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15- // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16- // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17- // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18- // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19- // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20- // SOFTWARE.
21- // (reference https://github.com/andreasots/base32/blob/master/LICENSE-MIT)
1+ // This is a modification of base32 encoding to support the zbase32 alphabet.
2+ // The original piece of software can be found at https://crates.io/crates/base32(v0.4.0)
3+ // The original portions of this software are Copyright (c) 2015 The base32 Developers
4+
5+ /* This file is licensed under either of
6+ * Apache License, Version 2.0, (LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0) or
7+ * MIT license (LICENSE-MIT or http://opensource.org/licenses/MIT)
8+ * at your option.
9+ */
2210
2311use crate :: prelude:: * ;
2412
@@ -30,19 +18,29 @@ pub enum Alphabet {
3018 /// Whether to use padding.
3119 padding : bool
3220 } ,
21+ ZBase32
3322}
3423
35- /// RFC4648 base32 encoding with padding.
36- const RFC4648_ALPHABET : & ' static [ u8 ] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567" ;
3724
38- /// Encode a byte slice into a base32 string.
39- pub fn encode ( alphabet : Alphabet , data : & [ u8 ] ) -> String {
40- let ( alphabet, padding) = match alphabet {
41- Alphabet :: RFC4648 { padding } => ( RFC4648_ALPHABET , padding) ,
42- } ;
/// Inverse zbase32 lookup table for decoding.
///
/// Indexed by `byte - b'0'` for the ASCII range `b'0'..=b'Z'` (the decoder upper-cases each
/// input byte before the lookup). An entry is the 5-bit value of that symbol, or `-1` when the
/// byte is not part of the zbase32 alphabet.
const ZBASE_INV_ALPHABET: [i8; 43] = [
    -1, 18, -1, 25, 26, 27, 30, 29, 7, 31, -1, -1, -1, -1, -1, -1, -1, 24, 1, 12, 3, 8, 5, 6, 28,
    21, 9, 10, -1, 11, 2, 16, 13, 14, 4, 22, 17, 19, -1, 20, 15, 0, 23,
];

/// zbase32 alphabet: the byte at index `v` is the symbol for the 5-bit value `v`.
const ZBASE_ALPHABET: &[u8] = b"ybndrfg8ejkmcpqxot1uwisza345h769";

/// Inverse RFC4648 lookup table for decoding.
///
/// Indexed by `byte - b'0'` for the ASCII range `b'0'..=b'Z'`. An entry is the 5-bit value of
/// that symbol, or `-1` when the byte is not part of the alphabet. The padding character `=`
/// maps to `0`.
const RFC4648_INV_ALPHABET: [i8; 43] = [
    -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, 0, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,
    9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
];

/// RFC4648 base32 alphabet: the byte at index `v` is the symbol for the 5-bit value `v`.
const RFC4648_ALPHABET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
4542
43+ fn map_alphabet ( mut ret : Vec < u8 > , data : & [ u8 ] , alphabet : & ' static [ u8 ] ) -> Vec < u8 > {
4644 for chunk in data. chunks ( 5 ) {
4745 let buf = {
4846 let mut buf = [ 0u8 ; 5 ] ;
@@ -61,6 +59,12 @@ pub fn encode(alphabet: Alphabet, data: &[u8]) -> String {
6159 ret. push ( alphabet[ ( buf[ 4 ] & 0x1F ) as usize ] ) ;
6260 }
6361
62+ ret
63+ }
64+
65+
66+ fn rfc4648_encode ( data : & [ u8 ] , padding : bool ) -> String {
67+ let mut ret = map_alphabet ( Vec :: with_capacity ( ( data. len ( ) + 3 ) / 4 * 5 ) , data, RFC4648_ALPHABET ) ;
6468 if data. len ( ) % 5 != 0 {
6569 let len = ret. len ( ) ;
6670 let num_extra = 8 - ( data. len ( ) % 5 * 8 + 4 ) / 5 ;
@@ -72,50 +76,163 @@ pub fn encode(alphabet: Alphabet, data: &[u8]) -> String {
7276 ret. truncate ( len - num_extra) ;
7377 }
7478 }
79+ String :: from_utf8 ( ret) . unwrap ( )
80+
81+ }
82+
83+ fn zbase32_encode ( data : & [ u8 ] ) -> String {
84+ let mut ret = Vec :: with_capacity ( ( data. len ( ) + 4 ) / 5 * 8 ) ;
85+ ret = map_alphabet ( ret, data, RFC4648_ALPHABET ) ;
86+ ret. truncate ( ( data. len ( ) * 8 + 4 ) / 5 ) ;
87+
88+ // Check that our capacity calculation doesn't under-shoot in fuzzing
89+ #[ cfg( fuzzing) ]
90+ assert_eq ! ( ret. capacity( ) , ( data. len( ) + 4 ) / 5 * 8 ) ;
7591
7692 String :: from_utf8 ( ret) . unwrap ( )
7793}
7894
79- /// Inverse RFC4648 lookup table for decoding.
80- const RFC4648_INV_ALPHABET : [ i8 ; 43 ] = [
81- -1 , -1 , 26 , 27 , 28 , 29 , 30 , 31 , -1 , -1 , -1 , -1 , -1 , 0 , -1 , -1 , -1 , 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 ,
82- 9 , 10 , 11 , 12 , 13 , 14 , 15 , 16 , 17 , 18 , 19 , 20 , 21 , 22 , 23 , 24 , 25 ,
83- ] ;
95+ /// Encode a byte slice into a base32 string.
96+ pub fn encode ( alphabet : Alphabet , data : & [ u8 ] ) -> String {
97+ match alphabet {
98+ Alphabet :: RFC4648 { padding } =>{
99+ rfc4648_encode ( data, padding)
100+ } ,
101+ Alphabet :: ZBase32 => {
102+ zbase32_encode ( data)
103+ }
104+ }
105+ }
106+
84107
85108/// Decode a base32 string into a byte vector.
86- pub fn decode ( alphabet : Alphabet , data : & str ) -> Option < Vec < u8 > > {
87- let data = data. as_bytes ( ) ;
88- let alphabet = match alphabet {
89- Alphabet :: RFC4648 { .. } => RFC4648_INV_ALPHABET ,
90- } ;
91- let mut unpadded_data_length = data. len ( ) ;
92- data. iter ( ) . rev ( ) . take ( 6 ) . for_each ( |& c| {
93- if c != b'=' {
94- return ;
95- }
96- unpadded_data_length -= 1 ;
97- } ) ;
98- let output_length = unpadded_data_length * 5 / 8 ;
99- let mut ret = Vec :: with_capacity ( ( output_length + 4 ) / 5 * 5 ) ;
100- for chunk in data. chunks ( 8 ) {
101- let buf = {
102- let mut buf = [ 0u8 ; 8 ] ;
103- for ( i, & c) in chunk. iter ( ) . enumerate ( ) {
104- match alphabet. get ( c. to_ascii_uppercase ( ) . wrapping_sub ( b'0' ) as usize ) {
105- Some ( & -1 ) | None => return None ,
106- Some ( & value) => buf[ i] = value as u8 ,
109+ pub fn decode ( alphabet : Alphabet , data : & str ) -> Result < Vec < u8 > , ( ) > {
110+ match alphabet {
111+ Alphabet :: RFC4648 { .. } => {
112+ let alphabet = RFC4648_INV_ALPHABET ;
113+ let data = data. as_bytes ( ) ;
114+ let mut unpadded_data_length = data. len ( ) ;
115+ data. iter ( ) . rev ( ) . take ( 6 ) . for_each ( |& c| {
116+ if c != b'=' {
117+ return ;
118+ }
119+ unpadded_data_length -= 1 ;
120+ } ) ;
121+ let output_length = unpadded_data_length * 5 / 8 ;
122+ let mut ret = Vec :: with_capacity ( ( output_length + 4 ) / 5 * 5 ) ;
123+ for chunk in data. chunks ( 8 ) {
124+ let buf = {
125+ let mut buf = [ 0u8 ; 8 ] ;
126+ for ( i, & c) in chunk. iter ( ) . enumerate ( ) {
127+ match alphabet. get ( c. to_ascii_uppercase ( ) . wrapping_sub ( b'0' ) as usize ) {
128+ Some ( & -1 ) | None => return Err ( ( ) ) ,
129+ Some ( & value) => buf[ i] = value as u8 ,
130+ } ;
131+ }
132+ buf
107133 } ;
134+ ret. push ( ( buf[ 0 ] << 3 ) | ( buf[ 1 ] >> 2 ) ) ;
135+ ret. push ( ( buf[ 1 ] << 6 ) | ( buf[ 2 ] << 1 ) | ( buf[ 3 ] >> 4 ) ) ;
136+ ret. push ( ( buf[ 3 ] << 4 ) | ( buf[ 4 ] >> 1 ) ) ;
137+ ret. push ( ( buf[ 4 ] << 7 ) | ( buf[ 5 ] << 2 ) | ( buf[ 6 ] >> 3 ) ) ;
138+ ret. push ( ( buf[ 6 ] << 5 ) | buf[ 7 ] ) ;
108139 }
109- buf
110- } ;
111- ret. push ( ( buf[ 0 ] << 3 ) | ( buf[ 1 ] >> 2 ) ) ;
112- ret. push ( ( buf[ 1 ] << 6 ) | ( buf[ 2 ] << 1 ) | ( buf[ 3 ] >> 4 ) ) ;
113- ret. push ( ( buf[ 3 ] << 4 ) | ( buf[ 4 ] >> 1 ) ) ;
114- ret. push ( ( buf[ 4 ] << 7 ) | ( buf[ 5 ] << 2 ) | ( buf[ 6 ] >> 3 ) ) ;
115- ret. push ( ( buf[ 6 ] << 5 ) | buf[ 7 ] ) ;
140+ ret. truncate ( output_length) ;
141+ Ok ( ret)
142+
143+ } ,
144+ Alphabet :: ZBase32 => {
145+ if !data. is_ascii ( ) {
146+ return Err ( ( ) ) ;
147+ }
148+ let alphabet = ZBASE_INV_ALPHABET ;
149+
150+ let data = data. as_bytes ( ) ;
151+ let output_length = data. len ( ) * 5 / 8 ;
152+ if data. len ( ) > ( output_length * 8 + 4 ) / 5 {
153+ // If the string has more charachters than are required to alphabet_encode the number of bytes
154+ // decodable, treat the string as invalid.
155+ return Err ( ( ) ) ;
156+ }
157+
158+ let mut ret = Vec :: with_capacity ( ( data. len ( ) + 7 ) / 8 * 5 ) ;
159+
160+ for chunk in data. chunks ( 8 ) {
161+ let buf = {
162+ let mut buf = [ 0u8 ; 8 ] ;
163+ for ( i, & c) in chunk. iter ( ) . enumerate ( ) {
164+ match ZBASE_INV_ALPHABET . get ( c. to_ascii_uppercase ( ) . wrapping_sub ( b'0' ) as usize ) {
165+ Some ( & -1 ) | None => return Err ( ( ) ) ,
166+ Some ( & value) => buf[ i] = value as u8 ,
167+ } ;
168+ }
169+ buf
170+ } ;
171+ ret. push ( ( buf[ 0 ] << 3 ) | ( buf[ 1 ] >> 2 ) ) ;
172+ ret. push ( ( buf[ 1 ] << 6 ) | ( buf[ 2 ] << 1 ) | ( buf[ 3 ] >> 4 ) ) ;
173+ ret. push ( ( buf[ 3 ] << 4 ) | ( buf[ 4 ] >> 1 ) ) ;
174+ ret. push ( ( buf[ 4 ] << 7 ) | ( buf[ 5 ] << 2 ) | ( buf[ 6 ] >> 3 ) ) ;
175+ ret. push ( ( buf[ 6 ] << 5 ) | buf[ 7 ] ) ;
176+ }
177+ for c in ret. drain ( output_length..) {
178+ if c != 0 {
179+ // If the original string had any bits set at positions outside of the encoded data,
180+ // treat the string as invalid.
181+ return Err ( ( ) ) ;
182+ }
183+ }
184+
185+ // Check that our capacity calculation doesn't under-shoot in fuzzing
186+ #[ cfg( fuzzing) ]
187+ assert_eq ! ( ret. capacity( ) , ( data. len( ) + 7 ) / 8 * 5 ) ;
188+
189+ Ok ( ret)
190+ } ,
191+ }
192+ }
193+
#[cfg(test)]
mod tests {
    use super::*;

    /// Pairs of (zbase32 text, raw bytes) that must round-trip through `encode` and `decode`.
    const TEST_DATA: &[(&str, &[u8])] = &[
        ("", &[]),
        ("yy", &[0x00]),
        ("oy", &[0x80]),
        ("tqrey", &[0x8b, 0x88, 0x80]),
        ("6n9hq", &[0xf0, 0xbf, 0xc7]),
        ("4t7ye", &[0xd4, 0x7a, 0x04]),
        ("6im5sdy", &[0xf5, 0x57, 0xbb, 0x0c]),
        (
            "ybndrfg8ejkmcpqxot1uwisza345h769",
            &[
                0x00, 0x44, 0x32, 0x14, 0xc7, 0x42, 0x54, 0xb6, 0x35, 0xcf, 0x84, 0x65, 0x3a, 0x56,
                0xd7, 0xc6, 0x75, 0xbe, 0x77, 0xdf,
            ],
        ),
    ];

    #[test]
    fn test_encode() {
        for &(zbase32, data) in TEST_DATA {
            assert_eq!(encode(Alphabet::ZBase32, data), zbase32);
        }
    }

    #[test]
    fn test_decode() {
        for &(zbase32, data) in TEST_DATA {
            assert_eq!(decode(Alphabet::ZBase32, zbase32).unwrap(), data);
        }
    }

    #[test]
    fn test_decode_wrong() {
        // Symbols outside the zbase32 alphabet and strings too short to carry a byte.
        const WRONG_DATA: &[&str] = &["00", "l1", "?", "="];

        for &data in WRONG_DATA {
            assert!(decode(Alphabet::ZBase32, data).is_err(), "Data shouldn't be decodable");
        }
    }
}
120237
121238#[ cfg( test) ]
@@ -203,11 +320,84 @@ mod test {
203320
#[test]
fn invalid_chars_rfc4648() {
    // `,` is not an RFC4648 symbol, so decoding must be rejected.
    assert!(decode(RFC4648 { padding: true }, ",").is_err());
}
208325
#[test]
fn invalid_chars_unpadded_rfc4648() {
    // `,` is not an RFC4648 symbol, so decoding must be rejected without padding as well.
    assert!(decode(RFC4648 { padding: false }, ",").is_err());
}
213330}
331+
332+ // Decodes a zbase32 string to the original bytes, failing if the string was not encoded by a
333+ // proper zbase32 encoder.
334+ // pub fn alphabet_decode(data: &str) -> Result<Vec<u8>, ()> {
335+ // if !data.is_ascii() {
336+ // return Err(());
337+ // }
338+
339+ // let data = data.as_bytes();
340+ // let output_length = data.len() * 5 / 8;
341+ // if data.len() > (output_length * 8 + 4) / 5 {
342+ // // If the string has more charachters than are required to alphabet_encode the number of bytes
343+ // // decodable, treat the string as invalid.
344+ // return Err(());
345+ // }
346+
347+ // let mut ret = Vec::with_capacity((data.len() + 7) / 8 * 5);
348+
349+ // for chunk in data.chunks(8) {
350+ // let buf = {
351+ // let mut buf = [0u8; 8];
352+ // for (i, &c) in chunk.iter().enumerate() {
353+ // match ZBASE_INV_ALPHABET.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
354+ // Some(&-1) | None => return Err(()),
355+ // Some(&value) => buf[i] = value as u8,
356+ // };
357+ // }
358+ // buf
359+ // };
360+ // ret.push((buf[0] << 3) | (buf[1] >> 2));
361+ // ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
362+ // ret.push((buf[3] << 4) | (buf[4] >> 1));
363+ // ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
364+ // ret.push((buf[6] << 5) | buf[7]);
365+ // }
366+ // for c in ret.drain(output_length..) {
367+ // if c != 0 {
368+ // // If the original string had any bits set at positions outside of the encoded data,
369+ // // treat the string as invalid.
370+ // return Err(());
371+ // }
372+ // }
373+
374+ // // Check that our capacity calculation doesn't under-shoot in fuzzing
375+ // #[cfg(fuzzing)]
376+ // assert_eq!(ret.capacity(), (data.len() + 7) / 8 * 5);
377+
378+ // Ok(ret)
379+ // }
380+ //
381+ //
382+ //
383+ // fn inv_map_alphabet(mut ret: Vec<u8>, data: &str, alphabet: &'static [u8]) -> Result<Vec<u8>, ()> {
384+ // let data = data.as_bytes();
385+ // for chunk in data.chunks(8) {
386+ // let buf = {
387+ // let mut buf = [0u8; 8];
388+ // for (i, &c) in chunk.iter().enumerate() {
389+ // match alphabet.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
390+ // Some(&-1) | None => return Err(()),
391+ // Some(&value) => buf[i] = value as u8,
392+ // };
393+ // }
394+ // buf
395+ // };
396+ // ret.push((buf[0] << 3) | (buf[1] >> 2));
397+ // ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
398+ // ret.push((buf[3] << 4) | (buf[4] >> 1));
399+ // ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
400+ // ret.push((buf[6] << 5) | buf[7]);
401+ // }
402+ // Ok(ret)
403+ // }
0 commit comments