@@ -41,6 +41,46 @@ testBits('11000001 10000001 01000001', '\ufffd\ufffdA');
{
  // The four-byte UTF-8 sequence F0 9D 90 8D encodes U+1D40D, which a
  // JavaScript string holds as the surrogate pair D835 DC0D.
  const fourByteSequence = Buffer.from('F09D908D', 'hex');
  test('utf-8', fourByteSequence, '\ud835\udc0d');
}
4242//CESU-8 not supported: test('utf-8', Buffer.from('EDA0B5EDB08D', 'hex'), '\ud835\udc0d');
4343
44+ // 0x00: |00000000 ASCII
45+ // 0x41: |01000001 ASCII
46+ // 0xb8: 10|111000 continuation
47+ // 0xcc: 110|01100 two-byte head
48+ // 0xe2: 1110|0010 three-byte head
49+ // 0xf0: 11110|000 four-byte head
50+ // 0xf1: 11110|001 another four-byte head
51+ // 0xfb: 111110|11 "five-byte head", not UTF-8
// Byte values from which recurse() builds its candidate buffers: two ASCII
// bytes, one continuation byte, one two-byte head, one three-byte head, two
// four-byte heads and one byte that is never valid in UTF-8.
52+ const alphabet = [ 0x00 , 0x41 , 0xb8 , 0xcc , 0xe2 , 0xf0 , 0xf1 , 0xfb ] ;
// Patterns tested against the hex dump (buf.toString('hex')) of each
// candidate buffer; recurse() skips any buffer whose dump matches at least
// one of them. The patterns are unanchored, so a match anywhere in the dump
// excludes the buffer.
// NOTE(review): presumably these are byte sequences for which the decoder
// is known to disagree with itself across write() splits — confirm.
53+ const knownToFail = [
54+ / ( e 2 | f 0 | f 1 ) ( 0 0 | 4 1 ) / i,
55+ / c c c c b 8 / i,
56+ / f [ 0 1 ] ( b 8 | c c ) ( 0 0 | 4 1 ) / i,
57+ / f [ 0 1 ] c c b 8 / i,
58+ / f [ 0 1 ] f b ( 0 0 | 4 1 ) / i,
59+ / ( c c | e 2 ) e 2 b 8 b 8 / i,
60+ / e 2 ( b 8 | e 2 ) c c b 8 / i,
// NOTE(review): 0x01 is not in `alphabet`, so the `01` alternative below can
// never match a buffer generated here — confirm whether `00|41` was intended.
61+ / e 2 f b c c ( 0 1 | b 8 ) / i,
62+ ] ;
// Enumerates every byte sequence obtained by appending `len` more bytes from
// `alphabet` to the prefix held in `bytes`, and checks each completed
// sequence.
//
// len   - number of bytes still to append; 0 means `bytes` is complete.
// bytes - array of byte values used as a shared stack: each value is pushed
//         before descending and popped afterwards, so the caller's array is
//         left unchanged on return.
//
// A completed sequence whose hex dump matches any `knownToFail` pattern is
// skipped. Otherwise the expected string is whatever a fresh StringDecoder
// yields when handed the whole buffer through end(), and test() is called
// with that buffer and expectation.
function recurse(len, bytes) {
  if (len !== 0) {
    for (const byte of alphabet) {
      bytes.push(byte);
      recurse(len - 1, bytes);
      bytes.pop();
    }
    return;
  }

  const buffer = Buffer.from(bytes);
  const hexDump = buffer.toString('hex');
  const isKnownFailure = knownToFail.some((pattern) => pattern.test(hexDump));
  if (isKnownFailure) {
    return;
  }

  const decoder = new StringDecoder('utf-8');
  const expected = decoder.end(buffer);
  test('utf-8', buffer, expected);
}
// Run the exhaustive check for every sequence length from 0 through 4 bytes,
// starting each enumeration from a fresh, empty prefix.
for (const len of [0, 1, 2, 3, 4]) {
  recurse(len, []);
}
83+
4484// UCS-2
{
  // Encode the text as UCS-2, then check that decoding returns the same text.
  const ucs2Input = Buffer.from('ababc', 'ucs2');
  test('ucs2', ucs2Input, 'ababc');
}
4686
0 commit comments