@@ -53,11 +53,12 @@ test("escape spaces with backslashes", () => {
5353 expect ( tokenizeArgs ( `command space\\ ` ) ) . toEqual ( [ "command" , "space " ] ) ;
5454} ) ;
5555
56- test ( "ignore escaped newlines outside of quotes" , ( ) => {
56+ test ( "remove escaped newlines outside of single quotes" , ( ) => {
5757 expect ( tokenizeArgs ( `command \\\nargument` ) ) . toEqual ( [ "command" , `argument` ] ) ;
58- expect ( tokenizeArgs ( `command "\\\nargument"` ) ) . toEqual ( [
58+ expect ( tokenizeArgs ( `command "\\\nargument"` ) ) . toEqual ( [ "command" , `argument` , ] ) ;
59+ expect ( tokenizeArgs ( `command '\\\nargument'` ) ) . toEqual ( [
5960 "command" ,
60- `\nargument` ,
61+ `\\\ nargument` ,
6162 ] ) ;
6263} ) ;
6364
@@ -116,3 +117,247 @@ test("empty command", () => {
116117 expect ( tokenizeArgs ( `` ) ) . toEqual ( [ ] ) ;
117118 expect ( tokenizeArgs ( ` ` ) ) . toEqual ( [ ] ) ;
118119} ) ;
120+
121+ // --------------------------------------------------
122+ // Characters and character codes
123+ // --------------------------------------------------
124+
125+ // special characters
126+ const CHR_BS = "\\" ;
127+ // special character codes
128+ const ASC_NL = "\n" . charCodeAt ( 0 ) ;
129+ const ASC_DQ = '"' . charCodeAt ( 0 ) ;
130+ const ASC_SQ = "'" . charCodeAt ( 0 ) ;
131+ const ASC_DOLLAR = "$" . charCodeAt ( 0 ) ;
132+ const ASC_AT = "@" . charCodeAt ( 0 ) ;
133+ const ASC_BS = CHR_BS . charCodeAt ( 0 ) ;
134+ const ASC_BQ = "`" . charCodeAt ( 0 ) ;
135+ // characters that vanish, when escaped with a backslash
136+ // <backslash><newline> is a line continuation, that should be removed
137+ const skip_escaped_unquoted = [ ASC_NL ] ;
138+ const skip_escaped_double = [ ASC_NL ] ;
139+ const skip_escaped_single = [ ASC_SQ ] ;
140+ // characters that are unescaped in a double quoting context
141+ const escaped_double = [ ASC_DQ , ASC_DOLLAR , ASC_BQ , ASC_BS ] ;
142+
143+ // --------------------------------------------------
144+ // Expected unescaped result crafted according to POSIX standard
145+ // --------------------------------------------------
146+
147+ // characters escaped with a backslash in 2 parts
148+ let chars_escaped_1 : string [ ] = [ ] ;
149+ let chars_escaped_2 : string [ ] = [ ] ;
150+
151+ // expected results for unescaped characters depending on context
152+ let chars_unescaped_unquoted : string [ ] = [ ] ;
153+ let chars_unescaped_double : string [ ] = [ ] ;
154+ let chars_unescaped_single : string [ ] = [ ] ;
155+
156+ let arg_string_unquoted : string ;
157+ let arg_string_double : string ;
158+ let arg_string_single : string ;
159+
160+ let arg_tokens_unquoted : string [ ] = [ ] ;
161+ let arg_tokens_double : string [ ] = [ ] ;
162+ let arg_tokens_single : string [ ] = [ ] ;
163+
/**
 * Records one backslash-escaped character together with the result that is
 * expected after unescaping it in each of the three quoting contexts.
 *
 * The escaped form (`\` + character) is appended to `escaped_chars`; the
 * expected results are appended to the module-level arrays
 * `chars_unescaped_unquoted`, `chars_unescaped_double` and
 * `chars_unescaped_single`.
 *
 * @param ascii_code    character code of the character to escape
 * @param escaped_chars array receiving the escaped form of the character
 */
function add_char_to_unescaped_arrays(ascii_code: number, escaped_chars: string[]) {
    const character = String.fromCharCode(ascii_code);
    const escaped = CHR_BS + character;
    const is_listed = (codes: number[]) => codes.indexOf(ascii_code) !== -1;

    escaped_chars.push(escaped);

    // Unquoted: the backslash is always dropped; characters on the skip
    // list contribute nothing at all.
    if (!is_listed(skip_escaped_unquoted)) {
        chars_unescaped_unquoted.push(character);
    }

    // Double quotes: characters on the skip list contribute nothing; the
    // backslash is dropped only for characters listed in `escaped_double`
    // and kept for every other character.
    if (!is_listed(skip_escaped_double)) {
        chars_unescaped_double.push(is_listed(escaped_double) ? character : escaped);
    }

    // Single quotes: the backslash is always kept. A character on the skip
    // list (the single quote) terminates single quoting, so only the
    // backslash remains.
    chars_unescaped_single.push(is_listed(skip_escaped_single) ? CHR_BS : escaped);
}
188+
/**
 * Optional replacements for the generated expected token lists, one per
 * quoting context. A context that is not overridden uses the expectation
 * computed by `chars_escaped_test_generate_strings`.
 */
type Overrides = Partial<Record<"tokens_unquoted" | "tokens_double" | "tokens_single", string[]>>;
194+
/**
 * Builds the argument strings and expected token lists for every character
 * code in `start..end` (inclusive), each escaped with a backslash.
 *
 * All results are stored in module-level variables:
 * - `arg_string_unquoted`, `arg_string_double`, `arg_string_single`:
 *   the strings to tokenize, one per quoting context
 * - `arg_tokens_unquoted`, `arg_tokens_double`, `arg_tokens_single`:
 *   the expected tokens, taken from `overrides` when given, otherwise
 *   generated by `add_char_to_unescaped_arrays`
 *
 * @param start     first character code to include
 * @param end       last character code to include
 * @param overrides optional expected token lists replacing the generated ones
 */
function chars_escaped_test_generate_strings(start: number, end: number, overrides?: Overrides) {
    const opts: Overrides = overrides === undefined ? {} : overrides;

    // Reset all module-level state left over from a previous call.
    chars_escaped_1 = [];
    chars_escaped_2 = [];
    chars_unescaped_unquoted = [];
    chars_unescaped_double = [];
    chars_unescaped_single = [];
    arg_tokens_unquoted = [];
    arg_tokens_double = [];
    arg_tokens_single = [];

    // The first part holds the characters from `start` up to and including
    // the single quote. It is clamped to `end` so that a range ending
    // before the single quote does not run past its upper bound.
    const first_part_end = Math.min(end, ASC_SQ);
    for (let ascii_code = start; ascii_code <= first_part_end; ascii_code++) {
        add_char_to_unescaped_arrays(ascii_code, chars_escaped_1);
    }
    // The second part holds the characters after the single quote up to
    // `end`. It is clamped to `start` so that a range starting after the
    // single quote does not include characters below its lower bound.
    const second_part_start = Math.max(start, ASC_SQ + 1);
    for (let ascii_code = second_part_start; ascii_code <= end; ascii_code++) {
        add_char_to_unescaped_arrays(ascii_code, chars_escaped_2);
    }

    const escaped_1 = chars_escaped_1.join("");
    const escaped_2 = chars_escaped_2.join("");

    arg_string_unquoted = escaped_1 + escaped_2;
    arg_string_double = '"' + escaped_1 + escaped_2 + '"';
    // Since a single quote cannot be a member of a single quoted string,
    // the escaped single quote at the end of the first part terminates
    // single quoting. To avoid a syntax error, a single quote is then
    // prepended to the second part to re-open quoting. When the range does
    // not contain the single quote, nothing terminates quoting and no
    // extra quote must be inserted.
    const contains_single_quote = start <= ASC_SQ && ASC_SQ <= end;
    const reopen_quote = contains_single_quote ? "'" : "";
    arg_string_single = "'" + escaped_1 + reopen_quote + escaped_2 + "'";

    arg_tokens_unquoted = opts.tokens_unquoted || [chars_unescaped_unquoted.join("")];
    arg_tokens_double = opts.tokens_double || [chars_unescaped_double.join("")];
    arg_tokens_single = opts.tokens_single || [chars_unescaped_single.join("")];
}
232+
233+ // --------------------------------------------------
234+ // Expected unescaped result generated by /bin/sh
235+ // --------------------------------------------------
236+
237+ let shell_arg_token_unquoted = atob ( `
238+ AQIDBAUGBwgJCwwNDg8QERITFBUWFxgZGhscHR4fICEiIyQlJicoKSorLC0uLzAxMjM0NTY3ODk6
239+ Ozw9Pj9AQUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVpbXF1eX2BhYmNkZWZnaGlqa2xtbm9wcXJz
240+ dHV2d3h5ent8fX5/
241+ ` ) ;
242+
243+ let shell_arg_token_double = atob ( `
244+ XAFcAlwDXARcBVwGXAdcCFwJXAtcDFwNXA5cD1wQXBFcElwTXBRcFVwWXBdcGFwZXBpcG1wcXB1c
245+ HlwfXCBcISJcIyRcJVwmXCdcKFwpXCpcK1wsXC1cLlwvXDBcMVwyXDNcNFw1XDZcN1w4XDlcOlw7
246+ XDxcPVw+XD9cQFxBXEJcQ1xEXEVcRlxHXEhcSVxKXEtcTFxNXE5cT1xQXFFcUlxTXFRcVVxWXFdc
247+ WFxZXFpcW1xcXVxeXF9gXGFcYlxjXGRcZVxmXGdcaFxpXGpca1xsXG1cblxvXHBccVxyXHNcdFx1
248+ XHZcd1x4XHlcelx7XHxcfVx+XH8=
249+ ` ) ;
250+
251+ let shell_arg_token_single = atob ( `
252+ XAFcAlwDXARcBVwGXAdcCFwJXApcC1wMXA1cDlwPXBBcEVwSXBNcFFwVXBZcF1wYXBlcGlwbXBxc
253+ HVweXB9cIFwhXCJcI1wkXCVcJlxcKFwpXCpcK1wsXC1cLlwvXDBcMVwyXDNcNFw1XDZcN1w4XDlc
254+ Olw7XDxcPVw+XD9cQFxBXEJcQ1xEXEVcRlxHXEhcSVxKXEtcTFxNXE5cT1xQXFFcUlxTXFRcVVxW
255+ XFdcWFxZXFpcW1xcXF1cXlxfXGBcYVxiXGNcZFxlXGZcZ1xoXGlcalxrXGxcbVxuXG9ccFxxXHJc
256+ c1x0XHVcdlx3XHhceVx6XHtcfFx9XH5cfw==
257+ ` ) ;
258+
259+ // function fold_string (str: string, width?: number) {
260+ // let lines = [];
261+ // if (typeof(width) === 'undefined') {
262+ // width = 76;
263+ // }
264+ // while (str) {
265+ // lines.push(str.substring(0, width));
266+ // str = str.substring(width);
267+ // }
268+ // return lines.join("\n");
269+ // }
270+ //
271+ // async function generate_shell_arg_tokens () {
272+ // // result.stdout - the stdout as a string
273+ // // result.stderr - the stderr as a string
274+ // // result.exitCode - the process exit code as a number
275+ // let result = await x('/bin/sh', ['-c', `pecho () { printf "%s" "\${*}"; }; pecho ` + arg_string_unquoted]);
//     console.log('let shell_arg_token_unquoted = atob(`' + "\n" + fold_string(btoa(result.stdout)) + '`)');
277+ // result = await x('/bin/sh', ['-c', `pecho () { printf "%s" "\${*}"; }; pecho ` + arg_string_double]);
278+ // console.log('let shell_arg_token_double = atob(`' + "\n" + fold_string(btoa(result.stdout)) + '`)');
279+ // result = await x('/bin/sh', ['-c', `pecho () { printf "%s" "\${*}"; }; pecho ` + arg_string_single]);
280+ // console.log('let shell_arg_token_single = atob(`' + "\n" + fold_string(btoa(result.stdout)) + '`)');
281+ // }
282+
283+ // import { x } from 'tinyexec';
284+ // // tinyexec does not handle NUL in argument strings, ASCII codes > 127 are messed up by UTF-8 output
285+ // chars_escaped_test_generate_strings(1, 127);
286+ // await generate_shell_arg_tokens();
287+
/**
 * Renders a list of strings as a readable, line-oriented dump so that a
 * failing comparison shows exactly which character differs.
 *
 * Every string starts with a separator line, followed by one line per
 * character of the form `<character> <decimal code>`. A backslash is joined
 * with the character that follows it onto a single line. Codes below 32 are
 * shown in caret notation (`^A`), codes from 127 upwards as `\x` followed
 * by the upper-case hexadecimal code. A trailing lone backslash gets a line
 * of its own.
 *
 * @param char_string_array strings to render
 * @returns all lines joined with newlines
 */
function pretty_print_character_string_array(char_string_array: string[]) {
    const separator = "--------------------------------------------------";
    const lines: string[] = [];

    char_string_array.forEach((token) => {
        lines.push(separator);

        // Backslash waiting to be combined with the next character.
        let pending = "";
        // split("") walks the string by UTF-16 code unit, like indexing does.
        for (const unit of token.split("")) {
            if (pending === "" && unit === CHR_BS) {
                pending = unit;
                continue;
            }

            const code = unit.charCodeAt(0);
            let shown = unit;
            if (code >= 127) {
                shown = "\\x" + code.toString(16).toUpperCase();
            } else if (code < 32) {
                shown = "^" + String.fromCharCode(ASC_AT + code);
            }

            lines.push(pending + shown + " " + code.toString());
            pending = "";
        }

        if (pending !== "") {
            lines.push(pending);
        }
    });

    return lines.join("\n");
}
316+
/**
 * Registers three tests, one per quoting context (unquoted, double quoted,
 * single quoted), that tokenize all backslash-escaped characters in
 * `start..end` and compare the result with the expected tokens.
 *
 * @param start     first character code to include
 * @param end       last character code to include
 * @param suffix    label appended to the test names; may be empty
 * @param overrides optional expected token lists replacing the generated ones
 */
function chars_escaped_test(start: number, end: number, suffix: string, overrides?: Overrides) {
    chars_escaped_test_generate_strings(start, end, overrides);

    // Snapshot the generated values now. The generator stores its results
    // in module-level variables that the next call of this function
    // overwrites. If the test runner invokes the callbacks only after all
    // tests have been registered, callbacks reading the module-level
    // variables directly would all see the data of the last call.
    const cases = [
        {
            context: "outside quoting context",
            input: arg_string_unquoted,
            expected: arg_tokens_unquoted,
        },
        {
            context: "in double quoting context",
            input: arg_string_double,
            expected: arg_tokens_double,
        },
        {
            context: "in single quoting context",
            input: arg_string_single,
            expected: arg_tokens_single,
        },
    ];

    const title_suffix = suffix ? " " + suffix : "";

    cases.forEach(({ context, input, expected }) => {
        test("all escaped characters " + context + title_suffix, () => {
            expect(
                pretty_print_character_string_array(tokenizeArgs(input))
            ).toEqual(
                pretty_print_character_string_array(expected)
            );
        });
    });
}
354+
355+ // Expected unescaped result generated according to POSIX
356+ chars_escaped_test ( 0 , 255 , "(POSIX)" ) ;
357+
358+ // Expected unescaped result generated by /bin/sh
359+ chars_escaped_test ( 1 , 127 , "(/bin/sh)" , {
360+ tokens_unquoted : [ shell_arg_token_unquoted ] ,
361+ tokens_double : [ shell_arg_token_double ] ,
362+ tokens_single : [ shell_arg_token_single ]
363+ } ) ;
0 commit comments