Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 53 additions & 8 deletions ext/cgi/escape/escape.c
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ url_unreserved_char(unsigned char c)
}

static VALUE
optimized_escape(VALUE str)
optimized_escape(VALUE str, int plus_escape)
{
long i, len, beg = 0;
VALUE dest = 0;
Expand All @@ -220,7 +220,7 @@ optimized_escape(VALUE str)
rb_str_cat(dest, cstr + beg, i - beg);
beg = i + 1;

if (c == ' ') {
if (plus_escape && c == ' ') {
rb_str_cat_cstr(dest, "+");
}
else {
Expand All @@ -242,7 +242,7 @@ optimized_escape(VALUE str)
}

static VALUE
optimized_unescape(VALUE str, VALUE encoding)
optimized_unescape(VALUE str, VALUE encoding, int unescape_plus)
{
long i, len, beg = 0;
VALUE dest = 0;
Expand All @@ -265,7 +265,7 @@ optimized_unescape(VALUE str, VALUE encoding)
| char_to_number(cstr[i+2]));
clen = 2;
}
else if (c == '+') {
else if (unescape_plus && c == '+') {
buf[0] = ' ';
}
else {
Expand Down Expand Up @@ -348,7 +348,7 @@ cgiesc_unescape_html(VALUE self, VALUE str)
* call-seq:
* CGI.escape(string) -> string
*
* Returns URL-escaped string.
* Returns URL-escaped string (+application/x-www-form-urlencoded+).
*
*/
static VALUE
Expand All @@ -357,7 +357,7 @@ cgiesc_escape(VALUE self, VALUE str)
StringValue(str);

if (rb_enc_str_asciicompat_p(str)) {
return optimized_escape(str);
return optimized_escape(str, 1);
}
else {
return rb_call_super(1, &str);
Expand All @@ -376,7 +376,7 @@ accept_charset(int argc, VALUE *argv, VALUE self)
* call-seq:
* CGI.unescape(string, encoding=@@accept_charset) -> string
*
* Returns URL-unescaped string.
* Returns URL-unescaped string (+application/x-www-form-urlencoded+).
*
*/
static VALUE
Expand All @@ -388,7 +388,50 @@ cgiesc_unescape(int argc, VALUE *argv, VALUE self)

if (rb_enc_str_asciicompat_p(str)) {
VALUE enc = accept_charset(argc-1, argv+1, self);
return optimized_unescape(str, enc);
return optimized_unescape(str, enc, 1);
}
else {
return rb_call_super(argc, argv);
}
}

/*
* call-seq:
* CGI.escapeURIComponent(string) -> string
*
* Returns URL-escaped string following RFC 3986.
*
*/
static VALUE
cgiesc_escape_uri_component(VALUE self, VALUE str)
{
    StringValue(str);

    /* Strings in a non-ASCII-compatible encoding are not handled here;
     * defer to the next implementation up the ancestor chain
     * (presumably the pure-Ruby CGI::Util method -- confirm). */
    if (!rb_enc_str_asciicompat_p(str)) {
        return rb_call_super(1, &str);
    }

    /* plus_escape == 0: a space is not rewritten as "+". */
    return optimized_escape(str, 0);
}

/*
* call-seq:
* CGI.unescapeURIComponent(string, encoding=@@accept_charset) -> string
*
* Returns URL-unescaped string following RFC 3986.
*
*/
static VALUE
cgiesc_unescape_uri_component(int argc, VALUE *argv, VALUE self)
{
VALUE str = (rb_check_arity(argc, 1, 2), argv[0]);

StringValue(str);

if (rb_enc_str_asciicompat_p(str)) {
VALUE enc = accept_charset(argc-1, argv+1, self);
return optimized_unescape(str, enc, 0);
}
else {
return rb_call_super(argc, argv);
Expand All @@ -414,6 +457,8 @@ InitVM_escape(void)
rb_mUtil = rb_define_module_under(rb_cCGI, "Util");
rb_define_method(rb_mEscape, "escapeHTML", cgiesc_escape_html, 1);
rb_define_method(rb_mEscape, "unescapeHTML", cgiesc_unescape_html, 1);
rb_define_method(rb_mEscape, "escapeURIComponent", cgiesc_escape_uri_component, 1);
rb_define_method(rb_mEscape, "unescapeURIComponent", cgiesc_unescape_uri_component, -1);
rb_define_method(rb_mEscape, "escape", cgiesc_escape, 1);
rb_define_method(rb_mEscape, "unescape", cgiesc_unescape, -1);
rb_prepend_module(rb_mUtil, rb_mEscape);
Expand Down
49 changes: 41 additions & 8 deletions lib/cgi/util.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,57 @@ module Util; end
extend Util
end
module CGI::Util
@@accept_charset="UTF-8" unless defined?(@@accept_charset)
# URL-encode a string.
@@accept_charset = Encoding::UTF_8 unless defined?(@@accept_charset)

# URL-encode a string into application/x-www-form-urlencoded.
# Space characters (+" "+) are encoded with plus signs (+"+"+)
# url_encoded_string = CGI.escape("'Stop!' said Fred")
# # => "%27Stop%21%27+said+Fred"
def escape(string)
  # Remember the caller's encoding; the work is done on a binary copy so
  # multi-byte characters are escaped byte by byte and the argument itself
  # is never mutated.
  encoding = string.encoding
  buffer = string.b
  # Percent-encode every run of bytes outside the unreserved set. The space
  # is deliberately left alone here because it becomes "+" just below.
  # gsub! returns nil when nothing matched, so its result is not chained.
  buffer.gsub!(/([^ a-zA-Z0-9_.\-~]+)/) do |m|
    '%' + m.unpack('H2' * m.bytesize).join('%').upcase
  end
  buffer.tr!(' ', '+')
  # Returns the escaped copy, tagged with the input's original encoding.
  buffer.force_encoding(encoding)
end

# URL-decode a string with encoding(optional).
# URL-decode an application/x-www-form-urlencoded string with encoding(optional).
# string = CGI.unescape("%27Stop%21%27+said+Fred")
# # => "'Stop!' said Fred"
def unescape(string, encoding = @@accept_charset)
  # "+" stands for a space in form-encoded data; translate it before the
  # percent sequences are decoded so that a literal "%2B" survives as "+".
  # The result is a fresh binary copy, so the argument is not mutated.
  str = string.tr('+', ' ').b
  # Decode each run of %XX escapes back into raw bytes. gsub! returns nil
  # when nothing matched, so its result is not chained.
  str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m|
    [m.delete('%')].pack('H*')
  end
  # Prefer the requested encoding; if the decoded bytes are not valid in it,
  # fall back to the encoding of the original argument.
  str.force_encoding(encoding)
  str.valid_encoding? ? str : str.force_encoding(string.encoding)
end

# URL-encode a string following RFC 3986
# Space characters (+" "+) are encoded with (+"%20"+)
# url_encoded_string = CGI.escapeURIComponent("'Stop!' said Fred")
# # => "%27Stop%21%27%20said%20Fred"
def escapeURIComponent(string)
  original_encoding = string.encoding
  # Operate on a binary copy so each offending byte is escaped on its own;
  # every byte outside the unreserved set (space included) becomes %XX.
  escaped = string.b.gsub(/([^a-zA-Z0-9_.\-~]+)/) do |run|
    run.each_byte.map { |byte| '%%%02X' % byte }.join
  end
  # Hand back the escaped copy labelled with the caller's encoding.
  escaped.force_encoding(original_encoding)
end

# URL-decode a string following RFC 3986 with encoding(optional).
# string = CGI.unescapeURIComponent("%27Stop%21%27+said%20Fred")
# # => "'Stop!'+said Fred"
def unescapeURIComponent(string, encoding = @@accept_charset)
  # Decode on a binary copy; unlike #unescape, "+" is left untouched.
  str = string.b
  # Turn each run of %XX escapes back into raw bytes. gsub! returns nil when
  # the string contains no escapes, so nothing may be chained onto it.
  str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m|
    [m.delete('%')].pack('H*')
  end
  # Prefer the requested encoding; if the decoded bytes are not valid in it,
  # fall back to the encoding of the original argument.
  str.force_encoding(encoding)
  str.valid_encoding? ? str : str.force_encoding(string.encoding)
end

Expand Down
49 changes: 48 additions & 1 deletion test/cgi/test_cgi_util.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ def teardown
ENV.update(@environ)
end


def test_cgi_escape
assert_equal('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93', CGI.escape(@str1))
assert_equal('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93'.ascii_only?, CGI.escape(@str1).ascii_only?) if defined?(::Encoding)
Expand Down Expand Up @@ -70,6 +69,54 @@ def test_cgi_unescape_accept_charset
end;
end

# Escaping @str1 must give the percent-encoded form, with the space as %20.
def test_cgi_escapeURIComponent
  assert_equal('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93', CGI.escapeURIComponent(@str1))
  return unless defined?(::Encoding)

  # The escaped result must be as ASCII-only as the expected literal.
  expected_ascii = '%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93'.ascii_only?
  assert_equal(expected_ascii, CGI.escapeURIComponent(@str1).ascii_only?)
end

# Unreserved characters must pass through the escaper unchanged.
def test_cgi_escapeURIComponent_with_unreserved_characters
  expected = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"
  actual = CGI.escapeURIComponent("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~")
  assert_equal(expected, actual, "should not encode any unreserved characters, as per RFC3986 Section 2.3")
end

# A byte that is invalid in UTF-8 must still be escaped byte by byte.
def test_cgi_escapeURIComponent_with_invalid_byte_sequence
  broken = "\xC0\<\<".dup.force_encoding("UTF-8")
  assert_equal('%C0%3C%3C', CGI.escapeURIComponent(broken))
end

# The escaped string must carry the same encoding as the input string.
def test_cgi_escapeURIComponent_preserve_encoding
  {
    "US-ASCII" => Encoding::US_ASCII,
    "ASCII-8BIT" => Encoding::ASCII_8BIT,
    "UTF-8" => Encoding::UTF_8,
  }.each do |label, expected_encoding|
    input = "\xC0\<\<".dup.force_encoding(label)
    assert_equal(expected_encoding, CGI.escapeURIComponent(input).encoding)
  end
end

# Decoding must restore @str1, leave "+" alone, and keep the encoding.
def test_cgi_unescapeURIComponent
  decoded = CGI.unescapeURIComponent('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93')
  assert_equal(@str1, decoded)

  if defined?(::Encoding)
    # A plus sign is ordinary data here, not an encoded space.
    assert_equal("foo+bar", CGI.unescapeURIComponent("foo+bar"))

    assert_equal(@str1.encoding, decoded.encoding)

    # Raw multi-byte text mixed with percent escapes decodes into one string.
    mixed_input = "\u{30E1 30E2 30EA}%E6%A4%9C%E7%B4%A2"
    assert_equal("\u{30E1 30E2 30EA 691C 7D22}", CGI.unescapeURIComponent(mixed_input))
  end
end

# The test expects each result to report the same encoding as its input.
def test_cgi_unescapeURIComponent_preserve_encoding
  {
    "US-ASCII" => Encoding::US_ASCII,
    "ASCII-8BIT" => Encoding::ASCII_8BIT,
    "UTF-8" => Encoding::UTF_8,
  }.each do |label, expected_encoding|
    input = "%C0%3C%3C".dup.force_encoding(label)
    assert_equal(expected_encoding, CGI.unescapeURIComponent(input).encoding)
  end
end

# Checks how CGI.unescapeURIComponent treats its optional encoding argument.
def test_cgi_unescapeURIComponent_accept_charset
return unless defined?(::Encoding)

# An explicit nil encoding is expected to raise TypeError.
assert_raise(TypeError) {CGI.unescapeURIComponent('', nil)}
# The two heredocs on the next line make up the script text passed to
# assert_separately: the "begin;" heredoc is empty and the "end;" heredoc
# holds the assertion that the default encoding works for an empty string.
# NOTE(review): presumably this runs in a separate interpreter started with
# -rcgi/util -- confirm against the assert_separately helper.
assert_separately(%w[-rcgi/util], "#{<<-"begin;"}\n#{<<-"end;"}")
begin;
assert_equal("", CGI.unescapeURIComponent(''))
end;
end

def test_cgi_pretty
assert_equal("<HTML>\n <BODY>\n </BODY>\n</HTML>\n",CGI.pretty("<HTML><BODY></BODY></HTML>"))
assert_equal("<HTML>\n\t<BODY>\n\t</BODY>\n</HTML>\n",CGI.pretty("<HTML><BODY></BODY></HTML>","\t"))
Expand Down