Skip to content

Commit 456f6ab

Browse files
author
bneradt
committed
traffic_dump: don't dump cookies from the wire
Cookies are often very sensitive information and should not be logged. This change modifies traffic_dump to not dump the actual cookies from the wire. --promiscuous-mode is added in case the user wants to dump sensitive headers. --sensitive-fields is added so the user can treat custom fields as sensitive.
1 parent 55abbd7 commit 456f6ab

4 files changed

Lines changed: 217 additions & 21 deletions

File tree

plugins/experimental/traffic_dump/traffic_dump.cc

Lines changed: 130 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,20 +32,59 @@
3232
#include <arpa/inet.h>
3333
#include <netinet/in.h>
3434

35+
#include <algorithm>
3536
#include <sstream>
3637
#include <iomanip>
3738
#include <chrono>
3839
#include <atomic>
3940
#include <string>
4041
#include <string_view>
42+
#include <unordered_set>
4143

4244
#include "tscore/ts_file.h"
45+
#include "tscpp/util/TextView.h"
4346
#include "ts/ts.h"
4447

4548
namespace
4649
{
4750
const char *PLUGIN_NAME = "traffic_dump";
4851
const std::string closing = "]}]}";
52+
std::string defaut_sensitive_field_value;
53+
54+
// A case-insensitive comparitor used for comparing HTTP field names.
55+
struct InsensitiveCompare {
56+
bool
57+
operator()(std::string_view a, std::string_view b) const
58+
{
59+
return strcasecmp(a, b) == 0;
60+
}
61+
};
62+
63+
/// Hash functor for HTTP field names that ignores letter case.
///
/// The name is lowercased before it is hashed, so names that differ only in
/// case produce the same hash value. An unordered container that uses a
/// case-insensitive equality predicate needs this property from its hasher.
struct StringHashByLower {
  /// @param[in] str The field name to hash.
  /// @return The std::hash value of the lowercased copy of @a str.
  size_t
  operator()(const std::string &str) const
  {
    // Size the destination up front: std::transform only writes through the
    // output iterator, it never grows the string. Writing into an empty string
    // is out of bounds and would leave it empty, making every name hash alike.
    std::string lower(str.size(), '\0');
    std::transform(str.begin(), str.end(), lower.begin(), [](unsigned char c) -> unsigned char { return std::tolower(c); });
    return std::hash<std::string>()(lower);
  }
};
73+
74+
/// Fields considered sensitive because they may contain user-private
75+
/// information. These fields are replaced with auto-generated generic content
76+
/// by default. To turn off this behavior, the user should add the
77+
/// --promiscuous-mode flag as a commandline argument.
78+
///
79+
/// While these are specified with case, they are matched case-insensitively.
80+
std::unordered_set<std::string, StringHashByLower, InsensitiveCompare> default_sensitive_fields = {
81+
"Set-Cookie",
82+
"Cookie",
83+
};
84+
85+
/// The set of fields, default and user-specified, that are sensitive and whose
86+
/// values will be replaced with auto-generated generic content.
87+
std::unordered_set<std::string, StringHashByLower, InsensitiveCompare> sensitive_fields;
4988

5089
ts::file::path log_path{"dump"}; // default log directory
5190
int s_arg_idx = 0; // Session Arg Index to pass on session data
@@ -183,10 +222,10 @@ esc_json_out(const char *buf, int64_t len, std::ostream &jsonfile)
183222

184223
/// escape_json(): escape chars in a string and returns json string
185224
std::string
186-
escape_json(std::string const &s)
225+
escape_json(std::string_view s)
187226
{
188227
std::ostringstream o;
189-
esc_json_out(s.c_str(), s.length(), o);
228+
esc_json_out(s.data(), s.length(), o);
190229
return o.str();
191230
}
192231
std::string
@@ -205,9 +244,9 @@ json_entry(std::string const &name, const char *value, int64_t size)
205244

206245
/// json_entry_array(): Formats to array-style entry i.e. ["field","value"]
207246
inline std::string
208-
json_entry_array(const char *name, int name_len, const char *value, int value_len)
247+
json_entry_array(std::string_view name, std::string_view value)
209248
{
210-
return "[\"" + escape_json(name, name_len) + "\", \"" + escape_json(value, value_len) + "\"]";
249+
return "[\"" + escape_json(name) + "\", \"" + escape_json(value) + "\"]";
211250
}
212251

213252
/** Remove the scheme prefix from the url.
@@ -236,6 +275,47 @@ write_content_node(int64_t num_body_bytes)
236275
return std::string(R"(,"content":{"encoding":"plain","size":)" + std::to_string(num_body_bytes) + '}');
237276
}
238277

278+
/** Initialize the generic sensitive field to be dumped. This is used instead
279+
* of the sensitive field values seen on the wire.
280+
*/
281+
void
282+
initialize_default_sensitive_field()
283+
{
284+
constexpr size_t default_field_size = 2048;
285+
defaut_sensitive_field_value.resize(default_field_size);
286+
287+
char *field_buffer = defaut_sensitive_field_value.data();
288+
for (auto i = 0u; i < default_field_size; i += 8) {
289+
sprintf(field_buffer, "%07x ", i / 8);
290+
field_buffer += 8;
291+
}
292+
}
293+
294+
/** Inspect the field to see whether it is sensitive and return a generic value
295+
* of equal size to the original if it is.
296+
*
297+
* @param[in] name The field name to inspect.
298+
* @param[in] original_value The field value to inspect.
299+
*
300+
* @return The value traffic_dump should dump for the given field.
301+
*/
302+
std::string_view
303+
replace_sensitive_fields(std::string_view name, std::string_view original_value)
304+
{
305+
auto search = sensitive_fields.find(std::string(name));
306+
if (search == sensitive_fields.end()) {
307+
return original_value;
308+
}
309+
auto new_value_size = original_value.size();
310+
if (original_value.size() > defaut_sensitive_field_value.size()) {
311+
new_value_size = defaut_sensitive_field_value.size();
312+
TSError("[%s] Encountered a sensitive field value larger than our default "
313+
"field size. Default size: %zu, incoming field size: %zu",
314+
PLUGIN_NAME, defaut_sensitive_field_value.size(), original_value.size());
315+
}
316+
return std::string_view{defaut_sensitive_field_value.data(), new_value_size};
317+
}
318+
239319
/// Read the txn information from TSMBuffer and write the header information.
240320
/// This function does not write the content node.
241321
std::string
@@ -302,8 +382,11 @@ write_message_node_no_content(TSMBuffer &buffer, TSMLoc &hdr_loc)
302382
int name_len = 0, value_len = 0;
303383
// Append to "fields" list if valid value exists
304384
if ((name = TSMimeHdrFieldNameGet(buffer, hdr_loc, field_loc, &name_len)) && name_len) {
385+
std::string_view name_view{name, static_cast<size_t>(name_len)};
305386
value = TSMimeHdrFieldValueStringGet(buffer, hdr_loc, field_loc, -1, &value_len);
306-
result += json_entry_array(name, name_len, value, value_len);
387+
std::string_view value_view{value, static_cast<size_t>(value_len)};
388+
std::string_view new_value = replace_sensitive_fields(name_view, value_view);
389+
result += json_entry_array(name_view, new_value);
307390
}
308391

309392
next_field_loc = TSMimeHdrFieldNext(buffer, hdr_loc, field_loc);
@@ -391,6 +474,7 @@ session_txn_handler(TSCont contp, TSEvent event, void *edata)
391474
// Get UUID
392475
char uuid[TS_CRUUID_STRING_LEN + 1];
393476
TSAssert(TS_SUCCESS == TSClientRequestUuidGet(txnp, uuid));
477+
std::string_view uuid_view{uuid, strnlen(uuid, TS_CRUUID_STRING_LEN)};
394478

395479
// Generate per transaction json records
396480
if (!ssnData->first) {
@@ -408,7 +492,7 @@ session_txn_handler(TSCont contp, TSEvent event, void *edata)
408492
// The uuid is a header field for each message in the transaction. Use the
409493
// "all" node to apply to each message.
410494
std::string_view name = "uuid";
411-
txn_info += ",\"all\":{\"headers\":{\"fields\":[" + json_entry_array(name.data(), name.size(), uuid, strlen(uuid));
495+
txn_info += ",\"all\":{\"headers\":{\"fields\":[" + json_entry_array(name, uuid_view);
412496
txn_info += "]}}";
413497
ssnData->write_to_disk(txn_info);
414498
break;
@@ -637,11 +721,32 @@ TSPluginInit(int argc, const char *argv[])
637721
static const struct option longopts[] = {{"logdir", required_argument, nullptr, 'l'},
638722
{"sample", required_argument, nullptr, 's'},
639723
{"limit", required_argument, nullptr, 'm'},
724+
{"sensitive-fields", required_argument, nullptr, 'f'},
640725
{nullptr, no_argument, nullptr, 0}};
641726
int opt = 0;
642727
while (opt >= 0) {
643728
opt = getopt_long(argc, const_cast<char *const *>(argv), "l:", longopts, nullptr);
644729
switch (opt) {
730+
case 'f': {
731+
// --sensitive-fields takes a comma-separated list of HTTP fields that
732+
// are sensitive. The field values for these fields will be replaced
733+
// with generic traffic_dump generated data.
734+
//
735+
// If this option is not used, then the default values in
736+
// default_sensitive_fields are used. If this option is used, then it
737+
// replaces the default sensitive fields with the user-supplied list of
738+
// sensitive fields.
739+
ts::TextView input_filter_fields{std::string_view{optarg}};
740+
ts::TextView filter_field;
741+
while (!(filter_field = input_filter_fields.take_prefix_at(',')).empty()) {
742+
filter_field.trim_if(&isspace);
743+
if (filter_field.empty()) {
744+
continue;
745+
}
746+
sensitive_fields.emplace(filter_field);
747+
}
748+
break;
749+
}
645750
case 'l': {
646751
log_path = ts::file::path{optarg};
647752
break;
@@ -664,6 +769,23 @@ TSPluginInit(int argc, const char *argv[])
664769
}
665770
}
666771

772+
if (sensitive_fields.empty()) {
773+
// The user did not provide their own list of sensitive fields. Use the
774+
// default.
775+
sensitive_fields.merge(default_sensitive_fields);
776+
}
777+
778+
std::string sensitive_fields_string;
779+
bool is_first = true;
780+
for (const auto &field : sensitive_fields) {
781+
if (!is_first) {
782+
sensitive_fields_string += ", ";
783+
}
784+
is_first = false;
785+
sensitive_fields_string += field;
786+
}
787+
TSDebug(PLUGIN_NAME, "Sensitive fields for which generic values will be dumped: %s", sensitive_fields_string.c_str());
788+
667789
// Make absolute path if not
668790
if (!log_path.is_absolute()) {
669791
log_path = ts::file::path(TSInstallDirGet()) / log_path;
@@ -675,6 +797,8 @@ TSPluginInit(int argc, const char *argv[])
675797
} else if (TS_SUCCESS != TSUserArgIndexReserve(TS_USER_ARGS_SSN, PLUGIN_NAME, "Track log related data", &s_arg_idx)) {
676798
TSError("[%s] Unable to initialize plugin (disabled). Failed to reserve ssn arg.", PLUGIN_NAME);
677799
} else {
800+
initialize_default_sensitive_field();
801+
678802
/// Add global hooks
679803
TSCont ssncont = TSContCreate(global_ssn_handler, nullptr);
680804
TSHttpHookAdd(TS_HTTP_SSN_START_HOOK, ssncont);

tests/gold_tests/pluginTest/traffic_dump/gold/200.gold

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
``
77
< HTTP/1.1 200 OK
88
< Content-Length: 0
9+
< Set-Cookie: classified_not_for_logging
910
< Date: ``
1011
< Age: ``
1112
< Server: ATS/``

tests/gold_tests/pluginTest/traffic_dump/traffic_dump.test.py

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,16 @@
3333
"Host: www.example.com\r\nContent-Length: 0\r\n\r\n",
3434
"timestamp": "1469733493.993", "body": ""}
3535
response_header = {"headers": "HTTP/1.1 200 OK"
36-
"\r\nConnection: close\r\nContent-Length: 0\r\n\r\n",
36+
"\r\nConnection: close\r\nContent-Length: 0"
37+
"\r\nSet-Cookie: classified_not_for_logging\r\n\r\n",
3738
"timestamp": "1469733493.993", "body": ""}
3839
server.addResponse("sessionfile.log", request_header, response_header)
3940
request_header = {"headers": "GET /one HTTP/1.1\r\n"
4041
"Host: www.example.com\r\nContent-Length: 0\r\n\r\n",
4142
"timestamp": "1469733493.993", "body": ""}
4243
response_header = {"headers": "HTTP/1.1 200 OK"
43-
"\r\nConnection: close\r\nContent-Length: 0\r\n\r\n",
44+
"\r\nConnection: close\r\nContent-Length: 0"
45+
"\r\nSet-Cookie: classified_not_for_logging\r\n\r\n",
4446
"timestamp": "1469733493.993", "body": ""}
4547
server.addResponse("sessionfile.log", request_header, response_header)
4648
request_header = {"headers": "GET /post_with_body HTTP/1.1\r\n"
@@ -63,7 +65,8 @@
6365
)
6466
# Configure traffic_dump.
6567
ts.Disk.plugin_config.AddLine(
66-
'traffic_dump.so --logdir {0} --sample 1 --limit 1000000000'.format(replay_dir)
68+
'traffic_dump.so --logdir {0} --sample 1 --limit 1000000000 '
69+
'--sensitive-fields "cookie,set-cookie,x-request-1,x-request-2"'.format(replay_dir)
6770
)
6871

6972
# Set up trafficserver expectations.
@@ -99,17 +102,21 @@
99102

100103
tr.Processes.Default.StartBefore(server, ready=When.PortOpen(server.Variables.Port))
101104
tr.Processes.Default.StartBefore(Test.Processes.ts)
102-
tr.Processes.Default.Command = 'curl http://127.0.0.1:{0} -H\'Host: www.example.com\' --verbose'.format(
103-
ts.Variables.port)
105+
tr.Processes.Default.Command = \
106+
('curl http://127.0.0.1:{0} -H"Cookie: donotlogthis" '
107+
'-H"Host: www.example.com" -H"X-Request-1: ultra_sensitive" --verbose'.format(
108+
ts.Variables.port))
104109
tr.Processes.Default.ReturnCode = 0
105110
tr.Processes.Default.Streams.stderr = "gold/200.gold"
106111
tr.StillRunningAfter = server
107112
tr.StillRunningAfter = ts
108113

109114
# Execute the second transaction.
110115
tr = Test.AddTestRun("Second transaction")
111-
tr.Processes.Default.Command = 'curl http://127.0.0.1:{0}/one -H\'Host: www.example.com\' --verbose'.format(
112-
ts.Variables.port)
116+
tr.Processes.Default.Command = \
117+
('curl http://127.0.0.1:{0}/one -H"Host: www.example.com" '
118+
'-H"X-Request-2: also_very_sensitive" --verbose'.format(
119+
ts.Variables.port))
113120
tr.Processes.Default.ReturnCode = 0
114121
tr.Processes.Default.Streams.stderr = "gold/200.gold"
115122
tr.StillRunningAfter = server
@@ -118,22 +125,29 @@
118125
# Verify the properties of the replay file for the first transaction.
119126
tr = Test.AddTestRun("Verify the json content of the first session")
120127
verify_replay = "verify_replay.py"
128+
sensitive_fields_arg = (
129+
"--sensitive-fields cookie "
130+
"--sensitive-fields set-cookie "
131+
"--sensitive-fields x-request-1 "
132+
"--sensitive-fields x-request-2 ")
121133
tr.Setup.CopyAs(verify_replay, Test.RunDirectory)
122-
tr.Processes.Default.Command = "python3 {0} {1} {2}".format(
134+
tr.Processes.Default.Command = "python3 {0} {1} {2} {3}".format(
123135
verify_replay,
124136
os.path.join(Test.Variables.AtsTestToolsDir, 'lib', 'replay_schema.json'),
125-
replay_file_session_1)
137+
replay_file_session_1,
138+
sensitive_fields_arg)
126139
tr.Processes.Default.ReturnCode = 0
127140
tr.StillRunningAfter = server
128141
tr.StillRunningAfter = ts
129142

130143
# Verify the properties of the replay file for the second transaction.
131144
tr = Test.AddTestRun("Verify the json content of the second session")
132145
tr.Setup.CopyAs(verify_replay, Test.RunDirectory)
133-
tr.Processes.Default.Command = "python3 {0} {1} {2} --request-target '/one'".format(
146+
tr.Processes.Default.Command = "python3 {0} {1} {2} {3} --request-target '/one'".format(
134147
verify_replay,
135148
os.path.join(Test.Variables.AtsTestToolsDir, 'lib', 'replay_schema.json'),
136-
replay_file_session_2)
149+
replay_file_session_2,
150+
sensitive_fields_arg)
137151
tr.Processes.Default.ReturnCode = 0
138152
tr.StillRunningAfter = server
139153
tr.StillRunningAfter = ts
@@ -147,7 +161,7 @@
147161
request_target = "http://localhost:{0}/candy".format(ts.Variables.port)
148162
tr.Processes.Default.Command = (
149163
'curl --request-target "{0}" '
150-
'http://127.0.0.1:{1} -H\'Host: www.example.com\' --verbose'.format(
164+
'http://127.0.0.1:{1} -H"Host: www.example.com" --verbose'.format(
151165
request_target, ts.Variables.port))
152166
tr.Processes.Default.ReturnCode = 0
153167
tr.Processes.Default.Streams.stderr = "gold/explicit_target.gold"
@@ -157,10 +171,11 @@
157171
tr = Test.AddTestRun("Verify the replay file has the explicit target.")
158172
tr.Setup.CopyAs(verify_replay, Test.RunDirectory)
159173

160-
tr.Processes.Default.Command = "python3 {0} {1} {2} --request-target '{3}'".format(
174+
tr.Processes.Default.Command = "python3 {0} {1} {2} {3} --request-target '{4}'".format(
161175
verify_replay,
162176
os.path.join(Test.Variables.AtsTestToolsDir, 'lib', 'replay_schema.json'),
163177
replay_file_session_3,
178+
sensitive_fields_arg,
164179
request_target)
165180
tr.Processes.Default.ReturnCode = 0
166181
tr.StillRunningAfter = server
@@ -178,7 +193,7 @@
178193
# in the test run directory.
179194
tr.Processes.Default.Command = (
180195
'curl --data-binary @{0} --request-target "{1}" '
181-
'http://127.0.0.1:{2} -H\'Host: www.example.com\' --verbose'.format(
196+
'http://127.0.0.1:{2} -H"Host: www.example.com" --verbose'.format(
182197
verify_replay, request_target, ts.Variables.port))
183198
tr.Processes.Default.ReturnCode = 0
184199
tr.Processes.Default.Streams.stderr = "gold/post_with_body.gold"
@@ -190,10 +205,11 @@
190205

191206
size_of_verify_replay_file = os.path.getsize(os.path.join(Test.TestDirectory, verify_replay))
192207
tr.Processes.Default.Command = \
193-
"python3 {0} {1} {2} --client-request-size {3}".format(
208+
"python3 {0} {1} {2} {3} --client-request-size {4}".format(
194209
verify_replay,
195210
os.path.join(Test.Variables.AtsTestToolsDir, 'lib', 'replay_schema.json'),
196211
replay_file_session_4,
212+
sensitive_fields_arg,
197213
size_of_verify_replay_file)
198214
tr.Processes.Default.ReturnCode = 0
199215
tr.StillRunningAfter = server

0 commit comments

Comments
 (0)