Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions assets/gep/failed_capsules.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"version": 1,
"failed_capsules": []
}
Empty file added assets/gep/genes.jsonl
Empty file.
13 changes: 13 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 19 additions & 1 deletion src/gep/sanitize.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,30 @@

// Patterns to redact (replaced with placeholder)
const REDACT_PATTERNS = [
// API keys & tokens
// API keys & tokens (generic)
/Bearer\s+[A-Za-z0-9\-._~+\/]+=*/g,
/sk-[A-Za-z0-9]{20,}/g,
/token[=:]\s*["']?[A-Za-z0-9\-._~+\/]{16,}["']?/gi,
/api[_-]?key[=:]\s*["']?[A-Za-z0-9\-._~+\/]{16,}["']?/gi,
/secret[=:]\s*["']?[A-Za-z0-9\-._~+\/]{16,}["']?/gi,
/password[=:]\s*["']?[^\s"',;)}\]]{6,}["']?/gi,
// GitHub tokens (ghp_, gho_, ghu_, ghs_, github_pat_)
/ghp_[A-Za-z0-9]{36,}/g,
/gho_[A-Za-z0-9]{36,}/g,
/ghu_[A-Za-z0-9]{36,}/g,
/ghs_[A-Za-z0-9]{36,}/g,
/github_pat_[A-Za-z0-9_]{22,}/g,
// AWS access keys
/AKIA[0-9A-Z]{16}/g,
// OpenAI / Anthropic tokens
/sk-proj-[A-Za-z0-9\-_]{20,}/g,
/sk-ant-[A-Za-z0-9\-_]{20,}/g,
// npm tokens
/npm_[A-Za-z0-9]{36,}/g,
// Private keys
/-----BEGIN\s+(?:RSA\s+|EC\s+|DSA\s+|OPENSSH\s+)?PRIVATE\s+KEY-----[\s\S]*?-----END\s+(?:RSA\s+|EC\s+|DSA\s+|OPENSSH\s+)?PRIVATE\s+KEY-----/g,
// Basic auth in URLs (redact only credentials, keep :// and @)
/(?<=:\/\/)[^@\s]+:[^@\s]+(?=@)/g,
// Local filesystem paths
/\/home\/[^\s"',;)}\]]+/g,
/\/Users\/[^\s"',;)}\]]+/g,
Expand Down
90 changes: 90 additions & 0 deletions test/sanitize.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
const assert = require('assert');
const { sanitizePayload, redactString } = require('../src/gep/sanitize');

const REDACTED = '[REDACTED]';

// --- redactString ---

// Existing patterns (regression)
assert.strictEqual(redactString('Bearer abc123def456ghi789jkl0'), REDACTED);
assert.strictEqual(redactString('sk-abcdefghijklmnopqrstuvwxyz'), REDACTED);
assert.strictEqual(redactString('token=abcdefghijklmnop1234'), REDACTED);
assert.strictEqual(redactString('api_key=abcdefghijklmnop1234'), REDACTED);
assert.strictEqual(redactString('secret: abcdefghijklmnop1234'), REDACTED);
assert.strictEqual(redactString('/home/user/secret/file.txt'), REDACTED);
assert.strictEqual(redactString('/Users/admin/docs'), REDACTED);
assert.strictEqual(redactString('user@example.com'), REDACTED);

// GitHub tokens (bare, without token= prefix)
assert.ok(redactString('ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234').includes(REDACTED),
'bare ghp_ token should be redacted');
assert.ok(redactString('gho_abcdefghijklmnopqrstuvwxyz1234567890').includes(REDACTED),
'bare gho_ token should be redacted');
assert.ok(redactString('github_pat_abcdefghijklmnopqrstuvwxyz123456').includes(REDACTED),
'github_pat_ token should be redacted');
assert.ok(redactString('use ghs_abcdefghijklmnopqrstuvwxyz1234567890 for auth').includes(REDACTED),
'ghs_ in sentence should be redacted');

// AWS keys
assert.ok(redactString('AKIAIOSFODNN7EXAMPLE').includes(REDACTED),
'AWS access key should be redacted');

// OpenAI project tokens
assert.ok(redactString('sk-proj-bxOCXoWsaPj0IDE1yqlXCXIkWO1f').includes(REDACTED),
'sk-proj- token should be redacted');

// Anthropic tokens
assert.ok(redactString('sk-ant-api03-abcdefghijklmnopqrst').includes(REDACTED),
'sk-ant- token should be redacted');

// npm tokens
assert.ok(redactString('npm_abcdefghijklmnopqrstuvwxyz1234567890').includes(REDACTED),
'npm token should be redacted');

// Private keys
assert.ok(redactString('-----BEGIN RSA PRIVATE KEY-----\nabc\n-----END RSA PRIVATE KEY-----').includes(REDACTED),
'RSA private key should be redacted');
assert.ok(redactString('-----BEGIN PRIVATE KEY-----\ndata\n-----END PRIVATE KEY-----').includes(REDACTED),
'generic private key should be redacted');

// Password fields
assert.ok(redactString('password=mysecretpassword123').includes(REDACTED),
'password= should be redacted');
assert.ok(redactString('PASSWORD: "hunter2xyz"').includes(REDACTED),
'PASSWORD: should be redacted');

// Basic auth in URLs (should preserve scheme and @)
var urlResult = redactString('https://user:pass123@github.com/repo');
assert.ok(urlResult.includes(REDACTED), 'basic auth in URL should be redacted');
assert.ok(urlResult.startsWith('https://'), 'URL scheme should be preserved');
assert.ok(urlResult.includes('@github.com'), '@ and host should be preserved');

// Safe strings should NOT be redacted
assert.strictEqual(redactString('hello world'), 'hello world');
assert.strictEqual(redactString('error: something failed'), 'error: something failed');
assert.strictEqual(redactString('fix the bug in parser'), 'fix the bug in parser');

// --- sanitizePayload ---

// Deep sanitization
var payload = {
summary: 'Fixed auth using ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx5678',
nested: {
path: '/home/user/.ssh/id_rsa',
email: 'admin@internal.corp',
safe: 'this is fine',
},
};
var sanitized = sanitizePayload(payload);
assert.ok(sanitized.summary.includes(REDACTED), 'ghp token in summary');
assert.ok(sanitized.nested.path.includes(REDACTED), 'path in nested');
assert.ok(sanitized.nested.email.includes(REDACTED), 'email in nested');
assert.strictEqual(sanitized.nested.safe, 'this is fine');

// Null/undefined/number inputs
assert.strictEqual(sanitizePayload(null), null);
assert.strictEqual(sanitizePayload(undefined), undefined);
assert.strictEqual(redactString(null), null);
assert.strictEqual(redactString(123), 123);

console.log('All sanitize tests passed (34 assertions)');