Skip to content

Commit a10c09b

Browse files
committed
Fix PR #21 review: Security, memory safety, thread safety, and code cleanup
Security fixes: - Add escape_identifier() helper for proper SQLite identifier escaping - Replace sanitize_name() with allowlist validation (ASCII letters, digits, underscore only) - Fix MATCH clause FTS5 operator injection by wrapping query in double quotes - Apply escape_identifier() to all DDL statements (CREATE, DROP, triggers) Memory safety fixes: - Replace VLA with std::vector in MySQL_FTS::init(), add delete on error path - Fix memory leak: free error string before return in list_indexes() - Fix reindex_json["error"] potential exception using .value() with default Thread safety fixes: - reinit_fts(): Add mutex lock around pointer swap - reset_fts_path(): Move blocking init() outside lock, only swap pointer under lock Code cleanup: - Remove 7 debug fprintf statements from Query_Tool_Handler.cpp - Remove unused #include <memory> from MySQL_FTS.h Test script security fixes: - Use MYSQL_PWD environment variable instead of -p"..." for password - Add escape_sql() function and apply to INSERT statement - Fix CURL_OPTS quoting: ${CURL_OPTS:+"${CURL_OPTS}"} - Remove unused FTS_INDEX_NAME and SEARCH_QUERIES variables Documentation fixes: - Fix bare URL to markdown link format - Add code block language identifiers (text, bash)
1 parent fd5d433 commit a10c09b

File tree

7 files changed

+120
-92
lines changed

7 files changed

+120
-92
lines changed

doc/MCP/FTS_USER_GUIDE.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ The MCP Full Text Search (FTS) module provides fast, indexed search capabilities
2929

3030
### How It Works
3131

32-
```
32+
```text
3333
Traditional Query Flow:
3434
LLM Agent → Full Table Scan → Millions of Rows → Slow Response
3535
@@ -43,7 +43,7 @@ LLM Agent → FTS Search (ms) → Top N IDs → Targeted MySQL Query → Fast Re
4343

4444
### Components
4545

46-
```
46+
```text
4747
┌─────────────────────────────────────────────────────────────┐
4848
│ MCP Query Endpoint │
4949
│ (JSON-RPC 2.0 over HTTPS) │
@@ -89,12 +89,12 @@ LLM Agent → FTS Search (ms) → Top N IDs → Targeted MySQL Query → Fast Re
8989
### Data Flow
9090

9191
1. **Index Creation**:
92-
```
92+
```text
9393
MySQL Table → SELECT → JSON Parse → SQLite Insert → FTS5 Index
9494
```
9595

9696
2. **Search**:
97-
```
97+
```text
9898
Query → FTS5 MATCH → BM25 Ranking → Results + Snippets → JSON Response
9999
```
100100

@@ -572,7 +572,7 @@ curl -k -X POST "https://127.0.0.1:6071/mcp/query" \
572572
## API Endpoints
573573

574574
### Base URL
575-
```
575+
```text
576576
https://<host>:6071/mcp/query
577577
```
578578

@@ -770,13 +770,13 @@ tail -f /var/log/proxysql.log | grep FTS
770770

771771
For a full end-to-end validation of the FTS stack (tools/list, indexing, search/snippet, list_indexes structure, empty query handling), run:
772772

773-
```
773+
```bash
774774
scripts/mcp/test_mcp_fts_detailed.sh
775775
```
776776

777777
Optional cleanup of created indexes:
778778

779-
```
779+
```bash
780780
scripts/mcp/test_mcp_fts_detailed.sh --cleanup
781781
```
782782

@@ -850,5 +850,5 @@ CREATE VIRTUAL TABLE fts_search_<schema>_<table> USING fts5(
850850
## Support
851851

852852
For issues, questions, or contributions:
853-
- GitHub: https://github.com/ProxySQL/proxysql-vec
853+
- GitHub: [ProxySQL/proxysql-vec](https://github.com/ProxySQL/proxysql-vec)
854854
- Documentation: `/doc/MCP/` directory

include/MySQL_FTS.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
#include "sqlite3db.h"
55
#include <string>
66
#include <vector>
7-
#include <memory>
87

98
// Forward declaration
109
class MySQL_Tool_Handler;
@@ -76,6 +75,13 @@ class MySQL_FTS {
7675
*/
7776
std::string escape_sql(const std::string& str);
7877

78+
/**
79+
* @brief Escape identifier for SQLite (double backticks)
80+
* @param identifier Identifier to escape
81+
* @return Escaped identifier
82+
*/
83+
std::string escape_identifier(const std::string& identifier);
84+
7985
public:
8086
/**
8187
* @brief Constructor

lib/MySQL_FTS.cpp

Lines changed: 73 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,13 @@ MySQL_FTS::~MySQL_FTS() {
2424
int MySQL_FTS::init() {
2525
// Initialize database connection
2626
db = new SQLite3DB();
27-
char path_buf[db_path.size() + 1];
28-
strcpy(path_buf, db_path.c_str());
29-
int rc = db->open(path_buf, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE);
27+
std::vector<char> path_buf(db_path.size() + 1);
28+
strcpy(path_buf.data(), db_path.c_str());
29+
int rc = db->open(path_buf.data(), SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE);
3030
if (rc != SQLITE_OK) {
3131
proxy_error("Failed to open FTS database at %s: %d\n", db_path.c_str(), rc);
32+
delete db;
33+
db = NULL;
3234
return -1;
3335
}
3436

@@ -88,16 +90,36 @@ int MySQL_FTS::create_tables() {
8890
}
8991

9092
std::string MySQL_FTS::sanitize_name(const std::string& name) {
91-
std::string sanitized = name;
92-
// Replace dots and special characters with underscores
93-
for (size_t i = 0; i < sanitized.length(); i++) {
94-
if (sanitized[i] == '.' || sanitized[i] == '-' || sanitized[i] == ' ') {
95-
sanitized[i] = '_';
93+
const size_t MAX_NAME_LEN = 100;
94+
std::string sanitized;
95+
// Allowlist: only ASCII letters, digits, underscore
96+
for (char c : name) {
97+
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
98+
(c >= '0' && c <= '9') || c == '_') {
99+
sanitized.push_back(c);
96100
}
97101
}
102+
// Prevent leading digit (SQLite identifiers can't start with digit)
103+
if (!sanitized.empty() && sanitized[0] >= '0' && sanitized[0] <= '9') {
104+
sanitized.insert(sanitized.begin(), '_');
105+
}
106+
// Enforce maximum length
107+
if (sanitized.length() > MAX_NAME_LEN) sanitized = sanitized.substr(0, MAX_NAME_LEN);
98108
return sanitized;
99109
}
100110

111+
std::string MySQL_FTS::escape_identifier(const std::string& identifier) {
112+
std::string escaped;
113+
escaped.reserve(identifier.length() * 2 + 2);
114+
escaped.push_back('`');
115+
for (char c : identifier) {
116+
escaped.push_back(c);
117+
if (c == '`') escaped.push_back('`'); // Double backticks
118+
}
119+
escaped.push_back('`');
120+
return escaped;
121+
}
122+
101123
std::string MySQL_FTS::escape_sql(const std::string& str) {
102124
std::string escaped;
103125
for (size_t i = 0; i < str.length(); i++) {
@@ -148,12 +170,12 @@ bool MySQL_FTS::index_exists(const std::string& schema, const std::string& table
148170
int MySQL_FTS::create_index_tables(const std::string& schema, const std::string& table) {
149171
std::string data_table = get_data_table_name(schema, table);
150172
std::string fts_table = get_fts_table_name(schema, table);
151-
std::string sanitized_data = data_table;
152-
std::string sanitized_fts = fts_table;
173+
std::string escaped_data = escape_identifier(data_table);
174+
std::string escaped_fts = escape_identifier(fts_table);
153175

154176
// Create data table
155177
std::ostringstream create_data_sql;
156-
create_data_sql << "CREATE TABLE IF NOT EXISTS " << sanitized_data << " ("
178+
create_data_sql << "CREATE TABLE IF NOT EXISTS " << escaped_data << " ("
157179
" rowid INTEGER PRIMARY KEY AUTOINCREMENT,"
158180
" schema_name TEXT NOT NULL,"
159181
" table_name TEXT NOT NULL,"
@@ -169,9 +191,9 @@ int MySQL_FTS::create_index_tables(const std::string& schema, const std::string&
169191

170192
// Create FTS5 virtual table with external content
171193
std::ostringstream create_fts_sql;
172-
create_fts_sql << "CREATE VIRTUAL TABLE IF NOT EXISTS " << sanitized_fts << " USING fts5("
194+
create_fts_sql << "CREATE VIRTUAL TABLE IF NOT EXISTS " << escaped_fts << " USING fts5("
173195
" content, metadata,"
174-
" content='" << sanitized_data << "',"
196+
" content=" << escaped_data << ","
175197
" content_rowid='rowid',"
176198
" tokenize='porter unicode61'"
177199
");";
@@ -183,37 +205,38 @@ int MySQL_FTS::create_index_tables(const std::string& schema, const std::string&
183205

184206
// Create triggers for automatic sync (populate the FTS table)
185207
std::string base_name = sanitize_name(schema) + "_" + sanitize_name(table);
208+
std::string escaped_base = escape_identifier(base_name);
186209

187210
// Drop existing triggers if any
188-
db->execute(("DROP TRIGGER IF EXISTS fts_ai_" + base_name).c_str());
189-
db->execute(("DROP TRIGGER IF EXISTS fts_ad_" + base_name).c_str());
190-
db->execute(("DROP TRIGGER IF EXISTS fts_au_" + base_name).c_str());
211+
db->execute(("DROP TRIGGER IF EXISTS " + escape_identifier("fts_ai_" + base_name)).c_str());
212+
db->execute(("DROP TRIGGER IF EXISTS " + escape_identifier("fts_ad_" + base_name)).c_str());
213+
db->execute(("DROP TRIGGER IF EXISTS " + escape_identifier("fts_au_" + base_name)).c_str());
191214

192215
// AFTER INSERT trigger
193216
std::ostringstream ai_sql;
194-
ai_sql << "CREATE TRIGGER IF NOT EXISTS fts_ai_" << base_name
195-
<< " AFTER INSERT ON " << sanitized_data << " BEGIN"
196-
<< " INSERT INTO " << sanitized_fts << "(rowid, content, metadata)"
217+
ai_sql << "CREATE TRIGGER IF NOT EXISTS " << escape_identifier("fts_ai_" + base_name)
218+
<< " AFTER INSERT ON " << escaped_data << " BEGIN"
219+
<< " INSERT INTO " << escaped_fts << "(rowid, content, metadata)"
197220
<< " VALUES (new.rowid, new.content, new.metadata);"
198221
<< "END;";
199222
db->execute(ai_sql.str().c_str());
200223

201224
// AFTER DELETE trigger
202225
std::ostringstream ad_sql;
203-
ad_sql << "CREATE TRIGGER IF NOT EXISTS fts_ad_" << base_name
204-
<< " AFTER DELETE ON " << sanitized_data << " BEGIN"
205-
<< " INSERT INTO " << sanitized_fts << "(" << sanitized_fts << ", rowid, content, metadata)"
226+
ad_sql << "CREATE TRIGGER IF NOT EXISTS " << escape_identifier("fts_ad_" + base_name)
227+
<< " AFTER DELETE ON " << escaped_data << " BEGIN"
228+
<< " INSERT INTO " << escaped_fts << "(" << escaped_fts << ", rowid, content, metadata)"
206229
<< " VALUES ('delete', old.rowid, old.content, old.metadata);"
207230
<< "END;";
208231
db->execute(ad_sql.str().c_str());
209232

210233
// AFTER UPDATE trigger
211234
std::ostringstream au_sql;
212-
au_sql << "CREATE TRIGGER IF NOT EXISTS fts_au_" << base_name
213-
<< " AFTER UPDATE ON " << sanitized_data << " BEGIN"
214-
<< " INSERT INTO " << sanitized_fts << "(" << sanitized_fts << ", rowid, content, metadata)"
235+
au_sql << "CREATE TRIGGER IF NOT EXISTS " << escape_identifier("fts_au_" + base_name)
236+
<< " AFTER UPDATE ON " << escaped_data << " BEGIN"
237+
<< " INSERT INTO " << escaped_fts << "(" << escaped_fts << ", rowid, content, metadata)"
215238
<< " VALUES ('delete', old.rowid, old.content, old.metadata);"
216-
<< " INSERT INTO " << sanitized_fts << "(rowid, content, metadata)"
239+
<< " INSERT INTO " << escaped_fts << "(rowid, content, metadata)"
217240
<< " VALUES (new.rowid, new.content, new.metadata);"
218241
<< "END;";
219242
db->execute(au_sql.str().c_str());
@@ -327,7 +350,7 @@ std::string MySQL_FTS::index_table(
327350

328351
// Get data table name
329352
std::string data_table = get_data_table_name(schema, table);
330-
std::string sanitized_data = data_table;
353+
std::string escaped_data = escape_identifier(data_table);
331354

332355
// Insert data in batches
333356
int row_count = 0;
@@ -371,7 +394,7 @@ std::string MySQL_FTS::index_table(
371394

372395
// Insert into data table (triggers will sync to FTS)
373396
std::ostringstream insert_sql;
374-
insert_sql << "INSERT INTO " << sanitized_data
397+
insert_sql << "INSERT INTO " << escaped_data
375398
<< " (schema_name, table_name, primary_key_value, content, metadata) "
376399
<< "VALUES ('" << escape_sql(schema) << "', '"
377400
<< escape_sql(table) << "', '"
@@ -483,17 +506,26 @@ std::string MySQL_FTS::search(
483506

484507
std::string data_table = get_data_table_name(idx_schema, idx_table);
485508
std::string fts_table = get_fts_table_name(idx_schema, idx_table);
486-
std::string sanitized_data = data_table;
509+
std::string escaped_data = escape_identifier(data_table);
510+
std::string escaped_fts = escape_identifier(fts_table);
511+
512+
// Escape query for FTS5 MATCH clause (wrap in double quotes, escape embedded quotes)
513+
std::string fts_literal = "\"";
514+
for (char c : query) {
515+
fts_literal.push_back(c);
516+
if (c == '"') fts_literal.push_back('"'); // Double quotes
517+
}
518+
fts_literal.push_back('"');
487519

488520
// Search query for this index (use table name for MATCH/bm25)
489521
std::ostringstream search_sql;
490522
search_sql << "SELECT d.schema_name, d.table_name, d.primary_key_value, "
491-
<< "snippet(" << fts_table << ", 0, '<mark>', '</mark>', '...', 30) AS snippet, "
523+
<< "snippet(" << escaped_fts << ", 0, '<mark>', '</mark>', '...', 30) AS snippet, "
492524
<< "d.metadata "
493-
<< "FROM " << fts_table << " "
494-
<< "JOIN " << sanitized_data << " d ON " << fts_table << ".rowid = d.rowid "
495-
<< "WHERE " << fts_table << " MATCH '" << escape_sql(query) << "' "
496-
<< "ORDER BY bm25(" << fts_table << ") ASC "
525+
<< "FROM " << escaped_fts << " "
526+
<< "JOIN " << escaped_data << " d ON " << escaped_fts << ".rowid = d.rowid "
527+
<< "WHERE " << escaped_fts << " MATCH " << fts_literal << " "
528+
<< "ORDER BY bm25(" << escaped_fts << ") ASC "
497529
<< "LIMIT " << limit;
498530

499531
SQLite3_result* idx_resultset = NULL;
@@ -581,6 +613,7 @@ std::string MySQL_FTS::list_indexes() {
581613

582614
if (error) {
583615
result["error"] = "Failed to list indexes: " + std::string(error);
616+
(*proxy_sqlite3_free)(error);
584617
return result.dump();
585618
}
586619

@@ -633,17 +666,17 @@ std::string MySQL_FTS::delete_index(const std::string& schema, const std::string
633666
db->wrlock();
634667

635668
// Drop triggers
636-
db->execute(("DROP TRIGGER IF EXISTS fts_ai_" + base_name).c_str());
637-
db->execute(("DROP TRIGGER IF EXISTS fts_ad_" + base_name).c_str());
638-
db->execute(("DROP TRIGGER IF EXISTS fts_au_" + base_name).c_str());
669+
db->execute(("DROP TRIGGER IF EXISTS " + escape_identifier("fts_ai_" + base_name)).c_str());
670+
db->execute(("DROP TRIGGER IF EXISTS " + escape_identifier("fts_ad_" + base_name)).c_str());
671+
db->execute(("DROP TRIGGER IF EXISTS " + escape_identifier("fts_au_" + base_name)).c_str());
639672

640673
// Drop FTS table
641674
std::string fts_table = get_fts_table_name(schema, table);
642-
db->execute(("DROP TABLE IF EXISTS " + fts_table).c_str());
675+
db->execute(("DROP TABLE IF EXISTS " + escape_identifier(fts_table)).c_str());
643676

644677
// Drop data table
645678
std::string data_table = get_data_table_name(schema, table);
646-
db->execute(("DROP TABLE IF EXISTS " + data_table).c_str());
679+
db->execute(("DROP TABLE IF EXISTS " + escape_identifier(data_table)).c_str());
647680

648681
// Remove metadata
649682
std::ostringstream metadata_sql;
@@ -751,7 +784,7 @@ std::string MySQL_FTS::rebuild_all(MySQL_Tool_Handler* mysql_handler) {
751784
json failed_item;
752785
failed_item["schema"] = schema;
753786
failed_item["table"] = table;
754-
failed_item["error"] = reindex_json["error"].get<std::string>();
787+
failed_item["error"] = reindex_json.value("error", std::string("unknown error"));
755788
failed.push_back(failed_item);
756789
}
757790
}

lib/MySQL_Tool_Handler.cpp

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -115,25 +115,25 @@ int MySQL_Tool_Handler::init() {
115115
}
116116

117117
bool MySQL_Tool_Handler::reset_fts_path(const std::string& path) {
118-
pthread_mutex_lock(&fts_lock);
119-
120-
if (fts) {
121-
delete fts;
122-
fts = NULL;
123-
}
118+
MySQL_FTS* new_fts = NULL;
124119

120+
// Initialize new FTS outside lock (blocking I/O)
125121
if (!path.empty()) {
126-
fts = new MySQL_FTS(path);
127-
if (fts->init()) {
122+
new_fts = new MySQL_FTS(path);
123+
if (new_fts->init()) {
128124
proxy_error("Failed to initialize FTS with new path: %s\n", path.c_str());
129-
delete fts;
130-
fts = NULL;
131-
pthread_mutex_unlock(&fts_lock);
125+
delete new_fts;
132126
return false;
133127
}
134128
}
135129

130+
// Swap pointer under lock (non-blocking)
131+
pthread_mutex_lock(&fts_lock);
132+
MySQL_FTS* old_fts = fts;
133+
fts = new_fts;
136134
pthread_mutex_unlock(&fts_lock);
135+
if (old_fts) delete old_fts;
136+
137137
return true;
138138
}
139139

@@ -1154,7 +1154,7 @@ int MySQL_Tool_Handler::reinit_fts(const std::string& fts_path) {
11541154
}
11551155
}
11561156

1157-
// First, test if we can open the new database
1157+
// First, test if we can open the new database (outside lock)
11581158
MySQL_FTS* new_fts = new MySQL_FTS(fts_path);
11591159
if (!new_fts) {
11601160
proxy_error("MySQL_Tool_Handler: Failed to create new FTS handler\n");
@@ -1167,11 +1167,12 @@ int MySQL_Tool_Handler::reinit_fts(const std::string& fts_path) {
11671167
return -1; // Return error WITHOUT closing old FTS
11681168
}
11691169

1170-
// Success! Now close old and replace with new
1171-
if (fts) {
1172-
delete fts;
1173-
}
1170+
// Success! Now swap the pointer under lock
1171+
pthread_mutex_lock(&fts_lock);
1172+
MySQL_FTS* old_fts = fts;
11741173
fts = new_fts;
1174+
pthread_mutex_unlock(&fts_lock);
1175+
if (old_fts) delete old_fts;
11751176

11761177
proxy_info("MySQL_Tool_Handler: FTS reinitialized successfully at %s\n", fts_path.c_str());
11771178
return 0;

0 commit comments

Comments
 (0)