Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
8967a98
Add SQL parser design spec for ProxySQL integration
renecannao Mar 24, 2026
8b07819
Address spec review findings: arena growth, AstNode layout, multi-sta…
renecannao Mar 24, 2026
13fbd8c
Fix implementation plan issues from review
renecannao Mar 24, 2026
5257914
feat: add new build system and test infrastructure for sql_parser
renecannao Mar 24, 2026
e5c18f8
feat: add core types — StringRef, Dialect, StmtType, NodeType enums
renecannao Mar 24, 2026
c9b7e86
feat: add arena allocator with block chaining and max size
renecannao Mar 24, 2026
5c58e9f
feat: add AstNode (32-byte) and ParseResult structs
renecannao Mar 24, 2026
b51164e
feat: add token types and keyword lookup tables for MySQL and PostgreSQL
renecannao Mar 24, 2026
4d97bb7
feat: add dialect-templated tokenizer with MySQL and PostgreSQL support
renecannao Mar 24, 2026
e5c2733
feat: add classifier and Tier 2 extractors for all statement types
renecannao Mar 24, 2026
00ff816
chore: update .gitignore for new parser build artifacts
renecannao Mar 24, 2026
2df44b3
fix: suppress unused parameter warnings, add doubled single-quote esc…
renecannao Mar 24, 2026
10fe5fe
Add expression parser + SET deep parser implementation plan
renecannao Mar 24, 2026
2d0cfda
feat: add Pratt expression parser with literals, identifiers, and ope…
renecannao Mar 24, 2026
9d0dcf7
feat: add SET deep parser with full AST for all SET variants
renecannao Mar 24, 2026
a8bf3f1
test: add extensive SET parser tests from ProxySQL test corpus
renecannao Mar 24, 2026
5fce9ad
ci: add GitHub Actions workflow for build and test
renecannao Mar 24, 2026
fc2efc3
Add SELECT deep parser implementation plan
renecannao Mar 24, 2026
db2a3e9
feat: add SELECT deep parser with FROM, JOIN, WHERE, GROUP BY, ORDER …
renecannao Mar 24, 2026
b992978
Add query emitter implementation plan
renecannao Mar 24, 2026
b5dcbc7
feat: add query emitter with round-trip support for SET and SELECT
renecannao Mar 24, 2026
2a9afd0
Add prepared statement cache and benchmarks implementation plans
renecannao Mar 24, 2026
8c4d7de
feat: add prepared statement cache with parse_and_cache, execute, and…
renecannao Mar 24, 2026
b7126b9
feat: add Google Benchmark performance tests for parser operations
renecannao Mar 24, 2026
149b54e
Add design spec for Tier 1 promotions (INSERT/UPDATE/DELETE), UNION, …
renecannao Mar 24, 2026
567d8b2
Fix spec review issues: shared TableRefParser, compound query layerin…
renecannao Mar 24, 2026
c830060
Add implementation plans 7-11: INSERT, UPDATE, DELETE, UNION, digest
renecannao Mar 24, 2026
b66f241
Extract TableRefParser from SelectParser for shared table ref parsing
renecannao Mar 24, 2026
85eaaf6
Add INSERT-related tokens, node types, and keyword table entries
renecannao Mar 24, 2026
40cbcd2
Add INSERT/REPLACE deep parser with emitter and classifier integration
renecannao Mar 24, 2026
d64a5fa
Add comprehensive INSERT parser tests with round-trip verification
renecannao Mar 24, 2026
ec435cc
Add NODE_UPDATE_STMT and NODE_UPDATE_SET_CLAUSE to NodeType enum
renecannao Mar 24, 2026
7c9a1fe
Add UpdateParser<D> header-only template for UPDATE deep parsing
renecannao Mar 24, 2026
b2ed564
Integrate UPDATE deep parser with emitter and classifier
renecannao Mar 24, 2026
96145db
Add comprehensive UPDATE parser tests with round-trip verification
renecannao Mar 24, 2026
8348618
Add DELETE node types (NODE_DELETE_STMT, NODE_DELETE_USING_CLAUSE)
renecannao Mar 24, 2026
12b17c8
Add DELETE deep parser for MySQL and PostgreSQL
renecannao Mar 24, 2026
ac8752b
Integrate DELETE parser with emitter and classifier
renecannao Mar 24, 2026
0de97c8
Add comprehensive DELETE parser tests
renecannao Mar 24, 2026
13a1e50
Add TK_INTERSECT, TK_EXCEPT tokens and NODE_COMPOUND_QUERY, NODE_SET_…
renecannao Mar 24, 2026
b7ecc1b
Add CompoundQueryParser for UNION/INTERSECT/EXCEPT with precedence
renecannao Mar 24, 2026
b0ac674
Integrate CompoundQueryParser into parser and add emitter support
renecannao Mar 24, 2026
5c8f2bd
Add compound query tests and fix SelectParser compound mode
renecannao Mar 24, 2026
e415b16
Add query digest/normalization module (Plan 11)
renecannao Mar 24, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
30 changes: 30 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
name: CI

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  build-and-test:
    strategy:
      matrix:
        os: [ubuntu-22.04, ubuntu-24.04]
        compiler: [g++, clang++]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4

      # Pass CXX on the make command line rather than via `env:`.
      # Makefile.new assigns `CXX = g++` unconditionally, so an exported
      # environment variable is silently ignored and the clang++ leg of the
      # matrix would rebuild with g++. Command-line variable assignments
      # override Makefile assignments (GNU make semantics), so this actually
      # exercises both compilers.
      - name: Build and test
        run: make -f Makefile.new clean && make -f Makefile.new all CXX=${{ matrix.compiler }}

  macos:
    runs-on: macos-latest
    steps:
      - uses: actions/checkout@v4

      # macOS uses the default toolchain (Apple clang via make's default rules).
      - name: Build and test
        run: make -f Makefile.new clean && make -f Makefile.new all
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,7 @@ src/*_parser/*_lexer.yy.c
src/*_parser/*_parser.output
src/*_parser/*_parser.report

# New parser build artifacts
libsqlparser.a
run_tests
run_bench
93 changes: 93 additions & 0 deletions Makefile.new
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Compiler selection: respect a CXX provided by the environment or the make
# command line (e.g. the CI matrix builds with clang++). The previous
# unconditional `CXX = g++` silently overrode any exported CXX. A plain
# `?=` would not help either, because make predefines CXX (origin "default")
# to `c++`; guard on the origin instead so g++ remains the fallback.
ifeq ($(origin CXX),default)
CXX = g++
endif
CXXFLAGS = -std=c++17 -Wall -Wextra -g -O2
CPPFLAGS = -I./include -I./third_party/googletest/googletest/include
Comment on lines +1 to +3
Copy link

Copilot AI Mar 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR description focuses on the new SQL parser foundation, but the diff also vendors a full GoogleTest/GoogleMock distribution and adds new build plumbing to compile it from source. Please reflect this explicitly in the PR description (or consider splitting the GoogleTest vendoring into a separate PR) so reviewers can assess third-party import/licensing/update implications independently from the parser changes.

Copilot uses AI. Check for mistakes.

# Directory layout for the new SQL parser library, its tests, and benchmarks.
PROJECT_ROOT = .
SRC_DIR = $(PROJECT_ROOT)/src/sql_parser
INCLUDE_DIR = $(PROJECT_ROOT)/include/sql_parser
TEST_DIR = $(PROJECT_ROOT)/tests

# Library sources (most of the parser is header-only templates; only the
# arena and the parser entry points have .cpp translation units).
LIB_SRCS = $(SRC_DIR)/arena.cpp $(SRC_DIR)/parser.cpp
LIB_OBJS = $(LIB_SRCS:.cpp=.o)
LIB_TARGET = $(PROJECT_ROOT)/libsqlparser.a

# Google Test, built from the vendored source (gtest-all.cc amalgamation).
GTEST_DIR = $(PROJECT_ROOT)/third_party/googletest/googletest
GTEST_SRC = $(GTEST_DIR)/src/gtest-all.cc
GTEST_OBJ = $(GTEST_DIR)/src/gtest-all.o
GTEST_CPPFLAGS = -I$(GTEST_DIR)/include -I$(GTEST_DIR)

# Test sources — one file per parser subsystem.
TEST_SRCS = $(TEST_DIR)/test_main.cpp \
	$(TEST_DIR)/test_arena.cpp \
	$(TEST_DIR)/test_tokenizer.cpp \
	$(TEST_DIR)/test_classifier.cpp \
	$(TEST_DIR)/test_expression.cpp \
	$(TEST_DIR)/test_set.cpp \
	$(TEST_DIR)/test_select.cpp \
	$(TEST_DIR)/test_emitter.cpp \
	$(TEST_DIR)/test_stmt_cache.cpp \
	$(TEST_DIR)/test_insert.cpp \
	$(TEST_DIR)/test_update.cpp \
	$(TEST_DIR)/test_delete.cpp \
	$(TEST_DIR)/test_compound.cpp \
	$(TEST_DIR)/test_digest.cpp
TEST_OBJS = $(TEST_SRCS:.cpp=.o)
TEST_TARGET = $(PROJECT_ROOT)/run_tests

# Google Benchmark, built from vendored source; benchmark_main.cc is excluded
# because bench/bench_main.cpp supplies BENCHMARK_MAIN().
GBENCH_DIR = $(PROJECT_ROOT)/third_party/benchmark
GBENCH_SRCS = $(filter-out $(GBENCH_DIR)/src/benchmark_main.cc, $(wildcard $(GBENCH_DIR)/src/*.cc))
GBENCH_OBJS = $(GBENCH_SRCS:.cc=.o)
GBENCH_CPPFLAGS = -I$(GBENCH_DIR)/include -I$(GBENCH_DIR)/src -DHAVE_STD_REGEX -DHAVE_STEADY_CLOCK

BENCH_DIR = $(PROJECT_ROOT)/bench
BENCH_SRCS = $(BENCH_DIR)/bench_main.cpp $(BENCH_DIR)/bench_parser.cpp
BENCH_OBJS = $(BENCH_SRCS:.cpp=.o)
BENCH_TARGET = $(PROJECT_ROOT)/run_bench

.PHONY: all lib test bench clean

# Default target builds the library and runs the tests; benchmarks are opt-in.
all: lib test

lib: $(LIB_TARGET)

$(LIB_TARGET): $(LIB_OBJS)
	ar rcs $@ $^
	@echo "Built $@"

$(SRC_DIR)/%.o: $(SRC_DIR)/%.cpp
	$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $< -o $@

# Google Test object
$(GTEST_OBJ): $(GTEST_SRC)
	$(CXX) $(CXXFLAGS) $(GTEST_CPPFLAGS) -c $< -o $@

# Test objects
$(TEST_DIR)/%.o: $(TEST_DIR)/%.cpp
	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(GTEST_CPPFLAGS) -c $< -o $@

# `test` both links and executes the suite.
test: $(TEST_TARGET)
	./$(TEST_TARGET)

$(TEST_TARGET): $(TEST_OBJS) $(GTEST_OBJ) $(LIB_TARGET)
	$(CXX) $(CXXFLAGS) -o $@ $(TEST_OBJS) $(GTEST_OBJ) -L$(PROJECT_ROOT) -lsqlparser -lpthread

# Benchmark objects
$(GBENCH_DIR)/src/%.o: $(GBENCH_DIR)/src/%.cc
	$(CXX) $(CXXFLAGS) $(GBENCH_CPPFLAGS) -c $< -o $@

$(BENCH_DIR)/%.o: $(BENCH_DIR)/%.cpp
	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(GBENCH_CPPFLAGS) -c $< -o $@

bench: $(BENCH_TARGET)
	./$(BENCH_TARGET) --benchmark_format=console

$(BENCH_TARGET): $(BENCH_OBJS) $(GBENCH_OBJS) $(LIB_TARGET)
	$(CXX) $(CXXFLAGS) -o $@ $(BENCH_OBJS) $(GBENCH_OBJS) -L$(PROJECT_ROOT) -lsqlparser -lpthread

clean:
	rm -f $(LIB_OBJS) $(LIB_TARGET) $(TEST_OBJS) $(GTEST_OBJ) $(TEST_TARGET)
	rm -f $(BENCH_OBJS) $(GBENCH_OBJS) $(BENCH_TARGET)
	@echo "Cleaned."
3 changes: 3 additions & 0 deletions bench/bench_main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#include <benchmark/benchmark.h>

// Entry point for the benchmark binary: BENCHMARK_MAIN() expands to a main()
// that parses --benchmark_* flags and runs every registered BENCHMARK().
BENCHMARK_MAIN();
239 changes: 239 additions & 0 deletions bench/bench_parser.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
#include <cstddef>  // size_t
#include <cstring>  // strlen — used throughout; previously only available transitively

#include <benchmark/benchmark.h>

#include "sql_parser/parser.h"
#include "sql_parser/emitter.h"

using namespace sql_parser;

// ========== Tier 2: Classification ==========
// Target: <100ns

// Measures Tier-2 classification of a simple INSERT; only the statement
// type of the result is observed.
static void BM_Classify_Insert(benchmark::State& state) {
  Parser<Dialect::MySQL> parser;
  static const char kSql[] = "INSERT INTO users VALUES (1, 'name', 'email')";
  constexpr size_t kLen = sizeof(kSql) - 1;  // length known at compile time
  for (auto _ : state) {
    auto result = parser.parse(kSql, kLen);
    benchmark::DoNotOptimize(result.stmt_type);
  }
}
BENCHMARK(BM_Classify_Insert);

// Measures Tier-2 classification of a simple UPDATE.
static void BM_Classify_Update(benchmark::State& state) {
  Parser<Dialect::MySQL> parser;
  static const char kSql[] = "UPDATE users SET name = 'x' WHERE id = 1";
  constexpr size_t kLen = sizeof(kSql) - 1;
  for (auto _ : state) {
    auto result = parser.parse(kSql, kLen);
    benchmark::DoNotOptimize(result.stmt_type);
  }
}
BENCHMARK(BM_Classify_Update);

// Measures Tier-2 classification of a simple DELETE.
static void BM_Classify_Delete(benchmark::State& state) {
  Parser<Dialect::MySQL> parser;
  static const char kSql[] = "DELETE FROM users WHERE id = 1";
  constexpr size_t kLen = sizeof(kSql) - 1;
  for (auto _ : state) {
    auto result = parser.parse(kSql, kLen);
    benchmark::DoNotOptimize(result.stmt_type);
  }
}
BENCHMARK(BM_Classify_Delete);

// Measures classification of the shortest interesting input (BEGIN) —
// effectively the parser's fixed per-call overhead.
static void BM_Classify_Begin(benchmark::State& state) {
  Parser<Dialect::MySQL> parser;
  static const char kSql[] = "BEGIN";
  constexpr size_t kLen = sizeof(kSql) - 1;
  for (auto _ : state) {
    auto result = parser.parse(kSql, kLen);
    benchmark::DoNotOptimize(result.stmt_type);
  }
}
BENCHMARK(BM_Classify_Begin);

// ========== Tier 1: SET parse ==========
// Target: <300ns

// Tier-1 deep parse of a single session-variable SET; the AST pointer is
// the observed output.
static void BM_Set_Simple(benchmark::State& state) {
  Parser<Dialect::MySQL> parser;
  static const char kSql[] = "SET @@session.wait_timeout = 600";
  constexpr size_t kLen = sizeof(kSql) - 1;
  for (auto _ : state) {
    auto result = parser.parse(kSql, kLen);
    benchmark::DoNotOptimize(result.ast);
  }
}
BENCHMARK(BM_Set_Simple);

// Tier-1 deep parse of SET NAMES with an explicit collation.
static void BM_Set_Names(benchmark::State& state) {
  Parser<Dialect::MySQL> parser;
  static const char kSql[] = "SET NAMES utf8mb4 COLLATE utf8mb4_unicode_ci";
  constexpr size_t kLen = sizeof(kSql) - 1;
  for (auto _ : state) {
    auto result = parser.parse(kSql, kLen);
    benchmark::DoNotOptimize(result.ast);
  }
}
BENCHMARK(BM_Set_Names);

// Tier-1 deep parse of a comma-separated multi-assignment SET.
static void BM_Set_MultiVar(benchmark::State& state) {
  Parser<Dialect::MySQL> parser;
  static const char kSql[] =
      "SET autocommit = 1, wait_timeout = 28800, sql_mode = 'STRICT_TRANS_TABLES'";
  constexpr size_t kLen = sizeof(kSql) - 1;
  for (auto _ : state) {
    auto result = parser.parse(kSql, kLen);
    benchmark::DoNotOptimize(result.ast);
  }
}
BENCHMARK(BM_Set_MultiVar);

// Tier-1 deep parse of SET with a function call on the right-hand side,
// exercising the expression parser.
static void BM_Set_FunctionRHS(benchmark::State& state) {
  Parser<Dialect::MySQL> parser;
  static const char kSql[] = "SET sql_mode = CONCAT(@@sql_mode, ',STRICT_TRANS_TABLES')";
  constexpr size_t kLen = sizeof(kSql) - 1;
  for (auto _ : state) {
    auto result = parser.parse(kSql, kLen);
    benchmark::DoNotOptimize(result.ast);
  }
}
BENCHMARK(BM_Set_FunctionRHS);

// ========== Tier 1: SELECT parse ==========
// Target: <500ns simple, <2us complex

// Tier-1 deep parse of a minimal single-table SELECT.
static void BM_Select_Simple(benchmark::State& state) {
  Parser<Dialect::MySQL> parser;
  static const char kSql[] = "SELECT col FROM t WHERE id = 1";
  constexpr size_t kLen = sizeof(kSql) - 1;
  for (auto _ : state) {
    auto result = parser.parse(kSql, kLen);
    benchmark::DoNotOptimize(result.ast);
  }
}
BENCHMARK(BM_Select_Simple);

// Tier-1 deep parse of a SELECT with several columns, WHERE, ORDER BY and LIMIT.
static void BM_Select_MultiColumn(benchmark::State& state) {
  Parser<Dialect::MySQL> parser;
  static const char kSql[] =
      "SELECT id, name, email, status FROM users WHERE active = 1 ORDER BY name LIMIT 100";
  constexpr size_t kLen = sizeof(kSql) - 1;
  for (auto _ : state) {
    auto result = parser.parse(kSql, kLen);
    benchmark::DoNotOptimize(result.ast);
  }
}
BENCHMARK(BM_Select_MultiColumn);

// Tier-1 deep parse of a two-table JOIN with aliases and an ON condition.
static void BM_Select_Join(benchmark::State& state) {
  Parser<Dialect::MySQL> parser;
  static const char kSql[] =
      "SELECT u.id, o.total FROM users u JOIN orders o ON u.id = o.user_id WHERE o.status = 'active'";
  constexpr size_t kLen = sizeof(kSql) - 1;
  for (auto _ : state) {
    auto result = parser.parse(kSql, kLen);
    benchmark::DoNotOptimize(result.ast);
  }
}
BENCHMARK(BM_Select_Join);

// Tier-1 deep parse of a complex SELECT touching every major clause:
// LEFT JOIN, compound WHERE, GROUP BY, HAVING, ORDER BY and LIMIT/OFFSET.
static void BM_Select_Complex(benchmark::State& state) {
  Parser<Dialect::MySQL> parser;
  static const char kSql[] =
      "SELECT u.id, u.name, COUNT(o.id) AS order_count "
      "FROM users u "
      "LEFT JOIN orders o ON u.id = o.user_id "
      "WHERE u.status = 'active' AND u.created_at > '2024-01-01' "
      "GROUP BY u.id, u.name "
      "HAVING COUNT(o.id) > 5 "
      "ORDER BY order_count DESC "
      "LIMIT 50 OFFSET 10";
  constexpr size_t kLen = sizeof(kSql) - 1;  // concatenated literal, NUL excluded
  for (auto _ : state) {
    auto result = parser.parse(kSql, kLen);
    benchmark::DoNotOptimize(result.ast);
  }
}
BENCHMARK(BM_Select_Complex);

// Tier-1 deep parse of a four-table join chain mixing JOIN and LEFT JOIN.
static void BM_Select_MultiJoin(benchmark::State& state) {
  Parser<Dialect::MySQL> parser;
  static const char kSql[] =
      "SELECT a.id, b.name, c.value, d.total "
      "FROM t1 a "
      "JOIN t2 b ON a.id = b.a_id "
      "LEFT JOIN t3 c ON b.id = c.b_id "
      "JOIN t4 d ON c.id = d.c_id "
      "WHERE a.status = 1 AND d.total > 100 "
      "ORDER BY d.total DESC "
      "LIMIT 20";
  constexpr size_t kLen = sizeof(kSql) - 1;
  for (auto _ : state) {
    auto result = parser.parse(kSql, kLen);
    benchmark::DoNotOptimize(result.ast);
  }
}
BENCHMARK(BM_Select_MultiJoin);

// ========== Query Reconstruction (round-trip) ==========
// Target: <500ns

// Round-trip benchmark: parse a simple SET, then re-emit it from the AST.
// The emitter is constructed inside the loop, matching real per-query use.
static void BM_Emit_SetSimple(benchmark::State& state) {
  Parser<Dialect::MySQL> parser;
  static const char kSql[] = "SET autocommit = 1";
  constexpr size_t kLen = sizeof(kSql) - 1;
  for (auto _ : state) {
    auto parsed = parser.parse(kSql, kLen);
    Emitter<Dialect::MySQL> emitter(parser.arena());
    emitter.emit(parsed.ast);
    benchmark::DoNotOptimize(emitter.result());
  }
}
BENCHMARK(BM_Emit_SetSimple);

// Round-trip benchmark: parse a simple SELECT, then re-emit it from the AST.
static void BM_Emit_SelectSimple(benchmark::State& state) {
  Parser<Dialect::MySQL> parser;
  static const char kSql[] = "SELECT * FROM users WHERE id = 1";
  constexpr size_t kLen = sizeof(kSql) - 1;
  for (auto _ : state) {
    auto parsed = parser.parse(kSql, kLen);
    Emitter<Dialect::MySQL> emitter(parser.arena());
    emitter.emit(parsed.ast);
    benchmark::DoNotOptimize(emitter.result());
  }
}
BENCHMARK(BM_Emit_SelectSimple);

// ========== Arena reset ==========
// Target: <10ns

// Measures an allocate+reset cycle on the arena. The allocation gives
// reset() actual work to undo; bytes_used() keeps the pair observable.
static void BM_ArenaReset(benchmark::State& state) {
  Arena scratch(65536);
  for (auto _ : state) {
    scratch.allocate(256);
    scratch.reset();
    benchmark::DoNotOptimize(scratch.bytes_used());
  }
}
BENCHMARK(BM_ArenaReset);

// ========== PostgreSQL ==========

// Same minimal SELECT as BM_Select_Simple, but through the PostgreSQL
// dialect instantiation, to compare the two template paths.
static void BM_PgSQL_Select_Simple(benchmark::State& state) {
  Parser<Dialect::PostgreSQL> parser;
  static const char kSql[] = "SELECT col FROM t WHERE id = 1";
  constexpr size_t kLen = sizeof(kSql) - 1;
  for (auto _ : state) {
    auto result = parser.parse(kSql, kLen);
    benchmark::DoNotOptimize(result.ast);
  }
}
BENCHMARK(BM_PgSQL_Select_Simple);

// PostgreSQL-dialect SET with a quoted unit value.
static void BM_PgSQL_Set_Simple(benchmark::State& state) {
  Parser<Dialect::PostgreSQL> parser;
  static const char kSql[] = "SET work_mem = '256MB'";
  constexpr size_t kLen = sizeof(kSql) - 1;
  for (auto _ : state) {
    auto result = parser.parse(kSql, kLen);
    benchmark::DoNotOptimize(result.ast);
  }
}
BENCHMARK(BM_PgSQL_Set_Simple);
Loading
Loading