diff --git a/apps/webapp/app/components/code/AIQueryInput.tsx b/apps/webapp/app/components/code/AIQueryInput.tsx index e3e3ed9c7b..e7624ba5b1 100644 --- a/apps/webapp/app/components/code/AIQueryInput.tsx +++ b/apps/webapp/app/components/code/AIQueryInput.tsx @@ -32,6 +32,8 @@ interface AIQueryInputProps { onQueryGenerated: (query: string) => void; /** Set this to a prompt to auto-populate and immediately submit */ autoSubmitPrompt?: string; + /** Change this to force re-submission even if prompt is the same */ + autoSubmitKey?: number; /** Get the current query in the editor (used for edit mode) */ getCurrentQuery?: () => string; } @@ -39,6 +41,7 @@ interface AIQueryInputProps { export function AIQueryInput({ onQueryGenerated, autoSubmitPrompt, + autoSubmitKey, getCurrentQuery, }: AIQueryInputProps) { const [prompt, setPrompt] = useState(""); @@ -50,7 +53,7 @@ export function AIQueryInput({ const [lastResult, setLastResult] = useState<"success" | "error" | null>(null); const textareaRef = useRef(null); const abortControllerRef = useRef(null); - const lastAutoSubmitRef = useRef(null); + const lastAutoSubmitRef = useRef<{ prompt: string; key?: number } | null>(null); const organization = useOrganization(); const project = useProject(); @@ -197,19 +200,22 @@ export function AIQueryInput({ [prompt, submitQuery] ); - // Auto-submit when autoSubmitPrompt changes + // Auto-submit when autoSubmitPrompt or autoSubmitKey changes useEffect(() => { - if ( - autoSubmitPrompt && - autoSubmitPrompt.trim() && - autoSubmitPrompt !== lastAutoSubmitRef.current && - !isLoading - ) { - lastAutoSubmitRef.current = autoSubmitPrompt; + if (!autoSubmitPrompt || !autoSubmitPrompt.trim() || isLoading) { + return; + } + + const last = lastAutoSubmitRef.current; + const isDifferent = + last === null || autoSubmitPrompt !== last.prompt || autoSubmitKey !== last.key; + + if (isDifferent) { + lastAutoSubmitRef.current = { prompt: autoSubmitPrompt, key: autoSubmitKey }; setPrompt(autoSubmitPrompt); submitQuery(autoSubmitPrompt); } - }, [autoSubmitPrompt, isLoading, submitQuery]); + }, [autoSubmitPrompt, autoSubmitKey, isLoading, submitQuery]); // Cleanup on unmount useEffect(() => { diff --git a/apps/webapp/app/components/code/TSQLEditor.tsx b/apps/webapp/app/components/code/TSQLEditor.tsx index 221de472a9..998fd2da71 100644 --- a/apps/webapp/app/components/code/TSQLEditor.tsx +++ b/apps/webapp/app/components/code/TSQLEditor.tsx @@ -1,6 +1,7 @@ import { sql, StandardSQL } from "@codemirror/lang-sql"; -import { autocompletion } from "@codemirror/autocomplete"; +import { autocompletion, startCompletion } from "@codemirror/autocomplete"; import { linter, lintGutter } from "@codemirror/lint"; +import { EditorView } from "@codemirror/view"; import type { ViewUpdate } from "@codemirror/view"; import { CheckIcon, ClipboardIcon, SparklesIcon, TrashIcon } from "@heroicons/react/20/solid"; import { @@ -103,6 +104,23 @@ export function TSQLEditor(opts: TSQLEditorProps) { maxRenderedOptions: 50, }) ); + + // Trigger autocomplete when ' is typed in value context + // CodeMirror's activateOnTyping only triggers on alphanumeric characters, + // so we manually trigger for quotes after comparison operators + exts.push( + EditorView.domEventHandlers({ + keyup: (event, view) => { + // Trigger on quote key (both ' and shift+' on some keyboards) + if (event.key === "'" || event.key === '"' || event.code === "Quote") { + setTimeout(() => { + startCompletion(view); + }, 50); + } + return false; + }, + }) + ); } // Add TSQL linter diff --git a/apps/webapp/app/components/code/TSQLResultsTable.tsx b/apps/webapp/app/components/code/TSQLResultsTable.tsx index 8e5bb37b83..60facc737f 100644 --- a/apps/webapp/app/components/code/TSQLResultsTable.tsx +++ b/apps/webapp/app/components/code/TSQLResultsTable.tsx @@ -134,6 +134,10 @@ function CellValue({ }) { // Plain text mode - render everything as monospace text with truncation if (!prettyFormatting) { + if (column.type === "JSON") { + return ; + } + const plainValue = value === null ? "NULL" : String(value); const isTruncated = plainValue.length > MAX_STRING_DISPLAY_LENGTH; @@ -277,24 +281,7 @@ function CellValue({ // JSON type if (type === "JSON") { - const jsonString = JSON.stringify(value); - const isTruncated = jsonString.length > MAX_STRING_DISPLAY_LENGTH; - - if (isTruncated) { - return ( - - {jsonString} - - } - button={ - {truncateString(jsonString)} - } - /> - ); - } - return {jsonString}; + return ; } // Array types @@ -382,6 +369,28 @@ function EnvironmentCellValue({ value }: { value: string }) { return ; } +function JSONCellValue({ value }: { value: any }) { + const jsonString = JSON.stringify(value); + const isTruncated = jsonString.length > MAX_STRING_DISPLAY_LENGTH; + + if (isTruncated) { + return ( + + {jsonString} + + } + button={ + {truncateString(jsonString)} + } + /> + ); + } + + return {jsonString}; +} + /** * Check if a column should be right-aligned (numeric columns, duration, cost) */ diff --git a/apps/webapp/app/components/code/tsql/tsqlCompletion.test.ts b/apps/webapp/app/components/code/tsql/tsqlCompletion.test.ts deleted file mode 100644 index 5fee69463e..0000000000 --- a/apps/webapp/app/components/code/tsql/tsqlCompletion.test.ts +++ /dev/null @@ -1,172 +0,0 @@ -import { describe, it, expect } from "vitest"; -import { createTSQLCompletion } from "./tsqlCompletion"; -import type { TableSchema, ColumnSchema } from "@internal/tsql"; - -// Helper to create a mock completion context -function createMockContext(doc: string, pos: number, explicit = false) { - return { - state: { - doc: { - toString: () => doc, - }, - }, - pos, - explicit, - matchBefore: (regex: RegExp) => { - const beforePos = doc.slice(0, pos); - const match = beforePos.match(new RegExp(regex.source + "$")); - if (match) { - return { - from: pos - match[0].length, - to: pos, - text: match[0], - }; - } - return null; - }, - } as any; -} - -// Test schema -const testSchema: TableSchema[] = [ - { - name: "runs", - clickhouseName: "trigger_dev.task_runs_v2", - tenantColumns: { - organizationId: "organization_id", - projectId: "project_id", - environmentId: "environment_id", - }, - description: "Task runs table", - columns: { - id: { name: "id", type: "String", description: "Run ID" }, - status: { name: "status", type: "String", description: "Run status" }, - created_at: { name: "created_at", type: "DateTime64", description: "Creation time" }, - organization_id: { name: "organization_id", type: "String" }, - project_id: { name: "project_id", type: "String" }, - environment_id: { name: "environment_id", type: "String" }, - }, - }, - { - name: "logs", - clickhouseName: "trigger_dev.task_events_v2", - tenantColumns: { - organizationId: "organization_id", - projectId: "project_id", - environmentId: "environment_id", - }, - description: "Task logs table", - columns: { - id: { name: "id", type: "String" }, - run_id: { name: "run_id", type: "String" }, - message: { name: "message", type: "String" }, - level: { name: "level", type: "String" }, - timestamp: { name: "timestamp", type: "DateTime64" }, - organization_id: { name: "organization_id", type: "String" }, - project_id: { name: "project_id", type: "String" }, - environment_id: { name: "environment_id", type: "String" }, - }, - }, -]; - -describe("createTSQLCompletion", () => { - const completionSource = createTSQLCompletion(testSchema); - - it("should return null for empty input without explicit trigger", () => { - const context = createMockContext("", 0, false); - const result = completionSource(context); - expect(result).toBeNull(); - }); - - it("should return completions when explicitly triggered", () => { - const context = createMockContext("", 0, true); - const result = completionSource(context); - expect(result).not.toBeNull(); - expect(result?.options.length).toBeGreaterThan(0); - }); - - it("should suggest tables after FROM keyword", () => { - const doc = "SELECT * FROM "; - const context = createMockContext(doc, doc.length, true); - const result = completionSource(context); - - expect(result).not.toBeNull(); - - const tableLabels = result?.options.map((o) => o.label); - expect(tableLabels).toContain("runs"); - expect(tableLabels).toContain("logs"); - }); - - it("should suggest columns after SELECT keyword", () => { - const doc = "SELECT FROM runs"; - // Position cursor right after SELECT - const pos = 7; - const context = createMockContext(doc, pos, true); - const result = completionSource(context); - - expect(result).not.toBeNull(); - - // Should include functions - const labels = result?.options.map((o) => o.label) || []; - expect(labels.some((l) => l === "count")).toBe(true); - expect(labels.some((l) => l === "sum")).toBe(true); - }); - - it("should suggest columns with table prefix for qualified references", () => { - const doc = "SELECT runs. FROM runs"; - // Position cursor right after "runs." - const pos = 12; - const context = createMockContext(doc, pos, true); - const result = completionSource(context); - - expect(result).not.toBeNull(); - - const columnLabels = result?.options.map((o) => o.label); - expect(columnLabels).toContain("id"); - expect(columnLabels).toContain("status"); - expect(columnLabels).toContain("created_at"); - }); - - it("should include SQL keywords in general context", () => { - const doc = "S"; - const context = createMockContext(doc, doc.length, true); - const result = completionSource(context); - - expect(result).not.toBeNull(); - - const labels = result?.options.map((o) => o.label); - expect(labels).toContain("SELECT"); - }); - - it("should include aggregate functions", () => { - const doc = "SELECT "; - const context = createMockContext(doc, doc.length, true); - const result = completionSource(context); - - expect(result).not.toBeNull(); - - const labels = result?.options.map((o) => o.label); - expect(labels).toContain("count"); - expect(labels).toContain("sum"); - expect(labels).toContain("avg"); - expect(labels).toContain("min"); - expect(labels).toContain("max"); - }); - - it("should handle WHERE clause context", () => { - const doc = "SELECT * FROM runs WHERE "; - const context = createMockContext(doc, doc.length, true); - const result = completionSource(context); - - expect(result).not.toBeNull(); - - // Should suggest columns - const labels = result?.options.map((o) => o.label) || []; - expect(labels).toContain("status"); - - // Should include conditional keywords - expect(labels).toContain("AND"); - expect(labels).toContain("OR"); - }); -}); - diff --git a/apps/webapp/app/components/code/tsql/tsqlCompletion.ts b/apps/webapp/app/components/code/tsql/tsqlCompletion.ts index 8c73600032..047496dde3 100644 --- a/apps/webapp/app/components/code/tsql/tsqlCompletion.ts +++ b/apps/webapp/app/components/code/tsql/tsqlCompletion.ts @@ -1,6 +1,7 @@ import type { CompletionContext, CompletionResult, Completion } from "@codemirror/autocomplete"; -import type { TableSchema, ColumnSchema } from "@internal/tsql"; import { + type TableSchema, + type ColumnSchema, TSQL_CLICKHOUSE_FUNCTIONS, TSQL_AGGREGATIONS, } from "@internal/tsql"; @@ -88,7 +89,11 @@ function createFunctionCompletions(): Completion[] { if (name.startsWith("_")) continue; const argsHint = - meta.maxArgs === 0 ? "()" : meta.minArgs === meta.maxArgs ? `(${meta.minArgs} args)` : `(${meta.minArgs}${meta.maxArgs ? `-${meta.maxArgs}` : "+"} args)`; + meta.maxArgs === 0 + ? "()" + : meta.minArgs === meta.maxArgs + ? `(${meta.minArgs} args)` + : `(${meta.minArgs}${meta.maxArgs ? `-${meta.maxArgs}` : "+"} args)`; functions.push({ label: name, @@ -103,7 +108,11 @@ function createFunctionCompletions(): Completion[] { if (name.startsWith("_")) continue; const argsHint = - meta.maxArgs === 0 ? "()" : meta.minArgs === meta.maxArgs ? `(${meta.minArgs} args)` : `(${meta.minArgs}${meta.maxArgs ? `-${meta.maxArgs}` : "+"} args)`; + meta.maxArgs === 0 + ? "()" + : meta.minArgs === meta.maxArgs + ? `(${meta.minArgs} args)` + : `(${meta.minArgs}${meta.maxArgs ? `-${meta.maxArgs}` : "+"} args)`; functions.push({ label: name, @@ -157,8 +166,7 @@ function extractTablesFromQuery(doc: string, schema: TableSchema[]): Map t.name.toLowerCase() === tableName.toLowerCase() - ); + const tableSchema = schema.find((t) => t.name.toLowerCase() === tableName.toLowerCase()); if (tableSchema) { tableMap.set(alias.toLowerCase(), tableSchema); @@ -202,21 +208,24 @@ interface ContextResult { /** * Extract column name from text before a comparison operator - * Handles: "column =", "table.column =", "column IN", etc. + * Handles: "column =", "table.column =", "column IN", "column = 'partial", etc. */ -function extractColumnBeforeOperator(textBefore: string): { columnName: string; tableAlias?: string } | null { +function extractColumnBeforeOperator( + textBefore: string +): { columnName: string; tableAlias?: string } | null { // Match patterns like: column =, column !=, column IN, table.column =, etc. // We need to capture the column (and optional table prefix) before the operator + // Also match when user is typing a partial string value like: column = 'val const patterns = [ - // column = or column != or column <> (with optional whitespace) - /(\w+)\.(\w+)\s*(?:=|!=|<>)\s*$/i, - /(\w+)\s*(?:=|!=|<>)\s*$/i, - // column IN ( or column NOT IN ( - /(\w+)\.(\w+)\s+(?:NOT\s+)?IN\s*\(\s*$/i, - /(\w+)\s+(?:NOT\s+)?IN\s*\(\s*$/i, - // After a comma in IN clause - need to find the column before IN - /(\w+)\.(\w+)\s+(?:NOT\s+)?IN\s*\([^)]*,\s*$/i, - /(\w+)\s+(?:NOT\s+)?IN\s*\([^)]*,\s*$/i, + // column = or column != or column <> (with optional whitespace and optional partial string value) + /(\w+)\.(\w+)\s*(?:=|!=|<>)\s*(?:'[^']*)?$/i, + /(\w+)\s*(?:=|!=|<>)\s*(?:'[^']*)?$/i, + // column IN ( or column NOT IN ( (with optional partial string value) + /(\w+)\.(\w+)\s+(?:NOT\s+)?IN\s*\(\s*(?:'[^']*)?$/i, + /(\w+)\s+(?:NOT\s+)?IN\s*\(\s*(?:'[^']*)?$/i, + // After a comma in IN clause (with optional partial string value) + /(\w+)\.(\w+)\s+(?:NOT\s+)?IN\s*\([^)]*,\s*(?:'[^']*)?$/i, + /(\w+)\s+(?:NOT\s+)?IN\s*\([^)]*,\s*(?:'[^']*)?$/i, ]; for (const pattern of patterns) { @@ -235,10 +244,7 @@ function extractColumnBeforeOperator(textBefore: string): { columnName: string; return null; } -function determineContext( - doc: string, - pos: number -): ContextResult { +function determineContext(doc: string, pos: number): ContextResult { // Get text before cursor const textBefore = doc.slice(0, pos); @@ -320,21 +326,9 @@ function findColumnSchema( } /** - * Create completions for enum values - * Uses user-friendly values from valueMap when available, showing internal value as detail + * Create completions for enum values from allowedValues */ function createEnumValueCompletions(columnSchema: ColumnSchema): Completion[] { - // Prefer valueMap over allowedValues if available - if (columnSchema.valueMap && Object.keys(columnSchema.valueMap).length > 0) { - return Object.entries(columnSchema.valueMap).map(([internalValue, userFriendlyValue]) => ({ - label: `'${userFriendlyValue}'`, - type: "enum", - detail: `→ ${internalValue}`, - boost: 3, // Highest priority for enum values in value context - })); - } - - // Fall back to allowedValues if (!columnSchema.allowedValues || columnSchema.allowedValues.length === 0) { return []; } @@ -375,6 +369,8 @@ export function createTSQLCompletion( const queryContext = determineContext(doc, context.pos); let options: Completion[] = []; + // Track if we need to extend replacement range (e.g., to consume auto-paired closing quote) + let to: number | undefined = undefined; switch (queryContext.type) { case "table": @@ -406,6 +402,12 @@ export function createTSQLCompletion( if (columnSchema) { options = createEnumValueCompletions(columnSchema); + // Check if there's a closing quote right after cursor (from auto-pairing) + // If so, extend replacement range to include it to avoid 'Completed'' + const charAfterCursor = context.state.doc.sliceString(context.pos, context.pos + 1); + if (charAfterCursor === "'") { + to = context.pos + 1; + } } } break; @@ -426,9 +428,23 @@ export function createTSQLCompletion( options.push(...functionCompletions); options.push( ...keywordCompletions.filter((k) => - ["AND", "OR", "NOT", "IN", "LIKE", "ILIKE", "BETWEEN", "IS", "NULL", "AS", "CASE", "WHEN", "THEN", "ELSE", "END"].includes( - k.label as string - ) + [ + "AND", + "OR", + "NOT", + "IN", + "LIKE", + "ILIKE", + "BETWEEN", + "IS", + "NULL", + "AS", + "CASE", + "WHEN", + "THEN", + "ELSE", + "END", + ].includes(k.label as string) ) ); } @@ -437,11 +453,7 @@ export function createTSQLCompletion( case "general": default: // Show everything - options = [ - ...tableCompletions, - ...functionCompletions, - ...keywordCompletions, - ]; + options = [...tableCompletions, ...functionCompletions, ...keywordCompletions]; // Also add columns from tables in query { @@ -454,11 +466,15 @@ export function createTSQLCompletion( break; } - return { + const result: CompletionResult = { from, options, validFor: /^[\w.']*$/, }; + // Only set 'to' if we need to extend the replacement range + if (to !== undefined) { + result.to = to; + } + return result; }; } - diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts index 6c0405bb79..75ae7c8978 100644 --- a/apps/webapp/app/env.server.ts +++ b/apps/webapp/app/env.server.ts @@ -1175,6 +1175,17 @@ const EnvironmentSchema = z CLICKHOUSE_LOG_LEVEL: z.enum(["log", "error", "warn", "info", "debug"]).default("info"), CLICKHOUSE_COMPRESSION_REQUEST: z.string().default("1"), + // Query page ClickHouse limits (for TSQL queries) + QUERY_CLICKHOUSE_MAX_EXECUTION_TIME: z.coerce.number().int().default(10), + QUERY_CLICKHOUSE_MAX_MEMORY_USAGE: z.coerce.number().int().default(1_073_741_824), // 1GB in bytes + QUERY_CLICKHOUSE_MAX_AST_ELEMENTS: z.coerce.number().int().default(4_000_000), + QUERY_CLICKHOUSE_MAX_EXPANDED_AST_ELEMENTS: z.coerce.number().int().default(4_000_000), + QUERY_CLICKHOUSE_MAX_BYTES_BEFORE_EXTERNAL_GROUP_BY: z.coerce.number().int().default(0), + + // Query page concurrency limits + QUERY_DEFAULT_ORG_CONCURRENCY_LIMIT: z.coerce.number().int().default(3), + QUERY_GLOBAL_CONCURRENCY_LIMIT: z.coerce.number().int().default(50), + EVENTS_CLICKHOUSE_URL: z .string() .optional() diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.query/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.query/route.tsx index 6308287d64..317b67e904 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.query/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.query/route.tsx @@ -40,6 +40,7 @@ import { EnvironmentLabel } from "~/components/environments/EnvironmentLabel"; import { PageBody, PageContainer } from "~/components/layout/AppLayout"; import { Badge } from "~/components/primitives/Badge"; import { Button } from "~/components/primitives/Buttons"; +import { Callout } from "~/components/primitives/Callout"; import { CopyableText } from "~/components/primitives/CopyableText"; import { Header2, Header3 } from "~/components/primitives/Headers"; import { NavBar, PageTitle } from "~/components/primitives/PageHeader"; @@ -118,7 +119,12 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const user = await requireUser(request); const { projectParam, organizationSlug, envParam } = EnvironmentParamSchema.parse(params); - const canAccess = await hasQueryAccess(user.id, user.admin, user.isImpersonating, organizationSlug); + const canAccess = await hasQueryAccess( + user.id, + user.admin, + user.isImpersonating, + organizationSlug + ); if (!canAccess) { throw redirect("/"); } @@ -144,25 +150,43 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { organizationId: project.organizationId, }); + // Admins and impersonating users can use EXPLAIN + const isAdmin = user.admin || user.isImpersonating; + return typedjson({ defaultQuery, history, + isAdmin, }); }; const ActionSchema = z.object({ query: z.string().min(1, "Query is required"), scope: z.enum(["environment", "project", "organization"]), + explain: z.enum(["true", "false"]).nullable().optional(), }); export const action = async ({ request, params }: ActionFunctionArgs) => { const user = await requireUser(request); const { projectParam, organizationSlug, envParam } = EnvironmentParamSchema.parse(params); - const canAccess = await hasQueryAccess(user.id, user.admin, user.isImpersonating, organizationSlug); + const canAccess = await hasQueryAccess( + user.id, + user.admin, + user.isImpersonating, + organizationSlug + ); if (!canAccess) { return typedjson( - { error: "Unauthorized", rows: null, columns: null, stats: null }, + { + error: "Unauthorized", + rows: null, + columns: null, + stats: null, + hiddenColumns: null, + explainOutput: null, + generatedSql: null, + }, { status: 403 } ); } @@ -170,7 +194,15 @@ export const action = async ({ request, params }: ActionFunctionArgs) => { const project = await findProjectBySlug(organizationSlug, projectParam, user.id); if (!project) { return typedjson( - { error: "Project not found", rows: null, columns: null, stats: null }, + { + error: "Project not found", + rows: null, + columns: null, + stats: null, + hiddenColumns: null, + explainOutput: null, + generatedSql: null, + }, { status: 404 } ); } @@ -178,7 +210,15 @@ export const action = async ({ request, params }: ActionFunctionArgs) => { const environment = await findEnvironmentBySlug(project.id, envParam, user.id); if (!environment) { return typedjson( - { error: "Environment not found", rows: null, columns: null, stats: null }, + { + error: "Environment not found", + rows: null, + columns: null, + stats: null, + hiddenColumns: null, + explainOutput: null, + generatedSql: null, + }, { status: 404 } ); } @@ -187,6 +227,7 @@ export const action = async ({ request, params }: ActionFunctionArgs) => { const parsed = ActionSchema.safeParse({ query: formData.get("query"), scope: formData.get("scope"), + explain: formData.get("explain"), }); if (!parsed.success) { @@ -196,12 +237,18 @@ export const action = async ({ request, params }: ActionFunctionArgs) => { rows: null, columns: null, stats: null, + hiddenColumns: null, + explainOutput: null, + generatedSql: null, }, { status: 400 } ); } - const { query, scope } = parsed.data; + const { query, scope, explain: explainParam } = parsed.data; + // Only allow explain for admins/impersonating users + const isAdmin = user.admin || user.isImpersonating; + const explain = explainParam === "true" && isAdmin; try { const [error, result] = await executeQuery({ @@ -214,15 +261,25 @@ export const action = async ({ request, params }: ActionFunctionArgs) => { organizationId: project.organizationId, projectId: project.id, environmentId: environment.id, + explain, history: { source: "DASHBOARD", userId: user.id, + skip: user.isImpersonating, }, }); if (error) { return typedjson( - { error: error.message, rows: null, columns: null, stats: null }, + { + error: error.message, + rows: null, + columns: null, + stats: null, + hiddenColumns: null, + explainOutput: null, + generatedSql: null, + }, { status: 400 } ); } @@ -232,11 +289,22 @@ export const action = async ({ request, params }: ActionFunctionArgs) => { rows: result.rows, columns: result.columns, stats: result.stats, + hiddenColumns: result.hiddenColumns ?? null, + explainOutput: result.explainOutput ?? null, + generatedSql: result.generatedSql ?? null, }); } catch (err) { const errorMessage = err instanceof Error ? err.message : "Unknown error executing query"; return typedjson( - { error: errorMessage, rows: null, columns: null, stats: null }, + { + error: errorMessage, + rows: null, + columns: null, + stats: null, + hiddenColumns: null, + explainOutput: null, + generatedSql: null, + }, { status: 500 } ); } @@ -257,8 +325,9 @@ const QueryEditorForm = forwardRef< defaultScope: QueryScope; history: QueryHistoryItem[]; isLoading: boolean; + isAdmin: boolean; } ->(function QueryEditorForm({ defaultQuery, defaultScope, history, isLoading }, ref) { +>(function QueryEditorForm({ defaultQuery, defaultScope, history, isLoading, isAdmin }, ref) { const [query, setQuery] = useState(defaultQuery); const [scope, setScope] = useState(defaultScope); @@ -313,6 +382,17 @@ const QueryEditorForm = forwardRef< )) } + {isAdmin && ( + + )} + + ) : results?.explainOutput ? ( +
+ {results.generatedSql && ( +
+ Generated ClickHouse SQL +
+
+                                {results.generatedSql}
+                              
+
+
+ )} +
+ Query Execution Plan +
+
+                              {results.explainOutput}
+                            
+
+
+
) : results?.rows && results?.columns ? ( - +
+ {results.hiddenColumns && results.hiddenColumns.length > 0 && ( + + SELECT * doesn't return all columns because it's slow. The + following columns are not shown:{" "} + + {results.hiddenColumns.join(", ")} + + . Specify them explicitly to include them. + + )} +
+ +
+
) : ( Run a query to see results here. @@ -493,6 +629,9 @@ export default function Page() { editorRef.current?.setQuery(formatted); }} getCurrentQuery={() => editorRef.current?.getQuery() ?? ""} + activeTab={sidebarTab} + onTabChange={setSidebarTab} + aiFixRequest={aiFixRequest} /> @@ -613,14 +752,24 @@ function QueryHelpSidebar({ onTryExample, onQueryGenerated, getCurrentQuery, + activeTab, + onTabChange, + aiFixRequest, }: { onTryExample: (query: string, scope: QueryScope) => void; onQueryGenerated: (query: string) => void; getCurrentQuery: () => string; + activeTab: string; + onTabChange: (tab: string) => void; + aiFixRequest: { prompt: string; key: number } | null; }) { return (
- +
@@ -641,7 +790,11 @@ function QueryHelpSidebar({ value="ai" className="min-h-0 flex-1 overflow-y-auto p-3 scrollbar-thin scrollbar-track-transparent scrollbar-thumb-charcoal-600" > - + void; getCurrentQuery: () => string; + aiFixRequest: { prompt: string; key: number } | null; }) { - const [autoSubmitPrompt, setAutoSubmitPrompt] = useState(); + const [examplePromptRequest, setExamplePromptRequest] = useState<{ + prompt: string; + key: number; + } | null>(null); + + // Use aiFixRequest if present, otherwise use example prompt request + const activeRequest = aiFixRequest ?? examplePromptRequest; const examplePrompts = [ "Show me failed runs by hour for the past 7 days", @@ -687,7 +848,8 @@ function AITabContent({
@@ -699,9 +861,10 @@ function AITabContent({ key={example} type="button" onClick={() => { - // Use a unique key to ensure re-trigger even if same prompt clicked twice - setAutoSubmitPrompt(undefined); - setTimeout(() => setAutoSubmitPrompt(example), 0); + setExamplePromptRequest((prev) => ({ + prompt: example, + key: (prev?.key ?? 0) + 1, + })); }} className="block w-full rounded-md border border-grid-dimmed bg-charcoal-800 px-3 py-2 text-left text-sm text-text-dimmed transition-colors hover:border-grid-bright hover:bg-charcoal-750 hover:text-text-bright" > diff --git a/apps/webapp/app/services/queryConcurrencyLimiter.server.ts b/apps/webapp/app/services/queryConcurrencyLimiter.server.ts new file mode 100644 index 0000000000..b623f018b1 --- /dev/null +++ b/apps/webapp/app/services/queryConcurrencyLimiter.server.ts @@ -0,0 +1,29 @@ +import { env } from "~/env.server"; +import { singleton } from "~/utils/singleton"; +import { RedisConcurrencyLimiter } from "./redisConcurrencyLimiter.server"; + +function initializeQueryConcurrencyLimiter() { + return new RedisConcurrencyLimiter({ + keyPrefix: "query:concurrency", + redis: { + port: env.RATE_LIMIT_REDIS_PORT, + host: env.RATE_LIMIT_REDIS_HOST, + username: env.RATE_LIMIT_REDIS_USERNAME, + password: env.RATE_LIMIT_REDIS_PASSWORD, + tlsDisabled: env.RATE_LIMIT_REDIS_TLS_DISABLED === "true", + clusterMode: env.RATE_LIMIT_REDIS_CLUSTER_MODE_ENABLED === "1", + }, + }); +} + +export const queryConcurrencyLimiter = singleton( + "queryConcurrencyLimiter", + initializeQueryConcurrencyLimiter +); + +/** Default per-org concurrency limit from environment */ +export const DEFAULT_ORG_CONCURRENCY_LIMIT = env.QUERY_DEFAULT_ORG_CONCURRENCY_LIMIT; + +/** Global concurrency limit from environment */ +export const GLOBAL_CONCURRENCY_LIMIT = env.QUERY_GLOBAL_CONCURRENCY_LIMIT; + diff --git a/apps/webapp/app/services/queryService.server.ts b/apps/webapp/app/services/queryService.server.ts index 679dd5c081..a8c6af00ed 100644 --- a/apps/webapp/app/services/queryService.server.ts +++ b/apps/webapp/app/services/queryService.server.ts @@ -1,5 +1,7 @@ import { executeTSQL, + QueryError, + type ClickHouseSettings, type ExecuteTSQLOptions, type FieldMappings, type TSQLQueryResult, @@ -10,6 +12,11 @@ import { type z } from "zod"; import { prisma } from "~/db.server"; import { env } from "~/env.server"; import { clickhouseClient } from "./clickhouseInstance.server"; +import { + queryConcurrencyLimiter, + DEFAULT_ORG_CONCURRENCY_LIMIT, + GLOBAL_CONCURRENCY_LIMIT, +} from "./queryConcurrencyLimiter.server"; export type { TableSchema, TSQLQueryResult }; @@ -21,6 +28,33 @@ const scopeToEnum = { environment: "ENVIRONMENT", } as const; +/** + * Default ClickHouse settings for query protection + * Based on PostHog's HogQL settings to prevent expensive queries + */ +function getDefaultClickhouseSettings(): ClickHouseSettings { + return { + // Query execution limits + max_execution_time: env.QUERY_CLICKHOUSE_MAX_EXECUTION_TIME, + timeout_overflow_mode: "throw", + max_memory_usage: String(env.QUERY_CLICKHOUSE_MAX_MEMORY_USAGE), + + // AST complexity limits to prevent extremely complex queries + max_ast_elements: String(env.QUERY_CLICKHOUSE_MAX_AST_ELEMENTS), + max_expanded_ast_elements: String(env.QUERY_CLICKHOUSE_MAX_EXPANDED_AST_ELEMENTS), + + // Memory management for GROUP BY operations + max_bytes_before_external_group_by: String( + env.QUERY_CLICKHOUSE_MAX_BYTES_BEFORE_EXTERNAL_GROUP_BY + ), + + // Safety settings + allow_experimental_object_type: 1, + format_csv_allow_double_quotes: 0, + readonly: "1", // Ensure queries are read-only + }; +} + export type ExecuteQueryOptions = Omit< ExecuteTSQLOptions, "tableSchema" | "organizationId" | "projectId" | "environmentId" | "fieldMappings" @@ -40,7 +74,11 @@ export type ExecuteQueryOptions = Omit< source: CustomerQuerySource; /** User ID (optional, null for API calls) */ userId?: string | null; + /** Skip saving to history (e.g., when impersonating) */ + skip?: boolean; }; + /** Custom per-org concurrency limit (overrides default) */ + customOrgConcurrencyLimit?: number; }; /** @@ -50,67 +88,108 @@ export type ExecuteQueryOptions = Omit< export async function executeQuery( options: ExecuteQueryOptions ): Promise>> { - const { scope, organizationId, projectId, environmentId, history, ...baseOptions } = options; - - // Build tenant IDs based on scope - const tenantOptions: { - organizationId: string; - projectId?: string; - environmentId?: string; - } = { + const { + scope, organizationId, - }; + projectId, + environmentId, + history, + customOrgConcurrencyLimit, + ...baseOptions + } = options; - if (scope === "project" || scope === "environment") { - tenantOptions.projectId = projectId; - } + // Generate unique request ID for concurrency tracking + const requestId = crypto.randomUUID(); + const orgLimit = customOrgConcurrencyLimit ?? DEFAULT_ORG_CONCURRENCY_LIMIT; + + // Acquire concurrency slot + const acquireResult = await queryConcurrencyLimiter.acquire({ + key: organizationId, + requestId, + keyLimit: orgLimit, + globalLimit: GLOBAL_CONCURRENCY_LIMIT, + }); - if (scope === "environment") { - tenantOptions.environmentId = environmentId; + if (!acquireResult.success) { + const errorMessage = + acquireResult.reason === "key_limit" + ? `You've exceeded your query concurrency of ${orgLimit} for this organization. Please try again later.` + : "We're experiencing a lot of queries at the moment. Please try again later."; + return [new QueryError(errorMessage, { query: options.query }), null]; } - // Build field mappings for project_ref → project_id and environment_id → slug translation - const projects = await prisma.project.findMany({ - where: { organizationId }, - select: { id: true, externalRef: true }, - }); + try { + // Build tenant IDs based on scope + const tenantOptions: { + organizationId: string; + projectId?: string; + environmentId?: string; + } = { + organizationId, + }; - const environments = await prisma.runtimeEnvironment.findMany({ - where: { project: { organizationId } }, - select: { id: true, slug: true }, - }); + if (scope === "project" || scope === "environment") { + tenantOptions.projectId = projectId; + } - const fieldMappings: FieldMappings = { - project: Object.fromEntries(projects.map((p) => [p.id, p.externalRef])), - environment: Object.fromEntries(environments.map((e) => [e.id, e.slug])), - }; + if (scope === "environment") { + tenantOptions.environmentId = environmentId; + } - const result = await executeTSQL(clickhouseClient.reader, { - ...baseOptions, - ...tenantOptions, - fieldMappings, - }); + // Build field mappings for project_ref → project_id and environment_id → slug translation + const projects = await prisma.project.findMany({ + where: { organizationId }, + select: { id: true, externalRef: true }, + }); + + const environments = await prisma.runtimeEnvironment.findMany({ + where: { project: { organizationId } }, + select: { id: true, slug: true }, + }); - // If query succeeded and history options provided, save to history - if (result[0] === null && history) { - const stats = result[1].stats; - const byteSeconds = parseFloat(stats.byte_seconds) || 0; - const costInCents = byteSeconds * env.CENTS_PER_QUERY_BYTE_SECOND; - - await prisma.customerQuery.create({ - data: { - query: options.query, - scope: scopeToEnum[scope], - stats: { ...stats }, - costInCents, - source: history.source, - organizationId, - projectId: scope === "project" || scope === "environment" ? projectId : null, - environmentId: scope === "environment" ? environmentId : null, - userId: history.userId ?? null, + const fieldMappings: FieldMappings = { + project: Object.fromEntries(projects.map((p) => [p.id, p.externalRef])), + environment: Object.fromEntries(environments.map((e) => [e.id, e.slug])), + }; + + const result = await executeTSQL(clickhouseClient.reader, { + ...baseOptions, + ...tenantOptions, + fieldMappings, + clickhouseSettings: { + ...getDefaultClickhouseSettings(), + ...baseOptions.clickhouseSettings, // Allow caller overrides if needed }, }); - } - return result; + // If query succeeded and history options provided, save to history + // Skip history for EXPLAIN queries (admin debugging) and when explicitly skipped (e.g., impersonating) + if (result[0] === null && history && !history.skip && !baseOptions.explain) { + const stats = result[1].stats; + const byteSeconds = parseFloat(stats.byte_seconds) || 0; + const costInCents = byteSeconds * env.CENTS_PER_QUERY_BYTE_SECOND; + + await prisma.customerQuery.create({ + data: { + query: options.query, + scope: scopeToEnum[scope], + stats: { ...stats }, + costInCents, + source: history.source, + organizationId, + projectId: scope === "project" || scope === "environment" ? projectId : null, + environmentId: scope === "environment" ? environmentId : null, + userId: history.userId ?? null, + }, + }); + } + + return result; + } finally { + // Always release the concurrency slot + await queryConcurrencyLimiter.release({ + key: organizationId, + requestId, + }); + } } diff --git a/apps/webapp/app/services/redisConcurrencyLimiter.server.ts b/apps/webapp/app/services/redisConcurrencyLimiter.server.ts new file mode 100644 index 0000000000..a4f87f88f4 --- /dev/null +++ b/apps/webapp/app/services/redisConcurrencyLimiter.server.ts @@ -0,0 +1,143 @@ +import { createRedisClient, type RedisClient, type RedisWithClusterOptions } from "~/redis.server"; + +export type RedisConcurrencyLimiterOptions = { + redis: RedisWithClusterOptions; + /** Prefix for Redis keys */ + keyPrefix: string; + /** Auto-expire stale entries after this many seconds (default: 300 = 5 minutes) */ + expiryTimeInSeconds?: number; +}; + +export type AcquireResult = + | { success: true } + | { success: false; reason: "key_limit" | "global_limit" }; + +/** + * A generic Redis-based concurrency limiter that supports two-level limiting: + * - Key-level limit (e.g., per organization) + * - Global limit (across all keys) + * + * Uses Redis sorted sets with timestamps to track active requests, + * with automatic expiry of stale entries as a safety net. + */ +export class RedisConcurrencyLimiter { + private redis: RedisClient; + private keyPrefix: string; + private expiryTimeInSeconds: number; + + constructor(options: RedisConcurrencyLimiterOptions) { + this.redis = createRedisClient(`${options.keyPrefix}:limiter`, options.redis); + this.keyPrefix = options.keyPrefix; + this.expiryTimeInSeconds = options.expiryTimeInSeconds ?? 300; // 5 minutes default + this.#registerCommands(); + } + + /** + * Acquire a concurrency slot atomically checking both key and global limits. + * + * @param options.key - The key to limit (e.g., organizationId) + * @param options.requestId - A unique identifier for this request + * @param options.keyLimit - The maximum concurrent requests for this key + * @param options.globalLimit - The maximum concurrent requests globally + * @returns Success or failure with reason + */ + async acquire(options: { + key: string; + requestId: string; + keyLimit: number; + globalLimit: number; + }): Promise { + const { key, requestId, keyLimit, globalLimit } = options; + const keyKey = this.#getKeyKey(key); + const globalKey = this.#getGlobalKey(); + const now = Date.now(); + const cutoffTime = now - this.expiryTimeInSeconds * 1000; + + // @ts-expect-error - Custom command defined via defineCommand + const result = await this.redis.acquireConcurrency( + keyKey, + globalKey, + now.toString(), + requestId, + this.expiryTimeInSeconds.toString(), + cutoffTime.toString(), + keyLimit.toString(), + globalLimit.toString() + ); + + // Result: 1 = success, 0 = key limit exceeded, -1 = global limit exceeded + if (result === 1) { + return { success: true }; + } else if (result === 0) { + return { success: false, reason: "key_limit" }; + } else { + return { success: false, reason: "global_limit" }; + } + } + + /** + * Release a concurrency slot. + * + * @param options.key - The key that was used to acquire + * @param options.requestId - The request identifier used to acquire + */ + async release(options: { key: string; requestId: string }): Promise { + const { key, requestId } = options; + const keyKey = this.#getKeyKey(key); + const globalKey = this.#getGlobalKey(); + + // Remove from both sets in a single round trip + await this.redis.pipeline().zrem(keyKey, requestId).zrem(globalKey, requestId).exec(); + } + + #getKeyKey(key: string): string { + return `${this.keyPrefix}:key:${key}`; + } + + #getGlobalKey(): string { + return `${this.keyPrefix}:global`; + } + + #registerCommands() { + this.redis.defineCommand("acquireConcurrency", { + numberOfKeys: 2, + lua: /* lua */ ` + local keyKey = KEYS[1] + local globalKey = KEYS[2] + + local timestamp = tonumber(ARGV[1]) + local requestId = ARGV[2] + local expiryTime = tonumber(ARGV[3]) + local cutoffTime = tonumber(ARGV[4]) + local keyLimit = tonumber(ARGV[5]) + local globalLimit = tonumber(ARGV[6]) + + -- Remove expired entries from both sets + redis.call('ZREMRANGEBYSCORE', keyKey, '-inf', cutoffTime) + redis.call('ZREMRANGEBYSCORE', globalKey, '-inf', cutoffTime) + + -- Check global limit first (more restrictive check) + local globalCount = redis.call('ZCARD', globalKey) + if globalCount >= globalLimit then + return -1 -- Global limit exceeded + end + + -- Check key-specific limit + local keyCount = redis.call('ZCARD', keyKey) + if keyCount >= keyLimit then + return 0 -- Key limit exceeded + end + + -- Add the request to both sorted sets + redis.call('ZADD', keyKey, timestamp, requestId) + redis.call('ZADD', globalKey, timestamp, requestId) + + -- Set expiry on both keys + redis.call('EXPIRE', keyKey, expiryTime) + redis.call('EXPIRE', globalKey, expiryTime) + + return 1 -- Success + `, + }); + } +} diff --git a/apps/webapp/app/v3/querySchemas.ts b/apps/webapp/app/v3/querySchemas.ts index a14e0528be..cb574cfd9c 100644 --- a/apps/webapp/app/v3/querySchemas.ts +++ b/apps/webapp/app/v3/querySchemas.ts @@ -1,4 +1,5 @@ import { column, type TableSchema } from "@internal/tsql"; +import { autoFormatSQL } from "~/components/code/TSQLEditor"; import { runFriendlyStatus, runStatusTitleFromStatus } from "~/components/runs/v3/TaskRunStatus"; import { logger } from "~/services/logger.server"; @@ -42,6 +43,7 @@ export const runsSchema: TableSchema = { "A unique ID for a run. They always start with `run_`, e.g., run_cm1a2b3c4d5e6f7g8h9i", customRenderType: "runId", example: "run_cm1a2b3c4d5e6f7g8h9i", + coreColumn: true, }), }, environment: { @@ -87,6 +89,7 @@ export const runsSchema: TableSchema = { valueMap: runStatusTitleFromStatus, customRenderType: "runStatus", example: "Completed", + coreColumn: true, }), }, is_finished: { @@ -103,7 +106,11 @@ export const runsSchema: TableSchema = { // Task & queue task_identifier: { name: "task_identifier", - ...column("String", { description: "Task identifier/slug", example: "my-background-task" }), + ...column("String", { + description: "Task identifier/slug", + example: "my-background-task", + coreColumn: true, + }), }, queue: { name: "queue", @@ -182,6 +189,7 @@ export const runsSchema: TableSchema = { ...column("DateTime64", { description: "When the run was triggered.", example: "2024-01-15 09:30:00.000", + coreColumn: true, }), }, queued_at: { @@ -419,7 +427,7 @@ export const querySchemas: TableSchema[] = [runsSchema]; /** * Default query for the query editor */ -export const defaultQuery = `SELECT * +export const defaultQuery = autoFormatSQL(`SELECT run_id, task_identifier, triggered_at, status FROM runs ORDER BY triggered_at DESC -LIMIT 100`; +LIMIT 100`); diff --git a/apps/webapp/app/v3/services/aiQueryService.server.ts b/apps/webapp/app/v3/services/aiQueryService.server.ts index 3d6f941b03..040e79f98c 100644 --- a/apps/webapp/app/v3/services/aiQueryService.server.ts +++ b/apps/webapp/app/v3/services/aiQueryService.server.ts @@ -258,10 +258,23 @@ export class AIQueryService { parts.push(table.description); } parts.push(""); + + // Identify core columns + const coreColumns = Object.values(table.columns) + .filter((col) => col.coreColumn === true) + .map((col) => col.name); + if (coreColumns.length > 0) { + parts.push(`Core columns (use these as defaults): ${coreColumns.join(", ")}`); + parts.push(""); + } + parts.push("Columns:"); for (const col of Object.values(table.columns)) { let colDesc = `- ${col.name} (${col.type})`; + if (col.coreColumn) { + colDesc += " [CORE]"; + } if (col.description) { colDesc += `: ${col.description}`; } @@ -350,13 +363,17 @@ HAVING cnt > 10 ## Important Rules -1. ALWAYS use the validateTSQLQuery tool to check your query before returning it -2. If validation fails, fix the issues and try again (up to 3 attempts) -3. Use column names exactly as defined in the schema (case-sensitive) -4. For enum columns like status, use the allowed values shown in the schema -5. Always include a LIMIT clause (default to 100 if not specified) -6. Use meaningful column aliases with AS for aggregations -7. Format queries with proper indentation for readability +1. NEVER use SELECT * - ClickHouse is a columnar database where SELECT * has very poor performance +2. Always select only the specific columns needed for the request +3. When column selection is ambiguous, use the core columns marked [CORE] in the schema +4. If the user doesn't specify a time period, ALWAYS add a WHERE clause filtering to the last 7 days: \`triggered_at > now() - INTERVAL 7 DAY\` +5. ALWAYS use the validateTSQLQuery tool to check your query before returning it +6. If validation fails, fix the issues and try again (up to 3 attempts) +7. Use column names exactly as defined in the schema (case-sensitive) +8. For enum columns like status, use the allowed values shown in the schema +9. Always include a LIMIT clause (default to 100 if not specified) +10. Use meaningful column aliases with AS for aggregations +11. Format queries with proper indentation for readability ## Response Format @@ -431,13 +448,15 @@ HAVING cnt > 10 ## Important Rules -1. ALWAYS use the validateTSQLQuery tool to check your modified query before returning it -2. If validation fails, fix the issues and try again (up to 3 attempts) -3. Use column names exactly as defined in the schema (case-sensitive) -4. For enum columns like status, use the allowed values shown in the schema -5. Always include a LIMIT clause (default to 100 if not specified) -6. Preserve the user's existing query structure and style where possible -7. Only make the changes specifically requested by the user +1. NEVER use SELECT * - ClickHouse is a columnar database where SELECT * has very poor performance +2. If the existing query uses SELECT *, replace it with specific columns (use core columns marked [CORE] as defaults) +3. ALWAYS use the validateTSQLQuery tool to check your modified query before returning it +4. If validation fails, fix the issues and try again (up to 3 attempts) +5. Use column names exactly as defined in the schema (case-sensitive) +6. For enum columns like status, use the allowed values shown in the schema +7. Always include a LIMIT clause (default to 100 if not specified) +8. Preserve the user's existing query structure and style where possible +9. Only make the changes specifically requested by the user ## Response Format diff --git a/apps/webapp/test/components/code/tsql/tsqlCompletion.test.ts b/apps/webapp/test/components/code/tsql/tsqlCompletion.test.ts index 9fcfe7cd45..f5e56fb6e2 100644 --- a/apps/webapp/test/components/code/tsql/tsqlCompletion.test.ts +++ b/apps/webapp/test/components/code/tsql/tsqlCompletion.test.ts @@ -8,6 +8,7 @@ function createMockContext(doc: string, pos: number, explicit = false) { state: { doc: { toString: () => doc, + sliceString: (from: number, to: number) => doc.slice(from, to), }, }, pos, @@ -27,7 +28,7 @@ function createMockContext(doc: string, pos: number, explicit = false) { } as any; } -// Test schema +// Test schema with enum columns const testSchema: TableSchema[] = [ { name: "runs", @@ -40,7 +41,24 @@ const testSchema: TableSchema[] = [ description: "Task runs table", columns: { id: { name: "id", type: "String", description: "Run ID" }, - status: { name: "status", type: "String", description: "Run status" }, + status: { + name: "status", + type: "LowCardinality(String)", + description: "Run status", + allowedValues: ["Completed", "Failed", "Queued", "Executing"], + }, + machine: { + name: "machine", + type: "LowCardinality(String)", + description: "Machine preset", + allowedValues: ["micro", "small-1x", "small-2x", "medium-1x"], + }, + environment_type: { + name: "environment_type", + type: "LowCardinality(String)", + description: "Environment type", + allowedValues: ["PRODUCTION", "STAGING", "DEVELOPMENT", "PREVIEW"], + }, created_at: { name: "created_at", type: "DateTime64", description: "Creation time" }, organization_id: { name: "organization_id", type: "String" }, project_id: { name: "project_id", type: "String" }, @@ -170,5 +188,141 @@ describe("createTSQLCompletion", () => { expect(labels).toContain("AND"); expect(labels).toContain("OR"); }); + + describe("enum value completions", () => { + it("should suggest enum values after = operator", () => { + const doc = "SELECT * FROM runs WHERE status = "; + const context = createMockContext(doc, doc.length, true); + const result = completionSource(context); + + expect(result).not.toBeNull(); + + const labels = result?.options.map((o) => o.label) || []; + expect(labels).toContain("'Completed'"); + expect(labels).toContain("'Failed'"); + expect(labels).toContain("'Queued'"); + expect(labels).toContain("'Executing'"); + }); + + it("should suggest enum values after = with opening quote", () => { + const doc = "SELECT * FROM runs WHERE status = '"; + const context = createMockContext(doc, doc.length, true); + const result = completionSource(context); + + expect(result).not.toBeNull(); + + const labels = result?.options.map((o) => o.label) || []; + expect(labels).toContain("'Completed'"); + expect(labels).toContain("'Failed'"); + }); + + it("should suggest enum values with partial input", () => { + const doc = "SELECT * FROM runs WHERE status = 'Comp"; + const context = createMockContext(doc, doc.length, true); + const result = completionSource(context); + + expect(result).not.toBeNull(); + + const labels = result?.options.map((o) => o.label) || []; + expect(labels).toContain("'Completed'"); + }); + + it("should suggest enum values for machine column", () => { + const doc = "SELECT * FROM runs WHERE machine = "; + const context = createMockContext(doc, doc.length, true); + const result = completionSource(context); + + expect(result).not.toBeNull(); + + const labels = result?.options.map((o) => o.label) || []; + expect(labels).toContain("'micro'"); + expect(labels).toContain("'small-1x'"); + expect(labels).toContain("'medium-1x'"); + }); + + it("should suggest enum values for environment_type column", () => { + const doc = "SELECT * FROM runs WHERE environment_type = "; + const context = createMockContext(doc, doc.length, true); + const result = completionSource(context); + + expect(result).not.toBeNull(); + + const labels = result?.options.map((o) => o.label) || []; + expect(labels).toContain("'PRODUCTION'"); + expect(labels).toContain("'STAGING'"); + expect(labels).toContain("'DEVELOPMENT'"); + }); + + it("should suggest enum values after != operator", () => { + const doc = "SELECT * FROM runs WHERE status != "; + const context = createMockContext(doc, doc.length, true); + const result = completionSource(context); + + expect(result).not.toBeNull(); + + const labels = result?.options.map((o) => o.label) || []; + expect(labels).toContain("'Completed'"); + expect(labels).toContain("'Failed'"); + }); + + it("should suggest enum values after IN (", () => { + const doc = "SELECT * FROM runs WHERE status IN ("; + const context = createMockContext(doc, doc.length, true); + const result = completionSource(context); + + expect(result).not.toBeNull(); + + const labels = result?.options.map((o) => o.label) || []; + expect(labels).toContain("'Completed'"); + expect(labels).toContain("'Failed'"); + }); + + it("should suggest enum values after comma in IN clause", () => { + const doc = "SELECT * FROM runs WHERE status IN ('Completed', "; + const context = createMockContext(doc, doc.length, true); + const result = completionSource(context); + + expect(result).not.toBeNull(); + + const labels = result?.options.map((o) => o.label) || []; + expect(labels).toContain("'Failed'"); + expect(labels).toContain("'Queued'"); + }); + + it("should include closing quote in replacement range when auto-paired", () => { + // Simulate cursor between auto-paired quotes: status = '|' + const doc = "SELECT * FROM runs WHERE status = ''"; + const pos = doc.length - 1; // Cursor is between the quotes + const context = createMockContext(doc, pos, true); + const result = completionSource(context); + + expect(result).not.toBeNull(); + // The 'to' should extend past the cursor to include the closing quote + expect(result?.to).toBe(pos + 1); + }); + + it("should not extend 'to' when no closing quote present", () => { + const doc = "SELECT * FROM runs WHERE status = '"; + const context = createMockContext(doc, doc.length, true); + const result = completionSource(context); + + expect(result).not.toBeNull(); + // 'to' should be undefined (or not set) since there's no closing quote + expect(result?.to).toBeUndefined(); + }); + + it("should not suggest enum values for columns without allowedValues", () => { + const doc = "SELECT * FROM runs WHERE id = "; + const context = createMockContext(doc, doc.length, true); + const result = completionSource(context); + + expect(result).not.toBeNull(); + + // Should return empty options for value context with non-enum column + const labels = result?.options.map((o) => o.label) || []; + expect(labels).not.toContain("'Completed'"); + expect(labels.length).toBe(0); + }); + }); }); diff --git a/internal-packages/clickhouse/src/client/tsql.ts b/internal-packages/clickhouse/src/client/tsql.ts index d5651f5458..c5052ab8ed 100644 --- a/internal-packages/clickhouse/src/client/tsql.ts +++ b/internal-packages/clickhouse/src/client/tsql.ts @@ -9,6 +9,7 @@ import type { ClickHouseSettings } from "@clickhouse/client"; import { z } from "zod"; import { compileTSQL, + sanitizeErrorMessage, transformResults, type TableSchema, type QuerySettings, @@ -66,6 +67,13 @@ export interface ExecuteTSQLOptions { * ``` */ fieldMappings?: FieldMappings; + /** + * Run EXPLAIN instead of executing the query. + * Returns the ClickHouse execution plan with index information. + * Should only be used by admins for debugging query performance. + * @default false + */ + explain?: boolean; } /** @@ -75,6 +83,21 @@ export interface TSQLQuerySuccess { rows: T[]; columns: OutputColumnMetadata[]; stats: QueryStats; + /** + * Columns that were hidden when SELECT * was used. + * Only populated when SELECT * is transformed to core columns only. + */ + hiddenColumns?: string[]; + /** + * The raw EXPLAIN output from ClickHouse. + * Only populated when `explain: true` is passed. + */ + explainOutput?: string; + /** + * The generated ClickHouse SQL query. + * Only populated when `explain: true` is passed. + */ + generatedSql?: string; } /** @@ -107,13 +130,14 @@ export async function executeTSQL( options: ExecuteTSQLOptions ): Promise>> { const shouldTransformValues = options.transformValues ?? true; + const isExplain = options.explain ?? false; let generatedSql: string | undefined; let generatedParams: Record | undefined; try { // 1. Compile the TSQL query to ClickHouse SQL - const { sql, params, columns } = compileTSQL(options.query, { + const { sql, params, columns, hiddenColumns } = compileTSQL(options.query, { organizationId: options.organizationId, projectId: options.projectId, environmentId: options.environmentId, @@ -125,23 +149,82 @@ export async function executeTSQL( generatedSql = sql; generatedParams = params; - // 2. Execute the query with stats + // 2. Execute the query (or EXPLAIN) with stats + const queryToExecute = isExplain ? `EXPLAIN indexes = 1 ${sql}` : sql; + const queryFn = reader.queryWithStats({ - name: options.name, - query: sql, + name: isExplain ? `${options.name}-explain` : options.name, + query: queryToExecute, params: z.record(z.any()), - schema: options.schema, + // EXPLAIN returns rows with an 'explain' column + schema: isExplain ? z.object({ explain: z.string() }) : options.schema, settings: options.clickhouseSettings, }); const [error, result] = await queryFn(params); if (error) { - return [error, null]; + // Sanitize error message to show TSQL names instead of ClickHouse internals + const sanitizedMessage = sanitizeErrorMessage(error.message, options.tableSchema); + return [new QueryError(sanitizedMessage, { query: options.query }), null]; } const { rows, stats } = result; + // Handle EXPLAIN mode - run multiple explain types and combine outputs + if (isExplain) { + const explainRows = rows as Array<{ explain: string }>; + const indexesOutput = explainRows.map((r) => r.explain).join("\n"); + + // Run additional explain queries for more comprehensive output + const explainTypes = [ + { name: "ESTIMATE", query: `EXPLAIN ESTIMATE ${sql}` }, + { name: "PIPELINE", query: `EXPLAIN PIPELINE ${sql}` }, + ]; + + const additionalOutputs: string[] = []; + + for (const explainType of explainTypes) { + try { + const additionalQueryFn = reader.queryWithStats({ + name: `${options.name}-explain-${explainType.name.toLowerCase()}`, + query: explainType.query, + params: z.record(z.any()), + schema: z.object({ explain: z.string() }), + settings: options.clickhouseSettings, + }); + + const [additionalError, additionalResult] = await additionalQueryFn(params); + + if (!additionalError && additionalResult) { + const additionalRows = additionalResult.rows as Array<{ explain: string }>; + const output = additionalRows.map((r) => r.explain).join("\n"); + additionalOutputs.push(`── ${explainType.name} ──\n${output}`); + } + } catch { + // Ignore errors from additional explain queries + } + } + + // Combine all explain outputs + const combinedOutput = ["── INDEXES ──", indexesOutput, "", ...additionalOutputs].join("\n"); + + return [ + null, + { + rows: [] as z.output[], + columns: [], + stats, + hiddenColumns, + explainOutput: combinedOutput, + generatedSql, + }, + ]; + } + + // Build the result, including hiddenColumns if present + const baseResult = { columns, stats, hiddenColumns }; + // 3. Transform result values if enabled if (shouldTransformValues && rows) { const transformedRows = transformResults( @@ -149,14 +232,14 @@ export async function executeTSQL( options.tableSchema, { fieldMappings: options.fieldMappings } ); - return [null, { rows: transformedRows as z.output[], columns, stats }]; + return [null, { rows: transformedRows as z.output[], ...baseResult }]; } - return [null, { rows: rows ?? [], columns, stats }]; + return [null, { rows: rows ?? [], ...baseResult }]; } catch (error) { const errorMessage = error instanceof Error ? error.message : "Unknown error"; - // Log TSQL compilation or unexpected errors + // Log TSQL compilation or unexpected errors (with original message for debugging) logger.error("[TSQL] Query error", { name: options.name, error: errorMessage, @@ -165,8 +248,11 @@ export async function executeTSQL( generatedParams: generatedParams ?? {}, }); + // Sanitize error message to show TSQL names instead of ClickHouse internals + const sanitizedMessage = sanitizeErrorMessage(errorMessage, options.tableSchema); + if (error instanceof Error) { - return [new QueryError(error.message, { query: options.query }), null]; + return [new QueryError(sanitizedMessage, { query: options.query }), null]; } return [new QueryError("Unknown error executing TSQL query", { query: options.query }), null]; } diff --git a/internal-packages/clickhouse/src/index.ts b/internal-packages/clickhouse/src/index.ts index 03b8b81e13..d437719f4b 100644 --- a/internal-packages/clickhouse/src/index.ts +++ b/internal-packages/clickhouse/src/index.ts @@ -1,4 +1,5 @@ -import { ClickHouseSettings } from "@clickhouse/client"; +import type { ClickHouseSettings } from "@clickhouse/client"; +export type { ClickHouseSettings }; import { ClickhouseClient } from "./client/client.js"; import { ClickhouseReader, ClickhouseWriter } from "./client/types.js"; import { NoopClient } from "./client/noop.js"; @@ -44,6 +45,9 @@ export { } from "./client/tsql.js"; export type { OutputColumnMetadata } from "@internal/tsql"; +// Errors +export { QueryError } from "./client/errors.js"; + export type ClickhouseCommonConfig = { keepAlive?: { enabled?: boolean; diff --git a/internal-packages/tsql/src/index.ts b/internal-packages/tsql/src/index.ts index b0ec0cb812..d60af63766 100644 --- a/internal-packages/tsql/src/index.ts +++ b/internal-packages/tsql/src/index.ts @@ -64,6 +64,8 @@ export { findColumn, findTable, getAllowedUserValues, + // Core column utilities + getCoreColumns, getExternalValue, getInternalValue, getInternalValueFromMapping, @@ -76,6 +78,8 @@ export { isValidUserValue, // Virtual column utilities isVirtualColumn, + // Error message sanitization + sanitizeErrorMessage, validateFilterColumn, validateGroupColumn, validateSelectColumn, diff --git a/internal-packages/tsql/src/query/printer.ts b/internal-packages/tsql/src/query/printer.ts index 4f396ed7ca..734b4dc87c 100644 --- a/internal-packages/tsql/src/query/printer.ts +++ b/internal-packages/tsql/src/query/printer.ts @@ -70,6 +70,11 @@ export interface PrintResult { params: Record; /** Metadata for each column in the SELECT clause, in order */ columns: OutputColumnMetadata[]; + /** + * Columns that were hidden when SELECT * was used. + * Only populated when SELECT * is transformed to core columns only. + */ + hiddenColumns?: string[]; } /** @@ -109,6 +114,8 @@ export class ClickHousePrinter { private outputColumns: OutputColumnMetadata[] = []; /** Whether we're currently processing GROUP BY expressions */ private inGroupByContext = false; + /** Columns hidden when SELECT * is expanded to core columns only */ + private hiddenColumns: string[] = []; constructor( private context: PrinterContext, @@ -122,12 +129,17 @@ export class ClickHousePrinter { */ print(node: SelectQuery | SelectSetQuery): PrintResult { this.outputColumns = []; + this.hiddenColumns = []; const sql = this.visit(node); - return { + const result: PrintResult = { sql, params: this.context.getParams(), columns: this.outputColumns, }; + if (this.hiddenColumns.length > 0) { + result.hiddenColumns = this.hiddenColumns; + } + return result; } /** @@ -605,7 +617,8 @@ export class ClickHousePrinter { } /** - * Expand SELECT * to all selectable columns from all tables in context + * Expand SELECT * to core columns only from all tables in context. + * Non-core columns are tracked in hiddenColumns for user notification. */ private expandAllTableColumns(collectMetadata: boolean): string[] { const results: string[] = []; @@ -615,7 +628,8 @@ export class ClickHousePrinter { const tableColumns = this.getSelectableColumnsFromSchema( tableSchema, tableAlias, - collectMetadata + collectMetadata, + true // onlyCoreColumns - SELECT * only returns core columns ); results.push(...tableColumns); } @@ -629,7 +643,8 @@ export class ClickHousePrinter { } /** - * Expand table.* to all selectable columns from a specific table + * Expand table.* to core columns only from a specific table. + * Non-core columns are tracked in hiddenColumns for user notification. */ private expandTableColumns(tableAlias: string, collectMetadata: boolean): string[] { const tableSchema = this.tableContexts.get(tableAlias); @@ -639,30 +654,51 @@ export class ClickHousePrinter { return [`${this.printIdentifier(tableAlias)}.*`]; } - return this.getSelectableColumnsFromSchema(tableSchema, tableAlias, collectMetadata); + return this.getSelectableColumnsFromSchema( + tableSchema, + tableAlias, + collectMetadata, + true // onlyCoreColumns - table.* only returns core columns + ); } /** - * Get all selectable columns from a table schema as SQL strings + * Get selectable columns from a table schema as SQL strings * * @param tableSchema - The table schema * @param tableAlias - The alias used for the table in the query (for table-qualified columns) * @param collectMetadata - Whether to collect column metadata + * @param onlyCoreColumns - If true, only return core columns and track hidden columns (but falls back to all columns if no core columns are defined) * @returns Array of SQL column strings */ private getSelectableColumnsFromSchema( tableSchema: TableSchema, tableAlias: string, - collectMetadata: boolean + collectMetadata: boolean, + onlyCoreColumns = false ): string[] { const results: string[] = []; + // Check if any core columns exist - if not, we'll return all columns as a fallback + const hasCoreColumns = Object.values(tableSchema.columns).some( + (col) => col.coreColumn === true && col.selectable !== false + ); + + // Only filter to core columns if the schema defines some core columns + const shouldFilterToCoreOnly = onlyCoreColumns && hasCoreColumns; + for (const [columnName, columnSchema] of Object.entries(tableSchema.columns)) { // Skip non-selectable columns if (columnSchema.selectable === false) { continue; } + // If filtering to core columns only, skip non-core and track them + if (shouldFilterToCoreOnly && !columnSchema.coreColumn) { + this.hiddenColumns.push(columnName); + continue; + } + // Build the SQL for this column let sqlResult: string; diff --git a/internal-packages/tsql/src/query/schema.test.ts b/internal-packages/tsql/src/query/schema.test.ts index 647678aac7..3d1b8b8430 100644 --- a/internal-packages/tsql/src/query/schema.test.ts +++ b/internal-packages/tsql/src/query/schema.test.ts @@ -11,8 +11,10 @@ import { getExternalValue, getInternalValueFromMapping, getInternalValueFromMappingCaseInsensitive, + sanitizeErrorMessage, type ColumnSchema, type FieldMappings, + type TableSchema, } from "./schema.js"; describe("Value mapping helper functions", () => { @@ -363,7 +365,9 @@ describe("Field mapping helper functions (runtime dynamic mappings)", () => { }); it("should return null if mapping name does not exist", () => { - expect(getInternalValueFromMapping(fieldMappings, "nonexistent", "my-project-ref")).toBeNull(); + expect( + getInternalValueFromMapping(fieldMappings, "nonexistent", "my-project-ref") + ).toBeNull(); }); it("should return null for empty mappings", () => { @@ -454,3 +458,324 @@ describe("Field mapping helper functions (runtime dynamic mappings)", () => { }); }); +describe("Error message sanitization", () => { + // Test schema mimicking the real runs schema + const runsSchema: TableSchema = { + name: "runs", + clickhouseName: "trigger_dev.task_runs_v2", + description: "Task runs table", + tenantColumns: { + organizationId: "organization_id", + projectId: "project_id", + environmentId: "environment_id", + }, + columns: { + run_id: { + name: "run_id", + clickhouseName: "friendly_id", + ...column("String"), + }, + triggered_at: { + name: "triggered_at", + clickhouseName: "created_at", + ...column("DateTime64"), + }, + machine: { + name: "machine", + clickhouseName: "machine_preset", + ...column("String"), + }, + status: { + name: "status", + // No clickhouseName - same as name + ...column("String"), + }, + task_identifier: { + name: "task_identifier", + // No clickhouseName - same as name + ...column("String"), + }, + }, + }; + + describe("sanitizeErrorMessage", () => { + it("should replace fully qualified table.column references", () => { + const error = "Missing column trigger_dev.task_runs_v2.friendly_id in query"; + const sanitized = sanitizeErrorMessage(error, [runsSchema]); + expect(sanitized).toBe("Missing column runs.run_id in query"); + }); + + it("should replace standalone table names", () => { + const error = "Table trigger_dev.task_runs_v2 does not exist"; + const sanitized = sanitizeErrorMessage(error, [runsSchema]); + expect(sanitized).toBe("Table runs does not exist"); + }); + + it("should replace standalone column names with different clickhouseName", () => { + const error = "Unknown identifier: friendly_id"; + const sanitized = sanitizeErrorMessage(error, [runsSchema]); + expect(sanitized).toBe("Unknown identifier: run_id"); + }); + + it("should replace multiple occurrences in the same message", () => { + const error = + "Cannot compare friendly_id with created_at: incompatible types in trigger_dev.task_runs_v2"; + const sanitized = sanitizeErrorMessage(error, [runsSchema]); + expect(sanitized).toBe("Cannot compare run_id with triggered_at: incompatible types in runs"); + }); + + it("should not replace column names that have no clickhouseName mapping", () => { + const error = "Invalid value for column status"; + const sanitized = sanitizeErrorMessage(error, [runsSchema]); + expect(sanitized).toBe("Invalid value for column status"); + }); + + it("should handle error messages with quoted identifiers", () => { + const error = "Column 'machine_preset' is not of type String"; + const sanitized = sanitizeErrorMessage(error, [runsSchema]); + expect(sanitized).toBe("Column 'machine' is not of type String"); + }); + + it("should handle error messages with backtick identifiers", () => { + const error = "Unknown column `friendly_id` in table"; + const sanitized = sanitizeErrorMessage(error, [runsSchema]); + expect(sanitized).toBe("Unknown column `run_id` in table"); + }); + + it("should not replace partial matches within larger identifiers", () => { + const error = "Column my_friendly_id_column not found"; + const sanitized = sanitizeErrorMessage(error, [runsSchema]); + // Should not replace "friendly_id" within "my_friendly_id_column" + expect(sanitized).toBe("Column my_friendly_id_column not found"); + }); + + it("should return original message if no schemas provided", () => { + const error = "Some error with trigger_dev.task_runs_v2"; + const sanitized = sanitizeErrorMessage(error, []); + expect(sanitized).toBe("Some error with trigger_dev.task_runs_v2"); + }); + + it("should return original message if no matches found", () => { + const error = "Generic database error occurred"; + const sanitized = sanitizeErrorMessage(error, [runsSchema]); + expect(sanitized).toBe("Generic database error occurred"); + }); + + it("should handle multiple tables", () => { + const eventsSchema: TableSchema = { + name: "events", + clickhouseName: "trigger_dev.task_events", + description: "Task events table", + tenantColumns: { + organizationId: "organization_id", + projectId: "project_id", + environmentId: "environment_id", + }, + columns: { + event_id: { + name: "event_id", + clickhouseName: "internal_event_id", + ...column("String"), + }, + }, + }; + + const error = + "Cannot join trigger_dev.task_runs_v2 with trigger_dev.task_events on internal_event_id"; + const sanitized = sanitizeErrorMessage(error, [runsSchema, eventsSchema]); + expect(sanitized).toBe("Cannot join runs with events on event_id"); + }); + + it("should handle real ClickHouse error format", () => { + const error = + "Unable to query clickhouse: Code: 47. DB::Exception: Missing columns: 'friendly_id' while processing query"; + const sanitized = sanitizeErrorMessage(error, [runsSchema]); + // Should remove the "Unable to query clickhouse:" prefix + expect(sanitized).toBe( + "Code: 47. DB::Exception: Missing columns: 'run_id' while processing query" + ); + }); + + it("should remove 'Unable to query clickhouse:' prefix", () => { + const error = "Unable to query clickhouse: Something went wrong"; + const sanitized = sanitizeErrorMessage(error, [runsSchema]); + expect(sanitized).toBe("Something went wrong"); + expect(sanitized).not.toContain("Unable to query clickhouse"); + }); + + it("should handle error with column in parentheses", () => { + const error = "Function count(friendly_id) expects different arguments"; + const sanitized = sanitizeErrorMessage(error, [runsSchema]); + expect(sanitized).toBe("Function count(run_id) expects different arguments"); + }); + + it("should handle error with column after comma", () => { + const error = "SELECT friendly_id, created_at FROM table"; + const sanitized = sanitizeErrorMessage(error, [runsSchema]); + expect(sanitized).toBe("SELECT run_id, triggered_at FROM table"); + }); + + it("should prioritize longer matches (table.column before standalone column)", () => { + // This tests that we replace "trigger_dev.task_runs_v2.friendly_id" as a unit, + // not "trigger_dev.task_runs_v2" and then "friendly_id" separately + const error = "Error in trigger_dev.task_runs_v2.friendly_id"; + const sanitized = sanitizeErrorMessage(error, [runsSchema]); + expect(sanitized).toBe("Error in runs.run_id"); + }); + + it("should remove tenant isolation filters from error messages", () => { + const error = + "Unknown identifier in scope SELECT run_id FROM runs WHERE ((organization_id = 'org123') AND (project_id = 'proj456') AND (environment_id = 'env789')) AND (status = 'Failed')"; + const sanitized = sanitizeErrorMessage(error, [runsSchema]); + expect(sanitized).not.toContain("organization_id"); + expect(sanitized).not.toContain("project_id"); + expect(sanitized).not.toContain("environment_id"); + expect(sanitized).toContain("status = 'Failed'"); + }); + + it("should remove redundant column aliases like 'run_id AS run_id'", () => { + const error = "Error in SELECT run_id AS run_id, machine AS machine FROM runs"; + const sanitized = sanitizeErrorMessage(error, [runsSchema]); + expect(sanitized).toBe("Error in SELECT run_id, machine FROM runs"); + }); + + it("should remove redundant table aliases like 'runs AS runs'", () => { + const error = "Error in SELECT * FROM runs AS runs WHERE status = 'Failed'"; + const sanitized = sanitizeErrorMessage(error, [runsSchema]); + expect(sanitized).toBe("Error in SELECT * FROM runs WHERE status = 'Failed'"); + }); + + it("should handle real error with tenant filters and aliases", () => { + const error = + "Unable to query clickhouse: Unknown expression identifier `triggered_ata` in scope SELECT run_id AS run_id, machine AS machine FROM runs AS runs WHERE ((organization_id = 'cm5qtzpb800007cp7h6ebwt2i') AND (project_id = 'cme2p1yep00007calt8ugarkr') AND (environment_id = 'cme2p1ygj00027caln51kyiwl')) AND (status = 'Complted') ORDER BY triggered_ata DESC LIMIT 100."; + const sanitized = sanitizeErrorMessage(error, [runsSchema]); + + // Should not contain internal prefix + expect(sanitized).not.toContain("Unable to query clickhouse"); + + // Should not contain internal IDs + expect(sanitized).not.toContain("cm5qtzpb800007cp7h6ebwt2i"); + expect(sanitized).not.toContain("cme2p1yep00007calt8ugarkr"); + expect(sanitized).not.toContain("cme2p1ygj00027caln51kyiwl"); + expect(sanitized).not.toContain("organization_id"); + expect(sanitized).not.toContain("project_id"); + expect(sanitized).not.toContain("environment_id"); + + // Should not have redundant aliases + expect(sanitized).not.toContain("run_id AS run_id"); + expect(sanitized).not.toContain("machine AS machine"); + expect(sanitized).not.toContain("runs AS runs"); + + // Should still have the user's query parts + expect(sanitized).toContain("status = 'Complted'"); + expect(sanitized).toContain("triggered_ata"); + expect(sanitized).toContain("LIMIT 100"); + }); + + it("should remove required filters like engine = 'V2'", () => { + // Schema with required filters + const schemaWithRequiredFilters: TableSchema = { + name: "runs", + clickhouseName: "trigger_dev.task_runs_v2", + description: "Task runs table", + tenantColumns: { + organizationId: "organization_id", + projectId: "project_id", + environmentId: "environment_id", + }, + requiredFilters: [{ column: "engine", value: "V2" }], + columns: { + run_id: { + name: "run_id", + ...column("String"), + }, + }, + }; + + const error = + "Error in SELECT run_id FROM runs WHERE ((organization_id = 'org1') AND (engine = 'V2')) AND (status = 'Failed')"; + const sanitized = sanitizeErrorMessage(error, [schemaWithRequiredFilters]); + + expect(sanitized).not.toContain("engine = 'V2'"); + expect(sanitized).not.toContain("organization_id"); + expect(sanitized).toContain("status = 'Failed'"); + }); + + it("should handle project and environment field mappings in tenant columns", () => { + // The schema uses 'project' and 'environment' as column names with field mappings + const schemaWithFieldMappedTenants: TableSchema = { + name: "runs", + clickhouseName: "trigger_dev.task_runs_v2", + description: "Task runs table", + tenantColumns: { + organizationId: "organization_id", + projectId: "project", + environmentId: "environment", + }, + columns: { + run_id: { + name: "run_id", + ...column("String"), + }, + }, + }; + + const error = + "Error WHERE ((organization_id = 'org1') AND (project = 'proj1') AND (environment = 'env1')) AND (status = 'ok')"; + const sanitized = sanitizeErrorMessage(error, [schemaWithFieldMappedTenants]); + + expect(sanitized).not.toContain("organization_id = 'org1'"); + expect(sanitized).not.toContain("project = 'proj1'"); + expect(sanitized).not.toContain("environment = 'env1'"); + expect(sanitized).toContain("status = 'ok'"); + }); + + it("should handle queries with only automatic WHERE filters (no user WHERE clause)", () => { + // When user writes: SELECT * FROM runs LIMIT 10 + // The compiled query becomes: SELECT * FROM runs WHERE (org_id = '...') AND (proj_id = '...') AND (env_id = '...') + const error = + "Unable to query clickhouse: Some error in SELECT run_id FROM runs WHERE ((organization_id = 'org1') AND (project_id = 'proj1') AND (environment_id = 'env1')) LIMIT 10"; + const sanitized = sanitizeErrorMessage(error, [runsSchema]); + + expect(sanitized).not.toContain("Unable to query clickhouse"); + expect(sanitized).not.toContain("organization_id"); + expect(sanitized).not.toContain("project_id"); + expect(sanitized).not.toContain("environment_id"); + expect(sanitized).not.toContain("WHERE"); + expect(sanitized).toContain("SELECT run_id FROM runs"); + expect(sanitized).toContain("LIMIT 10"); + }); + + it("should handle queries with only automatic filters including engine filter", () => { + const schemaWithEngine: TableSchema = { + name: "runs", + clickhouseName: "trigger_dev.task_runs_v2", + description: "Task runs table", + tenantColumns: { + organizationId: "organization_id", + projectId: "project_id", + environmentId: "environment_id", + }, + requiredFilters: [{ column: "engine", value: "V2" }], + columns: { + run_id: { + name: "run_id", + ...column("String"), + }, + }, + }; + + const error = + "Error in SELECT * FROM runs WHERE ((organization_id = 'org1') AND (project_id = 'proj1') AND (environment_id = 'env1') AND (engine = 'V2')) ORDER BY run_id"; + const sanitized = sanitizeErrorMessage(error, [schemaWithEngine]); + + expect(sanitized).not.toContain("organization_id"); + expect(sanitized).not.toContain("project_id"); + expect(sanitized).not.toContain("environment_id"); + expect(sanitized).not.toContain("engine"); + expect(sanitized).not.toContain("WHERE"); + expect(sanitized).toContain("SELECT * FROM runs"); + expect(sanitized).toContain("ORDER BY run_id"); + }); + }); +}); diff --git a/internal-packages/tsql/src/query/schema.ts b/internal-packages/tsql/src/query/schema.ts index b2205e070b..3f9b5944dc 100644 --- a/internal-packages/tsql/src/query/schema.ts +++ b/internal-packages/tsql/src/query/schema.ts @@ -133,6 +133,23 @@ export interface ColumnSchema { * ``` */ example?: string; + /** + * Whether this is a core column that should be included in default queries. + * + * Core columns represent the essential information for a table and are suggested + * as alternatives when users attempt to use SELECT * (which has poor performance + * in columnar databases like ClickHouse). + * + * @example + * ```typescript + * { + * name: "run_id", + * type: "String", + * coreColumn: true, + * } + * ``` + */ + coreColumn?: boolean; /** * Name of the runtime field mapping to use for value translation. * When set, values are translated using the mapping provided at query time. @@ -682,3 +699,197 @@ export function getTableColumnNames(schema: SchemaRegistry, tableName: string): export function getAllTableNames(schema: SchemaRegistry): string[] { return Object.keys(schema.tables); } + +/** + * Get the names of core columns for a table. + * + * Core columns are the essential columns that should be used when users + * need a default set of columns (e.g., as an alternative to SELECT *). + * + * @param table - The table schema + * @returns Array of core column names, empty if none are marked as core + */ +export function getCoreColumns(table: TableSchema): string[] { + return Object.values(table.columns) + .filter((col) => col.coreColumn === true) + .map((col) => col.name); +} + +// ============================================================ +// Error Message Sanitization +// ============================================================ + +/** + * Sanitize a ClickHouse error message by replacing internal ClickHouse names + * with their user-facing TSQL equivalents and removing internal implementation details. + * + * This function handles: + * - Fully qualified table names (e.g., `trigger_dev.task_runs_v2` → `runs`) + * - Column names with table prefix (e.g., `trigger_dev.task_runs_v2.friendly_id` → `runs.run_id`) + * - Standalone column names (e.g., `friendly_id` → `run_id`) + * - Removes tenant isolation filters (organization_id, project_id, environment_id) + * - Removes required filters (e.g., engine = 'V2') + * - Removes redundant aliases (e.g., `run_id AS run_id` → `run_id`) + * + * @param message - The error message from ClickHouse + * @param schemas - The table schemas defining name mappings + * @returns The sanitized error message with TSQL names + * + * @example + * ```typescript + * const sanitized = sanitizeErrorMessage( + * "Missing column trigger_dev.task_runs_v2.friendly_id", + * [runsSchema] + * ); + * // Returns: "Missing column runs.run_id" + * ``` + */ +export function sanitizeErrorMessage(message: string, schemas: TableSchema[]): string { + // Build reverse lookup maps + const tableNameMap = new Map(); // clickhouseName -> tsqlName + const columnNameMap = new Map(); // clickhouseName -> { tsqlName, tableTsqlName } + + // Collect tenant column names and required filter columns to strip from errors + const columnsToStrip: string[] = []; + const tableAliasPatterns: RegExp[] = []; + + for (const table of schemas) { + // Map table names + tableNameMap.set(table.clickhouseName, table.name); + + // Collect tenant column names to strip + const tenantCols = table.tenantColumns; + columnsToStrip.push(tenantCols.organizationId, tenantCols.projectId, tenantCols.environmentId); + + // Collect required filter columns to strip + if (table.requiredFilters) { + for (const filter of table.requiredFilters) { + columnsToStrip.push(filter.column); + } + } + + // Build pattern to remove table aliases like "FROM runs AS runs" + tableAliasPatterns.push( + new RegExp(`\\b${escapeRegex(table.name)}\\s+AS\\s+${escapeRegex(table.name)}\\b`, "gi") + ); + + // Map column names + for (const col of Object.values(table.columns)) { + const clickhouseColName = col.clickhouseName ?? col.name; + if (clickhouseColName !== col.name) { + // Only add to map if there's actually a different ClickHouse name + columnNameMap.set(clickhouseColName, { + tsqlName: col.name, + tableTsqlName: table.name, + }); + } + } + } + + let result = message; + + // Step 0: Remove internal prefixes that leak implementation details + result = result.replace(/^Unable to query clickhouse:\s*/i, ""); + + // Step 1: Remove tenant isolation and required filter conditions + // We need to handle multiple patterns: + // - (column = 'value') AND ... + // - ... AND (column = 'value') + // - (column = 'value') at end of expression + for (const colName of columnsToStrip) { + const escaped = escapeRegex(colName); + // Match: (column = 'value') AND (with optional surrounding parens) + result = result.replace(new RegExp(`\\(${escaped}\\s*=\\s*'[^']*'\\)\\s*AND\\s*`, "gi"), ""); + // Match: AND (column = 'value') (handles middle/end conditions) + result = result.replace(new RegExp(`\\s*AND\\s*\\(${escaped}\\s*=\\s*'[^']*'\\)`, "gi"), ""); + // Match standalone: (column = 'value') with no AND (for when it's the only/last condition) + result = result.replace(new RegExp(`\\(${escaped}\\s*=\\s*'[^']*'\\)`, "gi"), ""); + } + + // Step 2: Clean up any leftover empty WHERE conditions or double parentheses + // Clean up empty nested parens: "(())" or "( () )" -> "" + result = result.replace(/\(\s*\(\s*\)\s*\)/g, ""); + // Clean up empty parens: "()" -> "" + result = result.replace(/\(\s*\)/g, ""); + // Clean up "WHERE AND" -> "WHERE" + result = result.replace(/\bWHERE\s+AND\b/gi, "WHERE"); + // Clean up double ANDs: "AND AND" -> "AND" + result = result.replace(/\bAND\s+AND\b/gi, "AND"); + // Clean up "WHERE ((" with user condition "))" -> "WHERE (condition)" + // First normalize: "(( (condition) ))" patterns + result = result.replace(/\(\(\s*\(/g, "("); + result = result.replace(/\)\s*\)\)/g, ")"); + // Clean double parens around single condition + result = result.replace(/\(\(([^()]+)\)\)/g, "($1)"); + // Remove "WHERE ()" if the whole WHERE is now empty + result = result.replace(/\bWHERE\s*\(\s*\)\s*/gi, ""); + // Clean up trailing " )" before ORDER/LIMIT/etc + result = result.replace(/\s+\)\s*(ORDER|LIMIT|GROUP|HAVING)/gi, " $1"); + // Remove empty WHERE clause: "WHERE ORDER" or "WHERE LIMIT" -> just "ORDER" or "LIMIT" + result = result.replace(/\bWHERE\s+(ORDER|LIMIT|GROUP|HAVING)\b/gi, "$1"); + // Remove empty WHERE at end of string: "WHERE " at end -> "" + result = result.replace(/\bWHERE\s*$/gi, ""); + // Clean up multiple spaces + result = result.replace(/\s{2,}/g, " "); + + // Step 3: Replace fully qualified column references first (table.column) + // This handles patterns like: trigger_dev.task_runs_v2.friendly_id + for (const table of schemas) { + for (const col of Object.values(table.columns)) { + const clickhouseColName = col.clickhouseName ?? col.name; + const fullyQualified = `${table.clickhouseName}.${clickhouseColName}`; + const tsqlQualified = `${table.name}.${col.name}`; + + if (fullyQualified !== tsqlQualified) { + // Use word boundary-aware replacement + result = replaceAllOccurrences(result, fullyQualified, tsqlQualified); + } + } + } + + // Step 4: Replace standalone table names (after column references to avoid partial matches) + // Sort by length descending to replace longer names first + const sortedTableNames = [...tableNameMap.entries()].sort((a, b) => b[0].length - a[0].length); + for (const [clickhouseName, tsqlName] of sortedTableNames) { + if (clickhouseName !== tsqlName) { + result = replaceAllOccurrences(result, clickhouseName, tsqlName); + } + } + + // Step 5: Replace standalone column names (for unqualified references) + // Sort by length descending to replace longer names first + const sortedColumnNames = [...columnNameMap.entries()].sort((a, b) => b[0].length - a[0].length); + for (const [clickhouseName, { tsqlName }] of sortedColumnNames) { + result = replaceAllOccurrences(result, clickhouseName, tsqlName); + } + + // Step 6: Remove redundant column aliases like "run_id AS run_id" + result = result.replace(/\b(\w+)\s+AS\s+\1\b/gi, "$1"); + + // Step 7: Remove table aliases like "runs AS runs" + for (const pattern of tableAliasPatterns) { + result = result.replace(pattern, (match) => match.split(/\s+AS\s+/i)[0]); + } + + return result; +} + +/** + * Escape special regex characters in a string + */ +function escapeRegex(str: string): string { + return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +/** + * Replace all occurrences of a string, respecting word boundaries where sensible. + * Uses word-boundary matching to avoid replacing substrings within larger identifiers. + */ +function replaceAllOccurrences(text: string, search: string, replacement: string): string { + // Use word boundary matching - identifiers are typically surrounded by + // non-identifier characters (spaces, quotes, parentheses, operators, etc.) + // We use a pattern that matches the identifier when it's not part of a larger identifier + const pattern = new RegExp(`(? { describe("enum validation", () => { it("should validate enum values", () => { - const result = validateSQL("SELECT * FROM runs WHERE status = 'COMPLETED' LIMIT 10"); + const result = validateSQL("SELECT id, status FROM runs WHERE status = 'COMPLETED' LIMIT 10"); expect(result.valid).toBe(true); expect(result.issues).toHaveLength(0); }); it("should error on invalid enum values", () => { - const result = validateSQL("SELECT * FROM runs WHERE status = 'INVALID_STATUS' LIMIT 10"); + const result = validateSQL( + "SELECT id, status FROM runs WHERE status = 'INVALID_STATUS' LIMIT 10" + ); expect(result.valid).toBe(false); expect(result.issues).toHaveLength(1); expect(result.issues[0].type).toBe("invalid_enum_value"); diff --git a/internal-packages/tsql/src/query/validator.ts b/internal-packages/tsql/src/query/validator.ts index 28ef804fe7..6235a58e41 100644 --- a/internal-packages/tsql/src/query/validator.ts +++ b/internal-packages/tsql/src/query/validator.ts @@ -20,7 +20,7 @@ import type { ArithmeticOperation, } from "./ast.js"; import type { TableSchema, ColumnSchema } from "./schema.js"; -import { getAllowedUserValues, isValidUserValue } from "./schema.js"; +import { getAllowedUserValues, getCoreColumns, isValidUserValue } from "./schema.js"; import { CompareOperationOp, ArithmeticOperationOp } from "./ast.js"; /** @@ -37,7 +37,7 @@ export interface ValidationIssue { /** Severity of the issue */ severity: ValidationSeverity; /** The type of issue */ - type: "unknown_column" | "unknown_table" | "invalid_enum_value"; + type: "unknown_column" | "unknown_table" | "invalid_enum_value" | "select_star"; /** Optional: the column name that caused the issue */ columnName?: string; /** Optional: the table name that caused the issue */ @@ -46,6 +46,8 @@ export interface ValidationIssue { invalidValue?: string; /** Optional: list of allowed values */ allowedValues?: string[]; + /** Optional: suggested columns to use instead (for select_star) */ + suggestedColumns?: string[]; } /** @@ -170,6 +172,19 @@ function validateSelectSetQuery(node: SelectSetQuery, context: ValidationContext } } +/** + * Check if an expression is a SELECT * (asterisk) + */ +function isSelectStar(expr: Expression): boolean { + if ((expr as Field).expression_type !== "field") return false; + const field = expr as Field; + // SELECT * or SELECT table.* + return ( + (field.chain.length === 1 && field.chain[0] === "*") || + (field.chain.length === 2 && field.chain[1] === "*") + ); +} + /** * Validate a SELECT query */ @@ -183,6 +198,33 @@ function validateSelectQuery(node: SelectQuery, context: ValidationContext): voi extractTablesFromJoin(node.select_from, context); } + // Check for SELECT * and emit warning + if (node.select) { + const hasSelectStar = node.select.some(isSelectStar); + if (hasSelectStar) { + // Collect core columns from all tables in context + const coreColumns: string[] = []; + for (const tableSchema of context.tables.values()) { + const tableCoreColumns = getCoreColumns(tableSchema); + coreColumns.push(...tableCoreColumns); + } + + // Build info message about SELECT * behavior + let suggestionMsg = "SELECT * doesn't return all columns."; + if (coreColumns.length > 0) { + suggestionMsg += `It will return: ${coreColumns.join(", ")}. `; + } + suggestionMsg += "Specify columns explicitly to include other columns."; + + context.issues.push({ + message: suggestionMsg, + severity: "info", + type: "select_star", + suggestedColumns: coreColumns.length > 0 ? coreColumns : undefined, + }); + } + } + // Extract column aliases from SELECT clause before validation // This allows ORDER BY and HAVING to reference aliased columns if (node.select) {