-
Notifications
You must be signed in to change notification settings - Fork 3.8k
fix: HTMLRewriter no longer crashes when element handlers throw exceptions #21848
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. Weβll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -60,12 +60,19 @@ pub const HTMLRewriter = struct { | |
| listener: JSValue, | ||
| ) bun.JSError!JSValue { | ||
| const selector_slice = std.fmt.allocPrint(bun.default_allocator, "{}", .{selector_name}) catch bun.outOfMemory(); | ||
| defer bun.default_allocator.free(selector_slice); | ||
|
|
||
| var selector = LOLHTML.HTMLSelector.parse(selector_slice) catch | ||
| return createLOLHTMLError(global); | ||
| return global.throwValue(createLOLHTMLError(global)); | ||
| errdefer selector.deinit(); | ||
|
|
||
| const handler_ = try ElementHandler.init(global, listener); | ||
| const handler = bun.default_allocator.create(ElementHandler) catch bun.outOfMemory(); | ||
| handler.* = handler_; | ||
| errdefer { | ||
| handler.deinit(); | ||
| bun.default_allocator.destroy(handler); | ||
| } | ||
|
|
||
| this.builder.addElementContentHandlers( | ||
| selector, | ||
|
|
@@ -91,8 +98,7 @@ pub const HTMLRewriter = struct { | |
| else | ||
| null, | ||
| ) catch { | ||
| selector.deinit(); | ||
| return createLOLHTMLError(global); | ||
| return global.throwValue(createLOLHTMLError(global)); | ||
| }; | ||
|
|
||
| this.context.selectors.append(bun.default_allocator, selector) catch bun.outOfMemory(); | ||
|
|
@@ -110,6 +116,10 @@ pub const HTMLRewriter = struct { | |
|
|
||
| const handler = bun.default_allocator.create(DocumentHandler) catch bun.outOfMemory(); | ||
| handler.* = handler_; | ||
| errdefer { | ||
| handler.deinit(); | ||
| bun.default_allocator.destroy(handler); | ||
| } | ||
|
|
||
| // If this fails, subsequent calls to write or end should throw | ||
| this.builder.addDocumentContentHandlers( | ||
|
|
@@ -883,6 +893,11 @@ fn HandlerCallback( | |
| wrapper.deref(); | ||
| } | ||
|
|
||
| // Use a CatchScope to properly handle exceptions from the JavaScript callback | ||
| var scope: bun.jsc.CatchScope = undefined; | ||
| scope.init(this.global, @src()); | ||
| defer scope.deinit(); | ||
|
|
||
| const result = @field(this, callback_name).?.call( | ||
| this.global, | ||
| if (comptime @hasField(HandlerType, "thisObject")) | ||
|
|
@@ -891,10 +906,36 @@ fn HandlerCallback( | |
| JSValue.zero, | ||
| &.{wrapper.toJS(this.global)}, | ||
| ) catch { | ||
| // If there's an error, we'll propagate it to the caller. | ||
| // If there's an exception in the scope, capture it for later retrieval | ||
| if (scope.exception()) |exc| { | ||
| const exc_value = JSValue.fromCell(exc); | ||
| // Store the exception in the VM's unhandled rejection capture mechanism | ||
| // if it's available (this is the same mechanism used by BufferOutputSink) | ||
| if (this.global.bunVM().unhandled_pending_rejection_to_capture) |err_ptr| { | ||
| err_ptr.* = exc_value; | ||
| exc_value.protect(); | ||
| } | ||
| } | ||
| // Clear the exception from the scope to prevent assertion failures | ||
| scope.clearException(); | ||
| // Return true to indicate failure to LOLHTML, which will cause the write | ||
| // operation to fail and the error handling logic to take over. | ||
| return true; | ||
| }; | ||
|
|
||
| // Check if there's an exception that was thrown but not caught by the error union | ||
| if (scope.exception()) |exc| { | ||
| const exc_value = JSValue.fromCell(exc); | ||
| // Store the exception in the VM's unhandled rejection capture mechanism | ||
| if (this.global.bunVM().unhandled_pending_rejection_to_capture) |err_ptr| { | ||
| err_ptr.* = exc_value; | ||
| exc_value.protect(); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the protect() calls here are likely memory leaks |
||
| } | ||
| // Clear the exception to prevent assertion failures | ||
| scope.clearException(); | ||
| return true; | ||
| } | ||
|
|
||
| if (!result.isUndefinedOrNull()) { | ||
| if (result.isError() or result.isAggregateError(this.global)) { | ||
| return true; | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| import { expect, test } from "bun:test"; | ||
| import { tempDirWithFiles } from "harness"; | ||
|
|
||
| test("HTMLRewriter should not crash when element handler throws an exception - issue #21680", () => { | ||
| // The most important test: ensure the original crashing case from the GitHub issue doesn't crash | ||
| // This was the exact case from the issue that caused "ASSERTION FAILED: Unexpected exception observed" | ||
|
|
||
| // Create a minimal HTML file for testing | ||
| const dir = tempDirWithFiles("htmlrewriter-crash-test", { | ||
| "min.html": "<script></script>", | ||
| }); | ||
|
|
||
| // Original failing case: this should not crash the process | ||
| expect(() => { | ||
| const rewriter = new HTMLRewriter().on("script", { | ||
| element(a) { | ||
| throw new Error("abc"); | ||
| }, | ||
| }); | ||
| rewriter.transform(new Response(Bun.file(`${dir}/min.html`))); | ||
| }).not.toThrow(); // The important thing is it doesn't crash, we're ok with it silently failing | ||
|
|
||
| // Test with Response containing string content | ||
| expect(() => { | ||
| const rewriter = new HTMLRewriter().on("script", { | ||
| element(a) { | ||
| throw new Error("response test"); | ||
| }, | ||
| }); | ||
| rewriter.transform(new Response("<script></script>")); | ||
| }).toThrow("response test"); | ||
| }); | ||
|
|
||
| test("HTMLRewriter exception handling should not break normal operation", () => { | ||
| // Ensure that after an exception occurs, the rewriter still works normally | ||
| let normalCallCount = 0; | ||
|
|
||
| // First, trigger an exception | ||
| try { | ||
| const rewriter = new HTMLRewriter().on("div", { | ||
| element(element) { | ||
| throw new Error("test error"); | ||
| }, | ||
| }); | ||
| rewriter.transform(new Response("<div>test</div>")); | ||
| } catch (e) { | ||
| // Expected to throw | ||
| } | ||
|
|
||
| // Then ensure normal operation still works | ||
| const rewriter2 = new HTMLRewriter().on("div", { | ||
| element(element) { | ||
| normalCallCount++; | ||
| element.setInnerContent("replaced"); | ||
| }, | ||
| }); | ||
|
|
||
| const result = rewriter2.transform(new Response("<div>original</div>")); | ||
| expect(normalCallCount).toBe(1); | ||
| // The transform should complete successfully without throwing | ||
| }); |
145 changes: 145 additions & 0 deletions
145
test/regression/issue/htmlrewriter-additional-bugs.test.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,145 @@ | ||
| import { expect, test } from "bun:test"; | ||
|
|
||
| test("HTMLRewriter selector validation should throw proper errors", () => { | ||
| // Test various invalid CSS selectors that should be rejected | ||
| const invalidSelectors = [ | ||
| "", // empty selector | ||
| " ", // whitespace only | ||
| "<<<", // invalid CSS | ||
| "div[", // incomplete attribute selector | ||
| "div)", // mismatched brackets | ||
| "div::", // invalid pseudo | ||
| "..invalid", // invalid start | ||
| ]; | ||
|
|
||
| invalidSelectors.forEach(selector => { | ||
| expect(() => { | ||
| const rewriter = new HTMLRewriter(); | ||
| rewriter.on(selector, { | ||
| element(element) { | ||
| element.setInnerContent("should not reach here"); | ||
| }, | ||
| }); | ||
| }).toThrow(); // Should throw a meaningful error, not silently succeed | ||
| }); | ||
| }); | ||
|
|
||
| test("HTMLRewriter should properly validate handler objects", () => { | ||
| // Test null and undefined handlers | ||
| expect(() => { | ||
| const rewriter = new HTMLRewriter(); | ||
| rewriter.on("div", null); | ||
| }).toThrow("Expected object"); | ||
|
|
||
| expect(() => { | ||
| const rewriter = new HTMLRewriter(); | ||
| rewriter.on("div", undefined); | ||
| }).toThrow("Expected object"); | ||
|
|
||
| // Test non-object handlers | ||
| expect(() => { | ||
| const rewriter = new HTMLRewriter(); | ||
| rewriter.on("div", "not an object"); | ||
| }).toThrow("Expected object"); | ||
|
|
||
| expect(() => { | ||
| const rewriter = new HTMLRewriter(); | ||
| rewriter.on("div", 42); | ||
| }).toThrow("Expected object"); | ||
| }); | ||
|
|
||
| test("HTMLRewriter memory management - no leaks on selector parse errors", () => { | ||
| // This test ensures that selector_slice memory is properly freed | ||
| // even when selector parsing fails | ||
| for (let i = 0; i < 100; i++) { | ||
| try { | ||
| const rewriter = new HTMLRewriter(); | ||
| // Use an invalid selector to trigger error path | ||
| rewriter.on("div[incomplete", { | ||
| element(element) { | ||
| console.log("Should not reach here"); | ||
| }, | ||
| }); | ||
| } catch (e) { | ||
| // Expected to throw, but no memory should leak | ||
| } | ||
| } | ||
|
|
||
| // If there were memory leaks, running this many times would consume significant memory | ||
| // The test passes if it completes without memory issues | ||
| expect(true).toBe(true); | ||
| }); | ||
|
|
||
| test("HTMLRewriter should handle various input edge cases safely", () => { | ||
| // Empty string input (should work) | ||
| expect(() => { | ||
| const rewriter = new HTMLRewriter(); | ||
| rewriter.transform(""); | ||
| }).not.toThrow(); | ||
|
|
||
| // Null input (should throw) | ||
| expect(() => { | ||
| const rewriter = new HTMLRewriter(); | ||
| rewriter.transform(null); | ||
| }).toThrow("Expected Response or Body"); | ||
|
|
||
| // Large input (should work) | ||
| expect(() => { | ||
| const rewriter = new HTMLRewriter(); | ||
| const largeHtml = "<div>" + "x".repeat(100000) + "</div>"; | ||
| rewriter.transform(largeHtml); | ||
| }).not.toThrow(); | ||
| }); | ||
|
|
||
| test("HTMLRewriter concurrent usage should work correctly", () => { | ||
| // Same rewriter instance should handle multiple transforms | ||
| const rewriter = new HTMLRewriter().on("div", { | ||
| element(element) { | ||
| element.setInnerContent("modified"); | ||
| }, | ||
| }); | ||
|
|
||
| expect(() => { | ||
| const result1 = rewriter.transform("<div>original1</div>"); | ||
| const result2 = rewriter.transform("<div>original2</div>"); | ||
| }).not.toThrow(); | ||
| }); | ||
|
|
||
| test("HTMLRewriter should handle many handlers on same element", () => { | ||
| let rewriter = new HTMLRewriter(); | ||
|
|
||
| // Add many handlers to the same element type | ||
| for (let i = 0; i < 50; i++) { | ||
| rewriter = rewriter.on("div", { | ||
| element(element) { | ||
| const current = element.getAttribute("data-count") || "0"; | ||
| element.setAttribute("data-count", (parseInt(current) + 1).toString()); | ||
| }, | ||
| }); | ||
| } | ||
|
|
||
| expect(() => { | ||
| rewriter.transform('<div data-count="0">test</div>'); | ||
| }).not.toThrow(); | ||
| }); | ||
|
|
||
| test("HTMLRewriter should handle special characters in selectors safely", () => { | ||
| // These selectors with special characters should either work or fail gracefully | ||
| const specialSelectors = [ | ||
| "div[data-test=\"'quotes'\"]", | ||
| 'div[data-test="\\"escaped\\""]', | ||
| 'div[class~="space separated"]', | ||
| 'input[type="text"]', | ||
| ]; | ||
|
|
||
| specialSelectors.forEach(selector => { | ||
| expect(() => { | ||
| const rewriter = new HTMLRewriter().on(selector, { | ||
| element(element) { | ||
| element.setAttribute("data-processed", "true"); | ||
| }, | ||
| }); | ||
| // The important thing is it doesn't crash | ||
| }).not.toThrow(); | ||
| }); | ||
| }); |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the protect() calls here are likely memory leaks