diff --git a/examples/cdp_mode/playwright/raw_seatgeek_sync.py b/examples/cdp_mode/playwright/raw_seatgeek_sync.py new file mode 100644 index 00000000000..5f64a307ab3 --- /dev/null +++ b/examples/cdp_mode/playwright/raw_seatgeek_sync.py @@ -0,0 +1,24 @@ +from playwright.sync_api import sync_playwright +from seleniumbase import sb_cdp + +sb = sb_cdp.Chrome(locale="en", ad_block=True) +endpoint_url = sb.get_endpoint_url() + +with sync_playwright() as p: + browser = p.chromium.connect_over_cdp(endpoint_url) + context = browser.contexts[0] + page = context.pages[0] + page.goto("https://seatgeek.com/") + input_field = 'input[name="search"]' + page.wait_for_selector(input_field) + sb.sleep(1.6) + query = "Jerry Seinfeld" + sb.press_keys(input_field, query) + sb.sleep(1.6) + page.click("li#active-result-item") + sb.sleep(4.2) + print('*** SeatGeek Search for "%s":' % query) + items = page.locator('[data-testid="listing-item"]') + for i in range(items.count()): + item_text = items.nth(i).inner_text() + print(item_text.replace("\n\n", "\n")) diff --git a/examples/cdp_mode/raw_async.py b/examples/cdp_mode/raw_async.py index 745070feb4b..0b42701b86d 100644 --- a/examples/cdp_mode/raw_async.py +++ b/examples/cdp_mode/raw_async.py @@ -6,7 +6,7 @@ async def main(): - url = "seleniumbase.io/simple/login" + url = "https://seleniumbase.io/simple/login" driver = await cdp_driver.start_async() page = await driver.get(url, lang="en") print(await page.get_title()) diff --git a/examples/cdp_mode/raw_basic_async.py b/examples/cdp_mode/raw_basic_async.py index 5c3c1f6c536..bbba2e8d26a 100644 --- a/examples/cdp_mode/raw_basic_async.py +++ b/examples/cdp_mode/raw_basic_async.py @@ -4,7 +4,7 @@ async def main(): - url = "seleniumbase.io/simple/login" + url = "https://seleniumbase.io/simple/login" driver = await cdp_driver.start_async() page = await driver.get(url, lang="en") print(await page.get_title()) @@ -21,7 +21,6 @@ async def main(): driver.stop() if __name__ == "__main__": - # Call an async function with awaited methods loop = asyncio.new_event_loop() with decorators.print_runtime("raw_basic_async.py"): loop.run_until_complete(main()) diff --git a/examples/cdp_mode/raw_basic_cdp.py b/examples/cdp_mode/raw_basic_cdp.py new file mode 100644 index 00000000000..126ddcb8360 --- /dev/null +++ b/examples/cdp_mode/raw_basic_cdp.py @@ -0,0 +1,17 @@ +from seleniumbase import sb_cdp + +url = "https://seleniumbase.io/simple/login" +sb = sb_cdp.Chrome(url) +sb.type("#username", "demo_user") +sb.type("#password", "secret_pass") +sb.click('a:contains("Sign in")') +sb.assert_exact_text("Welcome!", "h1") +sb.assert_element("img#image1") +sb.highlight("#image1") +top_nav = sb.find_element("div.topnav") +links = top_nav.query_selector_all("a") +for nav_item in links: + print(nav_item.text) +sb.click_link("Sign out") +sb.assert_text("signed out", "#top_message") +sb.driver.stop() diff --git a/examples/cdp_mode/raw_cdp_login.py b/examples/cdp_mode/raw_cdp_login.py new file mode 100644 index 00000000000..062a994defc --- /dev/null +++ b/examples/cdp_mode/raw_cdp_login.py @@ -0,0 +1,25 @@ +from seleniumbase import decorators +from seleniumbase import sb_cdp + + +def main(): + url = "https://seleniumbase.io/simple/login" + sb = sb_cdp.Chrome(url) + sb.type("#username", "demo_user") + sb.type("#password", "secret_pass") + sb.click('a:contains("Sign in")') + sb.assert_exact_text("Welcome!", "h1") + sb.assert_element("img#image1") + sb.highlight("#image1") + top_nav = sb.find_element("div.topnav") + links = top_nav.query_selector_all("a") + for nav_item in links: + print(nav_item.text) + sb.click_link("Sign out") + sb.assert_text("signed out", "#top_message") + sb.driver.stop() + + +if __name__ == "__main__": + with decorators.print_runtime("raw_cdp_login.py"): + main() diff --git a/examples/cdp_mode/raw_cf.py b/examples/cdp_mode/raw_cf.py index 5be45bbdbd3..79dc5339a23 100644 --- a/examples/cdp_mode/raw_cf.py +++ b/examples/cdp_mode/raw_cf.py @@ -4,13 +4,13 @@ with SB(uc=True, test=True, locale="en", guest=True) as sb: url = "https://www.cloudflare.com/login" sb.activate_cdp_mode(url) - sb.sleep(4) + sb.sleep(3) sb.uc_gui_handle_captcha() # PyAutoGUI press Tab and Spacebar - sb.sleep(2) + sb.sleep(3) with SB(uc=True, test=True, locale="en", guest=True) as sb: url = "https://www.cloudflare.com/login" sb.activate_cdp_mode(url) sb.sleep(4) sb.uc_gui_click_captcha() # PyAutoGUI click. (Linux needs it) - sb.sleep(2) + sb.sleep(3) diff --git a/examples/cdp_mode/raw_cf_captcha.py b/examples/cdp_mode/raw_cf_captcha.py new file mode 100644 index 00000000000..7c91735dd03 --- /dev/null +++ b/examples/cdp_mode/raw_cf_captcha.py @@ -0,0 +1,8 @@ +from seleniumbase import SB + +with SB(uc=True, test=True, guest=True) as sb: + url = "https://www.cloudflare.com/login" + sb.activate_cdp_mode(url) + sb.sleep(3) + sb.solve_captcha() + sb.sleep(3) diff --git a/examples/cdp_mode/raw_homedepot.py b/examples/cdp_mode/raw_homedepot.py new file mode 100644 index 00000000000..9cba9eb94f1 --- /dev/null +++ b/examples/cdp_mode/raw_homedepot.py @@ -0,0 +1,34 @@ +from seleniumbase import SB + +with SB(uc=True, test=True, ad_block=True) as sb: + url = "https://www.homedepot.com/" + sb.activate_cdp_mode(url) + sb.sleep(1.8) + search_box = "input#typeahead-search-field-input" + search = "Computer Chair" + category = "Gaming Chairs" + required_text = "Chair" + sb.click(search_box) + sb.sleep(1.2) + sb.press_keys(search_box, search) + sb.sleep(0.6) + sb.click("button#typeahead-search-icon-button") + sb.sleep(3.8) + sb.click('a[aria-label="%s"]' % category) + sb.sleep(3.2) + print('*** Home Depot Search for "%s":' % search) + print(' (Results must contain "%s".)' % required_text) + unique_item_text = [] + items = sb.find_elements('div[data-testid="product-pod"]') + for item in items: + if required_text in item.text: + description = item.querySelector( + 'span[data-testid="attribute-product-label"]' + ) + if description and description.text not in unique_item_text: + unique_item_text.append(description.text) + print("* " + description.text) + price = item.querySelector('[class*="sm:sui-text-4xl"]') + if price: + price_text = "$%s" % price.text + print(" (" + price_text + ")") diff --git a/examples/cdp_mode/raw_mobile_async.py b/examples/cdp_mode/raw_mobile_async.py index dd1fc72d9f5..a6bfd024bbe 100644 --- a/examples/cdp_mode/raw_mobile_async.py +++ b/examples/cdp_mode/raw_mobile_async.py @@ -25,7 +25,6 @@ async def main(): driver.stop() if __name__ == "__main__": - # Call an async function with awaited methods loop = asyncio.new_event_loop() with decorators.print_runtime("raw_mobile_async.py"): loop.run_until_complete(main()) diff --git a/examples/cdp_mode/raw_priceline.py b/examples/cdp_mode/raw_priceline.py index 1ee38865fc9..3703bf4a8c7 100644 --- a/examples/cdp_mode/raw_priceline.py +++ b/examples/cdp_mode/raw_priceline.py @@ -1,12 +1,12 @@ from seleniumbase import SB -with SB(uc=True, test=True, locale="en", ad_block=True) as sb: +with SB(uc=True, test=True, locale="en", incognito=True) as sb: url = "https://www.priceline.com" sb.activate_cdp_mode(url) sb.sleep(2.5) sb.click('input[name="endLocation"]') sb.sleep(1.2) - location = "Portland, Oregon, US" + location = "Portland, OR" selection = "Oregon, United States" # (Dropdown option) sb.press_keys('input[name="endLocation"]', location) sb.sleep(1.5) diff --git a/examples/cdp_mode/raw_softpedia.py b/examples/cdp_mode/raw_softpedia.py new file mode 100644 index 00000000000..2777ab13202 --- /dev/null +++ b/examples/cdp_mode/raw_softpedia.py @@ -0,0 +1,26 @@ +from seleniumbase import SB + +with SB(uc=True, test=True, ad_block=True) as sb: + url = "https://www.softpedia.com/" + sb.activate_cdp_mode(url) + search_box = 'input[name="search_term"]' + search = "3D Model Lab" + sb.click(search_box) + sb.press_keys(search_box, search + "\n") + sb.sleep(2) + sb.remove_elements("#adcontainer1") + sb.sleep(2.5) + print('*** Softpedia Search for "%s":' % search) + links = [] + item_container = 'div[style="min-height:100px;"]' + sb.wait_for_element(item_container) + items = sb.find_elements(item_container) + for item in items: + result = item.querySelector("h4 a") + links.append(result.get_attribute("href")) + print("* " + result.text) + print(item.querySelector("p").get_attribute("title")) + for link in links: + sb.open(link) + sb.remove_elements("div.ad") + sb.sleep(2) diff --git a/help_docs/syntax_formats.md b/help_docs/syntax_formats.md index 9fe32f5381d..26013ccb530 100644 --- a/help_docs/syntax_formats.md +++ b/help_docs/syntax_formats.md @@ -32,8 +32,8 @@
  • 21. SeleniumBase SB (Python context manager)
  • 22. The driver manager (via context manager)
  • 23. The driver manager (via direct import)
  • -
  • 24. CDP driver (async/await API. No Selenium)
  • -
  • 25. CDP driver (SB CDP Sync API. No Selenium)
  • +
  • 24. Pure CDP Mode (Async API. No Selenium)
  • +
  • 25. Pure CDP Mode (Sync API. No Selenium)
  • @@ -1020,9 +1020,9 @@ The ``Driver()`` manager format can be used as a drop-in replacement for virtual When using the ``Driver()`` format, you may need to activate a Virtual Display on your own if you want to run headed tests in a headless Linux environment. (See https://github.com/mdmintz/sbVirtualDisplay for details.) One such example of this is using an authenticated proxy, which is configured via a Chrome extension that is generated at runtime. (Note that regular headless mode in Chrome doesn't support extensions.) -

    24. CDP driver (async/await API. No Selenium)

    +

    24. Pure CDP Mode (Async API. No Selenium)

    -This format provides a pure CDP way of using SeleniumBase (without Selenium or a test runner). The async/await API is used. Here's an example: +This format provides a pure CDP way of using SeleniumBase (without Selenium/WebDriver or a test runner). The async/await API is used. Here's an example: ```python import asyncio @@ -1053,9 +1053,33 @@ if __name__ == "__main__": (See examples/cdp_mode/raw_basic_async.py for the test.) -

    25. CDP driver (SB CDP Sync API. No Selenium)

    +

    25. Pure CDP Mode (Sync API. No Selenium)

    -This format provides a pure CDP way of using SeleniumBase (without Selenium/WebDriver or a test runner). The expanded SB CDP Sync API is used. Here's an example: +This format provides a pure CDP way of using SeleniumBase (without Selenium/WebDriver or a test runner). The expanded sb_cdp Sync API is used. Here's an example: + +```python +from seleniumbase import sb_cdp + +url = "https://seleniumbase.io/simple/login" +sb = sb_cdp.Chrome(url) +sb.type("#username", "demo_user") +sb.type("#password", "secret_pass") +sb.click('a:contains("Sign in")') +sb.assert_exact_text("Welcome!", "h1") +sb.assert_element("img#image1") +sb.highlight("#image1") +top_nav = sb.find_element("div.topnav") +links = top_nav.query_selector_all("a") +for nav_item in links: + print(nav_item.text) +sb.click_link("Sign out") +sb.assert_text("signed out", "#top_message") +sb.driver.stop() +``` + +(See examples/cdp_mode/raw_basic_cdp.py for the test.) + +Here's a Pure CDP Mode example that bypasses bot-detection to scrape data from a website: ```python from seleniumbase import sb_cdp diff --git a/mkdocs_build/requirements.txt b/mkdocs_build/requirements.txt index 51eb1b0a35a..8e672b613e6 100644 --- a/mkdocs_build/requirements.txt +++ b/mkdocs_build/requirements.txt @@ -2,7 +2,7 @@ # Minimum Python version: 3.10 (for generating docs only) regex>=2025.11.3 -pymdown-extensions>=10.18 +pymdown-extensions>=10.19 pipdeptree>=2.30.0 python-dateutil>=2.8.2 Markdown==3.10 diff --git a/seleniumbase/__version__.py b/seleniumbase/__version__.py index 505895bdc30..6c126ba3490 100755 --- a/seleniumbase/__version__.py +++ b/seleniumbase/__version__.py @@ -1,2 +1,2 @@ # seleniumbase package -__version__ = "4.45.1" +__version__ = "4.45.2" diff --git a/seleniumbase/core/browser_launcher.py b/seleniumbase/core/browser_launcher.py index bfcd9bd273e..24e931a8367 100644 --- a/seleniumbase/core/browser_launcher.py +++ b/seleniumbase/core/browser_launcher.py @@ -2769,6 +2769,9 @@ def _set_chrome_options( included_disabled_features.append("SidePanelPinning") included_disabled_features.append("UserAgentClientHint") included_disabled_features.append("DisableLoadExtensionCommandLineSwitch") + included_disabled_features.append("Bluetooth") + included_disabled_features.append("WebBluetooth") + included_disabled_features.append("UnifiedWebBluetooth") included_disabled_features.append("WebAuthentication") included_disabled_features.append("PasskeyAuth") for item in extra_disabled_features: @@ -4782,6 +4785,11 @@ def get_local_driver( included_disabled_features.append( "DisableLoadExtensionCommandLineSwitch" ) + included_disabled_features.append("Bluetooth") + included_disabled_features.append("WebBluetooth") + included_disabled_features.append("UnifiedWebBluetooth") + included_disabled_features.append("WebAuthentication") + included_disabled_features.append("PasskeyAuth") for item in extra_disabled_features: if item not in included_disabled_features: included_disabled_features.append(item) diff --git a/seleniumbase/core/sb_cdp.py b/seleniumbase/core/sb_cdp.py index aad1d27ad0c..8e0dc607292 100644 --- a/seleniumbase/core/sb_cdp.py +++ b/seleniumbase/core/sb_cdp.py @@ -184,9 +184,13 @@ def get_rd_url(self): and also applies nest-asyncio for nested event loops so that SeleniumBase methods can be called from Playwright without encountering event loop error messages such as: - Cannot run the event loop while another loop is running.""" + Cannot run the event loop while another loop is running. + Also sets an environment variable to hide this warning: + Deprecation: "url.parse() behavior is not standardized". + (github.com/microsoft/playwright-python/issues/3016)""" import nest_asyncio nest_asyncio.apply() + os.environ["NODE_NO_WARNINGS"] = "1" driver = self.driver if hasattr(driver, "cdp_base"): driver = driver.cdp_base diff --git a/seleniumbase/undetected/cdp_driver/browser.py b/seleniumbase/undetected/cdp_driver/browser.py index 58e7fc96434..a375eb34734 100644 --- a/seleniumbase/undetected/cdp_driver/browser.py +++ b/seleniumbase/undetected/cdp_driver/browser.py @@ -633,7 +633,7 @@ async def start(self=None) -> Browser: """ % (dashes, message, dashes) ) self.connection = Connection( - self.info.webSocketDebuggerUrl, _owner=self + self.info.webSocketDebuggerUrl, browser=self ) if self.config.autodiscover_targets: logger.info("Enabling autodiscover targets") @@ -807,7 +807,7 @@ async def update_targets(self): f"/{t.target_id}" ), target=t, - _owner=self, + browser=self, ) ) await asyncio.sleep(0) diff --git a/seleniumbase/undetected/cdp_driver/cdp_util.py b/seleniumbase/undetected/cdp_driver/cdp_util.py index 6bb92657f4c..8b75d26ae69 100644 --- a/seleniumbase/undetected/cdp_driver/cdp_util.py +++ b/seleniumbase/undetected/cdp_driver/cdp_util.py @@ -719,18 +719,6 @@ def start_sync(*args, **kwargs) -> Browser: loop = kwargs["loop"] else: loop = asyncio.new_event_loop() - if "user_data_dir" in kwargs and kwargs["user_data_dir"]: - headless = False - if "headless" in kwargs: - headless = kwargs["headless"] - decoy_args = kwargs - decoy_args["headless"] = True - driver = loop.run_until_complete(start(**decoy_args)) - kwargs["headless"] = headless - kwargs["user_data_dir"] = driver.config.user_data_dir - time.sleep(0.2) - driver.stop() # Due to Chrome-130, must stop & start - time.sleep(0.1) return loop.run_until_complete(start(*args, **kwargs)) diff --git a/seleniumbase/undetected/cdp_driver/config.py b/seleniumbase/undetected/cdp_driver/config.py index 1f78f1148ed..31a29a1bd13 100644 --- a/seleniumbase/undetected/cdp_driver/config.py +++ b/seleniumbase/undetected/cdp_driver/config.py @@ -206,6 +206,7 @@ def __call__(self): "OptimizationTargetPrediction,OptimizationGuideModelDownloading," "SidePanelPinning,UserAgentClientHint,PrivacySandboxSettings4," "OptimizationHintsFetching,InterestFeedContentSuggestions," + "Bluetooth,WebBluetooth,UnifiedWebBluetooth," "DisableLoadExtensionCommandLineSwitch," "WebAuthentication,PasskeyAuth" ] diff --git a/seleniumbase/undetected/cdp_driver/connection.py b/seleniumbase/undetected/cdp_driver/connection.py index 35c83c22efb..3d192749112 100644 --- a/seleniumbase/undetected/cdp_driver/connection.py +++ b/seleniumbase/undetected/cdp_driver/connection.py @@ -184,13 +184,13 @@ def __init__( self, websocket_url=None, target=None, - _owner=None, + browser=None, **kwargs, ): super().__init__() self._target = target self.__count__ = itertools.count(0) - self._owner = _owner + self.browser = browser self.websocket_url: str = websocket_url self.websocket = None self.mapper = {} @@ -426,8 +426,8 @@ async def send( await self.aopen() if not self.websocket or self.websocket.state is State.CLOSED: return - if self._owner: - browser = self._owner + if self.browser: + browser = self.browser if browser.config: if browser.config.expert: await self._prepare_expert() @@ -610,11 +610,11 @@ async def listener_loop(self): # Probably an event try: event = cdp.util.parse_json_event(message) - event_tx = EventTransaction(event) - if not self.connection.mapper: - self.connection.__count__ = itertools.count(0) - event_tx.id = next(self.connection.__count__) - self.connection.mapper[event_tx.id] = event_tx + # event_tx = EventTransaction(event) + # if not self.connection.mapper: + # self.connection.__count__ = itertools.count(0) + # event_tx.id = next(self.connection.__count__) + # self.connection.mapper[event_tx.id] = event_tx except Exception as e: logger.info( "%s: %s during parsing of json from event : %s" @@ -639,12 +639,12 @@ async def listener_loop(self): or inspect.iscoroutine(callback) ): try: - await callback(event, self.connection) + asyncio.create_task(callback(event, self)) except TypeError: - await callback(event) + asyncio.create_task(callback(event)) else: try: - callback(event, self.connection) + callback(event, self) except TypeError: callback(event) except Exception as e: