From f23661ad12aa2261d4d3f978ca28f49fc3cd15af Mon Sep 17 00:00:00 2001
From: jrycw
Date: Tue, 19 Nov 2024 02:23:32 +0800
Subject: [PATCH] Isolate web driver preparation logic from `GT.save()`

---
Review notes (not part of the commit message):
- A caller-supplied `webdriver.Remote` is now wrapped in `_NoOpDriverCtx` and
  yielded as-is; the unusable `_NoOpWebDriver` class was dropped.
- The `debug_port` prompt stays inside the `with` block so the browser is still
  alive while debugging; the unsupported-browser warning is issued before launch.
- Drivers are created with `options=` as a keyword, as before the refactor.
- The `index` hashes below are those of the original commit.

 great_tables/_export.py         |  67 ++-------
 great_tables/_utils_selenium.py | 147 ++++++++++++++++++++
 2 files changed, 155 insertions(+), 59 deletions(-)
 create mode 100644 great_tables/_utils_selenium.py

diff --git a/great_tables/_export.py b/great_tables/_export.py
index 7d8ac18a9..d4fcb7649 100644
--- a/great_tables/_export.py
+++ b/great_tables/_export.py
@@ -177,24 +177,6 @@ def as_raw_html(
 DebugDumpOptions: TypeAlias = Literal["zoom", "width_resize", "final_resize"]
 
 
-class _NoOpDriverCtx:
-    """Context manager that no-ops entering a webdriver(options=...) instance."""
-
-    def __init__(self, driver: webdriver.Remote):
-        self.driver = driver
-
-    def __call__(self, options):
-        # no-op what is otherwise instantiating webdriver with options,
-        # since a webdriver instance was already passed on init
-        return self
-
-    def __enter__(self):
-        return self.driver
-
-    def __exit__(self, *args):
-        pass
-
-
 def save(
     self: GT,
     file: Path | str,
@@ -285,7 +267,7 @@ def save(
 
     # Import the required packages
     _try_import(name="selenium", pip_install_line="pip install selenium")
-    from selenium import webdriver
+    from ._utils_selenium import _get_web_driver
 
     if selector != "table":
         raise NotImplementedError("Currently, only selector='table' is supported.")
@@ -296,48 +278,15 @@ def save(
     # Get the HTML content from the displayed output
     html_content = as_raw_html(self)
 
-    # Set the webdriver and options based on the chosen browser (`web_driver=` argument)
-    if isinstance(web_driver, webdriver.Remote):
-        wdriver = _NoOpDriverCtx(web_driver)
-        wd_options = None
-
-    elif web_driver == "chrome":
-        wdriver = webdriver.Chrome
-        wd_options = webdriver.ChromeOptions()
-    elif web_driver == "safari":
-        wdriver = webdriver.Safari
-        wd_options = webdriver.SafariOptions()
-    elif web_driver == "firefox":
-        wdriver = webdriver.Firefox
-        wd_options = webdriver.FirefoxOptions()
-    elif web_driver == "edge":
-        wdriver = webdriver.Edge
-        wd_options = webdriver.EdgeOptions()
-    else:
-        raise ValueError(f"Unsupported web driver: {web_driver}")
-
-    # specify headless flag ----
-    if web_driver in {"firefox", "edge"}:
-        wd_options.add_argument("--headless")
-    elif web_driver == "chrome":
-        # Operate all webdrivers in headless mode
-        wd_options.add_argument("--headless=new")
-    else:
-        # note that safari currently doesn't support headless browsing
-        pass
-
-    if debug_port:
-        if web_driver == "chrome":
-            wd_options.add_argument(f"--remote-debugging-port={debug_port}")
-        elif web_driver == "firefox":
-            # TODO: not sure how to connect to this session on firefox?
-            wd_options.add_argument(f"--start-debugger-server {debug_port}")
-        else:
-            warnings.warn("debug_port argument only supported on chrome and firefox")
-            debug_port = None
+    # debug_port is only supported on chrome and firefox; warn before launching the browser
+    if debug_port and web_driver not in {"chrome", "firefox"}:
+        warnings.warn("debug_port argument only supported on chrome and firefox")
+        debug_port = None
+
+    wdriver = _get_web_driver(web_driver)
 
     # run browser ----
-    with wdriver(options=wd_options) as headless_browser:
+    with wdriver(debug_port=debug_port) as headless_browser:
         headless_browser.set_window_size(*window_size)
         encoded = base64.b64encode(html_content.encode(encoding=encoding)).decode(encoding=encoding)
         headless_browser.get(f"data:text/html;base64,{encoded}")
diff --git a/great_tables/_utils_selenium.py b/great_tables/_utils_selenium.py
new file mode 100644
index 000000000..f14ec3b40
--- /dev/null
+++ b/great_tables/_utils_selenium.py
@@ -0,0 +1,147 @@
+from __future__ import annotations
+
+from types import TracebackType
+from typing import Literal
+
+from selenium import webdriver
+from typing_extensions import Self, TypeAlias
+
+# Names of the browsers that `GT.save()` can launch through selenium
+WebDrivers: TypeAlias = Literal[
+    "chrome",
+    "firefox",
+    "safari",
+    "edge",
+]
+
+
+class _NoOpDriverCtx:
+    """Context manager around a web driver instance supplied by the caller.
+
+    Calling the object returns the object itself, so it can stand in for a `_BaseWebDriver`
+    subclass: `GT.save()` calls whatever `_get_web_driver()` returns and enters the result.
+    The wrapped driver is handed back on enter and is not quit on exit.
+    """
+
+    def __init__(self, driver: webdriver.Remote):
+        self.driver = driver
+
+    def __call__(self, options=None, debug_port: int | None = None) -> Self:
+        # no-op what is otherwise instantiating a webdriver, since a webdriver instance was
+        # already passed on init; both arguments are accepted and ignored
+        return self
+
+    def __enter__(self) -> webdriver.Remote:
+        return self.driver
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> bool | None:
+        # the driver was created by the caller, so it is left running
+        pass
+
+
+class _BaseWebDriver:
+    """Context manager that hands out `self.driver` on enter and quits it on exit.
+
+    Subclasses set `debug_port` and `wd_options`, call `super().__init__()` so that
+    `add_arguments()` can populate the options, and then create `self.driver` from them.
+    """
+
+    def __init__(self):
+        self.add_arguments()
+
+    def add_arguments(self):
+        """Add browser-specific command line arguments to `self.wd_options`."""
+
+    def __enter__(self) -> webdriver.Remote:
+        return self.driver
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> bool | None:
+        self.driver.quit()
+
+
+class _ChromeWebDriver(_BaseWebDriver):
+    """Headless Chrome, optionally exposing a remote debugging port."""
+
+    def __init__(self, debug_port: int | None = None):
+        self.debug_port = debug_port
+        self.wd_options = webdriver.ChromeOptions()
+        super().__init__()
+        self.driver = webdriver.Chrome(options=self.wd_options)
+
+    def add_arguments(self):
+        self.wd_options.add_argument("--headless=new")
+        if self.debug_port is not None:
+            self.wd_options.add_argument(f"--remote-debugging-port={self.debug_port}")
+
+
+class _SafariWebDriver(_BaseWebDriver):
+    """Safari; no arguments are added since it doesn't support headless browsing."""
+
+    def __init__(self, debug_port: int | None = None):
+        # debug_port is accepted for a uniform constructor signature but is not used
+        self.debug_port = debug_port
+        self.wd_options = webdriver.SafariOptions()
+        super().__init__()
+        self.driver = webdriver.Safari(options=self.wd_options)
+
+
+class _FirefoxWebDriver(_BaseWebDriver):
+    """Headless Firefox, optionally starting its debugger server."""
+
+    def __init__(self, debug_port: int | None = None):
+        self.debug_port = debug_port
+        self.wd_options = webdriver.FirefoxOptions()
+        super().__init__()
+        self.driver = webdriver.Firefox(options=self.wd_options)
+
+    def add_arguments(self):
+        self.wd_options.add_argument("--headless")
+        if self.debug_port is not None:
+            # TODO: not sure how to connect to this session on firefox?
+            self.wd_options.add_argument(f"--start-debugger-server {self.debug_port}")
+
+
+class _EdgeWebDriver(_BaseWebDriver):
+    """Headless Edge; `debug_port` is stored but not used."""
+
+    def __init__(self, debug_port: int | None = None):
+        self.debug_port = debug_port
+        self.wd_options = webdriver.EdgeOptions()
+        super().__init__()
+        self.driver = webdriver.Edge(options=self.wd_options)
+
+    def add_arguments(self):
+        self.wd_options.add_argument("--headless")
+
+
+def _get_web_driver(web_driver: WebDrivers | webdriver.Remote):
+    """Return a callable that produces a context manager yielding a web driver.
+
+    The result is called as `wdriver(debug_port=...)` and entered with `with`. A browser name
+    maps to the class that launches that browser; an existing `webdriver.Remote` instance is
+    wrapped so that it is yielded as-is.
+
+    Raises `ValueError` for anything else.
+    """
+    if isinstance(web_driver, webdriver.Remote):
+        return _NoOpDriverCtx(web_driver)
+    elif web_driver == "chrome":
+        return _ChromeWebDriver
+    elif web_driver == "safari":
+        return _SafariWebDriver
+    elif web_driver == "firefox":
+        return _FirefoxWebDriver
+    elif web_driver == "edge":
+        return _EdgeWebDriver
+    else:
+        raise ValueError(f"Unsupported web driver: {web_driver}")