| import re | |
| import json | |
| import base64 | |
| from selenium import webdriver | |
| from selenium.webdriver.chrome.options import Options | |
| from selenium.webdriver.chrome.service import Service | |
| from selenium.common.exceptions import TimeoutException | |
| from selenium.webdriver.support.ui import WebDriverWait | |
| from selenium.webdriver.support.expected_conditions import staleness_of | |
| from webdriver_manager.chrome import ChromeDriverManager | |
| from selenium.webdriver.common.by import By | |
def html2pdf(
    source: str,
    timeout: int = 2,
    install_driver: bool = True,
    print_options: "dict | None" = None,
):
    """Render ``source`` to a PDF by delegating to ``__get_pdf_from_html``.

    Args:
        source: Location passed to the browser's ``get`` call.
        timeout: Seconds used for the wait performed on the loaded page
            before it is printed.
        install_driver: When true, the Chrome driver is obtained through
            ``ChromeDriverManager``; otherwise ``webdriver.Chrome`` is
            started without an explicit service.
        print_options: Optional entries merged over the default
            ``Page.printToPDF`` parameters. ``None`` means "no overrides".

    Returns:
        The value produced by ``__get_pdf_from_html`` (the base64-decoded
        ``data`` field of the ``Page.printToPDF`` result).
    """
    # Use a fresh dict per call instead of a shared mutable default argument.
    options = {} if print_options is None else print_options
    return __get_pdf_from_html(source, timeout, install_driver, options)
def __send_devtools(driver, cmd, params=None):
    """Send a DevTools command through the driver's command executor.

    Args:
        driver: Object exposing ``session_id`` and a ``command_executor``
            with ``_url`` and ``_request`` attributes.
        cmd: DevTools command name, e.g. ``"Page.printToPDF"``.
        params: Optional mapping of command parameters; defaults to an
            empty mapping.

    Returns:
        The ``"value"`` entry of the response.

    Raises:
        Exception: If the executor returns an empty or missing response.
    """
    # Avoid a shared mutable default: build a fresh dict for each call.
    if params is None:
        params = {}

    resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
    url = driver.command_executor._url + resource
    body = json.dumps({"cmd": cmd, "params": params})
    response = driver.command_executor._request("POST", url, body)

    if not response:
        # A falsy response may be None, so it must not be dereferenced here;
        # report the failing command instead.
        raise Exception("No response received for DevTools command %r" % cmd)
    return response.get("value")
def __get_pdf_from_html(
    path: str,
    timeout: int,
    install_driver: bool,
    print_options: dict
):
    """Load ``path`` in headless Chrome and return the page printed as PDF.

    Args:
        path: Location passed to ``driver.get``.
        timeout: Seconds given to ``WebDriverWait`` while waiting for the
            ``html`` element to go stale.
        install_driver: When true, the driver binary path comes from
            ``ChromeDriverManager().install()``; otherwise ``webdriver.Chrome``
            is created with options only.
        print_options: Entries merged over the default ``Page.printToPDF``
            parameters; a falsy value means no overrides.

    Returns:
        The base64-decoded ``data`` field of the ``Page.printToPDF`` result.
    """
    webdriver_prefs = {"profile.default_content_settings": {"images": 2}}
    webdriver_options = Options()
    for flag in (
        "--headless",
        "--disable-gpu",
        "--no-sandbox",
        "--disable-dev-shm-usage",
    ):
        webdriver_options.add_argument(flag)
    webdriver_options.experimental_options["prefs"] = webdriver_prefs

    if install_driver:
        service = Service(ChromeDriverManager().install())
        driver = webdriver.Chrome(service=service, options=webdriver_options)
    else:
        driver = webdriver.Chrome(options=webdriver_options)

    # Everything after the browser starts runs under try/finally so the
    # browser process is shut down even when loading or printing fails.
    try:
        driver.get(path)

        try:
            WebDriverWait(driver, timeout).until(
                staleness_of(driver.find_element(by=By.TAG_NAME, value="html"))
            )
        except TimeoutException:
            # Expected outcome: the document stayed attached for the whole
            # wait, so the page is printed as it stands.
            pass

        calculated_print_options = {
            "landscape": False,
            "displayHeaderFooter": False,
            "printBackground": True,
            "preferCSSPageSize": True,
        }
        # Caller-supplied entries override the defaults above.
        calculated_print_options.update(print_options or {})

        result = __send_devtools(
            driver, "Page.printToPDF", calculated_print_options)
        return base64.b64decode(result["data"])
    finally:
        driver.quit()
def is_valid_url(url: str) -> bool:
    """Report whether ``url`` begins with an http(s), ftp or file URL.

    The pattern is applied with ``match`` semantics: it is anchored at the
    start of the string only, so text following a matching prefix does not
    affect the result.
    """
    url_pattern = re.compile(
        r"(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]"
    )
    found = url_pattern.match(url)
    return found is not None