| from docx import Document | |
| from docx.shared import Pt | |
| from bs4 import BeautifulSoup | |
# WeasyPrint is an optional PDF backend. ``_HAS_WEASY`` records whether the
# import succeeded so callers can choose a different renderer when it did not.
# ``Exception`` (not just ``ImportError``) is caught so that any failure raised
# while importing the package simply disables this backend.
try:
    from weasyprint import HTML
except Exception:
    _HAS_WEASY = False
else:
    _HAS_WEASY = True
def html_to_pdf(html_str: str, out_pdf_path: str) -> None:
    """Render an HTML string to a PDF file.

    WeasyPrint is tried first when the module-level ``_HAS_WEASY`` flag is
    set. If it is not available, or if it raises while rendering, the
    document is rendered with xhtml2pdf instead.

    Args:
        html_str: HTML markup to render.
        out_pdf_path: Path of the PDF file to write.

    Raises:
        ImportError: If the xhtml2pdf fallback is needed but not installed.
        OSError: If ``out_pdf_path`` cannot be opened for writing by the
            fallback renderer.
    """
    import logging

    log = logging.getLogger(__name__)

    if _HAS_WEASY:
        try:
            HTML(string=html_str).write_pdf(out_pdf_path)
        except Exception:
            # Best effort: a WeasyPrint failure must not abort the export,
            # but it should leave a trace instead of vanishing silently.
            log.warning(
                "WeasyPrint failed to render %s; falling back to xhtml2pdf",
                out_pdf_path,
                exc_info=True,
            )
        else:
            return

    # Imported lazily so xhtml2pdf is only required when the fallback runs.
    from xhtml2pdf import pisa

    # Opening with "wb" truncates anything a failed WeasyPrint run left behind.
    with open(out_pdf_path, "wb") as f:
        status = pisa.CreatePDF(src=html_str, dest=f)

    # CreatePDF reports problems through the returned status object rather
    # than by raising; surface them without changing the return contract.
    error_count = getattr(status, "err", 0)
    if error_count:
        log.error(
            "xhtml2pdf reported %s error(s) while rendering %s",
            error_count,
            out_pdf_path,
        )
def html_to_docx(html_str: str, out_docx_path: str) -> None:
    """Write the headings, paragraphs and list items of an HTML string to a .docx file.

    Every ``h1``/``h2``/``h3``/``p``/``li`` element found in the parsed HTML
    is emitted in document order: ``h1`` -> heading level 0, ``h2`` -> level 1,
    ``h3`` -> level 2, and ``p``/``li`` as plain paragraphs. Elements whose
    text is empty are skipped. Every run that is created gets an 11 pt font
    size, headings included.

    Note: matching is recursive, so an element nested inside another matched
    element (for example a ``<p>`` inside an ``<li>``) is emitted on its own
    in addition to being part of its parent's text.

    Args:
        html_str: HTML markup to convert.
        out_docx_path: Path of the .docx file to write.
    """
    heading_levels = {"h1": 0, "h2": 1, "h3": 2}

    doc = Document()
    soup = BeautifulSoup(html_str, "html.parser")
    for tag in soup.find_all(["h1", "h2", "h3", "p", "li"]):
        # get_text(strip=True) strips each text node and then joins them with
        # no separator, so "<p>Hello <b>world</b></p>" became "Helloworld".
        # Take the raw text and collapse whitespace runs instead, which keeps
        # the spacing between inline elements intact.
        txt = " ".join(tag.get_text().split())
        if not txt:
            continue

        level = heading_levels.get(tag.name)
        if level is None:
            p = doc.add_paragraph(txt)
        else:
            p = doc.add_heading(txt, level=level)

        for run in p.runs:
            run.font.size = Pt(11)

    doc.save(out_docx_path)