# Spaces: Sleeping / Sleeping  -- stray non-code text, kept as a comment so the module parses
import html
import os
import re
import subprocess
import tempfile
from urllib.parse import urlparse

import requests
from flask import Flask, render_template, request, send_file
from pyhtml2pdf import converter
# WSGI application object used by the view functions and the __main__ entry point.
app = Flask(__name__)
def fetch_html_content(host, slug):
    """Fetch a post's title, author name and HTML body from the Hashnode API.

    Args:
        host: Publication host name (for example ``blog.example.com``).
        slug: Slug of the post inside that publication.

    Returns:
        A ``(title, author_name, html_content)`` tuple. Each element is ``""``
        when the API answered but the publication, post or field is missing,
        and the whole tuple is ``(None, None, None)`` when the HTTP request
        did not return status 200.

    Raises:
        requests.RequestException: On network errors or when the request
            exceeds the timeout.
    """
    url = "https://gql.hashnode.com/"
    # host/slug are sent as GraphQL variables rather than formatted into the
    # query text, so quotes or braces in user input cannot change the query.
    query = """
    query Publication($host: String!, $slug: String!) {
      publication(host: $host) {
        post(slug: $slug) {
          title
          content {
            html
          }
          author {
            name
          }
        }
      }
    }
    """
    data = {"query": query, "variables": {"host": host, "slug": slug}}
    # A timeout keeps a stalled upstream from blocking the worker forever.
    response = requests.post(url, json=data, timeout=30)
    if response.status_code != 200:
        print(f"GraphQL request failed with status code {response.status_code}: {response.text}")
        return None, None, None

    result = response.json()
    # GraphQL reports an unresolved nullable field as JSON null (the key is
    # present), so dict.get() defaults never apply; fall back with `or`.
    publication = (result.get("data") or {}).get("publication") or {}
    post = publication.get("post") or {}
    title = post.get("title") or ""
    author_name = (post.get("author") or {}).get("name") or ""
    html_content = (post.get("content") or {}).get("html") or ""
    return title, author_name, html_content
# Compiled once at import time instead of on every call.
# The previous catch-all range U+24C2..U+1F251 also matched CJK ideographs,
# kana, Hangul and box-drawing characters, deleting ordinary article text;
# it is replaced below by the emoji-bearing ranges it was meant to cover.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # miscellaneous symbols and pictographs
    "\U0001F680-\U0001F6FF"  # transport and map symbols
    "\U0001F700-\U0001F77F"  # alchemical symbols
    "\U0001F780-\U0001F7FF"  # geometric shapes extended
    "\U0001F800-\U0001F8FF"  # supplemental arrows-C
    "\U0001F900-\U0001F9FF"  # supplemental symbols and pictographs
    "\U0001FA00-\U0001FA6F"  # chess symbols
    "\U0001FA70-\U0001FAFF"  # symbols and pictographs extended-A
    "\U00002702-\U000027B0"  # dingbats
    "\U000024C2"             # circled M
    "\U000025AA\U000025AB\U000025B6\U000025C0\U000025FB-\U000025FE"  # emoji squares/triangles
    "\U00002600-\U000026FF"  # miscellaneous symbols
    "\U00002B00-\U00002BFF"  # miscellaneous symbols and arrows
    "\U00003030\U0000303D\U00003297\U00003299"  # CJK-block emoji
    "\U0000FE00-\U0000FE0F"  # variation selectors (emoji presentation)
    "\U0001F000-\U0001F251"  # tiles, cards, enclosed alphanumerics/ideographs, flags
    "]+",
    flags=re.UNICODE,
)


def remove_emojis(html_content):
    """Return ``html_content`` with emoji and pictographic symbols removed.

    Args:
        html_content: HTML (or any text) as ``str``.

    Returns:
        The same text with every run of characters matched by
        ``_EMOJI_PATTERN`` deleted. Non-emoji scripts such as CJK, kana and
        Hangul are left untouched.
    """
    return _EMOJI_PATTERN.sub('', html_content)
def create_html_file(html_content):
    """Write ``html_content`` to a new temporary ``.html`` file.

    A unique file is created for every call so that concurrent requests do
    not overwrite each other's content (the previous fixed path
    ``/tmp/output.html`` was shared by all callers).

    Args:
        html_content: HTML text to store, written as UTF-8.

    Returns:
        Path of the created file. The caller is responsible for deleting it.
    """
    # delete=False: the file must outlive this function so the converters
    # can read it by name.
    with tempfile.NamedTemporaryFile(
        "w", encoding="utf-8", suffix=".html", delete=False
    ) as html_file:
        html_file.write(html_content)
    return html_file.name
def convert_html_to_pdf(title, author_name, html_filename, pdf_filename):
    """Render an HTML file to PDF with a title header and an author footer.

    The content of ``html_filename`` is wrapped with an ``<h1>`` title at the
    top and a centered "Hashnode / <author>" line at the end of the document,
    written to a temporary file and converted with ``pyhtml2pdf``.

    Args:
        title: Post title used as the heading; ``None`` is treated as empty.
        author_name: Author shown in the footer; ``None`` is treated as empty.
        html_filename: Path of the UTF-8 HTML file holding the post body.
            This file is only read, never deleted, by this function.
        pdf_filename: Destination path of the generated PDF.
    """
    # Read through a context manager so the handle is closed promptly.
    with open(html_filename, "r", encoding="utf-8") as source_file:
        body_html = source_file.read()

    # Title and author are plain text from the remote API; escape them so
    # characters such as "<" or "&" cannot break or inject markup.
    header_html = f"<h1>{html.escape(title or '')}</h1>"
    footer_html = (
        '<div style="text-align: center; margin-top: 20px;">'
        f"Hashnode / {html.escape(author_name or '')}</div>"
    )
    full_html = header_html + body_html + footer_html

    # Unique temporary file: a fixed path would be clobbered by concurrent
    # conversions.
    with tempfile.NamedTemporaryFile(
        "w", encoding="utf-8", suffix=".html", delete=False
    ) as temp_html_file:
        temp_html_file.write(full_html)
    temp_html_filename = temp_html_file.name

    try:
        converter.convert(f'file:///{os.path.abspath(temp_html_filename)}', pdf_filename, install_driver=False)
    finally:
        # Remove the combined HTML even when the conversion raises.
        os.remove(temp_html_filename)
def convert_html_to_epub(title, html_filename, epub_filename):
    """Convert an HTML file to EPUB by invoking the ``pandoc`` executable.

    Args:
        title: Value passed to pandoc as the ``title`` metadata field.
        html_filename: Path of the source HTML file. It is deleted once the
            conversion attempt has finished, whether it succeeded or not.
        epub_filename: Destination path of the generated EPUB.

    Raises:
        subprocess.CalledProcessError: If pandoc exits with a non-zero
            status (previously the failure was silently ignored).
        FileNotFoundError: If the ``pandoc`` executable cannot be found.
    """
    try:
        # Argument list (no shell) keeps title/paths from being interpreted
        # by a shell; check=True surfaces pandoc failures to the caller.
        subprocess.run(
            ['pandoc', html_filename, '-o', epub_filename, '--metadata', f'title={title}'],
            check=True,
        )
    finally:
        # Always clean up the source HTML; guard so a missing file does not
        # mask the original conversion error.
        if os.path.exists(html_filename):
            os.remove(html_filename)
def extract_host_and_slug(blog_link):
    """Split a blog post link into its host and post slug.

    The slug is the first non-empty path segment. Surrounding whitespace,
    query strings, fragments and trailing slashes are ignored, and links
    given without a scheme (``host/slug``) are accepted.

    Args:
        blog_link: Link such as ``https://host/slug``; ``None`` or an empty
            string is allowed.

    Returns:
        A ``(host, slug)`` tuple of strings; a part that cannot be
        determined is returned as ``""``.
    """
    link = (blog_link or "").strip()
    if not link:
        return "", ""

    # urlparse only recognises the host when a scheme (or "//") precedes it.
    if link.startswith("//"):
        link = "https:" + link
    elif "://" not in link:
        link = "https://" + link

    try:
        parsed = urlparse(link)
    except ValueError:
        # Raised for malformed hosts such as an unbalanced "[" (bad IPv6).
        return "", ""

    segments = [segment for segment in parsed.path.split("/") if segment]
    slug = segments[0] if segments else ""
    return parsed.netloc, slug
def index():
    """Render the landing page, optionally with a fetched article preview.

    On POST the ``blog_link`` form field is split into host and slug and the
    post is fetched; on any other method the template receives empty strings.

    Returns:
        The rendered ``index.html`` template.
    """
    # NOTE(review): no @app.route decorator is visible on this view in the
    # reviewed source; confirm that it is registered with the app.
    host = slug = title = author_name = html_content = ""
    if request.method == 'POST':
        blog_link = request.form['blog_link']
        host, slug = extract_host_and_slug(blog_link)
        title, author_name, html_content = fetch_html_content(host, slug)
        # fetch_html_content returns (None, None, None) when the request
        # fails; normalise to "" so the template always receives strings,
        # exactly as it does on a plain GET.
        title = title or ""
        author_name = author_name or ""
        html_content = html_content or ""
    return render_template('index.html', host=host, slug=slug, title=title, author_name=author_name, html_content=html_content)
def convert_pdf():
    """Fetch the requested post and return it as a PDF download.

    Reads ``host`` and ``slug`` from the submitted form, fetches the post,
    strips emojis, converts the HTML to ``/tmp/article.pdf`` and sends that
    file as an attachment.

    Returns:
        The PDF as a file download, or a plain-text error with status 502
        (upstream request failed) or 404 (post has no content).
    """
    # NOTE(review): no @app.route decorator is visible on this view in the
    # reviewed source; confirm that it is registered with the app.
    host = request.form['host']
    slug = request.form['slug']
    title, author_name, html_content = fetch_html_content(host, slug)
    # fetch_html_content yields None on a failed request and "" for a
    # missing post; either would otherwise crash or produce an empty PDF.
    if html_content is None:
        return "Could not fetch the article from Hashnode.", 502
    if not html_content:
        return "Article not found.", 404

    cleaned_html = remove_emojis(html_content)
    html_filename = create_html_file(cleaned_html)
    # NOTE(review): this fixed output path is shared by concurrent requests.
    pdf_filename = "/tmp/article.pdf"
    try:
        convert_html_to_pdf(title, author_name, html_filename, pdf_filename)
    finally:
        # Remove the intermediate HTML whether or not the conversion worked.
        if os.path.exists(html_filename):
            os.remove(html_filename)
    # Provide the generated PDF as a download
    return send_file(pdf_filename, as_attachment=True)
def convert_epub():
    """Fetch the requested post and return it as an EPUB download.

    Reads ``host`` and ``slug`` from the submitted form, fetches the post,
    strips emojis, converts the HTML to ``/tmp/article.epub`` and sends that
    file as an attachment.

    Returns:
        The EPUB as a file download, or a plain-text error with status 502
        (upstream request failed) or 404 (post has no content).
    """
    # NOTE(review): no @app.route decorator is visible on this view in the
    # reviewed source; confirm that it is registered with the app.
    host = request.form['host']
    slug = request.form['slug']
    title, _, html_content = fetch_html_content(host, slug)
    # fetch_html_content yields None on a failed request and "" for a
    # missing post; either would otherwise crash or produce an empty book.
    if html_content is None:
        return "Could not fetch the article from Hashnode.", 502
    if not html_content:
        return "Article not found.", 404

    cleaned_html = remove_emojis(html_content)
    html_filename = create_html_file(cleaned_html)
    # NOTE(review): this fixed output path is shared by concurrent requests.
    epub_filename = "/tmp/article.epub"
    # Drop any EPUB left by an earlier request so a failed conversion can
    # never result in a previous article being served.
    if os.path.exists(epub_filename):
        os.remove(epub_filename)
    try:
        convert_html_to_epub(title, html_filename, epub_filename)
    finally:
        # convert_html_to_epub normally deletes the HTML itself; this guard
        # only covers the case where it did not get that far.
        if os.path.exists(html_filename):
            os.remove(html_filename)
    # Provide the generated EPUB as a download
    return send_file(epub_filename, as_attachment=True)
if __name__ == "__main__":
    # Script entry point: serve on every network interface, port 7860.
    app.run(host='0.0.0.0', port=7860)