"""Flask app that fetches a Hashnode article and serves it as a PDF or EPUB."""

import contextlib
import html
import io
import logging
import os
import re
import subprocess
import tempfile
from pathlib import Path
from urllib.parse import urlsplit

import requests
from flask import Flask, render_template, request, send_file
from pyhtml2pdf import converter

app = Flask(__name__)
logger = logging.getLogger(__name__)

HASHNODE_GQL_URL = "https://gql.hashnode.com/"
REQUEST_TIMEOUT_SECONDS = 15

# Host and slug travel as GraphQL variables rather than being spliced into
# the query text, so user input can never alter the query itself.
_POST_QUERY = """
query Publication($host: String!, $slug: String!) {
  publication(host: $host) {
    post(slug: $slug) {
      title
      content { html }
      author { name }
    }
  }
}
"""

# Explicit emoji ranges only. A single wide range such as U+24C2..U+1F251
# would also delete CJK, Hangul and most other non-Latin text.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"
    "\U0001F300-\U0001F5FF"
    "\U0001F680-\U0001F6FF"
    "\U0001F700-\U0001F77F"
    "\U0001F780-\U0001F7FF"
    "\U0001F800-\U0001F8FF"
    "\U0001F900-\U0001F9FF"
    "\U0001FA00-\U0001FA6F"
    "\U0001FA70-\U0001FAFF"
    "\U0001F000-\U0001F251"
    "\U00002600-\U000026FF"
    "\U00002702-\U000027B0"
    "\U00002B05-\U00002B07"
    "\U00002B1B\U00002B1C\U00002B50\U00002B55"
    "\U000024C2"
    "\U0000FE0F"
    "]+"
)


def _remove_quietly(path):
    """Delete *path*, ignoring the case where it is already gone."""
    with contextlib.suppress(FileNotFoundError):
        os.remove(path)


def _new_temp_path(suffix):
    """Reserve a unique temporary file path ending in *suffix* and return it."""
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


def fetch_html_content(host, slug):
    """Fetch a post's title, author name and HTML body from Hashnode.

    Args:
        host: Publication host, e.g. ``"blog.example.com"``.
        slug: Post slug within that publication.

    Returns:
        A ``(title, author_name, html_content)`` tuple. Fields missing from
        the response are empty strings; if the request itself fails (network
        error, non-200 status, unparseable body) the tuple is
        ``(None, None, None)``.
    """
    payload = {"query": _POST_QUERY, "variables": {"host": host, "slug": slug}}
    try:
        response = requests.post(
            HASHNODE_GQL_URL, json=payload, timeout=REQUEST_TIMEOUT_SECONDS
        )
    except requests.RequestException as exc:
        logger.error("GraphQL request failed: %s", exc)
        return None, None, None

    if response.status_code != 200:
        logger.error(
            "GraphQL request failed with status code %s: %s",
            response.status_code,
            response.text,
        )
        return None, None, None

    try:
        result = response.json()
    except ValueError:
        logger.error("GraphQL response was not valid JSON: %s", response.text)
        return None, None, None
    if not isinstance(result, dict):
        logger.error("Unexpected GraphQL response shape: %r", result)
        return None, None, None

    # The API answers with explicit nulls for an unknown publication or post,
    # so ``.get(key, {})`` is not enough: the key exists but maps to None.
    publication = (result.get("data") or {}).get("publication") or {}
    post = publication.get("post") or {}
    title = post.get("title") or ""
    author_name = (post.get("author") or {}).get("name") or ""
    html_content = (post.get("content") or {}).get("html") or ""
    return title, author_name, html_content


def remove_emojis(html_content):
    """Return *html_content* with emoji characters stripped.

    ``None`` or an empty string yields an empty string.
    """
    if not html_content:
        return ""
    return _EMOJI_PATTERN.sub("", html_content)


def create_html_file(html_content):
    """Write *html_content* to a new UTF-8 temporary file and return its path.

    Every call gets its own file so concurrent requests cannot overwrite each
    other. The caller is responsible for deleting the file.
    """
    with tempfile.NamedTemporaryFile(
        "w", suffix=".html", encoding="utf-8", delete=False
    ) as html_file:
        html_file.write(html_content)
        return html_file.name


def convert_html_to_pdf(title, author_name, html_filename, pdf_filename):
    """Render the article stored in *html_filename* to *pdf_filename*.

    The title is placed above the article body and a
    ``Hashnode / <author>`` line is appended after it. *html_filename* is
    read but not deleted.

    Args:
        title: Article title (``None`` is treated as empty).
        author_name: Author's display name (``None`` is treated as empty).
        html_filename: Path of the UTF-8 HTML file holding the article body.
        pdf_filename: Destination path of the generated PDF.
    """
    article_body = Path(html_filename).read_text(encoding="utf-8")

    # Title and author come from a remote API; escape them so they cannot
    # inject markup into the rendered page.
    safe_title = html.escape(title or "")
    safe_author = html.escape(author_name or "")

    # NOTE(review): the original header/footer literals were broken across
    # raw line breaks (invalid Python); the heading/paragraph markup used
    # here is a reconstruction -- confirm the intended styling.
    # The charset declaration is needed because the browser loads this file
    # from disk without any HTTP header to tell it the encoding.
    document = (
        "<!DOCTYPE html>\n"
        '<html>\n<head><meta charset="utf-8"></head>\n<body>\n'
        f"<h1>{safe_title}</h1>\n"
        f"{article_body}\n"
        f"<p>Hashnode / {safe_author}</p>\n"
        "</body>\n</html>\n"
    )

    with tempfile.NamedTemporaryFile(
        "w", suffix=".html", encoding="utf-8", delete=False
    ) as combined_file:
        combined_file.write(document)
        combined_path = combined_file.name

    try:
        converter.convert(
            Path(combined_path).resolve().as_uri(),
            pdf_filename,
            install_driver=False,
        )
    finally:
        # Clean up the intermediate file even when the conversion fails.
        _remove_quietly(combined_path)


def convert_html_to_epub(title, html_filename, epub_filename):
    """Convert *html_filename* to an EPUB at *epub_filename* using Pandoc.

    *html_filename* is deleted afterwards, whether or not Pandoc succeeded.

    Raises:
        subprocess.CalledProcessError: If Pandoc exits with a non-zero status.
        FileNotFoundError: If the ``pandoc`` executable is not installed.
    """
    try:
        subprocess.run(
            [
                "pandoc",
                html_filename,
                "-o",
                epub_filename,
                "--metadata",
                f"title={title or ''}",
            ],
            check=True,
        )
    finally:
        _remove_quietly(html_filename)


def extract_host_and_slug(blog_link):
    """Split a post URL of the form ``https://host/slug`` into its parts.

    Links without a scheme (``host/slug``) are accepted, and any query
    string, fragment or trailing slash is ignored.

    Returns:
        A ``(host, slug)`` tuple; either element is ``""`` when it cannot be
        determined.
    """
    link = (blog_link or "").strip()
    if not link:
        return "", ""
    if "://" not in link:
        link = f"https://{link}"
    try:
        parts = urlsplit(link)
    except ValueError:
        return "", ""
    segments = [segment for segment in parts.path.split("/") if segment]
    slug = segments[0] if segments else ""
    return parts.netloc, slug


@app.route('/', methods=['GET', 'POST'])
def index():
    """Show the landing page; on POST, preview the article behind the link."""
    host = ""
    slug = ""
    title = ""
    author_name = ""
    html_content = ""

    if request.method == 'POST':
        blog_link = request.form['blog_link']
        host, slug = extract_host_and_slug(blog_link)
        title, author_name, html_content = fetch_html_content(host, slug)

    # A failed fetch yields None values; hand the template empty strings.
    return render_template(
        'index.html',
        host=host,
        slug=slug,
        title=title or "",
        author_name=author_name or "",
        html_content=html_content or "",
    )


@app.route('/convert_pdf', methods=['POST'])
def convert_pdf():
    """Generate a PDF of the requested article and return it as a download."""
    host = request.form['host']
    slug = request.form['slug']

    title, author_name, html_content = fetch_html_content(host, slug)
    if not html_content:
        return "Could not fetch the article content.", 404

    html_filename = create_html_file(remove_emojis(html_content))
    pdf_filename = _new_temp_path(".pdf")
    try:
        convert_html_to_pdf(title, author_name, html_filename, pdf_filename)
        pdf_bytes = Path(pdf_filename).read_bytes()
    finally:
        # The response is served from memory, so both temp files can go now.
        _remove_quietly(html_filename)
        _remove_quietly(pdf_filename)

    return send_file(
        io.BytesIO(pdf_bytes),
        mimetype="application/pdf",
        as_attachment=True,
        download_name="article.pdf",
    )


@app.route('/convert_epub', methods=['POST'])
def convert_epub():
    """Generate an EPUB of the requested article and return it as a download."""
    host = request.form['host']
    slug = request.form['slug']

    title, _, html_content = fetch_html_content(host, slug)
    if not html_content:
        return "Could not fetch the article content.", 404

    html_filename = create_html_file(remove_emojis(html_content))
    epub_filename = _new_temp_path(".epub")
    try:
        convert_html_to_epub(title, html_filename, epub_filename)
        epub_bytes = Path(epub_filename).read_bytes()
    finally:
        # convert_html_to_epub normally removes the HTML file already; this
        # is a safety net for early failures.
        _remove_quietly(html_filename)
        _remove_quietly(epub_filename)

    return send_file(
        io.BytesIO(epub_bytes),
        mimetype="application/epub+zip",
        as_attachment=True,
        download_name="article.epub",
    )


if __name__ == "__main__":
    app.run(host='0.0.0.0', port=7860)