# Spaces: StarTrooper08 / HashCraft
# Status: Sleeping
# File size: 5,537 Bytes

import html
import os
import re
import subprocess
import tempfile
from urllib.parse import urlsplit

import requests
from flask import Flask, render_template, request, send_file
from pyhtml2pdf import converter

# Flask application object; the @app.route decorators below register their
# view functions on it, and the __main__ guard at the bottom runs it.
app = Flask(__name__)

def fetch_html_content(host, slug):
    """Fetch a post's title, author name and HTML body from the Hashnode API.

    Args:
        host: Publication host name, e.g. ``"blog.example.com"``.
        slug: Slug of the post within that publication.

    Returns:
        A ``(title, author_name, html_content)`` tuple. Fields that are
        missing from the response (including an unknown publication or post)
        come back as empty strings. If the request itself fails (network
        error, timeout, non-200 status, or a body that is not JSON) the
        result is ``(None, None, None)``.
    """
    url = "https://gql.hashnode.com/"

    # host and slug are sent as GraphQL variables rather than interpolated
    # into the query text, so quotes or braces in user-supplied values cannot
    # break or alter the query.
    query = """
    query Publication($host: String!, $slug: String!) {
      publication(host: $host) {
        post(slug: $slug) {
          title
          content {
            html
          }
          author {
            name
          }
        }
      }
    }
    """
    payload = {"query": query, "variables": {"host": host, "slug": slug}}

    try:
        # A timeout keeps a stalled upstream from hanging the worker forever.
        response = requests.post(url, json=payload, timeout=15)
    except requests.RequestException as exc:
        print(f"GraphQL request failed: {exc}")
        return None, None, None

    if response.status_code != 200:
        print(f"GraphQL request failed with status code {response.status_code}: {response.text}")
        return None, None, None

    try:
        result = response.json()
    except ValueError as exc:
        print(f"GraphQL response was not valid JSON: {exc}")
        return None, None, None

    if not isinstance(result, dict):
        result = {}

    # A missing publication/post is reported as an explicit JSON null, which
    # dict.get(key, {}) does NOT replace with the default; "or {}" covers
    # both the absent-key and the null case.
    publication = (result.get("data") or {}).get("publication") or {}
    post = publication.get("post") or {}

    title = post.get("title") or ""
    author_name = (post.get("author") or {}).get("name") or ""
    html_content = (post.get("content") or {}).get("html") or ""
    return title, author_name, html_content

# Compiled once at import time instead of on every call.
# NOTE: the previous pattern contained the single range U+24C2..U+1F251,
# which covers CJK ideographs, Hangul, kana and most of the rest of the
# Basic Multilingual Plane, so ordinary non-Latin text was deleted together
# with the emoji. It is replaced by the narrow symbol ranges below.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F700-\U0001F77F"  # alchemical symbols
    "\U0001F780-\U0001F7FF"  # geometric shapes extended
    "\U0001F800-\U0001F8FF"  # supplemental arrows-C
    "\U0001F900-\U0001F9FF"  # supplemental symbols & pictographs
    "\U0001FA00-\U0001FA6F"  # chess symbols
    "\U0001FA70-\U0001FAFF"  # symbols & pictographs extended-A
    "\U00002702-\U000027B0"  # dingbats
    "\U000024C2"             # circled latin capital letter M
    "\U00002600-\U000026FF"  # miscellaneous symbols
    "\U00002B00-\U00002BFF"  # miscellaneous symbols and arrows
    "\U0000FE0F"             # variation selector-16 (emoji presentation)
    "\U0001F000-\U0001F251"  # tiles, cards, enclosed alphanumerics/ideographs
    "]+",
    flags=re.UNICODE,
)


def remove_emojis(html_content):
    """Return ``html_content`` with emoji and pictographic symbols removed.

    Only characters in the emoji/symbol ranges of ``_EMOJI_PATTERN`` are
    stripped; regular text in any script (Latin, CJK, Hangul, ...) is left
    untouched.

    Args:
        html_content: The HTML text to clean.

    Returns:
        The cleaned text.
    """
    return _EMOJI_PATTERN.sub('', html_content)

def create_html_file(html_content):
    """Write ``html_content`` to a new temporary ``.html`` file.

    Each call creates its own uniquely named file (the previous fixed path
    ``/tmp/output.html`` was shared by all requests, so concurrent
    conversions could overwrite or delete each other's input).

    Args:
        html_content: Text to write, encoded as UTF-8.

    Returns:
        The path of the created file. The caller is responsible for deleting
        it.
    """
    # delete=False: the file must outlive this function so the converters
    # can read it by path.
    with tempfile.NamedTemporaryFile(
        "w", encoding="utf-8", prefix="output_", suffix=".html", delete=False
    ) as html_file:
        html_file.write(html_content)
    return html_file.name

def convert_html_to_pdf(title, author_name, html_filename, pdf_filename):
    """Render an HTML file to PDF with a title heading and an author footer.

    The article HTML read from ``html_filename`` is wrapped with an ``<h1>``
    containing ``title`` and a trailing, centred "Hashnode / <author>" line,
    written to a temporary file, and converted with ``pyhtml2pdf``.

    Args:
        title: Article title shown as the heading; ``None`` is treated as
            empty.
        author_name: Author shown in the footer; ``None`` is treated as
            empty.
        html_filename: Path of the UTF-8 HTML file holding the article body.
            It is read but not deleted here.
        pdf_filename: Path the PDF is written to.
    """
    # Context manager so the handle is closed deterministically.
    with open(html_filename, "r", encoding="utf-8") as html_file:
        body_html = html_file.read()

    # Title and author are plain text coming from an external API; escape
    # them so characters such as "<" or "&" cannot inject markup.
    header_html = f"<h1>{html.escape(title or '')}</h1>"

    # The footer is appended after the content, so it ends up at the very end
    # of the document rather than on every page.
    footer_html = (
        '<div style="text-align: center; margin-top: 20px;">'
        f"Hashnode / {html.escape(author_name or '')}</div>"
    )
    full_html = header_html + body_html + footer_html

    # A uniquely named temp file avoids clashes between concurrent requests.
    with tempfile.NamedTemporaryFile(
        "w", encoding="utf-8", prefix="temp_output_", suffix=".html", delete=False
    ) as temp_html_file:
        temp_html_file.write(full_html)
    temp_html_filename = temp_html_file.name

    try:
        converter.convert(
            f'file:///{os.path.abspath(temp_html_filename)}',
            pdf_filename,
            install_driver=False,
        )
    finally:
        # Remove the temporary HTML even when the conversion fails.
        os.remove(temp_html_filename)

def convert_html_to_epub(title, html_filename, epub_filename):
    """Convert an HTML file to EPUB by invoking the ``pandoc`` executable.

    Args:
        title: Value passed to pandoc as the ``title`` metadata field.
        html_filename: Path of the input HTML file. It is deleted once the
            conversion has finished, whether it succeeded or not.
        epub_filename: Path the EPUB is written to.

    Raises:
        subprocess.CalledProcessError: If pandoc exits with a non-zero
            status. Previously the exit status was ignored, so a failed
            conversion went unnoticed by the caller.
    """
    try:
        # Arguments are passed as a list (no shell), so the title cannot be
        # interpreted as shell syntax.
        subprocess.run(
            ['pandoc', html_filename, '-o', epub_filename, '--metadata', f'title={title}'],
            check=True,
        )
    finally:
        # Remove the temporary HTML file even if pandoc failed; tolerate it
        # already being gone so the cleanup never masks the real error.
        try:
            os.remove(html_filename)
        except FileNotFoundError:
            pass

def extract_host_and_slug(blog_link):
    """Split a blog post link into its host and post slug.

    The link is parsed as a URL, so a query string, fragment, trailing slash
    or surrounding whitespace does not leak into the slug, and links typed
    without a scheme (``blog.example.com/my-post``) are accepted too.

    Args:
        blog_link: Link of the form ``https://host/slug``.

    Returns:
        A ``(host, slug)`` tuple; either element is ``""`` when it cannot be
        determined. ``slug`` is the first non-empty path segment.
    """
    link = (blog_link or "").strip()

    # Without a "//" authority marker urlsplit would treat the host as part
    # of the path, so add one for scheme-less input.
    if "://" not in link:
        link = "//" + link.lstrip("/")

    try:
        parsed = urlsplit(link)
    except ValueError:
        # e.g. malformed IPv6 brackets in the host part
        return "", ""

    segments = [segment for segment in parsed.path.split("/") if segment]
    slug = segments[0] if segments else ""
    return parsed.netloc, slug

@app.route('/', methods=['GET', 'POST'])
def index():
    """Serve the landing page, filled with the linked post's data on POST.

    On GET every template field is an empty string. On POST the submitted
    ``blog_link`` form field is split into host and slug, the post is
    fetched, and the results are handed to ``index.html``.
    """
    # Template context for a plain page view.
    page_fields = {
        "host": "",
        "slug": "",
        "title": "",
        "author_name": "",
        "html_content": "",
    }

    if request.method != 'POST':
        return render_template('index.html', **page_fields)

    submitted_link = request.form['blog_link']
    post_host, post_slug = extract_host_and_slug(submitted_link)
    post_title, post_author, post_html = fetch_html_content(post_host, post_slug)

    page_fields.update(
        host=post_host,
        slug=post_slug,
        title=post_title,
        author_name=post_author,
        html_content=post_html,
    )
    return render_template('index.html', **page_fields)

@app.route('/convert_pdf', methods=['POST'])
def convert_pdf():
    """Fetch the requested post and return it as a PDF download.

    Reads the ``host`` and ``slug`` form fields, fetches the post, strips
    emoji from its HTML, converts it to PDF and returns the PDF as an
    attachment named ``article.pdf``.

    Returns:
        The PDF response, or a plain-text 404 response when the post could
        not be fetched or has no content.
    """
    host = request.form['host']
    slug = request.form['slug']
    title, author_name, html_content = fetch_html_content(host, slug)

    # fetch_html_content yields None on a failed request and "" for an
    # unknown post; neither can be converted.
    if not html_content:
        return "Could not fetch the article content.", 404

    html_filename = create_html_file(remove_emojis(html_content))
    try:
        # A private working directory per request: a shared fixed output path
        # could hand one user's PDF to another under concurrent requests, and
        # the directory is deleted again when the block exits.
        with tempfile.TemporaryDirectory() as work_dir:
            pdf_filename = os.path.join(work_dir, "article.pdf")
            convert_html_to_pdf(title, author_name, html_filename, pdf_filename)
            # Read the result into memory so the file can be deleted before
            # the response is sent.
            with open(pdf_filename, "rb") as pdf_file:
                pdf_bytes = pdf_file.read()
    finally:
        # Remove the intermediate HTML whether or not the conversion worked.
        if os.path.exists(html_filename):
            os.remove(html_filename)

    # Serve the generated PDF as a download.
    return app.response_class(
        pdf_bytes,
        mimetype="application/pdf",
        headers={"Content-Disposition": 'attachment; filename="article.pdf"'},
    )

@app.route('/convert_epub', methods=['POST'])
def convert_epub():
    """Fetch the requested post and return it as an EPUB download.

    Reads the ``host`` and ``slug`` form fields, fetches the post, strips
    emoji from its HTML, converts it to EPUB and returns the EPUB as an
    attachment named ``article.epub``.

    Returns:
        The EPUB response, or a plain-text 404 response when the post could
        not be fetched or has no content.
    """
    host = request.form['host']
    slug = request.form['slug']
    title, _, html_content = fetch_html_content(host, slug)

    # fetch_html_content yields None on a failed request and "" for an
    # unknown post; neither can be converted.
    if not html_content:
        return "Could not fetch the article content.", 404

    html_filename = create_html_file(remove_emojis(html_content))
    try:
        # A private working directory per request: a shared fixed output path
        # could hand one user's EPUB to another under concurrent requests,
        # and the directory is deleted again when the block exits.
        with tempfile.TemporaryDirectory() as work_dir:
            epub_filename = os.path.join(work_dir, "article.epub")
            convert_html_to_epub(title, html_filename, epub_filename)
            # Read the result into memory so the file can be deleted before
            # the response is sent.
            with open(epub_filename, "rb") as epub_file:
                epub_bytes = epub_file.read()
    finally:
        # The converter normally deletes the HTML itself; this covers the
        # case where it did not get that far.
        if os.path.exists(html_filename):
            os.remove(html_filename)

    # Serve the generated EPUB as a download.
    return app.response_class(
        epub_bytes,
        mimetype="application/epub+zip",
        headers={"Content-Disposition": 'attachment; filename="article.epub"'},
    )

if __name__ == "__main__":
    # Direct execution only: serve on all network interfaces, port 7860.
    app.run(port=7860, host='0.0.0.0')