Spaces:

StarTrooper08
/

HashCraft

Sleeping

HashCraft / app.py

Atharva

update: tmp dir

ed2edf4 over 1 year ago

5.54 kB

	from flask import Flask, render_template, request, send_file
	import os
	import requests
	from pyhtml2pdf import converter
	import re
	import subprocess

	# Flask application object; the @app.route decorators further down register their views on it.
	app = Flask(__name__)

	def fetch_html_content(host, slug):
	    """Fetch a post's title, author name and HTML body from the Hashnode GraphQL API.

	    Args:
	        host: Publication host passed to the ``publication(host: ...)`` field.
	        slug: Post slug passed to the ``post(slug: ...)`` field.

	    Returns:
	        A ``(title, author_name, html_content)`` tuple. A field that is absent
	        or null in the response is returned as ``""``. ``(None, None, None)``
	        is returned when the request cannot be completed, the server answers
	        with a non-200 status, or the response body is not valid JSON.
	    """
	    url = "https://gql.hashnode.com/"

	    # host and slug originate from user input. They are sent as GraphQL
	    # variables rather than interpolated into the query text, so quotes or
	    # braces in the input can neither break nor alter the query.
	    query = """
	    query Publication($host: String!, $slug: String!) {
	      publication(host: $host) {
	        post(slug: $slug) {
	          title
	          content {
	            html
	          }
	          author {
	            name
	          }
	        }
	      }
	    }
	    """
	    payload = {"query": query, "variables": {"host": host, "slug": slug}}

	    try:
	        # A timeout keeps a stalled upstream from blocking the caller forever.
	        response = requests.post(url, json=payload, timeout=30)
	    except requests.RequestException as exc:
	        print(f"GraphQL request failed: {exc}")
	        return None, None, None

	    if response.status_code != 200:
	        print(f"GraphQL request failed with status code {response.status_code}: {response.text}")
	        return None, None, None

	    try:
	        result = response.json()
	    except ValueError as exc:
	        print(f"GraphQL response was not valid JSON: {exc}")
	        return None, None, None

	    # A GraphQL response may carry an explicit null for "data", "publication"
	    # or "post"; dict.get(key, {}) does not replace a null value, so fall back
	    # with `or {}` at every level instead.
	    post = ((result.get("data") or {}).get("publication") or {}).get("post") or {}
	    title = post.get("title") or ""
	    author_name = (post.get("author") or {}).get("name") or ""
	    html_content = (post.get("content") or {}).get("html") or ""
	    return title, author_name, html_content

	def remove_emojis(html_content):
	    """Return *html_content* with emoji and pictographic symbols removed.

	    Only the explicit code-point ranges listed below are stripped. Ordinary
	    text in any script (Latin, CJK, Hangul, ...) is left untouched.

	    Args:
	        html_content: The HTML text to clean.

	    Returns:
	        The cleaned string.
	    """
	    # NOTE: a single catch-all range such as U+24C2..U+1F251 must not be used
	    # here: it spans most of the Basic Multilingual Plane and would delete
	    # CJK, Hangul and many other non-emoji characters from the article.
	    emoji_pattern = re.compile(
	        "["
	        "\U0001F000-\U0001F251"  # tiles, playing cards, enclosed alphanumerics, flags
	        "\U0001F300-\U0001F5FF"  # symbols & pictographs
	        "\U0001F600-\U0001F64F"  # emoticons
	        "\U0001F680-\U0001F6FF"  # transport & map symbols
	        "\U0001F700-\U0001F77F"  # alchemical symbols
	        "\U0001F780-\U0001F7FF"  # geometric shapes extended
	        "\U0001F800-\U0001F8FF"  # supplemental arrows-C
	        "\U0001F900-\U0001F9FF"  # supplemental symbols & pictographs
	        "\U0001FA00-\U0001FA6F"  # chess symbols
	        "\U0001FA70-\U0001FAFF"  # symbols & pictographs extended-A
	        "\U00002600-\U000026FF"  # miscellaneous symbols
	        "\U00002702-\U000027B0"  # dingbats
	        "\U00002B00-\U00002BFF"  # miscellaneous symbols and arrows
	        "\U000024C2"             # circled latin capital letter M
	        "\U0000FE0F"             # variation selector-16 (emoji presentation)
	        "]+",
	        flags=re.UNICODE,
	    )

	    return emoji_pattern.sub('', html_content)

	def create_html_file(html_content):
	    """Write *html_content* as UTF-8 to ``/tmp/output.html`` and return that path.

	    An existing file at that location is overwritten.
	    """
	    target_path = "/tmp/output.html"
	    with open(target_path, mode="w", encoding="utf-8") as sink:
	        sink.write(html_content)
	    return target_path

	def convert_html_to_pdf(title, author_name, html_filename, pdf_filename):
	    """Render the article HTML, framed by a title heading and a footer, to a PDF.

	    Args:
	        title: Article title, placed in an ``<h1>`` before the content.
	        author_name: Author name shown in the closing "Hashnode / ..." line.
	        html_filename: Path of the UTF-8 HTML file holding the article body.
	        pdf_filename: Path the PDF is written to.

	    The combined document is staged in ``/tmp/temp_output.html``; that file is
	    removed again whether or not the conversion succeeds.
	    """
	    from html import escape  # local import keeps this block self-contained

	    # Title and author are plain text from the API response; escape them so
	    # characters such as "<" or "&" are rendered literally instead of being
	    # parsed as markup.
	    header_html = f"<h1>{escape(title or '')}</h1>"
	    footer_html = (
	        '<div style="text-align: center; margin-top: 20px;">'
	        f"Hashnode / {escape(author_name or '')}</div>"
	    )

	    # Read through a context manager so the handle is closed promptly.
	    with open(html_filename, "r", encoding="utf-8") as html_file:
	        body_html = html_file.read()

	    # The footer is appended after the content, so it appears once at the end
	    # of the document rather than on every page.
	    full_html = header_html + body_html + footer_html

	    temp_html_filename = "/tmp/temp_output.html"
	    try:
	        with open(temp_html_filename, "w", encoding="utf-8") as temp_html_file:
	            temp_html_file.write(full_html)

	        # Convert the combined HTML to PDF
	        converter.convert(f'file:///{os.path.abspath(temp_html_filename)}', pdf_filename, install_driver=False)
	    finally:
	        # Clean up the staging file even when the conversion raises.
	        if os.path.exists(temp_html_filename):
	            os.remove(temp_html_filename)

	def convert_html_to_epub(title, html_filename, epub_filename):
	    """Convert an HTML file to EPUB with Pandoc, then delete the HTML file.

	    Args:
	        title: Value passed to Pandoc as the ``title`` metadata field.
	        html_filename: Path of the input HTML file; removed before returning.
	        epub_filename: Path the EPUB is written to.

	    Raises:
	        subprocess.CalledProcessError: If Pandoc exits with a non-zero status.
	    """
	    try:
	        # check=True surfaces a failed conversion instead of silently leaving
	        # a missing (or stale, from an earlier run) file at epub_filename.
	        subprocess.run(
	            ['pandoc', html_filename, '-o', epub_filename, '--metadata', f'title={title}'],
	            check=True,
	        )
	    finally:
	        # Remove the temporary HTML file whether or not Pandoc succeeded.
	        if os.path.exists(html_filename):
	            os.remove(html_filename)

	def extract_host_and_slug(blog_link):
	    """Split a blog post link into its host and post slug.

	    The expected shape is ``https://host/slug``. The scheme is optional, and
	    surrounding whitespace, a trailing slash, the query string and the
	    fragment are ignored. The slug is the first non-empty path segment.

	    Args:
	        blog_link: The link entered by the user.

	    Returns:
	        A ``(host, slug)`` tuple; a part that cannot be determined is ``""``.
	    """
	    from urllib.parse import urlsplit  # local import keeps this block self-contained

	    link = (blog_link or "").strip()
	    if not link:
	        return "", ""

	    try:
	        parts = urlsplit(link)
	        if not parts.netloc:
	            # No "scheme://" prefix (e.g. "blog.example.com/my-post"): re-parse
	            # as a network-path reference so the host lands in netloc.
	            parts = urlsplit("//" + link)
	    except ValueError:
	        # urlsplit rejects some malformed inputs (e.g. a broken IPv6 literal).
	        return "", ""

	    segments = [segment for segment in parts.path.split("/") if segment]
	    host = parts.netloc
	    slug = segments[0] if segments else ""
	    return host, slug

	@app.route('/', methods=['GET', 'POST'])
	def index():
	    """Render the landing page, with the looked-up post on a POST request.

	    On GET every template variable is an empty string. On POST the
	    ``blog_link`` form field is split into host and slug, the post is
	    fetched, and the results are handed to ``index.html``.
	    """
	    context = {
	        "host": "",
	        "slug": "",
	        "title": "",
	        "author_name": "",
	        "html_content": "",
	    }

	    if request.method == 'POST':
	        post_host, post_slug = extract_host_and_slug(request.form['blog_link'])
	        post_title, post_author, post_html = fetch_html_content(post_host, post_slug)
	        context.update(
	            host=post_host,
	            slug=post_slug,
	            title=post_title,
	            author_name=post_author,
	            html_content=post_html,
	        )

	    return render_template('index.html', **context)

	@app.route('/convert_pdf', methods=['POST'])
	def convert_pdf():
	    """Fetch the post named by the ``host``/``slug`` form fields and return it as a PDF.

	    Returns:
	        The generated ``/tmp/article.pdf`` as a file download, or a plain-text
	        message with status 502 when the article content could not be fetched.
	    """
	    host = request.form['host']
	    slug = request.form['slug']
	    title, author_name, html_content = fetch_html_content(host, slug)

	    # fetch_html_content returns None for every field when the request fails;
	    # passing None on to remove_emojis() would raise a TypeError. An empty
	    # body is rejected as well since there is nothing to convert.
	    if not html_content:
	        return "Could not fetch the article content.", 502

	    html_filename = create_html_file(remove_emojis(html_content))

	    pdf_filename = "/tmp/article.pdf"
	    try:
	        convert_html_to_pdf(title, author_name, html_filename, pdf_filename)
	    finally:
	        # Remove the intermediate HTML file even when the conversion fails.
	        if os.path.exists(html_filename):
	            os.remove(html_filename)

	    # Send the generated PDF as a download
	    return send_file(pdf_filename, as_attachment=True)

	@app.route('/convert_epub', methods=['POST'])
	def convert_epub():
	    """Fetch the post named by the ``host``/``slug`` form fields and return it as an EPUB.

	    Returns:
	        The generated ``/tmp/article.epub`` as a file download, or a plain-text
	        message with status 502 when the article content could not be fetched.
	    """
	    host = request.form['host']
	    slug = request.form['slug']
	    title, _, html_content = fetch_html_content(host, slug)

	    # fetch_html_content returns None for every field when the request fails;
	    # passing None on to remove_emojis() would raise a TypeError. An empty
	    # body is rejected as well since there is nothing to convert.
	    if not html_content:
	        return "Could not fetch the article content.", 502

	    html_filename = create_html_file(remove_emojis(html_content))

	    epub_filename = "/tmp/article.epub"
	    try:
	        convert_html_to_epub(title, html_filename, epub_filename)
	    finally:
	        # Remove the intermediate HTML file if the converter left it behind
	        # (for example because it raised before its own cleanup ran).
	        if os.path.exists(html_filename):
	            os.remove(html_filename)

	    # Send the generated EPUB as a download
	    return send_file(epub_filename, as_attachment=True)

	if __name__ == "__main__":
	    # Started directly as a script: serve on all interfaces, port 7860.
	    app.run(port=7860, host='0.0.0.0')