# sourav520's picture
# Update app.py
# cb0efae verified
from flask import Flask, render_template, request, send_file, jsonify
import fitz # PyMuPDF
from docx import Document
from openpyxl import Workbook
from pdf2docx import Converter
import tempfile
import os
from io import BytesIO
import math
from PyPDF2 import PdfMerger
from flask_cors import CORS
import threading, time, requests
from dotenv import load_dotenv
# Load configuration from the .env file that sits next to this module, so the
# lookup does not depend on the process's current working directory.
basedir = os.path.abspath(os.path.dirname(__file__))
load_dotenv(os.path.join(basedir, '.env'))
# The Flask application object is created once, further down, immediately
# before CORS is applied to it; a second instance is not needed here.
def background_task():
    """Ping the URL stored in the ``url`` environment variable every 10 minutes.

    The function loops forever and never returns, so it is intended to be run
    in a background thread (starting that thread is the caller's job).
    Failures are printed and the loop carries on; nothing is raised.
    """
    while True:
        target = os.environ.get('url')
        if not target:
            # requests.get(None) would only fail with a confusing URL error;
            # report the actual configuration problem instead.
            print("Error: environment variable 'url' is not set")
        else:
            try:
                # Example: ping your own Space or another API.
                # The timeout stops a stalled connection from blocking the
                # loop indefinitely.
                r = requests.get(target, timeout=30)
                print("Ping:", r.status_code)
            except Exception as e:
                # Best-effort keep-alive: log and retry on the next cycle.
                print("Error:", e)
        time.sleep(600)
# Application object that every route below is registered on; CORS is enabled
# on it with the flask-cors defaults.
app = Flask(__name__)
CORS(app)

# Scratch directories used by the compression workflow: uploads are written to
# UPLOAD_FOLDER, results to OUTPUT_FOLDER.  Both are created at import time.
UPLOAD_FOLDER = "/tmp/uploads"
OUTPUT_FOLDER = "/tmp/compressed"
for _folder in (UPLOAD_FOLDER, OUTPUT_FOLDER):
    os.makedirs(_folder, exist_ok=True)
@app.route('/')
def index():
    """Serve the landing page."""
    landing_template = 'index.html'
    return render_template(landing_template)
@app.route('/pdf-to-text', methods=['GET', 'POST'])
def pdf_to_text():
    """Extract the text of an uploaded PDF and return it as ``output.txt``.

    GET renders the upload form.  POST reads the PDF from the ``pdf`` file
    field, concatenates the text of every page and sends the result back as a
    downloadable attachment.
    """
    if request.method != 'POST':
        return render_template('pdf_to_text.html')

    file = request.files['pdf']
    # The context manager closes the PyMuPDF document even when text
    # extraction raises, instead of leaving it to the garbage collector.
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        text = ''.join(page.get_text() for page in doc)

    # A BytesIO built from bytes is already positioned at offset 0.
    output = BytesIO(text.encode())
    return send_file(output, download_name="output.txt", as_attachment=True)
@app.route('/pdf-to-word', methods=['GET', 'POST'])
def pdf_to_word():
    """Convert an uploaded PDF into a plain-text Word document.

    GET renders the upload form.  POST reads the PDF from the ``pdf`` file
    field, adds the extracted text of each page as one paragraph of a new
    document and returns it as ``output.docx``.
    """
    if request.method != 'POST':
        return render_template('pdf_to_word.html')

    file = request.files['pdf']
    word_doc = Document()
    # The context manager closes the PyMuPDF document even when extraction
    # raises part-way through.
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        for page in doc:
            word_doc.add_paragraph(page.get_text())

    output = BytesIO()
    word_doc.save(output)
    output.seek(0)  # rewind so send_file streams from the beginning
    return send_file(output, download_name="output.docx", as_attachment=True)
@app.route('/pdf-to-excel', methods=['GET', 'POST'])
def pdf_to_excel():
    """Convert an uploaded PDF into a rough spreadsheet (``output.xlsx``).

    GET renders the upload form.  POST reads the PDF from the ``pdf`` file
    field and lays its words out on a single worksheet:

    * words whose top edge falls into the same 5-unit vertical bucket share
      a worksheet row;
    * the column is derived from the word's left edge in 50-unit buckets;
    * words landing in the same cell are joined with a space;
    * rows of later pages are appended below those of earlier pages.
    """
    if request.method != 'POST':
        return render_template('pdf_to_excel.html')

    file = request.files['pdf']
    wb = Workbook()
    ws = wb.active
    # Next free worksheet row.  It is carried across pages so that page 2
    # continues below page 1 instead of overwriting it from row 1 again.
    next_row = 1

    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        for page in doc:
            # Each entry starts with (x0, y0, x1, y1, word); sort by y then x
            # so lines are discovered top to bottom.
            words = sorted(
                page.get_text("words"),
                key=lambda entry: (entry[1], entry[0]),
            )

            # Group words by y position; dict order keeps top-to-bottom order.
            lines = {}
            for entry in words:
                lines.setdefault(round(entry[1] / 5), []).append(entry)

            for line_words in lines.values():
                # Left-to-right inside a line, so joined cells read naturally.
                for entry in sorted(line_words, key=lambda item: item[0]):
                    x0, word = entry[0], entry[4]
                    # Adjust the divisor to control column spacing.  Clamp to
                    # 1 because a negative x0 would yield an invalid column.
                    col = max(1, math.floor(x0 / 50) + 1)
                    cell = ws.cell(row=next_row, column=col)
                    if cell.value is None:
                        cell.value = word
                    else:
                        # Keep every word instead of overwriting the cell.
                        cell.value = f"{cell.value} {word}"
                next_row += 1

    output = BytesIO()
    wb.save(output)
    output.seek(0)  # rewind so send_file streams from the beginning
    return send_file(output, download_name="output.xlsx", as_attachment=True)
@app.route("/pdf_merge")
def index_pdf_merge():
    """Serve the upload page of the PDF merge tool."""
    merge_template = "pdf_marge.html"
    return render_template(merge_template)
@app.route("/merge", methods=["POST"])
def merge_pdfs():
    """Merge the uploaded PDFs, in upload order, into ``merged.pdf``.

    Expects at least two files in the ``pdfs`` file field.

    Returns:
        The merged PDF as an attachment; a JSON ``{"error": ...}`` body with
        status 400 when fewer than two files were supplied, or with status 500
        when merging fails.
    """
    import traceback

    merger = None
    try:
        # must match <input name="pdfs">
        # A blank file input is still submitted as one part with an empty
        # filename; such parts are not real uploads, so ignore them.
        files = [f for f in request.files.getlist("pdfs") if f.filename]
        if len(files) < 2:
            return jsonify({"error": "Please upload at least 2 PDF files"}), 400

        # merge PDFs
        merger = PdfMerger()
        for file in files:
            merger.append(file)

        output = BytesIO()
        merger.write(output)
        output.seek(0)  # rewind so send_file streams from the beginning

        return send_file(
            output,
            download_name="merged.pdf",
            as_attachment=True
        )
    except Exception as e:
        print("❌ Merge error:", traceback.format_exc())
        return jsonify({"error": str(e)}), 500
    finally:
        # Release the merger on the error path as well as on success.
        if merger is not None:
            merger.close()
def compress_pdf(input_path, output_path, preset="medium", target_kb=None):
    """Write a smaller copy of the PDF at *input_path* to *output_path*.

    Args:
        input_path: Path of the source PDF.
        output_path: Path the result is written to; it is overwritten on
            every attempt.
        preset: ``"low"``, ``"medium"`` or ``"high"``.  Selects the
            ``garbage`` level passed to PyMuPDF's ``save`` (3, 2 and 1
            respectively); unknown values fall back to ``"medium"``.
        target_kb: Optional size goal in kilobytes.  When truthy, every page
            is rendered to an image and the document is rebuilt from those
            images at decreasing zoom (1.0 down to 0.3 in steps of 0.1) until
            the output is small enough or the minimum zoom has been tried.
            When falsy, the document is only re-saved with the preset options.

    The file from the last attempt is kept even if the target was not reached.
    """
    # Presets
    preset_options = {
        "low": {"garbage": 3, "deflate": True, "clean": True},
        "medium": {"garbage": 2, "deflate": True, "clean": True},
        "high": {"garbage": 1, "deflate": True, "clean": True},
    }
    opts = preset_options.get(preset, preset_options["medium"])

    # The context manager closes the source document on every exit path.
    with fitz.open(input_path) as doc:
        if not target_kb:
            doc.save(output_path, **opts)
            return

        # Step through integer percentages rather than subtracting 0.1 from a
        # float: 1.0 - 0.1 * 7 accumulates to 0.30000000000000016, which is
        # not <= 0.3 and would trigger an unintended extra pass at ~0.2.
        for zoom_pct in range(100, 29, -10):
            zoom = zoom_pct / 100
            new_doc = fitz.open()
            try:
                for page in doc:
                    pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
                    rect = fitz.Rect(0, 0, pix.width, pix.height)
                    new_page = new_doc.new_page(
                        width=pix.width, height=pix.height
                    )
                    new_page.insert_image(rect, pixmap=pix)
                new_doc.save(output_path, **opts)
            finally:
                # Close the temporary document even when rendering or saving
                # fails.
                new_doc.close()

            size_kb = os.path.getsize(output_path) / 1024
            if size_kb <= target_kb:
                break
@app.route("/compress", methods=['POST', 'GET'])
def compress():
    """Compress an uploaded PDF and report the name of the result file.

    GET renders the upload form.  POST reads:

    * ``pdf`` (file field) - the document to compress;
    * ``preset`` (form field, default ``"medium"``) - forwarded to
      ``compress_pdf``;
    * ``target_kb`` (form field, optional) - desired size in kilobytes;
      values that are not positive integers are ignored.

    Returns:
        JSON ``{"success": True, "filename": ...}`` on success, where
        ``filename`` is the name of the file written to ``OUTPUT_FOLDER``, or
        ``{"success": False, "error": ...}`` on failure.
    """
    if request.method != 'POST':
        return render_template('compress.html')

    if "pdf" not in request.files:
        return jsonify({"success": False, "error": "No file uploaded"})

    file = request.files["pdf"]
    preset = request.form.get("preset", "medium")

    target_kb = request.form.get("target_kb")
    if target_kb:
        try:
            target_kb = int(target_kb)
        except ValueError:
            target_kb = None
        else:
            # A zero or negative goal can never be met; treat it as "no goal".
            if target_kb <= 0:
                target_kb = None

    # The filename is client-controlled.  Keep only its last path component so
    # names such as "../../x" or "/etc/x" cannot escape the work directories
    # (os.path.join discards the folder when given an absolute path).
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        return jsonify({"success": False, "error": "Invalid or missing file name"})

    input_path = os.path.join(UPLOAD_FOLDER, safe_name)
    output_name = "compressed_" + safe_name
    output_path = os.path.join(OUTPUT_FOLDER, output_name)

    try:
        file.save(input_path)
        compress_pdf(input_path, output_path, preset, target_kb)
        return jsonify({"success": True, "filename": output_name})
    except Exception as e:
        return jsonify({"success": False, "error": str(e)})
    finally:
        # The uploaded original is not needed once compression has finished
        # or failed; removing it is best-effort, so a missing file is fine.
        try:
            os.remove(input_path)
        except OSError:
            pass
@app.route("/download/<filename>")
def download(filename):
    """Send a file from ``OUTPUT_FOLDER`` as an attachment.

    Args:
        filename: Name of the file inside ``OUTPUT_FOLDER``, taken from the
            URL.

    ``send_from_directory`` is used instead of joining the path by hand: it
    refuses names that resolve outside the folder and answers 404 for a
    missing file rather than failing with a server error.
    """
    # Function-scope import: the module's flask import list does not include
    # this name.
    from flask import send_from_directory

    return send_from_directory(OUTPUT_FOLDER, filename, as_attachment=True)
if __name__ == '__main__':
    # Direct execution only: start Flask's built-in server, listening on all
    # network interfaces on port 7860.
    app.run(port=7860, host='0.0.0.0')