NavigateToPage / app.py
Marthee's picture
Update app.py
4b67bf3 verified
from flask import Flask, send_file, render_template, request, jsonify
import requests
from io import BytesIO
import fitz # PyMuPDF
# Module-level state shared by the routes below. It is overwritten by the
# /api/process-data handler and read by the "/" and /view-pdf handlers.
pdf_content = None  # bytes of the highlighted PDF; None while nothing is available
pageNumTextFound = 0  # 1-based page number passed to the viewer; 0 when no PDF was produced
# Base URL used to build the "download_link" returned by /api/process-data.
BASE_URL="https://marthee-navigatetopage.hf.space"
app = Flask(__name__)
@app.route("/", methods=["GET", "POST"])
def getInfotoMeasure():
    """Serve the viewer page for the stored PDF.

    Returns:
        The rendered ``gui.html`` template with ``page`` set to the stored
        page number, or a plain-text 404 response while no PDF is stored.
    """
    if pdf_content is not None:
        # The template receives the page number that was recorded when the
        # PDF was processed.
        return render_template("gui.html", page=pageNumTextFound)

    return "No PDF content available.", 404
@app.route('/view-pdf', methods=['GET'])
def download_pdf():
    """Send the stored PDF for inline display.

    Returns:
        A ``send_file`` response carrying the stored PDF bytes with the
        ``application/pdf`` MIME type (not as an attachment), or a plain-text
        404 response while no PDF is stored.
    """
    if pdf_content is None:
        return "PDF content not found.", 404

    # send_file needs a file-like object, so wrap the stored bytes.
    stream = BytesIO(pdf_content)
    suggested_name = f"highlighted_page_{pageNumTextFound}.pdf"
    return send_file(
        stream,
        mimetype='application/pdf',
        as_attachment=False,
        download_name=suggested_name,
    )
# Route to handle external webhook
@app.route('/api/process-data', methods=['POST'])
def receive_pdf_data():
    """Process a PDF named in the posted form data and store the result.

    Reads the ``pdf_link`` and ``keyword`` form fields, runs
    ``highlight_text_from_pdf`` on the link, and stores the returned PDF bytes
    and page number in the module-level ``pdf_content`` and
    ``pageNumTextFound``.

    Returns:
        A JSON response:
        * 400 with an ``error`` message when either form field is missing or
          empty;
        * 404 with an ``error`` message when no PDF content was produced;
        * 500 with the exception text when processing raises;
        * otherwise a success ``message`` and a ``download_link`` pointing at
          ``/view-pdf`` with a ``#page=`` fragment.
    """
    global pdf_content, pageNumTextFound

    form = request.form
    pdf_link = form.get('pdf_link')
    keyword = form.get('keyword')
    print('receiveddd', pdf_link, keyword)

    if not (pdf_link and keyword):
        return jsonify({"error": "Both 'pdf_link' and 'keyword' must be provided."}), 400

    try:
        # The helper takes a list of links; wrap the single posted link.
        pdf_content, pageNumTextFound = highlight_text_from_pdf([pdf_link], keyword)
        if pdf_content is None:
            return jsonify({"error": "No valid PDF content found."}), 404

        download_link = f"{BASE_URL}/view-pdf#page={pageNumTextFound}"
        return jsonify({
            "message": "PDF processed successfully.",
            "download_link": download_link,
        })
    except Exception as e:
        return jsonify({"error": str(e)}), 500
def highlight_text_from_pdf(pdfshareablelinks, keyword):
    """Download a PDF and highlight every occurrence of *keyword* in it.

    The links are tried in order and the first one that downloads
    successfully is processed. The function works purely on local data: it
    does not touch the module-level ``pdf_content`` / ``pageNumTextFound``,
    so a failure part-way through cannot leave them in a half-updated state.

    Args:
        pdfshareablelinks: Iterable of candidate URLs. Entries that are
            empty, or that contain neither ``'http'`` nor ``'dropbox'``, are
            skipped.
        keyword: Text searched for on every page of the PDF.

    Returns:
        A ``(pdf_bytes, page_number)`` tuple. ``pdf_bytes`` is the saved PDF
        including the highlight annotations. ``page_number`` is the 1-based
        number of the last page containing a match, or 1 when the keyword
        does not occur. ``(None, 0)`` is returned when no link yields a
        download, including when the iterable is empty.

    Raises:
        Exceptions raised by ``requests`` (including timeouts) and by PyMuPDF
        are propagated to the caller unchanged.
    """
    for link in pdfshareablelinks:
        source_bytes = _fetch_pdf_bytes(link)
        if source_bytes is not None:
            return _highlight_keyword(source_bytes, keyword)
    return None, 0


# Upper bound, in seconds, for the download request so that an unresponsive
# host cannot block the caller indefinitely.
_DOWNLOAD_TIMEOUT_SECONDS = 30


def _fetch_pdf_bytes(link):
    """Return the bytes downloaded from *link*, or ``None`` if it is unusable."""
    if not link or not ('http' in link or 'dropbox' in link):
        return None

    # Rewrite a "dl=0" query flag to "dl=1" before fetching (Dropbox share
    # links use dl=1 to request the file itself).
    direct_link = link.replace('dl=0', 'dl=1')
    response = requests.get(direct_link, timeout=_DOWNLOAD_TIMEOUT_SECONDS)
    if response.status_code != 200:
        return None
    return response.content


def _highlight_keyword(source_bytes, keyword):
    """Highlight *keyword* on every page; return (PDF bytes, last matching page)."""
    page_number = 1  # reported when the keyword is not found on any page
    document = fitz.open(stream=source_bytes, filetype="pdf")
    try:
        for index in range(document.page_count):
            page = document.load_page(index)
            hits = page.search_for(keyword)
            if hits:
                for rect in hits:
                    page.add_highlight_annot(rect)
                # Later matches overwrite earlier ones, so the last matching
                # page is the one reported.
                page_number = index + 1

        output = BytesIO()
        document.save(output)
    finally:
        # Release the document even if searching or saving raised.
        document.close()
    return output.getvalue(), page_number
if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces, port 7860.
    app.run(port=7860, host="0.0.0.0")
# from flask import Flask, send_file, render_template, request
# import requests
# from io import BytesIO
# import fitz # PyMuPDF
# # Define global variables to retain the PDF content across function calls
# pdf_content = None
# pageNumTextFound = 0
# app = Flask(__name__)
# @app.route("/", methods=["GET", "POST"])
# def getInfotoMeasure():
# global pdf_content, pageNumTextFound
# pdf_link = ['https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0']
# keyword = "To be read with preliminaries/ general conditions"
# # Call the function to process the PDF
# pdf_content, pageNumTextFound = highlight_text_from_pdf(pdf_link, keyword)
# # Render the GUI with the current page number
# return render_template("gui.html", page=pageNumTextFound)
# @app.route('/view-pdf', methods=['GET'])
# def download_pdf():
# global pdf_content, pageNumTextFound
# if pdf_content is None:
# return "PDF content not found.", 404
# pdf_bytes = BytesIO(pdf_content)
# return send_file(
# pdf_bytes,
# mimetype='application/pdf',
# as_attachment=False,
# download_name=f"highlighted_page_{pageNumTextFound}.pdf"
# )
# def highlight_text_from_pdf(pdfshareablelinks, keyword):
# print('PDF Links:', pdfshareablelinks)
# for link in pdfshareablelinks:
# pdf_content = None
# if link and ('http' in link or 'dropbox' in link):
# if 'dl=0' in link:
# link = link.replace('dl=0', 'dl=1')
# response = requests.get(link)
# if response.status_code == 200:
# pdf_content = BytesIO(response.content)
# if pdf_content is None:
# raise ValueError("No valid PDF content found.")
# pageNumTextFound = 1
# pdf_document = fitz.open(stream=pdf_content, filetype="pdf")
# for page_num in range(pdf_document.page_count):
# page = pdf_document.load_page(page_num)
# matched = page.search_for(keyword)
# if matched:
# for word in matched:
# page.add_highlight_annot(word)
# pageNumTextFound = page_num + 1
# # Save PDF content to memory and return it along with the page number
# pdf_bytes = BytesIO()
# pdf_document.save(pdf_bytes)
# pdf_document.close()
# return pdf_bytes.getvalue(), pageNumTextFound
# if __name__ == '__main__':
# app.run(host='0.0.0.0', port=7860)