Spaces:

Marthee
/

NavigateToPage

Sleeping

App Files Files Community

NavigateToPage / app.py

Marthee

Update app.py

4b67bf3 verified about 1 year ago

raw

history blame contribute delete

5.77 kB

	from flask import Flask, send_file, render_template, request, jsonify
	import requests
	from io import BytesIO
	import fitz # PyMuPDF

	# Module-level state shared by the route handlers in this file.
	# pdf_content: bytes of the most recently highlighted PDF, or None until
	#   a PDF has been processed successfully.
	# pageNumTextFound: 1-based page number reported for the keyword
	#   (0 before anything has been processed).
	pdf_content = None
	pageNumTextFound = 0

	# Public base URL used to build the link returned by /api/process-data.
	BASE_URL = "https://marthee-navigatetopage.hf.space"

	app = Flask(__name__)

	@app.route("/", methods=["GET", "POST"])
	def getInfotoMeasure():
	    """Render the viewer page for the most recently processed PDF.

	    Returns:
	        The rendered ``gui.html`` template with ``page`` set to the stored
	        page number, or a plain-text 404 response when no PDF has been
	        stored yet.
	    """
	    # The module-level values are only read here, so no ``global`` is needed.
	    if pdf_content is not None:
	        # Hand the current page number to the GUI template.
	        return render_template("gui.html", page=pageNumTextFound)

	    return "No PDF content available.", 404

	@app.route('/view-pdf', methods=['GET'])
	def download_pdf():
	    """Serve the stored highlighted PDF for in-browser viewing.

	    Returns:
	        A ``send_file`` response (``application/pdf``, not sent as an
	        attachment) wrapping the stored bytes, or a plain-text 404 response
	        when no PDF is stored.
	    """
	    if pdf_content is None:
	        return "PDF content not found.", 404

	    # The suggested filename carries the page number found for the keyword.
	    filename = f"highlighted_page_{pageNumTextFound}.pdf"
	    return send_file(
	        BytesIO(pdf_content),
	        mimetype='application/pdf',
	        as_attachment=False,
	        download_name=filename,
	    )

	# Route to handle external webhook
	@app.route('/api/process-data', methods=['POST'])
	def receive_pdf_data():
	    """Process a PDF link/keyword pair posted as form data.

	    Reads the ``pdf_link`` and ``keyword`` form fields, passes them to
	    ``highlight_text_from_pdf`` and stores the result in the module-level
	    ``pdf_content`` / ``pageNumTextFound`` so the viewer routes can serve it.

	    Returns:
	        A JSON response:
	        * 400 with an ``error`` message when either field is missing/empty,
	        * 404 with an ``error`` message when no PDF could be obtained,
	        * 500 with the exception text when processing raises,
	        * 200 with ``message`` and ``download_link`` on success.
	    """
	    global pdf_content, pageNumTextFound

	    # Extract PDF link and keyword from the request payload
	    pdf_link = request.form.get('pdf_link')
	    keyword = request.form.get('keyword')
	    print('receiveddd', pdf_link, keyword)
	    if not pdf_link or not keyword:
	        return jsonify({"error": "Both 'pdf_link' and 'keyword' must be provided."}), 400

	    # NOTE(security): pdf_link is caller-supplied and is fetched server-side;
	    # consider restricting it to an allow-list of hosts.
	    try:
	        # Only the processing call is guarded, so unrelated errors are not
	        # reported as PDF-processing failures.
	        pdf_content, pageNumTextFound = highlight_text_from_pdf([pdf_link], keyword)
	    except Exception as exc:
	        # Keep the JSON error for the caller, but record the traceback
	        # server-side instead of discarding it.
	        app.logger.exception("Failed to process PDF from %s", pdf_link)
	        return jsonify({"error": str(exc)}), 500

	    if pdf_content is None:
	        return jsonify({"error": "No valid PDF content found."}), 404

	    return jsonify({
	        "message": "PDF processed successfully.",
	        "download_link": f"{BASE_URL}/view-pdf#page={pageNumTextFound}"
	    })

	def _download_pdf_bytes(link, timeout=30):
	    """Return the response body of *link* as bytes, or None if it is unusable."""
	    # NOTE(security): the URL comes from the caller and is fetched as-is;
	    # the substring check below is not a real validation of the target host.
	    if not link or not ('http' in link or 'dropbox' in link):
	        return None

	    # Dropbox share links serve an HTML preview with dl=0; dl=1 requests the file.
	    if 'dl=0' in link:
	        link = link.replace('dl=0', 'dl=1')

	    # A timeout keeps an unresponsive server from blocking the request forever.
	    response = requests.get(link, timeout=timeout)
	    if response.status_code != 200:
	        return None
	    return response.content


	def _highlight_keyword(raw_pdf, keyword):
	    """Highlight *keyword* on every page of *raw_pdf*; return (bytes, page number)."""
	    pdf_document = fitz.open(stream=raw_pdf, filetype="pdf")
	    try:
	        # Defaults to the first page when the keyword is not found anywhere.
	        page_number = 1
	        for page_index in range(pdf_document.page_count):
	            page = pdf_document.load_page(page_index)
	            hits = page.search_for(keyword)
	            if hits:
	                for rect in hits:
	                    page.add_highlight_annot(rect)
	                # NOTE(review): this keeps the LAST page containing a match,
	                # as the original code did - confirm that the first match is
	                # not what was intended.
	                page_number = page_index + 1

	        output = BytesIO()
	        pdf_document.save(output)
	        return output.getvalue(), page_number
	    finally:
	        # Release the document even when searching or saving fails.
	        pdf_document.close()


	def highlight_text_from_pdf(pdfshareablelinks, keyword):
	    """Download a PDF from the given links and highlight *keyword* in it.

	    The links are tried in order; the first one that downloads successfully
	    is processed and returned. Module-level state is not touched here, so a
	    failure part-way through cannot leave it half-updated.

	    Args:
	        pdfshareablelinks: Iterable of candidate URLs (entries that are empty
	            or contain neither ``http`` nor ``dropbox`` are skipped).
	        keyword: Text to search for on each page.

	    Returns:
	        ``(pdf_bytes, page_number)`` where ``pdf_bytes`` is the annotated PDF
	        and ``page_number`` is 1-based, or ``(None, 0)`` when no link yields
	        a PDF (including an empty list of links).

	    Raises:
	        Whatever ``requests`` or ``fitz`` raise on network or parsing errors.
	    """
	    for link in pdfshareablelinks:
	        raw_pdf = _download_pdf_bytes(link)
	        if raw_pdf is not None:
	            return _highlight_keyword(raw_pdf, keyword)

	    return None, 0

	if __name__ == '__main__':
	    # Listen on every network interface, on port 7860.
	    listen_host, listen_port = '0.0.0.0', 7860
	    app.run(host=listen_host, port=listen_port)


	# from flask import Flask, send_file, render_template, request
	# import requests
	# from io import BytesIO
	# import fitz # PyMuPDF

	# # Define global variables to retain the PDF content across function calls
	# pdf_content = None
	# pageNumTextFound = 0

	# app = Flask(__name__)

	# @app.route("/", methods=["GET", "POST"])
	# def getInfotoMeasure():
	# global pdf_content, pageNumTextFound

	# pdf_link = ['https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0']
	# keyword = "To be read with preliminaries/ general conditions"

	# # Call the function to process the PDF
	# pdf_content, pageNumTextFound = highlight_text_from_pdf(pdf_link, keyword)

	# # Render the GUI with the current page number
	# return render_template("gui.html", page=pageNumTextFound)

	# @app.route('/view-pdf', methods=['GET'])
	# def download_pdf():
	# global pdf_content, pageNumTextFound

	# if pdf_content is None:
	# return "PDF content not found.", 404

	# pdf_bytes = BytesIO(pdf_content)
	# return send_file(
	# pdf_bytes,
	# mimetype='application/pdf',
	# as_attachment=False,
	# download_name=f"highlighted_page_{pageNumTextFound}.pdf"
	# )

	# def highlight_text_from_pdf(pdfshareablelinks, keyword):
	# print('PDF Links:', pdfshareablelinks)

	# for link in pdfshareablelinks:
	# pdf_content = None

	# if link and ('http' in link or 'dropbox' in link):
	# if 'dl=0' in link:
	# link = link.replace('dl=0', 'dl=1')

	# response = requests.get(link)

	# if response.status_code == 200:
	# pdf_content = BytesIO(response.content)

	# if pdf_content is None:
	# raise ValueError("No valid PDF content found.")

	# pageNumTextFound = 1
	# pdf_document = fitz.open(stream=pdf_content, filetype="pdf")

	# for page_num in range(pdf_document.page_count):
	# page = pdf_document.load_page(page_num)
	# matched = page.search_for(keyword)

	# if matched:
	# for word in matched:
	# page.add_highlight_annot(word)

	# pageNumTextFound = page_num + 1

	# # Save PDF content to memory and return it along with the page number
	# pdf_bytes = BytesIO()
	# pdf_document.save(pdf_bytes)
	# pdf_document.close()

	# return pdf_bytes.getvalue(), pageNumTextFound

	# if __name__ == '__main__':
	# app.run(host='0.0.0.0', port=7860)