Spaces:
Sleeping
Sleeping
| from flask import Flask, send_file, render_template, request, jsonify | |
| import requests | |
| from io import BytesIO | |
| import fitz # PyMuPDF | |
# Module-level state shared by the request handlers below. Only a single
# slot is kept, so every successful webhook call replaces the previous PDF.
pdf_content = None  # bytes of the most recently highlighted PDF, or None
pageNumTextFound = 0  # 1-based page number reported for the keyword; 0 until a PDF is processed

# Base URL used to build the viewer link returned by the webhook handler.
BASE_URL = "https://marthee-navigatetopage.hf.space"

app = Flask(__name__)
# NOTE(review): no ``@app.route`` decorator is visible on this handler; the
# commented-out earlier version at the end of the file registers it at "/".
# Confirm the route is actually registered.
def getInfotoMeasure():
    """Render the viewer page for the most recently processed PDF.

    Returns:
        The rendered ``gui.html`` template with ``page`` set to the stored
        page number, or a ``(message, 404)`` tuple when no PDF has been
        stored yet.
    """
    # The handler only reads module state, so no ``global`` declaration
    # is needed.
    if pdf_content is None:
        return "No PDF content available.", 404

    # Render the GUI with the current page number.
    return render_template("gui.html", page=pageNumTextFound)
# NOTE(review): no ``@app.route`` decorator is visible on this handler. The
# link built by the webhook handler points at ``/view-pdf`` and the
# commented-out earlier version registers it there; confirm the route exists.
def download_pdf():
    """Serve the stored highlighted PDF for inline viewing.

    Returns:
        A ``send_file`` response streaming the stored PDF bytes with the
        ``application/pdf`` MIME type, or a ``(message, 404)`` tuple when no
        PDF has been stored yet.
    """
    # The handler only reads module state, so no ``global`` declaration
    # is needed.
    if pdf_content is None:
        return "PDF content not found.", 404

    # ``send_file`` needs a file-like object, so wrap the stored bytes in a
    # fresh in-memory stream for every response.
    return send_file(
        BytesIO(pdf_content),
        mimetype='application/pdf',
        as_attachment=False,
        download_name=f"highlighted_page_{pageNumTextFound}.pdf",
    )
# Route to handle external webhook
# NOTE(review): no ``@app.route`` decorator is visible on this handler;
# confirm the webhook URL rule is registered.
def receive_pdf_data():
    """Process a webhook request asking to highlight a keyword in a PDF.

    Reads the ``pdf_link`` and ``keyword`` form fields, passes them to
    ``highlight_text_from_pdf`` and, on success, stores the highlighted PDF
    and its page number in the module-level state.

    Returns:
        A JSON response with ``message`` and ``download_link`` on success;
        otherwise a ``(json, status)`` tuple with an ``error`` key:
        400 when a field is missing or empty, 404 when no PDF could be
        obtained, 500 when processing raised an exception.
    """
    global pdf_content, pageNumTextFound

    # Extract PDF link and keyword from the request payload.
    pdf_link = request.form.get('pdf_link')
    keyword = request.form.get('keyword')
    print('receiveddd', pdf_link, keyword)

    if not pdf_link or not keyword:
        return jsonify({"error": "Both 'pdf_link' and 'keyword' must be provided."}), 400

    try:
        # Hold the result in locals first: this handler assigns the shared
        # module state only after processing has succeeded.
        new_content, new_page = highlight_text_from_pdf([pdf_link], keyword)
    except Exception as e:
        # Request boundary: report any processing failure as a 500.
        return jsonify({"error": str(e)}), 500

    if new_content is None:
        return jsonify({"error": "No valid PDF content found."}), 404

    pdf_content, pageNumTextFound = new_content, new_page
    return jsonify({
        "message": "PDF processed successfully.",
        # The ``#page=N`` fragment carries the matched page number.
        "download_link": f"{BASE_URL}/view-pdf#page={pageNumTextFound}"
    })
def highlight_text_from_pdf(pdfshareablelinks, keyword, timeout=30):
    """Download a PDF and highlight every occurrence of *keyword* in it.

    Args:
        pdfshareablelinks: Iterable of URL strings. Entries that are empty
            or contain neither ``http`` nor ``dropbox`` are skipped. A
            ``dl=0`` query flag is rewritten to ``dl=1`` before fetching.
        keyword: Text searched for on every page.
        timeout: Seconds passed to ``requests.get`` so that an unresponsive
            host cannot block the caller indefinitely.

    Returns:
        ``(pdf_bytes, page_number)`` where ``pdf_bytes`` is the saved,
        highlighted document and ``page_number`` is the 1-based number of
        the last page containing a match (1 when nothing matched), or
        ``(None, 0)`` when no link produced a PDF.

    NOTE(review): the original indentation was lost, so the intended
    behaviour for more than one link is unclear. This version uses the
    first link that downloads successfully; the only visible caller passes
    a single link.
    """
    # Work on locals only. The module-level ``pdf_content`` and
    # ``pageNumTextFound`` are assigned by the caller from the return value,
    # so using them as scratch space here could leave them in a
    # half-updated state if an exception is raised part-way through.
    pdf_data = None
    for link in pdfshareablelinks:
        if not link or not ('http' in link or 'dropbox' in link):
            continue
        if 'dl=0' in link:
            link = link.replace('dl=0', 'dl=1')
        # SECURITY NOTE(review): the URL is supplied by the caller and is
        # fetched server-side without restriction; consider validating the
        # host if this is reachable with untrusted input.
        response = requests.get(link, timeout=timeout)
        if response.status_code == 200:
            pdf_data = response.content
            break

    if pdf_data is None:
        return None, 0

    page_found = 1  # reported when the keyword is not found on any page
    pdf_document = fitz.open(stream=pdf_data, filetype="pdf")
    try:
        for page_num in range(pdf_document.page_count):
            page = pdf_document.load_page(page_num)
            matches = page.search_for(keyword)
            if matches:
                for rect in matches:
                    page.add_highlight_annot(rect)
                # Later matching pages overwrite earlier ones, so the last
                # page with a hit is the one reported.
                page_found = page_num + 1

        # Serialise the annotated document to memory.
        output = BytesIO()
        pdf_document.save(output)
    finally:
        # Release the document even when searching or saving fails.
        pdf_document.close()

    return output.getvalue(), page_found
if __name__ == '__main__':
    # When run as a script, start the Flask server listening on all
    # interfaces on port 7860.
    app.run(host='0.0.0.0', port=7860)
| # from flask import Flask, send_file, render_template, request | |
| # import requests | |
| # from io import BytesIO | |
| # import fitz # PyMuPDF | |
| # # Define local variables to retain the PDF content across function calls | |
| # pdf_content = None | |
| # pageNumTextFound = 0 | |
| # app = Flask(__name__) | |
| # @app.route("/", methods=["GET", "POST"]) | |
| # def getInfotoMeasure(): | |
| # global pdf_content, pageNumTextFound | |
| # pdf_link = ['https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0'] | |
| # keyword = "To be read with preliminaries/ general conditions" | |
| # # Call the function to process the PDF | |
| # pdf_content, pageNumTextFound = highlight_text_from_pdf(pdf_link, keyword) | |
| # # Render the GUI with the current page number | |
| # return render_template("gui.html", page=pageNumTextFound) | |
| # @app.route('/view-pdf', methods=['GET']) | |
| # def download_pdf(): | |
| # global pdf_content, pageNumTextFound | |
| # if pdf_content is None: | |
| # return "PDF content not found.", 404 | |
| # pdf_bytes = BytesIO(pdf_content) | |
| # return send_file( | |
| # pdf_bytes, | |
| # mimetype='application/pdf', | |
| # as_attachment=False, | |
| # download_name=f"highlighted_page_{pageNumTextFound}.pdf" | |
| # ) | |
| # def highlight_text_from_pdf(pdfshareablelinks, keyword): | |
| # print('PDF Links:', pdfshareablelinks) | |
| # for link in pdfshareablelinks: | |
| # pdf_content = None | |
| # if link and ('http' in link or 'dropbox' in link): | |
| # if 'dl=0' in link: | |
| # link = link.replace('dl=0', 'dl=1') | |
| # response = requests.get(link) | |
| # if response.status_code == 200: | |
| # pdf_content = BytesIO(response.content) | |
| # if pdf_content is None: | |
| # raise ValueError("No valid PDF content found.") | |
| # pageNumTextFound = 1 | |
| # pdf_document = fitz.open(stream=pdf_content, filetype="pdf") | |
| # for page_num in range(pdf_document.page_count): | |
| # page = pdf_document.load_page(page_num) | |
| # matched = page.search_for(keyword) | |
| # if matched: | |
| # for word in matched: | |
| # page.add_highlight_annot(word) | |
| # pageNumTextFound = page_num + 1 | |
| # # Save PDF content to memory and return it along with the page number | |
| # pdf_bytes = BytesIO() | |
| # pdf_document.save(pdf_bytes) | |
| # pdf_document.close() | |
| # return pdf_bytes.getvalue(), pageNumTextFound | |
| # if __name__ == '__main__': | |
| # app.run(host='0.0.0.0', port=7860) | |