KushwanthK committed on
Commit
f0b5e94
·
verified ·
1 Parent(s): f98ad5c

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +22 -4
  2. requirements.txt +5 -12
app.py CHANGED
@@ -422,6 +422,8 @@ import nltk
422
  nltk.download('stopwords')
423
  from nltk.corpus import stopwords
424
  from collections import Counter
 
 
425
 
426
  def highlight_pdf(file_path, text_to_highlight, page_numbers):
427
  # Create a temporary file to save the modified PDF
@@ -484,6 +486,19 @@ def highlight_pdf(file_path, text_to_highlight, page_numbers):
484
 
485
  # Example usage
486
 
 
 
 
 
 
 
 
 
 
 
 
 
 
487
  # Function to display PDF in Streamlit
488
  def display_highlighted_pdf():
489
  pdf_path = "Bhagavad-Gita-As-It-Is.pdf"
@@ -494,12 +509,15 @@ def display_highlighted_pdf():
494
 
495
  print(highlighted_pdf_path)
496
 
497
- with open(highlighted_pdf_path, "rb") as file:
498
- pdf_bytes = file.read()
499
 
500
- # Use pdf_viewer to display the PDF in Streamlit
501
- pdf_viewer(pdf_bytes, width=700)
 
502
 
 
 
503
 
504
  display_highlighted_pdf()
505
 
 
422
  nltk.download('stopwords')
423
  from nltk.corpus import stopwords
424
  from collections import Counter
425
+ from streamlit_image_zoom import image_zoom
426
+ from PIL import Image
427
 
428
  def highlight_pdf(file_path, text_to_highlight, page_numbers):
429
  # Create a temporary file to save the modified PDF
 
486
 
487
  # Example usage
488
 
489
+ def pdf_to_images(pdf_path, page_numbers):
490
+ doc = fitz.open(pdf_path)
491
+ images = []
492
+ for page_number in page_numbers:
493
+ page = doc.load_page(page_number - 1)
494
+ pix = page.get_pixmap()
495
+ img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
496
+ buf = io.BytesIO()
497
+ img.save(buf, format="PNG")
498
+ byte_im = buf.getvalue()
499
+ images.append(byte_im)
500
+ return images
501
+
502
  # Function to display PDF in Streamlit
503
  def display_highlighted_pdf():
504
  pdf_path = "Bhagavad-Gita-As-It-Is.pdf"
 
509
 
510
  print(highlighted_pdf_path)
511
 
512
+ # with open(highlighted_pdf_path, "rb") as file:
513
+ # pdf_bytes = file.read()
514
 
515
+ # # Use pdf_viewer to display the PDF in Streamlit
516
+ # pdf_viewer(pdf_bytes, width=700)
517
+ images = pdf_to_images(highlighted_pdf_path, sources)
518
 
519
+ for img in images:
520
+ image_zoom(img)
521
 
522
  display_highlighted_pdf()
523
 
requirements.txt CHANGED
@@ -1,22 +1,15 @@
1
- # pinecone-client[grpc]
2
- sentence-transformers==2.2.2
3
  datasets
4
  torch
5
  streamlit-chat-media
6
  streamlit-chat
7
- transformers==4.28.0
8
  PyPDF2
9
  ratelimit
10
  backoff
11
  tqdm
12
  openai
13
  PyMuPDF # instead of fitz
14
- nltk
15
- langchain_community
16
- langchain
17
- # faiss-gpu
18
- faiss-cpu==1.7.2
19
- stqdm
20
- python-dotenv
21
- # langchain-huggingface
22
- streamlit_pdf_viewer
 
1
+ sentence-transformers
 
2
  datasets
3
  torch
4
  streamlit-chat-media
5
  streamlit-chat
6
+ transformers
7
  PyPDF2
8
  ratelimit
9
  backoff
10
  tqdm
11
  openai
12
  PyMuPDF # instead of fitz
13
+ reportlab
14
+ PyPDF2Highlight
15
+ streamlit-image-zoom