Spaces:
Sleeping
Sleeping
KushwanthK
committed on
Upload 2 files
Browse files- app.py +22 -4
- requirements.txt +5 -12
app.py
CHANGED
@@ -422,6 +422,8 @@ import nltk
|
|
422 |
nltk.download('stopwords')
|
423 |
from nltk.corpus import stopwords
|
424 |
from collections import Counter
|
|
|
|
|
425 |
|
426 |
def highlight_pdf(file_path, text_to_highlight, page_numbers):
|
427 |
# Create a temporary file to save the modified PDF
|
@@ -484,6 +486,19 @@ def highlight_pdf(file_path, text_to_highlight, page_numbers):
|
|
484 |
|
485 |
# Example usage
|
486 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
487 |
# Function to display PDF in Streamlit
|
488 |
def display_highlighted_pdf():
|
489 |
pdf_path = "Bhagavad-Gita-As-It-Is.pdf"
|
@@ -494,12 +509,15 @@ def display_highlighted_pdf():
|
|
494 |
|
495 |
print(highlighted_pdf_path)
|
496 |
|
497 |
-
with open(highlighted_pdf_path, "rb") as file:
|
498 |
-
|
499 |
|
500 |
-
|
501 |
-
pdf_viewer(pdf_bytes, width=700)
|
|
|
502 |
|
|
|
|
|
503 |
|
504 |
display_highlighted_pdf()
|
505 |
|
|
|
422 |
nltk.download('stopwords')
|
423 |
from nltk.corpus import stopwords
|
424 |
from collections import Counter
|
425 |
+
from streamlit_image_zoom import image_zoom
|
426 |
+
from PIL import Image
|
427 |
|
428 |
def highlight_pdf(file_path, text_to_highlight, page_numbers):
|
429 |
# Create a temporary file to save the modified PDF
|
|
|
486 |
|
487 |
# Example usage
|
488 |
|
489 |
+
def pdf_to_images(pdf_path, page_numbers):
    """Render selected pages of a PDF as PNG-encoded byte strings.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.
        page_numbers: Iterable of 1-based page numbers to render.

    Returns:
        A list of ``bytes`` objects, one PNG image per requested page, in
        the same order as ``page_numbers``.
    """
    images = []
    # Open the document as a context manager so it is closed (and its file
    # handle released) even if rendering one of the pages raises.
    with fitz.open(pdf_path) as doc:
        for page_number in page_numbers:
            # Callers pass 1-based page numbers; load_page() is 0-based.
            page = doc.load_page(page_number - 1)
            pix = page.get_pixmap()
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            # Encode in memory; nothing is written to disk.
            buf = io.BytesIO()
            img.save(buf, format="PNG")
            images.append(buf.getvalue())
    return images
|
501 |
+
|
502 |
# Function to display PDF in Streamlit
|
503 |
def display_highlighted_pdf():
|
504 |
pdf_path = "Bhagavad-Gita-As-It-Is.pdf"
|
|
|
509 |
|
510 |
print(highlighted_pdf_path)
|
511 |
|
512 |
+
# with open(highlighted_pdf_path, "rb") as file:
|
513 |
+
# pdf_bytes = file.read()
|
514 |
|
515 |
+
# # Use pdf_viewer to display the PDF in Streamlit
|
516 |
+
# pdf_viewer(pdf_bytes, width=700)
|
517 |
+
images = pdf_to_images(highlighted_pdf_path, sources)
|
518 |
|
519 |
+
for img in images:
|
520 |
+
image_zoom(img)
|
521 |
|
522 |
display_highlighted_pdf()
|
523 |
|
requirements.txt
CHANGED
@@ -1,22 +1,15 @@
|
|
1 |
-
|
2 |
-
sentence-transformers==2.2.2
|
3 |
datasets
|
4 |
torch
|
5 |
streamlit-chat-media
|
6 |
streamlit-chat
|
7 |
-
transformers
|
8 |
PyPDF2
|
9 |
ratelimit
|
10 |
backoff
|
11 |
tqdm
|
12 |
openai
|
13 |
PyMuPDF # instead of fitz
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
# faiss-gpu
|
18 |
-
faiss-cpu==1.7.2
|
19 |
-
stqdm
|
20 |
-
python-dotenv
|
21 |
-
# langchain-huggingface
|
22 |
-
streamlit_pdf_viewer
|
|
|
1 |
+
sentence-transformers
|
|
|
2 |
datasets
|
3 |
torch
|
4 |
streamlit-chat-media
|
5 |
streamlit-chat
|
6 |
+
transformers
|
7 |
PyPDF2
|
8 |
ratelimit
|
9 |
backoff
|
10 |
tqdm
|
11 |
openai
|
12 |
PyMuPDF # instead of fitz
|
13 |
+
reportlab
|
14 |
+
PyPDF2Highlight
|
15 |
+
streamlit-image-zoom
|
|
|
|
|
|
|
|
|
|
|
|