Dua Rajper committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,7 +2,7 @@ import os
|
|
| 2 |
import logging
|
| 3 |
from dotenv import load_dotenv
|
| 4 |
import streamlit as st
|
| 5 |
-
from PyPDF2 import PdfReader
|
| 6 |
from langchain.text_splitter import CharacterTextSplitter
|
| 7 |
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
|
| 8 |
from langchain.vectorstores import FAISS
|
|
@@ -26,13 +26,10 @@ def get_pdf_text(pdf_docs):
|
|
| 26 |
try:
|
| 27 |
pdf_reader = PdfReader(pdf)
|
| 28 |
for page in pdf_reader.pages:
|
| 29 |
-
text += page.extract_text()
|
| 30 |
-
except PdfReadError:
|
| 31 |
-
st.warning(f"Could not read {pdf.name}. Skipping this file.")
|
| 32 |
-
logging.warning(f"Could not read {pdf.name}. Skipping.")
|
| 33 |
except Exception as e:
|
| 34 |
-
st.warning(f"
|
| 35 |
-
logging.
|
| 36 |
return text
|
| 37 |
|
| 38 |
# Function to split the extracted text into chunks
|
|
@@ -113,4 +110,4 @@ def main():
|
|
| 113 |
st.session_state.conversation = get_conversation_chain(vectorstore)
|
| 114 |
|
| 115 |
if __name__ == '__main__':
|
| 116 |
-
main()
|
|
|
|
| 2 |
import logging
|
| 3 |
from dotenv import load_dotenv
|
| 4 |
import streamlit as st
|
| 5 |
+
from PyPDF2 import PdfReader
|
| 6 |
from langchain.text_splitter import CharacterTextSplitter
|
| 7 |
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
|
| 8 |
from langchain.vectorstores import FAISS
|
|
|
|
| 26 |
try:
|
| 27 |
pdf_reader = PdfReader(pdf)
|
| 28 |
for page in pdf_reader.pages:
|
| 29 |
+
text += page.extract_text() or ""
|
|
|
|
|
|
|
|
|
|
| 30 |
except Exception as e:
|
| 31 |
+
st.warning(f"Could not read {pdf.name}. Skipping this file.")
|
| 32 |
+
logging.warning(f"Error processing {pdf.name}: {e}")
|
| 33 |
return text
|
| 34 |
|
| 35 |
# Function to split the extracted text into chunks
|
|
|
|
| 110 |
st.session_state.conversation = get_conversation_chain(vectorstore)
|
| 111 |
|
| 112 |
if __name__ == '__main__':
|
| 113 |
+
main()
|