Chandranshu Jain committed on
Commit
214739d
1 Parent(s): 4184984

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -22
app.py CHANGED
@@ -1,16 +1,18 @@
1
  import streamlit as st
 
 
 
 
 
2
  from PyPDF2 import PdfReader
3
  from langchain_text_splitters import RecursiveCharacterTextSplitter
4
  import os
5
- from langchain_google_genai import GoogleGenerativeAIEmbeddings
6
  from langchain_community.vectorstores import Chroma
7
- from langchain_google_genai import ChatGoogleGenerativeAI
8
  from langchain.chains.question_answering import load_qa_chain
9
  from langchain.prompts import PromptTemplate
10
  from langchain_community.document_loaders import PyPDFLoader
11
  from langchain_chroma import Chroma
12
- import tempfile
13
- from langchain_cohere import CohereEmbeddings
14
 
15
  #st.set_page_config(page_title="Document Genie", layout="wide")
16
 
@@ -33,19 +35,13 @@ from langchain_cohere import CohereEmbeddings
33
  # docs = loader.load()
34
  # return docs
35
 
36
- def get_pdf(uploaded_file):
37
- if uploaded_file :
38
- temp_file = "./temp.pdf"
39
- # Delete the existing temp.pdf file if it exists
40
- if os.path.exists(temp_file):
41
- os.remove(temp_file)
42
- with open(temp_file, "wb") as file:
43
- file.write(uploaded_file.getvalue())
44
- file_name = uploaded_file.name
45
- loader = PyPDFLoader(temp_file)
46
- docs = loader.load()
47
- return docs
48
-
49
  def text_splitter(text):
50
  text_splitter = RecursiveCharacterTextSplitter(
51
  # Set a really small chunk size, just to show.
@@ -55,8 +51,8 @@ def text_splitter(text):
55
  chunks=text_splitter.split_documents(text)
56
  return chunks
57
 
58
- GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
59
- COHERE_API_KEY = os.getenv("COHERE_API_KEY")
60
 
61
  def get_conversational_chain():
62
  prompt_template = """
@@ -79,7 +75,8 @@ def get_conversational_chain():
79
 
80
  def embedding(chunk,query):
81
  #embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
82
- embeddings = CohereEmbeddings(model="embed-english-v3.0")
 
83
  db = Chroma.from_documents(chunk,embeddings)
84
  doc = db.similarity_search(query)
85
  print(doc)
@@ -96,11 +93,12 @@ if 'messages' not in st.session_state:
96
  st.header("Chat with your pdf💁")
97
  with st.sidebar:
98
  st.title("PDF FILE UPLOAD:")
99
- pdf_docs = st.file_uploader("Upload your PDF File and Click on the Submit & Process Button", accept_multiple_files=False, key="pdf_uploader")
100
 
101
  query = st.chat_input("Ask a Question from the PDF File")
102
  if query:
103
- raw_text = get_pdf(pdf_docs)
 
104
  text_chunks = text_splitter(raw_text)
105
  st.session_state.messages.append({'role': 'user', "content": query})
106
  response = embedding(text_chunks,query)
 
1
  import streamlit as st
2
+ from langchain_community.llms import HuggingFaceEndpoint
3
+ from langchain.embeddings import HuggingFaceEmbeddings
4
+ from langchain_core.runnables import RunnablePassthrough
5
+ from langchain_core.output_parsers import StrOutputParser
6
+ from langchain.prompts import ChatPromptTemplate
7
  from PyPDF2 import PdfReader
8
  from langchain_text_splitters import RecursiveCharacterTextSplitter
9
  import os
 
10
  from langchain_community.vectorstores import Chroma
 
11
  from langchain.chains.question_answering import load_qa_chain
12
  from langchain.prompts import PromptTemplate
13
  from langchain_community.document_loaders import PyPDFLoader
14
  from langchain_chroma import Chroma
15
+ from langchain_community.vectorstores import Chroma
 
16
 
17
  #st.set_page_config(page_title="Document Genie", layout="wide")
18
 
 
35
  # docs = loader.load()
36
  # return docs
37
 
38
def get_pdf_text(pdf_docs):
    """Load every uploaded PDF into one flat list of documents.

    Args:
        pdf_docs: Iterable of uploaded file objects that expose
            ``getvalue()`` returning the raw PDF bytes. ``None`` or an
            empty iterable is treated as "nothing uploaded".

    Returns:
        A list with the documents produced by ``PyPDFLoader.load()`` for
        each upload, concatenated in upload order. Empty when there is
        nothing to load.
    """
    # Local imports keep this function self-contained.
    import os
    import tempfile

    docs = []
    for pdf in pdf_docs or []:
        # PyPDFLoader is given a filesystem path, so the in-memory upload is
        # first written to its own temporary file.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            tmp.write(pdf.getvalue())
            temp_file = tmp.name
        try:
            loader = PyPDFLoader(temp_file)
            docs.extend(loader.load())
        finally:
            # Always clean up the temporary copy, even if parsing fails.
            os.remove(temp_file)
    return docs
44
+
 
 
 
 
 
 
45
  def text_splitter(text):
46
  text_splitter = RecursiveCharacterTextSplitter(
47
  # Set a really small chunk size, just to show.
 
51
  chunks=text_splitter.split_documents(text)
52
  return chunks
53
 
54
+ #GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
55
+ #COHERE_API_KEY = os.getenv("COHERE_API_KEY")
56
 
57
  def get_conversational_chain():
58
  prompt_template = """
 
75
 
76
  def embedding(chunk,query):
77
  #embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
78
+ #embeddings = CohereEmbeddings(model="embed-english-v3.0")
79
+ embeddings=HuggingFaceEmbeddings()
80
  db = Chroma.from_documents(chunk,embeddings)
81
  doc = db.similarity_search(query)
82
  print(doc)
 
93
  st.header("Chat with your pdf💁")
94
  with st.sidebar:
95
  st.title("PDF FILE UPLOAD:")
96
# Python's boolean literal is `True`; `TRUE` is an undefined name and raises NameError.
pdf_docs = st.file_uploader("Upload your PDF File and Click on the Submit & Process Button", accept_multiple_files=True, key="pdf_uploader")
97
 
98
  query = st.chat_input("Ask a Question from the PDF File")
99
  if query:
100
# pdf_docs holds the uploaded file objects (not a directory path), so pass it
# directly to the loader helper, which iterates over the uploads itself.
raw_text = get_pdf_text(pdf_docs)
102
  text_chunks = text_splitter(raw_text)
103
  st.session_state.messages.append({'role': 'user', "content": query})
104
  response = embedding(text_chunks,query)