nidhibodar11 committed on
Commit
080536a
1 Parent(s): 4bf1fe8

integrated with pinecone

Browse files
Files changed (1) hide show
  1. app.py +35 -28
app.py CHANGED
@@ -1,39 +1,40 @@
1
  # Langchain imports
2
- from langchain_community.vectorstores.faiss import FAISS
3
  from langchain_community.document_loaders import WebBaseLoader
 
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain.chains.combine_documents import create_stuff_documents_chain
6
  from langchain_core.prompts import ChatPromptTemplate
7
  from langchain.chains import create_retrieval_chain
 
8
 
9
- # Embedding and model imports
10
- from langchain_google_genai import GoogleGenerativeAIEmbeddings
11
- from langchain_groq import ChatGroq
12
-
13
  # Other
14
  import streamlit as st
15
  import os
16
  import time
17
  from PyPDF2 import PdfReader
18
  import tempfile
 
 
19
 
20
- st.title("Ask your questions from pdf(s) or website")
21
  option = None
22
 
23
  # Prompt user to choose between PDFs or website
24
  option = st.radio("Choose input type:", ("PDF(s)", "Website"), index=None)
25
 
26
-
27
  def get_pdf_processed(pdf_docs):
28
- text=""
29
  for pdf in pdf_docs:
30
- pdf_reader= PdfReader(pdf)
31
- for page in pdf_reader.pages:
32
- text += page.extract_text()
33
  return text
34
 
35
  def llm_model():
36
- llm = ChatGroq(model="mixtral-8x7b-32768")
 
37
  prompt = ChatPromptTemplate.from_template(
38
  """
39
  Answer the question based on the provided context only.
@@ -51,36 +52,42 @@ def llm_model():
51
  prompt = st.text_input("Input your question here")
52
 
53
  if prompt:
54
-
55
- start =time.process_time()
56
  response = retrieval_chain.invoke({"input":prompt})
57
  st.write(response['answer'])
58
  st.write("Response time: ", time.process_time() - start)
59
 
60
- with st.expander("Did not like the response? Check out more here"):
61
- for i, doc in enumerate(response['context']):
62
- st.write(doc.page_content)
63
- st.write("-----------------------------")
64
 
65
- st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model = 'models/embedding-001')
66
  st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size =1000, chunk_overlap= 200)
67
 
 
 
 
 
68
  if option:
69
  if option == "Website":
70
  website_link = st.text_input("Enter the website link:")
71
  if website_link:
72
- st.session_state.loader = WebBaseLoader(website_link)
73
- st.session_state.docs = st.session_state.loader.load()
74
- st.session_state.final_documents = st.session_state.text_splitter.split_documents(st.session_state.docs)
75
- st.session_state.vector = FAISS.from_documents(st.session_state.final_documents,st.session_state.embeddings)
76
- llm_model()
77
-
 
 
78
  elif option == "PDF(s)":
79
  pdf_files = st.file_uploader("Upload your PDF files", type=["pdf"], accept_multiple_files=True)
80
  if pdf_files:
81
- st.session_state.docs = get_pdf_processed(pdf_files)
82
- st.session_state.final_documents = st.session_state.text_splitter.split_text(st.session_state.docs)
83
- st.session_state.vector = FAISS.from_texts(st.session_state.final_documents,st.session_state.embeddings)
 
 
 
84
  llm_model()
85
 
86
 
 
1
  # Langchain imports
2
+ from langchain_groq import ChatGroq
3
  from langchain_community.document_loaders import WebBaseLoader
4
+ from langchain_community.embeddings import HuggingFaceEmbeddings
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain.chains.combine_documents import create_stuff_documents_chain
7
  from langchain_core.prompts import ChatPromptTemplate
8
  from langchain.chains import create_retrieval_chain
9
+ from langchain_pinecone import PineconeVectorStore
10
 
11
+ # Embedding and model import
 
 
 
12
  # Other
13
  import streamlit as st
14
  import os
15
  import time
16
  from PyPDF2 import PdfReader
17
  import tempfile
18
+ import pdfplumber
19
+
20
 
21
+ st.title("Ask questions from your PDF(s) or website")
22
  option = None
23
 
24
  # Prompt user to choose between PDFs or website
25
  option = st.radio("Choose input type:", ("PDF(s)", "Website"), index=None)
26
 
 
def get_pdf_processed(pdf_docs):
    """Concatenate the extracted text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of PDF sources that ``pdfplumber.open`` accepts
            (the caller passes the files returned by ``st.file_uploader``).

    Returns:
        One string holding the text of all pages, in document order and then
        page order. An empty iterable yields ``""``.
    """
    page_texts = []
    for pdf in pdf_docs:
        with pdfplumber.open(pdf) as pdf_file:
            for page in pdf_file.pages:
                # A page without a text layer (e.g. a scanned image) may give
                # back None instead of a string; treat that as "no text" so a
                # single such page cannot abort the whole upload.
                page_texts.append(page.extract_text() or "")
    return "".join(page_texts)
34
 
35
  def llm_model():
36
+ # llm = ChatGroq(model="mixtral-8x7b-32768",groq_api_key=st.secrets['GROQ_API_KEY'])
37
+ llm = ChatGroq(model="mixtral-8x7b-32768",groq_api_key=groq_api_key)
38
  prompt = ChatPromptTemplate.from_template(
39
  """
40
  Answer the question based on the provided context only.
 
52
  prompt = st.text_input("Input your question here")
53
 
54
  if prompt:
55
+ start = time.process_time()
 
56
  response = retrieval_chain.invoke({"input":prompt})
57
  st.write(response['answer'])
58
  st.write("Response time: ", time.process_time() - start)
59
 
# --- Shared per-session resources -------------------------------------------
# Streamlit re-executes this script from top to bottom on every widget
# interaction (including each question typed by the user), so anything that
# is expensive to build is created once and kept in st.session_state.

# Sentence-transformers model used to embed both the chunks and the queries.
model_name = "all-MiniLM-L6-v2"
if "embeddings" not in st.session_state:
    st.session_state.embeddings = HuggingFaceEmbeddings(model_name=model_name)

st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Name of the Pinecone index that backs the vector store.
index_name = "myindex"
if "vector" not in st.session_state:
    st.session_state.vector = PineconeVectorStore(index_name=index_name, embedding=st.session_state.embeddings)

# NOTE(review): every source is written to the same index without a
# namespace, so chunks from previously loaded sources stay retrievable.
# Confirm whether that is intended.

# --- Input handling ---------------------------------------------------------
if option == "Website":
    website_link = st.text_input("Enter the website link:")
    if website_link:
        # Identify what is currently indexed so that a rerun triggered by the
        # question box does not fetch and upsert the same content again.
        source_key = ("website", website_link)
        if st.session_state.get("indexed_source") != source_key:
            with st.spinner("Loading website content..."):
                st.session_state.loader = WebBaseLoader(website_link)
                st.session_state.docs = st.session_state.loader.load()
                st.session_state.final_documents = st.session_state.text_splitter.split_documents(st.session_state.docs)
                st.session_state.vector = PineconeVectorStore.from_documents(
                    st.session_state.final_documents,
                    index_name=index_name,
                    embedding=st.session_state.embeddings,
                )
            st.session_state.indexed_source = source_key
        st.success("Done!")
        llm_model()

elif option == "PDF(s)":
    pdf_files = st.file_uploader("Upload your PDF files", type=["pdf"], accept_multiple_files=True)
    if pdf_files:
        # Name + size of each upload is used as a cheap fingerprint of the
        # current selection; a changed selection is indexed again.
        source_key = ("pdf", tuple((pdf.name, pdf.size) for pdf in pdf_files))
        if st.session_state.get("indexed_source") != source_key:
            with st.spinner("Loading pdf..."):
                st.session_state.docs = get_pdf_processed(pdf_files)
                st.session_state.final_documents = st.session_state.text_splitter.split_text(st.session_state.docs)
                st.session_state.vector = PineconeVectorStore.from_texts(
                    st.session_state.final_documents,
                    index_name=index_name,
                    embedding=st.session_state.embeddings,
                )
            st.session_state.indexed_source = source_key
        st.success("Done!")
        st.empty()
        llm_model()
92
 
93