nidhibodar11 committed on
Commit
769ee66
1 Parent(s): feefb45

added main function

Browse files
Files changed (1) hide show
  1. app.py +55 -48
app.py CHANGED
@@ -1,39 +1,55 @@
1
  # Langchain imports
2
  from langchain_community.vectorstores.faiss import FAISS
3
- from langchain_groq import ChatGroq
4
  from langchain_community.document_loaders import WebBaseLoader
5
- from langchain_community.embeddings import HuggingFaceEmbeddings
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain.chains.combine_documents import create_stuff_documents_chain
8
  from langchain_core.prompts import ChatPromptTemplate
9
  from langchain.chains import create_retrieval_chain
10
 
11
- # Embedding and model import
 
 
 
12
  # Other
13
  import streamlit as st
14
  import os
15
  import time
16
  from PyPDF2 import PdfReader
17
  import tempfile
18
- import pdfplumber
19
-
20
-
21
- st.title("Ask questions from your PDF(s) or website")
22
- option = None
23
-
24
- # Prompt user to choose between PDFs or website
25
- option = st.radio("Choose input type:", ("PDF(s)", "Website"), index=None)
26
 
27
  def get_pdf_processed(pdf_docs):
28
- text = ""
29
  for pdf in pdf_docs:
30
- with pdfplumber.open(pdf) as pdf_file:
31
- for page in pdf_file.pages:
32
- text += page.extract_text()
33
  return text
34
 
35
- def llm_model():
36
- # llm = ChatGroq(model="mixtral-8x7b-32768",groq_api_key=st.secrets['GROQ_API_KEY'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  llm = ChatGroq(model="mixtral-8x7b-32768")
38
  prompt = ChatPromptTemplate.from_template(
39
  """
@@ -48,40 +64,31 @@ def llm_model():
48
  document_chain = create_stuff_documents_chain(llm,prompt)
49
  retriever = st.session_state.vector.as_retriever() if st.session_state.vector else None
50
  retrieval_chain = create_retrieval_chain(retriever,document_chain)
 
51
 
52
- prompt = st.text_input("Input your question here")
 
 
 
 
 
53
 
54
- if prompt:
55
- start = time.process_time()
56
- response = retrieval_chain.invoke({"input":prompt})
57
- st.write(response['answer'])
58
- st.write("Response time: ", time.process_time() - start)
59
 
60
- model_name = "all-MiniLM-L6-v2"
61
- st.session_state.embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
62
 
63
- st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size =1000, chunk_overlap= 200)
64
-
65
- if option:
66
- if option == "Website":
67
- website_link = st.text_input("Enter the website link:")
68
- if st.button("Submit & Process"):
69
- with st.spinner("Loading website content..."):
70
- st.session_state.loader = WebBaseLoader(website_link)
71
- st.session_state.docs = st.session_state.loader.load()
72
- st.session_state.final_documents = st.session_state.text_splitter.split_documents(st.session_state.docs)
73
- st.session_state.vector = FAISS.from_documents(st.session_state.final_documents,st.session_state.embeddings)
74
- st.success("Done!")
75
- llm_model()
76
-
77
- elif option == "PDF(s)":
78
- pdf_files = st.file_uploader("Upload your PDF files", type=["pdf"], accept_multiple_files=True)
79
- if st.button("Submit & Process"):
80
- with st.spinner("Loading pdf..."):
81
- st.session_state.docs = get_pdf_processed(pdf_files)
82
- st.session_state.final_documents = st.session_state.text_splitter.split_text(st.session_state.docs)
83
- st.session_state.vector = FAISS.from_texts(st.session_state.final_documents,st.session_state.embeddings)
84
- st.success("Done!")
85
- llm_model()
86
 
87
 
 
 
 
1
  # Langchain imports
2
  from langchain_community.vectorstores.faiss import FAISS
 
3
  from langchain_community.document_loaders import WebBaseLoader
 
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain.chains.combine_documents import create_stuff_documents_chain
6
  from langchain_core.prompts import ChatPromptTemplate
7
  from langchain.chains import create_retrieval_chain
8
 
9
+ # Embedding and model imports
10
+ from langchain_community.embeddings import HuggingFaceEmbeddings
11
+ from langchain_groq import ChatGroq
12
+
13
  # Other
14
  import streamlit as st
15
  import os
16
  import time
17
  from PyPDF2 import PdfReader
18
  import tempfile
 
 
 
 
 
 
 
 
19
 
20
  def get_pdf_processed(pdf_docs):
21
+ text=""
22
  for pdf in pdf_docs:
23
+ pdf_reader= PdfReader(pdf)
24
+ for page in pdf_reader.pages:
25
+ text += page.extract_text()
26
  return text
27
 
28
# Streamlit re-runs this script on every widget interaction, so create the
# embedding model and the text splitter only once per session instead of
# rebuilding them on each run.
if "embeddings" not in st.session_state:
    st.session_state.embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
if "text_splitter" not in st.session_state:
    st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
# The chain builder reads ``st.session_state.vector`` before any source has
# been processed; give it a defined "no index yet" value so that read does
# not fail on a fresh session.
if "vector" not in st.session_state:
    st.session_state.vector = None
30
+
31
def initialize_vector_store(option):
    """Render the inputs for the chosen source and build the FAISS index.

    On "Submit & Process" the selected source is loaded, split with
    ``st.session_state.text_splitter``, embedded with
    ``st.session_state.embeddings`` and stored as
    ``st.session_state.vector``.

    Args:
        option: ``"Website"``, ``"PDF(s)"``, or a falsy/other value when
            nothing has been selected yet (in which case nothing is drawn).

    Returns:
        None. Results are written to ``st.session_state``.
    """
    if option == "Website":
        website_link = st.text_input("Enter the website link:")
        if st.button("Submit & Process"):
            # Do not hand an empty URL to the loader.
            if not website_link or not website_link.strip():
                st.warning("Please enter a website link first.")
                return
            with st.spinner("Loading website content..."):
                st.session_state.loader = WebBaseLoader(website_link.strip())
                st.session_state.docs = st.session_state.loader.load()
                st.session_state.final_documents = st.session_state.text_splitter.split_documents(
                    st.session_state.docs
                )
                st.session_state.vector = FAISS.from_documents(
                    st.session_state.final_documents, st.session_state.embeddings
                )
            st.success("Website content loaded successfully!")

    elif option == "PDF(s)":
        pdf_files = st.file_uploader("Upload your PDF files", type=["pdf"], accept_multiple_files=True)
        if st.button("Submit & Process"):
            # Nothing uploaded yet: there is no text to index.
            if not pdf_files:
                st.warning("Please upload at least one PDF file first.")
                return
            with st.spinner("Loading pdf..."):
                st.session_state.docs = get_pdf_processed(pdf_files)
                st.session_state.final_documents = st.session_state.text_splitter.split_text(
                    st.session_state.docs
                )
                # Scanned/image-only PDFs can produce no chunks at all;
                # stop here rather than building an index from nothing.
                if not st.session_state.final_documents:
                    st.warning("No extractable text was found in the uploaded PDF(s).")
                    return
                st.session_state.vector = FAISS.from_texts(
                    st.session_state.final_documents, st.session_state.embeddings
                )
            st.success("PDF content loaded successfully!")
51
+
52
+ def get_conversational_chain():
53
  llm = ChatGroq(model="mixtral-8x7b-32768")
54
  prompt = ChatPromptTemplate.from_template(
55
  """
 
64
  document_chain = create_stuff_documents_chain(llm,prompt)
65
  retriever = st.session_state.vector.as_retriever() if st.session_state.vector else None
66
  retrieval_chain = create_retrieval_chain(retriever,document_chain)
67
+ return retrieval_chain
68
 
69
def user_input(prompt):
    """Answer *prompt* with the retrieval chain and render the result.

    Writes the answer, the elapsed response time, and an expander listing
    the retrieved context passages to the Streamlit page.

    Args:
        prompt: The user's question, passed to the chain as ``"input"``.

    Returns:
        None.
    """
    # Without a built index there is nothing to retrieve from; tell the
    # user instead of failing while constructing the chain.
    if st.session_state.get("vector") is None:
        st.warning("Please load a website or PDF before asking a question.")
        return

    chain = get_conversational_chain()

    # perf_counter measures wall-clock time; process_time only counts CPU
    # time of this process and so ignores the wait on the remote model.
    start = time.perf_counter()
    response = chain.invoke({"input": prompt})
    elapsed = time.perf_counter() - start

    st.write(response['answer'])
    st.write("Response time: ", elapsed)

    with st.expander("Did not like the response? Check out more here"):
        for doc in response['context']:
            st.write(doc.page_content)
            st.write("-----------------------------")
 
80
 
81
def main():
    """Draw the page: source selection, indexing controls, question box.

    The selected input type is handed to ``initialize_vector_store``; a
    non-empty question is handed to ``user_input``.
    """
    st.title("Ask your questions from pdf(s) or website")

    # Prompt user to choose between PDFs or website. index=None leaves the
    # radio without a preselected choice.
    option = st.radio("Choose input type:", ("PDF(s)", "Website"), index=None)
    initialize_vector_store(option)

    prompt = st.text_input("Input your question here")
    if prompt:
        user_input(prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
 
93
+ if __name__ == "__main__":
94
+ main()