Muhammad Qasim committed on
Commit
520da56
•
1 Parent(s): dd251ef

version updated

Browse files
Files changed (3) hide show
  1. .env.example +2 -0
  2. README.md +6 -0
  3. app.py +12 -4
.env.example ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ HUGGINGFACEHUB_API_TOKEN=
2
+ OPENAI_API_KEY=
README.md CHANGED
@@ -38,6 +38,12 @@ Before using the chatbot, ensure you have the following installed:
38
  pip install -r requirements.txt
39
  ```
40
 
 
 
 
 
 
 
41
  ## Usage 📋
42
 
43
  1. Run the chatbot using the following command:
 
38
  pip install -r requirements.txt
39
  ```
40
 
41
+ 4. Copy .env.example to .env and set your OpenAI & Hugging Face API keys:
42
+
43
+ ```shell
44
+ cp .env.example .env
45
+ ```
46
+
47
  ## Usage 📋
48
 
49
  1. Run the chatbot using the following command:
app.py CHANGED
@@ -4,7 +4,7 @@ from PyPDF2 import PdfReader
4
  from langchain.text_splitter import CharacterTextSplitter
5
  from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
6
  from langchain.vectorstores import FAISS
7
- from langchain.chat_models import Chat
8
 
9
  from langchain.memory import ConversationBufferMemory
10
  from langchain.chains import ConversationalRetrievalChain
@@ -12,7 +12,8 @@ from htmlTemplates import css, bot_template, user_template, hide_st_style, foote
12
  from langchain.llms import HuggingFaceHub
13
  from matplotlib import style
14
 
15
- def get_pdf_text(pdf_docs):
 
16
  text = ""
17
  for pdf in pdf_docs:
18
  pdf_reader = PdfReader(pdf)
@@ -20,6 +21,7 @@ def get_pdf_text(pdf_docs):
20
  text += page.extract_text()
21
  return text
22
 
 
23
  def get_text_chunks(text):
24
  text_splitter = CharacterTextSplitter(
25
  separator="\n",
@@ -30,12 +32,14 @@ def get_text_chunks(text):
30
  chunks = text_splitter.split_text(text)
31
  return chunks
32
 
 
33
  def get_vectorstore(text_chunks):
34
  embeddings = OpenAIEmbeddings()
35
  # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
36
  vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
37
  return vectorstore
38
 
 
39
  def get_conversation_chain(vectorstore):
40
  llm = ChatOpenAI()
41
  # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
@@ -49,6 +53,7 @@ def get_conversation_chain(vectorstore):
49
  )
50
  return conversation_chain
51
 
 
52
  def handle_userinput(user_question):
53
  if st.session_state.conversation is None:
54
  st.error("Please upload PDF data before starting the chat.")
@@ -65,10 +70,11 @@ def handle_userinput(user_question):
65
  st.write(bot_template.replace(
66
  "{{MSG}}", message.content), unsafe_allow_html=True)
67
 
 
68
  def main():
69
  load_dotenv()
70
  st.set_page_config(page_title="Talk with PDF",
71
- page_icon="icon.png")
72
  st.write(css, unsafe_allow_html=True)
73
 
74
  if "conversation" not in st.session_state:
@@ -92,7 +98,8 @@ def main():
92
  raw_text = get_pdf_text(pdf_docs)
93
  text_chunks = get_text_chunks(raw_text)
94
  vectorstore = get_vectorstore(text_chunks)
95
- st.session_state.conversation = get_conversation_chain(vectorstore)
 
96
  st.success("Your Data has been processed successfully")
97
 
98
  if user_question:
@@ -101,5 +108,6 @@ def main():
101
  st.markdown(hide_st_style, unsafe_allow_html=True)
102
  st.markdown(footer, unsafe_allow_html=True)
103
 
 
104
  if __name__ == '__main__':
105
  main()
 
4
  from langchain.text_splitter import CharacterTextSplitter
5
  from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
6
  from langchain.vectorstores import FAISS
7
+ from langchain.chat_models import ChatOpenAI
8
 
9
  from langchain.memory import ConversationBufferMemory
10
  from langchain.chains import ConversationalRetrievalChain
 
12
  from langchain.llms import HuggingFaceHub
13
  from matplotlib import style
14
 
15
+
16
+ def get_pdf_text(pdf_docs):
17
  text = ""
18
  for pdf in pdf_docs:
19
  pdf_reader = PdfReader(pdf)
 
21
  text += page.extract_text()
22
  return text
23
 
24
+
25
  def get_text_chunks(text):
26
  text_splitter = CharacterTextSplitter(
27
  separator="\n",
 
32
  chunks = text_splitter.split_text(text)
33
  return chunks
34
 
35
+
36
  def get_vectorstore(text_chunks):
37
  embeddings = OpenAIEmbeddings()
38
  # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
39
  vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
40
  return vectorstore
41
 
42
+
43
  def get_conversation_chain(vectorstore):
44
  llm = ChatOpenAI()
45
  # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
 
53
  )
54
  return conversation_chain
55
 
56
+
57
  def handle_userinput(user_question):
58
  if st.session_state.conversation is None:
59
  st.error("Please upload PDF data before starting the chat.")
 
70
  st.write(bot_template.replace(
71
  "{{MSG}}", message.content), unsafe_allow_html=True)
72
 
73
+
74
  def main():
75
  load_dotenv()
76
  st.set_page_config(page_title="Talk with PDF",
77
+ page_icon="icon.png")
78
  st.write(css, unsafe_allow_html=True)
79
 
80
  if "conversation" not in st.session_state:
 
98
  raw_text = get_pdf_text(pdf_docs)
99
  text_chunks = get_text_chunks(raw_text)
100
  vectorstore = get_vectorstore(text_chunks)
101
+ st.session_state.conversation = get_conversation_chain(
102
+ vectorstore)
103
  st.success("Your Data has been processed successfully")
104
 
105
  if user_question:
 
108
  st.markdown(hide_st_style, unsafe_allow_html=True)
109
  st.markdown(footer, unsafe_allow_html=True)
110
 
111
+
112
  if __name__ == '__main__':
113
  main()