Silence1412 committed on
Commit
fce8396
1 Parent(s): d55b3c4

Create Chat_with_pdf_OpenAI

Browse files
Files changed (1) hide show
  1. Chat_with_pdf_OpenAI +63 -0
Chat_with_pdf_OpenAI ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Streamlit app: upload a PDF, index its text with OpenAI embeddings in a
# FAISS vector store, and answer questions about it in a chat-style view.
import hashlib
import os

import openai
import streamlit as st
from PyPDF2 import PdfReader
from langchain.callbacks import get_openai_callback
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from streamlit_chat import message


def _extract_text(pdf_file) -> str:
    """Return the concatenated text of every page of the uploaded PDF.

    Pages for which the reader yields no text contribute an empty string
    instead of breaking the concatenation.
    """
    reader = PdfReader(pdf_file)
    return "".join(page.extract_text() or "" for page in reader.pages)


def _get_knowledge_base(text: str, api_key: str):
    """Return a FAISS index over *text*, or ``None`` if there is nothing to index.

    The index is kept in ``st.session_state`` and rebuilt only when the
    document text or the API key changes, so a rerun of the script (for
    example, submitting a new question) does not embed the whole PDF again.
    """
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        # No chunks means there is nothing to embed; let the caller report it.
        return None

    # Only a digest is stored, never the key or the text themselves.
    fingerprint = hashlib.sha256(
        f"{api_key}\0{text}".encode("utf-8", errors="replace")
    ).hexdigest()
    if st.session_state.get("kb_fingerprint") != fingerprint:
        st.session_state["knowledge_base"] = FAISS.from_texts(
            chunks, OpenAIEmbeddings()
        )
        # Record the fingerprint last so a failed build is retried next run.
        st.session_state["kb_fingerprint"] = fingerprint
    return st.session_state["knowledge_base"]


# Masked input so the key is not displayed on screen.
OPENAI_API_KEY = st.text_input(
    "Input your OpenAI API key", "", type="password"
).strip()
if OPENAI_API_KEY:
    # Only override the environment when a key was actually typed; an empty
    # box must not wipe a key that is already configured.
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
# st.header("Ask your PDF 💬")

# upload file
pdf = st.file_uploader("Upload your PDF", type="pdf")

knowledge_base = None
if pdf is not None:
    active_key = os.environ.get("OPENAI_API_KEY", "")
    if not active_key:
        st.warning("Please enter your OpenAI API key to process the PDF.")
    else:
        # extract the text, split it into chunks and create the embeddings
        knowledge_base = _get_knowledge_base(_extract_text(pdf), active_key)
        if knowledge_base is None:
            st.warning("No text could be extracted from this PDF.")

if knowledge_base is not None:
    # Chat history lives in the session: answers in 'generated',
    # the matching questions in 'past'.
    if 'generated' not in st.session_state:
        st.session_state['generated'] = []
    if 'past' not in st.session_state:
        st.session_state['past'] = []

    # show user input
    user_question = st.text_input("Ask a question about your PDF:")
    if user_question:
        docs = knowledge_base.similarity_search(user_question)

        llm = OpenAI()
        chain = load_qa_chain(llm, chain_type="stuff")
        with get_openai_callback() as cb:
            response = chain.run(input_documents=docs, question=user_question)
            # Goes to the server console, not the page (presumably the
            # usage statistics gathered by the callback).
            print(cb)

        # st.write(response)
        st.session_state.past.append(user_question)
        st.session_state.generated.append(response)

    if st.session_state['generated']:
        # Newest exchange first; keys keep each chat widget unique.
        for i in reversed(range(len(st.session_state['generated']))):
            message(st.session_state["generated"][i], key=str(i))
            message(st.session_state['past'][i], is_user=True, key=str(i) + '_user')