zancmeresek committed on
Commit
b62606d
0 Parent(s):

Duplicate from zancmeresek/langchain-chat-with-pdf-openai

Browse files
Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +13 -0
  3. app.py +105 -0
  4. requirements.txt +6 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: "Chat with PDF •\_OpenAI"
3
+ emoji: 📄🤖
4
+ colorFrom: purple
5
+ colorTo: pink
6
+ sdk: gradio
7
+ sdk_version: 3.28.2
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: zancmeresek/langchain-chat-with-pdf-openai
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import time
4
+
5
+ from langchain.document_loaders import OnlinePDFLoader
6
+
7
+ from langchain.text_splitter import CharacterTextSplitter
8
+
9
+
10
+ from langchain.llms import OpenAI
11
+
12
+ from langchain.embeddings import OpenAIEmbeddings
13
+
14
+
15
+ from langchain.vectorstores import Chroma
16
+
17
+ from langchain.chains import ConversationalRetrievalChain
18
+
19
def loading_pdf():
    """Return the placeholder status shown while the PDF is being processed."""
    status = "Loading..."
    return status
21
+
22
def pdf_changes(pdf_doc, open_ai_key):
    """Index the uploaded PDF and build the global question-answering chain.

    Args:
        pdf_doc: Uploaded file object; its ``name`` attribute is handed to
            ``OnlinePDFLoader``.
        open_ai_key: OpenAI API key entered by the user.

    Returns:
        ``"Ready"`` once the chain has been stored in the module-level ``qa``,
        otherwise a short message naming the missing input.
    """
    global qa

    # Validate the *parameter*, not the module-level ``openai_key`` widget:
    # the widget object is never None, so testing it lets empty keys through.
    api_key = (open_ai_key or "").strip()
    if not api_key:
        return "You forgot OpenAI API key"
    if pdf_doc is None:
        # Without this guard ``pdf_doc.name`` raises AttributeError.
        return "You forgot to load a PDF"

    # The OpenAI embeddings/LLM objects below are built without an explicit
    # key, so it is supplied through the environment.
    os.environ['OPENAI_API_KEY'] = api_key

    loader = OnlinePDFLoader(pdf_doc.name)
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)
    embeddings = OpenAIEmbeddings()
    db = Chroma.from_documents(texts, embeddings)
    # Only the single best-matching chunk is retrieved per question.
    retriever = db.as_retriever(search_kwargs={"k": 1})
    qa = ConversationalRetrievalChain.from_llm(
        llm=OpenAI(temperature=0.5),
        retriever=retriever,
        return_source_documents=False)
    return "Ready"
40
+
41
def add_text(history, text):
    """Append the user's message to the chat history and clear the input box.

    Args:
        history: Existing chat history, a list of ``[user, bot]`` pairs.
        text: The message the user just submitted.

    Returns:
        A ``(new_history, "")`` tuple: the history with a new
        ``[text, None]`` pair appended, and an empty string for the textbox.
    """
    # Use a mutable list pair (not a tuple): ``bot`` fills in the reply by
    # assigning to ``history[-1][1]`` in place.
    history = history + [[text, None]]
    return history, ""
44
+
45
def bot(history):
    """Stream the answer to the latest question into the chat history.

    The answer for the last user message is obtained from ``infer`` and then
    appended to the last history entry one character at a time, yielding the
    whole history after each character.
    """
    answer = infer(history[-1][0], history)
    latest = history[-1]
    latest[1] = ""

    for ch in answer:
        latest[1] += ch
        # Short pause between characters to pace the streamed output.
        time.sleep(0.05)
        yield history
53
+
54
+
55
def infer(question, history):
    """Answer ``question`` with the global retrieval chain.

    Args:
        question: The user's current question.
        history: Full chat history as ``[user, bot]`` pairs; the last entry is
            the pending question and is excluded from the context sent to the
            chain.

    Returns:
        The chain's ``"answer"`` string, or a short notice when no PDF has
        been loaded yet.
    """
    # ``qa`` only exists after ``pdf_changes`` has built it; look it up
    # defensively so an early question yields a message instead of NameError.
    chain = globals().get("qa")
    if chain is None:
        return "Please load a PDF first."

    # Earlier turns are passed as (human, ai) tuples.
    chat_history = [(human, ai) for human, ai in history[:-1]]
    result = chain({"question": question, "chat_history": chat_history})
    return result["answer"]
68
+
69
+ css="""
70
+ #col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
71
+ """
72
+
73
+ title = """
74
+ <div style="text-align: center;max-width: 700px;">
75
+ <h1>Chat with PDF • OpenAI</h1>
76
+ <p style="text-align: center;">Upload a .PDF from your computer, click the "Load PDF to LangChain" button, <br />
77
+ when everything is ready, you can start asking questions about the pdf ;) <br />
78
+ This version is set to store chat history, and uses OpenAI as LLM, don't forget to copy/paste your OpenAI API key</p>
79
+ </div>
80
+ """
81
+
82
+
83
# NOTE(review): the nesting below was reconstructed from a diff whose
# indentation was stripped — confirm the layout against the running app.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)

        with gr.Column():
            # Label typo fixed ("You" -> "Your").
            openai_key = gr.Textbox(label="Your OpenAI API key", type="password")
            pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
            with gr.Row():
                langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
                load_pdf = gr.Button("Load pdf to langchain")

        chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
        question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
        submit_btn = gr.Button("Send Message")

    # Two handlers on the same button: the first shows "Loading..." right
    # away, the second builds the chain and reports the final status.
    load_pdf.click(loading_pdf, None, langchain_status, queue=False)
    load_pdf.click(pdf_changes, inputs=[pdf_doc, openai_key], outputs=[langchain_status], queue=False)

    # Enter key and the Send button run the same pipeline: record the
    # question, then stream the answer into the chatbot.
    question.submit(add_text, [chatbot, question], [chatbot, question]).then(
        bot, chatbot, chatbot
    )
    submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
        bot, chatbot, chatbot)

# ``bot`` is a generator; Gradio 3.x needs the queue enabled to stream from
# generators. Spaces turns it on implicitly, a local run does not.
demo.queue()
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ openai
2
+ tiktoken
3
+ chromadb
4
+ langchain
5
+ unstructured
6
+ unstructured[local-inference]