Spaces:
Runtime error
Runtime error
NAB1108
committed on
Commit
•
0196549
0
Parent(s):
Duplicate from NAB1108/PDFchat
Browse files
- .gitattributes +35 -0
- README.md +13 -0
- app.py +155 -0
- requirements.txt +12 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: PDFchat
|
3 |
+
emoji: 🚀
|
4 |
+
colorFrom: yellow
|
5 |
+
colorTo: green
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 3.41.2
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
duplicated_from: NAB1108/PDFchat
|
11 |
+
---
|
12 |
+
|
13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
from langchain.document_loaders import OnlinePDFLoader
|
4 |
+
|
5 |
+
from langchain.text_splitter import CharacterTextSplitter
|
6 |
+
|
7 |
+
from langchain.llms import HuggingFaceHub
|
8 |
+
|
9 |
+
from langchain.embeddings import HuggingFaceHubEmbeddings
|
10 |
+
|
11 |
+
from langchain.vectorstores import Chroma
|
12 |
+
|
13 |
+
from langchain.chains import RetrievalQA
|
14 |
+
|
15 |
+
import os
|
16 |
+
import tempfile
|
17 |
+
import openai
|
18 |
+
import json
|
19 |
+
import re
|
20 |
+
from langchain.docstore.document import Document
|
21 |
+
from langchain.document_loaders import TextLoader
|
22 |
+
from langchain.text_splitter import CharacterTextSplitter
|
23 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
24 |
+
from langchain.vectorstores import FAISS
|
25 |
+
from langchain.chains import RetrievalQA
|
26 |
+
from langchain.llms import OpenAI
|
27 |
+
from langchain.document_loaders import PyPDFLoader
|
28 |
+
from langchain.indexes import VectorstoreIndexCreator
|
29 |
+
import tempfile
|
30 |
+
|
31 |
+
# Bridge the Space secret `OpenApi_Key` into the env var that the OpenAI /
# LangChain clients expect. Raises KeyError at import time if the secret is
# missing — presumably intentional fail-fast; confirm before changing.
os.environ["OPENAI_API_KEY"] = os.environ['OpenApi_Key']
# Running conversation transcript; appended to and re-summarised by infer().
query1=" "
# Count of answered questions; infer() stops serving once it exceeds its cap.
limit = 0
|
34 |
+
def loading_pdf():
    """Return the placeholder status shown while a PDF is being processed."""
    status = "Loading..."
    return status
|
36 |
+
|
37 |
+
def pdf_changes(pdf_doc, prompt):
    """Load a PDF, index it, and (re)build the global retrieval QA chain.

    Parameters:
        pdf_doc: uploaded file object from gr.File — only `.name` (a path/URL
            usable by OnlinePDFLoader) is read here.
        prompt: optional behaviour instruction appended verbatim to the
            system template (e.g. "Reply in Hindi").

    Returns "Ready" on success; side effect is rebinding the module-global
    `chain` used by infer().
    """
    loader = OnlinePDFLoader(pdf_doc.name)
    documents = loader.load()

    # NOTE(review): name_filter is never used below — dead variable.
    name_filter = "**/*.md"
    separator = "\n"
    chunk_size_limit = 1000
    max_chunk_overlap = 50

    # Split the document into ~1000-char chunks with 50-char overlap.
    text_splitter = CharacterTextSplitter(separator=separator, chunk_size=chunk_size_limit, chunk_overlap=max_chunk_overlap)
    split_docs = text_splitter.split_documents(documents)

    # Embed chunks with OpenAI and index them in an in-memory FAISS store.
    embeddings = OpenAIEmbeddings()
    vector_store1 = FAISS.from_documents(split_docs, embeddings)

    from langchain.prompts import (
        ChatPromptTemplate,
        SystemMessagePromptTemplate,
        HumanMessagePromptTemplate,
    )

    # {summaries} is the slot RetrievalQAWithSourcesChain fills with the
    # retrieved context. The user's `prompt` is spliced in between.
    # (Typo "relavant" kept byte-identical: this is a runtime string.)
    system_template="""You are a helpful chatbot used by the user to chat with pdf documents. Only answer the questions by using information provided in the context provided to you. If there is no relavant context, tell 'Hmm, I'm not sure'."""+prompt+"""
----------------
{summaries}"""

    messages = [
        SystemMessagePromptTemplate.from_template(system_template),
        HumanMessagePromptTemplate.from_template("{question}")
    ]
    prompt2 = ChatPromptTemplate.from_messages(messages)

    from langchain.chat_models import ChatOpenAI
    from langchain.chains import RetrievalQAWithSourcesChain
    # NOTE(review): `global query1` is declared but query1 is never assigned
    # in this function — the declaration is a no-op here.
    global query1
    chain_type_kwargs = {"prompt": prompt2}
    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, max_tokens=512)  # Modify model_name if you have access to GPT-4
    # Rebind the module-global chain consumed by infer().
    global chain
    chain = RetrievalQAWithSourcesChain.from_chain_type(
        llm=llm,
        chain_type="stuff",
        # k=2: retrieve the two most similar chunks per question.
        retriever=vector_store1.as_retriever(search_kwargs={'k': 2}),
        return_source_documents=True,
        chain_type_kwargs=chain_type_kwargs
    )
    return "Ready"
|
84 |
+
|
85 |
+
def add_text(history, text):
    """Append the user's message to the chat history as a (text, None) pair.

    Returns the new history plus an empty string that clears the textbox.
    The incoming list is not mutated; a fresh list is returned.
    """
    updated = [*history, (text, None)]
    return updated, ""
|
88 |
+
|
89 |
+
def bot(history):
    """Fill in the assistant's reply for the most recent user turn.

    Mutates the last (user, bot) pair in place and returns the history,
    as gradio's Chatbot callback contract expects.
    """
    latest_question = history[-1][0]
    history[-1][1] = infer(latest_question)
    return history
|
93 |
+
|
94 |
+
def infer(question):
    """Answer `question` against the loaded PDF via the global `chain`.

    Side effects: appends the turn to the global transcript `query1`, then
    replaces `query1` with an LLM-generated summary to bound context size,
    and increments the global usage counter `limit`.

    Returns the chain's answer string, or a usage-limit message once the
    cap is reached. Requires pdf_changes() to have built `chain` first.
    """
    global query1
    global limit
    openai.api_key = os.environ['OpenApi_Key']
    prompt_text = question
    # Fix: the original fell through on an empty question and re-ran the
    # chain on the stale transcript, producing a duplicate answer and
    # burning a usage slot. Bail out early instead.
    if not prompt_text:
        return ""
    query1 = query1 + "\nUser: " + prompt_text + "\nBot: "
    if limit <= 5:
        result = chain(query1)
        query1 = query1 + result['answer']
        # Compress the running transcript into a short summary so the next
        # query stays within the model's token budget.
        # Fix: corrected typos in the prompt sent to the model
        # ("latset" -> "latest", "conversationin" -> "conversation in").
        query1 = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are provided with chat history and latest conversation between user and bot. Summarise the history and latest conversation in minimum most tokens possible. Do not include greetings in the summary like hi, hello, etc."},
                {"role": "user", "content": query1},
            ]
        )["choices"][0]["message"]["content"].replace("'", "")
        limit += 1
        return result['answer']
    else:
        return "Usage Limit reached :("
|
115 |
+
|
116 |
+
# Page-level CSS: center the main column.
css="""
#col-container { margin-left: auto; margin-right: auto;}
"""

# Static header markup — currently unused (the gr.HTML(title) call below is
# commented out); kept for easy re-enabling.
title = """
<div style="text-align: center; max-width: 700px;">
<h1 style="color: #4545FF;">Chat with PDF</h1>
<p style="text-align: center; color: #4545FF;">Upload a .PDF from your computer, click the "Load PDF" button, <br />
when everything is ready, you can start asking questions about the pdf ;)</p>
</div>
"""


# UI layout: left column = PDF upload + behaviour prompt + load controls,
# right column = chatbot transcript + question box.
with gr.Blocks(css=css,theme = gr.themes.Soft()) as demo:
    with gr.Column(elem_id="col-container"):
        #gr.HTML(title)
        with gr.Row():
            with gr.Column(scale=1):
                pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
                prompt = gr.Textbox(label="Behaviour Prompt (optional)", placeholder="Reply to all questions as a rap / Reply to all questions in Hindi etc. ")
                #repo_id = gr.Dropdown(label="LLM", choices=["google/flan-ul2", "OpenAssistant/oasst-sft-1-pythia-12b", "bigscience/bloomz"], value="google/flan-ul2")
                with gr.Row():
                    langchain_status = gr.Textbox(label="Status", placeholder="Waiting for PDF", interactive=False,show_label=False)
                    load_pdf = gr.Button("Load pdf")
            with gr.Column(scale=2):
                chatbot = gr.Chatbot([], elem_id="chatbot",show_label=False,show_share_button=False).style(height=750)
                with gr.Row():
                    question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ",scale=6,show_label=False)
                    submit_btn = gr.Button("Send",scale=1)
    #load_pdf.click(loading_pdf, None, langchain_status, queue=False)
    #repo_id.change(pdf_changes, inputs=[pdf_doc], outputs=[langchain_status], queue=False)
    # Loading the PDF (re)builds the global retrieval chain via pdf_changes.
    load_pdf.click(pdf_changes, inputs=[pdf_doc,prompt], outputs=[langchain_status], queue=False)
    # Both Enter-in-textbox and the Send button follow the same two-step
    # flow: append the user turn, then let bot() fill in the answer.
    question.submit(add_text, [chatbot, question], [chatbot, question]).then(
        bot, chatbot, chatbot
    )
    submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
        bot, chatbot, chatbot
    )

demo.launch(debug=True)
|
requirements.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
huggingface_hub
|
3 |
+
chromadb
|
4 |
+
langchain
|
5 |
+
unstructured
|
6 |
+
unstructured[local-inference]
|
7 |
+
langchain
|
8 |
+
openai
|
9 |
+
chromadb
|
10 |
+
tiktoken
|
11 |
+
pypdf
|
12 |
+
faiss-cpu
|