NAB1108 committed on
Commit
0196549
0 Parent(s):

Duplicate from NAB1108/PDFchat

Browse files
Files changed (4) hide show
  1. .gitattributes +35 -0
  2. README.md +13 -0
  3. app.py +155 -0
  4. requirements.txt +12 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: PDFchat
3
+ emoji: 🚀
4
+ colorFrom: yellow
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 3.41.2
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: NAB1108/PDFchat
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from langchain.document_loaders import OnlinePDFLoader
4
+
5
+ from langchain.text_splitter import CharacterTextSplitter
6
+
7
+ from langchain.llms import HuggingFaceHub
8
+
9
+ from langchain.embeddings import HuggingFaceHubEmbeddings
10
+
11
+ from langchain.vectorstores import Chroma
12
+
13
+ from langchain.chains import RetrievalQA
14
+
15
+ import os
16
+ import tempfile
17
+ import openai
18
+ import json
19
+ import re
20
+ from langchain.docstore.document import Document
21
+ from langchain.document_loaders import TextLoader
22
+ from langchain.text_splitter import CharacterTextSplitter
23
+ from langchain.embeddings.openai import OpenAIEmbeddings
24
+ from langchain.vectorstores import FAISS
25
+ from langchain.chains import RetrievalQA
26
+ from langchain.llms import OpenAI
27
+ from langchain.document_loaders import PyPDFLoader
28
+ from langchain.indexes import VectorstoreIndexCreator
29
+ import tempfile
30
+
31
# Expose the Space secret `OpenApi_Key` under the name the OpenAI SDK expects.
os.environ["OPENAI_API_KEY"] = os.environ['OpenApi_Key']
# Running (summarised) conversation history fed back into the QA chain.
query1 = " "
# Count of answered questions; infer() enforces a hard usage cap with it.
limit = 0
34
def loading_pdf():
    """Return the placeholder status text shown while a PDF is processed."""
    return "Loading..."
36
+
37
def pdf_changes(pdf_doc, prompt):
    """Index an uploaded PDF and build the retrieval-QA chain for it.

    Parameters
    ----------
    pdf_doc : gradio file wrapper
        Uploaded file; only its ``.name`` (a filesystem path) is used.
    prompt : str
        Optional extra behaviour instructions spliced into the system prompt.

    Side effect: stores the built chain in the module-level ``chain`` global,
    which ``infer`` reads.  Returns the status string "Ready" for the UI.
    """
    # Function-local imports keep the heavy langchain machinery off the
    # module import path until a PDF is actually loaded.
    from langchain.prompts import (
        ChatPromptTemplate,
        SystemMessagePromptTemplate,
        HumanMessagePromptTemplate,
    )
    from langchain.chat_models import ChatOpenAI
    from langchain.chains import RetrievalQAWithSourcesChain

    loader = OnlinePDFLoader(pdf_doc.name)
    documents = loader.load()

    # Split into ~1000-char chunks with 50-char overlap on newline boundaries.
    # (Removed unused `name_filter` variable from the original.)
    text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=50)
    split_docs = text_splitter.split_documents(documents)

    embeddings = OpenAIEmbeddings()
    vector_store1 = FAISS.from_documents(split_docs, embeddings)

    # Restrict answers to retrieved context; the user's behaviour prompt is
    # appended to the base instructions.  (Fixed typo: "relavant" -> "relevant".)
    system_template = """You are a helpful chatbot used by the user to chat with pdf documents. Only answer the questions by using information provided in the context provided to you. If there is no relevant context, tell 'Hmm, I'm not sure'.""" + prompt + """
    ----------------
    {summaries}"""

    messages = [
        SystemMessagePromptTemplate.from_template(system_template),
        HumanMessagePromptTemplate.from_template("{question}"),
    ]
    prompt2 = ChatPromptTemplate.from_messages(messages)

    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, max_tokens=512)  # Modify model_name if you have access to GPT-4

    global chain
    chain = RetrievalQAWithSourcesChain.from_chain_type(
        llm=llm,
        chain_type="stuff",
        # k=2: only the two most relevant chunks are stuffed into the prompt.
        retriever=vector_store1.as_retriever(search_kwargs={'k': 2}),
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt2},
    )
    return "Ready"
84
+
85
def add_text(history, text):
    """Append the user's message to the chat history with no bot reply yet.

    Returns the extended history plus an empty string, which Gradio uses to
    clear the question textbox.
    """
    return history + [(text, None)], ""
88
+
89
def bot(history):
    """Answer the most recent user turn and fill it into the history in place."""
    latest_question = history[-1][0]
    history[-1][1] = infer(latest_question)
    return history
93
+
94
def infer(question):
    """Answer one user question against the loaded PDF.

    Uses the module-level ``chain`` built by ``pdf_changes``, and keeps a
    compressed running conversation summary in the ``query1`` global so the
    prompt stays small across turns.  ``limit`` caps usage at 6 answers.

    Raises NameError if called before ``pdf_changes`` has built ``chain``.
    """
    global query1
    global limit
    openai.api_key = os.environ['OpenApi_Key']
    if not question:
        # Fix: the original fell through for an empty question and returned
        # None, which rendered as a blank bot turn in the chat window.
        return "Please type a question."
    if limit > 5:
        return "Usage Limit reached :("
    query1 = query1 + "\nUser: " + question + "\nBot: "
    result = chain(query1)
    query1 = query1 + result['answer']
    # Summarise the accumulated history so it does not grow without bound.
    # Uses the openai<1.0 ChatCompletion API that this Space is pinned to.
    # (Fixed typos in the instruction: "latset" -> "latest",
    # "conversationin" -> "conversation in".)
    query1 = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are provided with chat history and latest conversation between user and bot. Summarise the history and latest conversation in minimum most tokens possible. Do not include greetings in the summary like hi, hello, etc."},
            {"role": "user", "content": query1},
        ],
    )["choices"][0]["message"]["content"].replace("'", "")
    limit += 1
    return result['answer']
115
+
116
+ css="""
117
+ #col-container { margin-left: auto; margin-right: auto;}
118
+ """
119
+
120
+ title = """
121
+ <div style="text-align: center; max-width: 700px;">
122
+ <h1 style="color: #4545FF;">Chat with PDF</h1>
123
+ <p style="text-align: center; color: #4545FF;">Upload a .PDF from your computer, click the "Load PDF" button, <br />
124
+ when everything is ready, you can start asking questions about the pdf ;)</p>
125
+ </div>
126
+ """
127
+
128
+
129
+ with gr.Blocks(css=css,theme = gr.themes.Soft()) as demo:
130
+ with gr.Column(elem_id="col-container"):
131
+ #gr.HTML(title)
132
+ with gr.Row():
133
+ with gr.Column(scale=1):
134
+ pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
135
+ prompt = gr.Textbox(label="Behaviour Prompt (optional)", placeholder="Reply to all questions as a rap / Reply to all questions in Hindi etc. ")
136
+ #repo_id = gr.Dropdown(label="LLM", choices=["google/flan-ul2", "OpenAssistant/oasst-sft-1-pythia-12b", "bigscience/bloomz"], value="google/flan-ul2")
137
+ with gr.Row():
138
+ langchain_status = gr.Textbox(label="Status", placeholder="Waiting for PDF", interactive=False,show_label=False)
139
+ load_pdf = gr.Button("Load pdf")
140
+ with gr.Column(scale=2):
141
+ chatbot = gr.Chatbot([], elem_id="chatbot",show_label=False,show_share_button=False).style(height=750)
142
+ with gr.Row():
143
+ question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ",scale=6,show_label=False)
144
+ submit_btn = gr.Button("Send",scale=1)
145
+ #load_pdf.click(loading_pdf, None, langchain_status, queue=False)
146
+ #repo_id.change(pdf_changes, inputs=[pdf_doc], outputs=[langchain_status], queue=False)
147
+ load_pdf.click(pdf_changes, inputs=[pdf_doc,prompt], outputs=[langchain_status], queue=False)
148
+ question.submit(add_text, [chatbot, question], [chatbot, question]).then(
149
+ bot, chatbot, chatbot
150
+ )
151
+ submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
152
+ bot, chatbot, chatbot
153
+ )
154
+
155
+ demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ huggingface_hub
3
+ chromadb
4
+ langchain
5
+ unstructured
6
+ unstructured[local-inference]
7
+ langchain
8
+ openai
9
+ chromadb
10
+ tiktoken
11
+ pypdf
12
+ faiss-cpu