wholewhale committed on
Commit
e8c47c5
1 Parent(s): 6f46024
Files changed (1) hide show
  1. app.py +31 -100
app.py CHANGED
@@ -8,12 +8,6 @@ from langchain.llms import OpenAI
8
  from langchain.embeddings import OpenAIEmbeddings
9
  from langchain.vectorstores import Chroma
10
  from langchain.chains import ConversationalRetrievalChain
11
- from langchain.chat_models import ChatOpenAI
12
- from langchain.document_loaders import WebBaseLoader
13
- from langchain.chains.summarize import load_summarize_chain
14
- from langchain.chains.llm import LLMChain
15
- from langchain.prompts import PromptTemplate
16
- from langchain.chains.combine_documents.stuff import StuffDocumentsChain
17
 
18
  os.environ['OPENAI_API_KEY'] = os.getenv("Your_API_Key")
19
 
@@ -23,62 +17,24 @@ last_interaction_time = 0
23
  def loading_pdf():
24
  return "Working on the upload. Also, pondering the usefulness of sporks..."
25
 
26
- # Inside Chroma mod
27
- def summary(self):
28
- num_documents = len(self.documents)
29
- avg_doc_length = sum(len(doc) for doc in self.documents) / num_documents
30
- return f"Number of documents: {num_documents}, Average document length: {avg_doc_length}"
31
-
32
- # PDF summary and query using stuffing
33
  def pdf_changes(pdf_doc):
34
- try:
35
- # Initialize loader and load documents
36
- loader = OnlinePDFLoader(pdf_doc.name)
37
- documents = loader.load()
38
-
39
- # Define the prompt for summarization
40
- prompt_template = """Write a concise summary of the following:
41
- "{text}"
42
- CONCISE SUMMARY:"""
43
- prompt = PromptTemplate.from_template(prompt_template)
44
-
45
- # Define the LLM chain with the specified prompt
46
- llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
47
- llm_chain = LLMChain(llm=llm, prompt=prompt)
48
-
49
- # Initialize StuffDocumentsChain
50
- stuff_chain = StuffDocumentsChain(
51
- llm_chain=llm_chain, document_variable_name="text"
52
- )
53
-
54
- # Generate summary using StuffDocumentsChain
55
- global full_summary
56
- full_summary = stuff_chain.run(documents)
57
-
58
- # Other existing logic for Chroma, embeddings, and retrieval
59
- embeddings = OpenAIEmbeddings()
60
- global db
61
- db = Chroma.from_documents(documents, embeddings)
62
-
63
- retriever = db.as_retriever()
64
- global qa
65
- qa = ConversationalRetrievalChain.from_llm(
66
- llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo-16k", max_tokens=-1, n=2),
67
- retriever=retriever,
68
- return_source_documents=False
69
- )
70
-
71
- return f"Ready. Full Summary loaded."
72
-
73
- except Exception as e:
74
- return f"Error processing PDF: {str(e)}"
75
-
76
-
77
 
78
  def clear_data():
79
- global qa, db
80
  qa = None
81
- db = None
82
  return "Data cleared"
83
 
84
  def add_text(history, text):
@@ -88,43 +44,31 @@ def add_text(history, text):
88
  return history, ""
89
 
90
  def bot(history):
91
- global full_summary
92
- if 'summary' in history[-1][0].lower(): # Check if the last question asks for a summary
93
- response = full_summary
94
- return full_summary
95
- else:
96
- response = infer(history[-1][0], history)
97
-
98
- sentences = ' \n'.join(response.split('. '))
99
- formatted_response = f"**Bot:**\n\n{sentences}"
100
  history[-1][1] = formatted_response
101
  return history
102
 
103
-
104
  def infer(question, history):
105
- try:
106
- res = []
107
- for human, ai in history[:-1]:
108
- pair = (human, ai)
109
- res.append(pair)
110
 
111
- chat_history = res
112
- query = question
113
- result = qa({"question": query, "chat_history": chat_history, "system": "This is a world-class summarizing AI, be helpful."})
114
- return result["answer"]
115
- except Exception as e:
116
- return f"Error querying chatbot: {str(e)}"
117
 
118
  def auto_clear_data():
119
- global qa, da, last_interaction_time
120
- if time.time() - last_interaction_time > 1000:
121
  qa = None
122
- db = None
123
 
124
  def periodic_clear():
125
  while True:
126
  auto_clear_data()
127
- time.sleep(1000)
128
 
129
  threading.Thread(target=periodic_clear).start()
130
 
@@ -136,17 +80,10 @@ title = """
136
  <div style="text-align: center;max-width: 700px;">
137
  <h1>CauseWriter Chat with PDF • OpenAI</h1>
138
  <p style="text-align: center;">Upload a .PDF from your computer, click the "Load PDF to LangChain" button, <br />
139
- when everything is ready, you can start asking questions about the pdf. Limit ~11k words. <br />
140
- This version is set to erase chat history automatically after page timeout and uses OpenAI.</p>
141
  </div>
142
  """
143
- # Global variable for tracking last interaction time
144
- last_interaction_time = 0
145
- full_summary = "" # Added global full_summary
146
-
147
- def update_summary_box():
148
- global full_summary
149
- return {"summary_box": full_summary}
150
 
151
  with gr.Blocks(css=css) as demo:
152
  with gr.Column(elem_id="col-container"):
@@ -158,19 +95,13 @@ with gr.Blocks(css=css) as demo:
158
  langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
159
  load_pdf = gr.Button("Convert PDF to Magic AI language")
160
  clear_btn = gr.Button("Clear Data")
161
-
162
- # New Textbox to display summary
163
- summary_box = gr.Textbox(label="Document Summary", placeholder="Summary will appear here.",
164
- interactive=False, rows=5, elem_id="summary_box")
165
 
166
  chatbot = gr.Chatbot([], elem_id="chatbot").style(height=450)
167
  question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter")
168
  submit_btn = gr.Button("Send Message")
169
 
170
  load_pdf.click(loading_pdf, None, langchain_status, queue=False)
171
- load_pdf.click(pdf_changes, inputs=[pdf_doc], outputs=[langchain_status], queue=False).then(
172
- update_summary_box, state={"summary_box": summary_box}
173
- ) # Then update the summary_box
174
  clear_btn.click(clear_data, outputs=[langchain_status], queue=False)
175
  question.submit(add_text, [chatbot, question], [chatbot, question]).then(
176
  bot, chatbot, chatbot
@@ -179,4 +110,4 @@ with gr.Blocks(css=css) as demo:
179
  bot, chatbot, chatbot
180
  )
181
 
182
- demo.launch()
 
8
  from langchain.embeddings import OpenAIEmbeddings
9
  from langchain.vectorstores import Chroma
10
  from langchain.chains import ConversationalRetrievalChain
 
 
 
 
 
 
11
 
12
  os.environ['OPENAI_API_KEY'] = os.getenv("Your_API_Key")
13
 
 
17
  def loading_pdf():
18
  return "Working on the upload. Also, pondering the usefulness of sporks..."
19
 
 
 
 
 
 
 
 
20
  def pdf_changes(pdf_doc):
21
+ loader = OnlinePDFLoader(pdf_doc.name)
22
+ documents = loader.load()
23
+ text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
24
+ texts = text_splitter.split_documents(documents)
25
+ embeddings = OpenAIEmbeddings()
26
+ db = Chroma.from_documents(texts, embeddings)
27
+ retriever = db.as_retriever()
28
+ global qa
29
+ qa = ConversationalRetrievalChain.from_llm(
30
+ llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo"),
31
+ retriever=retriever,
32
+ return_source_documents=False)
33
+ return "Ready"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  def clear_data():
36
+ global qa
37
  qa = None
 
38
  return "Data cleared"
39
 
40
  def add_text(history, text):
 
44
  return history, ""
45
 
46
  def bot(history):
47
+ response = infer(history[-1][0], history)
48
+ formatted_response = "**Bot:** \n" + ' \n'.join(response.split('. '))
 
 
 
 
 
 
 
49
  history[-1][1] = formatted_response
50
  return history
51
 
 
52
  def infer(question, history):
53
+ res = []
54
+ for human, ai in history[:-1]:
55
+ pair = (human, ai)
56
+ res.append(pair)
 
57
 
58
+ chat_history = res
59
+ query = question
60
+ result = qa({"question": query, "chat_history": chat_history, "system": "This is a world-class summarizing AI, be helpful."})
61
+ return result["answer"]
 
 
62
 
63
  def auto_clear_data():
64
+ global qa, last_interaction_time
65
+ if time.time() - last_interaction_time > 600:
66
  qa = None
 
67
 
68
  def periodic_clear():
69
  while True:
70
  auto_clear_data()
71
+ time.sleep(60)
72
 
73
  threading.Thread(target=periodic_clear).start()
74
 
 
80
  <div style="text-align: center;max-width: 700px;">
81
  <h1>CauseWriter Chat with PDF • OpenAI</h1>
82
  <p style="text-align: center;">Upload a .PDF from your computer, click the "Load PDF to LangChain" button, <br />
83
+ when everything is ready, you can start asking questions about the pdf. <br />
84
+ This version is set to store chat history and uses OpenAI as LLM.</p>
85
  </div>
86
  """
 
 
 
 
 
 
 
87
 
88
  with gr.Blocks(css=css) as demo:
89
  with gr.Column(elem_id="col-container"):
 
95
  langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
96
  load_pdf = gr.Button("Convert PDF to Magic AI language")
97
  clear_btn = gr.Button("Clear Data")
 
 
 
 
98
 
99
  chatbot = gr.Chatbot([], elem_id="chatbot").style(height=450)
100
  question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter")
101
  submit_btn = gr.Button("Send Message")
102
 
103
  load_pdf.click(loading_pdf, None, langchain_status, queue=False)
104
+ load_pdf.click(pdf_changes, inputs=[pdf_doc], outputs=[langchain_status], queue=False)
 
 
105
  clear_btn.click(clear_data, outputs=[langchain_status], queue=False)
106
  question.submit(add_text, [chatbot, question], [chatbot, question]).then(
107
  bot, chatbot, chatbot
 
110
  bot, chatbot, chatbot
111
  )
112
 
113
+ demo.launch()