wholewhale committed on
Commit
4dcf9b3
1 Parent(s): d8804c0

auto summary

Browse files
Files changed (1) hide show
  1. app.py +86 -38
app.py CHANGED
@@ -8,58 +8,77 @@ from langchain.llms import OpenAI
8
  from langchain.embeddings import OpenAIEmbeddings
9
  from langchain.vectorstores import Chroma
10
  from langchain.chains import ConversationalRetrievalChain
 
 
 
 
 
 
11
 
12
-
13
- os.environ['ANTHROPIC_API_KEY'] = os.getenv("Your_Anthropic_API_Key")
14
  os.environ['OPENAI_API_KEY'] = os.getenv("Your_API_Key")
15
 
16
  # Global variable for tracking last interaction time
17
  last_interaction_time = 0
18
 
19
- # Initialize the Anthropic model instead of OpenAI
20
- from anthropic import LanguageModel
21
-
22
- anthropic_model = LanguageModel(api_key=os.environ['ANTHROPIC_API_KEY'], model="some_model")
23
-
24
-
25
-
26
  def loading_pdf():
27
  return "Working on the upload. Also, pondering the usefulness of sporks..."
28
 
 
 
 
 
 
 
 
29
  def pdf_changes(pdf_doc):
30
  try:
31
- if pdf_doc is None:
32
- return "No PDF uploaded."
33
-
34
  loader = OnlinePDFLoader(pdf_doc.name)
35
  documents = loader.load()
36
-
37
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
38
- texts = text_splitter.split_documents(documents)
39
-
40
- # Replace this with your appropriate embeddings class
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  embeddings = OpenAIEmbeddings()
42
-
43
  global db
44
- db = Chroma.from_documents(texts, embeddings)
 
45
  retriever = db.as_retriever()
46
-
47
  global qa
48
  qa = ConversationalRetrievalChain.from_llm(
49
- llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo"),
50
- retriever=retriever,
51
  return_source_documents=False
52
  )
53
-
54
- return "Ready"
55
 
56
  except Exception as e:
57
- return f"Error processing PDF: {e}"
 
58
 
59
 
60
  def clear_data():
61
- global qa
62
  qa = None
 
63
  return "Data cleared"
64
 
65
  def add_text(history, text):
@@ -69,27 +88,43 @@ def add_text(history, text):
69
  return history, ""
70
 
71
  def bot(history):
72
- response = infer_anthropic(history[-1][0], history) # Call the new infer function
 
 
 
 
 
 
73
  sentences = ' \n'.join(response.split('. '))
74
  formatted_response = f"**Bot:**\n\n{sentences}"
75
  history[-1][1] = formatted_response
76
  return history
77
 
78
- def infer_anthropic(question, history):
79
- chat_history = [(human, ai) for human, ai in history[:-1]]
80
- query = question
81
- result = anthropic_model.query(query, context=chat_history)
82
- return result['answer']
 
 
 
 
 
 
 
 
 
83
 
84
  def auto_clear_data():
85
- global qa, last_interaction_time
86
- if time.time() - last_interaction_time > 600:
87
  qa = None
 
88
 
89
  def periodic_clear():
90
  while True:
91
  auto_clear_data()
92
- time.sleep(60)
93
 
94
  threading.Thread(target=periodic_clear).start()
95
 
@@ -101,10 +136,17 @@ title = """
101
  <div style="text-align: center;max-width: 700px;">
102
  <h1>CauseWriter Chat with PDF • OpenAI</h1>
103
  <p style="text-align: center;">Upload a .PDF from your computer, click the "Load PDF to LangChain" button, <br />
104
- when everything is ready, you can start asking questions about the pdf. <br />
105
- This version is set to store chat history and uses OpenAI as LLM.</p>
106
  </div>
107
  """
 
 
 
 
 
 
 
108
 
109
  with gr.Blocks(css=css) as demo:
110
  with gr.Column(elem_id="col-container"):
@@ -116,13 +158,19 @@ with gr.Blocks(css=css) as demo:
116
  langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
117
  load_pdf = gr.Button("Convert PDF to Magic AI language")
118
  clear_btn = gr.Button("Clear Data")
 
 
 
 
119
 
120
  chatbot = gr.Chatbot([], elem_id="chatbot").style(height=450)
121
  question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter")
122
  submit_btn = gr.Button("Send Message")
123
 
124
  load_pdf.click(loading_pdf, None, langchain_status, queue=False)
125
- load_pdf.click(pdf_changes, inputs=[pdf_doc], outputs=[langchain_status], queue=False)
 
 
126
  clear_btn.click(clear_data, outputs=[langchain_status], queue=False)
127
  question.submit(add_text, [chatbot, question], [chatbot, question]).then(
128
  bot, chatbot, chatbot
 
8
  from langchain.embeddings import OpenAIEmbeddings
9
  from langchain.vectorstores import Chroma
10
  from langchain.chains import ConversationalRetrievalChain
11
+ from langchain.chat_models import ChatOpenAI
12
+ from langchain.document_loaders import WebBaseLoader
13
+ from langchain.chains.summarize import load_summarize_chain
14
+ from langchain.chains.llm import LLMChain
15
+ from langchain.prompts import PromptTemplate
16
+ from langchain.chains.combine_documents.stuff import StuffDocumentsChain
17
 
 
 
18
  os.environ['OPENAI_API_KEY'] = os.getenv("Your_API_Key")
19
 
20
  # Global variable for tracking last interaction time
21
  last_interaction_time = 0
22
 
 
 
 
 
 
 
 
23
def loading_pdf() -> str:
    """Return the placeholder status text shown while a PDF is processed.

    Registered on the same button click as ``pdf_changes`` so the status box
    has something to display while the slower handler runs.
    """
    return "Working on the upload. Also, pondering the usefulness of sporks..."
25
 
26
# Inside Chroma mod
# NOTE(review): this is defined at module level and is not called anywhere in
# the visible code; it only works on an object exposing a ``documents``
# sequence — confirm where (or whether) it is meant to be attached.
def summary(self):
    """Describe how many documents ``self`` holds and their average length.

    Args:
        self: Any object with a ``documents`` sequence. Items may be plain
            strings or objects that carry their text in ``page_content``.

    Returns:
        A string of the form
        ``"Number of documents: N, Average document length: X"``. For an
        empty sequence the average is reported as ``0`` rather than raising
        ``ZeroDivisionError``.
    """
    docs = self.documents
    num_documents = len(docs)
    if not num_documents:
        return "Number of documents: 0, Average document length: 0"

    # Measure the text itself when the item wraps it in ``page_content``;
    # plain strings (and anything else with ``len``) are measured directly.
    total_length = sum(len(getattr(doc, "page_content", doc)) for doc in docs)
    avg_doc_length = total_length / num_documents
    return f"Number of documents: {num_documents}, Average document length: {avg_doc_length}"
31
+
32
# PDF summary and query using stuffing
def pdf_changes(pdf_doc):
    """Load an uploaded PDF, summarise it and build the retrieval QA chain.

    Side effects: rebinds the module globals ``full_summary`` (summary text),
    ``db`` (Chroma vector store) and ``qa`` (ConversationalRetrievalChain).

    Args:
        pdf_doc: The value of the upload component (presumably a temp-file
            object); only its ``.name`` attribute is read. ``None`` when
            nothing has been uploaded.

    Returns:
        A status string for the status textbox: the ready message,
        ``"No PDF uploaded."``, or ``"Error processing PDF: ..."``.
    """
    global full_summary, db, qa

    # Guard explicitly so an empty upload yields a clear status instead of
    # "Error processing PDF: 'NoneType' object has no attribute 'name'".
    if pdf_doc is None:
        return "No PDF uploaded."

    try:
        # Initialize loader and load documents
        loader = OnlinePDFLoader(pdf_doc.name)
        documents = loader.load()

        # Define the prompt for summarization
        prompt_template = (
            "Write a concise summary of the following:\n"
            '"{text}"\n'
            "CONCISE SUMMARY:"
        )
        prompt = PromptTemplate.from_template(prompt_template)

        # Define the LLM chain with the specified prompt
        llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
        llm_chain = LLMChain(llm=llm, prompt=prompt)

        # "Stuffing" puts every loaded document into one prompt, so the whole
        # PDF has to fit in the model's context window.
        stuff_chain = StuffDocumentsChain(
            llm_chain=llm_chain, document_variable_name="text"
        )
        full_summary = stuff_chain.run(documents)

        # Embed the documents and expose them through a retriever
        embeddings = OpenAIEmbeddings()
        db = Chroma.from_documents(documents, embeddings)
        retriever = db.as_retriever()

        # gpt-3.5-turbo-16k is a chat model, so use the chat wrapper rather
        # than the completion-style ``OpenAI`` class. This also drops
        # ``n=2``, which requested a second completion that was never used.
        qa = ConversationalRetrievalChain.from_llm(
            llm=ChatOpenAI(temperature=0.2, model_name="gpt-3.5-turbo-16k"),
            retriever=retriever,
            return_source_documents=False,
        )
        return "Ready. Full Summary loaded."
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return f"Error processing PDF: {e}"
75
+
76
 
77
 
78
def clear_data():
    """Drop all state derived from the loaded PDF.

    Resets the module globals ``qa`` and ``db`` to ``None`` and
    ``full_summary`` to an empty string, so a later "summary" question cannot
    return text from a document the user has already cleared.

    Returns:
        The status string ``"Data cleared"``.
    """
    global qa, db, full_summary
    qa = None
    db = None
    full_summary = ""
    return "Data cleared"
83
 
84
  def add_text(history, text):
 
88
  return history, ""
89
 
90
def bot(history):
    """Fill in the bot reply for the most recent chat turn.

    Questions containing the word "summary" (case-insensitive) are answered
    from the module global ``full_summary``; everything else goes to
    ``infer``.

    Args:
        history: Chat history as a list of ``[user_message, bot_message]``
            pairs; the last pair holds the pending question.

    Returns:
        The same ``history`` list with the last pair's reply filled in. Every
        path returns the history, because the chatbot output expects the full
        list rather than a bare string.
    """
    question = history[-1][0]
    if 'summary' in question.lower():
        # No early return here: the summary goes through the same formatting
        # and history update as any other answer.
        response = full_summary
    else:
        response = infer(question, history)

    # Put each sentence on its own line for readability in the chat pane.
    sentences = ' \n'.join(response.split('. '))
    formatted_response = f"**Bot:**\n\n{sentences}"
    history[-1][1] = formatted_response
    return history
102
 
103
+
104
def infer(question, history):
    """Ask the retrieval chain ``qa`` a question, with prior turns as context.

    Args:
        question: The user's latest message.
        history: Chat history as ``[user, bot]`` pairs; the final pair is the
            pending turn and is excluded from the context sent to the chain.

    Returns:
        The chain's ``"answer"`` value, or a human-readable message when no
        document is loaded or the chain raises.
    """
    # ``qa`` is only bound after a PDF has been processed and is reset to None
    # on clear; look it up defensively so both cases give a clear message.
    chain = globals().get("qa")
    if chain is None:
        return "No document loaded. Please upload and convert a PDF first."

    try:
        # The chain receives prior turns as (human, ai) tuples.
        chat_history = [(human, ai) for human, ai in history[:-1]]
        result = chain({
            "question": question,
            "chat_history": chat_history,
            # NOTE(review): "system" does not look like an input the chain
            # reads, so this is presumably ignored — confirm before relying
            # on it to steer the model.
            "system": "This is a world-class summarizing AI, be helpful.",
        })
        return result["answer"]
    except Exception as e:
        # UI boundary: surface the failure in the chat instead of raising.
        return f"Error querying chatbot: {e}"
117
 
118
def auto_clear_data(timeout=1000):
    """Discard loaded document state once the app has been idle too long.

    Compares the current time with the module global
    ``last_interaction_time`` and, when the gap exceeds ``timeout``, resets
    ``qa`` and ``db`` to ``None`` and ``full_summary`` to an empty string.

    Args:
        timeout: Idle time in seconds after which state is dropped. Defaults
            to 1000, the previously hard-coded value.
    """
    # ``db`` must be declared global here; otherwise the assignment below only
    # creates a local and the vector store is never released.
    global qa, db, full_summary
    if time.time() - last_interaction_time > timeout:
        qa = None
        db = None
        full_summary = ""
123
 
124
def periodic_clear():
    """Run ``auto_clear_data`` forever, sleeping 1000 seconds between runs."""
    while True:
        auto_clear_data()
        time.sleep(1000)


# Daemon thread: the loop above never returns, so a non-daemon thread would
# keep the interpreter alive after the main thread has exited.
threading.Thread(target=periodic_clear, daemon=True).start()
130
 
 
136
  <div style="text-align: center;max-width: 700px;">
137
  <h1>CauseWriter Chat with PDF • OpenAI</h1>
138
  <p style="text-align: center;">Upload a .PDF from your computer, click the "Load PDF to LangChain" button, <br />
139
+ when everything is ready, you can start asking questions about the pdf. Limit ~11k words. <br />
140
+ This version is set to erase chat history automatically after page timeout and uses OpenAI.</p>
141
  </div>
142
  """
143
# Summary text of the most recently processed PDF; empty until one is loaded.
full_summary = ""


def update_summary_box():
    """Return the cached document summary for display in the summary textbox.

    The value is returned as a plain string so the handler can be registered
    with the textbox as its single output, e.g.
    ``.then(update_summary_box, None, summary_box)``. A dict keyed by the
    string ``"summary_box"`` cannot be mapped to a component.
    """
    return full_summary
150
 
151
  with gr.Blocks(css=css) as demo:
152
  with gr.Column(elem_id="col-container"):
 
158
  langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
159
  load_pdf = gr.Button("Convert PDF to Magic AI language")
160
  clear_btn = gr.Button("Clear Data")
161
+
162
+ # New Textbox to display summary
163
+ summary_box = gr.Textbox(label="Document Summary", placeholder="Summary will appear here.",
164
+ interactive=False, rows=5, elem_id="summary_box")
165
 
166
  chatbot = gr.Chatbot([], elem_id="chatbot").style(height=450)
167
  question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter")
168
  submit_btn = gr.Button("Send Message")
169
 
170
  load_pdf.click(loading_pdf, None, langchain_status, queue=False)
171
+ load_pdf.click(pdf_changes, inputs=[pdf_doc], outputs=[langchain_status], queue=False).then(
172
+ update_summary_box, state={"summary_box": summary_box}
173
+ ) # Then update the summary_box
174
  clear_btn.click(clear_data, outputs=[langchain_status], queue=False)
175
  question.submit(add_text, [chatbot, question], [chatbot, question]).then(
176
  bot, chatbot, chatbot