whymath committed on
Commit
fbeca93
·
1 Parent(s): a9b778f

Add PDF upload Action and Assistants API

Browse files
Files changed (3) hide show
  1. app.py +90 -7
  2. requirements.txt +1 -0
  3. utils.py +5 -8
app.py CHANGED
@@ -3,19 +3,41 @@ import chainlit as cl
3
  from dotenv import load_dotenv
4
  import utils
5
 
 
 
 
6
 
7
  load_dotenv()
8
 
9
 
10
  @cl.on_chat_start
11
  async def start_chat():
12
- # Create the RAQA chain and store it in the user session
13
- raqa_chain = utils.create_raqa_chain_from_docs()
 
 
 
 
 
 
 
 
 
 
14
  settings = {
15
- "chain": raqa_chain
 
 
 
16
  }
17
  cl.user_session.set("settings", settings)
18
 
 
 
 
 
 
 
19
 
20
  @cl.on_message
21
  async def main(message: cl.Message):
@@ -25,13 +47,74 @@ async def main(message: cl.Message):
25
 
26
  # Get the chain from the user session
27
  settings = cl.user_session.get("settings")
28
- raqa_chain = settings["chain"]
 
 
 
 
29
 
30
  # Generate the response from the chain
31
- query_response = raqa_chain.invoke({"question" : user_query})
32
- query_answer = query_response["response"].content
33
- print('query_answer =', query_answer)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  # Create and send the message stream
36
  msg = cl.Message(content=query_answer)
37
  await msg.send()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  from dotenv import load_dotenv
4
  import utils
5
 
6
+ from openai import AsyncOpenAI
7
+ import time
8
+
9
 
10
  load_dotenv()
11
 
12
 
13
@cl.on_chat_start
async def start_chat():
    """Create an OpenAI assistant + thread, stash them in the user session,
    and offer the user an action button to upload a PDF."""

    # Create an OpenAI assistant.
    instructions = "You are a helpful assistant"
    client = AsyncOpenAI()
    # AsyncOpenAI methods are coroutines: without `await` these would store
    # un-run coroutine objects instead of the assistant/thread resources.
    assistant = await client.beta.assistants.create(
        name="T2L Virtual Student",
        instructions=instructions,
        model="gpt-3.5-turbo",
    )
    thread = await client.beta.threads.create()

    # Store the assistant and thread in the user session.
    settings = {
        "instructions": instructions,
        "client": client,
        "assistant": assistant,
        "thread": thread,
        # Sentinel so later lookups of "raqa_chain" don't KeyError before a
        # PDF has been uploaded; upload_pdf_fn replaces it with a real chain.
        "raqa_chain": None,
    }
    cl.user_session.set("settings", settings)

    # Send a welcome message with an action button.
    actions = [
        cl.Action(name="upload_pdf", value="upload_pdf_value", description="Upload a PDF")
    ]
    await cl.Message(content="You can choose to upload a PDF, or just start chatting", actions=actions).send()
40
+
41
 
42
@cl.on_message
async def main(message: cl.Message):
    """Answer the user's message, via the RAQA chain if a PDF was uploaded,
    otherwise via the OpenAI Assistants API."""
    import asyncio  # local import: needed for non-blocking polling below

    # The user's query is the text of the incoming message.
    # NOTE(review): this line is reconstructed from diff context elided by the
    # commit view (original lines 44-46) — confirm against the full file.
    user_query = message.content

    # Get the session settings.
    settings = cl.user_session.get("settings")
    instructions = settings["instructions"]
    client = settings["client"]
    assistant = settings["assistant"]
    thread = settings["thread"]
    # .get() avoids a KeyError when no PDF has been uploaded yet
    # (start_chat does not necessarily store a "raqa_chain" entry).
    raqa_chain = settings.get("raqa_chain")

    # Generate the response.
    if raqa_chain:
        print("Using RAQA chain to generate response")
        query_response = raqa_chain.invoke({"question": user_query})
        query_answer = query_response["response"].content
        print('query_answer =', query_answer)
    else:
        print("Using OpenAI assistant to generate response")
        # AsyncOpenAI calls are coroutines and must be awaited. Do not bind
        # the created message to `message` — that shadows the parameter.
        await client.beta.threads.messages.create(
            thread_id=thread.id,
            role="user",
            content=user_query
        )
        run = await client.beta.threads.runs.create(
            thread_id=thread.id,
            assistant_id=assistant.id,
            instructions=instructions
        )
        # Poll until the run finishes; asyncio.sleep keeps the event loop
        # responsive, whereas time.sleep would block all of Chainlit.
        while run.status in ("in_progress", "queued"):
            await asyncio.sleep(1)
            run = await client.beta.threads.runs.retrieve(
                thread_id=thread.id,
                run_id=run.id
            )
            print("run.status =", run.status)
        messages = await client.beta.threads.messages.list(
            thread_id=thread.id
        )
        # messages.data[0].content is a list of content blocks, not a string;
        # extract the text of the first block for display.
        query_answer = messages.data[0].content[0].text.value

    # Create and send the reply message.
    msg = cl.Message(content=query_answer)
    await msg.send()
89
+
90
+
91
@cl.action_callback("upload_pdf")
async def upload_pdf_fn(action: cl.Action):
    """Prompt the user for a PDF, build a RAQA chain from it, and store the
    chain in the user session for use by `main`."""
    print("The user clicked on the action button!")

    files = None

    # Wait for the user to upload a file (AskFileMessage returns None on
    # timeout, so keep asking until we get a file list).
    while files is None:
        files = await cl.AskFileMessage(
            content="Waiting for file selection",
            accept=["application/pdf"],
            max_size_mb=20,
            timeout=180,
        ).send()

    file = files[0]

    msg = cl.Message(
        content=f"Processing `{file.name}`...", disable_human_feedback=True
    )
    await msg.send()

    # Create the RAQA chain.
    # NOTE(review): utils.create_raqa_chain_from_docs declares a `docs`
    # parameter but receives a Chainlit file object here — confirm that the
    # helper actually loads the PDF from this object.
    raqa_chain = utils.create_raqa_chain_from_docs(file)

    # Merge into the existing session settings instead of replacing them:
    # overwriting the dict would drop the client/assistant/thread stored at
    # chat start and crash the next message handler lookup.
    settings = cl.user_session.get("settings") or {}
    settings["raqa_chain"] = raqa_chain
    cl.user_session.set("settings", settings)

    return "Thank you for clicking on the action button!"
requirements.txt CHANGED
@@ -11,3 +11,4 @@ pymupdf
11
  wandb
12
  chainlit
13
  huggingface_hub
 
 
11
  wandb
12
  chainlit
13
  huggingface_hub
14
+ openai
utils.py CHANGED
@@ -28,12 +28,12 @@ def chunk_documents(docs, tiktoken_len):
28
  return split_chunks
29
 
30
 
31
- def create_raqa_chain_from_docs():
32
 
33
- # Load the documents from a PDF file using PyMuPDFLoader
34
- docs = PyMuPDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load() # TODO: Update this to enable user to upload PDF
35
- print("Loaded", len(docs), "documents")
36
- print(docs[0])
37
 
38
  # Create a Qdrant vector store from the split chunks and embedding model, and obtain its retriever
39
  split_chunks = chunk_documents(docs, tiktoken_len)
@@ -62,8 +62,5 @@ def create_raqa_chain_from_docs():
62
  | RunnablePassthrough.assign(context=itemgetter("context"))
63
  | {"response": rag_prompt | openai_chat_model, "context": itemgetter("context")}
64
  )
65
- # retrieval_augmented_qa_chain = (
66
- # {"response": rag_prompt | openai_chat_model}
67
- # )
68
 
69
  return retrieval_augmented_qa_chain
 
28
  return split_chunks
29
 
30
 
31
+ def create_raqa_chain_from_docs(docs):
32
 
33
+ # # Load the documents from a PDF file using PyMuPDFLoader
34
+ # docs = PyMuPDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load() # TODO: Update this to enable user to upload PDF
35
+ # print("Loaded", len(docs), "documents")
36
+ # print(docs[0])
37
 
38
  # Create a Qdrant vector store from the split chunks and embedding model, and obtain its retriever
39
  split_chunks = chunk_documents(docs, tiktoken_len)
 
62
  | RunnablePassthrough.assign(context=itemgetter("context"))
63
  | {"response": rag_prompt | openai_chat_model, "context": itemgetter("context")}
64
  )
 
 
 
65
 
66
  return retrieval_augmented_qa_chain