Spaces:

whymath
/

Teach2LearnVirtualStudent

Sleeping

App Files Community

whymath committed on May 15, 2024

Commit

fbeca93

1 Parent(s): a9b778f

Add PDF upload Action and Assistants API

Browse files

Files changed (3) hide show

app.py +90 -7
requirements.txt +1 -0
utils.py +5 -8

app.py CHANGED Viewed

@@ -3,19 +3,41 @@ import chainlit as cl
 from dotenv import load_dotenv
 import utils
 load_dotenv()
 @cl.on_chat_start
 async def start_chat():
-    # Create the RAQA chain and store it in the user session
-    raqa_chain = utils.create_raqa_chain_from_docs()
     settings = {
-        "chain": raqa_chain
     }
     cl.user_session.set("settings", settings)
 @cl.on_message
 async def main(message: cl.Message):
@@ -25,13 +47,74 @@ async def main(message: cl.Message):
     # Get the chain from the user session
     settings = cl.user_session.get("settings")
-    raqa_chain = settings["chain"]
     # Generate the response from the chain
-    query_response = raqa_chain.invoke({"question" : user_query})
-    query_answer = query_response["response"].content
-    print('query_answer =', query_answer)
     # Create and send the message stream
     msg = cl.Message(content=query_answer)
     await msg.send()

 from dotenv import load_dotenv
 import utils
+from openai import AsyncOpenAI
+import time
 load_dotenv()
 @cl.on_chat_start
 async def start_chat():
+    # Create an OpenAI assistant
+    instructions = "You are a helpful assistant"
+    client = AsyncOpenAI()
+    assistant = client.beta.assistants.create(
+        name="T2L Virtual Student",
+        instructions=instructions,
+        model="gpt-3.5-turbo",
+    )
+    thread = client.beta.threads.create()
+    # Store the assistant and thread in the user session
     settings = {
+        "instructions": instructions,
+        "client": client,
+        "assistant": assistant,
+        "thread": thread
     }
     cl.user_session.set("settings", settings)
+    # Send a welcome message with an action button
+    actions = [
+        cl.Action(name="upload_pdf", value="upload_pdf_value", description="Upload a PDF")
+    ]
+    await cl.Message(content="You can choose to upload a PDF, or just start chatting", actions=actions).send()
 @cl.on_message
 async def main(message: cl.Message):
     # Get the chain from the user session
     settings = cl.user_session.get("settings")
+    instructions = settings["instructions"]
+    client = settings["client"]
+    assistant = settings["assistant"]
+    thread = settings["thread"]
+    raqa_chain = settings["raqa_chain"]
     # Generate the response from the chain
+    if raqa_chain:
+        print("Using RAQA chain to generate response")
+        query_response = raqa_chain.invoke({"question" : user_query})
+        query_answer = query_response["response"].content
+        print('query_answer =', query_answer)
+    else:
+        print("Using OpenAI assistant to generate response")
+        message = client.beta.threads.messages.create(
+            thread_id=thread.id,
+            role="user",
+            content=user_query
+        )
+        run = client.beta.threads.runs.create(
+            thread_id=thread.id,
+            assistant_id=assistant.id,
+            instructions=instructions
+        )
+        while run.status == "in_progress" or run.status == "queued":
+            time.sleep(1)
+            run = client.beta.threads.runs.retrieve(
+                thread_id=thread.id,
+                run_id=run.id
+            )
+        print("run.status =", run.status)
+        messages = client.beta.threads.messages.list(
+            thread_id=thread.id
+        )
+        query_answer = messages.data[0].content
     # Create and send the message stream
     msg = cl.Message(content=query_answer)
     await msg.send()
+@cl.action_callback("upload_pdf")
+async def upload_pdf_fn(action: cl.Action):
+    print("The user clicked on the action button!")
+    files = None
+    # Wait for the user to upload a file
+    while files == None:
+        files = await cl.AskFileMessage(
+            content="Waiting for file selection",
+            accept=["application/pdf"],
+            max_size_mb=20,
+            timeout=180,
+        ).send()
+    file = files[0]
+    msg = cl.Message(
+        content=f"Processing `{file.name}`...", disable_human_feedback=True
+    )
+    await msg.send()
+    # Create the RAQA chain and store it in the user session
+    raqa_chain = utils.create_raqa_chain_from_docs(file)
+    settings = {
+        "raqa_chain": raqa_chain
+    }
+    cl.user_session.set("settings", settings)
+    return "Thank you for clicking on the action button!"

requirements.txt CHANGED Viewed

@@ -11,3 +11,4 @@ pymupdf
 wandb
 chainlit
 huggingface_hub

 wandb
 chainlit
 huggingface_hub
+openai

utils.py CHANGED Viewed

@@ -28,12 +28,12 @@ def chunk_documents(docs, tiktoken_len):
     return split_chunks
-def create_raqa_chain_from_docs():
-    # Load the documents from a PDF file using PyMuPDFLoader
-    docs = PyMuPDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load() # TODO: Update this to enable user to upload PDF
-    print("Loaded", len(docs), "documents")
-    print(docs[0])
     # Create a Qdrant vector store from the split chunks and embedding model, and obtain its retriever
     split_chunks = chunk_documents(docs, tiktoken_len)
@@ -62,8 +62,5 @@ def create_raqa_chain_from_docs():
         | RunnablePassthrough.assign(context=itemgetter("context"))
         | {"response": rag_prompt | openai_chat_model, "context": itemgetter("context")}
     )
-    # retrieval_augmented_qa_chain = (
-    #     {"response": rag_prompt | openai_chat_model}
-    # )
     return retrieval_augmented_qa_chain

     return split_chunks
+def create_raqa_chain_from_docs(docs):
+    # # Load the documents from a PDF file using PyMuPDFLoader
+    # docs = PyMuPDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load() # TODO: Update this to enable user to upload PDF
+    # print("Loaded", len(docs), "documents")
+    # print(docs[0])
     # Create a Qdrant vector store from the split chunks and embedding model, and obtain its retriever
     split_chunks = chunk_documents(docs, tiktoken_len)
         | RunnablePassthrough.assign(context=itemgetter("context"))
         | {"response": rag_prompt | openai_chat_model, "context": itemgetter("context")}
     )
     return retrieval_augmented_qa_chain