Spaces:
Runtime error
Runtime error
File size: 5,232 Bytes
d3942c3 ba61386 d3942c3 a44b0d7 d3942c3 a44b0d7 d3942c3 95b96e9 a44b0d7 d3942c3 a44b0d7 d3942c3 a44b0d7 d3942c3 95b96e9 a44b0d7 d3942c3 a44b0d7 d3942c3 a44b0d7 d3942c3 a44b0d7 95b96e9 d3942c3 a44b0d7 95b96e9 a44b0d7 d3942c3 a44b0d7 d3942c3 a44b0d7 d3942c3 a44b0d7 d3942c3 ccba092 d3942c3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
import os
from typing import List
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import (
ConversationalRetrievalChain,
)
from langchain.document_loaders import PyPDFLoader
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
HumanMessagePromptTemplate,
)
from langchain.docstore.document import Document
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
import chainlit as cl
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
system_template = """Use the following pieces of context to answer the users question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.
The "SOURCES" part should be a reference to the source of the document from which you got your answer.
And if the user greets with greetings like Hi, hello, How are you, etc reply accordingly as well.
Example of your response should be:
The answer is foo
SOURCES: xyz
Begin!
----------------
{summaries}"""
messages = [
SystemMessagePromptTemplate.from_template(system_template),
HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate.from_messages(messages)
chain_type_kwargs = {"prompt": prompt}
def process_file(file_path: str):
# Example using PyPDF2 to extract text from a PDF file
from PyPDF2 import PdfReader
reader = PdfReader(file_path)
texts = []
for page in reader.pages:
texts.append(page.extract_text())
return texts
@cl.on_chat_start
async def on_chat_start():
file = None
# Prompt users to upload either a text or PDF file
while file is None:
files = await cl.AskFileMessage(
content="Please upload a text or PDF file to begin!",
accept=["text/plain", "application/pdf"], # This line is for UI guidance
max_size_mb=20,
timeout=180,
).send()
if files:
file = files[0] # Assuming the user uploads one file at a time
filename = file.name
# Initialize an empty list for texts, which will be populated based on the file type
texts = []
# Process the file based on its extension
if filename.endswith('.txt'):
# Handle as text file
with open(file.path, "r", encoding="utf-8") as f:
text = f.read()
texts.append(text)
await cl.Message(content=f"`{filename}` uploaded, it contains {len(text)} characters!").send()
elif filename.endswith('.pdf'):
# Handle as PDF
texts = process_file(file.path) # Adjust this call according to your PDF processing implementation
else:
await cl.Message(content="Unsupported file type uploaded. Please upload a text or PDF file.").send()
return # Exit if the file type is not supported
# Process texts for conversational retrieval or other purposes here
# For demonstration, we'll just set up a simple Chroma vector store and conversational retrieval chain
# Create a Chroma vector store
embeddings = OpenAIEmbeddings()
docsearch = await cl.make_async(Chroma.from_texts)(
texts, embeddings, metadatas=[{"source": f"{i}-pl"} for i in range(len(texts))]
)
message_history = ChatMessageHistory()
memory = ConversationBufferMemory(
memory_key="chat_history",
output_key="answer",
chat_memory=message_history,
return_messages=True,
)
# Set up the conversational retrieval chain
chain = ConversationalRetrievalChain.from_llm(
ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True),
chain_type="stuff",
retriever=docsearch.as_retriever(),
memory=memory,
return_source_documents=True,
)
# Let the user know that the system is ready
await cl.Message(content=f"Your file `{filename}` is now ready for questions!").send()
# Save the chain in the user session for later use
cl.user_session.set("chain", chain)
@cl.on_message
async def main(message):
chain = cl.user_session.get("chain") # type: ConversationalRetrievalChain
cb = cl.AsyncLangchainCallbackHandler()
res = await chain.acall(message.content, callbacks=[cb])
answer = res["answer"]
source_documents = res["source_documents"] # type: List[Document]
text_elements = [] # type: List[cl.Text]
if source_documents:
for source_idx, source_doc in enumerate(source_documents):
source_name = f"source_{source_idx}"
# Create the text element referenced in the message
text_elements.append(
cl.Text(content=source_doc.page_content, name=source_name)
)
source_names = [text_el.name for text_el in text_elements]
if source_names:
answer += f"\nSources: {', '.join(source_names)}"
else:
answer += "\nNo sources found"
await cl.Message(content=answer, elements=text_elements).send()
|