| | import os |
| | from typing import List |
| |
|
| | from langchain.embeddings.openai import OpenAIEmbeddings |
| | from langchain.text_splitter import RecursiveCharacterTextSplitter |
| | from langchain.vectorstores import Chroma |
| | from langchain.chains import ( |
| | ConversationalRetrievalChain, |
| | ) |
| | from langchain.document_loaders import PyPDFLoader |
| | from langchain.chat_models import ChatOpenAI |
| | from langchain.prompts.chat import ( |
| | ChatPromptTemplate, |
| | SystemMessagePromptTemplate, |
| | HumanMessagePromptTemplate, |
| | ) |
| | from langchain.docstore.document import Document |
| | from langchain.memory import ChatMessageHistory, ConversationBufferMemory |
| | from chainlit.types import AskFileResponse |
| |
|
| | import chainlit as cl |
| |
|
# Module-level splitter configured with chunk_size=1000 and chunk_overlap=100.
# NOTE(review): nothing in the visible code references `text_splitter`;
# `process_file` calls `load_and_split()` without arguments. Confirm whether
# it was meant to be passed there.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

# System prompt text. `{summaries}` is its only template variable; the rest
# instructs the model to answer from the supplied context, admit when it does
# not know, always include a "SOURCES" part, and respond to greetings.
system_template = """Use the following pieces of context to answer the users question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.
The "SOURCES" part should be a reference to the source of the document from which you got your answer.

And if the user greets with greetings like Hi, hello, How are you, etc reply accordingly as well.

Example of your response should be:

The answer is foo
SOURCES: xyz


Begin!
----------------
{summaries}"""
# Two-message chat prompt: the system template above followed by a human
# message that consists solely of the `{question}` variable.
messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate.from_messages(messages)
# NOTE(review): `chain_type_kwargs` (and therefore `prompt`) is not passed to
# the chain built in `on_chat_start` in the visible code; verify whether it is
# still needed.
chain_type_kwargs = {"prompt": prompt}
| |
|
| |
|
def process_file(file: AskFileResponse) -> List[str]:
    """Extract the text chunks of an uploaded PDF.

    The uploaded bytes are written to a temporary file because
    ``PyPDFLoader`` is constructed from a file path. The file is loaded and
    split with the loader's ``load_and_split()`` and removed afterwards.

    Args:
        file: Upload response whose ``content`` attribute holds the raw PDF
            bytes.

    Returns:
        The ``page_content`` of every chunk produced by the loader, in the
        order the loader returned them.
    """
    import tempfile

    # Write the bytes once, in binary mode, and close the handle before the
    # loader reopens the path: reopening a still-open NamedTemporaryFile by
    # name is not possible on Windows. delete=False keeps the file alive past
    # the `with`; it is removed explicitly below.
    with tempfile.NamedTemporaryFile(
        mode="wb", suffix=".pdf", delete=False
    ) as pdf_file:
        pdf_file.write(file.content)
        pdf_path = pdf_file.name

    try:
        # NOTE(review): load_and_split() is called without a splitter, so the
        # loader's default splitting applies rather than the module-level
        # `text_splitter` -- confirm this is intended.
        documents = PyPDFLoader(pdf_path).load_and_split()
    finally:
        # Always clean up, even when parsing fails, so uploads do not
        # accumulate in the temp directory.
        os.unlink(pdf_path)

    return [document.page_content for document in documents]
| |
|
| |
|
@cl.on_chat_start
async def on_chat_start():
    """Ask for a PDF, index it, and store a retrieval chain in the session.

    Steps:
      1. Prompt for a PDF upload until one is received.
      2. Extract text chunks with ``process_file``.
      3. Embed the chunks into a Chroma vector store.
      4. Build a ``ConversationalRetrievalChain`` with buffer memory and save
         it in the user session under the key ``"chain"``.

    If no text can be extracted from the PDF, the status message is updated
    to say so and no chain is stored.
    """
    files = None

    # send() can come back without a file (presumably when the prompt times
    # out), so keep asking until something is uploaded.
    while not files:
        files = await cl.AskFileMessage(
            content="Please upload a PDF file to begin!",
            accept=["application/pdf"],
            max_size_mb=20,
            timeout=180,
        ).send()

    file = files[0]

    msg = cl.Message(
        content=f"Processing `{file.name}`...", disable_human_feedback=True
    )
    await msg.send()

    # PDF parsing is synchronous; run it through make_async (as is done for
    # the Chroma call below) so it does not block the event loop.
    texts = await cl.make_async(process_file)(file)

    if not texts:
        # Nothing to index (e.g. a PDF with no extractable text); building a
        # vector store from an empty list would fail, so stop here.
        msg.content = (
            f"No text could be extracted from `{file.name}`. "
            "Please start a new chat and upload a different PDF."
        )
        await msg.update()
        return

    # One metadata dict per chunk, giving each a synthetic source id.
    metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]

    embeddings = OpenAIEmbeddings()
    docsearch = await cl.make_async(Chroma.from_texts)(
        texts, embeddings, metadatas=metadatas
    )

    message_history = ChatMessageHistory()

    # output_key="answer" tells the memory which chain output to record; the
    # chain below also returns "source_documents".
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )

    chain = ConversationalRetrievalChain.from_llm(
        ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True),
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )

    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
    await msg.update()

    cl.user_session.set("chain", chain)
| |
|
| |
|
@cl.on_message
async def main(message):
    """Answer a user message with the session's retrieval chain.

    Runs the chain stored under ``"chain"`` in the user session on
    ``message.content`` and sends back the answer. Each returned source
    document is attached as a ``cl.Text`` element named ``source_<index>``,
    and the answer is suffixed with the list of those names, or with
    "No sources found" when the chain returned no source documents.

    Args:
        message: Incoming chat message; only its ``content`` is used.
    """
    chain = cl.user_session.get("chain")
    if chain is None:
        # No chain was stored for this session (e.g. indexing did not
        # complete), so there is nothing to query.
        await cl.Message(
            content="No document is loaded yet. Please upload a PDF file first."
        ).send()
        return

    cb = cl.AsyncLangchainCallbackHandler()

    res = await chain.acall(message.content, callbacks=[cb])
    answer = res["answer"]
    source_documents = res.get("source_documents") or []

    # One text element per source document, named so it can be referenced
    # from the answer text.
    text_elements = [
        cl.Text(content=source_doc.page_content, name=f"source_{source_idx}")
        for source_idx, source_doc in enumerate(source_documents)
    ]
    # Always defined, so the "no sources" branch below is reachable and can
    # never hit an unbound name.
    source_names = [text_el.name for text_el in text_elements]

    if source_names:
        answer += f"\nSources: {', '.join(source_names)}"
    else:
        answer += "\nNo sources found"

    await cl.Message(content=answer, elements=text_elements).send()
| |
|