import os
from operator import itemgetter

import chainlit as cl
from dotenv import load_dotenv
from langchain.schema.runnable.config import RunnableConfig
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.vectorstores import Pinecone
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

load_dotenv()

RAG_PROMPT = """
CONTEXT:
{context}

QUERY:
{question}

You are a house builder and can only provide answers from the context.
You can only provide a response in Danish.
+++ Please provide sample text from the context next to your response. +++
Do not mention in your response that the answer comes from the context.
"""

# Split the PDF into overlapping chunks. The separators are literal strings,
# so is_separator_regex must be False (with True, "." would be treated as a
# regex and match every character).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1800,
    chunk_overlap=50,
    length_function=len,
    is_separator_regex=False,
    separators=[
        "\n\n",
        "\n",
        " ",
        ".",
        ",",
        "\u200b",  # zero-width space
        "\uff0c",  # full-width comma
        "\u3001",  # ideographic comma
        "\uff0e",  # full-width full stop
        "\u3002",  # ideographic full stop
        "",
    ],
)

loader = UnstructuredPDFLoader("br_femogfirs.pdf", strategy="fast")
data = loader.load_and_split(text_splitter)

rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)


@cl.on_chat_start
async def main():
    # Ask the user for their OpenAI API key before building the chain.
    user_env = await cl.AskUserMessage(
        content="Indsæt venligst din api-nøgle før vi kan gå videre:"
    ).send()
    if user_env:
        os.environ["OPENAI_API_KEY"] = user_env["output"]
        await cl.Message(
            content="Din api-nøgle er nu tilføjet for sessionen - nu kan du lave en forespørgsel!"
        ).send()

    model = ChatOpenAI(model="gpt-3.5-turbo")
    embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

    # Embed the chunks and upsert them into the Pinecone index
    # (PINECONE_API_KEY is read from the environment via load_dotenv()).
    vector_store = Pinecone.from_documents(
        data, embedding_model, index_name="bygnings-regl-rag-1"
    )
    retriever = vector_store.as_retriever()

    # Retrieve context for the question, fill the prompt, and parse the model
    # output to a plain string. (The original pipeline re-assigned "context"
    # to itself via RunnablePassthrough.assign; that no-op step is dropped.)
    building_qa_chain = (
        {
            "context": itemgetter("question") | retriever,
            "question": itemgetter("question"),
        }
        | rag_prompt
        | model
        | StrOutputParser()
    )

    cl.user_session.set("runnable", building_qa_chain)


@cl.on_message
async def on_message(message: cl.Message):
    runnable = cl.user_session.get("runnable")

    msg = cl.Message(content="")
    async for chunk in runnable.astream(
        {"question": message.content},
        config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
    ):
        await msg.stream_token(chunk)
    # Finalize the streamed message so it is persisted in the UI.
    await msg.send()
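# Usage note (a sketch, assuming this file is saved as app.py and that
# PINECONE_API_KEY is present in the .env file loaded above): start the
# Chainlit app from the project directory with
#
#   chainlit run app.py -w
#
# The -w flag enables auto-reload while editing. Note that the PDF is loaded,
# split, and embedded into Pinecone on every chat start; for repeated runs you
# may prefer connecting to the existing index instead of re-upserting.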