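# Chainlit RAG app: answers questions about a Danish building-regulations PDF.
# The PDF is chunked, embedded with OpenAI, and indexed in Pinecone; answers
# are retrieved from that index and returned in Danish.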
import os
from operator import itemgetter

import chainlit as cl
from dotenv import load_dotenv
from langchain.schema.runnable.config import RunnableConfig
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.vectorstores import Pinecone
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
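
# Load environment variables from a local .env file (the Pinecone credentials
# are assumed to live there; the OpenAI key is collected at chat start).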
load_dotenv()
RAG_PROMPT = """
CONTEXT:
{context}

QUERY:
{question}

You are a house builder and can only provide answers from the context.
You can only respond in Danish.
+++
Please provide sample text from the context next to your response.
+++
Do not reveal in your response that the answer comes from the context.
"""
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1800,
    chunk_overlap=50,
    length_function=len,
    # Treat the separators as literal strings: with is_separator_regex=True,
    # "." and "" would be interpreted as regexes and match any character.
    is_separator_regex=False,
    separators=[
        "\n\n",
        "\n",
        " ",
        ".",
        ",",
        "\u200B",  # zero-width space
        "\uff0c",  # full-width comma
        "\u3001",  # ideographic comma
        "\uff0e",  # full-width full stop
        "\u3002",  # ideographic full stop
        "",
    ],
)
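
# Parse the PDF with Unstructured's "fast" strategy and split it into chunks.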
loader = UnstructuredPDFLoader("br_femogfirs.pdf", strategy="fast")
data = loader.load_and_split(text_splitter)
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

@cl.on_chat_start
async def main():
    # Ask the user for their OpenAI API key before anything else
    # ("Please insert your API key before we can proceed:").
    user_env = await cl.AskUserMessage(content="Indsæt venligst din API-nøgle, før vi kan gå videre:").send()
    if user_env:
        os.environ["OPENAI_API_KEY"] = user_env["output"]
        # ("Your API key has now been added for the session - you can make a query!")
        await cl.Message(content="Din API-nøgle er nu tilføjet for sessionen - nu kan du lave en forespørgsel!").send()

    model = ChatOpenAI(model="gpt-3.5-turbo")
    embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

    # Embed the chunks and upsert them into the named Pinecone index.
    vector_store = Pinecone.from_documents(data, embedding_model, index_name="bygnings-regl-rag-1")
    retriever = vector_store.as_retriever()

    # Retrieve context for the question, fill the prompt, call the model,
    # and parse the output to a plain string.
    building_qa_chain = (
        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
        | rag_prompt
        | model
        | StrOutputParser()
    )
    cl.user_session.set("runnable", building_qa_chain)

@cl.on_message
async def on_message(message: cl.Message):
    runnable = cl.user_session.get("runnable")
    msg = cl.Message(content="")
    # Stream the answer token by token as the chain produces it.
    async for chunk in runnable.astream(
        {"question": message.content},
        config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
    ):
        await msg.stream_token(chunk)
    # Finalize the streamed message.
    await msg.send()
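
# Run locally with: chainlit run app.py  (assuming this file is saved as app.py)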