# NOTE: the web capture of this file began with page-status text
# ("Spaces: Sleeping") that is not part of the program.
import os
from operator import itemgetter

import chainlit as cl
from dotenv import load_dotenv
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.runnable.config import RunnableConfig
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.vectorstores import Pinecone
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
# Pull variables from a local .env file (if any) into the process environment
# before anything below reads configuration from it.
load_dotenv()

# Prompt template for the RAG chain. `{context}` is filled with the retrieved
# document chunks and `{question}` with the user's message. The "+++" lines
# are sent to the model verbatim as part of the instructions.
RAG_PROMPT = """
CONTEXT:
{context}
QUERY:
{question}
You are a house builder and can only provide your answers from the context.
You can only provide a response in danish
+++
Please provide a sample from the context in your response
+++
Don't tell in your response that you are getting it from the context.
"""
# Splitter used to cut the source PDF into retrievable chunks.
#
# Separators are tried in order, from coarse (paragraph break) to fine
# (single character), until the pieces fit in `chunk_size`.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1800,  # measured with `len`, i.e. in characters
    chunk_overlap=50,
    length_function=len,
    # Every separator below is a literal string. Treating them as regular
    # expressions would turn "." into "match any character", so the splitter
    # would cut at arbitrary positions instead of at sentence ends.
    is_separator_regex=False,
    separators=[
        "\n\n",
        "\n",
        " ",
        ".",
        ",",
        "\u200B",  # zero-width space
        "\uff0c",  # fullwidth comma
        "\u3001",  # ideographic comma
        "\uff0e",  # fullwidth full stop
        "\u3002",  # ideographic full stop
        "",
    ],
)
# Parse and chunk the source PDF once, at import time, so every chat session
# reuses the same documents. The path is relative to the working directory
# the app is started from.
loader = UnstructuredPDFLoader("br_femogfirs.pdf", strategy="fast")
# Equivalent to `loader.load_and_split(text_splitter)`, which LangChain's
# loader documentation describes as deprecated.
data = text_splitter.split_documents(loader.load())

# Chat prompt built from RAG_PROMPT; expects "context" and "question" inputs.
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
@cl.on_chat_start
async def main():
    """Ask for an OpenAI API key and build this session's RAG chain.

    Runs when a chat session starts. The user is prompted for an API key;
    if none is given, an ``OPENAI_API_KEY`` already present in the
    environment (e.g. from ``.env``) is used instead. When no key is
    available at all, the user is told so and no chain is stored.

    On success the chain (retriever -> prompt -> chat model -> string) is
    stored in the user session under ``"runnable"``.
    """
    user_env = await cl.AskUserMessage(
        content="Indsæt venligst din api-nøgle før vi kan gå videre:"
    ).send()

    # `send()` yields a falsy value when no answer arrives, so guard before
    # indexing. Pasted keys often carry stray whitespace, hence the strip().
    user_key = user_env["output"].strip() if user_env else ""
    api_key = user_key or os.environ.get("OPENAI_API_KEY", "")
    if not api_key:
        await cl.Message(
            content="Der blev ikke angivet nogen api-nøgle - genindlæs siden for at prøve igen.",
        ).send()
        return

    if user_key:
        await cl.Message(
            content="Din api nøgle er nu tilføjet - nu kan du lave en forespørgsel!",
        ).send()

    # The key is handed to the clients directly rather than written to
    # os.environ: the environment is shared by every session in the process,
    # so one user's key would otherwise be picked up by other users.
    model = ChatOpenAI(model="gpt-3.5-turbo", api_key=api_key)
    embedding_model = OpenAIEmbeddings(model="text-embedding-3-small", api_key=api_key)

    # NOTE(review): from_documents embeds and upserts all of `data` on every
    # chat start. If the index is already populated this repeats the work and
    # may add duplicate vectors - consider Pinecone.from_existing_index once
    # ingestion has been run separately.
    # Pinecone credentials are presumably read from the environment - confirm.
    vector_store = Pinecone.from_documents(
        data, embedding_model, index_name="bygnings-regl-rag-1"
    )
    retriever = vector_store.as_retriever()

    # The first step maps the incoming {"question": ...} to the retrieved
    # context plus the untouched question; the prompt consumes both keys.
    rag_chain = (
        {
            "context": itemgetter("question") | retriever,
            "question": itemgetter("question"),
        }
        | rag_prompt
        | model
        | StrOutputParser()
    )
    cl.user_session.set("runnable", rag_chain)
@cl.on_message
async def on_message(message: cl.Message):
    """Answer a user message by streaming the session's RAG chain output.

    Args:
        message: The incoming chat message; its ``content`` is passed to the
            chain as the ``"question"`` input.
    """
    runnable = cl.user_session.get("runnable")
    if runnable is None:
        # No chain was stored for this session (e.g. no API key was given),
        # so reply with a hint instead of failing on `None.astream`.
        await cl.Message(
            content="Der er endnu ikke tilføjet en api-nøgle - genindlæs siden og indsæt din api-nøgle først.",
        ).send()
        return

    msg = cl.Message(content="")
    async for chunk in runnable.astream(
        {"question": message.content},
        config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
    ):
        await msg.stream_token(chunk)

    # Finalise the streamed message once the chain has finished.
    await msg.send()