# NOTE(review): the lines that originally appeared here ("Spaces: Sleeping",
# "File size: 2,790 Bytes", commit hashes, and a 1..100 line-number gutter)
# were Hugging Face Spaces page furniture captured during extraction, not
# program text. They have been commented out so the module parses.
import chainlit as cl
import os
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Pinecone
from operator import itemgetter
from langchain.schema.runnable import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain.schema.runnable.config import RunnableConfig
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import UnstructuredPDFLoader
# Load environment variables (e.g. PINECONE_API_KEY) from a local .env file.
load_dotenv()

# Prompt for the retrieval-augmented chain: the model must answer in Danish,
# strictly from the retrieved context, and quote supporting passages.
# FIX: "You house builder" -> "You are a house builder" (broken grammar in
# the instruction the LLM receives).
RAG_PROMPT = """
CONTEXT:
{context}
QUERY:
{question}
You are a house builder and can only provide your answers from the context.
You can only provide a response in danish
+++
Please provide sample text from the context next to your response.
+++
Don't tell in your response that you are getting it from the context.
"""

# Chunking configuration for the source PDF. The separators below are
# LangChain's multilingual *literal* separator list (CJK punctuation etc.).
# FIX: is_separator_regex was True, but these are literal strings — as a
# regex, "." matches ANY character, which would shred chunks into single
# characters whenever the earlier separators don't apply.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1800,
    chunk_overlap=50,
    length_function=len,
    is_separator_regex=False,
    separators=[
        "\n\n",
        "\n",
        " ",
        ".",
        ",",
        "\u200B",  # zero-width space
        "\uff0c",  # fullwidth comma
        "\u3001",  # ideographic comma
        "\uff0e",  # fullwidth full stop
        "\u3002",  # ideographic full stop
        "",
    ],
)

# Load and chunk the building-regulations PDF once at import time.
# NOTE(review): strategy="fast" trades extraction quality for speed — confirm
# it is adequate for this document.
loader = UnstructuredPDFLoader("br_femogfirs.pdf", strategy="fast")
data = loader.load_and_split(text_splitter)

rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
@cl.on_chat_start
async def main():
    """Start a chat session: collect the user's OpenAI key, then build the RAG chain.

    Asks the user for an API key, stores it in the process environment for
    this session, builds a Pinecone-backed retrieval chain over the PDF
    chunks loaded at import time, and stashes the chain in the user session
    under the key "runnable" for `on_message` to use.
    """
    user_env = await cl.AskUserMessage(content="Indsæt venligst din api-nøgle før vi kan gå videre:").send()
    # FIX: the original fell through when the ask timed out (user_env falsy)
    # and then constructed ChatOpenAI/OpenAIEmbeddings with no API key set,
    # which fails. Bail out early instead.
    if not user_env:
        return
    os.environ["OPENAI_API_KEY"] = user_env['output']
    await cl.Message(content="Din api nøgle er nu tilføjet for sessionen - nu kan du lave en forespørgsel!").send()
    model = ChatOpenAI(model="gpt-3.5-turbo")
    embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
    # NOTE(review): this re-embeds and re-uploads every chunk on each chat
    # start; once the index is populated, Pinecone.from_existing_index would
    # avoid the repeated upsert — confirm intent.
    vector_store = Pinecone.from_documents(data, embedding_model, index_name="bygnings-regl-rag-1")
    retriever = vector_store.as_retriever()
    # Chain: route the question to the retriever for context, then format
    # the prompt, call the model, and parse the reply to a plain string.
    # (The original RunnablePassthrough.assign(context=itemgetter("context"))
    # step reassigned "context" to itself — a no-op — and was removed.)
    building_qa_chain = (
        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
        | rag_prompt
        | model
        | StrOutputParser()
    )
    cl.user_session.set("runnable", building_qa_chain)
@cl.on_message
async def on_message(message: cl.Message):
    """Stream the RAG chain's answer for an incoming user message.

    Fetches the chain built in `main` from the user session and streams its
    tokens into a single Chainlit message.
    """
    runnable = cl.user_session.get("runnable")
    # FIX: the chain only exists after the user has supplied an API key in
    # on_chat_start; without this guard, runnable.astream raises
    # AttributeError on None.
    if runnable is None:
        await cl.Message(content="Indsæt venligst din api-nøgle først.").send()
        return
    msg = cl.Message(content="")
    async for chunk in runnable.astream(
        {"question": message.content},
        config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
    ):
        await msg.stream_token(chunk)
    # FIX: removed a stray " |" scraping artifact that made this line a
    # syntax error, and finalize the streamed message (standard Chainlit
    # streaming pattern: stream_token in the loop, then send()).
    await msg.send()