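"""Chainlit RAG chat that answers questions about the Danish building
regulations PDF "br_femogfirs.pdf", responding in Danish, using LangChain
with OpenAI models and a Pinecone vector index.

Rough dependency list (an assumption inferred from the imports): chainlit,
python-dotenv, langchain, langchain-openai, langchain-community,
pinecone-client, unstructured.
"""
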
import os
from operator import itemgetter

import chainlit as cl
from dotenv import load_dotenv
from langchain.schema.runnable.config import RunnableConfig
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.vectorstores import Pinecone
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Load environment variables from a local .env file. Pinecone is assumed to
# read its credentials (PINECONE_API_KEY) from the environment; the OpenAI
# key is collected from the user at chat start instead.
load_dotenv()


RAG_PROMPT = """

CONTEXT:
{context}

QUERY:
{question}

You house builder and can only provide your answers from the context. 
You can only provide a response in danish

+++
Please provide sample text from the context next to your response.
+++

Don't tell in your response that you are getting it from the context.

"""


# Split the PDF into overlapping chunks. The separator list is the
# multilingual set from the LangChain docs; the entries are literal strings,
# so is_separator_regex must be False (as a regex, "." would match any
# character and split on every one).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1800,
    chunk_overlap=50,
    length_function=len,
    is_separator_regex=False,
    separators=[
        "\n\n",
        "\n",
        " ",
        ".",
        ",",
        "\u200B",  # zero-width space
        "\uff0c",  # fullwidth comma
        "\u3001",  # ideographic comma
        "\uff0e",  # fullwidth full stop
        "\u3002",  # ideographic full stop
        "",
    ],
)


# Parse the regulations PDF and split it with the splitter above;
# strategy="fast" trades layout accuracy for speed in Unstructured.
loader = UnstructuredPDFLoader("br_femogfirs.pdf", strategy="fast")
data = loader.load_and_split(text_splitter)

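# Wrap the prompt template so the chain can fill {context} and {question}.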
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)


@cl.on_chat_start
async def main():
    # Ask the user for their OpenAI API key before setting anything up.
    # ("Please insert your API key before we can continue:")
    user_env = await cl.AskUserMessage(content="Indsæt venligst din API-nøgle før vi kan gå videre:").send()

    if user_env:
        os.environ["OPENAI_API_KEY"] = user_env["output"]

        # ("Your API key has now been added for the session - you can now make a query!")
        await cl.Message(content="Din API-nøgle er nu tilføjet for sessionen - nu kan du lave en forespørgsel!").send()

        model = ChatOpenAI(model="gpt-3.5-turbo")
        embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

        # Embed the chunks and upsert them into the existing Pinecone index.
        # Note that this re-embeds the whole PDF on every new chat session.
        vector_store = Pinecone.from_documents(data, embedding_model, index_name="bygnings-regl-rag-1")
        retriever = vector_store.as_retriever()

        # LCEL chain: retrieve context for the question, fill the prompt,
        # call the model, and parse the reply to a plain string.
        building_qa_chain = (
            {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
            | rag_prompt
            | model
            | StrOutputParser()
        )

        cl.user_session.set("runnable", building_qa_chain)


@cl.on_message
async def on_message(message: cl.Message):
    runnable = cl.user_session.get("runnable")
    msg = cl.Message(content="")

    # Stream the answer token by token into the chat window.
    async for chunk in runnable.astream(
        {"question": message.content},
        config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
    ):
        await msg.stream_token(chunk)

    # Finalize the streamed message.
    await msg.send()
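

# To run (a sketch; assumes this file is saved as app.py and that a .env
# file with PINECONE_API_KEY sits next to it):
#   chainlit run app.py -w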