import os
from operator import itemgetter

import chainlit as cl
import torch
from dotenv import load_dotenv
from langchain.schema.runnable.config import RunnableConfig
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain_community.vectorstores import Pinecone
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

load_dotenv()

RAG_PROMPT = """
CONTEXT:
{context}

QUERY:
{question}

You are a house builder and may only answer from the context above.
Respond only in Danish.
Do not mention in your response that the answer comes from the context.
"""

# Split documents on natural boundaries. The separators are literal strings,
# not regular expressions, so is_separator_regex must be False (with True,
# "." would be treated as a regex matching any character).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1800,
    chunk_overlap=50,
    length_function=len,
    is_separator_regex=False,
    separators=[
        "\n\n",
        "\n",
        " ",
        ".",
        ",",
        "\u200b",  # zero-width space
        "\uff0c",  # full-width comma
        "\u3001",  # ideographic comma
        "\uff0e",  # full-width full stop
        "\u3002",  # ideographic full stop
        "",
    ],
)

# 4-bit NF4 quantization config for running a local Hugging Face model.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Optional local-model path, kept for reference; the chain below uses OpenAI.
# Note: quantization_config belongs on the model, not the tokenizer.
# tokenizer = AutoTokenizer.from_pretrained(
#     "microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True
# )
# model = AutoModelForCausalLM.from_pretrained(
#     "microsoft/Phi-3-mini-4k-instruct",
#     trust_remote_code=True,
#     quantization_config=bnb_config,
#     attn_implementation="eager",
#     device_map="auto",
# )
# hf = HuggingFacePipeline.from_model_id(
#     model_id="microsoft/Phi-3-mini-4k-instruct",
#     task="text-generation",
#     device_map="auto",
#     pipeline_kwargs={"max_new_tokens": 10},
# )

# Load and chunk the PDF, embed the chunks, and index them in Pinecone.
# This runs at import time, so the document is re-embedded on every start.
loader = UnstructuredPDFLoader("./br_syvoghalvfjerds.pdf")
data = loader.load_and_split(text_splitter)

embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
vector_store = Pinecone.from_documents(
    data, embedding_model, index_name=os.environ.get("index")
)
retriever = vector_store.as_retriever()

rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
model = ChatOpenAI(model="gpt-3.5-turbo")


@cl.on_chat_start
async def main():
    # Retrieve context for the question, fill the prompt, call the model,
    # and parse the output to a plain string.
    mechanic_qa_chain = (
        {
            "context": itemgetter("question") | retriever,
            "question": itemgetter("question"),
        }
        | rag_prompt
        | model
        | StrOutputParser()
    )
    cl.user_session.set("runnable", mechanic_qa_chain)


@cl.on_message
async def on_message(message: cl.Message):
    runnable = cl.user_session.get("runnable")
    msg = cl.Message(content="")
    async for chunk in runnable.astream(
        {"question": message.content},
        config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
    ):
        await msg.stream_token(chunk)
    # Finalize the streamed message so Chainlit renders it as complete.
    await msg.send()
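
# Usage note (assumption: this file is saved as app.py; the filename is not
# given in the source). Chainlit apps are started from its CLI; -w enables
# auto-reload on file changes:
#
#   chainlit run app.py -w
#
# Environment variables expected in .env: OPENAI_API_KEY for the embeddings
# and chat model, the Pinecone client's API key (typically PINECONE_API_KEY),
# and "index" for the Pinecone index name used above.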