from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.schema.messages import HumanMessage
from fastembed.embedding import FlagEmbedding as Embedding
import gradio as gr
import requests
import numpy as np
import dotenv
import os

# Load the Hugging Face endpoint token from the environment (.env file).
dotenv.load_dotenv()
api_token = os.environ.get("API_TOKEN")

API_URL = "https://vpb8x4glbmizmiya.eu-west-1.aws.endpoints.huggingface.cloud"
headers = {
    "Authorization": f"Bearer {api_token}",
    "Content-Type": "application/json",
}


def query(payload):
    """Send a generation request to the Hugging Face inference endpoint."""
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()


def get_top_k(query_embedding, embeddings, documents, k=3):
    """Return the k documents most similar to the query embedding."""
    # Dot product between the query and all document embeddings
    # (equivalent to cosine similarity for normalized embeddings).
    scores = np.dot(embeddings, query_embedding)
    # Indices of the documents sorted by descending score.
    sorted_scores = np.argsort(scores)[::-1]
    # Collect the top k documents.
    result = []
    for i in range(k):
        print(f"Rank {i+1}: {documents[sorted_scores[i]]}", "\n")
        result.append(documents[sorted_scores[i]])
    return result


prompt_template = """
You are the helpful assistant representing the company Philip Morris.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use the following pieces of context to answer the question at the end.
Think step by step in your answer. Only answer the given question.

Context: {context}

Question: {question}
Answer: """

PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

# Load all .txt files from the local documents folder and split them
# into overlapping chunks for retrieval.
loader = DirectoryLoader("./documents", glob="**/*.txt", show_progress=True)
docs = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=150)
texts = text_splitter.split_documents(docs)

# Embed every chunk once at startup so queries only need a single embedding call.
embedding_model = Embedding(model_name="BAAI/bge-base-en", max_length=512)
embeddings = list(embedding_model.embed([text.page_content for text in texts]))

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=800)
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        # Embed the user question and retrieve the most relevant chunks.
        message_embedding = list(embedding_model.embed([message]))[0]
        result_docs = get_top_k(message_embedding, embeddings, texts, k=2)

        # Build the prompt from the retrieved context and the question.
        human_message = HumanMessage(
            content=PROMPT.format(context=result_docs, question=message)
        )
        print("Question: ", human_message)

        output = query(
            {
                "inputs": human_message.content,
                "parameters": {
                    "temperature": 0.9,
                    "top_p": 0.95,
                    "repetition_penalty": 1.2,
                    "top_k": 50,
                    "truncate": 1000,
                    "max_new_tokens": 1024,
                },
            }
        )
        print("Response: ", output, "\n")

        # Append the generated answer followed by the source chunks used as context.
        bot_message = ""
        if output[0]["generated_text"]:
            bot_message = output[0]["generated_text"]
        bot_message += "\n \n"
        bot_message += "Document sources"
        bot_message += "\n \n"
        for i, doc in enumerate(result_docs):
            bot_message += f"⚫️ Source {i+1}: {doc.page_content}\n Document link: N/A Page: N/A \n"

        chat_history.append((message, bot_message))
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])

if __name__ == "__main__":
    demo.launch()