Spaces:

momegas
/

pmi

Sleeping

App Files Files Community

pmi / app.py

momegas

Update app.py

7f62156 over 1 year ago

raw

history blame contribute delete

3.65 kB

	from ast import List
	from langchain.document_loaders import DirectoryLoader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	import dotenv
	from langchain.prompts import PromptTemplate
	import gradio as gr
	from langchain import PromptTemplate, LLMChain
	import requests
	from fastembed.embedding import FlagEmbedding as Embedding
	import numpy as np
	import os
	from langchain.schema.messages import HumanMessage


	# Load variables from a local .env file (if present) into the environment
	# before any of the settings below are read.
	dotenv.load_dotenv()

	# Bearer token for the inference endpoint; None when API_TOKEN is not set.
	api_token = os.environ.get("API_TOKEN")
	# Endpoint URL. It can be overridden through the API_URL environment
	# variable; the previous hard-coded endpoint remains the default.
	API_URL = os.environ.get(
	    "API_URL",
	    "https://vpb8x4glbmizmiya.eu-west-1.aws.endpoints.huggingface.cloud",
	)
	# HTTP headers attached to every request sent by query().
	headers = {
	    "Authorization": f"Bearer {api_token}",
	    "Content-Type": "application/json",
	}


	def query(payload, timeout=120):
	    """POST *payload* as JSON to the inference endpoint and return the decoded reply.

	    Args:
	        payload: JSON-serialisable request body.
	        timeout: Seconds to wait for the endpoint before giving up. Without
	            a timeout, requests waits indefinitely on a stalled connection.

	    Returns:
	        The response body decoded from JSON. Non-2xx responses are not
	        raised here; their JSON body (if any) is returned as-is.

	    Raises:
	        requests.exceptions.Timeout: if the endpoint does not answer in time.
	        ValueError: if the response body is not valid JSON.
	    """
	    response = requests.post(
	        API_URL, headers=headers, json=payload, timeout=timeout
	    )
	    return response.json()


	def get_top_k(query_embedding, embeddings, documents, k=3):
	    """Return the *k* documents whose embeddings are closest to the query.

	    Closeness is cosine similarity, so the ranking does not depend on the
	    length of the embedding vectors.

	    Args:
	        query_embedding: 1-D vector for the query.
	        embeddings: Sequence of 1-D vectors, one per entry of *documents*
	            and in the same order.
	        documents: Items to rank; returned unchanged.
	        k: Maximum number of documents to return. It is clamped to the
	            number of documents available.

	    Returns:
	        A list of at most *k* documents, best match first. Empty when
	        there are no documents or *k* is not positive.
	    """
	    limit = min(k, len(documents))
	    if limit <= 0:
	        return []

	    matrix = np.asarray(embeddings, dtype=float)
	    query_vec = np.asarray(query_embedding, dtype=float)

	    # Cosine similarity = dot product divided by the product of the norms.
	    norms = np.linalg.norm(matrix, axis=1) * np.linalg.norm(query_vec)
	    # A zero vector has no direction; give it a score of 0 instead of NaN.
	    norms[norms == 0] = 1.0
	    scores = matrix @ query_vec / norms

	    # Indices of the best matches, highest score first.
	    ranking = np.argsort(scores)[::-1][:limit]

	    result = []
	    for rank, index in enumerate(ranking, start=1):
	        print(f"Rank {rank}: {documents[index]}", "\n")
	        result.append(documents[index])

	    return result


	# Prompt text sent to the language model. The {context} and {question}
	# placeholders are filled in through PROMPT.format(...).
	prompt_template = """
	You are the helpful assistant representing the company Philip Morris.
	If you don't know the answer, just say that you don't know, don't try to make up an answer.
	Use the following pieces of context to answer the question at the end.
	Think step by step in your answer.
	Only answer the given question.

	Context:
	{context}

	Question: {question}
	Answer:
	"""


	# Template object wrapping the text above; it declares the two variables
	# that must be supplied when formatting.
	PROMPT = PromptTemplate(
	template=prompt_template, input_variables=["context", "question"]
	)

	# Load every .txt file below ./documents, including nested folders.
	# The previous pattern "*/.txt" only matched files literally named ".txt".
	loader = DirectoryLoader("./documents", glob="**/*.txt", show_progress=True)
	docs = loader.load()
	# Split the loaded documents into overlapping chunks (chunk_size=500,
	# chunk_overlap=150) so each chunk can be embedded and retrieved separately.
	text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=150)
	texts = text_splitter.split_documents(docs)

	embedding_model = Embedding(model_name="BAAI/bge-base-en", max_length=512)
	# One embedding per chunk, in the same order as `texts`; list() materialises
	# the result so it can be reused for every incoming question.
	embeddings = list(embedding_model.embed([text.page_content for text in texts]))


	with gr.Blocks() as demo:
	    chatbot = gr.Chatbot(height=800)
	    msg = gr.Textbox()
	    clear = gr.ClearButton([msg, chatbot])

	    def respond(message, chat_history):
	        """Answer *message* using the retrieved chunks and extend the chat history.

	        Args:
	            message: Text the user submitted.
	            chat_history: List of (user, bot) tuples shown in the chatbot;
	                a new pair is appended to it.

	        Returns:
	            A tuple ("", chat_history): the empty string clears the textbox
	            and the history refreshes the chatbot component.
	        """
	        message_embedding = list(embedding_model.embed([message]))[0]
	        result_docs = get_top_k(message_embedding, embeddings, texts, k=2)

	        # Give the model the chunk text itself. Formatting the list of
	        # document objects directly would insert their repr (metadata and
	        # escaped newlines included) into the prompt.
	        context = "\n\n".join(doc.page_content for doc in result_docs)
	        human_message = HumanMessage(
	            content=PROMPT.format(context=context, question=message)
	        )

	        print("Question: ", human_message)
	        output = query(
	            {
	                "inputs": human_message.content,
	                "parameters": {
	                    "temperature": 0.9,
	                    "top_p": 0.95,
	                    "repetition_penalty": 1.2,
	                    "top_k": 50,
	                    "truncate": 1000,
	                    "max_new_tokens": 1024,
	                },
	            }
	        )
	        print("Response: ", output, "\n")

	        # The endpoint may answer with an error payload instead of the usual
	        # [{"generated_text": ...}] list; check the shape before indexing.
	        generated_text = ""
	        if isinstance(output, list) and output and isinstance(output[0], dict):
	            generated_text = output[0].get("generated_text") or ""

	        answer = generated_text or (
	            "Sorry, I could not generate an answer. Please try again."
	        )

	        sources = "".join(
	            f"⚫️ Source {i+1}: {doc.page_content}\n Document link: N/A Page: N/A \n"
	            for i, doc in enumerate(result_docs)
	        )
	        bot_message = answer + "\n \n" + "Document sources" + "\n \n" + sources

	        chat_history.append((message, bot_message))
	        return "", chat_history

	    msg.submit(respond, [msg, chatbot], [msg, chatbot])


	if __name__ == "__main__":
	    # Start the Gradio app only when this file is executed as a script.
	    demo.launch()