Spaces:

ManjotSingh
/

LLMchatbotAPP

Configuration error

App Files Files Community

LLMchatbotAPP / app.py

ManjotSingh

Update app.py

fc34500 verified 12 months ago

raw

history blame contribute delete

4.33 kB

	import gradio as gr
	from huggingface_hub import InferenceClient
	from typing import List, Tuple
	import fitz # PyMuPDF
	from sentence_transformers import SentenceTransformer, util
	import numpy as np
	import faiss

	# Module-wide inference client bound to the chat model used by respond().
	client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")

	class MyApp:
	    """Index the pages of one PDF and retrieve the pages closest to a query.

	    Each PDF page becomes one entry in ``documents``. Page texts are embedded
	    with a SentenceTransformer model and stored in a FAISS ``IndexFlatL2``;
	    row ``i`` of the index corresponds to ``documents[i]``.
	    """

	    # Name of the sentence-transformers model used for pages and queries.
	    EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

	    def __init__(self, pdf_path: str = "North_Indian_Diet.pdf") -> None:
	        """Load ``pdf_path`` and build the vector index over its pages.

	        Args:
	            pdf_path: Path of the PDF to index. Defaults to the file the app
	                originally hard-coded, so ``MyApp()`` behaves as before.
	        """
	        self.documents = []
	        self.embeddings = None
	        self.index = None
	        self._model = None
	        self.load_pdf(pdf_path)
	        self.build_vector_db()

	    def _get_model(self):
	        """Return the embedding model, loading it only on first use."""
	        # getattr keeps this safe for instances created without __init__.
	        model = getattr(self, "_model", None)
	        if model is None:
	            model = SentenceTransformer(self.EMBEDDING_MODEL_NAME)
	            self._model = model
	        return model

	    def load_pdf(self, file_path: str) -> None:
	        """Extract the text of every page of a PDF into ``self.documents``.

	        Args:
	            file_path: Path of the PDF file to read.

	        Each stored entry is ``{"page": <1-based page number>, "content": <text>}``.
	        """
	        # The context manager closes the PDF handle even if extraction fails.
	        with fitz.open(file_path) as pdf:
	            self.documents = [
	                {"page": page_number, "content": page.get_text()}
	                for page_number, page in enumerate(pdf, start=1)
	            ]
	        print("PDF processed successfully!")

	    def build_vector_db(self) -> None:
	        """Embed every document and (re)build the FAISS index from them.

	        With no documents loaded there is nothing to embed, so the index and
	        embeddings are reset to ``None`` instead of failing on an empty array.
	        """
	        if not self.documents:
	            self.embeddings = None
	            self.index = None
	            return

	        page_texts = [doc["content"] for doc in self.documents]
	        # FAISS expects contiguous float32 input.
	        self.embeddings = np.ascontiguousarray(
	            self._get_model().encode(page_texts), dtype=np.float32
	        )
	        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
	        self.index.add(self.embeddings)
	        print("Vector database built successfully!")

	    def search_documents(self, query: str, k: int = 3) -> List[str]:
	        """Return the text of up to ``k`` pages most similar to ``query``.

	        Args:
	            query: Free-text search query.
	            k: Maximum number of pages to return.

	        Returns:
	            Page texts ordered as returned by the index, or
	            ``["No relevant documents found."]`` when nothing can be retrieved.
	        """
	        no_match = ["No relevant documents found."]
	        if self.index is None or not self.documents:
	            return no_match

	        # Never ask for more neighbours than there are pages.
	        k = min(k, len(self.documents))
	        if k < 1:
	            return no_match

	        query_embedding = np.ascontiguousarray(
	            self._get_model().encode([query]), dtype=np.float32
	        )
	        _, neighbours = self.index.search(query_embedding, k)

	        # Skip out-of-range ids: a -1 label would otherwise index the last
	        # page via Python's negative indexing and be returned as a match.
	        page_count = len(self.documents)
	        results = [
	            self.documents[i]["content"]
	            for i in neighbours[0]
	            if 0 <= i < page_count
	        ]
	        return results or no_match

	# Single shared retrieval helper, created at import time: constructing MyApp
	# loads the PDF and builds the vector index before the UI is defined.
	app = MyApp()

	# Persona used when the caller supplies an empty system message.
	_DEFAULT_SYSTEM_MESSAGE = "You are a North Indian diet expert. You provide dietary advice, suggest meal plans, and answer questions related to North Indian cuisine and nutrition. Feel free to ask about healthy recipes, nutritional benefits of foods, or meal planning tips."


	def respond(
	    message: str,
	    history: List[Tuple[str, str]],
	    system_message: str,
	    max_tokens: int,
	    temperature: float,
	    top_p: float,
	):
	    """Stream a chat reply to ``message``, grounded in retrieved PDF pages.

	    Args:
	        message: The latest user message.
	        history: Earlier ``(user, assistant)`` turns of the conversation.
	        system_message: System prompt for the model; the default diet-expert
	            persona is used when this is empty.
	        max_tokens: Maximum number of tokens to generate.
	        temperature: Sampling temperature forwarded to the model.
	        top_p: Nucleus-sampling threshold forwarded to the model.

	    Yields:
	        The reply accumulated so far, once per streamed chunk.
	    """
	    # Honour the caller's system message instead of overwriting it.
	    messages = [
	        {"role": "system", "content": system_message or _DEFAULT_SYSTEM_MESSAGE}
	    ]

	    for user_turn, assistant_turn in history:
	        if user_turn:
	            messages.append({"role": "user", "content": user_turn})
	        if assistant_turn:
	            messages.append({"role": "assistant", "content": assistant_turn})

	    messages.append({"role": "user", "content": message})

	    # RAG - retrieve the pages most relevant to the question and pass them
	    # to the model as additional context.
	    retrieved_docs = app.search_documents(message)
	    context = "\n".join(retrieved_docs)
	    messages.append({"role": "system", "content": "Relevant documents: " + context})

	    response = ""
	    # A distinct loop name keeps the ``message`` parameter from being shadowed.
	    for chunk in client.chat_completion(
	        messages,
	        max_tokens=max_tokens,
	        stream=True,
	        temperature=temperature,
	        top_p=top_p,
	    ):
	        if not chunk.choices:
	            continue
	        token = chunk.choices[0].delta.content
	        # The delta's content is optional; a chunk without text must not be
	        # concatenated (``str + None`` raises TypeError).
	        if token:
	            response += token
	        yield response

	# Text pre-filled in the "System message" box.
	_system_prompt = "You are a North Indian diet expert. You provide dietary advice, suggest meal plans, and answer questions related to North Indian cuisine and nutrition. Feel free to ask about healthy recipes, nutritional benefits of foods, or meal planning tips."

	# Extra controls shown with the chat, listed in the same order as respond()'s
	# parameters after ``message`` and ``history``.
	_extra_inputs = [
	    gr.Textbox(value=_system_prompt, label="System message"),
	    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
	    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
	    gr.Slider(
	        minimum=0.1,
	        maximum=1.0,
	        value=0.95,
	        step=0.05,
	        label="Top-p (nucleus sampling)",
	    ),
	]

	# Sample questions offered to the user as clickable examples.
	_sample_questions = [
	    ["Can you suggest a healthy North Indian breakfast?"],
	    ["What are the nutritional benefits of chickpeas?"],
	    ["How can I plan a balanced North Indian meal?"],
	]

	# Chat UI wired to the streaming respond() generator.
	demo = gr.ChatInterface(
	    respond,
	    additional_inputs=_extra_inputs,
	    examples=_sample_questions,
	    title="North Indian Diet Expert 🍛",
	)

	# Start the web app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()