# LLMchatbotAPP / app.py
# ManjotSingh's picture
# Update app.py
# fc34500 verified
import gradio as gr
from huggingface_hub import InferenceClient
from typing import List, Tuple
import fitz # PyMuPDF
from sentence_transformers import SentenceTransformer, util
import numpy as np
import faiss
# Inference client bound to the zephyr-7b-beta model id; `respond` uses it
# for streamed chat completions.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
class MyApp:
    """Page-level retrieval store built from a single PDF.

    On construction the app reads ``North_Indian_Diet.pdf``, embeds the text
    of every page with a sentence-transformers model and stores the vectors
    in a FAISS ``IndexFlatL2`` index that ``search_documents`` queries.

    Attributes are plain and public, as in the original design:
      * ``documents``  - list of ``{"page": int, "content": str}`` dicts.
      * ``embeddings`` - float32 matrix with one row per document, or None.
      * ``index``      - the FAISS index, or None when nothing is indexed.
    """

    # The same model must embed both the pages and the queries, so its name
    # lives in exactly one place.
    EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'

    def __init__(self) -> None:
        self.documents = []
        self.embeddings = None
        self.index = None
        # Lazily created by _get_model() and then reused for every query.
        self._model = None
        self.load_pdf("North_Indian_Diet.pdf")
        self.build_vector_db()

    def _get_model(self):
        """Return the shared SentenceTransformer, loading it on first use."""
        # getattr keeps this safe on instances created without __init__.
        model = getattr(self, "_model", None)
        if model is None:
            model = SentenceTransformer(self.EMBEDDING_MODEL_NAME)
            self._model = model
        return model

    def load_pdf(self, file_path: str) -> None:
        """Extract the text of every page of a PDF into ``self.documents``.

        Any previously loaded documents are replaced. Page numbers stored
        under ``"page"`` are 1-based.

        Args:
            file_path: Path of the PDF file to read.
        """
        # The context manager closes the document even if extraction fails.
        with fitz.open(file_path) as doc:
            self.documents = [
                {"page": page_num, "content": page.get_text()}
                for page_num, page in enumerate(doc, start=1)
            ]
        print("PDF processed successfully!")

    def build_vector_db(self) -> None:
        """Embed all loaded documents and (re)build the FAISS index.

        When no documents are loaded there is nothing to embed, so the
        index is cleared instead of failing on an empty embedding matrix.
        """
        if not self.documents:
            self.embeddings = None
            self.index = None
            print("No documents to index; vector database not built.")
            return

        texts = [doc["content"] for doc in self.documents]
        # FAISS expects a C-contiguous float32 matrix.
        self.embeddings = np.ascontiguousarray(
            self._get_model().encode(texts), dtype=np.float32
        )
        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        self.index.add(self.embeddings)
        print("Vector database built successfully!")

    def search_documents(self, query: str, k: int = 3) -> List[str]:
        """Return the contents of the documents most similar to ``query``.

        Args:
            query: Free-text query to embed and look up.
            k: Maximum number of documents to return.

        Returns:
            Up to ``k`` document texts, nearest first, or
            ``["No relevant documents found."]`` when nothing is indexed,
            ``k`` is not positive, or the search yields no valid hit.
        """
        no_match = ["No relevant documents found."]
        if self.index is None or not self.documents or k <= 0:
            return no_match

        query_embedding = np.ascontiguousarray(
            self._get_model().encode([query]), dtype=np.float32
        )
        # Never ask for more neighbours than there are documents: FAISS pads
        # missing results with -1, which would index the *last* document.
        top_k = min(k, len(self.documents))
        _, neighbours = self.index.search(query_embedding, top_k)

        doc_count = len(self.documents)
        results = [
            self.documents[i]["content"]
            for i in neighbours[0]
            if 0 <= i < doc_count
        ]
        return results or no_match
# Module-level instance used by `respond` for retrieval. Constructing it
# loads the PDF and builds the FAISS index right away (see MyApp.__init__).
app = MyApp()
def respond(
    message: str,
    history: List[Tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream a retrieval-augmented chat reply for the Gradio chat UI.

    Builds the conversation from the system prompt, the prior turns and the
    new user message, appends the documents retrieved for that message as
    extra context, and streams the model's completion.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs;
            empty entries are skipped.
        system_message: System prompt to use. A blank value falls back to
            the built-in diet-expert prompt.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The reply accumulated so far, once per streamed chunk.
    """
    default_system_message = "You are a North Indian diet expert. You provide dietary advice, suggest meal plans, and answer questions related to North Indian cuisine and nutrition. Feel free to ask about healthy recipes, nutritional benefits of foods, or meal planning tips."
    # Honour the prompt supplied by the caller instead of overwriting it;
    # only fall back to the default when it is missing or blank.
    if system_message and system_message.strip():
        system_prompt = system_message
    else:
        system_prompt = default_system_message

    messages = [{"role": "system", "content": system_prompt}]
    for user_text, assistant_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    # RAG - Retrieve relevant documents
    retrieved_docs = app.search_documents(message)
    context = "\n".join(retrieved_docs)
    messages.append({"role": "system", "content": "Relevant documents: " + context})

    response = ""
    # `chunk` (not `message`) so the streaming loop does not shadow the
    # user-message parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # Stream chunks may carry no text (content is None); skip them
        # rather than failing on `str += None`.
        if token:
            response += token
        yield response
# Chat UI wiring: `respond` is the chat handler, and the additional inputs are
# passed to it after the message and history, in the order listed here.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You are a North Indian diet expert. You provide dietary advice, suggest meal plans, and answer questions related to North Indian cuisine and nutrition. Feel free to ask about healthy recipes, nutritional benefits of foods, or meal planning tips.",
            label="System message"
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)"
        ),
    ],
    examples=[
        ["Can you suggest a healthy North Indian breakfast?"],
        ["What are the nutritional benefits of chickpeas?"],
        ["How can I plan a balanced North Indian meal?"]
    ],
    # The title previously ended in mis-decoded bytes of the curry emoji
    # (U+1F35B); restored to the intended character.
    title='North Indian Diet Expert 🍛'
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()