Spaces:
Configuration error
Configuration error
import gradio as gr | |
from huggingface_hub import InferenceClient | |
from typing import List, Tuple | |
import fitz # PyMuPDF | |
from sentence_transformers import SentenceTransformer, util | |
import numpy as np | |
import faiss | |
# Shared Hugging Face inference client used for every chat completion request.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
class MyApp:
    """Page-level retrieval over a single PDF.

    Every page of the PDF becomes one document. The page texts are embedded
    with a SentenceTransformer model and stored in a FAISS ``IndexFlatL2``
    index so that the pages closest to a query can be looked up.
    """

    DEFAULT_PDF = "North_Indian_Diet.pdf"
    EMBEDDING_MODEL = "all-MiniLM-L6-v2"

    def __init__(
        self,
        pdf_path: str = DEFAULT_PDF,
        model_name: str = EMBEDDING_MODEL,
    ) -> None:
        """Load ``pdf_path`` and build the vector index for its pages.

        Args:
            pdf_path: Path of the PDF to index.
            model_name: SentenceTransformer model used for embeddings.
        """
        self.documents = []
        self.embeddings = None
        self.index = None
        self.model_name = model_name
        # The embedding model is expensive to load, so it is created once on
        # first use (see _get_model) and shared by indexing and searching.
        self.model = None
        self.load_pdf(pdf_path)
        self.build_vector_db()

    def _get_model(self):
        """Return the cached embedding model, loading it on first use."""
        if self.model is None:
            self.model = SentenceTransformer(self.model_name)
        return self.model

    def load_pdf(self, file_path: str) -> None:
        """Extract the text of every page of ``file_path`` into ``self.documents``.

        Each entry is ``{"page": <1-based page number>, "content": <page text>}``.
        Any previously loaded documents are replaced.
        """
        # The context manager closes the PDF handle even if extraction fails.
        with fitz.open(file_path) as doc:
            self.documents = [
                {"page": page_num, "content": page.get_text()}
                for page_num, page in enumerate(doc, start=1)
            ]
        print("PDF processed successfully!")

    def build_vector_db(self) -> None:
        """Embed all loaded documents and index them with FAISS.

        When no documents are loaded the index is left unset, and
        ``search_documents`` falls back to its "not found" answer.
        """
        if not self.documents:
            self.embeddings = None
            self.index = None
            print("No documents to index; vector database not built.")
            return

        encoded = self._get_model().encode(
            [doc["content"] for doc in self.documents]
        )
        # FAISS expects a contiguous float32 matrix.
        self.embeddings = np.ascontiguousarray(encoded, dtype=np.float32)
        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        self.index.add(self.embeddings)
        print("Vector database built successfully!")

    def search_documents(self, query: str, k: int = 3) -> List[str]:
        """Return the text of up to ``k`` pages most similar to ``query``.

        Args:
            query: Free-text search query.
            k: Maximum number of pages to return.

        Returns:
            The matching page texts, best match first, or
            ``["No relevant documents found."]`` when nothing can be returned.
        """
        fallback = ["No relevant documents found."]
        if self.index is None or not self.documents or k <= 0:
            return fallback

        query_embedding = np.ascontiguousarray(
            self._get_model().encode([query]), dtype=np.float32
        )
        _distances, indices = self.index.search(query_embedding, k)

        # FAISS pads the result with -1 when fewer than k vectors are indexed;
        # without this filter, -1 would silently select the last page.
        doc_count = len(self.documents)
        results = [
            self.documents[i]["content"]
            for i in indices[0]
            if 0 <= i < doc_count
        ]
        return results or fallback
# Module-level retrieval instance; constructing it loads the PDF and builds
# the vector index once, so respond() can reuse it for every request.
app: MyApp = MyApp()
def respond(
    message: str,
    history: List[Tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream a retrieval-augmented chat reply for the Gradio chat interface.

    Args:
        message: The user's latest message.
        history: Earlier ``(user, assistant)`` turns of the conversation.
        system_message: System prompt from the UI; the built-in diet-expert
            prompt is used when this is empty or blank.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The reply accumulated so far, once per streamed chunk that carries text.
    """
    default_system_message = "You are a North Indian diet expert. You provide dietary advice, suggest meal plans, and answer questions related to North Indian cuisine and nutrition. Feel free to ask about healthy recipes, nutritional benefits of foods, or meal planning tips."
    # Honor the prompt configured in the UI instead of always overwriting it;
    # fall back to the default only when nothing usable was provided.
    if not system_message or not system_message.strip():
        system_message = default_system_message

    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    # RAG - retrieve the pages most relevant to the user's message.
    retrieved_docs = app.search_documents(message)
    context = "\n".join(retrieved_docs)
    messages.append({"role": "system", "content": "Relevant documents: " + context})

    response = ""
    # The loop variable is named `chunk` so it does not shadow `message`.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # Some stream chunks (e.g. the final one) carry no text; appending
        # None to the response string would raise TypeError.
        if token:
            response += token
            yield response
# Extra controls shown beneath the chat box; their values are passed to
# respond() after (message, history), in this order.
_additional_inputs = [
    gr.Textbox(
        value="You are a North Indian diet expert. You provide dietary advice, suggest meal plans, and answer questions related to North Indian cuisine and nutrition. Feel free to ask about healthy recipes, nutritional benefits of foods, or meal planning tips.",
        label="System message",
    ),
    gr.Slider(
        minimum=1,
        maximum=2048,
        value=512,
        step=1,
        label="Max new tokens",
    ),
    gr.Slider(
        minimum=0.1,
        maximum=4.0,
        value=0.7,
        step=0.1,
        label="Temperature",
    ),
    gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
        label="Top-p (nucleus sampling)",
    ),
]

# Sample prompts offered in the chat UI.
_example_prompts = [
    ["Can you suggest a healthy North Indian breakfast?"],
    ["What are the nutritional benefits of chickpeas?"],
    ["How can I plan a balanced North Indian meal?"],
]

# Chat UI wired to respond().
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=_additional_inputs,
    examples=_example_prompts,
    title="North Indian Diet Expert π",
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()