"""Gradio chat app: a North Indian diet assistant with PDF-backed retrieval.

At import time the module reads ``North_Indian_Diet.pdf``, embeds every page
with a sentence-transformers model, and stores the vectors in a FAISS L2
index. Each chat turn retrieves the closest pages for the user's message and
passes them to the hosted chat model as extra context.
"""
from typing import List, Tuple

import faiss
import fitz  # PyMuPDF
import gradio as gr
import numpy as np
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer, util

PDF_PATH = "North_Indian_Diet.pdf"
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
NO_RESULTS_MESSAGE = "No relevant documents found."

# Single source of truth for the prompt: used as the textbox default in the UI
# and as the fallback when the user clears that textbox.
DEFAULT_SYSTEM_MESSAGE = (
    "You are a North Indian diet expert. You provide dietary advice, suggest "
    "meal plans, and answer questions related to North Indian cuisine and "
    "nutrition. Feel free to ask about healthy recipes, nutritional benefits "
    "of foods, or meal planning tips."
)

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


class MyApp:
    """Holds the PDF pages, their embeddings, and the FAISS index over them.

    Attributes:
        documents: One ``{"page": int, "content": str}`` dict per PDF page.
        embeddings: Array of page embeddings, or ``None`` if nothing was
            indexed.
        index: FAISS ``IndexFlatL2`` over ``embeddings``, or ``None``.
        model: The sentence-transformers encoder, loaded on first use.
    """

    def __init__(self) -> None:
        self.documents = []
        self.embeddings = None
        self.index = None
        self.model = None
        self.load_pdf(PDF_PATH)
        self.build_vector_db()

    def _get_model(self) -> SentenceTransformer:
        """Return the shared encoder, loading it on the first call only."""
        # Loading the model is expensive; doing it once instead of on every
        # query keeps per-message latency down.
        if getattr(self, "model", None) is None:
            self.model = SentenceTransformer(EMBEDDING_MODEL_NAME)
        return self.model

    def load_pdf(self, file_path: str) -> None:
        """Extract the text of every page of *file_path* into ``self.documents``.

        Any previously loaded documents are replaced. Page numbers stored
        under the ``"page"`` key are 1-based.
        """
        # The context manager closes the PDF handle even if extraction fails.
        with fitz.open(file_path) as doc:
            self.documents = [
                {"page": page_num, "content": page.get_text()}
                for page_num, page in enumerate(doc, start=1)
            ]
        print("PDF processed successfully!")

    def build_vector_db(self) -> None:
        """Embed all loaded pages and build a FAISS L2 index over them.

        When no pages are loaded, the embeddings and index are reset to
        ``None`` instead of failing on an empty embedding array.
        """
        texts = [doc["content"] for doc in self.documents]
        if not texts:
            self.embeddings = None
            self.index = None
            print("No document content available; vector database not built.")
            return

        # FAISS expects contiguous float32 input.
        self.embeddings = np.ascontiguousarray(
            self._get_model().encode(texts), dtype=np.float32
        )
        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        self.index.add(self.embeddings)
        print("Vector database built successfully!")

    def search_documents(self, query: str, k: int = 3) -> List[str]:
        """Return the text of up to *k* pages closest to *query*.

        Args:
            query: Free-text search string.
            k: Maximum number of pages to return.

        Returns:
            The matching page texts, nearest first, or a single-item list
            holding a "no results" message when nothing can be retrieved.
        """
        if self.index is None or not self.documents or k <= 0:
            return [NO_RESULTS_MESSAGE]

        # Never ask for more neighbours than there are pages.
        k = min(k, len(self.documents))
        query_embedding = np.ascontiguousarray(
            self._get_model().encode([query]), dtype=np.float32
        )
        _distances, indices = self.index.search(query_embedding, k)

        # Skip out-of-range ids defensively: a negative id would otherwise
        # silently pick a page from the end of the list.
        results = [
            self.documents[i]["content"]
            for i in indices[0]
            if 0 <= i < len(self.documents)
        ]
        return results if results else [NO_RESULTS_MESSAGE]


app = MyApp()


def respond(
    message: str,
    history: List[Tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream the assistant's reply for one chat turn.

    Args:
        message: The user's latest message.
        history: Previous ``(user, assistant)`` turns.
        system_message: System prompt from the UI; the built-in default is
            used when it is empty.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The reply accumulated so far, once per streamed token.
    """
    # Honour the UI-provided prompt instead of overwriting it.
    system_prompt = system_message or DEFAULT_SYSTEM_MESSAGE
    messages = [{"role": "system", "content": system_prompt}]

    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    # RAG - retrieve relevant pages and pass them along as extra context.
    retrieved_docs = app.search_documents(message)
    context = "\n".join(retrieved_docs)
    messages.append({"role": "system", "content": "Relevant documents: " + context})

    response = ""
    # The loop variable is deliberately not called `message`, so the user's
    # message parameter is not shadowed.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # Some streamed deltas carry no text; skip them rather than fail on
        # `str += None`.
        if token:
            response += token
            yield response


demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value=DEFAULT_SYSTEM_MESSAGE, label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    examples=[
        ["Can you suggest a healthy North Indian breakfast?"],
        ["What are the nutritional benefits of chickpeas?"],
        ["How can I plan a balanced North Indian meal?"],
    ],
    title='North Indian Diet Expert 🍛',
)

if __name__ == "__main__":
    demo.launch()