ghostai1 committed
Commit 743990e · verified · 1 Parent(s): 8f120f7

Update app.py


HF free RAG demo using free tools; I'll make a custom FAISS index on my cluster later (a sketch of that swap follows the diff below).

Files changed (1)
  1. app.py +31 -33
app.py CHANGED
@@ -1,43 +1,41 @@
  import gradio as gr
- import faiss
  import numpy as np
  from sentence_transformers import SentenceTransformer

- # Initialize the sentence transformer model
- model = SentenceTransformer('all-MiniLM-L6-v2')
+ # 1. Load embedder
+ embedder = SentenceTransformer('all-MiniLM-L6-v2')

- # Example documents for retrieval (replace with your own dataset)
- documents = [
-     "Hugging Face hosts pre-trained models for easy access.",
-     "RAG stands for Retrieval-Augmented Generation.",
-     "Gradio is an easy-to-use tool for creating machine learning demos.",
-     "AI and machine learning are transforming industries worldwide."
+ # 2. Your “knowledge base” (swap in your own docs)
+ docs = [
+     "Hugging Face makes it easy to host and serve models.",
+     "Retrieval-Augmented Generation combines embeddings + LLMs for up-to-date answers.",
+     "Gradio lets you build ML UIs in minutes with Python only.",
+     "You can deploy a RAG demo on Hugging Face Spaces for free."
  ]

- # Embedding the documents
- document_embeddings = model.encode(documents)
+ # 3. Pre-compute doc embeddings
+ doc_embeddings = embedder.encode(docs, convert_to_numpy=True, normalize_embeddings=True)

- # Set up FAISS for fast similarity search
- index = faiss.IndexFlatL2(768)  # 768 dimensions for MiniLM model
- index.add(np.array(document_embeddings))
-
- def retrieve_answer(query):
-     # Embed the query
-     query_embedding = model.encode([query])
-
-     # Find the most similar document
-     distances, indices = index.search(np.array(query_embedding), k=1)
-
-     # Return the most similar document as the answer
-     return documents[indices[0][0]]
+ def rag_answer(query, top_k=1):
+     # 4. Embed and normalize query
+     q_emb = embedder.encode([query], convert_to_numpy=True, normalize_embeddings=True)[0]
+     # 5. Cosine-similarity = dot product on normalized vectors
+     sims = doc_embeddings @ q_emb
+     idxs = np.argsort(sims)[::-1][:top_k]
+     # 6. Return top-k docs joined
+     return "\n\n".join(docs[i] for i in idxs)

- # Set up Gradio interface
- iface = gr.Interface(fn=retrieve_answer,
-                      inputs="text",
-                      outputs="text",
-                      live=True,
-                      title="RAG Mini-Agent",
-                      description="Ask a question and get answers from the documents")
-
- # Launch the interface
- iface.launch()
+ # 7. Gradio interface
+ iface = gr.Interface(
+     fn=rag_answer,
+     inputs=[
+         gr.Textbox(lines=2, placeholder="Ask me anything…", label="Your question"),
+         gr.Slider(1, len(docs), value=1, step=1, label="Number of results")
+     ],
+     outputs=gr.Textbox(label="Answer"),
+     title="Simple Mini-RAG Demo",
+     description="Retrieval + LLM = RAG. This example finds most relevant docs via cosine similarity."
+ )
+
+ if __name__ == "__main__":
+     iface.launch()
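Two quick follow-ups on the new retrieval path. First, step 5 relies on a standard identity: on L2-normalized vectors the dot product is cosine similarity, since cos(a, b) = a·b / (‖a‖ ‖b‖) and both norms are 1 after normalize_embeddings=True. A tiny self-contained check with toy vectors (not model output):

import numpy as np

# Toy vectors; after L2 normalization, dot product == cosine similarity.
a = np.array([3.0, 4.0])
b = np.array([4.0, 3.0])
a /= np.linalg.norm(a)
b /= np.linalg.norm(b)

cosine = (a @ b) / (np.linalg.norm(a) * np.linalg.norm(b))
assert np.isclose(a @ b, cosine)  # norms are 1, so the two agree
print(a @ b)  # 0.96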
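Second, on the "custom FAISS on my cluster" plan from the commit message: a minimal sketch of what that swap could look like, assuming faiss-cpu is installed (the name faiss_rag_answer is made up here, not part of app.py). IndexFlatIP (inner product) over the same normalized embeddings reproduces the cosine ranking exactly. One catch carried over from the old code: all-MiniLM-L6-v2 embeds to 384 dimensions, not 768, so read the dimension off the embedding matrix instead of hard-coding it.

import faiss  # pip install faiss-cpu -- sketch only, not the final cluster setup
import numpy as np
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer('all-MiniLM-L6-v2')
docs = [
    "Hugging Face makes it easy to host and serve models.",
    "Retrieval-Augmented Generation combines embeddings + LLMs for up-to-date answers.",
    "Gradio lets you build ML UIs in minutes with Python only.",
    "You can deploy a RAG demo on Hugging Face Spaces for free."
]
doc_embeddings = embedder.encode(docs, convert_to_numpy=True, normalize_embeddings=True)

# all-MiniLM-L6-v2 outputs 384-dim vectors; take the dim from the data.
dim = doc_embeddings.shape[1]

# Inner product on L2-normalized vectors equals cosine similarity, so
# IndexFlatIP ranks documents identically to the numpy search in app.py.
index = faiss.IndexFlatIP(dim)
index.add(doc_embeddings.astype(np.float32))

def faiss_rag_answer(query, top_k=1):  # hypothetical drop-in for rag_answer
    q_emb = embedder.encode([query], convert_to_numpy=True,
                            normalize_embeddings=True).astype(np.float32)
    scores, idxs = index.search(q_emb, top_k)  # (1, top_k) score/index arrays
    return "\n\n".join(docs[i] for i in idxs[0])

On four documents this buys nothing over the numpy dot product, but the same interface scales to millions of vectors, and trading IndexFlatIP for an IVF or HNSW index is what would make sense on a cluster.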