ghostai1 committed
Commit 743990e · verified · 1 Parent(s): 8f120f7

Update app.py


HF free RAG demo using free tools; I'll make a custom FAISS index on my cluster later (a sketch of that swap follows the diff below).

Files changed (1)
  1. app.py +31 -33
app.py CHANGED
@@ -1,43 +1,41 @@
  import gradio as gr
- import faiss
  import numpy as np
  from sentence_transformers import SentenceTransformer

- # Initialize the sentence transformer model
- model = SentenceTransformer('all-MiniLM-L6-v2')
+ # 1. Load embedder
+ embedder = SentenceTransformer('all-MiniLM-L6-v2')

- # Example documents for retrieval (replace with your own dataset)
- documents = [
-     "Hugging Face hosts pre-trained models for easy access.",
-     "RAG stands for Retrieval-Augmented Generation.",
-     "Gradio is an easy-to-use tool for creating machine learning demos.",
-     "AI and machine learning are transforming industries worldwide."
+ # 2. Your “knowledge base” (swap in your own docs)
+ docs = [
+     "Hugging Face makes it easy to host and serve models.",
+     "Retrieval-Augmented Generation combines embeddings + LLMs for up-to-date answers.",
+     "Gradio lets you build ML UIs in minutes with Python only.",
+     "You can deploy a RAG demo on Hugging Face Spaces for free."
  ]

- # Embedding the documents
- document_embeddings = model.encode(documents)
+ # 3. Pre-compute doc embeddings
+ doc_embeddings = embedder.encode(docs, convert_to_numpy=True, normalize_embeddings=True)

- # Set up FAISS for fast similarity search
- index = faiss.IndexFlatL2(768)  # 768 dimensions for MiniLM model
- index.add(np.array(document_embeddings))
-
- def retrieve_answer(query):
-     # Embed the query
-     query_embedding = model.encode([query])
-
-     # Find the most similar document
-     distances, indices = index.search(np.array(query_embedding), k=1)
-
-     # Return the most similar document as the answer
-     return documents[indices[0][0]]
+ def rag_answer(query, top_k=1):
+     # 4. Embed and normalize query
+     q_emb = embedder.encode([query], convert_to_numpy=True, normalize_embeddings=True)[0]
+     # 5. Cosine-similarity = dot product on normalized vectors
+     sims = doc_embeddings @ q_emb
+     idxs = np.argsort(sims)[::-1][:top_k]
+     # 6. Return top-k docs joined
+     return "\n\n".join(docs[i] for i in idxs)

- # Set up Gradio interface
- iface = gr.Interface(fn=retrieve_answer,
-                      inputs="text",
-                      outputs="text",
-                      live=True,
-                      title="RAG Mini-Agent",
-                      description="Ask a question and get answers from the documents")
-
- # Launch the interface
- iface.launch()
+ # 7. Gradio interface
+ iface = gr.Interface(
+     fn=rag_answer,
+     inputs=[
+         gr.Textbox(lines=2, placeholder="Ask me anything…", label="Your question"),
+         gr.Slider(1, len(docs), value=1, step=1, label="Number of results")
+     ],
+     outputs=gr.Textbox(label="Answer"),
+     title="Simple Mini-RAG Demo",
+     description="Retrieval + LLM = RAG. This example finds most relevant docs via cosine similarity."
+ )
+
+ if __name__ == "__main__":
+     iface.launch()
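Two quick follow-ups on the new retrieval path. First, step 5 relies on a standard identity: on L2-normalized vectors the dot product is cosine similarity, since cos(a, b) = a·b / (‖a‖ ‖b‖) and both norms are 1 after normalize_embeddings=True. A tiny self-contained check with toy vectors (not model output):

import numpy as np

# Toy vectors; after L2 normalization, dot product == cosine similarity.
a = np.array([3.0, 4.0])
b = np.array([4.0, 3.0])
a /= np.linalg.norm(a)
b /= np.linalg.norm(b)

cosine = (a @ b) / (np.linalg.norm(a) * np.linalg.norm(b))
assert np.isclose(a @ b, cosine)  # norms are 1, so the two agree
print(a @ b)  # 0.96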
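Second, on the "custom FAISS on my cluster" plan from the commit message: a minimal sketch of what that swap could look like, assuming faiss-cpu is installed (the name faiss_rag_answer is made up here, not part of app.py). IndexFlatIP (inner product) over the same normalized embeddings reproduces the cosine ranking exactly. One catch carried over from the old code: all-MiniLM-L6-v2 embeds to 384 dimensions, not 768, so read the dimension off the embedding matrix instead of hard-coding it.

import faiss  # pip install faiss-cpu -- sketch only, not the final cluster setup
import numpy as np
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer('all-MiniLM-L6-v2')
docs = [
    "Hugging Face makes it easy to host and serve models.",
    "Retrieval-Augmented Generation combines embeddings + LLMs for up-to-date answers.",
    "Gradio lets you build ML UIs in minutes with Python only.",
    "You can deploy a RAG demo on Hugging Face Spaces for free."
]
doc_embeddings = embedder.encode(docs, convert_to_numpy=True, normalize_embeddings=True)

# all-MiniLM-L6-v2 outputs 384-dim vectors; take the dim from the data.
dim = doc_embeddings.shape[1]

# Inner product on L2-normalized vectors equals cosine similarity, so
# IndexFlatIP ranks documents identically to the numpy search in app.py.
index = faiss.IndexFlatIP(dim)
index.add(doc_embeddings.astype(np.float32))

def faiss_rag_answer(query, top_k=1):  # hypothetical drop-in for rag_answer
    q_emb = embedder.encode([query], convert_to_numpy=True,
                            normalize_embeddings=True).astype(np.float32)
    scores, idxs = index.search(q_emb, top_k)  # (1, top_k) score/index arrays
    return "\n\n".join(docs[i] for i in idxs[0])

On four documents this buys nothing over the numpy dot product, but the same interface scales to millions of vectors, and trading IndexFlatIP for an IVF or HNSW index is what would make sense on a cluster.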