| """ |
| LAB 3: Embeddings β Visualize Semantic Similarity |
| =================================================== |
| FREE β No API key needed! Uses HuggingFace sentence-transformers. |
| """ |
|
|
| import gradio as gr |
| import numpy as np |
| from sentence_transformers import SentenceTransformer |
| from sklearn.metrics.pairwise import cosine_similarity |
|
|
# Compact sentence-embedding model loaded from the HuggingFace hub
# (downloaded on first use, then cached locally by the library).
model = SentenceTransformer("all-MiniLM-L6-v2")
|
|
# Toy corpus of document chunks to search over.
# NOTE: "₹15,000" repairs mojibake in the original source ("βΉ" was a
# mis-decoded UTF-8 rupee sign).
CHUNKS = [
    "RAG stands for Retrieval Augmented Generation.",
    "Vector databases store embeddings for similarity search.",
    "The fee for the AI bootcamp is ₹15,000.",
    "LangChain is a framework for building LLM applications.",
    "Chunking splits documents into smaller pieces for indexing.",
    "Temperature controls the randomness of LLM outputs.",
    "RAGAS is used to evaluate RAG pipeline quality.",
    "Pavan Kumar is the instructor at pavancoaching.org",
]
|
|
def search(query: str, top_k: int) -> str:
    """Rank CHUNKS by cosine similarity to *query* and format the top hits.

    Parameters
    ----------
    query : str
        Free-text question to embed and compare against the corpus.
    top_k : int
        Number of best-matching chunks to show (Gradio sliders may pass
        a float, hence the int() cast).

    Returns
    -------
    str
        Human-readable report: one ranked entry per result, with a score
        and a proportional text bar.
    """
    k = int(top_k)
    query_emb = model.encode([query])
    # The corpus is static, so embed it once and cache on the function —
    # the original re-encoded every chunk on every query.
    chunk_embs = getattr(search, "_chunk_embs", None)
    if chunk_embs is None:
        chunk_embs = model.encode(CHUNKS)
        search._chunk_embs = chunk_embs
    scores = cosine_similarity(query_emb, chunk_embs)[0]
    ranked = sorted(zip(scores, CHUNKS), reverse=True)[:k]

    # "🔍" and "█" repair mojibake in the original source
    # ("π" / "β" were mis-decoded UTF-8 characters).
    result = f"🔍 Query: '{query}'\n\nTop {k} Results:\n{'='*50}\n\n"
    for rank, (score, chunk) in enumerate(ranked, 1):
        bar = "█" * int(score * 20)  # crude similarity bar, 0-20 chars
        result += f"#{rank} Score: {score:.4f} {bar}\n {chunk}\n\n"
    return result
|
|
| with gr.Blocks(title="Lab 3: Semantic Search", theme=gr.themes.Soft()) as demo: |
| gr.Markdown("## π’ Lab 3: Semantic Search with Embeddings (FREE)") |
| query = gr.Textbox(label="Your Question", value="How do I evaluate my RAG pipeline?") |
| top_k = gr.Slider(1, 8, value=3, step=1, label="Top-K Results") |
| btn = gr.Button("π Search", variant="primary") |
| output = gr.Textbox(label="Results", lines=20) |
| btn.click(fn=search, inputs=[query, top_k], outputs=output) |
|
|
| demo.launch() |