"""Gradio Q&A app that answers electronics questions by TF-IDF retrieval.

The app loads the ``STEM-AI-mtl/Electrical-engineering`` dataset, indexes
every question/answer pair with TF-IDF, and returns the stored pair(s) most
similar to the user's question.
"""

import gradio as gr
import numpy as np
from datasets import load_dataset
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load dataset
ds = load_dataset("STEM-AI-mtl/Electrical-engineering", split="train")

# Materialize both columns once so each lookup in retrieve_answer is a plain
# list index instead of a fresh column fetch from the dataset object.
questions = list(ds["input"])
answers = list(ds["output"])

# Combine input (question) and output (answer) for vectorization
docs = [inp + " " + out for inp, out in zip(questions, answers)]

# fit_transform learns the vocabulary and builds the document-term matrix in a
# single pass over the corpus; `vectorizer` stays fitted for query-time use.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(docs)


# Retrieval function
def retrieve_answer(user_q: str, top_k: int = 1) -> str:
    """Return the dataset entries most similar to *user_q* as Markdown.

    Args:
        user_q: The user's question as free text.
        top_k: Maximum number of question/answer pairs to return.

    Returns:
        A Markdown string with up to ``top_k`` ``**Q:**``/``**A:**`` pairs
        separated by blank lines, ordered from most to least similar. A short
        explanatory message is returned when the query is empty or shares no
        terms with the indexed corpus; an empty string is returned when
        ``top_k`` is less than 1.
    """
    if not user_q or not user_q.strip():
        return "Please enter a question."
    if top_k < 1:
        # A non-positive count would otherwise be interpreted as a slice
        # bound (e.g. [:-1] yields almost the whole dataset).
        return ""

    query_vec = vectorizer.transform([user_q])
    sims = cosine_similarity(query_vec, tfidf_matrix).flatten()
    ranked = np.argsort(-sims)[:top_k]

    # A similarity of zero means the query has no term in common with that
    # document, so it is an arbitrary pick rather than a real match.
    hits = [i for i in ranked if sims[i] > 0]
    if not hits:
        return "No matching entry found in the dataset. Try rephrasing your question."

    return "\n\n".join(
        f"**Q:** {questions[i]}\n**A:** {answers[i]}" for i in hits
    )


# Gradio app
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Electronics Engineering Q&A Chatbot")
    gr.Markdown("Ask any electronics-related question and get an AI-assisted answer based on a curated dataset.")
    # NOTE(review): the original indentation was lost, so which widgets sat
    # inside the row is an assumption - confirm the intended layout.
    with gr.Row():
        user_q = gr.Textbox(
            label="Your Question",
            lines=2,
            placeholder="e.g. What is the purpose of a Zener diode?",
        )
        answer_box = gr.Markdown(label="Answer")
    submit_btn = gr.Button("Get Answer")
    submit_btn.click(fn=retrieve_answer, inputs=user_q, outputs=answer_box)

if __name__ == "__main__":
    demo.launch()