import gradio as gr
import numpy as np
import torch
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoModel, AutoTokenizer

# Load a pre-trained sentence-embedding model and tokenizer from Hugging Face.
# BUGFIX: AutoModel (not AutoModelForSequenceClassification) is required here —
# the classification variant bolts a randomly-initialized head onto the encoder
# and returns (batch, num_labels) logits instead of per-token hidden states,
# which makes the cosine-similarity retrieval below meaningless.
model_name = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
model.eval()  # inference-only: disable dropout etc.

# Sample dataset of (question, answer) pairs.
dataset = [
    ("What is the capital of France?", "Paris is the capital of France."),
    ("Who is the creator of Python?", "Guido van Rossum created Python."),
    ("What is the tallest mountain in the world?", "Mount Everest is the tallest mountain in the world."),
]


def _embed(text):
    """Return a (1, hidden_size) numpy embedding of *text*.

    Embedding = mean pooling of the encoder's last hidden state over the
    sequence dimension (a simple sentence-embedding strategy).
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():  # no gradients needed at inference time
        hidden = model(**inputs).last_hidden_state  # (1, seq_len, hidden)
    return hidden.mean(dim=1).numpy()  # (1, hidden)


# Pre-compute one embedding per dataset question so each chat request performs
# a single forward pass instead of len(dataset) + 1 forward passes.
_question_embeddings = [(_embed(q), a) for q, a in dataset]


def find_most_relevant_answer(question):
    """Return the stored answer whose *question* is most similar to the input.

    Similarity is cosine similarity between the mean-pooled embedding of the
    user's question and each pre-computed dataset-question embedding.
    Returns "" only when the dataset is empty.
    """
    query_embedding = _embed(question)
    highest_similarity = -1.0
    most_relevant_answer = ""
    for stored_embedding, answer in _question_embeddings:
        # BUGFIX: both operands are already 2-D (1, hidden) arrays — exactly
        # what sklearn's cosine_similarity expects. The original wrapped them
        # in an extra list, producing a 3-D input that raises a ValueError.
        similarity = cosine_similarity(query_embedding, stored_embedding)[0][0]
        if similarity > highest_similarity:
            highest_similarity = similarity
            most_relevant_answer = answer
    return most_relevant_answer


def chat_with_bot(question):
    """Gradio callback: map a free-text question to the best canned answer."""
    return find_most_relevant_answer(question)


iface = gr.Interface(
    fn=chat_with_bot,
    inputs="text",
    outputs="text",
    title="Simple QA Chatbot",
)

# Guard the entry point so importing this module does not start a web server.
if __name__ == "__main__":
    iface.launch()