import json

import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
|
# Sentence-embedding model used to encode incoming search queries.
# NOTE(review): the stored course embeddings are presumably produced by this
# same model -- confirm, otherwise the similarity scores are not comparable.
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Course records; search_courses reads each record's 'embedding' vector plus
# its 'Course Category', 'Course Name', 'Course Url' and 'Course Description'.
# The file is read as UTF-8 explicitly so that decoding does not depend on the
# platform's default locale encoding.
with open('final_data_with_embeddings.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
|
|
|
|
|
def search_courses(user_query):
    """Return the four stored courses most similar to *user_query* as Markdown.

    The query is encoded with the module-level ``model`` and compared, by
    cosine similarity, against the ``'embedding'`` vector of every record in
    the module-level ``data``. The four best-scoring records are rendered as
    a numbered Markdown listing (category, name, URL, description).

    Args:
        user_query: Free-text search string entered by the user.

    Returns:
        A Markdown string with one numbered entry per match, or a short
        notice when the query is blank or no course records are loaded.

    Raises:
        KeyError: If a record has no ``'embedding'`` entry, or a matched
            record lacks one of the fields rendered below.
    """
    top_k = 4

    # A blank query carries no meaning; ask for input instead of ranking noise.
    if not user_query or not user_query.strip():
        return "Please enter a search query."

    # Nothing to rank; also avoids handing an empty matrix to sklearn.
    if not data:
        return "No courses are available to search."

    # sklearn expects 2-D inputs, so the query becomes a single-row matrix.
    query_embedding = np.asarray(model.encode(user_query)).reshape(1, -1)

    # Score every course in one batched call instead of one call per record.
    course_embeddings = np.array([record['embedding'] for record in data])
    scores = cosine_similarity(query_embedding, course_embeddings)[0]

    # Stable sort on the negated scores: best first, ties keep file order.
    best_indices = np.argsort(-scores, kind="stable")[:top_k]

    results = []
    for rank, index in enumerate(best_indices, 1):
        det = data[index]
        course_info = (
            f"{rank}. "
            f"**Category**: {det['Course Category']}\n\n"
            f"**Course Name**: {det['Course Name']}\n\n"
            f"**Course URL**: {det['Course Url']}\n\n"
            f"**Description**: {det['Course Description']}\n\n"
        )
        results.append(course_info)

    return "\n\n\n".join(results)
|
|
|
|
|
# Gradio UI: a single text input wired to search_courses, with the returned
# string rendered as Markdown.
iface = gr.Interface(
    fn=search_courses,
    inputs="text",
    outputs="markdown",
    title="Course Search with Sentence Transformers",
    description="Enter a query to find the top 4 most similar courses.",
)

# Launch the interface as soon as this module is executed.
iface.launch()
|
|