Vishal1806 committed on
Commit
64aa0bc
·
verified ·
1 Parent(s): ac845d1
Files changed (1) hide show
  1. model.py +27 -0
model.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ import pandas as pd
3
+ import torch
4
+
5
# Sentence-embedding model used for both the course descriptions and, later,
# the search queries, so that both live in the same vector space.
model = SentenceTransformer('distilbert-base-nli-stsb-mean-tokens')

# Scraped course catalogue; the code in this file reads its 'description',
# 'title' and 'curriculum' columns.
courses_df = pd.read_csv("courses_data.csv")


def _embed_description(text):
    """Return the embedding tensor of a single course description."""
    return model.encode(text, convert_to_tensor=True)


# Pre-compute one embedding tensor per course at import time so that a search
# only has to embed the query.
courses_df['embedding'] = courses_df['description'].apply(_embed_description)
13
+
14
def search_courses(query, top_k=5):
    """Return the courses whose descriptions best match ``query``.

    The query is embedded with the module-level ``model`` and compared, by
    cosine similarity, against the pre-computed tensors stored in
    ``courses_df['embedding']``.

    Args:
        query: Free-text search string.
        top_k: Maximum number of courses to return. It is clamped to the
            number of rows in ``courses_df``.

    Returns:
        A list of dicts with the keys ``'title'``, ``'description'`` and
        ``'curriculum'``, ordered from most to least similar. The list is
        empty when there are no courses or ``top_k`` is not positive.
    """
    # torch.topk raises when k exceeds the number of scores and torch.stack
    # rejects an empty list, so bound k and bail out before touching either.
    k = min(top_k, len(courses_df))
    if k <= 0:
        return []

    query_embedding = model.encode(query, convert_to_tensor=True)
    course_embeddings = torch.stack(courses_df['embedding'].tolist())

    # Give the query an explicit leading axis so it broadcasts against every
    # course row, and reduce over the embedding axis (dim=1).
    cosine_scores = torch.nn.functional.cosine_similarity(
        query_embedding.unsqueeze(0), course_embeddings, dim=1
    )
    top_results = torch.topk(cosine_scores, k=k)

    results = []
    for row_position in top_results.indices.tolist():
        course = courses_df.iloc[row_position]
        results.append({
            'title': course['title'],
            'description': course['description'],
            'curriculum': course['curriculum'],
        })
    return results