its-abhay777 committed on
Commit
245c540
·
verified ·
1 Parent(s): c5a92ce

Upload 4 files

Browse files
Search_Engine/SmartSearchEngine.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sklearn.metrics.pairwise import cosine_similarity
3
+ import numpy as np
4
+ from sentence_transformers import SentenceTransformer
5
+
6
# Inputs for the search engine: the pickled course table (the same path
# VectorEmbedding.py writes to) and the sentence-embedding model name.
EMBEDDINGS_PATH = 'output/courses_with_embeddings.pkl'
MODEL_NAME = 'all-MiniLM-L6-v2'

# NOTE(review): read_pickle executes arbitrary code embedded in the file;
# only load pickles generated by this project.
df = pd.read_pickle(EMBEDDINGS_PATH)

# Model used to embed incoming queries.
model = SentenceTransformer(MODEL_NAME)
11
+
12
+ # Function to perform a search query
13
def search_courses(query, top_n=5):
    """Return the courses whose embeddings are most similar to *query*.

    The query is embedded with the module-level ``model`` and compared, by
    cosine similarity, against the vectors stored in ``df['Embeddings']``.

    Args:
        query: Free-text search string.
        top_n: Maximum number of courses to return; must be >= 0.

    Returns:
        A DataFrame with the columns 'Course Title', 'Description',
        'Rating', 'Link', 'Duration' and 'Level', ordered from most to
        least similar. It has fewer than ``top_n`` rows when the course
        table is smaller than ``top_n``.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    columns = ['Course Title', 'Description', 'Rating', 'Link', 'Duration', 'Level']

    # A negative value would end up as a slice bound below and silently
    # return "everything except the last k" instead of limiting the result.
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Nothing to rank; cosine_similarity also rejects an empty matrix.
    if top_n == 0 or df.empty:
        return df.iloc[:0][columns]

    # Generate the embedding for the query (encode takes a batch of texts).
    query_embedding = model.encode([query])

    # Stack the per-course vectors into one (n_courses, dim) matrix.
    course_embeddings = np.stack(df['Embeddings'].tolist())

    # Row 0 holds the similarities for the single query.
    similarities = cosine_similarity(query_embedding, course_embeddings)[0]

    # argsort is ascending, so reverse it to get the best matches first.
    ranked_positions = np.argsort(similarities)[::-1][:top_n]

    return df.iloc[ranked_positions][columns]
27
+
28
# Example search. Guarded so that importing this module for search_courses
# does not run a demo query and print its results as a side effect.
if __name__ == "__main__":
    query = "data science beginner course"
    results = search_courses(query)
    print(results)
Search_Engine/VectorEmbedding.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sentence_transformers import SentenceTransformer
3
+ import numpy as np
4
+
5
# Build sentence embeddings for every course description and persist them
# next to the course metadata for use by the search engine.
df = pd.read_csv('output/analytics_vidhya_courses_with_ratings.csv')

model = SentenceTransformer('all-MiniLM-L6-v2')

# read_csv turns empty cells into float NaN, which the encoder cannot
# tokenize; encode those rows as empty strings. The stored 'Description'
# column itself is left untouched.
course_descriptions = df['Description'].fillna('').astype(str).tolist()
description_embeddings = model.encode(course_descriptions)

# Store the embeddings into the DataFrame, one vector per row.
df['Embeddings'] = list(description_embeddings)

# Save the DataFrame with embeddings for later use
df.to_pickle('output/courses_with_embeddings.pkl')
print("Embeddings generated and saved successfully!")
Search_Engine/__pycache__/SmartSearchEngine.cpython-310.pyc ADDED
Binary file (953 Bytes). View file
 
Search_Engine/__pycache__/SmartSearchEngine.cpython-39.pyc ADDED
Binary file (947 Bytes). View file