its-abhay777
committed on
Upload 4 files
Browse files
Search_Engine/SmartSearchEngine.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
3 |
+
import numpy as np
|
4 |
+
from sentence_transformers import SentenceTransformer
|
5 |
+
|
6 |
+
# Location of the pickled course table (it must contain an 'Embeddings'
# column, which search_courses reads) and the id of the sentence-transformer
# model used to embed queries.
_EMBEDDINGS_FILE = 'output/courses_with_embeddings.pkl'
_MODEL_NAME = 'all-MiniLM-L6-v2'

# NOTE(review): unpickling executes arbitrary code from the file, so this
# pickle must only ever come from a trusted source.
df = pd.read_pickle(_EMBEDDINGS_FILE)

# Sentence-transformer used to turn query text into a vector.
model = SentenceTransformer(_MODEL_NAME)
|
11 |
+
|
12 |
+
# Function to perform a search query
|
13 |
+
def search_courses(query, top_n=5):
    """Return the courses whose embeddings are most similar to a query.

    The query is embedded with the module-level ``model`` and compared, by
    cosine similarity, against the vectors stored in the ``Embeddings``
    column of the module-level ``df``.

    Args:
        query: Search text to embed.
        top_n: Maximum number of courses to return. Defaults to 5. ``None``
            returns every course, ranked.

    Returns:
        A DataFrame with at most ``top_n`` rows of ``df``, ordered from most
        to least similar, restricted to the columns ``Course Title``,
        ``Description``, ``Rating``, ``Link``, ``Duration`` and ``Level``.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    columns = ['Course Title', 'Description', 'Rating', 'Link', 'Duration', 'Level']

    # A negative count would be interpreted by the slice below as "drop the
    # last N" and silently return almost the whole table.
    if top_n is not None and top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # With no rows the embedding matrix collapses to an empty 1-D array,
    # which cosine_similarity will not accept; there is nothing to rank.
    if df.empty:
        return df.iloc[0:0][columns]

    # Generate the embedding for the query (a one-row batch).
    query_embedding = model.encode([query])

    # Similarity of the single query row against every stored embedding.
    course_embeddings = np.array(df['Embeddings'].tolist())
    scores = cosine_similarity(query_embedding, course_embeddings)[0]

    # argsort is ascending, so reverse it to put the best match first.
    ranked = np.argsort(scores)[::-1]

    return df.iloc[ranked[:top_n]][columns]
|
27 |
+
|
28 |
+
# Example search. Guarded so it runs only when this file is executed as a
# script, not every time another module imports search_courses.
if __name__ == "__main__":
    query = "data science beginner course"
    results = search_courses(query)
    print(results)
|
Search_Engine/VectorEmbedding.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from sentence_transformers import SentenceTransformer
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
# Load the course data from CSV.
df = pd.read_csv('output/analytics_vidhya_courses_with_ratings.csv')

# Sentence-transformer used to embed each course description.
model = SentenceTransformer('all-MiniLM-L6-v2')

# read_csv represents empty cells as NaN (a float). Hand the encoder text
# only: missing descriptions become empty strings for embedding purposes,
# while the stored 'Description' column itself is left untouched.
course_descriptions = df['Description'].fillna('').astype(str).tolist()
description_embeddings = model.encode(course_descriptions)

# Store the embeddings into the DataFrame, one vector per row.
df['Embeddings'] = list(description_embeddings)

# Save the DataFrame with embeddings for later use
df.to_pickle('output/courses_with_embeddings.pkl')
print("Embeddings generated and saved successfully!")
|
Search_Engine/__pycache__/SmartSearchEngine.cpython-310.pyc
ADDED
Binary file (953 Bytes). View file
|
|
Search_Engine/__pycache__/SmartSearchEngine.cpython-39.pyc
ADDED
Binary file (947 Bytes). View file
|
|