search engine
Browse files
Search Engine/SmartSearchEngine.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
3 |
+
import numpy as np
|
4 |
+
from sentence_transformers import SentenceTransformer
|
5 |
+
|
6 |
+
# Precomputed course table; each row carries its description vector in the
# 'Embeddings' column (presumably written by VectorEmbedding.py).
# NOTE(review): read_pickle can execute arbitrary code on load -- only point
# this at files the project generated itself.
EMBEDDINGS_PATH = 'output/courses_with_embeddings.pkl'
df = pd.read_pickle(EMBEDDINGS_PATH)

# Sentence encoder used to embed incoming search queries.
MODEL_NAME = 'all-MiniLM-L6-v2'
model = SentenceTransformer(MODEL_NAME)
|
11 |
+
|
12 |
+
# Function to perform a search query
|
13 |
+
def search_courses(query, top_n=5):
    """Return the courses whose descriptions best match ``query``.

    The query is embedded with the module-level ``model`` and compared by
    cosine similarity against the vectors stored in ``df['Embeddings']``.

    Args:
        query: Free-text search string.
        top_n: Maximum number of courses to return. ``None`` returns every
            course; values below 1 yield an empty result.

    Returns:
        A DataFrame of at most ``top_n`` rows, most similar first, with the
        columns 'Course Title', 'Description', 'Rating', 'Link', 'Duration'
        and 'Level'.
    """
    columns = ['Course Title', 'Description', 'Rating', 'Link', 'Duration', 'Level']

    # A negative top_n would otherwise slice from the wrong end and return
    # almost every course; a float (e.g. from a UI slider) would fail to slice.
    limit = len(df) if top_n is None else max(int(top_n), 0)
    if limit == 0 or df.empty:
        return df.iloc[:0][columns]

    # Generate the embedding for the query (encode() takes a batch, hence the list)
    query_embedding = model.encode([query])
    course_embeddings = np.array(df['Embeddings'].tolist())

    # Cosine similarity between the single query row and every course row
    scores = cosine_similarity(query_embedding, course_embeddings)[0]

    # Highest similarity first
    ranked = np.argsort(scores)[::-1]

    return df.iloc[ranked[:limit]][columns]
|
27 |
+
|
28 |
+
# Example search. Guarded so that importing search_courses from another
# module does not run a query and print its results as a side effect.
if __name__ == "__main__":
    query = "data science beginner course"
    results = search_courses(query)
    print(results)
|
Search Engine/VectorEmbedding.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from sentence_transformers import SentenceTransformer
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
# Input table; the 'Description' column holds the text that gets embedded.
df = pd.read_csv('output/analytics_vidhya_courses_with_ratings.csv')

model = SentenceTransformer('all-MiniLM-L6-v2')

# read_csv yields float NaN for empty cells, which a text encoder cannot
# process; embed missing descriptions as empty strings instead. The
# 'Description' column itself is left untouched.
course_descriptions = df['Description'].fillna('').astype(str).tolist()
description_embeddings = model.encode(course_descriptions)

# Store the embeddings into the DataFrame, one vector per row
df['Embeddings'] = list(description_embeddings)

# Save the DataFrame with embeddings for later use
df.to_pickle('output/courses_with_embeddings.pkl')
print("Embeddings generated and saved successfully!")
|
Search Engine/__pycache__/SmartSearchEngine.cpython-310.pyc
ADDED
Binary file (953 Bytes). View file
|
|
Search Engine/__pycache__/SmartSearchEngine.cpython-39.pyc
ADDED
Binary file (947 Bytes). View file
|
|
app.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import gradio as gr
|
3 |
+
from SmartSearchEngine import search_courses
|
4 |
+
|
5 |
+
# The course table is loaded by SmartSearchEngine when it is imported; this
# module never reads it directly, so it is not unpickled a second time here.


# Define the search function to be used in the Gradio interface
def gradio_search(query):
    """Run a course search and return the matches as JSON-ready records.

    Args:
        query: Text entered in the search box.

    Returns:
        A list of dicts, one per matching course (at most five), keyed by
        the column names that ``search_courses`` returns.
    """
    matches = search_courses(query, top_n=5)
    return matches.to_dict(orient='records')
|
12 |
+
|
13 |
+
# Create a Gradio interface: one text box in, JSON list of courses out
iface = gr.Interface(
    fn=gradio_search,
    inputs=gr.Textbox(lines=2, placeholder="Enter your search query here..."),
    outputs=gr.JSON(label="Search Results"),
    title="Smart Course Search Tool",
    description="Search for the most relevant courses on Analytics Vidhya",
)

# Launch only when executed as a script, so importing this module (for
# example to reuse `iface` or `gradio_search`) does not start a server.
if __name__ == "__main__":
    # share=True additionally requests a temporary public URL from Gradio.
    iface.launch(share=True)
|