its-abhay777 committed on
Commit
2f39413
·
verified ·
1 Parent(s): 3654f68

search engine

Browse files
Search Engine/SmartSearchEngine.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sklearn.metrics.pairwise import cosine_similarity
3
+ import numpy as np
4
+ from sentence_transformers import SentenceTransformer
5
+
6
# Course table; the 'Embeddings' column holds one precomputed vector per row.
# NOTE: unpickling can execute arbitrary code, so only load a pickle file
# that was produced by a trusted source.
df = pd.read_pickle('output/courses_with_embeddings.pkl')

# Sentence encoder used to embed incoming search queries.
model = SentenceTransformer('all-MiniLM-L6-v2')
11
+
12
+ # Function to perform a search query
13
def search_courses(query, top_n=5):
    """Return the courses whose descriptions best match *query*.

    The query is embedded with the module-level ``model`` and compared, by
    cosine similarity, against the vectors stored in ``df['Embeddings']``.

    Args:
        query: Free-text search string.
        top_n: Maximum number of courses to return. Negative values are
            treated as zero; ``None`` returns every course.

    Returns:
        A DataFrame with the columns 'Course Title', 'Description', 'Rating',
        'Link', 'Duration' and 'Level', ordered from most to least similar.
        It is empty when the query is blank, when ``top_n`` is not positive,
        or when there are no courses to search.
    """
    columns = ['Course Title', 'Description', 'Rating', 'Link', 'Duration', 'Level']

    # A negative top_n would be read by the slice below as "all rows except
    # the last |top_n|", silently returning almost the whole table.
    if top_n is not None:
        top_n = max(top_n, 0)

    # Nothing meaningful to rank: skip the encoder and return an empty frame
    # that still has the expected columns.
    blank_query = query is None or (isinstance(query, str) and not query.strip())
    if blank_query or top_n == 0 or df.empty:
        return df.iloc[0:0][columns]

    # Generate the embedding for the query (shape: 1 x embedding_dim).
    query_embedding = model.encode([query])

    # Calculate cosine similarity between the query and every course vector.
    course_embeddings = np.array(df['Embeddings'].tolist())
    similarities = cosine_similarity(query_embedding, course_embeddings)

    # argsort is ascending, so reverse it to rank the best matches first.
    sorted_indices = np.argsort(similarities[0])[::-1]

    # Retrieve the top N most relevant courses by position.
    top_courses = df.iloc[sorted_indices[:top_n]]

    return top_courses[columns]
27
+
28
# Example search. Guarded so that it runs only when this file is executed
# directly; importing search_courses from another module must not trigger
# an extra query and print.
if __name__ == "__main__":
    query = "data science beginner course"
    results = search_courses(query)
    print(results)
Search Engine/VectorEmbedding.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sentence_transformers import SentenceTransformer
3
+ import numpy as np
4
+
5
# Build a sentence embedding for every course description and save the
# result next to the course metadata for later use by the search engine.
df = pd.read_csv('output/analytics_vidhya_courses_with_ratings.csv')

model = SentenceTransformer('all-MiniLM-L6-v2')

# pandas reads empty CSV cells as NaN (a float), which is not valid text
# input for the encoder. Replace missing descriptions with an empty string
# so every row still gets an embedding and row alignment is preserved.
course_descriptions = df['Description'].fillna('').astype(str).tolist()
description_embeddings = model.encode(course_descriptions)

# Store the embeddings into the DataFrame, one vector per row.
df['Embeddings'] = list(description_embeddings)

# Save the DataFrame with embeddings for later use
df.to_pickle('output/courses_with_embeddings.pkl')
print("Embeddings generated and saved successfully!")
Search Engine/__pycache__/SmartSearchEngine.cpython-310.pyc ADDED
Binary file (953 Bytes). View file
 
Search Engine/__pycache__/SmartSearchEngine.cpython-39.pyc ADDED
Binary file (947 Bytes). View file
 
app.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import gradio as gr
3
+ from SmartSearchEngine import search_courses
4
+
5
# The course data is loaded inside SmartSearchEngine (imported above), which
# is the only code that reads it; this script does not need its own copy.
7
+
8
+ # Define the search function to be used in the Gradio interface
9
def gradio_search(query):
    """Run a course search for *query* and return the hits as a list of dicts.

    Each of the (at most five) matching courses becomes one record mapping
    column name to value, which is the shape passed to the JSON output.
    """
    return search_courses(query, top_n=5).to_dict(orient='records')
12
+
13
+ # Create a Gradio interface
14
# Input and output widgets for the search page.
_query_box = gr.Textbox(lines=2, placeholder="Enter your search query here...")
_results_view = gr.JSON(label="Search Results")

# Wire the widgets to the search callback.
iface = gr.Interface(
    fn=gradio_search,
    inputs=_query_box,
    outputs=_results_view,
    title="Smart Course Search Tool",
    description="Search for the most relevant courses on Analytics Vidhya",
)
21
+
22
# Launch the Gradio interface only when this file is executed as a script,
# so that importing the module does not start a web server as a side effect.
if __name__ == "__main__":
    iface.launch(share=True)