Spaces:

its-abhay777
/

SmartSearchEngine-AnalyticsVidhya

Sleeping

its-abhay777 committed on Oct 24, 2024

Commit

2f39413

verified ·

1 Parent(s): 3654f68

search engine

Files changed (5) hide show

Search Engine/SmartSearchEngine.py ADDED Viewed

+import pandas as pd
+from sklearn.metrics.pairwise import cosine_similarity
+import numpy as np
+from sentence_transformers import SentenceTransformer
+# Load the DataFrame with embeddings
+df = pd.read_pickle('output/courses_with_embeddings.pkl')
+# Load the model
+model = SentenceTransformer('all-MiniLM-L6-v2')
+# Function to perform a search query
+def search_courses(query, top_n=5):
+    # Generate the embedding for the query
+    query_embedding = model.encode([query])
+    # Calculate cosine similarity between the query and course descriptions
+    similarities = cosine_similarity(query_embedding, np.array(df['Embeddings'].tolist()))
+    # Sort courses based on similarity score
+    sorted_indices = np.argsort(similarities[0])[::-1]
+    # Retrieve the top N most relevant courses
+    top_courses = df.iloc[sorted_indices[:top_n]]
+    return top_courses[['Course Title', 'Description', 'Rating', 'Link', 'Duration', 'Level']]
+# Example search
+query = "data science beginner course"
+results = search_courses(query)
+print(results)

Search Engine/VectorEmbedding.py ADDED Viewed

+import pandas as pd
+from sentence_transformers import SentenceTransformer
+import numpy as np
+df = pd.read_csv('output/analytics_vidhya_courses_with_ratings.csv')
+model = SentenceTransformer('all-MiniLM-L6-v2')
+course_descriptions = df['Description'].tolist()
+description_embeddings = model.encode(course_descriptions)
+# Store the embeddings into the DataFrame
+df['Embeddings'] = list(description_embeddings)
+# Save the DataFrame with embeddings for later use
+df.to_pickle('output/courses_with_embeddings.pkl')
+print("Embeddings generated and saved successfully!")

Search Engine/__pycache__/SmartSearchEngine.cpython-310.pyc ADDED Viewed

Binary file (953 Bytes). View file

Search Engine/__pycache__/SmartSearchEngine.cpython-39.pyc ADDED Viewed

Binary file (947 Bytes). View file

app.py ADDED Viewed

+import pandas as pd
+import gradio as gr
+from SmartSearchEngine import search_courses
+# Load the DataFrame with embeddings
+# NOTE(review): `df` is not referenced anywhere else in the visible part of this
+# file; gradio_search delegates to search_courses. Confirm whether this load is
+# still needed before removing it.
+df = pd.read_pickle('output/courses_with_embeddings.pkl')
+# Define the search function to be used in the Gradio interface
+def gradio_search(query):
+    results = search_courses(query, top_n=5)
+    return results.to_dict(orient='records')
+# Create a Gradio interface
+iface = gr.Interface(
+    fn=gradio_search,
+    inputs=gr.Textbox(lines=2, placeholder="Enter your search query here..."),
+    outputs=gr.JSON(label="Search Results"),
+    title="Smart Course Search Tool",
+    description="Search for the most relevant courses on Analytics Vidhya"
+)
+# Launch the Gradio interface
+iface.launch(share=True)