Spaces:

yashbyname
/

Course_Recommender_system

Runtime error

App Files Files Community

Course_Recommender_system / app.py

yashbyname

Update app.py

94dabc7 verified 5 months ago

raw

history blame contribute delete

6.43 kB

	import json
	import os

	import google.generativeai as genai
	import gradio as gr
	import numpy as np
	import pandas as pd
	import requests
	import torch
	from sentence_transformers import SentenceTransformer
	from torch.nn.functional import cosine_similarity

	# Course catalogue, loaded once when the module is imported.
	df = pd.read_csv("cleaned_data.csv")

	# Sentence encoder used for every embedding computed in this module.
	bert_model = SentenceTransformer('all-MiniLM-L6-v2')


	def _embed_description(description):
	    """Encode a single course description into a tensor with ``bert_model``."""
	    return bert_model.encode(description, convert_to_tensor=True)


	# One embedding per row, stored alongside the course it belongs to.
	df["course_embedding"] = df["Transformed_description"].apply(_embed_description)

	# The Gemini API key comes from the environment (None when the variable is unset).
	genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

	# Difficulty buckets the categorisation response must contain.
	_SKILL_LEVELS = ("beginner", "intermediate", "advanced")

	# Gemini model used for both prompts.
	_GEMINI_MODEL = "gemini-pro"

	# Relative weight of each ranking signal. No time-to-complete term is
	# applied, so the highest reachable score is 0.9.
	_RANKING_WEIGHTS = {"similarity": 0.6, "rating": 0.2, "difficulty": 0.1}

	# Number of best-ranked courses handed to Gemini for categorisation.
	_TOP_COURSES = 6


	def _split_field(raw):
	    """Split a comma-separated form value into stripped, non-empty items."""
	    text = "" if raw is None else str(raw)
	    return [item.strip() for item in text.split(",") if item.strip()]


	def _ask_gemini(prompt):
	    """Send one prompt through the module-level ``genai`` SDK and return the reply text."""
	    return genai.GenerativeModel(_GEMINI_MODEL).generate_content(prompt).text


	def _rank_courses(similarity_scores):
	    """Score every course and return the best ``_TOP_COURSES`` rows.

	    Works on a copy of the needed columns so the shared ``df`` is not
	    modified by individual requests.
	    """
	    ranked = df[["course_name", "course_url", "course_rating", "course_difficulty"]].copy()

	    rating = ranked["course_rating"]
	    rating_span = rating.max() - rating.min()
	    if rating_span > 0:
	        ranked["normalized_rating"] = (rating - rating.min()) / rating_span
	    else:
	        # All ratings equal (or missing): the rating carries no signal.
	        ranked["normalized_rating"] = 0.0

	    difficulty = ranked["course_difficulty"]
	    hardest = difficulty.max()
	    if hardest > 0:
	        # Easier courses get a higher value.
	        ranked["normalized_difficulty"] = 1 - difficulty / hardest
	    else:
	        ranked["normalized_difficulty"] = 1.0

	    ranked["ranking_score"] = (
	        _RANKING_WEIGHTS["similarity"] * similarity_scores
	        + _RANKING_WEIGHTS["rating"] * ranked["normalized_rating"].values
	        + _RANKING_WEIGHTS["difficulty"] * ranked["normalized_difficulty"].values
	    )
	    return ranked.sort_values(by="ranking_score", ascending=False).head(_TOP_COURSES)


	def _parse_categories(raw_text, url_by_name):
	    """Parse and validate Gemini's categorisation reply.

	    A level whose value is missing, not a list, or contains an entry
	    without both "name" and "url" is reset to an empty list. URLs of
	    courses whose name is in ``url_by_name`` are replaced by the catalogue
	    URL so that the model cannot invent links.
	    """
	    failure = {
	        "beginner": [],
	        "intermediate": [],
	        "advanced": [],
	        "error": "Failed to categorize courses"
	    }
	    try:
	        # The model often wraps its JSON in a Markdown code fence.
	        cleaned = raw_text.strip().replace('```json', '').replace('```', '')
	        parsed = json.loads(cleaned)
	    except (AttributeError, TypeError, ValueError):
	        return failure
	    if not isinstance(parsed, dict):
	        return failure

	    for level in _SKILL_LEVELS:
	        courses = parsed.get(level)
	        well_formed = isinstance(courses, list) and all(
	            isinstance(course, dict) and "name" in course and "url" in course
	            for course in courses
	        )
	        if not well_formed:
	            parsed[level] = []
	            continue
	        for course in courses:
	            name = course["name"]
	            if isinstance(name, str) and name in url_by_name:
	                course["url"] = url_by_name[name]
	    return parsed


	def recommend_courses(skills, interests, experience, education, time, certificates, careerpath):
	    """Recommend courses for a user profile, grouped by difficulty level.

	    Each argument is a comma-separated string taken from the form (``None``
	    is treated as empty). The profile is sent to Gemini for a one-word
	    skill-level estimate, embedded with ``bert_model``, compared against the
	    pre-computed course embeddings in ``df``, and the best-ranked courses
	    are sent to Gemini again to be sorted into levels.

	    Returns:
	        A dict with "beginner", "intermediate" and "advanced" lists of
	        ``{"name", "url"}`` dicts. On failure the dict carries an "error"
	        key instead (together with empty level lists when only the
	        categorisation reply could not be parsed).
	    """
	    user_profile = {
	        "skills": _split_field(skills),
	        "interests": _split_field(interests),
	        "experience": _split_field(experience),
	        "education": _split_field(education),
	        "time": _split_field(time),
	        "certificates": _split_field(certificates),
	        "careerpath": _split_field(careerpath),
	    }
	    if not any(user_profile.values()):
	        # Nothing to assess or embed; skip the model calls entirely.
	        return {"error": "Please fill in at least one profile field."}

	    try:
	        skill_prompt = "\n".join([
	            "Give the current skill level in one word out of 'beginner', 'intermediate', 'advanced'.",
	            f"Here is the user profile: {user_profile}",
	            "strictly do not output any extra textual data.",
	        ])
	        current_skill = _ask_gemini(skill_prompt).strip().replace("\n", "")
	        user_profile["CurrentSkill"] = [current_skill]

	        # Dict order is insertion order, so the text lists skills first and
	        # the assessed level last.
	        user_text = " ".join(item for items in user_profile.values() for item in items)

	        user_embedding = bert_model.encode(user_text, convert_to_tensor=True)
	        course_embeddings = torch.stack(df["course_embedding"].tolist())
	        # Add a batch dimension so the comparison is an explicit
	        # (1, dim) vs (n_courses, dim) broadcast along dim=1.
	        similarities = cosine_similarity(user_embedding.unsqueeze(0), course_embeddings, dim=1)

	        top_courses = _rank_courses(similarities.detach().cpu().numpy())
	        names = top_courses["course_name"].tolist()
	        urls = top_courses["course_url"].tolist()
	        catalogue = [{"name": name, "url": url} for name, url in zip(names, urls)]

	        schema = json.dumps(
	            {level: [{"name": "course name", "url": "course url"}] for level in _SKILL_LEVELS},
	            indent=2,
	        )
	        categorize_prompt = "\n".join([
	            "Return a JSON object with this exact structure:",
	            schema,
	            "",
	            f"Categorize these courses: {json.dumps(catalogue)}",
	            "",
	            "Use the url given for each course; do not invent urls.",
	            "Based on:",
	            f"- User skill level: {current_skill}",
	            f"- Course difficulties: {top_courses['normalized_difficulty'].tolist()}",
	            f"- User skills: {user_profile['skills']}",
	            "",
	            "Categorise atleast one course for each beginner, intermediate and advanced.",
	            "Return ONLY valid JSON without any extra text.",
	        ])

	        return _parse_categories(_ask_gemini(categorize_prompt), dict(zip(names, urls)))

	    except Exception as e:
	        # UI boundary: report the failure in the JSON output instead of
	        # letting the request crash.
	        return {"error": str(e)}

	# Gradio front end: one text box per profile field, JSON output.
	# Entries are (label, placeholder) pairs in the positional order of
	# recommend_courses' parameters.
	_PROFILE_FIELDS = [
	    ("Skills", "python, machine learning"),
	    ("Interests", "AI, data science"),
	    ("Experience", "2 years python"),
	    ("Education", "bachelor's in CS"),
	    ("Time Available", "6 months"),
	    ("Certificates", "AWS, GCP"),
	    ("Career Path", "ML engineer"),
	]

	iface = gr.Interface(
	    fn=recommend_courses,
	    inputs=[
	        gr.Textbox(label=field_label, placeholder=field_hint)
	        for field_label, field_hint in _PROFILE_FIELDS
	    ],
	    outputs=gr.JSON(),
	    title="Personalized Course Recommender",
	    description="Enter your profile details to get course recommendations organized by difficulty level",
	)

	# Start the web app only when the file is run as a script.
	if __name__ == "__main__":
	    iface.launch(share=True)