Spaces:
Runtime error
Runtime error
File size: 6,433 Bytes
b5a35fb 0971dea 523e4cc 0971dea 94dabc7 69bd769 2830df9 69bd769 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
import json
import os

import google.generativeai as genai
import gradio as gr
import numpy as np
import pandas as pd
import requests
import torch
from sentence_transformers import SentenceTransformer
from torch.nn.functional import cosine_similarity
# Course catalogue loaded once at import time. The code in this file reads the
# columns "Transformed_description", "course_name", "course_url",
# "course_rating" and "course_difficulty" from it.
df = pd.read_csv("cleaned_data.csv")

# Sentence-embedding model used for both the course descriptions and the
# user-profile text, so that the two can be compared by cosine similarity.
bert_model = SentenceTransformer('all-MiniLM-L6-v2')


def _embed_description(description):
    """Encode one course description into an embedding tensor."""
    return bert_model.encode(description, convert_to_tensor=True)


# Pre-compute one embedding per course so requests only embed the user text.
df["course_embedding"] = df["Transformed_description"].apply(_embed_description)

# The Gemini API key is taken from the environment (None when unset).
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
# Profile fields, in the order they are joined into the embedding text.
_PROFILE_FIELDS = (
    "skills",
    "interests",
    "experience",
    "education",
    "time",
    "certificates",
    "careerpath",
)
_SKILL_LEVELS = ("beginner", "intermediate", "advanced")
_GEMINI_MODEL = "gemini-pro"
_TOP_COURSE_COUNT = 6

# Weights of the ranking-score terms. NOTE: "time_to_complete" is declared
# but no corresponding term is added to the score in _rank_courses.
_RANKING_WEIGHTS = {
    "similarity": 0.6,
    "rating": 0.2,
    "difficulty": 0.1,
    "time_to_complete": 0.1,
}

# Literal JSON shape the model is asked to return (kept out of the f-string
# so the braces do not need escaping).
_CATEGORY_JSON_SCHEMA = """{
"beginner": [
{"name": "course name", "url": "course url"}
],
"intermediate": [
{"name": "course name", "url": "course url"}
],
"advanced": [
{"name": "course name", "url": "course url"}
]
}"""


def _split_csv(value):
    """Split a comma-separated string into stripped, non-empty items."""
    if not value:
        return []
    return [item.strip() for item in str(value).split(",") if item.strip()]


def _generate(prompt):
    """Send *prompt* to the Gemini model and return the SDK response object."""
    return genai.GenerativeModel(_GEMINI_MODEL).generate_content(prompt)


def _rank_courses(user_text):
    """Return the highest-scoring courses for *user_text* as a new DataFrame.

    The score combines cosine similarity between the user text and each course
    embedding with the normalised rating and inverted normalised difficulty.
    The module-level ``df`` is only read, never modified.
    """
    user_embedding = bert_model.encode(user_text, convert_to_tensor=True)
    course_embeddings = torch.stack(df["course_embedding"].tolist())
    # Make the user vector an explicit single-row batch so the comparison along
    # dim=1 is well defined against the (courses, features) matrix.
    similarities = cosine_similarity(
        user_embedding.unsqueeze(0), course_embeddings, dim=1
    ).cpu().numpy()

    ratings = df["course_rating"]
    rating_span = ratings.max() - ratings.min()
    if rating_span != 0:
        normalized_rating = (ratings - ratings.min()) / rating_span
    else:
        # All ratings equal: avoid 0/0 and let the term contribute nothing.
        normalized_rating = pd.Series(0.0, index=df.index)

    difficulty = df["course_difficulty"]
    max_difficulty = difficulty.max()
    if max_difficulty != 0:
        normalized_difficulty = 1 - difficulty / max_difficulty
    else:
        # Avoid division by zero; a constant does not affect the ordering.
        normalized_difficulty = pd.Series(1.0, index=df.index)

    scored = df[["course_name", "course_url"]].copy()
    scored["normalized_difficulty"] = normalized_difficulty
    scored["ranking_score"] = (
        _RANKING_WEIGHTS["similarity"] * similarities
        + _RANKING_WEIGHTS["rating"] * normalized_rating.values
        + _RANKING_WEIGHTS["difficulty"] * normalized_difficulty.values
    )
    return scored.sort_values(by="ranking_score", ascending=False).head(
        _TOP_COURSE_COUNT
    )


def _parse_categorized_courses(response):
    """Turn the model's categorisation reply into a validated dict.

    Markdown code fences are stripped before parsing. Every skill level is
    guaranteed to be present; a level whose value is not a list of dicts that
    each carry "name" and "url" is replaced by an empty list. If the reply is
    not a JSON object, a dict with empty levels and an "error" key is returned.
    """
    failure = {
        "beginner": [],
        "intermediate": [],
        "advanced": [],
        "error": "Failed to categorize courses",
    }
    try:
        # NOTE(review): the SDK documents that `.text` raises ValueError when
        # the reply carries no usable text part - confirm for the pinned version.
        raw_text = response.text
        parsed = json.loads(
            raw_text.strip().replace('```json', '').replace('```', '')
        )
    except ValueError:  # includes json.JSONDecodeError
        return failure
    if not isinstance(parsed, dict):
        return failure

    for level in _SKILL_LEVELS:
        courses = parsed.get(level)
        well_formed = isinstance(courses, list) and all(
            isinstance(course, dict) and "name" in course and "url" in course
            for course in courses
        )
        if not well_formed:
            parsed[level] = []
    return parsed


def recommend_courses(skills, interests, experience, education, time, certificates, careerpath):
    """Recommend courses for a user profile, grouped by difficulty level.

    Each argument is a comma-separated string (``None`` or blank is treated as
    empty). The profile is sent to Gemini for a one-word skill-level estimate,
    embedded, ranked against the course catalogue, and the top courses are sent
    to Gemini again to be categorised.

    Returns:
        A dict with "beginner", "intermediate" and "advanced" lists of
        ``{"name": ..., "url": ...}`` entries. On failure the dict carries an
        "error" key instead of (or in addition to) the level lists; this
        function does not raise.
    """
    try:
        raw_values = (
            skills, interests, experience, education,
            time, certificates, careerpath,
        )
        user_profile = {
            field: _split_csv(value)
            for field, value in zip(_PROFILE_FIELDS, raw_values)
        }
        if not any(user_profile.values()):
            # Nothing to assess or embed: skip the model calls entirely.
            return {"error": "Please provide at least one profile detail."}

        # Get skill level assessment
        skill_prompt = (
            "Give the current skill level in one word out of "
            "'beginner', 'intermediate', 'advanced'.\n"
            f"Here is the user profile: {user_profile}\n"
            "strictly do not output any extra textual data."
        )
        current_skill = _generate(skill_prompt).text.strip().replace("\n", "")
        user_profile["CurrentSkill"] = [current_skill]

        user_text = " ".join(
            item
            for field in (*_PROFILE_FIELDS, "CurrentSkill")
            for item in user_profile[field]
        )
        top_courses = _rank_courses(user_text)

        course_names = top_courses["course_name"].tolist()
        # Only the URLs of the selected courses are sent, paired with their
        # names, so the model can attach the right link to each course.
        course_links = [
            {"name": name, "url": url}
            for name, url in zip(course_names, top_courses["course_url"].tolist())
        ]
        category_prompt = (
            "Return a JSON object with this exact structure:\n"
            f"{_CATEGORY_JSON_SCHEMA}\n"
            f"Categorize these courses: {course_names}\n"
            f"Add Url of the specific course from {course_links}\n"
            "Based on:\n"
            f"- User skill level: {current_skill}\n"
            f"- Course difficulties: {top_courses['normalized_difficulty'].tolist()}\n"
            f"- User skills: {user_profile['skills']}\n"
            "Categorise atleast one course for each beginner, intermediate and advanced.\n"
            "Return ONLY valid JSON without any extra text.\n"
        )
        return _parse_categorized_courses(_generate(category_prompt))
    except Exception as e:
        # UI boundary: report any failure as JSON instead of crashing the app.
        return {"error": str(e)}
# Gradio UI: one free-text box per profile field, listed in the positional
# order of recommend_courses' parameters; the returned dict is shown as JSON.
_TEXTBOX_SPECS = [
    # (label, placeholder)
    ("Skills", "python, machine learning"),
    ("Interests", "AI, data science"),
    ("Experience", "2 years python"),
    ("Education", "bachelor's in CS"),
    ("Time Available", "6 months"),
    ("Certificates", "AWS, GCP"),
    ("Career Path", "ML engineer"),
]

iface = gr.Interface(
    fn=recommend_courses,
    inputs=[
        gr.Textbox(label=label, placeholder=hint)
        for label, hint in _TEXTBOX_SPECS
    ],
    outputs=gr.JSON(),
    title="Personalized Course Recommender",
    description="Enter your profile details to get course recommendations organized by difficulty level",
)
# Start the Gradio server only when this file is run as a script, not when it
# is imported. share=True is passed through to Gradio's launch().
if __name__ == "__main__":
    iface.launch(share=True)