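# Page header captured together with this file (not part of the program):
#   Space: yashbyname / Course_Recommender_system
#   Status shown on the page: Runtime error
#   File size: 6,433 Bytes
#   Commit hashes shown beside the file:
#     b5a35fb, 0971dea, 523e4cc, 94dabc7, 69bd769, 2830df9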
import json
import os

import google.generativeai as genai
import gradio as gr
import numpy as np
import pandas as pd
import requests
import torch
from sentence_transformers import SentenceTransformer
from torch.nn.functional import cosine_similarity

# Course catalogue; read once when the module is imported.
df = pd.read_csv("cleaned_data.csv")

# Sentence-embedding model used for both course descriptions and user text.
bert_model = SentenceTransformer('all-MiniLM-L6-v2')


def _embed_description(description):
    """Encode one course description into a tensor embedding."""
    return bert_model.encode(description, convert_to_tensor=True)


# One embedding tensor per row, stored alongside the course it describes.
df["course_embedding"] = df["Transformed_description"].apply(_embed_description)

# The Gemini API key is taken from the environment.
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

# Gemini model queried for both the skill assessment and the categorisation.
_GEMINI_MODEL_NAME = "gemini-pro"

# Top-level keys expected in the categorisation reply.
_SKILL_LEVELS = ("beginner", "intermediate", "advanced")

# Number of best-ranked courses forwarded to the categorisation prompt.
_TOP_COURSE_COUNT = 6

# Relative weight of each signal in the ranking score.
# NOTE(review): "time_to_complete" is declared but no such signal is computed
# below, so the applied weights sum to 0.9 — confirm whether that is intended.
_RANKING_WEIGHTS = {
    "similarity": 0.6,
    "rating": 0.2,
    "difficulty": 0.1,
    "time_to_complete": 0.1,
}


def _split_csv(text):
    """Split a comma-separated field into stripped, non-empty items."""
    return [item.strip() for item in (text or "").split(",") if item.strip()]


def _generate(prompt):
    """Send *prompt* to the configured Gemini model and return its response."""
    return genai.GenerativeModel(_GEMINI_MODEL_NAME).generate_content(prompt)


def _normalized_rating(ratings):
    """Min-max scale *ratings* to [0, 1]; all zeros when they do not vary."""
    lowest = ratings.min()
    spread = ratings.max() - lowest
    if not spread > 0:  # constant column (or all-NaN): avoid dividing by zero
        return pd.Series(0.0, index=ratings.index)
    return (ratings - lowest) / spread


def _normalized_difficulty(difficulty):
    """Return ``1 - difficulty / max`` so that easier courses score higher."""
    highest = difficulty.max()
    if highest == 0:  # every course has difficulty 0: avoid dividing by zero
        return pd.Series(1.0, index=difficulty.index)
    return 1 - (difficulty / highest)


def _parse_categorization(raw_text):
    """Turn the model's categorisation reply into a level -> courses dict.

    Markdown code fences are stripped before parsing. Every level in
    ``_SKILL_LEVELS`` is guaranteed to be present as a list, and entries that
    are not dicts carrying both ``name`` and ``url`` are dropped.

    Raises:
        ValueError: if the reply is not valid JSON or not a JSON object.
    """
    cleaned = raw_text.strip().replace('```json', '').replace('```', '')
    parsed = json.loads(cleaned)
    if not isinstance(parsed, dict):
        raise ValueError("categorisation reply is not a JSON object")

    for level in _SKILL_LEVELS:
        entries = parsed.get(level)
        if not isinstance(entries, list):
            parsed[level] = []
            continue
        parsed[level] = [
            course for course in entries
            if isinstance(course, dict) and 'name' in course and 'url' in course
        ]
    return parsed


def recommend_courses(skills, interests, experience, education, time, certificates, careerpath):
    """Recommend courses for a user profile, grouped by difficulty level.

    The profile fields are comma-separated strings. The function asks Gemini
    for a one-word skill level, embeds the combined profile text, ranks every
    course in ``df`` by a weighted mix of embedding similarity, rating and
    (inverse) difficulty, and finally asks Gemini to sort the best-ranked
    courses into beginner / intermediate / advanced buckets.

    Args:
        skills: Comma-separated skills.
        interests: Comma-separated interests.
        experience: Comma-separated experience items.
        education: Comma-separated education items.
        time: Comma-separated time-availability items.
        certificates: Comma-separated certificates.
        careerpath: Comma-separated target career paths.

    Returns:
        A dict with ``beginner``, ``intermediate`` and ``advanced`` lists of
        ``{"name": ..., "url": ...}`` entries. If the categorisation reply
        cannot be parsed, the three lists are empty and an ``error`` key is
        added. Any other failure yields ``{"error": <message>}``.
    """
    try:
        user_profile = {
            "skills": _split_csv(skills),
            "interests": _split_csv(interests),
            "experience": _split_csv(experience),
            "education": _split_csv(education),
            "time": _split_csv(time),
            "certificates": _split_csv(certificates),
            "careerpath": _split_csv(careerpath),
        }

        # Ask the model for a one-word skill level.
        skill_prompt = f"""
            Give the current skill level in one word out of 'beginner', 'intermediate', 'advanced'.
            Here is the user profile: {user_profile}
            strictly do not output any extra textual data."""
        current_skill = _generate(skill_prompt).text.strip().replace("\n", "")
        user_profile["CurrentSkill"] = [current_skill]

        # Dict insertion order gives: skills, interests, experience,
        # education, time, certificates, careerpath, CurrentSkill.
        user_text = " ".join(
            item for items in user_profile.values() for item in items
        )

        user_embedding = bert_model.encode(user_text, convert_to_tensor=True)
        course_embeddings = torch.stack(df["course_embedding"].tolist())
        # Add an explicit batch axis so the comparison is row-wise along dim 1
        # and does not depend on implicit 1-D/2-D broadcasting.
        similarities = cosine_similarity(
            user_embedding.unsqueeze(0), course_embeddings, dim=1
        )

        # Scores are kept in locals / a per-call frame so that the shared
        # module-level ``df`` is never modified by a request.
        # NOTE(review): assumes course_rating and course_difficulty are
        # numeric columns — confirm against cleaned_data.csv.
        rating_score = _normalized_rating(df["course_rating"])
        difficulty_score = _normalized_difficulty(df["course_difficulty"])
        ranking_score = (
            _RANKING_WEIGHTS["similarity"] * similarities.cpu().numpy()
            + _RANKING_WEIGHTS["rating"] * rating_score.values
            + _RANKING_WEIGHTS["difficulty"] * difficulty_score.values
        )

        scored = df[["course_name", "course_url"]].assign(
            normalized_difficulty=difficulty_score,
            ranking_score=ranking_score,
        )
        top_courses = scored.sort_values(
            by="ranking_score", ascending=False
        ).head(_TOP_COURSE_COUNT)
        output = top_courses["course_name"].tolist()
        # Name/URL pairs of the selected courses only, so the model can copy
        # the right link instead of reading a truncated dump of the column.
        course_links = [
            {"name": name, "url": url}
            for name, url in zip(top_courses["course_name"], top_courses["course_url"])
        ]

        categorize_prompt = f"""
            Return a JSON object with this exact structure:
            {{
                "beginner": [
                    {{"name": "course name", "url": "course url"}}
                ],
                "intermediate": [
                    {{"name": "course name", "url": "course url"}}
                ],
                "advanced": [
                    {{"name": "course name", "url": "course url"}}
                ]
            }}
            
            Categorize these courses: {output}

            Add Url of the specific course from {course_links}
            Based on:
            - User skill level: {current_skill}
            - Course difficulties: {top_courses['normalized_difficulty'].tolist()}
            - User skills: {user_profile['skills']}
            
            Categorise atleast one course for each beginner, intermediate and advanced.
            Return ONLY valid JSON without any extra text.
            """
        response2 = _generate(categorize_prompt)

        try:
            return _parse_categorization(response2.text)
        except (ValueError, TypeError, AttributeError):
            # Unusable reply (missing text, invalid JSON, wrong shape): keep
            # the response shape stable for the UI and flag the failure.
            return {
                "beginner": [],
                "intermediate": [],
                "advanced": [],
                "error": "Failed to categorize courses"
            }

    except Exception as e:
        # Boundary for the UI callback: report the failure as data.
        return {"error": str(e)}

# (label, placeholder) for each profile textbox, listed in the same order as
# the positional parameters of recommend_courses.
_PROFILE_FIELDS = (
    ("Skills", "python, machine learning"),
    ("Interests", "AI, data science"),
    ("Experience", "2 years python"),
    ("Education", "bachelor's in CS"),
    ("Time Available", "6 months"),
    ("Certificates", "AWS, GCP"),
    ("Career Path", "ML engineer"),
)

# Gradio form: seven textboxes in, one JSON panel out.
iface = gr.Interface(
    fn=recommend_courses,
    inputs=[
        gr.Textbox(label=field_label, placeholder=field_hint)
        for field_label, field_hint in _PROFILE_FIELDS
    ],
    outputs=gr.JSON(),
    title="Personalized Course Recommender",
    description="Enter your profile details to get course recommendations organized by difficulty level",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch(share=True)