yashbyname committed on
Commit
69bd769
·
verified ·
1 Parent(s): 2b55b6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +143 -0
app.py CHANGED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

# Course catalogue; the path is relative to the process working directory.
df = pd.read_csv("Course_Recommender_system/cleaned_data.csv")

# Sentence-embedding model shared by the course descriptions and the user
# profile text, so both live in the same vector space.
bert_model = SentenceTransformer('all-MiniLM-L6-v2')

# Embed every course description once at start-up; each request then only
# has to embed the user's profile text.
df["course_embedding"] = df["Transformed_description"].apply(
    lambda x: bert_model.encode(x, convert_to_tensor=True)
)

# SECURITY: the API key used to be hard-coded here and was published with the
# commit. It must be treated as compromised and revoked. The key is now read
# from the environment (e.g. a Hugging Face Space secret) and never stored in
# source control.
_gemini_api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not _gemini_api_key:
    raise RuntimeError(
        "Gemini API key not configured: set the GEMINI_API_KEY "
        "(or GOOGLE_API_KEY) environment variable."
    )
client = genai.Client(api_key=_gemini_api_key)
6
+
7
def _split_csv(text):
    """Split a comma-separated string into stripped, non-empty items."""
    return [item.strip() for item in text.split(",") if item.strip()]


def _parse_categorized_courses(raw_text, url_by_name):
    """Parse the model's JSON reply into a ``{level: [course, ...]}`` dict.

    Args:
        raw_text: Text returned by the model, optionally wrapped in a
            Markdown ``json`` code fence.
        url_by_name: Mapping of recommended course name to its catalogue URL.
            When a returned course name matches a key, the catalogue URL
            replaces whatever URL the model produced.

    Returns:
        The parsed dict with the keys ``beginner``, ``intermediate`` and
        ``advanced`` always present. A level whose value is not a list of
        dicts carrying both ``name`` and ``url`` is reset to ``[]``. If the
        reply cannot be parsed at all, an all-empty result with an ``error``
        message is returned instead.
    """
    try:
        cleaned = raw_text.strip().replace('```json', '').replace('```', '')
        parsed = json.loads(cleaned)
        for level in ('beginner', 'intermediate', 'advanced'):
            courses = parsed.get(level)
            well_formed = isinstance(courses, list) and all(
                isinstance(course, dict) and 'name' in course and 'url' in course
                for course in courses
            )
            if not well_formed:
                parsed[level] = []
                continue
            for course in courses:
                # Prefer the URL from the catalogue over the model's output.
                catalogue_url = url_by_name.get(course['name'])
                if catalogue_url is not None:
                    course['url'] = catalogue_url
        return parsed
    except (ValueError, TypeError, AttributeError):
        # ValueError covers json.JSONDecodeError; TypeError/AttributeError
        # cover a missing reply (None) or a top-level value that is not a dict.
        return {
            "beginner": [],
            "intermediate": [],
            "advanced": [],
            "error": "Failed to categorize courses"
        }


def recommend_courses(skills, interests, experience, education, time, certificates, careerpath):
    """Recommend six courses for a user profile, grouped by difficulty level.

    Every argument is a comma-separated string. The profile is sent to the
    Gemini model to estimate a one-word skill level, then embedded and
    compared with the pre-computed course embeddings. Courses are ranked by a
    weighted sum of similarity, normalised rating and inverted normalised
    difficulty, and the top six are sent back to the model to be sorted into
    beginner / intermediate / advanced buckets.

    Relies on the module-level ``df``, ``bert_model`` and ``client``. The
    shared ``df`` is not modified; scoring happens on a per-call copy.

    Returns:
        A dict with ``beginner``, ``intermediate`` and ``advanced`` lists of
        ``{"name", "url"}`` dicts (plus ``error`` if the model reply could not
        be parsed), or ``{"error": message}`` if any other step fails.
    """
    try:
        # Create initial user profile
        user_profile = {
            "skills": _split_csv(skills),
            "interests": _split_csv(interests),
            "experience": _split_csv(experience),
            "education": _split_csv(education),
            "time": _split_csv(time),
            "certificates": _split_csv(certificates),
            "careerpath": _split_csv(careerpath)
        }

        # Get skill level assessment
        response = client.models.generate_content(
            model="gemini-pro",
            contents=f"""
            Give the current skill level in one word out of 'beginner', 'intermediate', 'advanced'.
            Here is the user profile: {user_profile}
            strictly do not output any extra textual data."""
        )

        current_skill = response.text.strip().replace("\n", "")
        user_profile["CurrentSkill"] = [current_skill]

        # Flatten the whole profile into one string for embedding.
        profile_fields = (
            "skills", "interests", "experience", "education",
            "time", "certificates", "careerpath", "CurrentSkill",
        )
        user_text = " ".join(
            item for field in profile_fields for item in user_profile[field]
        )

        user_embedding = bert_model.encode(user_text, convert_to_tensor=True)
        course_embeddings = torch.stack(df["course_embedding"].tolist())
        similarities = cosine_similarity(user_embedding, course_embeddings)
        # Assumes cosine_similarity returns a torch tensor (it is followed by
        # .cpu()); flatten so both (N,) and (1, N) results line up with df rows.
        similarity_scores = similarities.cpu().numpy().reshape(-1)

        # NOTE(review): the original also declared a 0.1 "time_to_complete"
        # weight that was never applied to the score; it is omitted here.
        weights = {
            "similarity": 0.6,
            "rating": 0.2,
            "difficulty": 0.1,
        }

        # Min-max normalise ratings; if every rating is identical the span is
        # zero, so give all courses the same (zero) rating contribution.
        ratings = df["course_rating"]
        rating_span = ratings.max() - ratings.min()
        if rating_span:
            normalized_rating = (ratings - ratings.min()) / rating_span
        else:
            normalized_rating = ratings * 0.0

        # Easier courses score higher; guard against a zero maximum.
        difficulty = df["course_difficulty"]
        max_difficulty = difficulty.max()
        if max_difficulty:
            normalized_difficulty = 1 - (difficulty / max_difficulty)
        else:
            normalized_difficulty = 1 - difficulty * 0.0

        # Score on a copy so concurrent requests never overwrite each other's
        # columns on the shared catalogue.
        scored = df.assign(
            normalized_rating=normalized_rating,
            normalized_difficulty=normalized_difficulty,
        )
        scored["ranking_score"] = (
            weights["similarity"] * similarity_scores +
            weights["rating"] * scored["normalized_rating"].values +
            weights["difficulty"] * scored["normalized_difficulty"].values
        )

        top_courses = scored.sort_values(by="ranking_score", ascending=False).head(6)
        output = top_courses["course_name"].tolist()
        # Only the recommended courses' URLs are sent to the model. Formatting
        # the whole df["course_url"] Series into the prompt yields a truncated
        # repr that usually omits the URLs actually needed.
        url_by_name = dict(zip(output, top_courses["course_url"].tolist()))

        response2 = client.models.generate_content(
            model="gemini-pro",
            contents=f"""
            Return a JSON object with this exact structure:
            {{
            "beginner": [
            {{"name": "course name", "url": "course url"}}
            ],
            "intermediate": [
            {{"name": "course name", "url": "course url"}}
            ],
            "advanced": [
            {{"name": "course name", "url": "course url"}}
            ]
            }}

            Categorize these courses: {output}

            Add Url of the specific course from {url_by_name}
            Based on:
            - User skill level: {current_skill}
            - Course difficulties: {top_courses['normalized_difficulty'].tolist()}
            - User skills: {user_profile['skills']}

            Categorise atleast one course for each beginner, intermediate and advanced.
            Return ONLY valid JSON without any extra text.
            """
        )

        return _parse_categorized_courses(response2.text, url_by_name)

    except Exception as e:
        # Top-level boundary for the UI handler: report the failure as data
        # rather than letting the request crash.
        return {"error": str(e)}
124
+
125
# Gradio front end: one free-text box per profile field, passed positionally
# to recommend_courses in the order listed; the result is rendered as JSON.
iface = gr.Interface(
    fn=recommend_courses,
    inputs=[
        gr.Textbox(label=field_label, placeholder=example)
        for field_label, example in (
            ("Skills", "python, machine learning"),
            ("Interests", "AI, data science"),
            ("Experience", "2 years python"),
            ("Education", "bachelor's in CS"),
            ("Time Available", "6 months"),
            ("Certificates", "AWS, GCP"),
            ("Career Path", "ML engineer"),
        )
    ],
    outputs=gr.JSON(),
    title="Personalized Course Recommender",
    description="Enter your profile details to get course recommendations organized by difficulty level",
)

# Start the web app only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch(share=True)