|
import json
|
|
import re
|
|
import hashlib
|
|
from typing import List, Dict
|
|
|
|
class KnowledgeBase:
    """In-memory store of programs and courses with simple recommendation logic.

    Data is read from two JSON files, ``programs.json`` and ``courses.json``,
    located in ``data_dir``. The code treats ``programs`` as a mapping
    (it calls ``.get(program_id)``) and ``courses`` as a list of dicts from
    which it reads the keys ``id``, ``semester``, ``name``, ``credits``,
    ``short_desc`` and ``tags``.
    """

    # Default location of the JSON files; overridden per instance by __init__.
    _data_dir = 'data/processed'

    # Keywords up to this length ('ии', 'ai', 'nlp') must match as whole
    # words: as plain substrings they occur inside unrelated words
    # ("России", "email") and would classify almost any message as relevant.
    _WHOLE_WORD_MAX_LEN = 3

    # Number of recommendations returned when the caller gives no limit.
    _DEFAULT_RECOMMENDATION_LIMIT = 7

    # Human-readable names for interest tags, used in recommendation reasons.
    _TAG_NAMES = {
        'ml': 'машинное обучение',
        'dl': 'глубокое обучение',
        'nlp': 'обработка естественного языка',
        'cv': 'компьютерное зрение',
        'product': 'продуктовая разработка',
        'business': 'бизнес-аналитика',
        'research': 'исследования',
        'data': 'анализ данных',
        'systems': 'системная архитектура',
    }

    def __init__(self, data_dir: str = 'data/processed'):
        """Load the data files and set up the relevance keyword list.

        Args:
            data_dir: Directory containing ``programs.json`` and
                ``courses.json``. The default keeps the original
                working-directory-relative location.
        """
        self._data_dir = data_dir
        self.programs = {}
        self.courses = []
        self._load_data()

        self.itmo_keywords = [
            'итмо', 'магистратура', 'учебный план', 'дисциплина', 'курс',
            'ии', 'ai', 'ai product', 'институт ии', 'программа',
            'машинное обучение', 'глубокое обучение', 'nlp', 'компьютерное зрение',
            'нейронные сети', 'анализ данных', 'продуктовая аналитика',
        ]

    def _load_data(self):
        """Populate ``self.programs`` and ``self.courses`` from disk.

        A missing file is reported on stdout and the corresponding attribute
        is left untouched. Malformed JSON is not handled here, so
        ``json.JSONDecodeError`` propagates to the caller.
        """
        sources = (
            ('programs', 'programs.json'),
            ('courses', 'courses.json'),
        )
        for attribute, filename in sources:
            path = f'{self._data_dir}/{filename}'
            try:
                with open(path, 'r', encoding='utf-8') as f:
                    setattr(self, attribute, json.load(f))
            except FileNotFoundError:
                print(f'Файл {filename} не найден')

    def is_itmo_query(self, message: str) -> bool:
        """Return True if ``message`` mentions any of ``self.itmo_keywords``.

        Matching is case-insensitive. Keywords longer than
        ``_WHOLE_WORD_MAX_LEN`` match as substrings, so inflected forms such
        as "курсы" still hit "курс". Shorter keywords must stand alone as a
        word. An empty or ``None`` message is never a match.
        """
        if not message:
            return False

        message_lower = message.lower()
        for keyword in self.itmo_keywords:
            if len(keyword) > self._WHOLE_WORD_MAX_LEN:
                if keyword in message_lower:
                    return True
                continue
            # Lookarounds instead of \b so the boundary test does not depend
            # on the keyword itself starting or ending with a word character.
            pattern = r'(?<!\w)' + re.escape(keyword) + r'(?!\w)'
            if re.search(pattern, message_lower):
                return True
        return False

    def recommend(self, profile: dict, limit: int = 7) -> List[Dict]:
        """Recommend the best-scoring courses for the profile's semester.

        Args:
            profile: Dict read for ``semester`` (required), ``interests``,
                ``programming_experience`` and ``math_level``.
            limit: Maximum number of recommendations to return.

        Returns:
            Dicts with keys ``semester``, ``name``, ``credits`` and ``why``,
            ordered by descending score. Courses with equal scores keep their
            order from ``self.courses``. The list is empty when the semester
            is missing, falsy, not convertible to ``int``, or has no courses.
        """
        raw_semester = profile.get('semester')
        if not raw_semester:
            return []
        try:
            semester = int(raw_semester)
        except (TypeError, ValueError):
            # A malformed semester is treated the same as a missing one.
            return []

        candidates = [
            course for course in self.courses
            if course.get('semester') == semester
        ]
        if not candidates:
            return []

        # sorted() is stable, so ties preserve the order of self.courses.
        ranked = sorted(
            candidates,
            key=lambda course: self._calculate_recommendation_score(course, profile),
            reverse=True,
        )

        return [
            {
                'semester': course['semester'],
                # .get keeps one incomplete course record from aborting the
                # whole recommendation, matching how the scorer reads courses.
                'name': course.get('name', ''),
                'credits': course.get('credits'),
                'why': self._generate_recommendation_reason(course, profile),
            }
            for course in ranked[:max(limit, 0)]
        ]

    @staticmethod
    def _coerce_level(value, default: float = 2) -> float:
        """Convert a self-assessed level to a number, or return ``default``.

        ``recommend`` already accepts the semester as a string, so the
        levels are coerced the same way instead of being compared raw.
        """
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    def _calculate_recommendation_score(self, course: dict, profile: dict) -> float:
        """Score how well ``course`` fits ``profile``; higher is better.

        The score is ``0.6 * similarity + 0.3 * rules + 0.1 * generic``:

        * similarity: share of the profile's interests found in the course tags;
        * rules: +0.3 for each rule that fires (programming level >= 3 with a
          technical tag, product/business interest with a matching tag, math
          level >= 3 with a math-heavy tag);
        * generic: a constant 0.1 baseline.
        """
        interests = profile.get('interests') or []
        course_tags = course.get('tags') or []
        programming_exp = self._coerce_level(profile.get('programming_experience', 2))
        math_level = self._coerce_level(profile.get('math_level', 2))

        similarity_score = 0.0
        if interests:
            interest_matches = sum(1 for interest in interests if interest in course_tags)
            similarity_score = interest_matches / len(interests)

        def has_any_tag(*tags):
            return any(tag in course_tags for tag in tags)

        rule_score = 0.0
        if programming_exp >= 3 and has_any_tag('ml', 'dl', 'systems'):
            rule_score += 0.3
        if ('product' in interests or 'business' in interests) and has_any_tag(
            'product', 'business', 'pm'
        ):
            rule_score += 0.3
        if math_level >= 3 and has_any_tag('math', 'stats', 'dl'):
            rule_score += 0.3

        generic_score = 0.1

        return 0.6 * similarity_score + 0.3 * rule_score + 0.1 * generic_score

    def _generate_recommendation_reason(self, course: dict, profile: dict) -> str:
        """Return the user-facing explanation for recommending ``course``.

        Lists the profile interests that are also course tags, using the
        readable names in ``_TAG_NAMES`` where available. Falls back to a
        generic sentence when no interest matches.
        """
        interests = profile.get('interests') or []
        course_tags = course.get('tags') or []

        matching_tags = [tag for tag in interests if tag in course_tags]
        if not matching_tags:
            return 'Курс из учебного плана программы'

        tag_descriptions = [self._TAG_NAMES.get(tag, tag) for tag in matching_tags]
        return f'Соответствует вашим интересам: {", ".join(tag_descriptions)}'

    def get_course_by_id(self, course_id: str) -> dict:
        """Return the first course whose ``id`` equals ``course_id``, else ``{}``."""
        for course in self.courses:
            if course.get('id') == course_id:
                return course
        return {}

    def get_program_by_id(self, program_id: str) -> dict:
        """Return the program stored under ``program_id``, else ``{}``."""
        return self.programs.get(program_id, {})

    def search_courses(self, query: str, limit: int = 10) -> List[Dict]:
        """Return up to ``limit`` courses whose name or description contains ``query``.

        The match is a case-insensitive substring test over
        ``"<name> <short_desc>"``. An empty query matches every course.
        A non-positive ``limit`` yields an empty list.
        """
        if limit <= 0:
            # The limit is otherwise only checked after appending, which would
            # let one result through.
            return []

        query_lower = query.lower()
        results = []
        for course in self.courses:
            course_text = f"{course.get('name', '')} {course.get('short_desc', '')}".lower()
            if query_lower in course_text:
                results.append(course)
                if len(results) >= limit:
                    break
        return results
|
|
|