import hashlib
import json
import re
from typing import Any, Dict, List, Optional


class KnowledgeBase:
    """In-memory store of study programs and courses with simple queries.

    Data is read once at construction time from two JSON files under
    ``data/processed/`` (relative to the current working directory).
    A missing or unreadable file leaves the corresponding attribute at its
    empty default, so the object is always usable.

    Attributes:
        programs: Mapping loaded from ``programs.json`` (``{}`` if absent).
        courses: List of course dicts loaded from ``courses.json``
            (``[]`` if absent).
        itmo_keywords: Lower-case keywords used by :meth:`is_itmo_query`.
    """

    PROGRAMS_PATH = 'data/processed/programs.json'
    COURSES_PATH = 'data/processed/courses.json'

    # Maximum number of courses returned by ``recommend``.
    MAX_RECOMMENDATIONS = 7

    # Keywords this short are matched as whole words only: as plain
    # substrings 'ии' hits "России" and 'ai' hits "email".
    WHOLE_WORD_MAX_LEN = 2

    # Human-readable labels for interest tags used in recommendation reasons.
    TAG_NAMES = {
        'ml': 'машинное обучение',
        'dl': 'глубокое обучение',
        'nlp': 'обработка естественного языка',
        'cv': 'компьютерное зрение',
        'product': 'продуктовая разработка',
        'business': 'бизнес-аналитика',
        'research': 'исследования',
        'data': 'анализ данных',
        'systems': 'системная архитектура',
    }

    def __init__(self):
        self.programs = {}
        self.courses = []
        self._load_data()
        self.itmo_keywords = [
            'итмо', 'магистратура', 'учебный план', 'дисциплина', 'курс',
            'ии', 'ai', 'ai product', 'институт ии', 'программа',
            'машинное обучение', 'глубокое обучение', 'nlp',
            'компьютерное зрение', 'нейронные сети', 'анализ данных',
            'продуктовая аналитика'
        ]

    def _load_data(self) -> None:
        """Load programs and courses, keeping current values on failure."""
        self.programs = self._read_json(
            self.PROGRAMS_PATH, getattr(self, 'programs', {})
        )
        self.courses = self._read_json(
            self.COURSES_PATH, getattr(self, 'courses', [])
        )

    @staticmethod
    def _read_json(path: str, fallback: Any) -> Any:
        """Return parsed JSON from *path*, or *fallback* if it cannot be read.

        A missing file and a malformed file are both reported on stdout and
        treated as "no data" instead of aborting construction.
        """
        file_name = path.rsplit('/', 1)[-1]
        try:
            with open(path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            print(f'Файл {file_name} не найден')
        except (json.JSONDecodeError, UnicodeDecodeError) as exc:
            print(f'Файл {file_name} повреждён: {exc}')
        return fallback

    @staticmethod
    def _profile_interests(profile: dict) -> List[str]:
        """Return the profile's interests as a list (never ``None``)."""
        interests = profile.get('interests') or []
        if isinstance(interests, str):
            # A bare string would otherwise be iterated character by character.
            return [interests]
        return list(interests)

    @staticmethod
    def _as_number(value: Any, default: float) -> float:
        """Coerce *value* to ``float``; fall back to *default* if impossible."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _semester_matches(course: dict, semester: int) -> bool:
        """Tell whether *course* belongs to *semester*.

        Numeric strings such as ``"1"`` are accepted in the course record so
        that data exported with string semesters still matches.
        """
        raw = course.get('semester')
        if isinstance(raw, str):
            try:
                raw = int(raw)
            except ValueError:
                return False
        return raw == semester

    def is_itmo_query(self, message: str) -> bool:
        """Return ``True`` if *message* mentions any ITMO-related keyword.

        Matching is case-insensitive. Keywords longer than
        ``WHOLE_WORD_MAX_LEN`` are matched as substrings (so inflected forms
        like "курсы" still hit "курс"); shorter ones must appear as a
        standalone word to avoid false positives.
        """
        if not message:
            return False
        message_lower = message.lower()
        for keyword in self.itmo_keywords:
            if len(keyword) <= self.WHOLE_WORD_MAX_LEN:
                # Lookarounds instead of \b so that non-word keywords would
                # also work; \w is Unicode-aware for str patterns.
                pattern = rf'(?<!\w){re.escape(keyword)}(?!\w)'
                if re.search(pattern, message_lower):
                    return True
            elif keyword in message_lower:
                return True
        return False

    def recommend(self, profile: dict) -> List[Dict]:
        """Recommend up to ``MAX_RECOMMENDATIONS`` courses for a profile.

        Args:
            profile: Dict with ``semester`` (required, int-like) and optional
                ``interests``, ``programming_experience`` and ``math_level``.

        Returns:
            A list of dicts with keys ``semester``, ``name``, ``credits`` and
            ``why``, best match first. Empty if the semester is missing,
            not a number, or has no courses.
        """
        semester = profile.get('semester')
        if not semester:
            return []
        try:
            semester = int(semester)
        except (TypeError, ValueError):
            return []

        candidates = [
            course for course in self.courses
            if self._semester_matches(course, semester)
        ]
        if not candidates:
            return []

        # sorted() is stable, so equally scored courses keep file order.
        ranked = sorted(
            candidates,
            key=lambda course: self._calculate_recommendation_score(
                course, profile
            ),
            reverse=True,
        )
        return [
            {
                'semester': course.get('semester'),
                'name': course.get('name', ''),
                'credits': course.get('credits'),
                'why': self._generate_recommendation_reason(course, profile),
            }
            for course in ranked[:self.MAX_RECOMMENDATIONS]
        ]

    def _calculate_recommendation_score(self, course: dict, profile: dict) -> float:
        """Score how well *course* fits *profile*.

        The score is ``0.6 * similarity + 0.3 * rules + 0.1 * generic`` where
        similarity is the share of the profile's interests found among the
        course tags, rules adds 0.3 for each satisfied heuristic, and generic
        is a constant 0.1 baseline.
        """
        interests = self._profile_interests(profile)
        programming_exp = self._as_number(
            profile.get('programming_experience', 2), 2
        )
        math_level = self._as_number(profile.get('math_level', 2), 2)
        course_tags = course.get('tags') or []

        similarity_score = 0.0
        if interests:
            interest_matches = sum(
                1 for interest in interests if interest in course_tags
            )
            similarity_score = interest_matches / len(interests)

        def has_any(*tags: str) -> bool:
            return any(tag in course_tags for tag in tags)

        rule_score = 0.0
        if programming_exp >= 3 and has_any('ml', 'dl', 'systems'):
            rule_score += 0.3
        if ('product' in interests or 'business' in interests) and has_any(
            'product', 'business', 'pm'
        ):
            rule_score += 0.3
        if math_level >= 3 and has_any('math', 'stats', 'dl'):
            rule_score += 0.3

        generic_score = 0.1
        return 0.6 * similarity_score + 0.3 * rule_score + 0.1 * generic_score

    def _generate_recommendation_reason(self, course: dict, profile: dict) -> str:
        """Return a short user-facing explanation for recommending *course*."""
        course_tags = course.get('tags') or []
        matching_tags = [
            tag for tag in self._profile_interests(profile)
            if tag in course_tags
        ]
        if matching_tags:
            # Unknown tags are shown verbatim.
            tag_descriptions = [
                self.TAG_NAMES.get(tag, tag) for tag in matching_tags
            ]
            return f'Соответствует вашим интересам: {", ".join(tag_descriptions)}'
        return 'Курс из учебного плана программы'

    def get_course_by_id(self, course_id: str) -> dict:
        """Return the first course whose ``id`` equals *course_id*, else ``{}``."""
        return next(
            (course for course in self.courses if course.get('id') == course_id),
            {},
        )

    def get_program_by_id(self, program_id: str) -> dict:
        """Return the program stored under *program_id*, else ``{}``."""
        return self.programs.get(program_id, {})

    def search_courses(self, query: str, limit: int = 10) -> List[Dict]:
        """Return up to *limit* courses whose name or description has *query*.

        Matching is a case-insensitive substring test. A non-positive
        *limit* yields an empty list.
        """
        if limit <= 0:
            return []
        query_lower = query.lower()
        results = []
        for course in self.courses:
            course_text = (
                f"{course.get('name', '')} {course.get('short_desc', '')}".lower()
            )
            if query_lower in course_text:
                results.append(course)
                if len(results) >= limit:
                    break
        return results