test5 / knowledge_base.py
vydrking's picture
Upload 18 files
2fc8dc5 verified
import json
import re
import hashlib
from typing import List, Dict
class KnowledgeBase:
    """In-memory store of study programs and courses with simple recommendations.

    Data is read once, at construction time, from ``programs.json`` and
    ``courses.json`` inside ``data_dir``. A missing file is reported on
    stdout and leaves the corresponding collection empty, so the object is
    always usable.
    """

    # Keywords this short are matched as whole words only. As plain
    # substrings, 'ии' and 'ai' occur inside a huge number of unrelated words
    # ("России", "email"), which made almost any message look ITMO-related.
    _SHORT_KEYWORD_MAX_LEN = 3

    # Level assumed for programming/math experience when the profile does not
    # provide a usable value.
    _DEFAULT_LEVEL = 2

    # Russian display names for interest tags, used in recommendation reasons.
    _TAG_NAMES = {
        'ml': 'машинное обучение',
        'dl': 'глубокое обучение',
        'nlp': 'обработка естественного языка',
        'cv': 'компьютерное зрение',
        'product': 'продуктовая разработка',
        'business': 'бизнес-аналитика',
        'research': 'исследования',
        'data': 'анализ данных',
        'systems': 'системная архитектура',
    }

    def __init__(self, data_dir: str = 'data/processed'):
        """Create the knowledge base and load its JSON data.

        Args:
            data_dir: Directory that contains ``programs.json`` and
                ``courses.json``.
        """
        self.data_dir = data_dir
        self.programs = {}
        self.courses = []
        self._load_data()
        self.itmo_keywords = [
            'итмо', 'магистратура', 'учебный план', 'дисциплина', 'курс',
            'ии', 'ai', 'ai product', 'институт ии', 'программа',
            'машинное обучение', 'глубокое обучение', 'nlp', 'компьютерное зрение',
            'нейронные сети', 'анализ данных', 'продуктовая аналитика',
        ]

    def _load_data(self):
        """Load programs and courses, keeping the current value if a file is missing."""
        self.programs = self._read_json('programs.json', self.programs)
        self.courses = self._read_json('courses.json', self.courses)

    def _read_json(self, filename: str, fallback):
        """Return the parsed content of ``filename`` from ``data_dir``.

        A missing file is reported on stdout and ``fallback`` is returned.
        Any other error (for example malformed JSON) propagates to the caller.
        """
        try:
            with open(f'{self.data_dir}/{filename}', 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            print(f'Файл {filename} не найден')
            return fallback

    def is_itmo_query(self, message: str) -> bool:
        """Tell whether ``message`` mentions any of the ITMO-related keywords.

        Matching is case-insensitive. Keywords of up to three characters must
        appear as standalone words; longer keywords match as substrings so
        that inflected forms ("курсы", "курса") are still recognised.

        Returns:
            True if at least one keyword matches; False otherwise, including
            for an empty or ``None`` message.
        """
        if not message:
            return False
        text = message.lower()
        for keyword in self.itmo_keywords:
            if len(keyword) <= self._SHORT_KEYWORD_MAX_LEN:
                # Lookarounds instead of \b keep the intent explicit: no word
                # character may touch the keyword on either side.
                pattern = rf'(?<!\w){re.escape(keyword)}(?!\w)'
                if re.search(pattern, text):
                    return True
            elif keyword in text:
                return True
        return False

    def recommend(self, profile: dict, limit: int = 7) -> List[Dict]:
        """Recommend courses of the profile's semester, best match first.

        Args:
            profile: Student profile. ``semester`` is required; ``interests``,
                ``programming_experience`` and ``math_level`` refine ranking.
            limit: Maximum number of recommendations to return.

        Returns:
            A list of dicts with keys ``semester``, ``name``, ``credits`` and
            ``why``. The list is empty when the semester is missing or not a
            number, when ``limit`` is not positive, or when no course matches.
        """
        raw_semester = profile.get('semester')
        if not raw_semester:
            return []
        try:
            semester = int(raw_semester)
        except (TypeError, ValueError):
            # The profile comes from user input; a non-numeric semester means
            # "nothing to recommend" rather than a crash.
            return []
        if limit <= 0:
            return []

        candidates = [
            course for course in self.courses
            if course.get('semester') == semester
        ]
        # sorted() is stable, so equally scored courses keep their file order.
        ranked = sorted(
            candidates,
            key=lambda course: self._calculate_recommendation_score(course, profile),
            reverse=True,
        )
        return [
            {
                'semester': course.get('semester'),
                # .get() keeps one incomplete course record from breaking the
                # whole recommendation list.
                'name': course.get('name', ''),
                'credits': course.get('credits'),
                'why': self._generate_recommendation_reason(course, profile),
            }
            for course in ranked[:limit]
        ]

    def _profile_level(self, profile: dict, key: str) -> float:
        """Return a numeric level from ``profile``, or the default if unusable."""
        value = profile.get(key, self._DEFAULT_LEVEL)
        try:
            return float(value)
        except (TypeError, ValueError):
            return float(self._DEFAULT_LEVEL)

    def _calculate_recommendation_score(self, course: dict, profile: dict) -> float:
        """Score how well ``course`` fits ``profile``.

        The score is ``0.6 * similarity + 0.3 * rules + 0.1 * generic`` where
        similarity is the share of the profile's interests found in the course
        tags, rules adds 0.3 for each satisfied heuristic below, and generic
        is a constant 0.1.
        """
        interests = profile.get('interests') or []
        course_tags = course.get('tags') or []
        programming_exp = self._profile_level(profile, 'programming_experience')
        math_level = self._profile_level(profile, 'math_level')

        similarity_score = 0.0
        if interests:
            interest_matches = sum(1 for interest in interests if interest in course_tags)
            similarity_score = interest_matches / len(interests)

        rule_score = 0.0
        # Experienced programmers: favour ML / DL / systems courses.
        if programming_exp >= 3 and any(tag in course_tags for tag in ('ml', 'dl', 'systems')):
            rule_score += 0.3
        # Product or business interest: favour product-oriented courses.
        if ('product' in interests or 'business' in interests) and any(
            tag in course_tags for tag in ('product', 'business', 'pm')
        ):
            rule_score += 0.3
        # Strong math background: favour math-heavy courses.
        if math_level >= 3 and any(tag in course_tags for tag in ('math', 'stats', 'dl')):
            rule_score += 0.3

        generic_score = 0.1
        return 0.6 * similarity_score + 0.3 * rule_score + 0.1 * generic_score

    def _generate_recommendation_reason(self, course: dict, profile: dict) -> str:
        """Return a short Russian explanation of why ``course`` is recommended.

        Lists the profile interests that appear among the course tags; falls
        back to a generic sentence when there is no overlap.
        """
        interests = profile.get('interests') or []
        course_tags = course.get('tags') or []
        matching_tags = [tag for tag in interests if tag in course_tags]
        if not matching_tags:
            return 'Курс из учебного плана программы'
        # Unknown tags are shown as-is.
        tag_descriptions = [self._TAG_NAMES.get(tag, tag) for tag in matching_tags]
        return f'Соответствует вашим интересам: {", ".join(tag_descriptions)}'

    def get_course_by_id(self, course_id: str) -> dict:
        """Return the first course whose ``id`` equals ``course_id``, or ``{}``."""
        for course in self.courses:
            if course.get('id') == course_id:
                return course
        return {}

    def get_program_by_id(self, program_id: str) -> dict:
        """Return the program stored under ``program_id``, or ``{}``."""
        return self.programs.get(program_id, {})

    def search_courses(self, query: str, limit: int = 10) -> List[Dict]:
        """Return up to ``limit`` courses whose name or description contains ``query``.

        The match is a case-insensitive substring test against the course
        ``name`` and ``short_desc``. A non-positive ``limit`` yields ``[]``.
        """
        if limit <= 0:
            return []
        query_lower = query.lower()
        results = []
        for course in self.courses:
            # "or ''" keeps an explicit null field from being searched as the
            # literal text "none".
            name = course.get('name') or ''
            short_desc = course.get('short_desc') or ''
            if query_lower in f"{name} {short_desc}".lower():
                results.append(course)
                if len(results) >= limit:
                    break
        return results