# NavigatedLearning / backend / nlp_api.py
# Author: Prabhas Jupalli
# opt: Restore startup loading and remove redundant player icon (commit a458ae1)
"""
NLP Learning Grid API
Provides endpoints for the frontend grid-based NLP learning system.
"""
import os
import json
import pandas as pd
from flask import jsonify, request
import numpy as np
from datetime import datetime
import nltk
# Ensure requisite NLTK data is available.
# Downloads are quiet and failures are non-fatal: if the corpora cannot be
# fetched (e.g. offline environment), fall back to an empty stopword set so
# the module still imports.
try:
    nltk.download('stopwords', quiet=True)
    nltk.download('wordnet', quiet=True)
    from nltk.corpus import stopwords
    STOPWORDS = set(stopwords.words('english'))
except Exception as e:
    print(f"Warning: Could not load NLTK stopwords: {e}")
    STOPWORDS = set()
# Import backend modules (support both script and package execution)
try:
from .init import app
from .database import get_session, update_session, save_summary, save_polyline, get_polylines as get_db_polylines, get_notes, add_note, get_lectures, reset_db, get_bookmarks, add_bookmark, remove_bookmark, reset_session_data
from .request_logger import log_request
from .utils import utils_preprocess_text, get_cos_sim
from . import navigator
from . import persona_service
from . import radial_mapper
except ImportError:
from init import app
from database import get_session, update_session, save_summary, save_polyline, get_polylines as get_db_polylines, get_notes, add_note, get_lectures, reset_db, get_bookmarks, add_bookmark, remove_bookmark, reset_session_data
from request_logger import log_request
from utils import utils_preprocess_text, get_cos_sim
import navigator
import persona_service
import radial_mapper
# Define stopwords.
# Fix: reuse the stopword set loaded (with fallback) at import time instead of
# calling stopwords.words() a second time -- the second call raised a
# NameError whenever the guarded NLTK import above failed, because the name
# `stopwords` was never bound in that case.
stop_words = STOPWORDS

# Append-only log file capturing each stage of polyline generation.
POLYLINE_LOG_FILE = os.path.join(os.path.dirname(__file__), 'polyline_generation.log')
def log_polyline_step(step, details):
    """Append a timestamped entry for one polyline-generation step to the log."""
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{stamp}] [{step}]\n{details}\n{'-'*50}\n"
    with open(POLYLINE_LOG_FILE, 'a', encoding='utf-8') as log_file:
        log_file.write(entry)
# Load the sentence-embedding model eagerly at import time so the first
# summary request does not pay the model-load latency. If loading fails
# (e.g. sentence-transformers unavailable), _bert_model stays None and
# scoring falls back to the non-BERT path.
try:
    from sentence_transformers import SentenceTransformer
    print("Loading BERT model (on startup)...")
    _bert_model = SentenceTransformer('all-MiniLM-L6-v2')
    print("BERT model loaded successfully")
except Exception as e:
    print(f"Error loading BERT model: {e}")
    _bert_model = None

def get_bert_model():
    """Return the shared SentenceTransformer instance, or None if loading failed."""
    return _bert_model

# Load NLP data from JSON (Excel was rejected by HF)
nlp_json_path = os.path.join(os.path.dirname(__file__), 'nlp', 'nlp_resources.json')
def load_nlp_resources():
    """Load NLP resources from JSON and project them onto a 20x20 grid.

    Resources are split sequentially into four difficulty tiers
    (Fundamentals / Intermediate / Advance / Mastery) and fanned across the
    first quadrant of a polar chart whose origin is the bottom-left of the
    grid (like Q1 of a radar plot).

    Returns:
        list[dict]: resource records with grid position, tier metadata,
        reward/high-line values and link info; [] on any failure.
    """
    # Hoisted out of the per-resource loop (was re-imported every iteration).
    import random as _rnd
    try:
        # Check if the JSON file exists
        if not os.path.exists(nlp_json_path):
            print(f"File not found: {nlp_json_path}")
            return []
        with open(nlp_json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        if not isinstance(data, list):
            print(f"Unexpected data format in {nlp_json_path}")
            return []
        # First-quadrant arc projection; origin: bottom-LEFT of 20x20 grid.
        cx, cy = 0.0, 19.5
        # Sequential split: the first 19 modules fill the four tiers below.
        ordered_data = data[:19] if len(data) >= 19 else data
        tier_configs = [
            {'label': 'Fundamentals', 'count': 5, 'radius': 3, 'difficulty': 2},
            {'label': 'Intermediate', 'count': 5, 'radius': 7, 'difficulty': 4},
            {'label': 'Advance', 'count': 5, 'radius': 11, 'difficulty': 6},
            {'label': 'Mastery', 'count': 4, 'radius': 15, 'difficulty': 8},
        ]
        # Tier-based points keyed by difficulty (hoisted out of the loop):
        # Fundamentals=50, Intermediate=100, Advance=150, Mastery=200.
        tier_points = {2: 50, 4: 100, 6: 150, 8: 200}
        used_positions = set()
        resources = []
        resource_idx = 0
        # Fan: 8 to 82 degrees keeps resources comfortably inside the axes.
        angle_start_deg = 8.0
        angle_end_deg = 82.0
        for tier in tier_configs:
            count = tier['count']
            r_val = tier['radius']
            tier_data = ordered_data[resource_idx : resource_idx + count]
            resource_idx += count
            n = len(tier_data)
            if n == 0:
                continue
            angle_step = (angle_end_deg - angle_start_deg) / (n - 1) if n > 1 else 0.0
            for i, row in enumerate(tier_data):
                angle_deg = angle_start_deg + i * angle_step
                angle_rad = np.radians(angle_deg)
                x_raw = cx + r_val * np.cos(angle_rad)
                y_raw = cy - r_val * np.sin(angle_rad)
                x = int(round(np.clip(x_raw, 0, 18)))
                y = int(round(np.clip(y_raw, 0, 18)))
                # Resolve collisions by nudging along the arc (y direction).
                attempts = 0
                while (x, y) in used_positions and attempts < 20:
                    y = max(0, min(18, y - 1 if attempts % 2 == 0 else y + 1))
                    attempts += 1
                used_positions.add((x, y))
                base_pts = tier_points.get(tier['difficulty'], 50)
                # Per-resource high_line: "random" in 0.70-0.85, seeded by the
                # resource index so reloads produce identical values.
                _rnd.seed(len(resources) + 42)
                high_line = round(_rnd.uniform(0.70, 0.85), 2)
                resources.append({
                    'id': str(len(resources) + 1),
                    'position': {'x': int(x), 'y': int(y)},
                    'type': 'video' if 'youtube' in str(row.get('links', '')).lower() else 'book',
                    'title': str(row.get('name', f'Resource {len(resources) + 1}')).strip(),
                    'visited': False,
                    'difficulty': tier['difficulty'],
                    'reward': base_pts,
                    'base_points': base_pts,
                    'high_line': high_line,
                    'url': str(row.get('links', '')).strip(),
                    'description': str(row.get('description', '')).strip(),
                    'module': str(row.get('module', 'NLP Concept')).strip()
                })
        print(f"Successfully projected {len(resources)} resources into 4-tier Radar arcs")
        return resources
    except Exception as e:
        print(f"Error loading NLP resources: {e}")
        return []
# Cache resources at import time; all endpoints serve from this list.
nlp_resources = load_nlp_resources()
# Load YouTube links mapping and attach a `youtube_url` to every resource.
_youtube_links_path = os.path.join(os.path.dirname(__file__), 'data', 'youtube_links.json')
try:
    if os.path.exists(_youtube_links_path):
        with open(_youtube_links_path, 'r', encoding='utf-8') as f:
            raw_links = json.load(f)
        # Create a normalized (stripped, lower-cased keys) mapping for lookup
        _youtube_links = {str(k).strip().lower(): v for k, v in raw_links.items()}
        print(f"Loaded {len(_youtube_links)} YouTube links from mapping file")
        # Inject youtube_url into each resource
        for r in nlp_resources:
            module_lower = r['module'].lower()
            title_lower = r['title'].lower()
            # 1. Exact module match
            url = _youtube_links.get(module_lower, '')
            # 2. Fuzzy match: substring containment in either direction
            #    between the link key and the resource title/module
            if not url:
                for key, val in _youtube_links.items():
                    if key in title_lower or key in module_lower or title_lower in key or module_lower in key:
                        url = val
                        break
            r['youtube_url'] = url
        yt_count = sum(1 for r in nlp_resources if r.get('youtube_url'))
        print(f"Matched YouTube URLs for {yt_count}/{len(nlp_resources)} resources")
    else:
        # No mapping file: give every resource an empty URL so the key exists
        print(f"YouTube links file not found: {_youtube_links_path}")
        for r in nlp_resources: r['youtube_url'] = ''
except Exception as e:
    print(f"Could not load YouTube links: {e}")
    for r in nlp_resources: r['youtube_url'] = ''
# Cache of BERT embeddings keyed by module name.
module_embeddings = {}

def compute_module_embeddings():
    """Encode one aggregate document per module (titles + descriptions) with BERT."""
    model = get_bert_model()
    if not model:
        return
    print("Computing module embeddings...")
    # Concatenate title + description of every resource in a module so each
    # module is represented by one rich "document".
    docs = {}
    for res in nlp_resources:
        key = res['module']
        combined = f"{res['title']} {res.get('description', '')}"
        docs[key] = f"{docs[key]} {combined}" if key in docs else combined
    # Clean each document, then embed it.
    for key, doc in docs.items():
        cleaned = utils_preprocess_text(doc, flg_stemm=False, flg_lemm=True, lst_stopwords=stop_words)
        module_embeddings[key] = model.encode(cleaned)
    print(f"Computed embeddings for {len(module_embeddings)} modules")

# Compute embeddings on startup for immediate availability
compute_module_embeddings()
# =============================================
# RESOURCES ENDPOINTS
# =============================================
@app.before_request
def before_request_logging():
    """Log every incoming request whose path falls under the /api prefix."""
    if not request.path.startswith('/api'):
        return
    log_request()
@app.route('/api/reset', methods=['POST'])
def reset_database():
    """Wipes the database memory completely"""
    try:
        reset_db()
    except Exception as e:
        return jsonify({'status': 'error', 'message': str(e)}), 500
    return jsonify({'status': 'success', 'message': 'Database memory wiped completely'})
@app.route('/api/resources', methods=['GET'])
def get_resources():
    """Return every resource, with `visited` reflecting the caller's session."""
    session_id = request.args.get('session_id', 'default')
    session = get_session(session_id)
    # Normalize IDs to stripped strings so membership tests are robust.
    visited = {str(v).strip() for v in session.get('visitedResources', [])}
    payload = [
        {**r, 'visited': str(r['id']).strip() in visited}
        for r in nlp_resources
    ]
    return jsonify(payload)
@app.route('/api/resources/<resource_id>', methods=['GET'])
def get_resource(resource_id):
    """Get a single resource by ID"""
    for r in nlp_resources:
        if r['id'] == resource_id:
            return jsonify(r)
    return jsonify({'error': 'Resource not found'}), 404
# =============================================
# AGENT STATE ENDPOINTS
# =============================================
@app.route('/api/agent', methods=['GET'])
def get_agent_state():
    """Get current agent state (position, level, reward)"""
    sid = request.args.get('session_id', 'default')
    state = get_session(sid)
    return jsonify(state)
@app.route('/api/agent/move', methods=['POST'])
def move_agent():
    """Move agent to a new position"""
    payload = request.get_json()
    sid = payload.get('session_id', 'default')
    new_position = payload.get('position', {})
    # Persist the new position on the session record.
    session = get_session(sid)
    session['position'] = new_position
    update_session(sid, session)
    return jsonify(session)
# =============================================
# NOTIFICATION ENDPOINTS
# =============================================
@app.route('/api/notifications', methods=['GET'])
def get_notifications():
    """Get all notifications for a session"""
    sid = request.args.get('session_id', 'default')
    notifications = get_session(sid).get('notifications', [])
    return jsonify(notifications)
@app.route('/api/notifications/add', methods=['POST'])
def add_notification():
    """Append a notification to the session; newest entries come first."""
    payload = request.get_json()
    session_id = payload.get('session_id', 'default')
    message = payload.get('message')
    notif_type = payload.get('type', 'info')
    if not message:
        return jsonify({'error': 'Message required'}), 400
    session = get_session(session_id)
    notifications = session.setdefault('notifications', [])
    new_notif = {
        'id': f"notif_{int(datetime.now().timestamp())}",
        'type': notif_type,
        'message': message,
        'timestamp': int(datetime.now().timestamp() * 1000),
        'read': False
    }
    # Prepend so the most recent notification is first in the list.
    notifications.insert(0, new_notif)
    update_session(session_id, session)
    return jsonify(new_notif)
@app.route('/api/notifications/read', methods=['POST'])
def mark_notifications_read():
    """Mark all notifications as read in the database"""
    payload = request.get_json()
    sid = payload.get('session_id', 'default')
    session = get_session(sid)
    notifications = session.get('notifications')
    if notifications is not None:
        for notif in notifications:
            notif['read'] = True
        update_session(sid, session)
    return jsonify({'status': 'success'})
def sync_agent_progression(session):
    """Recompute `level` from `totalReward` so the two stay consistent.

    One stage per 100 points (Stage 1 starts at 0 pts, Stage 2 at 100 pts,
    ...); the level never drops below 1. Mutates and returns `session`.
    """
    reward = session.get('totalReward', 0)
    session['level'] = max(1, reward // 100 + 1)
    return session
# =============================================
# RESOURCE INTERACTION ENDPOINTS
# =============================================
@app.route('/api/resource/visit', methods=['POST'])
def visit_resource():
    """Mark a resource as visited, credit its reward, and resync the level.

    Fix: use setdefault for 'visitedResources' -- the original indexed the
    key directly and raised KeyError for session records created without it,
    while every other endpoint treats the key as optional via .get().
    """
    data = request.get_json()
    session_id = data.get('session_id', 'default')
    resource_id = data.get('resource_id')
    session = get_session(session_id)
    # Find resource
    resource = next((r for r in nlp_resources if r['id'] == resource_id), None)
    if not resource:
        return jsonify({'error': 'Resource not found'}), 404
    # Record the visit once; reward is only credited on the first visit.
    visited = session.setdefault('visitedResources', [])
    if resource_id not in visited:
        visited.append(resource_id)
        # Add reward
        session['totalReward'] = session.get('totalReward', 0) + resource.get('reward', 0)
    # Sync level with totalReward and persist.
    session = sync_agent_progression(session)
    update_session(session_id, session)
    return jsonify(session)
# =============================================
# LEARNING SUMMARY ENDPOINTS
# =============================================
@app.route('/api/summary/create', methods=['POST'])
def create_learning_summary():
    """
    Create a learning summary from visited resources.

    Scores every module against the summary text (BERT cosine similarity
    plus keyword/visit bonuses), awards XP, persists the summary and a new
    polyline, and returns the polyline, the running average polyline, and
    the DQN next-resource recommendation.
    """
    data = request.get_json()
    session_id = data.get('session_id', 'default')
    session = get_session(session_id)
    title = data.get('title', '')
    summary = data.get('summary', '')
    visited_ids = data.get('visited_resources', [])
    if not title or not summary:
        return jsonify({'error': 'Title and summary required'}), 400
    # Get visited resources using robust (string-normalized) ID matching
    visited_set = set(str(v).strip() for v in visited_ids)
    visited_resources = [r for r in nlp_resources if str(r['id']).strip() in visited_set]
    print(f"[DEBUG] create_learning_summary: incoming visited_ids={visited_ids}, matched count={len(visited_resources)}")
    # Calculate learning metrics
    # NOTE(review): total_reward is computed but never used below -- confirm
    # whether it was meant to feed the XP calculation.
    total_difficulty = sum(r['difficulty'] for r in visited_resources)
    total_reward = sum(r['reward'] for r in visited_resources)
    avg_difficulty = total_difficulty / len(visited_resources) if visited_resources else 0
    # Extract unique modules from resources (preserving first-seen order)
    seen_modules = set()
    ordered_modules = []
    for r in nlp_resources:
        m = r['module']
        if m not in seen_modules:
            ordered_modules.append(m)
            seen_modules.add(m)
    # Module aliases for better keyword matching in free-form summary text
    module_aliases = {
        'Pre training objectives': ['pre-training', 'pre training', 'objectives'],
        'Pre trained models': ['pre-trained', 'pre trained'],
        'Tutorial: Introduction to huggingface': ['huggingface', 'hugging face'],
        'Fine tuning LLM': ['fine-tuning', 'fine tuning', 'ft'],
        'Instruction tuning': ['instruction tuning', 'instruction-tuning'],
        'Prompt based learning': ['prompt based', 'prompt-based'],
        'Parameter efficient fine tuning': ['peft', 'parameter efficient'],
        'Incontext Learning': ['in-context', 'incontext', 'icl'],
        'Prompting methods': ['prompting'],
        'Retrieval Methods': ['retrieval'],
        'Retrieval Augmented Generation': ['rag', 'retrieval augmented'],
        'Quantization': ['quantization', 'quantized'],
        'Mixture of Experts Model': ['moe', 'mixture of experts'],
        'Agentic AI': ['agentic', 'agents'],
        'Multimodal LLMs': ['multimodal', 'multi-modal'],
        'Vision Language Models': ['vlm', 'vision-language', 'vision language'],
        'Policy learning using DQN': ['dqn', 'deep q', 'policy gradient'],
        'RLHF': ['rlhf', 'reinforcement learning from human feedback']
    }
    # Check for module names (or their aliases) mentioned in the summary text
    summary_lower = summary.lower()
    keywords_found = []
    for module in ordered_modules:
        if module.lower() in summary_lower:
            keywords_found.append(module)
            continue
        aliases = module_aliases.get(module, [])
        for alias in aliases:
            if alias.lower() in summary_lower:
                keywords_found.append(module)
                break
    # Also credit visited-resource titles mentioned verbatim in the summary
    for r in visited_resources:
        if r['title'].lower() in summary_lower and r['title'] not in keywords_found:
            keywords_found.append(r['title'])
    # Calculate per-module scores for the polyline
    module_scores = []
    log_polyline_step("START_GENERATION", f"Generating polyline for summary: '{summary[:100]}...'")
    bert_model = get_bert_model()
    if bert_model:
        # Lazily (re)build module embeddings if startup computation was skipped
        if not module_embeddings:
            compute_module_embeddings()
        try:
            clean_summary = utils_preprocess_text(summary, flg_stemm=False, flg_lemm=True, lst_stopwords=stop_words)
            summary_embedding = bert_model.encode(clean_summary)
            for module in ordered_modules:
                # Base score: cosine similarity between summary and module doc
                score = 0.0
                if module in module_embeddings:
                    sim = get_cos_sim(summary_embedding, module_embeddings[module])
                    score = max(0.0, sim)
                # Bonuses: +0.3 keyword mention, +0.1 per visited resource
                if module in keywords_found: score += 0.3
                module_visited_count = sum(1 for r in visited_resources if r['module'] == module)
                if module_visited_count > 0: score += 0.1 * module_visited_count
                module_scores.append(float(max(0.0, min(1.0, score))))
        except Exception as e:
            # Fallback scoring: noise around 0.5 plus the same bonuses.
            # NOTE(review): if the exception fires mid-loop, module_scores may
            # already hold partial entries and this appends a full second set,
            # yielding more than len(ordered_modules) scores -- verify.
            print(f"Error computing BERT scores: {e}")
            for module in ordered_modules:
                score = 0.5 + (np.random.random() - 0.5) * 0.1
                if module in keywords_found: score += 0.2
                module_visited_count = sum(1 for r in visited_resources if r['module'] == module)
                if module_visited_count > 0: score += 0.1 * module_visited_count
                module_scores.append(float(max(0.0, min(1.0, score))))
    else:
        # No BERT model available: same fallback scoring as above
        for module in ordered_modules:
            score = 0.5 + (np.random.random() - 0.5) * 0.1
            if module in keywords_found: score += 0.2
            module_visited_count = sum(1 for r in visited_resources if r['module'] == module)
            if module_visited_count > 0: score += 0.1 * module_visited_count
            module_scores.append(float(max(0.0, min(1.0, score))))
    # -- DQN Recommendation --
    rec_result = navigator.recommend_next(visited_ids, module_scores, nlp_resources)
    next_recommendation_obj = rec_result.get('resource')
    # NOTE(review): `recommendations` built here is overwritten further down
    # (unvisited-modules list) before it is ever returned -- dead computation.
    # NOTE(review): `r['id'] not in visited_ids` compares raw IDs, unlike the
    # normalized visited_set used above -- confirm IDs always arrive as strings.
    recommendations = []
    if next_recommendation_obj:
        recommendations.append(next_recommendation_obj['title'])
    unvisited_remaining = [r for r in nlp_resources if r['id'] not in visited_ids and r['title'] not in recommendations]
    unvisited_remaining.sort(key=lambda r: (-r.get('reward', 0), r.get('difficulty', 0)))
    for r in unvisited_remaining:
        if len(recommendations) < 3: recommendations.append(r['title'])
        else: break
    # Strengths: mentioned keywords, else the easy resources already visited
    strengths = keywords_found if keywords_found else [r['title'] for r in visited_resources if r.get('difficulty', 0) <= 2]
    # Dominant topics: most frequent keywords across all stored polylines
    polylines = get_db_polylines()
    from collections import Counter
    all_keywords = []
    for p in polylines.values():
        if 'keywords_found' in p: all_keywords.extend(p['keywords_found'])
    all_keywords.extend(keywords_found)
    keyword_counts = Counter(all_keywords)
    most_common_keywords = [k for k, v in keyword_counts.most_common(3)]
    dominant_topics = most_common_keywords
    # Pair each module with its score for the XP / recommendation logic below
    scored_modules = list(zip(ordered_modules, module_scores))
    # Calculate XP: a module pays out its base points when its score reaches
    # the module's high_line threshold
    current_polyline_sum = sum(module_scores)
    total_earned_base_pts = 0
    high_line_sum = 0
    for module, score in scored_modules:
        resource = next((r for r in nlp_resources if r['module'] == module), None)
        if resource:
            hl = float(resource.get('high_line', 0.8))
            high_line_sum += hl
            if score >= hl:
                total_earned_base_pts += resource.get('base_points', 50)
    # ---- XP SYNC & REWARD CALCULATION ----
    # Ensure backend Reward stays synced with frontend.
    current_reward = session.get('totalReward', 0)
    base_visited_reward = sum(r.get('reward', 50) for r in visited_resources)
    # Quality bonus scales earned points by (score sum / threshold sum);
    # the max(0.1, ...) guards against division by zero
    high_line_sum = max(0.1, high_line_sum)
    summary_bonus = int(total_earned_base_pts * (current_polyline_sum / high_line_sum))
    # Final XP Earned (minimum 25 participation reward)
    xp_earned = max(25, summary_bonus)
    # Update session; max() ensures the stored total never decreases
    session['totalReward'] = max(current_reward, base_visited_reward) + xp_earned
    session = sync_agent_progression(session)
    update_session(session_id, session)
    # Generate generic AI analysis text
    ai_analysis = f"Learning profile enriched by modules like {', '.join(keywords_found[:3]) if keywords_found else 'Basics'}. Stage {session['level']} achieved with {session['totalReward']} points."
    # Recommendations: unvisited modules in curriculum order (overrides the
    # earlier DQN-title list -- see NOTE above)
    visited_module_names = set(r['module'] for r in visited_resources)
    all_module_names = set(r['module'] for r in nlp_resources)
    unvisited_modules = list(all_module_names - visited_module_names)
    # Sort unvisited modules by their order in ordered_modules
    unvisited_modules.sort(key=lambda m: ordered_modules.index(m) if m in ordered_modules else 99)
    # Fall back to the lowest-scoring modules when everything is visited
    recommendations = unvisited_modules[:3] if unvisited_modules else [m for m, s in scored_modules if s <= 0.3][:3]
    timestamp_id = datetime.now().strftime("%Y%m%d_%H%M%S")
    summary_result = {
        'id': f"summary_{session_id}_{timestamp_id}",
        'title': title, 'summary': summary, 'keywords_found': keywords_found,
        'totalResources': len(nlp_resources), 'visitedResources': len(visited_resources),
        'currentLevel': session['level'],
        'strengths': strengths, 'recommendations': recommendations,
        'ai_analysis': ai_analysis,
        'avgDifficulty': round(avg_difficulty, 2), 'totalReward': session['totalReward'],
        'xp_earned': xp_earned,
        'timestamp': int(datetime.now().timestamp() * 1000)
    }
    save_summary(summary_result)
    # Final result construction -- compute true 2D assimilation position
    # using the radial-axis dimensionality reduction (Equations 6-12)
    assimilation_position = radial_mapper.polyline_to_grid(
        module_scores, num_topics=len(ordered_modules)
    )
    log_polyline_step("ASSIMILATION_2D", f"module_scores={[round(s,3) for s in module_scores]} -> grid=({assimilation_position['x']}, {assimilation_position['y']})")
    polyline_id = f"polyline_{timestamp_id}"
    new_polyline = {
        'id': polyline_id, 'name': title, 'path': [r['position'] for r in visited_resources],
        'color': f'rgba({np.random.randint(100,200)}, {np.random.randint(100,200)}, 255, 0.4)',
        'isActive': False, 'summary': summary, 'keywords_found': keywords_found,
        'module_scores': module_scores, 'strengths': strengths, 'dominant_topics': dominant_topics,
        'ai_analysis': ai_analysis, 'assimilation_position': assimilation_position,
        'next_recommendation': {
            'id': next_recommendation_obj['id'], 'title': next_recommendation_obj['title'],
            'position': next_recommendation_obj['position'], 'module': rec_result['module'], 'reason': rec_result['reason']
        } if next_recommendation_obj else None
    }
    save_polyline(polyline_id, new_polyline)
    # Recompute the average polyline over all stored histories (19 modules)
    all_polylines = get_db_polylines()
    history_scores = [p.get('module_scores', []) for p in all_polylines.values() if p.get('module_scores')]
    num_histories = len(history_scores)
    avg_scores = [0.0] * 19
    if num_histories > 0:
        for scores in history_scores:
            for i, s in enumerate(scores):
                if i < 19: avg_scores[i] += s
        avg_scores = [s / num_histories for s in avg_scores]
    return jsonify({
        'polyline': new_polyline,
        'average_polyline': {
            'id': 'current_average',
            'name': 'Current Average Knowledge',
            'module_scores': avg_scores,
            'isActive': True,
            'color': 'rgba(59, 130, 246, 0.8)',
            'assimilation_position': radial_mapper.polyline_to_grid(avg_scores, num_topics=len(ordered_modules))
        },
        'next_recommendation': new_polyline['next_recommendation'],
        'keywords_found': keywords_found,
        'totalReward': session['totalReward'],
        'xp_earned': xp_earned
    })
# =============================================
# POLYLINE ENDPOINTS
# =============================================
@app.route('/api/polylines', methods=['GET'])
def get_polylines_route():
    """Get all polylines plus two virtual ones.

    Virtual polylines:
      - "Peak Potential" (high_line): each resource's radius scaled by its
        per-module target threshold.
      - "Current Average Knowledge": each radius scaled by the mean of all
        historical module-score vectors.
    Historical polylines are returned with isActive forced to False; only
    the two virtual polylines are active.

    Fix: removed an unused per-resource `hl_scores` list that duplicated
    the per-module `hl_module_scores` actually passed to the mapper.
    """
    polylines = get_db_polylines()
    # Generate ordered_modules to ensure a consistent module -> index mapping
    seen_modules = set()
    ordered_modules = []
    for r in nlp_resources:
        m = r['module']
        if m not in seen_modules:
            ordered_modules.append(m)
            seen_modules.add(m)
    # Compute average module scores across all historical polylines
    import math
    history_scores = [p.get('module_scores', []) for p in polylines.values() if p.get('module_scores')]
    num_histories = len(history_scores)
    avg_module_scores = [0.0] * len(ordered_modules)
    if num_histories > 0:
        for scores in history_scores:
            # Ensure we only average up to the length of ordered_modules
            for i, s in enumerate(scores):
                if i < len(avg_module_scores):
                    avg_module_scores[i] += s
        avg_module_scores = [s / num_histories for s in avg_module_scores]
    else:
        # Default to some base value if no histories exist
        avg_module_scores = [0.1] * len(ordered_modules)

    # Sort resources by polar angle around the grid origin (0, 19)
    def compute_angle(r):
        return math.atan2(19 - r['position']['y'], r['position']['x'])
    resources_sorted = sorted(nlp_resources, key=compute_angle)
    high_line_path = []
    current_path = []
    for r in resources_sorted:
        dx = r['position']['x']
        dy = 19 - r['position']['y']
        radius = math.hypot(dx, dy)
        theta = math.atan2(dy, dx)
        # High Line: scale this resource's radius by its high_line fraction
        hl = float(r.get('high_line', 0.8))
        hl_rad = radius * hl
        hl_x = hl_rad * math.cos(theta)
        hl_y = 19 - hl_rad * math.sin(theta)
        high_line_path.append({'x': hl_x, 'y': hl_y})
        # Current Average: scale by the module's historical average score
        try:
            m_idx = ordered_modules.index(r['module'])
            avg_s = avg_module_scores[m_idx] if num_histories > 0 else 0.0
        except ValueError:
            avg_s = 0.0
        cur_rad = radius * avg_s
        cur_x = cur_rad * math.cos(theta)
        cur_y = 19 - cur_rad * math.sin(theta)
        current_path.append({'x': cur_x, 'y': cur_y})
    # Close the loops by repeating the first point at the end
    if high_line_path:
        high_line_path.append(high_line_path[0])
    if current_path:
        current_path.append(current_path[0])
    # Per-module high_line scores (one per ordered module) for the mapper
    hl_module_scores = []
    for m in ordered_modules:
        res = next((r for r in nlp_resources if r['module'] == m), None)
        hl_module_scores.append(float(res.get('high_line', 0.8)) if res else 0.8)
    hl_assimilation = radial_mapper.polyline_to_grid(
        hl_module_scores, num_topics=len(ordered_modules)
    )
    cur_assimilation = radial_mapper.polyline_to_grid(
        avg_module_scores, num_topics=len(ordered_modules)
    )
    hl_polyline = {
        'id': 'high_line',
        'name': 'Peak Potential',
        'path': high_line_path,
        'module_scores': hl_module_scores,
        'color': 'rgba(239, 68, 68, 0.8)', # Red
        'isActive': True,
        'confidence': 1.0,
        'summary': 'Target threshold for each module',
        'assimilation_position': hl_assimilation
    }
    cur_polyline = {
        'id': 'current_average',
        'name': 'Current Average Knowledge',
        'path': current_path,
        'module_scores': avg_module_scores,
        'color': 'rgba(59, 130, 246, 0.8)', # Blue
        'isActive': True,
        'confidence': 1.0,
        'summary': 'Your overall average knowledge across all summaries',
        'assimilation_position': cur_assimilation
    }
    # Return historical polylines (forced inactive) plus the two virtual ones
    result = []
    for p in polylines.values():
        p_copy = p.copy()
        p_copy['isActive'] = False  # Strictly disable historical polylines
        result.append(p_copy)
    result.append(hl_polyline)
    result.append(cur_polyline)
    return jsonify(result)
@app.route('/api/polylines/<polyline_id>', methods=['GET'])
def get_polyline(polyline_id):
    """Get a specific polyline"""
    stored = get_db_polylines().get(polyline_id)
    if stored is None:
        return jsonify({'error': 'Polyline not found'}), 404
    return jsonify(stored)
@app.route('/api/polylines/<polyline_id>/toggle', methods=['POST'])
def toggle_polyline(polyline_id):
    """Toggle polyline visibility"""
    payload = request.get_json()
    polyline = get_db_polylines().get(polyline_id)
    if polyline is None:
        return jsonify({'error': 'Polyline not found'}), 404
    # Persist the requested visibility flag (defaults to hidden).
    polyline['isActive'] = payload.get('isActive', False)
    save_polyline(polyline_id, polyline)
    return jsonify(polyline)
# =============================================
# DQN PATH ENDPOINTS
# =============================================
@app.route('/api/dqn-path', methods=['POST'])
def generate_dqn_path():
    """
    Generate DQN optimal path using the Navigator module.
    Request JSON:
    {
        "session_id": "str",
        "agent_position": {"x": int, "y": int},
        "visited_resource_ids": ["id1", "id2", ...]
    }
    """
    payload = request.get_json()
    agent_pos = payload.get('agent_position', {'x': 10, 'y': 10})
    visited_ids = list(payload.get('visited_resource_ids', []))
    # Seed the navigator with the module scores of the most recent polyline
    polylines = get_db_polylines()
    latest_scores = []
    if polylines:
        latest_scores = list(polylines.values())[-1].get('module_scores', [])
    rec = navigator.recommend_next(
        visited_ids=visited_ids,
        module_scores=latest_scores,
        nlp_resources=nlp_resources
    )
    # Path = agent -> recommended resource -> up to 4 nearest unvisited others
    path = [agent_pos]
    visited_set = {str(v).strip() for v in visited_ids}
    recommended = rec['resource']
    if recommended:
        rec_pos = recommended['position']
        path.append(rec_pos)
        candidates = [r for r in nlp_resources
                      if str(r['id']).strip() not in visited_set and r['id'] != recommended['id']]
        # Nearest-first by squared Euclidean distance from the recommendation
        candidates.sort(key=lambda r: (
            (r['position']['x'] - rec_pos['x'])**2 +
            (r['position']['y'] - rec_pos['y'])**2
        ))
        path.extend(r['position'] for r in candidates[:4])
    final_resource = rec['resource']
    # Sum rewards of resources whose grid position appears on the path
    total_reward = sum(r['reward'] for r in nlp_resources
                       if r['position'] in path[1:]) if path else 0
    return jsonify({
        'path': path,
        'finalResource': final_resource,
        'totalReward': total_reward,
        'pathLength': len(path),
        'navigatorReason': rec['reason']
    })
@app.route('/api/next-recommendation', methods=['GET'])
def get_next_recommendation():
    """
    Get the DQN navigator's next resource recommendation for a session.
    Returns: { resource, module, reason, q_values }
    """
    sid = request.args.get('session_id', 'default')
    session = get_session(sid)
    visited = [str(v).strip() for v in session.get('visitedResources', [])]
    # Use the module scores of the most recent polyline, if one exists
    stored = get_db_polylines()
    scores = list(stored.values())[-1].get('module_scores', []) if stored else []
    recommendation = navigator.recommend_next(
        visited_ids=visited,
        module_scores=scores,
        nlp_resources=nlp_resources
    )
    return jsonify(recommendation)
# =============================================
# LEARNING DATA ENDPOINTS
# =============================================
@app.route('/api/learning-data', methods=['GET'])
def get_learning_data():
"""Get comprehensive learning data based on session history and latest summary"""
session_id = request.args.get('session_id', 'default')
session = get_session(session_id)
visited_ids = set(str(v).strip() for v in session.get('visitedResources', []))
visited_resources = [r for r in nlp_resources if str(r['id']).strip() in visited_ids]
# Defaults
strengths = [r['title'] for r in visited_resources if r.get('difficulty', 0) <= 2]
# Recommendations using rewarding modules that are unvisited
unvisited = [r for r in nlp_resources if str(r['id']).strip() not in visited_ids]
unvisited.sort(key=lambda r: (-r.get('reward', 0), r.get('difficulty', 0)))
recommendations = [r['title'] for r in unvisited[:3]]
# Try to augment with results from the latest summary analysis
ai_analysis = ""
xp_earned = 0
try:
try:
from .database import load_db
except (ImportError, ValueError):
from database import load_db
db = load_db()
# Find latest summary for this session (they contain session_id in their ID or we match title)
matching_summaries = [s for s in db.get('summaries', []) if f"summary_{session_id}" in s.get('id', '')]
if matching_summaries:
latest = matching_summaries[-1]
if latest.get('strengths'):
strengths = latest['strengths']
if latest.get('recommendations'):
recommendations = latest['recommendations']
if latest.get('ai_analysis'):
ai_analysis = latest['ai_analysis']
xp_earned = latest.get('xp_earned', 0)
except Exception as e:
print(f"Error augmenting learning data from summaries: {e}")
# Calculate Student's Highline Persona
persona_data = None
try:
# Use existing top-level import get_db_polylines
all_polylines = get_db_polylines()
history_scores = [p.get('module_scores', []) for p in all_polylines.values() if p.get('module_scores')]
if history_scores:
print(f"[PERSONA] Calculating from {len(history_scores)} historical vectors")
# Calculate component-wise maximum (The Student's Highline)
# Ensure vectors are padded to 19 to match the current GMM model
arrays = [np.array(s + [0.0]*(19-len(s)))[:19] for s in history_scores]
highline_vector = np.maximum.reduce(arrays)
persona_data = persona_service.classify_persona(highline_vector.tolist())
else:
print("[PERSONA] No historical scores found, using default vector")
# Initial persona for new students
persona_data = persona_service.classify_persona([0.0]*19)
print(f"[PERSONA] Assigned: {persona_data.get('name') if persona_data else 'None'}")
except Exception as e:
print(f"Error calculating persona: {e}")
# Calculate activity log and heatmap from all summaries for this session
activity_heatmap = {}
activity_log = []
try:
# 1. Add Summary Activity
# Filter summaries by session_id to isolate user data
try:
from .database import load_db
except (ImportError, ValueError):
from database import load_db
db = load_db()
all_summaries = db.get('summaries', [])
matching_summaries = [s for s in all_summaries if f"summary_{session_id}" in s.get('id', '')]
for s in matching_summaries:
s_id = s.get('id', '')
ts = s.get('timestamp')
# Use timestamp if available, otherwise parse from ID
if ts:
dt = datetime.fromtimestamp(ts / 1000.0)
formatted_date = dt.strftime('%Y-%m-%d')
activity_heatmap[formatted_date] = activity_heatmap.get(formatted_date, 0) + 2
final_ts = ts
elif f"summary_" in s_id:
parts = s_id.split('_')
date_str = None
for p in parts[2:]:
if len(p) == 8 and p.isdigit() and p.startswith('20'):
date_str = p
break
if date_str:
formatted_date = f"{date_str[:4]}-{date_str[4:6]}-{date_str[6:]}"
activity_heatmap[formatted_date] = activity_heatmap.get(formatted_date, 0) + 2
# Approximate timestamp from date string if missing
try:
final_ts = int(datetime.strptime(date_str, "%Y%m%d").timestamp() * 1000)
except:
final_ts = int(datetime.now().timestamp() * 1000)
else:
final_ts = int(datetime.now().timestamp() * 1000)
else:
final_ts = int(datetime.now().timestamp() * 1000)
activity_log.append({
'id': s_id,
'type': 'summary',
'title': s.get('title', 'Summary Written'),
'timestamp': final_ts
})
# 2. Add Notification/Visit Activity
notifs = session.get('notifications', [])
for n in notifs:
ts = n.get('timestamp')
if ts:
# Convert ms timestamp to YYYY-MM-DD
dt = datetime.fromtimestamp(ts / 1000.0)
formatted_date = dt.strftime('%Y-%m-%d')
activity_heatmap[formatted_date] = activity_heatmap.get(formatted_date, 0) + 1
# Sort log by timestamp descending to show most recent at the top
activity_log.sort(key=lambda x: str(x.get('timestamp', '')), reverse=True)
activity_log = activity_log[:50] # Limit window
except Exception as e:
print(f"Error calculating activity log: {e}")
# Find most visited module
from collections import Counter
module_counts = Counter(r['module'] for r in visited_resources)
most_visited_module = module_counts.most_common(1)[0][0] if module_counts else "None"
return jsonify({
'totalResources': len(nlp_resources),
'visitedResources': len(visited_resources),
'currentLevel': session.get('level', 1),
'strengths': strengths[:3],
'recommendations': recommendations[:3],
'ai_analysis': ai_analysis,
'activityHeatmap': activity_heatmap,
'activityLog': activity_log,
'nextOptimalResource': unvisited[0]['position'] if unvisited else None,
'totalReward': session.get('totalReward', 0),
'mostVisitedModule': most_visited_module,
'xp_earned': xp_earned,
'persona': persona_data
})
# =============================================
# BOOKMARK ENDPOINTS
# =============================================
@app.route('/api/bookmarks', methods=['GET'])
def get_bookmarks():
    """Return all bookmarked resources for a session.

    Query params:
        session_id: session identifier (defaults to 'default').
    """
    session_id = request.args.get('session_id', 'default')
    # This route shadows the module-level `get_bookmarks` imported from the
    # database module, so re-import it here under an alias. Support both
    # package and script execution, matching the pattern used at file top.
    try:
        from .database import get_bookmarks as get_db_bookmarks
    except (ImportError, ValueError):
        from database import get_bookmarks as get_db_bookmarks
    return jsonify(get_db_bookmarks(session_id))
@app.route('/api/bookmarks/add', methods=['POST'])
def add_bookmark():
    """Add a resource to a session's bookmarks.

    JSON body:
        session_id: session identifier (defaults to 'default').
        resource_id: id of the resource to bookmark (required).

    Returns 400 when resource_id is missing or the body is not JSON.
    """
    # silent=True: a malformed/non-JSON body yields None instead of raising,
    # so we can return our own 400 below rather than crash on .get().
    data = request.get_json(silent=True) or {}
    session_id = data.get('session_id', 'default')
    resource_id = data.get('resource_id')
    if not resource_id:
        return jsonify({'error': 'Resource ID required'}), 400
    # Route name shadows the module-level import; re-import under an alias,
    # supporting both package and script execution.
    try:
        from .database import add_bookmark as add_db_bookmark
    except (ImportError, ValueError):
        from database import add_bookmark as add_db_bookmark
    add_db_bookmark(session_id, resource_id)
    return jsonify({'status': 'success', 'resource_id': resource_id})
@app.route('/api/bookmarks/remove', methods=['POST'])
def remove_bookmark():
    """Remove a resource from a session's bookmarks.

    JSON body:
        session_id: session identifier (defaults to 'default').
        resource_id: id of the resource to remove (required).

    Returns 400 when resource_id is missing or the body is not JSON.
    """
    # silent=True: tolerate malformed bodies and answer with our own 400.
    data = request.get_json(silent=True) or {}
    session_id = data.get('session_id', 'default')
    resource_id = data.get('resource_id')
    if not resource_id:
        return jsonify({'error': 'Resource ID required'}), 400
    # Route name shadows the module-level import; re-import under an alias,
    # supporting both package and script execution.
    try:
        from .database import remove_bookmark as remove_db_bookmark
    except (ImportError, ValueError):
        from database import remove_bookmark as remove_db_bookmark
    remove_db_bookmark(session_id, resource_id)
    return jsonify({'status': 'success', 'resource_id': resource_id})
# =============================================
# NOTES ENDPOINTS
# =============================================
@app.route('/api/notes', methods=['GET'])
def get_notes_route():
    """Return every note stored for the requested session."""
    sid = request.args.get('session_id', 'default')
    notes = get_notes(sid)
    return jsonify(notes)
@app.route('/api/notes', methods=['POST'])
def add_note_route():
    """Create a new note for a session.

    JSON body:
        session_id: session identifier (defaults to 'default').
        note: note payload to store (required).

    Returns the stored note as JSON, or 400 when the note payload (or the
    JSON body itself) is missing.
    """
    # silent=True: request.get_json() returns None on non-JSON bodies instead
    # of raising, so a missing/malformed body hits our 400 below rather than
    # an unhandled AttributeError.
    data = request.get_json(silent=True) or {}
    session_id = data.get('session_id', 'default')
    note_data = data.get('note')
    if not note_data:
        return jsonify({'error': 'Note data required'}), 400
    new_note = add_note(session_id, note_data)
    return jsonify(new_note)
# =============================================
# LECTURES ENDPOINTS
# =============================================
@app.route('/api/lectures', methods=['GET'])
def get_lectures_route():
    """List every lecture known to the backend."""
    lectures = get_lectures()
    return jsonify(lectures)
# =============================================
# AI SIDER CHAT ENDPOINT
# =============================================
# Load YouTube transcripts used as chat context for the AI sider.
_transcripts_path = os.path.join(os.path.dirname(__file__), 'data', 'youtube_transcripts.json')
_youtube_transcripts = {}
try:
    if not os.path.exists(_transcripts_path):
        print(f"Transcripts file not found: {_transcripts_path}")
    else:
        with open(_transcripts_path, 'r', encoding='utf-8') as f:
            raw_transcripts = json.load(f)
        # Lower-case and strip the keys so module-name lookups are robust.
        _youtube_transcripts = {str(k).strip().lower(): v for k, v in raw_transcripts.items()}
        print(f"Loaded and normalized transcripts for {len(_youtube_transcripts)} modules")
except Exception as e:
    print(f"Could not load transcripts: {e}")
    _youtube_transcripts = {}
from openai import OpenAI
# AI Client configuration
# Using Groq (OpenAI-compatible) for free high-quality inference
_ai_client = None
try:
    _groq_key = os.getenv("GROQ_API_KEY")
    _api_key = _groq_key or os.getenv("OPENAI_API_KEY") or "FIXME_YOUR_API_KEY"
    # BUG FIX: the old check was `"GROQ" in _api_key`, i.e. whether the text
    # "GROQ" appeared in the key *value*. Real Groq keys start with "gsk_",
    # so the Groq base URL was never selected and Groq keys were sent to
    # api.openai.com. Route to Groq when the key came from GROQ_API_KEY
    # (or when no key is configured at all, preserving the old placeholder
    # behavior).
    if _groq_key or _api_key == "FIXME_YOUR_API_KEY":
        _base_url = "https://api.groq.com/openai/v1"
    else:
        _base_url = None
    _ai_client = OpenAI(api_key=_api_key, base_url=_base_url)
except Exception as e:
    print(f"AI Client initialization warning: {e}")
@app.route('/api/chat', methods=['POST'])
def chat_with_ai():
    """
    AI Sider chat endpoint - upgraded to use the openai package.
    Uses YouTube transcript context and a premium model for better answers.

    JSON body:
        module: current lesson module name/id (used for context lookup).
        question: the student's question (required, non-empty).
        history: optional list of prior {role, content} messages.

    Returns JSON {answer, source}, where source identifies which backend
    produced the answer ('openai-<model>', 'transcript-lookup', or 'none').
    """
    # silent=True: malformed/non-JSON bodies become {} instead of raising.
    data = request.get_json(silent=True) or {}
    module = data.get('module', '')
    question = data.get('question', '')
    history = data.get('history', [])
    if not question.strip():
        return jsonify({'answer': 'Please ask a question about this lesson.', 'source': 'none'})
    # 1. Find transcript/context with better matching
    # Transcript keys were normalized to lowercase at load time.
    module_norm = str(module).strip().lower()
    transcript = _youtube_transcripts.get(module_norm, '')
    if not transcript:
        # Resolve the resource first so we can also match on its formal title
        resource_match = None
        for r in nlp_resources:
            if r['id'] == module or r['title'].lower() == module_norm or r.get('module', '').lower() == module_norm:
                resource_match = r
                break
        target_name = resource_match['title'] if resource_match else module_norm
        target_name_lower = target_name.lower()
        # Fuzzy match: accept transcript keys that contain (or are contained
        # in) the target title.
        for key, val in _youtube_transcripts.items():
            if key in target_name_lower or target_name_lower in key:
                transcript = val
                break
    # Fall back to the resource description only when no transcript was found
    # (the description is unused otherwise, so skip the scan when possible).
    resource_desc = ''
    if not transcript:
        for r in nlp_resources:
            if r.get('module', '').lower() == module_norm or r.get('title', '').lower() == module_norm:
                resource_desc = r.get('description', '')[:1000]
                break
    context = transcript[:4500] if transcript else resource_desc[:1500]
    # 2. Try Premium Inference via OpenAI Package
    # Check for actual keys, not just the placeholder
    _key = os.getenv("GROQ_API_KEY") or os.getenv("OPENAI_API_KEY")
    if _ai_client and _key and _key != "FIXME_YOUR_API_KEY":
        try:
            # Determine model based on provider.
            # BUG FIX: client.base_url is an httpx.URL (openai v1), which has
            # no .lower(); the old code raised AttributeError here and every
            # Groq request silently fell through to the transcript fallback.
            # Stringify before lowering.
            if "groq" in str(_ai_client.base_url or "").lower():
                model = "llama-3.3-70b-versatile"
            else:
                model = "gpt-3.5-turbo"
            system_prompt = f"""You are 'Sider AI', a premium learning assistant for an Advanced NLP course.
Your goal is to help students understand the current lesson module: '{module}'.
Use the following context from the lesson's YouTube transcript/description to answer the student's question accurately:
---
{context}
---
INSTRUCTIONS:
- Be concise, professional, and encouraging.
- If the answer is in the context, prioritize that information.
- If the answer isn't in the context, use your general LLM knowledge to explain the concept.
- Format your response using clean Markdown."""
            messages = [{"role": "system", "content": system_prompt}]
            # Add limited history for continuity
            for msg in history[-4:]:
                role = "user" if msg.get("role") == "user" else "assistant"
                messages.append({"role": role, "content": msg.get("content", "")})
            messages.append({"role": "user", "content": question})
            completion = _ai_client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=0.7,
                max_tokens=800
            )
            answer = completion.choices[0].message.content
            return jsonify({'answer': answer, 'source': f'openai-{model}'})
        except Exception as e:
            print(f"[CHAT] Premium AI error: {e}")
            # Fall through to lookup if premium fails
    # 3. Fallback to Search/Lookup (Avoiding T5 to prevent worker timeouts on HF)
    relevant_context = ""
    if context:
        sentences = context.split('.')
        # Find sentences containing keywords from the question
        keywords = [w.lower() for w in question.split() if len(w) > 3]
        matching = []
        for s in sentences:
            if any(k in s.lower() for k in keywords):
                matching.append(s.strip())
        relevant_context = ". ".join(matching[:3])
    if relevant_context:
        answer = f"I found some relevant information in the lesson material: {relevant_context}. For a deeper explanation, please ensure an API key is configured in the environment."
    else:
        answer = f"I'm here to help with the lesson on '{module}'. I couldn't find a specific answer in the local material, but you should review the module description for more details. (Tip: Configure an AI API key for better responses)."
    return jsonify({'answer': answer, 'source': 'transcript-lookup'})
@app.route('/api/reset_session', methods=['POST'])
def reset_session_route():
    """Wipe all journey data for a session and return the fresh session."""
    payload = request.get_json()
    sid = payload.get('session_id', 'default')
    try:
        fresh_session = reset_session_data(sid)
    except Exception as e:
        # Surface the failure reason to the caller with a 500.
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500
    return jsonify({
        'status': 'success',
        'message': 'Journey reset successfully',
        'session': fresh_session
    })
if __name__ == '__main__':
print(f"Loaded {len(nlp_resources)} NLP resources")