KrishnaCosmic committed on
Commit
5ce16cc
·
1 Parent(s): 6337b71

checking changes

Browse files
main.py CHANGED
@@ -34,9 +34,16 @@ from services.rag_chatbot_service import rag_chatbot_service
34
  from services.mentor_matching_service import mentor_matching_service
35
  from services.hype_generator_service import hype_generator_service
36
  from services.rag_data_prep import rag_data_prep
 
 
37
 
38
  # Import models for request/response types
39
  from models.issue import Issue
 
 
 
 
 
40
 
41
 
42
  @asynccontextmanager
@@ -143,6 +150,33 @@ class RAGDataPrepRequest(BaseModel):
143
  collection_name: str = "rag_chunks"
144
 
145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  # =============================================================================
147
  # Health & Status Endpoints
148
  # =============================================================================
@@ -491,9 +525,280 @@ async def get_rag_chunks(batch_size: int = 100, skip_embedded: bool = True):
491
 
492
 
493
  # =============================================================================
494
- # Run with: uvicorn main:app --host 0.0.0.0 --port 7860
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
495
  # =============================================================================
496
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
497
  if __name__ == "__main__":
498
  import uvicorn
499
  port = int(os.getenv("PORT", "7860"))
 
34
  from services.mentor_matching_service import mentor_matching_service
35
  from services.hype_generator_service import hype_generator_service
36
  from services.rag_data_prep import rag_data_prep
37
+ from services.sentiment_analysis_service import sentiment_analysis_service
38
+ from services.mentor_leaderboard_service import mentor_leaderboard_service
39
 
40
  # Import models for request/response types
41
  from models.issue import Issue
42
+ from models.mentor_leaderboard import (
43
+ MentorLeaderboardEntry,
44
+ LeaderboardResponse,
45
+ LeaderboardEdit
46
+ )
47
 
48
 
49
  @asynccontextmanager
 
150
  collection_name: str = "rag_chunks"
151
 
152
 
153
+ class CommentSentimentRequest(BaseModel):
154
+ """Request for sentiment analysis of a single comment"""
155
+ comment_id: str
156
+ body: str
157
+ author: Optional[str] = "unknown"
158
+ force_recalc: bool = False
159
+
160
+
161
+ class BatchCommentSentimentRequest(BaseModel):
162
+ """Request for sentiment analysis of multiple comments"""
163
+ comments: List[Dict[str, Any]]
164
+ # Each comment dict should have: id, body, author (optional)
165
+
166
+
167
+ class LeaderboardEditRequest(BaseModel):
168
+ """Request to edit a leaderboard entry"""
169
+ mentor_id: str
170
+ edited_by: str # Maintainer username
171
+ reason: Optional[str] = None
172
+ # Can update:
173
+ custom_notes: Optional[str] = None
174
+ sentiment_score: Optional[float] = None
175
+ expertise_score: Optional[float] = None
176
+ engagement_score: Optional[float] = None
177
+ best_language: Optional[str] = None
178
+
179
+
180
  # =============================================================================
181
  # Health & Status Endpoints
182
  # =============================================================================
 
525
 
526
 
527
  # =============================================================================
528
+ # Sentiment Analysis Endpoints (Stage 3 Integration)
529
+ # =============================================================================
530
+
531
+ @app.post("/sentiment/analyze")
532
+ async def analyze_comment_sentiment(request: CommentSentimentRequest):
533
+ """
534
+ Analyze sentiment of a single PR comment using DistilBERT.
535
+
536
+ Returns:
537
+ - sentiment_label: "POSITIVE" or "NEGATIVE"
538
+ - sentiment_score: Confidence (0.0-1.0)
539
+ - prominent_language: Detected language category (technical, positive, negative, etc.)
540
+
541
+ Used in Stage 3 RAG prompt: "The reviewers' sentiment is {sentiment_label}...
542
+ with focus on {prominent_language} aspects"
543
+ """
544
+ try:
545
+ result = sentiment_analysis_service.analyze_comment(
546
+ comment_id=request.comment_id,
547
+ comment_text=request.body,
548
+ author=request.author,
549
+ force_recalc=request.force_recalc
550
+ )
551
+ return result
552
+ except Exception as e:
553
+ logger.error(f"Sentiment analysis error: {e}")
554
+ raise HTTPException(status_code=500, detail=str(e))
555
+
556
+
557
+ @app.post("/sentiment/analyze-batch")
558
+ async def analyze_batch_sentiment(request: BatchCommentSentimentRequest):
559
+ """
560
+ Analyze sentiment for multiple comments at once.
561
+
562
+ Each comment should have:
563
+ - id: Comment identifier
564
+ - body: Comment text
565
+ - author: (optional) Comment author
566
+
567
+ Returns List of sentiment results + summary stats
568
+ """
569
+ try:
570
+ results = sentiment_analysis_service.analyze_batch(request.comments)
571
+
572
+ # Get summary overview
573
+ summary = sentiment_analysis_service.get_summary(results)
574
+
575
+ return {
576
+ "comments": results,
577
+ "summary": summary,
578
+ "total_analyzed": len(results)
579
+ }
580
+ except Exception as e:
581
+ logger.error(f"Batch sentiment analysis error: {e}")
582
+ raise HTTPException(status_code=500, detail=str(e))
583
+
584
+
585
+ @app.get("/sentiment/summary")
586
+ async def get_sentiment_summary(repo_name: Optional[str] = None):
587
+ """
588
+ Get sentiment summary for comments (if you have them cached).
589
+
590
+ For Stage 3 prompt input, this helps determine:
591
+ - Is the review tone supportive or critical?
592
+ - Are reviewers focused on technical debt or new features?
593
+ """
594
+ try:
595
+ # In a real implementation, fetch comments from DB for this repo
596
+ # For now, return cache stats
597
+ cache_stats = sentiment_analysis_service.get_cache_stats()
598
+
599
+ return {
600
+ "cache_status": cache_stats,
601
+ "message": "Sentiment analysis service is ready. Send /sentiment/analyze-batch with comments to get summary."
602
+ }
603
+ except Exception as e:
604
+ logger.error(f"Sentiment summary error: {e}")
605
+ raise HTTPException(status_code=500, detail=str(e))
606
+
607
+
608
+ @app.post("/sentiment/clear-cache")
609
+ async def clear_sentiment_cache(auth: dict = Depends(require_api_key_or_auth)):
610
+ """
611
+ Clear the sentiment analysis cache (admin only).
612
+
613
+ Useful if you've updated keywords or want fresh analysis.
614
+ """
615
+ try:
616
+ sentiment_analysis_service.clear_cache()
617
+ return {"message": "Sentiment analysis cache cleared", "status": "success"}
618
+ except Exception as e:
619
+ logger.error(f"Cache clear error: {e}")
620
+ raise HTTPException(status_code=500, detail=str(e))
621
+
622
+
623
+ # =============================================================================
624
+ # Mentor Leaderboard Endpoints (AI-Powered Rankings with Sentiment)
625
  # =============================================================================
626
 
627
+ @app.post("/leaderboard/generate")
628
+ async def generate_leaderboard(
629
+ exclude_maintainer: Optional[str] = None,
630
+ auth: dict = Depends(require_api_key_or_auth)
631
+ ):
632
+ """
633
+ Generate the mentor leaderboard from scratch.
634
+
635
+ This endpoint:
636
+ 1. Fetches all mentor conversations
637
+ 2. Analyzes sentiment of each conversation using DistilBERT
638
+ 3. Detects programming languages mentioned
639
+ 4. Ranks mentors by: Sentiment (35%) + Expertise (40%) + Engagement (25%)
640
+
641
+ Returns ranked mentors with scores for each component.
642
+
643
+ **Parameters:**
644
+ - exclude_maintainer: User ID of maintainer to exclude from rankings
645
+
646
+ **Returns leaderboard with:**
647
+ - overall_score: Weighted ranking score (0-100)
648
+ - sentiment_score: Quality of mentorship interactions
649
+ - expertise_score: Programming language proficiency
650
+ - best_language: Top detected language
651
+ - rank: Current position
652
+ """
653
+ try:
654
+ logger.info(f"Generating leaderboard (exclude_maintainer={exclude_maintainer})...")
655
+ result = await mentor_leaderboard_service.generate_leaderboard(
656
+ exclude_maintainer_id=exclude_maintainer
657
+ )
658
+ return result
659
+ except Exception as e:
660
+ logger.error(f"Leaderboard generation error: {e}")
661
+ raise HTTPException(status_code=500, detail=str(e))
662
+
663
+
664
+ @app.get("/leaderboard")
665
+ async def get_leaderboard(
666
+ limit: int = 50,
667
+ skip: int = 0,
668
+ auth: dict = Depends(require_api_key_or_auth)
669
+ ):
670
+ """
671
+ Get the cached mentor leaderboard.
672
+
673
+ Returns top mentors with their rankings.
674
+
675
+ **Query Parameters:**
676
+ - limit: Number of entries to return (default: 50)
677
+ - skip: Number to skip for pagination (default: 0)
678
+ """
679
+ try:
680
+ result = await mentor_leaderboard_service.get_leaderboard(
681
+ limit=limit,
682
+ skip=skip
683
+ )
684
+ return result
685
+ except Exception as e:
686
+ logger.error(f"Get leaderboard error: {e}")
687
+ raise HTTPException(status_code=500, detail=str(e))
688
+
689
+
690
+ @app.get("/leaderboard/mentor/{mentor_id}")
691
+ async def get_mentor_leaderboard_entry(
692
+ mentor_id: str,
693
+ auth: dict = Depends(require_api_key_or_auth)
694
+ ):
695
+ """
696
+ Get leaderboard entry for a specific mentor.
697
+
698
+ Returns their ranking, scores, language proficiency, and edit history.
699
+ """
700
+ try:
701
+ entry = await mentor_leaderboard_service.get_entry(mentor_id)
702
+ if not entry:
703
+ raise HTTPException(status_code=404, detail=f"Mentor {mentor_id} not in leaderboard")
704
+ return entry
705
+ except HTTPException:
706
+ raise
707
+ except Exception as e:
708
+ logger.error(f"Get mentor entry error: {e}")
709
+ raise HTTPException(status_code=500, detail=str(e))
710
+
711
+
712
+ @app.post("/leaderboard/edit")
713
+ async def edit_leaderboard_entry(
714
+ request: LeaderboardEditRequest,
715
+ auth: dict = Depends(require_api_key_or_auth)
716
+ ):
717
+ """
718
+ Edit a leaderboard entry (maintainer only).
719
+
720
+ Allows manual adjustments to mentor rankings. All edits are tracked.
721
+
722
+ **Editable fields:**
723
+ - custom_notes: Custom notes about this mentor
724
+ - sentiment_score: Adjust sentiment component (0-100)
725
+ - expertise_score: Adjust expertise component (0-100)
726
+ - engagement_score: Adjust engagement component (0-100)
727
+ - best_language: Override detected language
728
+
729
+ **All edits are recorded in:**
730
+ - edit_history: List of all changes with timestamp and reason
731
+ - is_custom_edited: Flag marking entry as manually tweaked
732
+ - last_edited_by: Who made the edit
733
+ """
734
+ try:
735
+ # Build update dict from request
736
+ updates = {
737
+ "edited_by": request.edited_by,
738
+ "reason": request.reason
739
+ }
740
+
741
+ if request.custom_notes is not None:
742
+ updates["custom_notes"] = request.custom_notes
743
+ if request.sentiment_score is not None:
744
+ updates["score_sentiment"] = request.sentiment_score
745
+ if request.expertise_score is not None:
746
+ updates["score_expertise"] = request.expertise_score
747
+ if request.engagement_score is not None:
748
+ updates["score_engagement"] = request.engagement_score
749
+ if request.best_language is not None:
750
+ updates["best_language"] = request.best_language
751
+
752
+ entry = await mentor_leaderboard_service.edit_entry(
753
+ request.mentor_id,
754
+ **updates
755
+ )
756
+ return entry
757
+ except ValueError as e:
758
+ raise HTTPException(status_code=404, detail=str(e))
759
+ except Exception as e:
760
+ logger.error(f"Edit leaderboard error: {e}")
761
+ raise HTTPException(status_code=500, detail=str(e))
762
+
763
+
764
+ @app.get("/leaderboard/export")
765
+ async def export_leaderboard(
766
+ format: str = "json",
767
+ auth: dict = Depends(require_api_key_or_auth)
768
+ ):
769
+ """
770
+ Export leaderboard in various formats.
771
+
772
+ **Formats:**
773
+ - json: Full JSON with all fields
774
+ - csv: Simplified CSV for spreadsheets
775
+ """
776
+ try:
777
+ if format not in ["json", "csv"]:
778
+ raise HTTPException(status_code=400, detail="Format must be 'json' or 'csv'")
779
+
780
+ data = await mentor_leaderboard_service.export_leaderboard(format)
781
+
782
+ if format == "csv":
783
+ return {
784
+ "format": "csv",
785
+ "data": data,
786
+ "message": "Copy this data into a CSV file"
787
+ }
788
+
789
+ return {
790
+ "format": "json",
791
+ "data": data
792
+ }
793
+ except HTTPException:
794
+ raise
795
+ except Exception as e:
796
+ logger.error(f"Export leaderboard error: {e}")
797
+ raise HTTPException(status_code=500, detail=str(e))
798
+
799
+
800
+
801
+
802
  if __name__ == "__main__":
803
  import uvicorn
804
  port = int(os.getenv("PORT", "7860"))
models/mentor_leaderboard.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Mentor Leaderboard Models - AI-powered ranking with sentiment analysis.
3
+ """
4
+ from pydantic import BaseModel, Field
5
+ from typing import Optional, List, Dict, Any
6
+ from datetime import datetime, timezone
7
+ from enum import Enum
8
+ import uuid
9
+
10
+
11
class MentorLeaderboardEntry(BaseModel):
    """A mentor's ranking entry in the AI leaderboard.

    All component scores are on a 0-100 scale; `overall_score` is the
    weighted blend of the sentiment / expertise / engagement components.
    """
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    mentor_id: str
    mentor_username: str

    # Ranking scores (0-100)
    overall_score: float = 0.0  # Weighted average of all metrics
    sentiment_score: float = 0.0  # Based on conversation sentiment analysis
    expertise_score: float = 0.0  # Based on language proficiency
    engagement_score: float = 0.0  # Based on message frequency, session count

    # Best programming language
    best_language: Optional[str] = None
    # {"python": 85.0, "javascript": 72.0, ...}
    # Use default_factory (not a bare `{}`) so every instance gets its own
    # dict, matching the style of the other Field(...) defaults in this model.
    language_proficiency: Dict[str, float] = Field(default_factory=dict)

    # Sentiment breakdown
    avg_sentiment_score: float = 0.0
    positive_sentiment_ratio: float = 0.0  # 0-1
    conversations_analyzed: int = 0

    # Expertise metrics
    total_sessions: int = 0
    total_mentees: int = 0
    avg_session_duration_minutes: float = 0.0

    # Leaderboard position
    rank: int = 0
    rank_change: int = 0  # +/- from last ranking

    # Customization/Editing
    is_custom_edited: bool = False
    custom_notes: Optional[str] = None
    # {field: adjustment_value}
    manual_adjustments: Dict[str, float] = Field(default_factory=dict)

    # Metadata
    last_updated: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    last_edited_by: Optional[str] = None  # Maintainer username
    # [{timestamp, edited_by, field, old_value, new_value}]
    edit_history: List[Dict] = Field(default_factory=list)
50
+
51
+
52
class LeaderboardEdit(BaseModel):
    """Record of a leaderboard edit made by maintainer."""
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    entry_id: str # Leaderboard entry ID being edited
    mentor_id: str
    edited_by: str # Maintainer username

    # What was changed.
    # BUGFIX: the annotations below were `any` — the builtin function, not a
    # type — which is not a valid Pydantic field annotation.  `typing.Any`
    # is the correct "accept anything" annotation.
    field: str # Which field was edited
    old_value: Any
    new_value: Any
    reason: Optional[str] = None

    timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
66
+
67
+
68
class LeaderboardConfig(BaseModel):
    """Configuration for leaderboard generation."""
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))

    # Weighting for overall score (components should sum to 1.0)
    sentiment_weight: float = 0.35 # How much sentiment impacts score
    expertise_weight: float = 0.40
    engagement_weight: float = 0.25

    # Language detection patterns.
    # default_factory (not a bare list literal) so each config instance owns
    # its own list, consistent with the other Field(...) defaults here.
    programming_languages: List[str] = Field(default_factory=lambda: [
        "python", "javascript", "typescript", "java", "cpp", "c++", "rust",
        "go", "ruby", "php", "swift", "kotlin", "scala", "clojure",
        "react", "vue", "angular", "django", "flask", "fastapi",
        "node", "express", "nextjs", "nuxt"
    ])

    # Sentiment thresholds
    positive_sentiment_threshold: float = 0.6
    negative_sentiment_threshold: float = 0.4

    # Session filters
    min_sessions_for_ranking: int = 1
    days_lookback: int = 90 # Only include recent conversations

    updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
94
+
95
+
96
class LeaderboardResponse(BaseModel):
    """Paginated leaderboard response."""
    # Ranked entries for the requested page (already sorted by rank).
    entries: List[MentorLeaderboardEntry]
    # Total number of ranked mentors overall (for pagination), not len(entries).
    total_mentors: int
    # When this response was produced.
    timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    # Config used to generate the ranking, when available.
    config: Optional[LeaderboardConfig] = None
requirements.txt CHANGED
@@ -48,5 +48,11 @@ PyJWT>=2.8.0
48
  redis>=5.0.0
49
 
50
  # Turso (libsql) Database - try both package names
 
 
 
 
 
 
51
  libsql-experimental>=0.0.55
52
  libsql_client
 
48
  redis>=5.0.0
49
 
50
  # Turso (libsql) Database - try both package names
51
+ libsql-client>=0.5.0
52
+
53
+ # Sentiment Analysis - Local Hugging Face models (DistilBERT)
54
+ transformers>=4.40.0
55
+ torch>=2.1.0 # CPU version is fine, auto-installs CPU build if GPU not detected
56
+ sentencepiece>=0.1.99
57
  libsql-experimental>=0.0.55
58
  libsql_client  # NOTE: duplicate of libsql-client>=0.5.0 above (pip normalizes _ and -) — keep only one
services/mentor_leaderboard_service.py ADDED
@@ -0,0 +1,418 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Mentor Leaderboard Service - AI-powered ranking system with sentiment analysis.
3
+
4
+ Generates mentor rankings based on:
5
+ - Sentiment analysis of conversations
6
+ - Programming language expertise detection
7
+ - Engagement metrics (session count, mentee count)
8
+ - Manual edits by maintainer
9
+
10
+ Saves rankings to MongoDB for persistence.
11
+ """
12
+ import logging
13
+ from typing import List, Dict, Optional
14
+ from datetime import datetime, timezone, timedelta
15
+ from collections import defaultdict, Counter
16
+ import re
17
+
18
+ from config.database import db
19
+ from models.mentor_leaderboard import (
20
+ MentorLeaderboardEntry,
21
+ LeaderboardConfig,
22
+ LeaderboardEdit,
23
+ LeaderboardResponse
24
+ )
25
+ from services.sentiment_analysis_service import sentiment_analysis_service
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
+ class MentorLeaderboardService:
31
+ """Service for generating and managing mentor leaderboards."""
32
+
33
+ def __init__(self):
34
+ self.db = db
35
+ self.sentiment_service = sentiment_analysis_service
36
+ self.config: Optional[LeaderboardConfig] = None
37
+ self._config_cache_time = None
38
+
39
async def _get_config(self) -> LeaderboardConfig:
    """Return the leaderboard configuration, loading or creating it on demand.

    The configuration is cached in memory for one hour; after that it is
    re-read from the `leaderboard_config` collection.  If no config document
    exists yet, a default one is created and persisted.
    """
    now = datetime.now(timezone.utc)

    # Serve from the in-memory cache while it is still fresh (< 1 hour old).
    if self.config and self._config_cache_time:
        age_seconds = (now - self._config_cache_time).total_seconds()
        if age_seconds < 3600:
            return self.config

    stored = await self.db.leaderboard_config.find_one({})
    if stored:
        stored.pop('_id', None)  # Mongo's _id is not a model field
        self.config = LeaderboardConfig(**stored)
    else:
        # First run: persist a default configuration document.
        self.config = LeaderboardConfig()
        await self.db.leaderboard_config.insert_one(self.config.dict())

    self._config_cache_time = now
    return self.config
56
+
57
+ async def analyze_mentor_conversations(self, mentor_id: str, mentor_username: str) -> Dict:
58
+ """
59
+ Analyze all conversations for a single mentor.
60
+
61
+ Fetches all chat sessions, extracts sentiment data, detects languages.
62
+ """
63
+ config = await self._get_config()
64
+
65
+ # Fetch all chat sessions for this mentor
66
+ sessions = await self.db.chat_sessions.find({
67
+ "mentor_id": mentor_id,
68
+ "status": "completed"
69
+ }).to_list(None)
70
+
71
+ if not sessions:
72
+ logger.info(f"No completed sessions for mentor {mentor_username}")
73
+ return {
74
+ "mentor_id": mentor_id,
75
+ "mentor_username": mentor_username,
76
+ "total_sessions": 0,
77
+ "conversations_analyzed": 0,
78
+ "avg_sentiment_score": 0.0,
79
+ "positive_sentiment_ratio": 0.0,
80
+ "language_proficiency": {},
81
+ "best_language": None,
82
+ "total_mentees": 0,
83
+ "avg_session_duration_minutes": 0.0
84
+ }
85
+
86
+ # Collect all messages from these sessions
87
+ sentiment_scores = []
88
+ language_mentions = defaultdict(int)
89
+ total_mentees = set()
90
+ total_duration = 0
91
+ all_messages = []
92
+
93
+ for session in sessions:
94
+ session_id = session.get('_id') or session.get('id')
95
+ total_mentees.update(session.get('mentee_ids', []))
96
+ total_duration += session.get('duration_minutes', 0)
97
+
98
+ # Fetch all messages in this session
99
+ messages = await self.db.chat_messages.find({
100
+ "session_id": str(session_id)
101
+ }).to_list(None)
102
+
103
+ all_messages.extend(messages)
104
+
105
+ # Analyze sentiment of all mentor messages
106
+ mentor_messages = [m for m in all_messages if m.get('is_mentor', False)]
107
+
108
+ if mentor_messages:
109
+ message_texts = [m.get('content', '') for m in mentor_messages]
110
+
111
+ # Batch sentiment analysis
112
+ comments = [
113
+ {"id": f"msg_{i}", "body": text, "author": mentor_username}
114
+ for i, text in enumerate(message_texts)
115
+ ]
116
+
117
+ sentiment_results = self.sentiment_service.analyze_batch(comments)
118
+
119
+ for result in sentiment_results:
120
+ score = result.get('sentiment_score', 0.5)
121
+ sentiment_scores.append(score)
122
+
123
+ # Extract programming languages from all messages
124
+ for message in mentor_messages:
125
+ content = message.get('content', '').lower()
126
+ language = message.get('language', '').lower()
127
+
128
+ # Check for language mentions in message content
129
+ for lang in config.programming_languages:
130
+ pattern = r'\b' + re.escape(lang) + r'\b'
131
+ matches = len(re.findall(pattern, content))
132
+ if matches > 0:
133
+ language_mentions[lang] += matches
134
+
135
+ # Also check explicit language tags
136
+ if language and language in config.programming_languages:
137
+ language_mentions[language] += 5 # Higher weight for explicit tags
138
+
139
+ # Compute aggregates
140
+ avg_sentiment = sum(sentiment_scores) / len(sentiment_scores) if sentiment_scores else 0.5
141
+
142
+ positive_count = sum(1 for s in sentiment_scores if s >= config.positive_sentiment_threshold)
143
+ positive_ratio = positive_count / len(sentiment_scores) if sentiment_scores else 0.0
144
+
145
+ # Normalize language proficiency (0-100 scale)
146
+ best_language = None
147
+ language_proficiency = {}
148
+
149
+ if language_mentions:
150
+ max_mentions = max(language_mentions.values())
151
+ for lang, mentions in sorted(language_mentions.items(), key=lambda x: x[1], reverse=True)[:10]:
152
+ score = (mentions / max_mentions) * 100
153
+ language_proficiency[lang] = score
154
+ if not best_language:
155
+ best_language = lang
156
+
157
+ avg_duration = total_duration / len(sessions) if sessions else 0
158
+
159
+ return {
160
+ "mentor_id": mentor_id,
161
+ "mentor_username": mentor_username,
162
+ "total_sessions": len(sessions),
163
+ "conversations_analyzed": len(sentiment_scores),
164
+ "avg_sentiment_score": round(avg_sentiment, 2),
165
+ "positive_sentiment_ratio": round(positive_ratio, 2),
166
+ "language_proficiency": language_proficiency,
167
+ "best_language": best_language,
168
+ "total_mentees": len(total_mentees),
169
+ "avg_session_duration_minutes": round(avg_duration, 1),
170
+ "sentiment_scores": sentiment_scores # For internal calculation
171
+ }
172
+
173
+ async def generate_leaderboard(self, exclude_maintainer_id: Optional[str] = None) -> LeaderboardResponse:
174
+ """
175
+ Generate complete mentor leaderboard.
176
+
177
+ Analyzes all mentors' conversations and ranks them.
178
+ Excludes the maintainer if specified.
179
+ """
180
+ config = await self._get_config()
181
+ logger.info("Generating mentor leaderboard...")
182
+
183
+ # Fetch all active mentor profiles
184
+ mentors = await self.db.mentor_profiles.find({
185
+ "is_active": True
186
+ }).to_list(None)
187
+
188
+ if exclude_maintainer_id:
189
+ mentors = [m for m in mentors if m.get('user_id') != exclude_maintainer_id]
190
+
191
+ entries = []
192
+ mentor_scores = {}
193
+
194
+ # Analyze each mentor
195
+ for mentor in mentors:
196
+ mentor_id = mentor.get('user_id') or mentor.get('id')
197
+ mentor_username = mentor.get('username', 'Unknown')
198
+
199
+ analysis = await self.analyze_mentor_conversations(mentor_id, mentor_username)
200
+
201
+ # Only include mentors with minimum session requirement
202
+ if analysis['total_sessions'] < config.min_sessions_for_ranking:
203
+ logger.debug(f"Mentor {mentor_username} has fewer than {config.min_sessions_for_ranking} sessions")
204
+ continue
205
+
206
+ # Compute component scores (0-100)
207
+ sentiment_score = (analysis['avg_sentiment_score'] * 100) if analysis['conversations_analyzed'] > 0 else 50
208
+
209
+ engagement_score = min(100, (analysis['total_sessions'] / 10) * 100) # Scale by 10
210
+ expertise_score = max(analysis['language_proficiency'].values()) if analysis['language_proficiency'] else 50
211
+
212
+ # Compute weighted overall score
213
+ overall_score = (
214
+ (sentiment_score * config.sentiment_weight) +
215
+ (expertise_score * config.expertise_weight) +
216
+ (engagement_score * config.engagement_weight)
217
+ )
218
+
219
+ mentor_scores[mentor_id] = overall_score
220
+
221
+ # Check for existing entry (for edit history)
222
+ existing_entry = await self.db.leaderboard_entries.find_one({"mentor_id": mentor_id})
223
+ old_overall_score = existing_entry.get('overall_score', 0) if existing_entry else 0
224
+
225
+ # Create leaderboard entry
226
+ entry = MentorLeaderboardEntry(
227
+ mentor_id=mentor_id,
228
+ mentor_username=mentor_username,
229
+ overall_score=round(overall_score, 2),
230
+ sentiment_score=round(sentiment_score, 2),
231
+ expertise_score=round(expertise_score, 2),
232
+ engagement_score=round(engagement_score, 2),
233
+ best_language=analysis['best_language'],
234
+ language_proficiency=analysis['language_proficiency'],
235
+ avg_sentiment_score=round(analysis['avg_sentiment_score'], 2),
236
+ positive_sentiment_ratio=round(analysis['positive_sentiment_ratio'], 2),
237
+ conversations_analyzed=analysis['conversations_analyzed'],
238
+ total_sessions=analysis['total_sessions'],
239
+ total_mentees=analysis['total_mentees'],
240
+ avg_session_duration_minutes=analysis['avg_session_duration_minutes'],
241
+ is_custom_edited=existing_entry.get('is_custom_edited', False) if existing_entry else False,
242
+ custom_notes=existing_entry.get('custom_notes', '') if existing_entry else None,
243
+ manual_adjustments=existing_entry.get('manual_adjustments', {}) if existing_entry else {},
244
+ last_edited_by=existing_entry.get('last_edited_by') if existing_entry else None,
245
+ edit_history=existing_entry.get('edit_history', []) if existing_entry else []
246
+ )
247
+
248
+ entries.append(entry)
249
+
250
+ # Sort by overall score and assign ranks
251
+ entries.sort(key=lambda e: e.overall_score, reverse=True)
252
+
253
+ for i, entry in enumerate(entries):
254
+ entry.rank = i + 1
255
+
256
+ # Calculate rank change from previous ranking
257
+ if mentor_scores:
258
+ prev_rank = 0
259
+ for j, other_id in enumerate(sorted(mentor_scores.keys(), key=lambda x: mentor_scores[x], reverse=True)):
260
+ if other_id == entry.mentor_id:
261
+ prev_rank = j + 1
262
+ break
263
+ if prev_rank > 0:
264
+ entry.rank_change = prev_rank - entry.rank
265
+
266
+ # Save entries to database
267
+ for entry in entries:
268
+ await self.db.leaderboard_entries.update_one(
269
+ {"mentor_id": entry.mentor_id},
270
+ {"$set": entry.dict()},
271
+ upsert=True
272
+ )
273
+
274
+ logger.info(f"Generated leaderboard with {len(entries)} mentors")
275
+
276
+ return LeaderboardResponse(
277
+ entries=entries,
278
+ total_mentors=len(entries),
279
+ config=config
280
+ )
281
+
282
+ async def get_leaderboard(self, limit: int = 50, skip: int = 0) -> LeaderboardResponse:
283
+ """Fetch cached leaderboard from database."""
284
+ entries_data = await self.db.leaderboard_entries.find({}).sort(
285
+ "rank", 1
286
+ ).skip(skip).limit(limit).to_list(None)
287
+
288
+ entries = []
289
+ for data in entries_data:
290
+ data.pop('_id', None)
291
+ entries.append(MentorLeaderboardEntry(**data))
292
+
293
+ total = await self.db.leaderboard_entries.count_documents({})
294
+ config = await self._get_config()
295
+
296
+ return LeaderboardResponse(
297
+ entries=entries,
298
+ total_mentors=total,
299
+ config=config
300
+ )
301
+
302
+ async def edit_entry(self, mentor_id: str, **updates) -> MentorLeaderboardEntry:
303
+ """
304
+ Allow maintainer to edit a leaderboard entry.
305
+
306
+ Tracks all edits in edit_history.
307
+ """
308
+ maintainer_id = updates.pop('edited_by', 'admin')
309
+ reason = updates.pop('reason', 'Manual adjustment')
310
+
311
+ # Fetch existing entry
312
+ entry_data = await self.db.leaderboard_entries.find_one({"mentor_id": mentor_id})
313
+ if not entry_data:
314
+ raise ValueError(f"Leaderboard entry not found for mentor {mentor_id}")
315
+
316
+ entry_data.pop('_id', None)
317
+ entry = MentorLeaderboardEntry(**entry_data)
318
+
319
+ # Record edits
320
+ edit_history = entry.edit_history or []
321
+
322
+ for field, new_value in updates.items():
323
+ if field in ['custom_notes', 'manual_adjustments']:
324
+ # Direct field updates
325
+ old_value = getattr(entry, field, None)
326
+ setattr(entry, field, new_value)
327
+ entry.is_custom_edited = True
328
+ elif field.startswith('score_'):
329
+ # Score adjustments (sentiment_score, expertise_score, etc.)
330
+ actual_field = field.replace('score_', '') + '_score'
331
+ old_value = getattr(entry, actual_field, 0)
332
+ setattr(entry, actual_field, new_value)
333
+ entry.manual_adjustments[actual_field] = new_value - old_value
334
+ entry.is_custom_edited = True
335
+
336
+ # Log the edit
337
+ edit_history.append({
338
+ "timestamp": datetime.now(timezone.utc).isoformat(),
339
+ "edited_by": maintainer_id,
340
+ "field": field,
341
+ "old_value": old_value,
342
+ "new_value": new_value,
343
+ "reason": reason
344
+ })
345
+
346
+ # Recalculate overall score if component scores changed
347
+ config = await self._get_config()
348
+ entry.overall_score = (
349
+ (entry.sentiment_score * config.sentiment_weight) +
350
+ (entry.expertise_score * config.expertise_weight) +
351
+ (entry.engagement_score * config.engagement_weight)
352
+ )
353
+
354
+ entry.edit_history = edit_history[-100:] # Keep last 100 edits
355
+ entry.last_edited_by = maintainer_id
356
+ entry.last_updated = datetime.now(timezone.utc)
357
+
358
+ # Save back to database
359
+ await self.db.leaderboard_entries.update_one(
360
+ {"mentor_id": mentor_id},
361
+ {"$set": entry.dict()},
362
+ upsert=False
363
+ )
364
+
365
+ logger.info(f"Updated leaderboard entry for {mentor_id}: {updates}")
366
+
367
+ return entry
368
+
369
async def get_entry(self, mentor_id: str) -> Optional[MentorLeaderboardEntry]:
    """Look up one mentor's leaderboard entry; None if the mentor is unranked."""
    doc = await self.db.leaderboard_entries.find_one({"mentor_id": mentor_id})
    if not doc:
        return None
    # Strip Mongo's internal id before model validation.
    doc.pop('_id', None)
    return MentorLeaderboardEntry(**doc)
376
+
377
async def export_leaderboard(self, format: str = "json") -> str:
    """Serialize the cached leaderboard as a string.

    Supported formats: "json" (full entry dicts) and "csv" (one summary row
    per mentor).  Any other format yields an empty string — the API layer
    validates the format before calling, so that path is defensive only.
    """
    response = await self.get_leaderboard(limit=1000)

    if format == "json":
        import json
        # default=str handles datetime fields in the entry dicts.
        return json.dumps([e.dict() for e in response.entries], default=str)

    if format == "csv":
        import csv
        from io import StringIO

        # Column names double as entry attribute names below.
        columns = [
            "rank", "mentor_username", "overall_score", "sentiment_score",
            "expertise_score", "engagement_score", "best_language",
            "total_sessions", "avg_sentiment_score"
        ]
        buffer = StringIO()
        writer = csv.DictWriter(buffer, fieldnames=columns)
        writer.writeheader()
        for entry in response.entries:
            writer.writerow({name: getattr(entry, name) for name in columns})
        return buffer.getvalue()

    return ""
415
+
416
+
417
+ # Singleton instance
418
+ mentor_leaderboard_service = MentorLeaderboardService()