Spaces:
Sleeping
Sleeping
| """Tech/CS Domain Plugin | |
| Scores technical competency based on: | |
| - GitHub profile and repositories (repo count, followers, stars, forks, descriptions, recent pushes) | |
| - LeetCode handle (placeholder: a fixed neutral score whenever a handle is supplied) | |
| - Portfolio link (reachability check plus a bonus for a custom domain) | |
| """ | |
| import re | |
| import time | |
| import logging | |
| import requests | |
| from typing import Dict, List | |
| from .base_plugin import BaseDomainPlugin, DomainScore | |
| from .plugin_factory import register_plugin | |
| logger = logging.getLogger(__name__) | |
| class TechPlugin(BaseDomainPlugin): | |
| """Technical domain scoring plugin""" | |
| def _get_domain_type(self) -> str: | |
| return 'tech' | |
| def _get_feature_weights(self) -> Dict[str, float]: | |
| return { | |
| 'github_activity_score': 0.30, | |
| 'github_repo_quality': 0.20, | |
| 'leetcode_score': 0.25, | |
| 'portfolio_depth': 0.15, | |
| 'recent_activity': 0.10 | |
| } | |
def get_required_fields(self) -> List[str]:
    """Return the evidence fields that must be supplied; only GitHub is mandatory."""
    required: List[str] = []
    required.append('github_url')
    return required
def get_optional_fields(self) -> List[str]:
    """Return the evidence fields that may be supplied but are not mandatory."""
    optional = ('leetcode_handle', 'portfolio_url', 'linkedin_url')
    return list(optional)
def score(self, evidence_data: Dict) -> DomainScore:
    """Calculate the tech domain score from the supplied evidence.

    Args:
        evidence_data: Mapping of evidence fields. The keys read here are
            'github_url', 'leetcode_handle' and 'portfolio_url'. A missing
            or empty value scores 0.0 for every feature derived from it.

    Returns:
        A DomainScore carrying the weighted sum of the raw features
        (capped at 1.0), the confidence reported by
        calculate_confidence(), the raw feature values and the elapsed
        processing time in milliseconds.
    """
    # perf_counter() is monotonic, so the measured duration cannot be
    # distorted (or go negative) when the system clock is adjusted, which
    # can happen with time.time().
    start_time = time.perf_counter()

    github_url = evidence_data.get('github_url', '')
    leetcode_handle = evidence_data.get('leetcode_handle', '')
    portfolio_url = evidence_data.get('portfolio_url', '')

    features: Dict[str, float] = {}

    # GitHub analysis: three features come from the same profile URL.
    if github_url:
        features['github_activity_score'] = self._analyze_github_activity(github_url)
        features['github_repo_quality'] = self._analyze_repo_quality(github_url)
        features['recent_activity'] = self._check_recent_commits(github_url)
    else:
        features['github_activity_score'] = 0.0
        features['github_repo_quality'] = 0.0
        features['recent_activity'] = 0.0

    # LeetCode analysis
    features['leetcode_score'] = (
        self._analyze_leetcode(leetcode_handle) if leetcode_handle else 0.0
    )

    # Portfolio analysis
    features['portfolio_depth'] = (
        self._analyze_portfolio(portfolio_url) if portfolio_url else 0.0
    )

    # Weighted sum of the features.
    # NOTE(review): self.feature_weights is not assigned in this class;
    # presumably the base class fills it from _get_feature_weights() - confirm.
    weighted_score = sum(
        value * self.feature_weights[name] for name, value in features.items()
    )

    confidence = self.calculate_confidence(evidence_data)
    processing_time = (time.perf_counter() - start_time) * 1000

    return DomainScore(
        domain_type=self._get_domain_type(),
        score=min(weighted_score, 1.0),
        confidence=confidence,
        raw_features=features,
        processing_time_ms=processing_time,
    )
def _analyze_github_activity(self, github_url: str) -> float:
    """Score the public footprint of a GitHub profile.

    Fetches the user record from the GitHub REST API and combines three
    capped signals: public repository count (weight 0.5), follower count
    (weight 0.3) and followers + following (weight 0.2). Commit and
    contribution counts are not read from this endpoint and are not part
    of the score.

    Args:
        github_url: Profile URL, e.g. "https://github.com/username".

    Returns:
        A score in [0, 1]. 0.0 when no username can be extracted or the
        account does not exist (HTTP 404); 0.3 as a fallback when the API
        answers with any other error or the request itself fails.
    """
    try:
        username = self._extract_github_username(github_url)
        if not username:
            return 0.0

        api_url = f"https://api.github.com/users/{username}"
        headers = {'Accept': 'application/vnd.github.v3+json'}
        response = requests.get(api_url, headers=headers, timeout=5)

        if response.status_code == 404:
            # The profile provably does not exist. The fallback score is
            # meant for transient failures, not for made-up usernames.
            logger.warning("GitHub API error for %s: %s", username, response.status_code)
            return 0.0
        if response.status_code != 200:
            logger.warning("GitHub API error for %s: %s", username, response.status_code)
            return 0.3  # Fallback score if API fails

        data = response.json()

        # `or 0` also covers keys that are present but null.
        public_repos = data.get('public_repos') or 0
        followers = data.get('followers') or 0
        following = data.get('following') or 0

        # Simple scoring heuristic; each component is capped at its weight.
        repo_score = min(public_repos / 20, 1.0) * 0.5  # 20+ repos = max
        follower_score = min(followers / 50, 1.0) * 0.3  # 50+ followers = max
        engagement_score = min((followers + following) / 100, 1.0) * 0.2

        total_score = repo_score + follower_score + engagement_score
        logger.info("GitHub activity for %s: %.2f", username, total_score)
        return total_score
    except Exception as e:
        # Deliberate best-effort: a network or parsing problem must not
        # abort scoring, so fall back to the same score as an API error.
        logger.error("Error analyzing GitHub activity: %s", e)
        return 0.3  # Fallback score
def _analyze_repo_quality(self, github_url: str) -> float:
    """Score the quality of a user's most-starred repositories.

    Fetches up to 100 of the user's repositories, picks the five with the
    most stars and combines: total stars (weight 0.4), total forks
    (weight 0.2), share with a description (weight 0.2) and share with a
    wiki or GitHub Pages enabled (weight 0.2). The two shares are always
    divided by 5, so fewer than five repositories cannot reach the
    maximum.

    Args:
        github_url: Profile URL, e.g. "https://github.com/username".

    Returns:
        A score in [0, 1]. 0.0 when no username can be extracted, the
        account does not exist (HTTP 404) or it has no repositories; 0.3
        as a fallback when the API answers with any other error or the
        request itself fails.
    """
    try:
        username = self._extract_github_username(github_url)
        if not username:
            return 0.0

        # The user-repos endpoint only sorts by created/updated/pushed/
        # full_name; "stars" is not a documented value. Fetch one full
        # page (100 is the maximum) and rank by stars locally instead.
        api_url = f"https://api.github.com/users/{username}/repos"
        params = {'sort': 'pushed', 'per_page': 100}
        headers = {'Accept': 'application/vnd.github.v3+json'}
        response = requests.get(api_url, headers=headers, params=params, timeout=5)

        if response.status_code == 404:
            # Non-existent account: no credit, unlike a transient failure.
            return 0.0
        if response.status_code != 200:
            return 0.3

        repos = response.json()
        if not repos:
            return 0.0

        top_repos = sorted(
            repos,
            key=lambda repo: repo.get('stargazers_count') or 0,
            reverse=True,
        )[:5]

        total_stars = sum(repo.get('stargazers_count') or 0 for repo in top_repos)
        total_forks = sum(repo.get('forks_count') or 0 for repo in top_repos)
        has_descriptions = sum(1 for repo in top_repos if repo.get('description'))
        # The repository listing carries no README flag, so wiki/pages are
        # used as a proxy for "has documentation".
        # NOTE(review): has_wiki is usually enabled by default, which makes
        # this a weak signal - consider a real README check.
        has_docs = sum(
            1 for repo in top_repos if repo.get('has_wiki') or repo.get('has_pages')
        )

        star_score = min(total_stars / 50, 1.0) * 0.4  # 50+ stars = max
        fork_score = min(total_forks / 20, 1.0) * 0.2  # 20+ forks = max
        desc_score = (has_descriptions / 5) * 0.2
        docs_score = (has_docs / 5) * 0.2

        total_score = star_score + fork_score + desc_score + docs_score
        logger.info("GitHub repo quality for %s: %.2f", username, total_score)
        return total_score
    except Exception as e:
        # Deliberate best-effort: never let a network/parsing error abort scoring.
        logger.error("Error analyzing repo quality: %s", e)
        return 0.3
def _check_recent_commits(self, github_url: str) -> float:
    """Score how recently the user has pushed code to GitHub.

    Reads the 30 most recent public events of the user and counts the
    PushEvents created within the last 90 days. Each push event counts
    once, however many commits it contains.

    Args:
        github_url: Profile URL, e.g. "https://github.com/username".

    Returns:
        A score in [0, 1], reaching 1.0 at 20 or more recent push events.
        0.0 when no username can be extracted or the account does not
        exist (HTTP 404); 0.5 as a neutral fallback when the API answers
        with any other error or the request itself fails.
    """
    from datetime import datetime, timedelta, timezone

    try:
        username = self._extract_github_username(github_url)
        if not username:
            return 0.0

        # Get recent events
        api_url = f"https://api.github.com/users/{username}/events/public?per_page=30"
        headers = {'Accept': 'application/vnd.github.v3+json'}
        response = requests.get(api_url, headers=headers, timeout=5)

        if response.status_code == 404:
            # Non-existent account: no credit, unlike a transient failure.
            return 0.0
        if response.status_code != 200:
            return 0.5  # Neutral fallback

        events = response.json()

        # GitHub timestamps are UTC ("...Z"), so the cutoff must be UTC
        # too. A naive local now() would shift the window by the server's
        # UTC offset.
        cutoff = datetime.now(timezone.utc) - timedelta(days=90)

        recent_commits = 0
        for event in events:
            if event.get('type') != 'PushEvent':
                continue
            try:
                created_at = datetime.strptime(
                    event['created_at'], '%Y-%m-%dT%H:%M:%SZ'
                ).replace(tzinfo=timezone.utc)
            except (KeyError, TypeError, ValueError):
                # Skip a malformed event instead of discarding the whole result.
                continue
            if created_at > cutoff:
                recent_commits += 1

        # Score based on push frequency
        score = min(recent_commits / 20, 1.0)  # 20+ pushes in 90 days = max
        logger.info(
            "Recent activity for %s: %.2f (%d commits)", username, score, recent_commits
        )
        return score
    except Exception as e:
        # Deliberate best-effort: never let a network/parsing error abort scoring.
        logger.error("Error checking recent activity: %s", e)
        return 0.5
def _analyze_leetcode(self, leetcode_handle: str) -> float:
    """Return a placeholder LeetCode score for a supplied handle.

    No LeetCode data is fetched. The handle is only logged and a fixed
    neutral score is returned; 0.0 is returned if that logging raises.

    Note: LeetCode has no official public API. A real implementation
    would need an unofficial API or scraping with proper rate limiting.
    """
    neutral_score = 0.5  # Neutral score when handle exists
    try:
        logger.info(f"LeetCode handle provided: {leetcode_handle}")
    except Exception as e:
        logger.error(f"Error analyzing LeetCode: {e}")
        return 0.0
    return neutral_score
def _analyze_portfolio(self, portfolio_url: str) -> float:
    """Score a portfolio website by reachability and hosting.

    Args:
        portfolio_url: Portfolio address. "https://" is prepended when no
            http(s) scheme is given.

    Returns:
        0.7 when the site answers with HTTP 200, plus 0.3 when its host
        is not one of the shared hosting domains (github.io, netlify.app,
        vercel.app, repl.it). 0.2 when the site is unreachable, answers
        with another status, or the request fails.
    """
    from urllib.parse import urlparse

    shared_hosts = ('github.io', 'netlify.app', 'vercel.app', 'repl.it')

    try:
        portfolio_url = portfolio_url.strip()
        # Schemes are case-insensitive, so compare in lower case.
        if not portfolio_url.lower().startswith(('http://', 'https://')):
            portfolio_url = 'https://' + portfolio_url

        # NOTE(security): the URL is user-supplied and fetched server-side.
        # Nothing here stops requests to internal addresses (SSRF);
        # restrict egress or validate the resolved host before exposing
        # this to untrusted users.
        response = requests.head(portfolio_url, timeout=5, allow_redirects=True)
        if response.status_code in (405, 501):
            # Some servers do not implement HEAD. Retry with GET, streamed
            # so that the body is never downloaded.
            response = requests.get(
                portfolio_url, timeout=5, allow_redirects=True, stream=True
            )
            response.close()

        if response.status_code != 200:
            logger.warning("Portfolio not accessible: %s", portfolio_url)
            return 0.2  # Some credit for providing URL

        # Portfolio exists and is accessible
        score = 0.7

        # Bonus for a custom domain. Compare against the parsed host name
        # only, so a shared-host name in the path or query string cannot
        # cancel the bonus, and match case-insensitively.
        hostname = (urlparse(portfolio_url).hostname or '').lower()
        on_shared_host = any(
            hostname == host or hostname.endswith('.' + host)
            for host in shared_hosts
        )
        if not on_shared_host:
            score += 0.3

        logger.info("Portfolio accessible: %s (score: %s)", portfolio_url, score)
        return min(score, 1.0)
    except Exception as e:
        # Deliberate best-effort: an unreachable site still earns the
        # small "URL was provided" credit.
        logger.error("Error analyzing portfolio: %s", e)
        return 0.2
| def _extract_github_username(self, github_url: str) -> str: | |
| """Extract username from GitHub URL""" | |
| # Handle formats: https://github.com/username or github.com/username | |
| pattern = r'github\.com/([a-zA-Z0-9_-]+)' | |
| match = re.search(pattern, github_url) | |
| return match.group(1) if match else '' | |