Spaces:

parthnuwal7
/

FCT

Sleeping

FCT / services /domain_plugins /tech_plugin.py

Parthnuwal7

Adding analytical content

3d015cd 3 months ago

10.6 kB

	"""Tech/CS Domain Plugin

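	Scores technical competency based on:
	- GitHub profile and repositories (public repo count, followers, stars, forks, descriptions, recent push events)
	- LeetCode handle (placeholder: a fixed neutral score when a handle is provided)
	- Portfolio link (reachability check plus a bonus for a custom domain)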
	"""
	import re
	import time
	import logging
	import requests
	from typing import Dict, List
	from .base_plugin import BaseDomainPlugin, DomainScore
	from .plugin_factory import register_plugin

	# Module-level logger, named after this module via __name__.
	logger = logging.getLogger(__name__)


	@register_plugin('tech')
	class TechPlugin(BaseDomainPlugin):
	    """Technical domain scoring plugin.

	    Produces a weighted 0-1 score from five features: GitHub profile
	    activity, GitHub repository quality, recent GitHub push activity, a
	    LeetCode handle (placeholder heuristic) and a portfolio reachability
	    check. Every analyzer is best-effort: failures are logged and mapped to
	    a fixed fallback score instead of raising.
	    """

	    # Shared settings for outbound HTTP calls.
	    _GITHUB_API = "https://api.github.com"
	    _GITHUB_HEADERS = {'Accept': 'application/vnd.github.v3+json'}
	    _HTTP_TIMEOUT = 5  # seconds

	    # Scores returned when a remote lookup fails (non-200 status or exception).
	    # NOTE(review): a 404 (unknown user) also receives these fallbacks, so a
	    # bogus GitHub URL still earns partial credit - confirm this is intended.
	    _GITHUB_FALLBACK = 0.3
	    _RECENT_FALLBACK = 0.5
	    _PORTFOLIO_FALLBACK = 0.2

	    _TOP_REPO_COUNT = 5
	    _RECENT_WINDOW_DAYS = 90

	    # Hosts that do not earn the "custom domain" portfolio bonus.
	    _HOSTED_PLATFORMS = ('github.io', 'netlify.app', 'vercel.app', 'repl.it')

	    # The lookbehind stops look-alike hosts such as "notgithub.com" from
	    # matching; subdomains ("www.github.com") and bare "github.com/user"
	    # inputs are still accepted.
	    _GITHUB_USER_RE = re.compile(
	        r'(?<![\w-])github\.com/([a-zA-Z0-9_-]+)', re.IGNORECASE
	    )

	    def _get_domain_type(self) -> str:
	        """Return the domain identifier of this plugin."""
	        return 'tech'

	    def _get_feature_weights(self) -> Dict[str, float]:
	        """Return the weight of each feature; the weights sum to 1.0."""
	        return {
	            'github_activity_score': 0.30,
	            'github_repo_quality': 0.20,
	            'leetcode_score': 0.25,
	            'portfolio_depth': 0.15,
	            'recent_activity': 0.10,
	        }

	    def get_required_fields(self) -> List[str]:
	        """Return the evidence keys that must be present."""
	        return ['github_url']  # At least GitHub is required

	    def get_optional_fields(self) -> List[str]:
	        """Return the evidence keys that may be present."""
	        return ['leetcode_handle', 'portfolio_url', 'linkedin_url']

	    def score(self, evidence_data: Dict) -> DomainScore:
	        """Calculate the tech domain score.

	        Args:
	            evidence_data: Mapping that may contain 'github_url',
	                'leetcode_handle' and 'portfolio_url'. A missing or empty
	                value gives the corresponding feature(s) a score of 0.0.

	        Returns:
	            A DomainScore carrying the weighted score (capped at 1.0), the
	            confidence from self.calculate_confidence(), the raw per-feature
	            scores and the processing time in milliseconds.
	        """
	        # Monotonic clock: elapsed time is unaffected by wall-clock changes.
	        started = time.perf_counter()
	        features = {}

	        # GitHub analysis
	        github_url = evidence_data.get('github_url', '')
	        if github_url:
	            features['github_activity_score'] = self._analyze_github_activity(github_url)
	            features['github_repo_quality'] = self._analyze_repo_quality(github_url)
	            features['recent_activity'] = self._check_recent_commits(github_url)
	        else:
	            features['github_activity_score'] = 0.0
	            features['github_repo_quality'] = 0.0
	            features['recent_activity'] = 0.0

	        # LeetCode analysis
	        leetcode_handle = evidence_data.get('leetcode_handle', '')
	        features['leetcode_score'] = (
	            self._analyze_leetcode(leetcode_handle) if leetcode_handle else 0.0
	        )

	        # Portfolio analysis
	        portfolio_url = evidence_data.get('portfolio_url', '')
	        features['portfolio_depth'] = (
	            self._analyze_portfolio(portfolio_url) if portfolio_url else 0.0
	        )

	        # Weighted sum over the features.
	        # self.feature_weights is not defined in this class; presumably the
	        # base class fills it from _get_feature_weights() - TODO confirm.
	        weighted = sum(
	            value * self.feature_weights[name]
	            for name, value in features.items()
	        )

	        confidence = self.calculate_confidence(evidence_data)
	        processing_time = (time.perf_counter() - started) * 1000

	        return DomainScore(
	            domain_type='tech',
	            score=min(weighted, 1.0),
	            confidence=confidence,
	            raw_features=features,
	            processing_time_ms=processing_time,
	        )

	    def _github_get(self, path: str, params=None):
	        """GET a GitHub REST API path and return the decoded JSON body.

	        Args:
	            path: API path starting with '/', e.g. '/users/<name>'.
	            params: Optional dict of query-string parameters.

	        Returns:
	            The parsed JSON payload, or None (after logging a warning) when
	            the response status is not 200.

	        Raises:
	            Whatever requests.get() or response.json() raise; the analyzers
	            calling this helper catch Exception and return a fallback score.
	        """
	        response = requests.get(
	            f"{self._GITHUB_API}{path}",
	            headers=self._GITHUB_HEADERS,
	            params=params,
	            timeout=self._HTTP_TIMEOUT,
	        )
	        if response.status_code != 200:
	            logger.warning("GitHub API error for %s: %s", path, response.status_code)
	            return None
	        return response.json()

	    def _analyze_github_activity(self, github_url: str) -> float:
	        """
	        Analyze GitHub profile activity
	        Returns: 0-1 score based on public repo count, followers and following;
	        0.0 when no username can be extracted, 0.3 when the lookup fails.
	        """
	        try:
	            username = self._extract_github_username(github_url)
	            if not username:
	                return 0.0

	            profile = self._github_get(f"/users/{username}")
	            if profile is None:
	                return self._GITHUB_FALLBACK  # Fallback score if API fails

	            public_repos = profile.get('public_repos', 0) or 0
	            followers = profile.get('followers', 0) or 0
	            following = profile.get('following', 0) or 0

	            # Simple scoring heuristic
	            repo_score = min(public_repos / 20, 1.0) * 0.5  # 20+ repos = max
	            follower_score = min(followers / 50, 1.0) * 0.3  # 50+ followers = max
	            engagement_score = min((followers + following) / 100, 1.0) * 0.2

	            total_score = repo_score + follower_score + engagement_score

	            logger.info("GitHub activity for %s: %.2f", username, total_score)
	            return total_score

	        except Exception as e:
	            logger.error("Error analyzing GitHub activity: %s", e)
	            return self._GITHUB_FALLBACK

	    def _analyze_repo_quality(self, github_url: str) -> float:
	        """
	        Analyze quality of the user's most-starred repositories
	        Returns: 0-1 score based on stars, forks, descriptions and wiki/pages
	        flags of the top five repos; 0.0 when there is no username or no
	        repos, 0.3 when the lookup fails.
	        """
	        try:
	            username = self._extract_github_username(github_url)
	            if not username:
	                return 0.0

	            # The user-repos endpoint cannot sort by stars, so fetch one
	            # (maximum-size) page and rank it here. Users with more than 100
	            # repos are ranked on that first page only.
	            repos = self._github_get(
	                f"/users/{username}/repos", params={'per_page': 100}
	            )
	            if repos is None:
	                return self._GITHUB_FALLBACK
	            if not repos:
	                return 0.0

	            top_repos = sorted(
	                repos,
	                key=lambda repo: repo.get('stargazers_count') or 0,
	                reverse=True,
	            )[:self._TOP_REPO_COUNT]

	            total_stars = sum(r.get('stargazers_count') or 0 for r in top_repos)
	            total_forks = sum(r.get('forks_count') or 0 for r in top_repos)
	            described = sum(1 for r in top_repos if r.get('description'))
	            # NOTE(review): has_wiki / has_pages stand in for "has a README";
	            # the repo listing carries no README flag, so this is a proxy.
	            documented = sum(
	                1 for r in top_repos if r.get('has_wiki') or r.get('has_pages')
	            )

	            star_score = min(total_stars / 50, 1.0) * 0.4  # 50+ stars = max
	            fork_score = min(total_forks / 20, 1.0) * 0.2  # 20+ forks = max
	            # Divide by the fixed top-repo count (not len(top_repos)) so that
	            # accounts with fewer repos cannot reach the full sub-score.
	            desc_score = (described / self._TOP_REPO_COUNT) * 0.2
	            readme_score = (documented / self._TOP_REPO_COUNT) * 0.2

	            total_score = star_score + fork_score + desc_score + readme_score

	            logger.info("GitHub repo quality for %s: %.2f", username, total_score)
	            return total_score

	        except Exception as e:
	            logger.error("Error analyzing repo quality: %s", e)
	            return self._GITHUB_FALLBACK

	    def _check_recent_commits(self, github_url: str) -> float:
	        """
	        Check for recent activity (push events in the last 90 days)
	        Returns: 0-1 score based on the number of recent PushEvents; 0.0 when
	        no username can be extracted, 0.5 when the lookup fails.
	        """
	        from datetime import datetime, timedelta, timezone

	        try:
	            username = self._extract_github_username(github_url)
	            if not username:
	                return 0.0

	            events = self._github_get(
	                f"/users/{username}/events/public", params={'per_page': 30}
	            )
	            if events is None:
	                return self._RECENT_FALLBACK  # Neutral fallback

	            # Event timestamps are UTC ("...Z"), so the cutoff must be UTC as
	            # well; a naive local now() would shift the window by the host's
	            # UTC offset.
	            cutoff = datetime.now(timezone.utc) - timedelta(
	                days=self._RECENT_WINDOW_DAYS
	            )

	            recent_commits = 0
	            for event in events:
	                if event.get('type') != 'PushEvent':
	                    continue
	                try:
	                    created_at = datetime.strptime(
	                        event['created_at'], '%Y-%m-%dT%H:%M:%SZ'
	                    ).replace(tzinfo=timezone.utc)
	                except (KeyError, TypeError, ValueError):
	                    # Skip a malformed event instead of discarding the count.
	                    continue
	                if created_at > cutoff:
	                    recent_commits += 1

	            # Each PushEvent counts once, regardless of how many commits it
	            # carries; 20+ pushes in the window = max.
	            score = min(recent_commits / 20, 1.0)

	            logger.info(
	                "Recent activity for %s: %.2f (%s commits)",
	                username, score, recent_commits,
	            )
	            return score

	        except Exception as e:
	            logger.error("Error checking recent activity: %s", e)
	            return self._RECENT_FALLBACK

	    def _analyze_leetcode(self, leetcode_handle: str) -> float:
	        """
	        Analyze LeetCode profile (placeholder)
	        Returns: a fixed neutral score of 0.5 whenever a handle is supplied

	        Note: no LeetCode data is fetched. A real implementation would need an
	        unofficial API or scraping with proper rate limiting.
	        """
	        logger.info("LeetCode handle provided: %s", leetcode_handle)
	        return 0.5  # Neutral score when handle exists

	    def _analyze_portfolio(self, portfolio_url: str) -> float:
	        """
	        Analyze portfolio website
	        Returns: 0.7 when the URL answers with status 200, plus 0.3 when its
	        host is not one of the known hosting platforms; 0.2 when the URL is not
	        reachable or the check raises.
	        """
	        from urllib.parse import urlparse

	        try:
	            portfolio_url = portfolio_url.strip()
	            if not portfolio_url.lower().startswith(('http://', 'https://')):
	                portfolio_url = 'https://' + portfolio_url

	            # NOTE(review): this fetches a user-supplied URL from the server
	            # (SSRF surface). Consider restricting schemes/hosts upstream.
	            response = requests.head(
	                portfolio_url, timeout=self._HTTP_TIMEOUT, allow_redirects=True
	            )
	            if response.status_code in (405, 501):
	                # Some servers reject HEAD; retry with GET without
	                # downloading the body.
	                response = requests.get(
	                    portfolio_url,
	                    timeout=self._HTTP_TIMEOUT,
	                    allow_redirects=True,
	                    stream=True,
	                )
	                response.close()

	            if response.status_code != 200:
	                logger.warning("Portfolio not accessible: %s", portfolio_url)
	                return self._PORTFOLIO_FALLBACK  # Some credit for providing URL

	            # Portfolio exists and is accessible
	            score = 0.7

	            # Bonus for custom domain (not github.io, netlify.app, etc.).
	            # Compare against the hostname only, so a platform name that
	            # merely appears in the path or query does not cancel the bonus.
	            host = (urlparse(portfolio_url).hostname or '').lower()
	            on_hosted_platform = any(
	                host == platform or host.endswith('.' + platform)
	                for platform in self._HOSTED_PLATFORMS
	            )
	            if not on_hosted_platform:
	                score += 0.3

	            logger.info("Portfolio accessible: %s (score: %s)", portfolio_url, score)
	            return min(score, 1.0)

	        except Exception as e:
	            logger.error("Error analyzing portfolio: %s", e)
	            return self._PORTFOLIO_FALLBACK

	    def _extract_github_username(self, github_url: str) -> str:
	        """Extract username from GitHub URL

	        Handles formats such as https://github.com/username,
	        www.github.com/username or github.com/username (host matched
	        case-insensitively).
	        Returns: the first path segment after github.com/, or '' when the
	        input is not a string or contains no GitHub profile path.
	        """
	        if not isinstance(github_url, str):
	            return ''
	        match = self._GITHUB_USER_RE.search(github_url)
	        return match.group(1) if match else ''