| | import gradio as gr |
| | import joblib |
| | import numpy as np |
| | import pandas as pd |
| | from sklearn.preprocessing import StandardScaler |
| | from fastapi import FastAPI, HTTPException |
| | from pydantic import BaseModel |
| | import uvicorn |
| | import os |
| | import requests |
| | import json |
| | from datetime import datetime, timedelta, timezone |
| | from typing import Dict, List, Optional |
| | from urllib.parse import urlparse |
| | import time |
| |
|
| | |
# FastAPI application; REST routes are registered below and the Gradio UI is
# mounted onto the same app at the bottom of the file.
app = FastAPI(title="Developer Productivity Prediction API", version="1.0.0")

# Pre-trained artifacts loaded once at import time. A missing/corrupt file
# raises here and prevents startup (fail-fast rather than failing per-request).
model = joblib.load('dev_productivity_model.joblib')
scaler = joblib.load('scaler.joblib')
| |
|
| | |
class ProductivityRequest(BaseModel):
    """Raw developer-activity metrics submitted to POST /predict."""

    daily_coding_hours: float      # average hours of focused coding per day
    commits_per_day: int
    pull_requests_per_week: int
    issues_closed_per_week: int
    active_repos: int              # repositories worked on concurrently
    code_reviews_per_week: int
| |
|
class ProductivityResponse(BaseModel):
    """Response body for POST /predict."""

    predicted_score: float         # model output from predict_productivity_core
    status: str                    # "success" on the happy path
| |
|
class GitHubAnalysisRequest(BaseModel):
    """Request body for POST /analyze-github."""

    repo_url: str                  # e.g. https://github.com/owner/repo
    github_token: str              # personal access token used for API auth
| |
|
class GitHubAnalysisResponse(BaseModel):
    """Response body for POST /analyze-github."""

    repo_metrics: dict             # raw metrics from RepoProductivityAnalyzer
    ml_features: dict              # features derived from repo_metrics for the model
    predicted_score: float
    productivity_indicators: dict  # boolean flags derived from metrics/score
    status: str
| |
|
| | |
class RepoProductivityAnalyzer:
    """Collects recent activity metrics for a GitHub repository via the REST API.

    All network access goes through safe_request(), which retries transient
    failures and degrades to an empty result instead of raising, so
    get_metrics() reports failures via an {'error': ...} dict rather than
    exceptions.
    """

    def __init__(self, github_token: str):
        """Store auth headers for subsequent API calls.

        Raises:
            ValueError: if the token is empty or still the placeholder value.
        """
        if not github_token or github_token == "YOUR_TOKEN_HERE":
            raise ValueError("Please provide a valid GitHub token")

        self.token = github_token
        self.headers = {
            'Authorization': f'token {github_token}',
            'Accept': 'application/vnd.github.v3+json'
        }
        self.days_back = 90     # analysis window in days
        self.max_retries = 3

    def safe_request(self, url: str, retries: Optional[int] = None):
        """GET `url` with auth headers and return the decoded JSON payload.

        Returns [] on 404, other error statuses, or after exhausting retries,
        so callers can iterate the result unconditionally.  NOTE: the payload
        is a dict for single-resource endpoints (e.g. /repos/{owner}/{repo})
        and a list for collection endpoints.
        """
        if retries is None:
            retries = self.max_retries

        for attempt in range(retries):
            try:
                response = requests.get(url, headers=self.headers, timeout=30)

                if response.status_code == 200:
                    return response.json()
                if response.status_code == 403:
                    # 403 means either rate limiting or an unauthorized token.
                    # Only sleep-and-retry when the rate limit is actually
                    # exhausted; a bad token previously hung for 60s/attempt.
                    if response.headers.get('X-RateLimit-Remaining') == '0':
                        time.sleep(60)
                        continue
                    return []
                # 404 and anything else: treat as "no data".
                return []
            except requests.exceptions.RequestException:
                if attempt < retries - 1:
                    time.sleep(2 ** attempt)    # exponential backoff
                else:
                    return []
        return []

    def parse_repo_url(self, repo_url: str) -> tuple:
        """Extract (owner, repo) from a GitHub repository URL.

        Accepts both web URLs and clone-style URLs ending in ".git".

        Raises:
            ValueError: if the URL does not contain an owner and repo segment.
        """
        try:
            path_parts = urlparse(repo_url).path.strip('/').split('/')
            if len(path_parts) < 2 or not path_parts[0] or not path_parts[1]:
                raise ValueError("Invalid GitHub URL format")
            owner, repo = path_parts[0], path_parts[1]
            # Clone URLs look like https://github.com/owner/repo.git
            if repo.endswith('.git'):
                repo = repo[:-4]
            return owner, repo
        except Exception as e:
            raise ValueError(f"Invalid repo URL: {str(e)}")

    def safe_parse_datetime(self, date_str: str) -> Optional[datetime]:
        """Parse an ISO-8601 timestamp from the GitHub API; None if absent/invalid.

        GitHub returns a trailing 'Z', which datetime.fromisoformat() cannot
        parse before Python 3.11, hence the '+00:00' substitution.  Results
        are always timezone-aware (UTC assumed when no offset is present).
        """
        if not date_str:
            return None
        try:
            dt = datetime.fromisoformat(date_str.replace('Z', '+00:00'))
        except (ValueError, TypeError, AttributeError):
            # Was a bare `except:`; narrowed to parse/type failures.
            return None
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        return dt

    def get_metrics(self, repo_url: str) -> Dict:
        """Fetch commit/PR/issue activity for the last `days_back` days.

        Returns a metrics dict on success, or {'error': ...} on failure
        (callers check for the 'error' key rather than catching exceptions).
        Only the first 100 items per endpoint are fetched (no pagination),
        so very active repositories are under-counted.
        """
        try:
            owner, repo = self.parse_repo_url(repo_url)
        except ValueError as e:
            return {"error": str(e)}

        now = datetime.now(timezone.utc)
        since_dt = now - timedelta(days=self.days_back)
        since = since_dt.isoformat()

        metrics = {
            'repo': f"{owner}/{repo}",
            'period_days': self.days_back,
            'analyzed_at': now.isoformat(),
            'status': 'success'
        }

        try:
            # Existence/visibility check; a missing or private repo yields [].
            repo_info = self.safe_request(f"https://api.github.com/repos/{owner}/{repo}")
            if not repo_info:
                return {"error": "Repository not found or inaccessible"}

            # Commits are filtered server-side via the `since` parameter.
            commits_url = f"https://api.github.com/repos/{owner}/{repo}/commits?per_page=100&since={since}"
            commits = self.safe_request(commits_url) or []
            metrics['total_commits'] = len(commits)

            # The pulls endpoint has no `since` filter, so filter client-side.
            prs_url = f"https://api.github.com/repos/{owner}/{repo}/pulls?state=all&per_page=100"
            prs = self.safe_request(prs_url) or []

            recent_prs = []
            for pr in prs:
                created_at = self.safe_parse_datetime(pr.get('created_at'))
                if created_at and created_at >= since_dt:
                    recent_prs.append(pr)

            metrics['prs_total'] = len(recent_prs)
            metrics['prs_merged'] = len([p for p in recent_prs if p.get('merged_at')])

            issues_url = f"https://api.github.com/repos/{owner}/{repo}/issues?state=closed&per_page=100"
            issues = self.safe_request(issues_url) or []

            recent_issues = []
            for issue in issues:
                # The /issues endpoint also returns pull requests; skip them
                # so PRs are not double-counted as closed issues.
                if 'pull_request' in issue:
                    continue
                closed_at = self.safe_parse_datetime(issue.get('closed_at'))
                if closed_at and closed_at >= since_dt:
                    recent_issues.append(issue)

            metrics['issues_total'] = len(recent_issues)

            # Normalize counts to per-day / per-week rates over the window.
            metrics['commits_per_day'] = metrics['total_commits'] / max(self.days_back, 1)
            metrics['prs_per_week'] = metrics['prs_total'] / max((self.days_back / 7), 1)
            metrics['issues_per_week'] = metrics['issues_total'] / max((self.days_back / 7), 1)

            return metrics

        except Exception as e:
            return {
                "error": f"Analysis failed: {str(e)}",
                "repo": f"{owner}/{repo}",
                "analyzed_at": now.isoformat()
            }
| |
|
def predict_productivity_core(daily_coding_hours, commits_per_day, pull_requests_per_week,
                              issues_closed_per_week, active_repos, code_reviews_per_week):
    """Derive the model's seven engineered features from raw activity metrics
    and return the predicted productivity score as a float.

    NOTE(review): issues_closed_per_week and active_repos are accepted for
    interface compatibility but do not feed into the derived features.

    Raises:
        HTTPException: 500 if feature scaling or model prediction fails.
    """
    try:
        # Heuristic mappings from raw activity onto the training features,
        # each clamped to a plausible range.
        derived = [
            max(1, 7 - commits_per_day),                    # cycle_time
            max(100, 500 - (pull_requests_per_week * 50)),  # pr_size
            min(10, 5 + (daily_coding_hours * 0.5)),        # dev_satisfaction
            max(1, 7 - (pull_requests_per_week * 0.5)),     # deployment_frequency
            max(0.1, 0.5 - (code_reviews_per_week * 0.05)), # change_failure_rate
            max(1, 8 - daily_coding_hours),                 # cognitive_load
            min(1.0, 0.6 + (code_reviews_per_week * 0.05)), # test_coverage
        ]
        scaled = scaler.transform(np.array([derived]))
        return float(model.predict(scaled)[0])
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")
| |
|
| | |
@app.get("/")
async def root():
    """Landing/health endpoint confirming the API is reachable."""
    payload = {"message": "Developer Productivity Prediction API", "status": "online"}
    return payload
| |
|
@app.post("/predict", response_model=ProductivityResponse)
async def predict_productivity(request: ProductivityRequest):
    """Score a manually supplied set of productivity metrics.

    Delegates to predict_productivity_core; any failure surfaces as a 500
    with the underlying error message as the detail.
    """
    try:
        score = predict_productivity_core(
            request.daily_coding_hours,
            request.commits_per_day,
            request.pull_requests_per_week,
            request.issues_closed_per_week,
            request.active_repos,
            request.code_reviews_per_week,
        )
        return ProductivityResponse(predicted_score=score, status="success")
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
| |
|
@app.post("/analyze-github", response_model=GitHubAnalysisResponse)
async def analyze_github_repo(request: GitHubAnalysisRequest):
    """Analyze a GitHub repository's recent activity and score it with the model.

    Raises:
        HTTPException: 422 for missing inputs, 400 for analyzer-reported
        errors, 500 for anything unexpected.
    """
    try:
        # Guard clause: both fields are required to call the GitHub API.
        if not request.repo_url or not request.github_token:
            raise HTTPException(status_code=422, detail="repo_url and github_token are required")

        metrics = RepoProductivityAnalyzer(request.github_token).get_metrics(request.repo_url)
        if "error" in metrics:
            raise HTTPException(status_code=400, detail=metrics["error"])

        commits_daily = float(metrics.get('commits_per_day', 0))
        weekly_prs = float(metrics.get('prs_per_week', 0))
        weekly_issues = float(metrics.get('issues_per_week', 0))

        # Map raw repo activity onto the feature names the model expects.
        ml_features = {
            'daily_coding_hours': min(commits_daily * 2, 8),
            'commits_per_day': max(int(commits_daily), 0),
            'pull_requests_per_week': max(int(weekly_prs), 0),
            'issues_closed_per_week': max(int(weekly_issues), 0),
            'active_repos': 1,
            'code_reviews_per_week': max(int(weekly_prs), 0),
        }

        score = predict_productivity_core(**ml_features)

        # Coarse boolean signals derived from the metrics and the score.
        indicators = {
            'high_commit_frequency': commits_daily > 1,
            'active_pr_process': weekly_prs > 2,
            'good_issue_resolution': weekly_issues > 1,
            'overall_productivity': score > 0.7,
        }

        return GitHubAnalysisResponse(
            repo_metrics=metrics,
            ml_features=ml_features,
            predicted_score=float(score),
            productivity_indicators=indicators,
            status="success",
        )
    except HTTPException:
        # Pass through deliberate status codes unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
| |
|
| | |
def gradio_predict(daily_coding_hours, commits_per_day, pull_requests_per_week,
                   issues_closed_per_week, active_repos, code_reviews_per_week):
    """Gradio wrapper around predict_productivity_core.

    Returns a human-readable string; errors are reported in-band rather than
    raised, since Gradio displays the return value directly.
    """
    try:
        score = predict_productivity_core(
            daily_coding_hours, commits_per_day, pull_requests_per_week,
            issues_closed_per_week, active_repos, code_reviews_per_week,
        )
        return f"Predicted Score: {score:.3f}"
    except Exception as e:
        return f"Error: {str(e)}"
| |
|
def gradio_github_analysis(repo_url, github_token):
    """Gradio wrapper: analyze a repo and return a formatted text report.

    Errors (bad token, bad URL, analyzer failure) are returned as strings
    rather than raised, since Gradio displays the return value directly.
    """
    try:
        analyzer = RepoProductivityAnalyzer(github_token)
        metrics = analyzer.get_metrics(repo_url)

        if "error" in metrics:
            return f"Error: {metrics['error']}"

        # Same metric->feature mapping as the /analyze-github endpoint;
        # NOTE(review): duplicated logic — keep the two in sync.
        ml_features = {
            'daily_coding_hours': min(metrics['commits_per_day'] * 2, 8),
            'commits_per_day': max(int(metrics['commits_per_day']), 0),
            'pull_requests_per_week': max(int(metrics['prs_per_week']), 0),
            'issues_closed_per_week': max(int(metrics['issues_per_week']), 0),
            'active_repos': 1,
            'code_reviews_per_week': max(int(metrics['prs_per_week']), 0)
        }

        prediction = predict_productivity_core(**ml_features)

        # Multi-line report shown verbatim in the Gradio textbox.
        return f"""π PRODUCTIVITY ANALYSIS
π Repository: {metrics['repo']}
β±οΈ Period: {metrics['period_days']} days

π KEY METRICS:
β’ Commits/day: {metrics['commits_per_day']:.1f}
β’ PRs/week: {metrics['prs_per_week']:.1f}
β’ Issues/week: {metrics['issues_per_week']:.1f}

π€ ML PREDICTION: {prediction:.3f}
{'π High Productivity!' if prediction > 0.7 else 'β οΈ Room for improvement'}

π‘ FEATURES:
β’ Daily coding hours: {ml_features['daily_coding_hours']}
β’ Commits/day: {ml_features['commits_per_day']}
β’ PRs/week: {ml_features['pull_requests_per_week']}
β’ Issues/week: {ml_features['issues_closed_per_week']}
β’ Active repos: {ml_features['active_repos']}
β’ Reviews/week: {ml_features['code_reviews_per_week']}"""

    except Exception as e:
        return f"Error: {str(e)}"
| |
|
| | |
# Gradio UI definition: two tabs, one for manual slider-driven prediction and
# one for live GitHub repository analysis. Built at import time; mounted onto
# the FastAPI app at the bottom of the file.
with gr.Blocks(title="Developer Productivity Predictor") as demo:
    gr.Markdown("# π Developer Productivity Predictor")
    gr.Markdown("Predict productivity scores and analyze GitHub repositories using ML")

    with gr.Tab("Manual Prediction"):
        gr.Markdown("### Enter your development metrics:")
        # Slider ranges mirror the plausible input ranges of the model features.
        with gr.Row():
            daily_hours = gr.Slider(1, 12, value=6, label="Daily Coding Hours")
            commits = gr.Slider(0, 20, value=3, label="Commits per Day")
            prs = gr.Slider(0, 10, value=2, label="Pull Requests per Week")
        with gr.Row():
            issues = gr.Slider(0, 15, value=3, label="Issues Closed per Week")
            repos = gr.Slider(1, 10, value=2, label="Active Repositories")
            reviews = gr.Slider(0, 20, value=5, label="Code Reviews per Week")

        predict_btn = gr.Button("π Predict Productivity", variant="primary")
        prediction_output = gr.Textbox(label="Prediction Result", lines=2)

        # Input order must match gradio_predict's parameter order.
        predict_btn.click(
            gradio_predict,
            inputs=[daily_hours, commits, prs, issues, repos, reviews],
            outputs=prediction_output
        )

    with gr.Tab("GitHub Analysis"):
        gr.Markdown("### Analyze any GitHub repository:")

        repo_url_input = gr.Textbox(
            label="GitHub Repository URL",
            placeholder="https://github.com/owner/repo",
            value="https://github.com/microsoft/vscode"
        )
        # type="password" masks the token in the browser UI.
        token_input = gr.Textbox(
            label="GitHub Token",
            type="password",
            placeholder="ghp_xxxxxxxxxxxx"
        )

        analyze_btn = gr.Button("π Analyze Repository", variant="primary")
        analysis_output = gr.Textbox(label="Analysis Result", lines=15)

        analyze_btn.click(
            gradio_github_analysis,
            inputs=[repo_url_input, token_input],
            outputs=analysis_output
        )
| |
|
| | |
# Serve the Gradio UI from the FastAPI app. NOTE(review): mounting at "/"
# likely shadows the JSON root() endpoint defined above — confirm intended.
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    # 7860 is the conventional Gradio/Spaces port; 0.0.0.0 exposes externally.
    uvicorn.run(app, host="0.0.0.0", port=7860)