Spaces:
Running
Running
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
""" | |
AI Review Service | |
This module provides functionality for AI-powered code review using Nebius Qwen2.5-72B-Instruct model. | |
""" | |
import os | |
import logging | |
import json | |
import re | |
from openai import OpenAI | |
from dotenv import load_dotenv | |
logger = logging.getLogger(__name__) | |
# Load environment variables | |
load_dotenv() | |
class AIReviewService:
    """
    Service for AI-powered code review using Nebius Qwen2.5-72B-Instruct model.

    Requests are sent through the OpenAI-compatible client pointed at the
    Nebius endpoint. When NEBIUS_API_KEY is missing or empty no client is
    created; the public review methods then return an error payload instead
    of raising.
    """

    # Endpoint and generation settings shared by every request.
    BASE_URL = "https://api.studio.nebius.com/v1/"
    MODEL = "Qwen/Qwen2.5-72B-Instruct"
    MAX_TOKENS = 4000
    TEMPERATURE = 0

    # Single source for the "not configured" message so all entry points agree.
    _UNAVAILABLE_MESSAGE = (
        'AI review service is not available. '
        'Please set NEBIUS_API_KEY in environment variables.'
    )

    _SUMMARY_SYSTEM_PROMPT = (
        "You are an expert code reviewer providing a summary of a repository review. "
        "Be concise, insightful, and actionable in your feedback. "
        "Format your response in markdown with clear sections."
    )

    _BASE_SYSTEM_PROMPT = (
        "You are an expert code reviewer with deep knowledge of software development best practices, design patterns, and security.\n"
        "Your task is to review code and provide constructive, actionable feedback.\n"
        "Be thorough but prioritize the most important issues.\n"
        "Format your response in markdown with clear sections.\n"
        "For each suggestion, include the line number, the issue, and a recommended solution.\n"
        "Focus on:\n"
        "- Code quality and readability\n"
        "- Potential bugs and edge cases\n"
        "- Security vulnerabilities\n"
        "- Performance optimizations\n"
        "- Adherence to best practices\n"
        "Your feedback should be specific, actionable, and educational. Explain why each suggestion matters.\n"
        "Do not hallucinate vulnerabilities. Base claims on code patterns.\n"
    )

    _JS_TS_GUIDANCE = (
        "\nFor JavaScript/TypeScript code, focus on modern ES6+ practices, "
        "proper async handling, and potential type issues."
    )

    # Extra system-prompt paragraph appended for languages with specific advice.
    _LANGUAGE_GUIDANCE = {
        'Python': "\nFor Python code, pay special attention to PEP 8 compliance, proper exception handling, and Pythonic idioms.",
        'JavaScript': _JS_TS_GUIDANCE,
        'TypeScript': _JS_TS_GUIDANCE,
        'Java': "\nFor Java code, examine object-oriented design, proper exception handling, and resource management.",
        'Go': "\nFor Go code, check for idiomatic Go patterns, proper error handling, and concurrency issues.",
        'Rust': "\nFor Rust code, verify memory safety, proper use of ownership/borrowing, and idiomatic Rust patterns.",
    }

    # Lower-cased file extension -> language name used in prompts.
    _EXTENSION_TO_LANGUAGE = {
        '.py': 'Python',
        '.js': 'JavaScript',
        '.jsx': 'JavaScript',
        '.ts': 'TypeScript',
        '.tsx': 'TypeScript',
        '.java': 'Java',
        '.go': 'Go',
        '.rs': 'Rust',
        '.cpp': 'C++',
        '.cc': 'C++',
        '.c': 'C',
        '.h': 'C',
        '.hpp': 'C++',
        '.cs': 'C#',
        '.php': 'PHP',
        '.rb': 'Ruby',
    }

    # A markdown heading of level two or deeper at the start of a line. The
    # marker and the spaces after it are consumed so titles come out clean
    # ("### Foo" -> "Foo").
    _HEADING_RE = re.compile(r'^[ \t]*#{2,}[ \t]*', re.MULTILINE)
    # "line 12" / "Line 12" references inside a suggestion.
    _LINE_REF_RE = re.compile(r'line\s+(\d+)', re.IGNORECASE)

    def __init__(self):
        """
        Initialize the AIReviewService.

        Reads NEBIUS_API_KEY from the environment and creates the API client
        when the key is present. ``self.client`` is always defined; it stays
        None when no key is configured.
        """
        self.client = None
        self.api_key = os.getenv('NEBIUS_API_KEY')
        if not self.api_key:
            logger.warning("NEBIUS_API_KEY not found in environment variables. AI review will not be available.")
        else:
            self.client = OpenAI(
                base_url=self.BASE_URL,
                api_key=self.api_key,
            )
        logger.info("Initialized AIReviewService")

    def is_available(self):
        """
        Check if the AI review service is available.

        Returns:
            bool: True if a non-empty API key is set and a client was created,
            False otherwise.
        """
        # An empty-string key never gets a client, so it must not count as
        # "available" either.
        return bool(self.api_key) and self.client is not None

    def review_code(self, file_path, file_content, language, context=None):
        """
        Review code using Qwen.

        Args:
            file_path (str): The path to the file being reviewed.
            file_content (str): The content of the file being reviewed.
            language (str): The programming language of the file.
            context (dict, optional): Additional context for the review.

        Returns:
            dict: The review results. Always contains 'status' and
            'suggestions'; 'review_text' on success, 'error' on failure.
        """
        if not self.is_available():
            return {
                'status': 'error',
                'error': self._UNAVAILABLE_MESSAGE,
                'suggestions': [],
            }

        logger.info("Reviewing %s code in %s", language, file_path)
        prompt = self._prepare_prompt(file_path, file_content, language, context)

        try:
            review_text = self._chat(self._get_system_prompt(language), prompt)
        except Exception as e:
            # Boundary with the remote API: report the failure to the caller
            # instead of propagating it.
            logger.error("Error calling Qwen API: %s", e)
            return {
                'status': 'error',
                'error': str(e),
                'suggestions': [],
            }

        return {
            'status': 'success',
            'review_text': review_text,
            'suggestions': self._parse_review(review_text),
        }

    def review_repository(self, repo_path, files, languages, analysis_results=None, max_files=10):
        """
        Review a repository using Qwen.

        Args:
            repo_path (str): The path to the repository.
            files (list): A list of files to review.
            languages (list): A list of programming languages in the repository.
            analysis_results (dict, optional): Results from other analysis tools.
            max_files (int, optional): Maximum number of entries taken from
                ``files``; None disables the limit. Defaults to 10.

        Returns:
            dict: 'status', per-file 'reviews' keyed by path, and a 'summary'.
        """
        if not self.is_available():
            return {
                'status': 'error',
                'error': self._UNAVAILABLE_MESSAGE,
                'reviews': {},
                'summary': '',
            }

        logger.info("Reviewing repository at %s", repo_path)

        # Limit the number of files to review to avoid excessive API usage.
        if max_files is not None and len(files) > max_files:
            logger.warning(
                "Too many files to review (%s). Limiting to %s files.",
                len(files), max_files,
            )
            files = files[:max_files]

        reviews = {}
        for file_path in files:
            # Decide from the extension first so files that will never be
            # reviewed are not read at all.
            _, ext = os.path.splitext(file_path)
            language = self._get_language_from_extension(ext)
            if not language:
                continue

            try:
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    file_content = f.read()

                # Provide context from analysis results if available.
                context = None
                if analysis_results:
                    context = self._extract_context_for_file(file_path, analysis_results)

                reviews[file_path] = self.review_code(file_path, file_content, language, context)
            except Exception as e:
                # Best effort per file: one unreadable file must not abort the
                # whole repository review.
                logger.error("Error reviewing file %s: %s", file_path, e)
                reviews[file_path] = {
                    'status': 'error',
                    'error': str(e),
                    'suggestions': [],
                }

        summary = self._generate_repository_summary(repo_path, reviews, languages, analysis_results)
        return {
            'status': 'success',
            'reviews': reviews,
            'summary': summary,
        }

    def _chat(self, system_prompt, user_prompt):
        """
        Send one system + user message pair to the model.

        Args:
            system_prompt (str): The system message.
            user_prompt (str): The user message.

        Returns:
            str: The text of the first choice, or '' when the response
            carries no content.
        """
        response = self.client.chat.completions.create(
            model=self.MODEL,
            max_tokens=self.MAX_TOKENS,
            temperature=self.TEMPERATURE,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        return response.choices[0].message.content or ''

    def _prepare_prompt(self, file_path, file_content, language, context=None):
        """
        Prepare a prompt for Qwen.

        Args:
            file_path (str): The path to the file being reviewed.
            file_content (str): The content of the file being reviewed.
            language (str): The programming language of the file.
            context (dict, optional): Additional context for the review; the
                'issues' and 'vulnerabilities' lists are used when non-empty.

        Returns:
            str: The prompt for Qwen.
        """
        parts = [
            f"Please review the following {language} code and provide constructive feedback:\n",
            f"File: {file_path}\n",
            f"```{language}\n",
            f"{file_content}\n",
            "```\n",
        ]

        issues = (context or {}).get('issues') or []
        vulnerabilities = (context or {}).get('vulnerabilities') or []

        # Only announce context that actually exists; empty headers add noise
        # to the prompt without giving the model anything to use.
        if issues or vulnerabilities:
            parts.append("Additional context:\n")
        if issues:
            parts.append("\nIssues detected by other tools:\n")
            parts.extend(
                f"- {issue.get('issue', 'Unknown issue')} at line {issue.get('line', 'unknown')}: {issue.get('description', '')}\n"
                for issue in issues
            )
        if vulnerabilities:
            parts.append("\nSecurity vulnerabilities detected:\n")
            parts.extend(
                f"- {vuln.get('issue', 'Unknown vulnerability')} at line {vuln.get('line', 'unknown')}: {vuln.get('description', '')}\n"
                for vuln in vulnerabilities
            )

        parts.extend([
            "\nPlease provide your review with the following sections:\n",
            "1. Code Quality: Assess the overall quality, readability, and maintainability.\n",
            "2. Potential Issues: Identify any bugs, edge cases, or potential problems.\n",
            "3. Security Concerns: Highlight any security vulnerabilities or risks.\n",
            "4. Performance Considerations: Note any performance bottlenecks or inefficiencies.\n",
            "5. Specific Suggestions: Provide concrete, actionable suggestions for improvement.\n",
        ])
        return ''.join(parts)

    def _get_system_prompt(self, language):
        """
        Get the system prompt for Qwen based on the programming language.

        Args:
            language (str): The programming language.

        Returns:
            str: The base system prompt, followed by language-specific
            guidance when the language has any.
        """
        return self._BASE_SYSTEM_PROMPT + self._LANGUAGE_GUIDANCE.get(language, '')

    def _parse_review(self, review_text):
        """
        Parse the review text from Qwen to extract structured suggestions.

        The text is split into sections at markdown headings of level two or
        deeper. Within a section, a line that mentions "line <n>" or starts
        with a "- " / "* " bullet opens a new suggestion; other non-blank
        lines are appended to the current suggestion's details.

        Args:
            review_text (str): The review text from Qwen. May be empty or None.

        Returns:
            list: Dicts with 'section', 'line' (int or None), 'description'
            and 'details', in order of appearance.
        """
        if not review_text:
            return []

        suggestions = []
        for section in self._HEADING_RE.split(review_text):
            section = section.strip()
            if not section:
                continue

            # First line is the section title, the rest is its body.
            title, *body = section.split('\n')
            title = title.strip()

            current = None
            for raw_line in body:
                line = raw_line.strip()
                if not line:
                    continue

                line_ref = self._LINE_REF_RE.search(line)
                if line_ref or line.startswith(('- ', '* ')):
                    current = {
                        'section': title,
                        'line': int(line_ref.group(1)) if line_ref else None,
                        'description': line,
                        'details': '',
                    }
                    suggestions.append(current)
                elif current is not None:
                    # Continuation of the suggestion opened above.
                    current['details'] += line + '\n'

        return suggestions

    def _get_language_from_extension(self, extension):
        """
        Get the programming language from a file extension.

        Args:
            extension (str): The file extension, including the leading dot.
                Matching is case-insensitive.

        Returns:
            str: The programming language, or None if unknown.
        """
        return self._EXTENSION_TO_LANGUAGE.get(extension.lower())

    @staticmethod
    def _entries_for_file(per_language_results, key, file_path):
        """
        Collect entries stored under ``key`` whose 'file' equals ``file_path``.

        Args:
            per_language_results (dict): Mapping of language to a result dict.
            key (str): Name of the list inside each result dict.
            file_path (str): The path to match exactly.

        Returns:
            list: Matching entries, in iteration order.
        """
        matches = []
        for language_results in per_language_results.values():
            for entry in language_results.get(key, []):
                if entry.get('file', '') == file_path:
                    matches.append(entry)
        return matches

    def _extract_context_for_file(self, file_path, analysis_results):
        """
        Extract relevant context for a file from analysis results.

        Args:
            file_path (str): The path to the file.
            analysis_results (dict): Results from other analysis tools.

        Returns:
            dict: 'issues' (code quality followed by performance issues) and
            'vulnerabilities' recorded for exactly this path.
        """
        context = {
            'issues': [],
            'vulnerabilities': [],
        }

        if 'code_analysis' in analysis_results:
            context['issues'].extend(
                self._entries_for_file(analysis_results['code_analysis'], 'issues', file_path)
            )

        if 'security_scan' in analysis_results:
            context['vulnerabilities'].extend(
                self._entries_for_file(analysis_results['security_scan'], 'vulnerabilities', file_path)
            )

        if 'performance_analysis' in analysis_results:
            # Performance results are nested one level deeper than the others.
            per_language = analysis_results['performance_analysis'].get('language_results', {})
            context['issues'].extend(
                self._entries_for_file(per_language, 'issues', file_path)
            )

        return context

    def _generate_repository_summary(self, repo_path, reviews, languages, analysis_results=None):
        """
        Generate a summary of the repository review.

        Args:
            repo_path (str): The path to the repository.
            reviews (dict): The review results for each file.
            languages (list): A list of programming languages in the repository.
            analysis_results (dict, optional): Results from other analysis tools.

        Returns:
            str: A summary of the repository review, or an error message when
            the service is unavailable or the API call fails.
        """
        if not self.is_available():
            return self._UNAVAILABLE_MESSAGE

        # Tolerate a missing list or non-string entries rather than raising
        # before the request is even built.
        language_list = ', '.join(str(language) for language in (languages or []))
        parts = [
            f"Please provide a summary of the code review for the repository at {repo_path}.\n",
            f"Languages used in the repository: {language_list}\n",
            "\nFiles reviewed:\n",
        ]

        for file_path, review in reviews.items():
            if review.get('status') == 'success':
                suggestion_count = len(review.get('suggestions', []))
                parts.append(f"- {file_path}: {suggestion_count} suggestions\n")
            else:
                parts.append(f"- {file_path}: Error - {review.get('error', 'Unknown error')}\n")

        if analysis_results:
            parts.append("\nAnalysis results summary:\n")
            if 'code_analysis' in analysis_results:
                total_issues = sum(
                    result.get('issue_count', 0)
                    for result in analysis_results['code_analysis'].values()
                )
                parts.append(f"- Code quality issues: {total_issues}\n")
            if 'security_scan' in analysis_results:
                total_vulns = sum(
                    result.get('vulnerability_count', 0)
                    for result in analysis_results['security_scan'].values()
                )
                parts.append(f"- Security vulnerabilities: {total_vulns}\n")
            if 'performance_analysis' in analysis_results:
                per_language = analysis_results['performance_analysis'].get('language_results', {})
                total_perf_issues = sum(
                    result.get('issue_count', 0) for result in per_language.values()
                )
                parts.append(f"- Performance issues: {total_perf_issues}\n")

        parts.extend([
            "\nPlease provide a comprehensive summary of the code review, including:\n",
            "1. Overall code quality assessment\n",
            "2. Common patterns and issues found across the codebase\n",
            "3. Strengths of the codebase\n",
            "4. Areas for improvement\n",
            "5. Prioritized recommendations\n",
        ])

        try:
            return self._chat(self._SUMMARY_SYSTEM_PROMPT, ''.join(parts))
        except Exception as e:
            # Boundary with the remote API: return the failure as text so the
            # per-file reviews are still delivered.
            logger.error("Error generating repository summary: %s", e)
            return f"Error generating repository summary: {e}"