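"""Utility functions for repository impact analysis.

Clones a git repository, reads its source files, splits them into
token-bounded chunks, and asks an OpenAI or Anthropic model to assess
the impact of a proposed code/configuration change.
"""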
import os
import git
from pathlib import Path
from openai import OpenAI
from anthropic import Anthropic
from dotenv import load_dotenv
from pydantic_model import ImpactAnalysis
import tiktoken
import json
from typing import List, Tuple, Dict, Any

# Load environment variables
load_dotenv()

# Initialize API clients
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
anthropic_client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

def clone_repository(repo_url, temp_dir):
    """Clone a git repository to a temporary directory."""
    try:
        git.Repo.clone_from(repo_url, temp_dir)
        return True, None
    except Exception as e:
        return False, str(e)

def read_code_files(directory):
    """Read all code files from the directory."""
    code_files = []
    code_extensions = {'.py', '.js', '.jsx', '.ts', '.tsx', '.java', '.cpp', '.c', '.cs', '.go', '.rb', '.php', '.cls', '.object', '.page'}
    warnings = []
    for root, _, files in os.walk(directory):
        for file in files:
            if Path(file).suffix in code_extensions:
                file_path = os.path.join(root, file)
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        content = f.read()
                    relative_path = os.path.relpath(file_path, directory)
                    code_files.append({
                        'path': relative_path,
                        'content': content
                    })
                except Exception as e:
                    warnings.append(f"Could not read file {file_path}: {str(e)}")
    return code_files, warnings

def count_tokens(text: str, model: str = "gpt-4") -> int:
    """Count the number of tokens in a text string."""
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # tiktoken only knows OpenAI model names; fall back to a general-purpose
        # encoding for other selectors (e.g. the Claude model key).
        encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(text))

def chunk_files(code_files: List[Dict[str, str]], model: str = "gpt-4", max_tokens: int = 120000) -> List[List[Dict[str, str]]]:
    """Split files into chunks that fit within the context window."""
    chunks = []
    current_chunk = []
    current_tokens = 0
    for file in code_files:
        file_content = f"File: {file['path']}\nContent:\n{file['content']}\n"
        file_tokens = count_tokens(file_content, model)
        # If a single file is larger than max_tokens, skip it
        if file_tokens > max_tokens:
            print(f"Warning: File {file['path']} is too large ({file_tokens} tokens) and will be skipped")
            continue
        # If adding this file would exceed max_tokens, start a new chunk
        if current_tokens + file_tokens > max_tokens:
            if current_chunk:  # Only add non-empty chunks
                chunks.append(current_chunk)
            current_chunk = [file]
            current_tokens = file_tokens
        else:
            current_chunk.append(file)
            current_tokens += file_tokens
    # Add the last chunk if it's not empty
    if current_chunk:
        chunks.append(current_chunk)
    return chunks

def analyze_code_chunk(chunk: List[Dict[str, str]], prompt: str, model: str) -> Tuple[str, str]:
    """Analyze a chunk of code files."""
    try:
        # Prepare the context from the chunk
        context = "Here are the relevant code files:\n\n"
        for file in chunk:
            context += f"File: {file['path']}\n```\n{file['content']}\n```\n"
        if model == "gpt-4":
            # The "gpt-4" selector is the UI-facing key; the API call below uses gpt-4o.
            json_schema = ImpactAnalysis.model_json_schema()
            messages = [
                {"role": "system", "content": "You are a code analysis expert. Analyze the provided code based on the user's prompt."},
                {"role": "user", "content": f"Please check the impact of the code/configuration changes below on the provided codebase. Provide only the summary of the impact in a table with aggregate analysis that outputs a JSON object with the following schema: {json_schema}. Please note: Do not add the characters ```json anywhere in the response. Do not respond with messages like 'Here is the response in the required JSON format:'.\n\nCode or configuration changes: {prompt}\n\n{context}"}
            ]
            response = openai_client.chat.completions.create(
                model="gpt-4o",
                messages=messages,
                temperature=0.7,
                max_tokens=2000
            )
            return response.choices[0].message.content, ""
        else:
            # Keep original Claude implementation
            system_message = "You are a code analysis expert. Analyze the provided code based on the user's prompt."
            user_message = f"Please check the impact of the code/configuration changes below on the provided codebase. Provide only the summary of the impact in a table with aggregate analysis that includes 1) List of files impacted. 2) Number of files impacted. 3) Impact detail on each file impacted. Surface a 'Severity Level' at the top of the table with possible values: Low, Medium, High based on the 'Number of impacted files'. E.g. if 'Number of impacted files' is between 1 and 3 then LOW, if between 4 and 8 then MEDIUM, if greater than 8 then HIGH.\n\nCode or configuration changes: {prompt}\n\n{context}"
            response = anthropic_client.messages.create(
                model="claude-3-7-sonnet-20250219",
                max_tokens=2000,
                temperature=0.7,
                system=system_message,
                messages=[{"role": "user", "content": user_message}]
            )
            return response.content[0].text, ""
    except Exception as e:
        return "", str(e)

def analyze_code(code_files: List[Dict[str, str]], prompt: str, model: str) -> Tuple[str, str]:
    """Analyze code files with chunking to handle large codebases."""
    try:
        # Split files into chunks
        chunks = chunk_files(code_files, model)
        if not chunks:
            return "", "No valid files to analyze"
        # Analyze each chunk
        all_analyses = []
        for i, chunk in enumerate(chunks):
            analysis, error = analyze_code_chunk(chunk, prompt, model)
            if error:
                return "", f"Error analyzing chunk {i+1}: {error}"
            if analysis:
                all_analyses.append(analysis)
        if not all_analyses:
            return "", "No analysis results generated"
        # Combine results from all chunks
        combined_analysis = {
            "severity_level": "LOW",  # Default to lowest severity
            "number_of_files_impacted": 0,
            "files_impacted": []
        }
        # Merge results from all chunks
        for analysis in all_analyses:
            try:
                chunk_data = json.loads(analysis)
                combined_analysis["number_of_files_impacted"] += chunk_data.get("number_of_files_impacted", 0)
                combined_analysis["files_impacted"].extend(chunk_data.get("files_impacted", []))
                # Update severity level based on the highest severity found
                severity_map = {"LOW": 1, "MEDIUM": 2, "HIGH": 3}
                current_severity = severity_map.get(combined_analysis["severity_level"], 0)
                chunk_severity = severity_map.get(chunk_data.get("severity_level", "LOW"), 0)
                if chunk_severity > current_severity:
                    combined_analysis["severity_level"] = chunk_data["severity_level"]
            except json.JSONDecodeError:
                continue
        return json.dumps(combined_analysis), ""
    except Exception as e:
        return "", str(e)

def check_api_keys():
    """Check if required API keys are set."""
    openai_key = os.getenv("OPENAI_API_KEY") is not None
    anthropic_key = os.getenv("ANTHROPIC_API_KEY") is not None
    return {
        "gpt-4": openai_key,
        "claude-sonnet": anthropic_key
    }
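

# A minimal usage sketch showing how the helpers above chain together: clone a
# repository into a temporary directory, read its files, then run an impact
# analysis. The repository URL and change description are placeholders, not
# values from this project.
if __name__ == "__main__":
    import tempfile

    with tempfile.TemporaryDirectory() as temp_dir:
        ok, clone_error = clone_repository("https://github.com/example/repo.git", temp_dir)
        if not ok:
            raise SystemExit(f"Clone failed: {clone_error}")
        files, read_warnings = read_code_files(temp_dir)
        for warning in read_warnings:
            print(warning)
        result, analysis_error = analyze_code(files, "Rename the field 'status' to 'state'", "gpt-4")
        print(result or f"Analysis failed: {analysis_error}")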