Spaces:

lakkiroy
/

git-chat

Running

lakkiroy

Update git-chat application with improved LLM integration and configuration

9ed6f59 9 months ago

8.97 kB

	"""
	Simple LLM service that works without API keys using basic text processing
	"""
	import re
	from typing import List, Dict

	class SimpleLLM:
	    """Keyword- and regex-based responder for questions about a code repository.

	    No model or API key is involved: the supplied context text is scanned with
	    substring checks and regular expressions, and a canned answer is assembled
	    from the counts and names that were found.
	    """

	    # NOTE: alternation in Python's ``re`` is a bare ``|``; an escaped ``\|``
	    # matches a literal pipe character, which made the earlier patterns
	    # effectively never match real code.
	    _FUNCTION_RE = re.compile(r'\b(?:def|function|func)\s+(\w+)')
	    _CLASS_RE = re.compile(r'\bclass\s+(\w+)')
	    _IMPORT_RE = re.compile(
	        r'\bimport\s+[\w.]+|\bfrom\s+[\w.]+\s+import|#include\s*<[\w.]+>'
	    )

	    def __init__(self):
	        # Substrings whose presence in the context marks a language as "seen".
	        # Keywords overlap between languages (e.g. 'class ', 'import'), so
	        # several languages can be reported for a single-language context.
	        self.code_keywords = {
	            'python': ['def ', 'class ', 'import ', 'from ', '__init__', 'self.'],
	            'javascript': ['function', 'const ', 'let ', 'var ', 'async', 'await'],
	            'java': ['public class', 'private ', 'public ', 'static', 'void'],
	            'cpp': ['#include', 'int main', 'class ', 'namespace'],
	            'go': ['func ', 'package ', 'import', 'type'],
	            'rust': ['fn ', 'struct ', 'impl ', 'use ', 'mod '],
	        }

	    def analyze_code_context(self, context: str) -> Dict:
	        """Scan ``context`` and summarise what it appears to contain.

	        Args:
	            context: Raw text (typically repository snippets) to inspect.

	        Returns:
	            A dict with the keys ``languages`` (list of detected language
	            names, in ``code_keywords`` order), ``file_paths`` (up to 5 lines
	            that mention "path" and contain a slash or backslash),
	            ``functions_count``, ``classes_count``, ``imports`` (up to 10
	            matched import/include fragments) and ``total_lines``.
	        """
	        lines = context.split('\n')

	        # A list comprehension over the dict keeps the result order stable
	        # between runs (a set of strings does not guarantee that).
	        languages = [
	            lang
	            for lang, keywords in self.code_keywords.items()
	            if any(keyword in context for keyword in keywords)
	        ]

	        # Lines that mention "path" and look like they hold a file-system path.
	        file_paths = [
	            line.strip()
	            for line in lines
	            if 'path' in line.lower() and ('/' in line or '\\' in line)
	        ]

	        functions = len(self._FUNCTION_RE.findall(context))
	        classes = len(self._CLASS_RE.findall(context))
	        imports = self._IMPORT_RE.findall(context)

	        return {
	            'languages': languages,
	            'file_paths': file_paths[:5],  # Limit to 5 paths
	            'functions_count': functions,
	            'classes_count': classes,
	            'imports': imports[:10],  # Limit to 10 imports
	            'total_lines': len(lines),
	        }

	    def generate_response(self, question: str, context: str) -> str:
	        """Pick an answer template from keywords in ``question`` and fill it in.

	        Args:
	            question: The user's question; matched case-insensitively by
	                substring against a fixed keyword list per topic.
	            context: Repository text that is analysed to build the answer.

	        Returns:
	            The generated answer text.
	        """
	        question_lower = question.lower()
	        analysis = self.analyze_code_context(context)

	        # Specific topics are tested before the generic project description:
	        # words such as "what" or "about" occur in almost every question and
	        # would otherwise shadow every other branch.
	        routes = (
	            (('structure', 'organized', 'architecture'), self._describe_structure),
	            (('function', 'method', 'class'), self._describe_functions_classes),
	            (('dependency', 'dependencies', 'import', 'library'), self._describe_dependencies),
	            (('test', 'testing'), self._describe_tests),
	            (('error', 'exception', 'handling'), self._describe_error_handling),
	            (('what', 'about', 'project', 'repository'), self._describe_project),
	        )
	        for keywords, handler in routes:
	            if any(word in question_lower for word in keywords):
	                return handler(analysis, context)

	        return self._general_response(analysis, context)

	    def _describe_project(self, analysis: Dict, context: str) -> str:
	        """Return a one-paragraph overview: languages, counts, API/test hints."""
	        languages = ", ".join(analysis['languages']) if analysis['languages'] else "multiple languages"

	        response = f"This project appears to be written in {languages}. "

	        # Build the counts sentence from whichever parts exist so it stays
	        # grammatical when only classes or only functions were found.
	        counts = []
	        if analysis['classes_count'] > 0:
	            counts.append(f"{analysis['classes_count']} classes")
	        if analysis['functions_count'] > 0:
	            counts.append(f"{analysis['functions_count']} functions")
	        if counts:
	            response += f"It contains {' and '.join(counts)}. "

	        context_lower = context.lower()
	        if 'api' in context_lower or 'endpoint' in context_lower:
	            response += "It appears to be an API or web service. "

	        if 'test' in context_lower:
	            response += "The project includes test files. "

	        return response.strip()

	    def _describe_structure(self, analysis: Dict, context: str) -> str:
	        """Return a listing of detected paths, languages and definition counts."""
	        response = "The code is organized with the following structure:\n\n"

	        if analysis['file_paths']:
	            response += "Key files/directories:\n"
	            response += "".join(f"- {path}\n" for path in analysis['file_paths'])
	            response += "\n"

	        if analysis['languages']:
	            response += f"Languages used: {', '.join(analysis['languages'])}\n\n"

	        if analysis['classes_count'] > 0:
	            response += f"Classes found: {analysis['classes_count']}\n"
	        if analysis['functions_count'] > 0:
	            response += f"Functions found: {analysis['functions_count']}\n"

	        return response

	    def _describe_functions_classes(self, analysis: Dict, context: str) -> str:
	        """Return class/function counts plus up to five names of each."""
	        response = ""

	        if analysis['classes_count'] > 0:
	            response += f"Found {analysis['classes_count']} classes in the codebase. "

	        if analysis['functions_count'] > 0:
	            response += f"Found {analysis['functions_count']} functions/methods. "

	        # Both patterns have a single capture group, so findall yields the
	        # bare identifier names.
	        class_names = self._CLASS_RE.findall(context)
	        function_names = self._FUNCTION_RE.findall(context)

	        if class_names:
	            response += f"\n\nSome classes: {', '.join(class_names[:5])}"

	        if function_names:
	            response += f"\n\nSome functions: {', '.join(function_names[:5])}"

	        return response if response else "No classes or functions clearly identified in the provided context."

	    def _describe_dependencies(self, analysis: Dict, context: str) -> str:
	        """Return a bullet list of the import fragments found in the analysis."""
	        if not analysis['imports']:
	            return "No clear dependencies or imports identified in the provided context."

	        bullets = "".join(f"- {imp}\n" for imp in analysis['imports'])
	        return "Dependencies and imports found:\n\n" + bullets

	    def _describe_tests(self, analysis: Dict, context: str) -> str:
	        """Report whether the substring 'test' occurs anywhere in the context."""
	        if 'test' in context.lower():
	            return "Test files appear to be present in this codebase. Look for files with 'test' in their names or directories."
	        return "No obvious test files identified in the provided context."

	    def _describe_error_handling(self, analysis: Dict, context: str) -> str:
	        """Report which error-handling markers occur in the context."""
	        error_patterns = ['try:', 'except:', 'catch', 'throw', 'error', 'exception']
	        context_lower = context.lower()  # lower-case once, not once per pattern
	        found_patterns = [pattern for pattern in error_patterns if pattern in context_lower]

	        if found_patterns:
	            return f"Error handling appears to be implemented using: {', '.join(found_patterns)}"
	        return "No obvious error handling patterns identified in the provided context."

	    def _general_response(self, analysis: Dict, context: str) -> str:
	        """Return a generic bullet summary used when no topic keyword matched."""
	        response = "Based on the code context provided:\n\n"

	        if analysis['languages']:
	            response += f"- Languages: {', '.join(analysis['languages'])}\n"

	        if analysis['total_lines'] > 0:
	            response += f"- Code size: {analysis['total_lines']} lines analyzed\n"

	        if analysis['functions_count'] > 0:
	            response += f"- Functions: {analysis['functions_count']} found\n"

	        if analysis['classes_count'] > 0:
	            response += f"- Classes: {analysis['classes_count']} found\n"

	        response += "\nFor more specific information, please ask about particular aspects of the code."

	        return response

	class NoAPILLM:
	    """Callable adapter that lets SimpleLLM be invoked as ``llm(prompt)``."""

	    def __init__(self):
	        self.simple_llm = SimpleLLM()

	    def __call__(self, prompt: str) -> str:
	        """Answer a prompt of the form "Context from repository: ... Question: ...".

	        The prompt is handled only when it contains the context marker and
	        exactly one "Question:" marker; any other prompt gets a fixed help
	        message.

	        Args:
	            prompt: Full prompt text, optionally ending in an "Answer:" cue.

	        Returns:
	            The SimpleLLM answer for the extracted question and context, or the
	            fallback help message.
	        """
	        has_both_markers = "Context from repository:" in prompt and "Question:" in prompt

	        # Exactly one "Question:" marker means the prompt divides into a
	        # context half and a question half.
	        if has_both_markers and prompt.count("Question:") == 1:
	            before, _, after = prompt.partition("Question:")
	            repo_context = before.replace("Context from repository:", "").strip()
	            user_question = after.replace("Answer:", "").strip()
	            return self.simple_llm.generate_response(user_question, repo_context)

	        # Any other prompt shape gets the generic help text.
	        return "I can help analyze code repositories. Please provide specific questions about the codebase structure, functions, dependencies, or other aspects of the code."