# git-chat/services/simple_llm.py
# lakkiroy
# Update git-chat application with improved LLM integration and configuration
# 9ed6f59
"""
Simple LLM service that works without API keys using basic text processing
"""
import re
from typing import List, Dict
class SimpleLLM:
    """Rule-based responder that answers questions about code without API keys.

    The class performs keyword and regular-expression matching over a text
    blob of repository content; no model or network call is involved.
    """

    # One shared pattern for counting AND naming functions, so the reported
    # count and the listed names always agree. The word boundary applies to
    # every alternative (e.g. "myfunc x" is not counted).
    _FUNCTION_RE = re.compile(r'\b(?:def|function|func|fn)\s+(\w+)')
    _CLASS_RE = re.compile(r'\bclass\s+(\w+)')
    _IMPORT_RE = re.compile(
        r'import\s+[\w.]+|from\s+[\w.]+\s+import|#include\s*<[\w.]+>'
    )

    def __init__(self):
        # Substrings whose presence in the context marks a language as "used".
        # Dict order is also the order in which languages are reported.
        self.code_keywords = {
            'python': ['def ', 'class ', 'import ', 'from ', '__init__', 'self.'],
            'javascript': ['function', 'const ', 'let ', 'var ', 'async', 'await'],
            'java': ['public class', 'private ', 'public ', 'static', 'void'],
            'cpp': ['#include', 'int main', 'class ', 'namespace'],
            'go': ['func ', 'package ', 'import', 'type'],
            'rust': ['fn ', 'struct ', 'impl ', 'use ', 'mod '],
        }

    def analyze_code_context(self, context: str) -> Dict:
        """Extract coarse statistics from a blob of code context.

        Args:
            context: Raw text containing source code and/or file listings.

        Returns:
            A dict with keys ``languages`` (list of str, in the order of
            ``self.code_keywords``), ``file_paths`` (at most 5 stripped
            lines), ``functions_count``, ``classes_count``, ``imports``
            (at most 10 matched strings) and ``total_lines``.
        """
        # splitlines() yields [] for empty input, so an empty context is
        # reported as 0 lines instead of 1.
        lines = context.splitlines()

        # A list built in dict order keeps the output deterministic; a set
        # would reorder between interpreter runs.
        languages = [
            lang
            for lang, keywords in self.code_keywords.items()
            if any(keyword in context for keyword in keywords)
        ]

        # Heuristic: a line that mentions "path" and has a path separator.
        file_paths = [
            line.strip()
            for line in lines
            if 'path' in line.lower() and ('/' in line or '\\' in line)
        ]

        return {
            'languages': languages,
            'file_paths': file_paths[:5],  # Limit to 5 paths
            'functions_count': len(self._FUNCTION_RE.findall(context)),
            'classes_count': len(self._CLASS_RE.findall(context)),
            'imports': self._IMPORT_RE.findall(context)[:10],  # Limit to 10 imports
            'total_lines': len(lines),
        }

    def generate_response(self, question: str, context: str) -> str:
        """Answer ``question`` using statistics derived from ``context``.

        The question is matched case-insensitively (by substring) against
        keyword groups; the first matching group decides which description
        is produced.

        Args:
            question: Free-form user question.
            context: Raw repository text to analyse.

        Returns:
            A human-readable (partly Markdown) answer string.
        """
        question_lower = question.lower()
        analysis = self.analyze_code_context(context)

        # Specific topics are checked before the generic project keywords:
        # words such as "what" occur in almost every question and would
        # otherwise shadow every other branch.
        routes = (
            (('structure', 'organized', 'architecture'),
             self._describe_structure),
            (('function', 'method', 'class'),
             self._describe_functions_classes),
            (('dependency', 'dependencies', 'import', 'library'),
             self._describe_dependencies),
            (('test', 'testing'),
             self._describe_tests),
            (('error', 'exception', 'handling'),
             self._describe_error_handling),
            (('what', 'about', 'project', 'repository'),
             self._describe_project),
        )
        for keywords, handler in routes:
            if any(word in question_lower for word in keywords):
                return handler(analysis, context)
        return self._general_response(analysis, context)

    def _describe_project(self, analysis: Dict, context: str) -> str:
        """Return a one-paragraph overview of the project."""
        languages = ", ".join(analysis['languages']) if analysis['languages'] else "multiple languages"
        response = f"This project appears to be written in {languages}. "

        # Build the counts sentence from whichever parts exist so it is
        # always grammatical and always terminated by a period.
        counts = []
        if analysis['classes_count'] > 0:
            counts.append(f"{analysis['classes_count']} classes")
        if analysis['functions_count'] > 0:
            counts.append(f"{analysis['functions_count']} functions")
        if counts:
            response += f"It contains {' and '.join(counts)}. "

        context_lower = context.lower()
        if 'api' in context_lower or 'endpoint' in context_lower:
            response += "It appears to be an API or web service. "
        if 'test' in context_lower:
            response += "The project includes test files. "
        return response.strip()

    def _describe_structure(self, analysis: Dict, context: str) -> str:
        """Return a Markdown summary of files, languages and definition counts."""
        pieces = ["The code is organized with the following structure:\n\n"]
        if analysis['file_paths']:
            pieces.append("**Key files/directories:**\n")
            pieces.extend(f"- {path}\n" for path in analysis['file_paths'])
            pieces.append("\n")
        if analysis['languages']:
            pieces.append(f"**Languages used:** {', '.join(analysis['languages'])}\n\n")
        if analysis['classes_count'] > 0:
            pieces.append(f"**Classes found:** {analysis['classes_count']}\n")
        if analysis['functions_count'] > 0:
            pieces.append(f"**Functions found:** {analysis['functions_count']}\n")
        return "".join(pieces)

    def _describe_functions_classes(self, analysis: Dict, context: str) -> str:
        """Return counts plus up to five class and five function names."""
        response = ""
        if analysis['classes_count'] > 0:
            response += f"Found {analysis['classes_count']} classes in the codebase. "
        if analysis['functions_count'] > 0:
            response += f"Found {analysis['functions_count']} functions/methods. "

        # Names come from the same patterns used for counting.
        class_names = self._CLASS_RE.findall(context)
        function_names = self._FUNCTION_RE.findall(context)
        if class_names:
            response += f"\n\n**Some classes:** {', '.join(class_names[:5])}"
        if function_names:
            response += f"\n\n**Some functions:** {', '.join(function_names[:5])}"
        return response if response else "No classes or functions clearly identified in the provided context."

    def _describe_dependencies(self, analysis: Dict, context: str) -> str:
        """Return a Markdown bullet list of the detected import statements."""
        if not analysis['imports']:
            return "No clear dependencies or imports identified in the provided context."
        bullets = "".join(f"- {imp}\n" for imp in analysis['imports'])
        return "**Dependencies and imports found:**\n\n" + bullets

    def _describe_tests(self, analysis: Dict, context: str) -> str:
        """Report whether the word "test" occurs anywhere in the context."""
        if 'test' in context.lower():
            return "Test files appear to be present in this codebase. Look for files with 'test' in their names or directories."
        return "No obvious test files identified in the provided context."

    def _describe_error_handling(self, analysis: Dict, context: str) -> str:
        """List which error-handling substrings occur in the context."""
        error_patterns = ['try:', 'except:', 'catch', 'throw', 'error', 'exception']
        context_lower = context.lower()
        found_patterns = [pattern for pattern in error_patterns if pattern in context_lower]
        if found_patterns:
            return f"Error handling appears to be implemented using: {', '.join(found_patterns)}"
        return "No obvious error handling patterns identified in the provided context."

    def _general_response(self, analysis: Dict, context: str) -> str:
        """Return a generic Markdown summary when no topic keyword matched."""
        pieces = ["Based on the code context provided:\n\n"]
        if analysis['languages']:
            pieces.append(f"- **Languages:** {', '.join(analysis['languages'])}\n")
        if analysis['total_lines'] > 0:
            pieces.append(f"- **Code size:** {analysis['total_lines']} lines analyzed\n")
        if analysis['functions_count'] > 0:
            pieces.append(f"- **Functions:** {analysis['functions_count']} found\n")
        if analysis['classes_count'] > 0:
            pieces.append(f"- **Classes:** {analysis['classes_count']} found\n")
        pieces.append("\nFor more specific information, please ask about particular aspects of the code.")
        return "".join(pieces)
class NoAPILLM:
    """Callable adapter that feeds a formatted prompt string to SimpleLLM.

    The expected prompt contains the markers "Context from repository:" and
    "Question:" (optionally followed by "Answer:"); anything else receives a
    fixed help message.
    """

    _CONTEXT_MARKER = "Context from repository:"
    _QUESTION_MARKER = "Question:"
    _ANSWER_MARKER = "Answer:"

    def __init__(self):
        self.simple_llm = SimpleLLM()

    def __call__(self, prompt: str) -> str:
        """Split ``prompt`` into context and question and return the answer.

        Args:
            prompt: Full prompt text, normally context followed by question.

        Returns:
            The response generated by ``self.simple_llm``, or a generic help
            message when the prompt lacks either marker.
        """
        if self._CONTEXT_MARKER in prompt and self._QUESTION_MARKER in prompt:
            # Split on the LAST "Question:" so that repository content which
            # itself contains that text stays in the context instead of
            # forcing the generic fallback.
            context_part, _, question_part = prompt.rpartition(self._QUESTION_MARKER)
            context_text = context_part.replace(self._CONTEXT_MARKER, "").strip()
            question_text = question_part.replace(self._ANSWER_MARKER, "").strip()
            return self.simple_llm.generate_response(question_text, context_text)

        # Fallback for other prompt formats
        return "I can help analyze code repositories. Please provide specific questions about the codebase structure, functions, dependencies, or other aspects of the code."