Spaces:

spandanroy2
/

intelligent-bug-triage

Sleeping

Spandan Roy committed on Oct 7

Commit

dc673ed

1 Parent(s): 23046d5

Phase 1 Complete: Triage Agent with zero-shot classification

- Added Neo4j schema and client
- Added Qdrant vector store setup
- Implemented Triage Agent with BART model
- Updated Gradio UI with bug classification tab
- Added sample bug dataset
- Project structure complete

Files changed (6) hide show

agents/triage/triage_agent.py +166 -0
app.py +123 -49
data/sample/bugs_sample.csv +11 -0
knowledge-graph/neo4j_client.py +82 -0
knowledge-graph/schemas/bug_schema.cypher +56 -0
rag-system/vectorstore/qdrant_client.py +103 -0

agents/triage/triage_agent.py ADDED Viewed

	@@ -0,0 +1,166 @@

+"""
+Triage Agent: Automated Bug Classification and Priority Assignment
+"""
+import pandas as pd
+from transformers import pipeline
+from typing import Dict, List
+import logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+class TriageAgent:
+    """
+    Agent responsible for:
+    1. Classifying bugs into categories (UI, API, Database, Performance)
+    2. Assigning priority levels (P0-P4)
+    3. Determining severity (Critical, High, Medium, Low)
+    """
+    def __init__(self):
+        """Initialize the triage agent with ML models"""
+        self.classifier = None
+        self.categories = ['UI', 'API', 'Database', 'Performance', 'Backend']
+        self.priorities = ['P0', 'P1', 'P2', 'P3', 'P4']
+        self.severities = ['Critical', 'High', 'Medium', 'Low']
+    def load_model(self):
+        """Load pre-trained classification model"""
+        try:
+            # Using zero-shot classification for now
+            # Later: Fine-tune BERT on bug data
+            self.classifier = pipeline(
+                "zero-shot-classification",
+                model="facebook/bart-large-mnli",
+                device=-1  # CPU
+            )
+            logger.info("✅ Triage model loaded successfully")
+            return True
+        except Exception as e:
+            logger.error(f"❌ Model loading failed: {e}")
+            return False
+    def classify_bug(self, title: str, description: str) -> Dict:
+        """
+        Classify a bug based on title and description
+        Args:
+            title: Bug title
+            description: Bug description
+        Returns:
+            Dict with category, priority, severity, confidence
+        """
+        if not self.classifier:
+            logger.error("Model not loaded!")
+            return None
+        # Combine title and description
+        bug_text = f"{title}. {description}"
+        # Classify category
+        category_result = self.classifier(
+            bug_text,
+            candidate_labels=self.categories
+        )
+        # Determine priority based on keywords
+        priority = self._determine_priority(bug_text)
+        # Determine severity
+        severity = self._determine_severity(bug_text, priority)
+        result = {
+            'category': category_result['labels'][0],
+            'category_confidence': category_result['scores'][0],
+            'priority': priority,
+            'severity': severity,
+            'reasoning': self._generate_reasoning(
+                category_result['labels'][0],
+                priority,
+                severity
+            )
+        }
+        logger.info(f"✅ Classified bug as {result['category']} / {result['priority']}")
+        return result
+    def _determine_priority(self, text: str) -> str:
+        """Determine priority based on keywords"""
+        text_lower = text.lower()
+        # Critical keywords -> P0
+        critical_keywords = ['crash', 'critical', 'security', 'data loss', 'payment']
+        if any(keyword in text_lower for keyword in critical_keywords):
+            return 'P0'
+        # High priority keywords -> P1
+        high_keywords = ['broken', 'not working', 'error', 'fail', 'timeout']
+        if any(keyword in text_lower for keyword in high_keywords):
+            return 'P1'
+        # Medium priority keywords -> P2
+        medium_keywords = ['slow', 'incorrect', 'missing', 'wrong']
+        if any(keyword in text_lower for keyword in medium_keywords):
+            return 'P2'
+        # Low priority keywords -> P3
+        low_keywords = ['cosmetic', 'color', 'spacing', 'typo']
+        if any(keyword in text_lower for keyword in low_keywords):
+            return 'P3'
+        # Default
+        return 'P2'
+    def _determine_severity(self, text: str, priority: str) -> str:
+        """Map priority to severity"""
+        severity_map = {
+            'P0': 'Critical',
+            'P1': 'High',
+            'P2': 'Medium',
+            'P3': 'Low',
+            'P4': 'Low'
+        }
+        return severity_map.get(priority, 'Medium')
+    def _generate_reasoning(self, category: str, priority: str, severity: str) -> str:
+        """Generate explanation for classification"""
+        return f"""
+        This bug has been classified as a {category} issue with {priority} priority
+        and {severity} severity based on the content analysis and keyword detection.
+        """
+    def batch_classify(self, bugs_df: pd.DataFrame) -> pd.DataFrame:
+        """Classify multiple bugs from a DataFrame"""
+        results = []
+        for idx, row in bugs_df.iterrows():
+            result = self.classify_bug(row['title'], row['description'])
+            results.append(result)
+        # Add results to dataframe
+        bugs_df['predicted_category'] = [r['category'] for r in results]
+        bugs_df['predicted_priority'] = [r['priority'] for r in results]
+        bugs_df['predicted_severity'] = [r['severity'] for r in results]
+        bugs_df['confidence'] = [r['category_confidence'] for r in results]
+        return bugs_df
+# Example usage and testing
+if __name__ == "__main__":
+    agent = TriageAgent()
+    if agent.load_model():
+        # Test single bug
+        result = agent.classify_bug(
+            title="Login button not responding",
+            description="Users cannot click the login button on mobile devices"
+        )
+        print("Classification Result:")
+        print(result)
+        # Test batch classification
+        bugs_df = pd.read_csv('../../data/sample/bugs_sample.csv')
+        classified_df = agent.batch_classify(bugs_df)
+        print("\nBatch Classification Results:")
+        print(classified_df[['bug_id', 'category', 'predicted_category', 'confidence']])

app.py CHANGED Viewed

@@ -1,51 +1,81 @@
 import gradio as gr
 import os
 from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
-# Set HF_HOME to persist model caches
 os.environ['HF_HOME'] = os.getenv('HF_HOME', '/data/.huggingface')
 def welcome_message():
-    """Initial welcome message"""
     return """
     # 🐛 Intelligent Bug Triage System
-    Welcome to the AI-powered bug triage and resolution system.
-    ## Current Status: Phase 1 - Foundation Setup
-    ### System Capabilities (Coming Soon):
-    - 🎯 Automated bug classification and priority assignment
-    - 👥 Intelligent developer assignment with workload balancing
-    - 💡 Solution recommendations using RAG
-    - 📊 Real-time analytics and performance tracking
-    ### Next Steps:
-    1. Set up Neo4j knowledge graph
-    2. Configure vector database (Qdrant)
-    3. Train classification models
-    4. Build multi-agent system
-    **Project Status**: Development Environment Ready ✅
     """
 def test_space_storage():
-    """Test available storage in the Space"""
     import shutil
     disk_usage = shutil.disk_usage('/')
     total_gb = disk_usage.total / (1024**3)
-    used_gb = disk_usage.used / (1024**3)
     free_gb = disk_usage.free / (1024**3)
-    return f"""
-    **Space Storage Information:**
-    - Total: {total_gb:.2f} GB
-    - Used: {used_gb:.2f} GB
-    - Free: {free_gb:.2f} GB
-    """
 # Create Gradio Interface
 with gr.Blocks(theme=gr.themes.Soft(), title="Bug Triage System") as demo:
@@ -53,41 +83,85 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Bug Triage System") as demo:
     gr.Markdown("*AI-Powered Bug Management with Knowledge Graphs & Multi-Agent RAG*")
     with gr.Tab("Home"):
-        welcome_output = gr.Markdown(welcome_message())
-    with gr.Tab("System Info"):
-        gr.Markdown("## Development Environment Status")
-        storage_btn = gr.Button("Check Storage")
-        storage_output = gr.Textbox(label="Storage Status", lines=6)
-        storage_btn.click(test_space_storage, outputs=storage_output)
-    with gr.Tab("Documentation"):
         gr.Markdown("""
-        ## Project Documentation
-        ### Architecture Overview
-        This system uses a **multi-agent architecture** with:
-        - **Triage Agent**: Classifies bugs and assigns priorities
-        - **Assignment Agent**: Routes bugs to appropriate developers
-        - **Resolution Agent**: Provides solution recommendations
-        - **Analytics Agent**: Tracks metrics and performance
         ### Technology Stack
-        - **Backend**: FastAPI
-        - **Knowledge Graph**: Neo4j Community Edition
         - **Vector DB**: Qdrant
         - **Agent Framework**: LangChain
-        - **ML Models**: Transformers (BERT)
         - **Frontend**: Gradio
-        ### Development Phases
-        1. **Phase 1**: Foundation (4-6 weeks) - Current
-        2. **Phase 2**: Multi-Agent System (4-6 weeks)
-        3. **Phase 3**: RAG Implementation (4-6 weeks)
-        4. **Phase 4**: Integration & Deployment (3-4 weeks)
         """)
-# Launch the app
 if __name__ == "__main__":
     port = int(os.environ.get('PORT', 7860))
     demo.launch(

 import gradio as gr
 import os
+import sys
 from dotenv import load_dotenv
+import pandas as pd
+# Add project root to Python path
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
 # Load environment variables
 load_dotenv()
 os.environ['HF_HOME'] = os.getenv('HF_HOME', '/data/.huggingface')
+# Import agents defensively: only ImportError is caught, so the UI still starts
+# when the agent module or one of its dependencies is missing. TRIAGE_AVAILABLE
+# records the outcome and is checked by classify_single_bug() before any use.
+try:
+    from agents.triage.triage_agent import TriageAgent
+    TRIAGE_AVAILABLE = True
+except ImportError as e:
+    print(f"Triage agent not available: {e}")
+    TRIAGE_AVAILABLE = False
 def welcome_message():
+    """Return the static Markdown text shown on the Home tab (title, feature list, status line)."""
     return """
     # 🐛 Intelligent Bug Triage System
+    ## Phase 1: Foundation - Triage Agent Ready! 🎉
+    ### Current Features:
+    - ✅ **Automated Bug Classification** (UI, API, Database, Performance, Backend)
+    - ✅ **Priority Assignment** (P0-P4)
+    - ✅ **Severity Detection** (Critical, High, Medium, Low)
+    - ✅ **Confidence Scoring**
+    ### Try It Now:
+    Navigate to the "Bug Triage" tab to classify your first bug!
+    **Status**: Triage Agent Online ✅
     """
+def classify_single_bug(title, description):
+    """Classify a single bug"""
+    if not TRIAGE_AVAILABLE:
+        return "❌ Triage agent not loaded. Please check dependencies."
+    if not title or not description:
+        return "⚠️ Please provide both title and description."
+    try:
+        agent = TriageAgent()
+        if not agent.load_model():
+            return "❌ Failed to load triage model."
+        result = agent.classify_bug(title, description)
+        output = f"""
+        ## Classification Results 🎯
+        **Category:** {result['category']}
+        **Priority:** {result['priority']}
+        **Severity:** {result['severity']}
+        **Confidence:** {result['category_confidence']:.2%}
+        ### Reasoning:
+        {result['reasoning']}
+        """
+        return output
+    except Exception as e:
+        return f"❌ Classification failed: {str(e)}"
 def test_space_storage():
+    """Return a one-line Markdown summary of free and total disk space on '/' (bytes divided by 1024**3)."""
     import shutil
     disk_usage = shutil.disk_usage('/')
     total_gb = disk_usage.total / (1024**3)
     free_gb = disk_usage.free / (1024**3)
+    return f"**Storage:** {free_gb:.2f} GB free of {total_gb:.2f} GB total"
 # Create Gradio Interface
 with gr.Blocks(theme=gr.themes.Soft(), title="Bug Triage System") as demo:
     gr.Markdown("*AI-Powered Bug Management with Knowledge Graphs & Multi-Agent RAG*")
     with gr.Tab("Home"):
+        gr.Markdown(welcome_message())
+        # test_space_storage() is called once while the UI is being built, so the
+        # figure shown reflects disk usage at that moment and is not refreshed.
+        storage_output = gr.Markdown(test_space_storage())
+    with gr.Tab("Bug Triage 🎯"):
+        gr.Markdown("## Classify a New Bug")
+        gr.Markdown("Enter bug details below to get automated classification:")
+        # Two columns: inputs and button on the left, result pane on the right
+        with gr.Row():
+            with gr.Column():
+                bug_title = gr.Textbox(
+                    label="Bug Title",
+                    placeholder="e.g., Login button not responding",
+                    lines=2
+                )
+                bug_description = gr.Textbox(
+                    label="Bug Description",
+                    placeholder="Detailed description of the issue...",
+                    lines=5
+                )
+                classify_btn = gr.Button("🔍 Classify Bug", variant="primary")
+            with gr.Column():
+                classification_output = gr.Markdown(label="Classification Results")
+        # Clicking the button passes both textboxes to classify_single_bug and
+        # shows the string it returns in the result pane.
+        classify_btn.click(
+            fn=classify_single_bug,
+            inputs=[bug_title, bug_description],
+            outputs=classification_output
+        )
+        # Examples: each entry is [title, description], matching the inputs list below
+        gr.Examples(
+            examples=[
+                ["Login button not responding", "Users cannot click the login button on mobile devices. Issue occurs on iOS Safari only."],
+                ["API timeout on large queries", "Database queries taking over 30 seconds to complete when fetching user data."],
+                ["Memory leak in background service", "Application consuming 8GB RAM after running for 24 hours continuously."],
+            ],
+            inputs=[bug_title, bug_description]
+        )
+    with gr.Tab("Documentation 📚"):
         gr.Markdown("""
+        ## Project Architecture
+        ### Multi-Agent System
+        1. **Triage Agent** ✅ (ACTIVE)
+           - Classifies bugs into categories
+           - Assigns priority levels
+           - Determines severity
+        2. **Assignment Agent** (Coming in Phase 2)
+           - Routes bugs to developers
+           - Balances workload
+           - Considers expertise
+        3. **Resolution Agent** (Coming in Phase 3)
+           - Provides solution recommendations
+           - Uses RAG for knowledge retrieval
+        4. **Analytics Agent** (Coming in Phase 4)
+           - Tracks performance metrics
+           - Generates insights
         ### Technology Stack
+        - **ML Models**: Transformers (BART, BERT)
+        - **Knowledge Graph**: Neo4j
         - **Vector DB**: Qdrant
         - **Agent Framework**: LangChain
         - **Frontend**: Gradio
+        - **Backend**: FastAPI
+        ### Development Progress
+        - ✅ Phase 1: Foundation (Week 1-2)
+        - 🔄 Phase 2: Multi-Agent (Week 3-4)
+        - ⏳ Phase 3: RAG System (Week 5-6)
+        - ⏳ Phase 4: Deployment (Week 7-8)
         """)
+# Launch
 if __name__ == "__main__":
     port = int(os.environ.get('PORT', 7860))
     demo.launch(

data/sample/bugs_sample.csv ADDED Viewed

	@@ -0,0 +1,11 @@

+bug_id,title,description,category,priority,severity
+BUG-001,Login button not responding,User cannot click login button on mobile devices,UI,P1,High
+BUG-002,API timeout on large queries,Database queries taking >30 seconds,API,P0,Critical
+BUG-003,Memory leak in background service,Application consuming 8GB RAM after 24hrs,Performance,P1,High
+BUG-004,Incorrect tax calculation,Shopping cart shows wrong tax amount,Backend,P0,Critical
+BUG-005,Dark mode colors incorrect,Text not visible in dark mode,UI,P3,Low
+BUG-006,Search returns empty results,Search functionality broken for certain queries,Backend,P2,Medium
+BUG-007,Image upload fails for PNG,PNG files cannot be uploaded to profile,UI,P2,Medium
+BUG-008,Database connection pool exhausted,App crashes during high traffic,Database,P0,Critical
+BUG-009,Email notifications not sent,Users not receiving password reset emails,Backend,P1,High
+BUG-010,Pagination breaks on page 10,Cannot navigate beyond page 10 in results,UI,P3,Low

knowledge-graph/neo4j_client.py ADDED Viewed

	@@ -0,0 +1,82 @@

+"""Neo4j Knowledge Graph Client"""
+import os
+from neo4j import GraphDatabase
+from typing import List, Dict, Any
+import logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+class Neo4jClient:
+    """Client for interacting with Neo4j Knowledge Graph"""
+    def __init__(self):
+        """Initialize Neo4j connection"""
+        self.uri = os.getenv('NEO4J_URI', 'bolt://localhost:7687')
+        self.user = os.getenv('NEO4J_USER', 'neo4j')
+        self.password = os.getenv('NEO4J_PASSWORD', 'password')
+        self.driver = None
+    def connect(self):
+        """Establish connection to Neo4j"""
+        try:
+            self.driver = GraphDatabase.driver(
+                self.uri,
+                auth=(self.user, self.password)
+            )
+            logger.info("✅ Connected to Neo4j")
+            return True
+        except Exception as e:
+            logger.error(f"❌ Neo4j connection failed: {e}")
+            return False
+    def close(self):
+        """Close Neo4j connection"""
+        if self.driver:
+            self.driver.close()
+            logger.info("Neo4j connection closed")
+    def execute_query(self, query: str, parameters: Dict = None):
+        """Execute a Cypher query"""
+        if not self.driver:
+            logger.error("Not connected to Neo4j")
+            return None
+        with self.driver.session() as session:
+            result = session.run(query, parameters or {})
+            return [record.data() for record in result]
+    def create_bug(self, bug_data: Dict[str, Any]):
+        """Create a new bug node"""
+        query = """
+        CREATE (b:Bug {
+            id: $id,
+            title: $title,
+            description: $description,
+            priority: $priority,
+            category: $category,
+            status: 'Open',
+            created_at: datetime()
+        })
+        RETURN b
+        """
+        return self.execute_query(query, bug_data)
+    def get_developer_by_expertise(self, expertise: str):
+        """Find developers with specific expertise"""
+        query = """
+        MATCH (d:Developer)
+        WHERE $expertise IN d.expertise
+        AND d.current_workload < d.max_capacity
+        RETURN d
+        ORDER BY d.current_workload ASC
+        LIMIT 5
+        """
+        return self.execute_query(query, {'expertise': expertise})
+# Example usage
+if __name__ == "__main__":
+    client = Neo4jClient()
+    if client.connect():
+        print("Neo4j client ready!")
+        client.close()

knowledge-graph/schemas/bug_schema.cypher ADDED Viewed

	@@ -0,0 +1,56 @@

+// Bug Triage Knowledge Graph Schema
+// Nodes: Bug, Developer, Component, Team, Resolution
+// (only Bug, Developer, Component and Team are created in this script)
+// Create Constraints
+// Bug and Developer are unique on id; Component and Team are unique on name
+// even though their constraints are called component_id / team_id.
+CREATE CONSTRAINT bug_id IF NOT EXISTS FOR (b:Bug) REQUIRE b.id IS UNIQUE;
+CREATE CONSTRAINT dev_id IF NOT EXISTS FOR (d:Developer) REQUIRE d.id IS UNIQUE;
+CREATE CONSTRAINT component_id IF NOT EXISTS FOR (c:Component) REQUIRE c.name IS UNIQUE;
+CREATE CONSTRAINT team_id IF NOT EXISTS FOR (t:Team) REQUIRE t.name IS UNIQUE;
+// Create Indexes
+// NOTE(review): dev_expertise indexes a list-valued property; it presumably does not
+// serve `$x IN d.expertise` membership lookups - confirm against the Neo4j index docs.
+CREATE INDEX bug_priority IF NOT EXISTS FOR (b:Bug) ON (b.priority);
+CREATE INDEX bug_status IF NOT EXISTS FOR (b:Bug) ON (b.status);
+CREATE INDEX bug_category IF NOT EXISTS FOR (b:Bug) ON (b.category);
+CREATE INDEX dev_expertise IF NOT EXISTS FOR (d:Developer) ON (d.expertise);
+// Sample Node Creation
+// NOTE(review): these are plain CREATEs with fixed ids/names, so running the script a
+// second time would presumably violate the uniqueness constraints above - confirm,
+// and consider MERGE if the script must be re-runnable.
+CREATE (b:Bug {
+    id: 'BUG-001',
+    title: 'Sample Bug',
+    description: 'This is a sample bug for testing',
+    priority: 'P2',
+    severity: 'Medium',
+    category: 'UI',
+    status: 'Open',
+    created_at: datetime(),
+    reporter: 'user@example.com'
+});
+CREATE (d:Developer {
+    id: 'DEV-001',
+    name: 'John Doe',
+    email: 'john@example.com',
+    expertise: ['Python', 'FastAPI', 'React'],
+    current_workload: 3,
+    max_capacity: 5
+});
+CREATE (c:Component {
+    name: 'Authentication',
+    description: 'User authentication module',
+    language: 'Python',
+    repository: 'backend'
+});
+CREATE (t:Team {
+    name: 'Backend Team',
+    description: 'Handles backend services',
+    size: 5
+});
+// Sample Relationships
+// Link the sample developer to their team and to the component they are expert in
+MATCH (d:Developer {id: 'DEV-001'}), (t:Team {name: 'Backend Team'})
+CREATE (d)-[:BELONGS_TO]->(t);
+MATCH (d:Developer {id: 'DEV-001'}), (c:Component {name: 'Authentication'})
+CREATE (d)-[:EXPERT_IN {proficiency: 0.9}]->(c);

rag-system/vectorstore/qdrant_client.py ADDED Viewed

	@@ -0,0 +1,103 @@

+"""Qdrant Vector Database Client for RAG"""
+import os
+from qdrant_client import QdrantClient
+from qdrant_client.models import Distance, VectorParams, PointStruct
+from sentence_transformers import SentenceTransformer
+import logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+class QdrantVectorStore:
+    """Vector database for semantic search in bug resolution"""
+    def __init__(self, collection_name: str = "bug_solutions"):
+        """Initialize Qdrant client"""
+        self.host = os.getenv('QDRANT_HOST', 'localhost')
+        self.port = int(os.getenv('QDRANT_PORT', 6333))
+        self.collection_name = collection_name
+        self.client = None
+        self.encoder = None
+    def connect(self):
+        """Establish connection to Qdrant"""
+        try:
+            # For local development, use in-memory mode
+            self.client = QdrantClient(":memory:")
+            logger.info("✅ Connected to Qdrant (in-memory mode)")
+            # Initialize sentence transformer for embeddings
+            self.encoder = SentenceTransformer('all-MiniLM-L6-v2')
+            logger.info("✅ Loaded embedding model")
+            return True
+        except Exception as e:
+            logger.error(f"❌ Qdrant connection failed: {e}")
+            return False
+    def create_collection(self, vector_size: int = 384):
+        """Create a new collection for bug solutions"""
+        try:
+            self.client.create_collection(
+                collection_name=self.collection_name,
+                vectors_config=VectorParams(
+                    size=vector_size,
+                    distance=Distance.COSINE
+                )
+            )
+            logger.info(f"✅ Created collection: {self.collection_name}")
+            return True
+        except Exception as e:
+            logger.error(f"❌ Collection creation failed: {e}")
+            return False
+    def add_solution(self, solution_id: str, text: str, metadata: dict):
+        """Add a bug solution to vector store"""
+        try:
+            # Generate embedding
+            vector = self.encoder.encode(text).tolist()
+            # Create point
+            point = PointStruct(
+                id=solution_id,
+                vector=vector,
+                payload={
+                    "text": text,
+                    **metadata
+                }
+            )
+            self.client.upsert(
+                collection_name=self.collection_name,
+                points=[point]
+            )
+            logger.info(f"✅ Added solution: {solution_id}")
+            return True
+        except Exception as e:
+            logger.error(f"❌ Failed to add solution: {e}")
+            return False
+    def search_similar(self, query: str, limit: int = 5):
+        """Search for similar bug solutions"""
+        try:
+            # Generate query embedding
+            query_vector = self.encoder.encode(query).tolist()
+            # Search
+            results = self.client.search(
+                collection_name=self.collection_name,
+                query_vector=query_vector,
+                limit=limit
+            )
+            return results
+        except Exception as e:
+            logger.error(f"❌ Search failed: {e}")
+            return []
+# Example usage
+if __name__ == "__main__":
+    store = QdrantVectorStore()
+    if store.connect():
+        store.create_collection()
+        print("Qdrant vector store ready!")