Spaces:

minhan6559
/

Log-Analysis-MultiAgent

Running

App Files Community

minhan6559 committed 15 days ago

Commit

56517e7

verified ·

1 Parent(s): 22c850f

update progress bar for streamlit

Browse files

Files changed (5) hide show

app.py +24 -21
src/agents/log_analysis_agent/__pycache__/prompts.cpython-311.pyc +0 -0
src/agents/log_analysis_agent/__pycache__/utils.cpython-311.pyc +0 -0
src/full_pipeline/__pycache__/simple_pipeline.cpython-311.pyc +0 -0
src/full_pipeline/simple_pipeline.py +46 -13

app.py CHANGED Viewed

@@ -10,6 +10,7 @@ import os
 import sys
 import tempfile
 import shutil
 import streamlit as st
 from pathlib import Path
 from typing import Dict, Any, Optional
@@ -87,6 +88,9 @@ def run_analysis(
     temp_dirs: Dict[str, str],
     api_key: str,
     provider: str,
 ) -> Dict[str, Any]:
     """Run the cybersecurity analysis pipeline."""
@@ -106,8 +110,11 @@ def run_analysis(
             tactic=None,
             model_name=model_name,
             temperature=0.1,
             log_agent_output_dir=temp_dirs["analysis"],
             response_agent_output_dir=temp_dirs["final_response"],
         )
         return {"success": True, "result": result}
     except Exception as e:
@@ -181,14 +188,6 @@ def main():
             help=f"Your {selected_provider} API key",
         )
-        # Additional query
-        st.subheader("Additional Context")
-        user_query = st.text_area(
-            "Optional Query",
-            placeholder="e.g., 'Focus on credential access attacks'",
-            help="Provide additional context or specific focus areas for the analysis",
-        )
     # Main content area
     col1, col2 = st.columns([2, 1])
@@ -237,19 +236,31 @@ def main():
             status_text.text("Initializing analysis...")
             progress_bar.progress(10)
-            # Run analysis
-            status_text.text("Running cybersecurity analysis...")
-            progress_bar.progress(50)
             analysis_result = run_analysis(
                 log_file_path=log_file_path,
                 model_name=selected_model,
-                query=user_query,
                 temp_dirs=temp_dirs,
                 api_key=api_key,
                 provider=selected_provider,
             )
             progress_bar.progress(90)
             status_text.text("Finalizing results...")
@@ -278,15 +289,7 @@ def main():
                     st.metric("Abnormal Events", len(abnormal_events))
                 with col3:
-                    execution_time = result.get("execution_time", "N/A")
-                    st.metric(
-                        "Execution Time",
-                        (
-                            f"{execution_time:.2f}s"
-                            if isinstance(execution_time, (int, float))
-                            else execution_time
-                        ),
-                    )
                 # Show markdown report
                 markdown_report = result.get("markdown_report", "")

 import sys
 import tempfile
 import shutil
+import time
 import streamlit as st
 from pathlib import Path
 from typing import Dict, Any, Optional
     temp_dirs: Dict[str, str],
     api_key: str,
     provider: str,
+    max_log_analysis_iterations: int,
+    max_retrieval_iterations: int,
+    progress_callback=None,
 ) -> Dict[str, Any]:
     """Run the cybersecurity analysis pipeline."""
             tactic=None,
             model_name=model_name,
             temperature=0.1,
+            max_log_analysis_iterations=max_log_analysis_iterations,
+            max_retrieval_iterations=max_retrieval_iterations,
             log_agent_output_dir=temp_dirs["analysis"],
             response_agent_output_dir=temp_dirs["final_response"],
+            progress_callback=progress_callback,
         )
         return {"success": True, "result": result}
     except Exception as e:
             help=f"Your {selected_provider} API key",
         )
     # Main content area
     col1, col2 = st.columns([2, 1])
             status_text.text("Initializing analysis...")
             progress_bar.progress(10)
+            # Start timing
+            start_time = time.time()
+            # Create progress callback
+            def update_progress(progress: int, message: str):
+                progress_bar.progress(progress)
+                status_text.text(message)
+            # Run analysis
             analysis_result = run_analysis(
                 log_file_path=log_file_path,
                 model_name=selected_model,
+                query="",
                 temp_dirs=temp_dirs,
                 api_key=api_key,
                 provider=selected_provider,
+                max_log_analysis_iterations=3,
+                max_retrieval_iterations=2,
+                progress_callback=update_progress,
             )
+            # Calculate execution time
+            end_time = time.time()
+            execution_time = end_time - start_time
             progress_bar.progress(90)
             status_text.text("Finalizing results...")
                     st.metric("Abnormal Events", len(abnormal_events))
                 with col3:
+                    st.metric("Execution Time", f"{execution_time:.2f}s")
                 # Show markdown report
                 markdown_report = result.get("markdown_report", "")

src/agents/log_analysis_agent/__pycache__/prompts.cpython-311.pyc CHANGED Viewed

Binary files a/src/agents/log_analysis_agent/__pycache__/prompts.cpython-311.pyc and b/src/agents/log_analysis_agent/__pycache__/prompts.cpython-311.pyc differ

src/agents/log_analysis_agent/__pycache__/utils.cpython-311.pyc CHANGED Viewed

Binary files a/src/agents/log_analysis_agent/__pycache__/utils.cpython-311.pyc and b/src/agents/log_analysis_agent/__pycache__/utils.cpython-311.pyc differ

src/full_pipeline/__pycache__/simple_pipeline.cpython-311.pyc CHANGED Viewed

Binary files a/src/full_pipeline/__pycache__/simple_pipeline.cpython-311.pyc and b/src/full_pipeline/__pycache__/simple_pipeline.cpython-311.pyc differ

src/full_pipeline/simple_pipeline.py CHANGED Viewed

@@ -42,20 +42,12 @@ class PipelineState(TypedDict):
 def create_simple_pipeline(
     model_name: str = "google_genai:gemini-2.0-flash",
     temperature: float = 0.1,
     log_agent_output_dir: str = "analysis",
     response_agent_output_dir: str = "final_response",
 ):
-    """
-    Create the simplified pipeline that directly connects the agents.
-    Args:
-        model_name: Name of the model to use (e.g., "gemini-2.0-flash", "gpt-oss-120b", "llama-3.1-8b-instant")
-        temperature: Temperature for model generation
-    Returns:
-        Compiled pipeline workflow
-    """
     # Initialize LLM client directly
     print("\n" + "=" * 60)
     print("INITIALIZING LLM CLIENT")
@@ -82,11 +74,15 @@ def create_simple_pipeline(
     # Initialize agents with shared LLM client
     log_agent = LogAnalysisAgent(
-        output_dir=log_agent_output_dir, max_iterations=2, llm_client=llm_client
     )
     retrieval_supervisor = RetrievalSupervisor(
-        kb_path="./cyber_knowledge_base", max_iterations=2, llm_client=llm_client
     )
     response_agent = ResponseAgent(
@@ -104,12 +100,18 @@ def create_simple_pipeline(
         log_file = state["log_file"]
         print(f"Analyzing log file: {log_file}")
         # Run log analysis (agent should not print its own phase headers)
         analysis_result = log_agent.analyze(log_file)
         # Store results in state
         state["log_analysis_result"] = analysis_result
         print(
             f"\nLog Analysis Assessment: {analysis_result.get('overall_assessment', 'UNKNOWN')}"
         )
@@ -133,6 +135,9 @@ def create_simple_pipeline(
         print(f"Generated retrieval query based on {assessment} assessment")
         print("\nStarting retrieval supervisor with log analysis context...\n")
         # Run retrieval supervisor with trace=True to show terminal output
         retrieval_result = retrieval_supervisor.invoke(
             query=query,
@@ -141,6 +146,9 @@ def create_simple_pipeline(
             trace=False,  # This shows the agent conversations in terminal
         )
         # Store retrieval results in state
         state["retrieval_result"] = retrieval_result
@@ -153,6 +161,9 @@ def create_simple_pipeline(
         print("=" * 60)
         print("Creating Event ID → MITRE technique mappings...")
         # Run response agent analysis (agent should not print its own phase headers)
         response_analysis, markdown_report = response_agent.analyze_and_map(
             log_analysis_result=state["log_analysis_result"],
@@ -161,6 +172,9 @@ def create_simple_pipeline(
             tactic=state.get("tactic"),
         )
         # Store response analysis in state
         state["response_analysis"] = response_analysis
@@ -246,8 +260,11 @@ def analyze_log_file(
     tactic: str = None,
     model_name: str = "google_genai:gemini-2.0-flash",
     temperature: float = 0.1,
     log_agent_output_dir: str = "analysis",
     response_agent_output_dir: str = "final_response",
 ):
     """
     Analyze a single log file through the integrated pipeline.
@@ -258,6 +275,8 @@ def analyze_log_file(
         tactic: Optional tactic name for organizing output
         model_name: Name of the model to use (e.g., "gemini-2.0-flash", "gpt-oss-120b", "llama-3.1-8b-instant")
         temperature: Temperature for model generation
         log_agent_output_dir: Directory to save log agent output
         response_agent_output_dir: Directory to save response agent output
     """
@@ -276,8 +295,11 @@ def analyze_log_file(
     pipeline = create_simple_pipeline(
         model_name=model_name,
         temperature=temperature,
         log_agent_output_dir=log_agent_output_dir,
         response_agent_output_dir=response_agent_output_dir,
     )
     # Initialize state
@@ -293,9 +315,16 @@ def analyze_log_file(
     # Run pipeline
     start_time = time.time()
     final_state = pipeline.invoke(initial_state)
     end_time = time.time()
     print(f"\nTotal execution time: {end_time - start_time:.2f} seconds")
     print("Analysis complete!")
     return final_state
@@ -326,6 +355,8 @@ def main():
     query = None
     model_name = "gemini-2.0-flash"  # Default model
     temperature = 0.1
     log_agent_output_dir = "analysis"
     response_agent_output_dir = "final_response"
@@ -350,6 +381,8 @@ def main():
             tactic=None,
             model_name=model_name,
             temperature=temperature,
             log_agent_output_dir=log_agent_output_dir,
             response_agent_output_dir=response_agent_output_dir,
         )

 def create_simple_pipeline(
     model_name: str = "google_genai:gemini-2.0-flash",
     temperature: float = 0.1,
+    max_log_analysis_iterations: int = 2,
+    max_retrieval_iterations: int = 2,
     log_agent_output_dir: str = "analysis",
     response_agent_output_dir: str = "final_response",
+    progress_callback=None,
 ):
     # Initialize LLM client directly
     print("\n" + "=" * 60)
     print("INITIALIZING LLM CLIENT")
     # Initialize agents with shared LLM client
     log_agent = LogAnalysisAgent(
+        output_dir=log_agent_output_dir,
+        max_iterations=max_log_analysis_iterations,
+        llm_client=llm_client,
     )
     retrieval_supervisor = RetrievalSupervisor(
+        kb_path="./cyber_knowledge_base",
+        max_iterations=max_retrieval_iterations,
+        llm_client=llm_client,
     )
     response_agent = ResponseAgent(
         log_file = state["log_file"]
         print(f"Analyzing log file: {log_file}")
+        if progress_callback:
+            progress_callback(20, "Running log analysis...")
         # Run log analysis (agent should not print its own phase headers)
         analysis_result = log_agent.analyze(log_file)
         # Store results in state
         state["log_analysis_result"] = analysis_result
+        if progress_callback:
+            progress_callback(40, "Log analysis completed")
         print(
             f"\nLog Analysis Assessment: {analysis_result.get('overall_assessment', 'UNKNOWN')}"
         )
         print(f"Generated retrieval query based on {assessment} assessment")
         print("\nStarting retrieval supervisor with log analysis context...\n")
+        if progress_callback:
+            progress_callback(50, "Running threat intelligence retrieval...")
         # Run retrieval supervisor with trace=True to show terminal output
         retrieval_result = retrieval_supervisor.invoke(
             query=query,
             trace=False,  # This shows the agent conversations in terminal
         )
+        if progress_callback:
+            progress_callback(70, "Threat intelligence retrieval completed")
         # Store retrieval results in state
         state["retrieval_result"] = retrieval_result
         print("=" * 60)
         print("Creating Event ID → MITRE technique mappings...")
+        if progress_callback:
+            progress_callback(80, "Running response correlation analysis...")
         # Run response agent analysis (agent should not print its own phase headers)
         response_analysis, markdown_report = response_agent.analyze_and_map(
             log_analysis_result=state["log_analysis_result"],
             tactic=state.get("tactic"),
         )
+        if progress_callback:
+            progress_callback(90, "Response analysis completed")
         # Store response analysis in state
         state["response_analysis"] = response_analysis
     tactic: str = None,
     model_name: str = "google_genai:gemini-2.0-flash",
     temperature: float = 0.1,
+    max_log_analysis_iterations: int = 2,
+    max_retrieval_iterations: int = 2,
     log_agent_output_dir: str = "analysis",
     response_agent_output_dir: str = "final_response",
+    progress_callback=None,
 ):
     """
     Analyze a single log file through the integrated pipeline.
         tactic: Optional tactic name for organizing output
         model_name: Name of the model to use (e.g., "gemini-2.0-flash", "gpt-oss-120b", "llama-3.1-8b-instant")
         temperature: Temperature for model generation
+        max_log_analysis_iterations: Maximum number of iterations for the log analysis agent
+        max_retrieval_iterations: Maximum number of iterations for the retrieval supervisor
         log_agent_output_dir: Directory to save log agent output
         response_agent_output_dir: Directory to save response agent output
     """
     pipeline = create_simple_pipeline(
         model_name=model_name,
         temperature=temperature,
+        max_log_analysis_iterations=max_log_analysis_iterations,
+        max_retrieval_iterations=max_retrieval_iterations,
         log_agent_output_dir=log_agent_output_dir,
         response_agent_output_dir=response_agent_output_dir,
+        progress_callback=progress_callback,
     )
     # Initialize state
     # Run pipeline
     start_time = time.time()
+    if progress_callback:
+        progress_callback(10, "Initializing pipeline...")
     final_state = pipeline.invoke(initial_state)
     end_time = time.time()
+    if progress_callback:
+        progress_callback(100, "Analysis complete!")
     print(f"\nTotal execution time: {end_time - start_time:.2f} seconds")
     print("Analysis complete!")
     return final_state
     query = None
     model_name = "gemini-2.0-flash"  # Default model
     temperature = 0.1
+    max_log_analysis_iterations = 2
+    max_retrieval_iterations = 2
     log_agent_output_dir = "analysis"
     response_agent_output_dir = "final_response"
             tactic=None,
             model_name=model_name,
             temperature=temperature,
+            max_log_analysis_iterations=max_log_analysis_iterations,
+            max_retrieval_iterations=max_retrieval_iterations,
             log_agent_output_dir=log_agent_output_dir,
             response_agent_output_dir=response_agent_output_dir,
         )