Spaces:

nihalaninihal
/

Thinking_Github_repo_analyzer

Sleeping

App Files Files Community

nihalaninihal committed on Jan 31

Commit

a60b4de

verified ·

1 Parent(s): 0916430

Update app.py

Browse files

Files changed (1) hide show

app.py +577 -59

app.py CHANGED Viewed

@@ -1,64 +1,582 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
 )
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+import google.generativeai as genai
+import os
+from dotenv import load_dotenv
+from github import Github
+import json
+from pathlib import Path
+from datetime import datetime
+from collections import defaultdict
+import base64
+from typing import Dict, List, Any, Optional, Tuple, Iterator
+from dataclasses import dataclass
+import tempfile
+from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
+import time
+# Load environment variables
+load_dotenv()
+# Configure API keys
+GITHUB_TOKEN = os.getenv("github_api")
+GEMINI_API_KEY = os.getenv("gemini_api")
+if not GITHUB_TOKEN or not GEMINI_API_KEY:
+    raise ValueError("Both GITHUB_TOKEN and GEMINI_API_KEY must be set in environment")
+# Initialize APIs
+gh = Github(GITHUB_TOKEN)
+genai.configure(api_key=GEMINI_API_KEY)
+model = genai.GenerativeModel(
+    model_name="gemini-2.0-flash-thinking-exp-01-21",
+    generation_config={
+        "temperature": 1,
+        "top_p": 0.95,
+        "top_k": 40,
+        "max_output_tokens": 8192,
+        "response_mime_type": "text/plain",
+    },
+    safety_settings=[
+        {
+            "category": "HARM_CATEGORY_HARASSMENT",
+            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
+        },
+        {
+            "category": "HARM_CATEGORY_HATE_SPEECH",
+            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
+        },
+        {
+            "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
+        },
+        {
+            "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
+        },
+    ]
+)
+RELEVANT_EXTENSIONS = {
+    ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".cpp", ".c", ".h",
+    ".hpp", ".rb", ".php", ".go", ".rs", ".swift", ".kt"
+}
+@dataclass
+class ChatMessage:
+    """A single chat entry produced while streaming model output."""
+    # Speaker of the message; every instance created in this file uses "assistant".
+    role: str
+    # Text of the message.
+    content: str
+    # Optional extra fields; this file stores a "title" entry here for the
+    # thought-process message. Defaults to None when no extras are given.
+    metadata: Optional[Dict[str, Any]] = None
+class ThinkingAnalyzer:
+    """Handles streaming thoughts and responses from Gemini model"""
+    def __init__(self, model):
+        self.model = model
+    def stream_analysis(self, analysis_data: Dict[str, Any], system_prompt: str) -> Iterator[List[ChatMessage]]:
+        """Streams analysis with visible thinking process"""
+        # Format the prompt
+        prompt = f"{system_prompt}\n\nRepository Analysis Data:\n{json.dumps(analysis_data, indent=2)}"
+        # Initialize streaming response
+        response = self.model.generate_content(prompt, stream=True)
+        messages = []
+        thought_buffer = ""
+        response_buffer = ""
+        thinking_complete = False
+        # Add initial thinking message
+        messages.append(
+            ChatMessage(
+                role="assistant",
+                content="",
+                metadata={"title": "⏳ Analyzing Repository: Thought Process"}
+            )
+        )
+        for chunk in response:
+            parts = chunk.candidates[0].content.parts
+            current_chunk = parts[0].text
+            if len(parts) == 2 and not thinking_complete:
+                # Complete thought and start response
+                thought_buffer += current_chunk
+                messages[-1] = ChatMessage(
+                    role="assistant",
+                    content=thought_buffer,
+                    metadata={"title": "⏳ Analysis Thought Process"}
+                )
+                # Add response message
+                messages.append(
+                    ChatMessage(
+                        role="assistant",
+                        content=parts[1].text
+                    )
+                )
+                thinking_complete = True
+            elif thinking_complete:
+                # Continue streaming response
+                response_buffer += current_chunk
+                messages[-1] = ChatMessage(
+                    role="assistant",
+                    content=response_buffer
+                )
+            else:
+                # Continue streaming thoughts
+                thought_buffer += current_chunk
+                messages[-1] = ChatMessage(
+                    role="assistant",
+                    content=thought_buffer,
+                    metadata={"title": "⏳ Analysis Thought Process"}
+                )
+            yield messages
+    def stream_question_response(self, question: str, analysis_data: Dict[str, Any],
+                               chat_history: List[Tuple[str, str]]) -> Iterator[List[ChatMessage]]:
+        """Streams response to follow-up questions with thinking process"""
+        # Build context
+        context = "You are an expert code analyst helping users understand repository analysis results.\n\n"
+        context += f"Repository Analysis Data:\n{json.dumps(analysis_data, indent=2)}\n\n"
+        if chat_history:
+            context += "Previous conversation:\n"
+            for user_msg, assistant_msg in chat_history:
+                context += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
+        prompt = context + f"\nUser: {question}\nPlease think through your analysis:"
+        # Use stream_analysis with the constructed prompt
+        yield from self.stream_analysis(analysis_data, prompt)
+class RepositoryAnalyzer:
+    """Handles GitHub repository analysis"""
+    def __init__(self, repo_url: str):
+        # Extract owner and repo name from URL
+        parts = repo_url.rstrip('/').split('/')
+        if len(parts) < 2:
+            raise ValueError("Invalid repository URL format")
+        self.repo_name = parts[-1]
+        self.owner = parts[-2]
+        self.repo = gh.get_repo(f"{self.owner}/{self.repo_name}")
+        self.analysis_data: Dict[str, Any] = {}
+    def analyze(self) -> Dict[str, Any]:
+        """Perform complete repository analysis"""
+        try:
+            # Basic repository information
+            self.analysis_data["basic_info"] = {
+                "name": self.repo.name,
+                "owner": self.repo.owner.login,
+                "description": self.repo.description or "No description available",
+                "stars": self.repo.stargazers_count,
+                "forks": self.repo.forks_count,
+                "created_at": self.repo.created_at.isoformat(),
+                "last_updated": self.repo.updated_at.isoformat(),
+                "primary_language": self.repo.language or "Not specified",
+            }
+            # Analyze repository structure
+            self.analysis_data["structure"] = self._analyze_structure()
+            # Analyze code patterns
+            self.analysis_data["code_patterns"] = self._analyze_code_patterns()
+            # Analyze commit history
+            self.analysis_data["commit_history"] = self._analyze_commits()
+            # Get contributor statistics
+            self.analysis_data["contributors"] = self._analyze_contributors()
+            return self.analysis_data
+        except Exception as e:
+            raise Exception(f"Error analyzing repository: {str(e)}")
+    def _analyze_structure(self) -> Dict[str, Any]:
+        """Analyze repository structure and organization"""
+        structure = {
+            "files": defaultdict(int),
+            "directories": set(),
+            "total_size": 0,
+        }
+        try:
+            contents = self.repo.get_contents("")
+            while contents:
+                content = contents.pop(0)
+                if content.type == "dir":
+                    structure["directories"].add(content.path)
+                    contents.extend(self.repo.get_contents(content.path))
+                else:
+                    ext = Path(content.path).suffix.lower()
+                    if ext in RELEVANT_EXTENSIONS:
+                        structure["files"][ext] += 1
+                        structure["total_size"] += content.size
+        except Exception as e:
+            print(f"Error analyzing structure: {str(e)}")
+        return {
+            "file_types": dict(structure["files"]),
+            "directory_count": len(structure["directories"]),
+            "total_size": structure["total_size"],
+            "file_count": sum(structure["files"].values())
+        }
+    def _analyze_code_patterns(self) -> Dict[str, Any]:
+        """Analyze code patterns and style"""
+        patterns = {
+            "samples": [],
+            "languages": defaultdict(int),
+            "complexity_metrics": defaultdict(list)
+        }
+        try:
+            files = self.repo.get_contents("")
+            analyzed = 0
+            while files and analyzed < 5:
+                file = files.pop(0)
+                if file.type == "dir":
+                    files.extend(self.repo.get_contents(file.path))
+                elif Path(file.path).suffix.lower() in RELEVANT_EXTENSIONS:
+                    try:
+                        content = base64.b64decode(file.content).decode('utf-8')
+                        lines = content.splitlines()
+                        if not lines:
+                            continue
+                        loc = len([line for line in lines if line.strip()])
+                        avg_line_length = sum(len(line) for line in lines) / len(lines)
+                        patterns["samples"].append({
+                            "path": file.path,
+                            "language": Path(file.path).suffix[1:],
+                            "loc": loc,
+                            "avg_line_length": round(avg_line_length, 2)
+                        })
+                        patterns["languages"][Path(file.path).suffix[1:]] += loc
+                        patterns["complexity_metrics"]["loc"].append(loc)
+                        patterns["complexity_metrics"]["avg_line_length"].append(avg_line_length)
+                        analyzed += 1
+                    except Exception as e:
+                        print(f"Error analyzing file {file.path}: {str(e)}")
+                        continue
+        except Exception as e:
+            print(f"Error in code pattern analysis: {str(e)}")
+        return patterns
+    def _analyze_commits(self) -> Dict[str, Any]:
+        """Analyze commit history and patterns"""
+        commit_data = []
+        commit_times = []
+        try:
+            commits = list(self.repo.get_commits()[:100])  # Get last 100 commits
+            for commit in commits:
+                try:
+                    commit_info = {
+                        "sha": commit.sha,
+                        "author": commit.author.login if commit.author else "Unknown",
+                        "date": commit.commit.author.date.isoformat(),
+                        "message": commit.commit.message,
+                        "changes": {
+                            "additions": commit.stats.additions,
+                            "deletions": commit.stats.deletions,
+                        }
+                    }
+                    commit_data.append(commit_info)
+                    commit_times.append(commit.commit.author.date.hour)
+                except Exception as e:
+                    print(f"Error processing commit {commit.sha}: {str(e)}")
+                    continue
+            # Analyze commit patterns
+            commit_hours = defaultdict(int)
+            for hour in commit_times:
+                commit_hours[hour] += 1
+            total_commits = len(commit_data)
+            return {
+                "commits": commit_data,
+                "total_commits": total_commits,
+                "commit_hours": dict(commit_hours),
+                "avg_additions": sum(c["changes"]["additions"] for c in commit_data) / total_commits if total_commits else 0,
+                "avg_deletions": sum(c["changes"]["deletions"] for c in commit_data) / total_commits if total_commits else 0,
+            }
+        except Exception as e:
+            print(f"Error in commit analysis: {str(e)}")
+            return {
+                "commits": [],
+                "total_commits": 0,
+                "commit_hours": {},
+                "avg_additions": 0,
+                "avg_deletions": 0
+            }
+    def _analyze_contributors(self) -> Dict[str, Any]:
+        """Analyze contributor statistics"""
+        contributor_data = []
+        try:
+            contributors = list(self.repo.get_contributors())
+            for contributor in contributors:
+                contributor_data.append({
+                    "login": contributor.login,
+                    "contributions": contributor.contributions,
+                    "type": contributor.type,
+                })
+        except Exception as e:
+            print(f"Error analyzing contributors: {str(e)}")
+        return {
+            "total_contributors": len(contributor_data),
+            "contributors": contributor_data
+        }
+@retry(
+    retry=retry_if_exception_type(Exception),
+    stop=stop_after_attempt(3),
+    wait=wait_exponential(multiplier=1, min=4, max=10)
 )
+def process_analysis(repo_url: str, progress=gr.Progress()):
+    """Process repository analysis with visible thinking"""
+    try:
+        # Initialize analysis
+        progress(0, desc="Initializing repository analysis...")
+        analyzer = RepositoryAnalyzer(repo_url)
+        analysis_data = analyzer.analyze()
+        # Initialize thinking analyzer
+        thinking_analyzer = ThinkingAnalyzer(model)
+        # System prompt for analysis
+        system_prompt = """You are an expert code analyst with deep experience in software architecture, development practices, and team dynamics. Analyze the provided repository data and create a detailed, insightful analysis using the following markdown template:
+# Repository Analysis
+## 📊 Project Overview
+[Provide a comprehensive overview including:
+- Project purpose and scope
+- Age and maturity of the project
+- Current activity level and maintenance status
+- Key metrics (stars, forks, etc.)
+- Primary technologies and languages used]
+## 🏗️ Architecture and Code Organization
+[Analyze in detail:
+- Repository structure and organization
+- Code distribution across different technologies
+- File and directory organization patterns
+- Project size and complexity metrics
+- Code modularity and component structure
+- Presence of key architectural patterns]
+## 💻 Development Practices & Code Quality
+[Evaluate:
+- Coding standards and consistency
+- Code complexity and maintainability metrics
+- Documentation practices
+- Testing approach and coverage (if visible)
+- Error handling and logging practices
+- Use of design patterns and best practices]
+## 📈 Development Workflow & History
+[Analyze:
+- Commit patterns and frequency
+- Release cycles and versioning
+- Branch management strategy
+- Code review practices
+- Continuous integration/deployment indicators
+- Peak development periods and cycles]
+## 👥 Team Dynamics & Collaboration
+[Examine:
+- Team size and composition
+- Contribution patterns
+- Core maintainer identification
+- Community engagement level
+- Communication patterns
+- Collaboration efficiency]
+## 🔧 Technical Depth & Innovation
+[Assess:
+- Technical sophistication level
+- Innovative approaches or solutions
+- Complex problem-solving examples
+- Performance optimization efforts
+- Security considerations
+- Scalability approach]
+## 🚀 Project Health & Sustainability
+[Evaluate:
+- Project momentum and growth trends
+- Maintenance patterns
+- Community health indicators
+- Documentation completeness
+- Onboarding friendliness
+- Long-term viability indicators]
+## 💡 Key Insights & Recommendations
+[Provide:
+- 3-5 key strengths identified
+- 3-5 potential improvement areas
+- Notable patterns or practices
+- Unique characteristics
+- Strategic recommendations]"""
+        # Stream thinking and analysis
+        progress(0.5, desc="Generating analysis with thinking process...")
+        messages = []
+        for msg_update in thinking_analyzer.stream_analysis(
+            analysis_data,
+            system_prompt
+        ):
+            messages = msg_update
+        return messages, analysis_data
+    except Exception as e:
+        return
+def process_question(question: str, analysis_data: Dict[str, Any], chat_history: List[str]):
+    """Process follow-up questions with visible thinking"""
+    if not analysis_data:
+        return [ChatMessage(role="assistant", content="Please analyze a repository first before asking questions.")]
+    thinking_analyzer = ThinkingAnalyzer(model)
+    messages = []
+    for msg_update in thinking_analyzer.stream_question_response(
+        question,
+        analysis_data,
+        chat_history
+    ):
+        messages = msg_update
+    return messages
+# Create Gradio interface with thinking visualization
+with gr.Blocks(theme=gr.themes.Soft()) as app:
+    # Page header and usage instructions.
+    gr.Markdown("""
+    # 🔍 GitHub Repository Analyzer with Thinking Process
+    Analyze any public GitHub repository using AI. Watch the AI's thought process as it:
+    1. 📊 Analyzes repository structure and patterns
+    2. 💡 Generates insights about development practices
+    3. 💭 Shows its thinking while answering your follow-up questions
+    Enter a GitHub repository URL (e.g., `https://github.com/owner/repo`)
+    """)
+    # Row 1: repository URL input and the button that starts the analysis.
+    with gr.Row():
+        repo_url = gr.Textbox(
+            label="GitHub Repository URL",
+            placeholder="https://github.com/owner/repo",
+            scale=4
+        )
+        analyze_btn = gr.Button("🔍 Analyze", variant="primary", scale=1)
+    # Status message
+    status_msg = gr.Markdown("", elem_id="status_message")
+    # Row 2: chat panel that displays the analysis and the answers.
+    with gr.Row():
+        chatbot = gr.Chatbot(
+            label="Analysis & Discussion",
+            height=500,
+            show_label=True,
+            render_markdown=True,
+            type="messages"
+        )
+    # Row 3: follow-up question input with ask / clear buttons.
+    with gr.Row():
+        question = gr.Textbox(
+            label="Your Question",
+            placeholder="Ask about the analysis...",
+            scale=4
+        )
+        ask_btn = gr.Button("💭 Ask", variant="primary", scale=1)
+        clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary", scale=1)
+    # Hidden states
+    # analysis_data: result of the last analysis, passed to follow-up questions.
+    analysis_data = gr.State({})
+    # chat_history: list of the question strings asked so far (see update_chat).
+    chat_history = gr.State([])
+    # msg_store: holds the submitted question so the textbox can be cleared
+    # before the question is processed.
+    msg_store = gr.State("")
+    def clear_outputs():
+        """Return empty values for chatbot, analysis_data, chat_history and status_msg, in that order."""
+        return [], {}, [], ""
+    # Set up event handlers with thinking visualization
+    # Analyze: show an in-progress status, run the analysis into the chatbot
+    # and analysis_data, then show the completion status if that step succeeded.
+    analyze_btn.click(
+        fn=lambda: "⏳ Analysis in progress... Watch the thinking process below!",
+        inputs=None,
+        outputs=status_msg,
+        queue=False
+    ).then(
+        process_analysis,
+        inputs=[repo_url],
+        outputs=[chatbot, analysis_data]
+    ).success(
+        lambda: "✅ Analysis complete! You can now ask questions about the repository.",
+        inputs=None,
+        outputs=status_msg
+    )
+    def update_chat(question, history):
+        """Update chat history with user question"""
+        history = history or []
+        # Only the question text is stored; answers are not added to the history.
+        history.append(question)
+        # Outputs map to (msg_store, chat_history, question): the empty
+        # string clears the question textbox.
+        return question, history, ""
+    # Ask: stash the question and clear the textbox, then answer it.
+    # NOTE(review): process_question's return value is the only output written
+    # to `chatbot` here, so it appears to replace what the panel showed
+    # before — confirm this is intended.
+    ask_btn.click(
+        update_chat,
+        inputs=[question, chat_history],
+        outputs=[msg_store, chat_history, question],
+        queue=False
+    ).then(
+        process_question,
+        inputs=[msg_store, analysis_data, chat_history],
+        outputs=chatbot
+    )
+    # Clear: reset the chat panel, stored analysis, history and status text.
+    clear_btn.click(
+        clear_outputs,
+        inputs=None,
+        outputs=[chatbot, analysis_data, chat_history, status_msg],
+        queue=False
+    )
+    # Handle enter key in question input
+    # Same two-step chain as the Ask button.
+    question.submit(
+        update_chat,
+        inputs=[question, chat_history],
+        outputs=[msg_store, chat_history, question],
+        queue=False
+    ).then(
+        process_question,
+        inputs=[msg_store, analysis_data, chat_history],
+        outputs=chatbot
+    )
+# Launch the app
 if __name__ == "__main__":
+    app.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=True,
+        debug=True
+    )