# Enhanced MCP Coding Guidelines Server — Hugging Face Space app
# Third-party: requests (GitHub REST API client), gradio (UI + MCP server).
import os, hashlib, re, base64, requests, gradio as gr
from typing import List, Dict, Optional, Any
import json

# GitHub REST API root URL.
GH = "https://api.github.com"
# Personal access token for authenticated requests (higher rate limits).
TOKEN = os.getenv("GITHUB_TOKEN")
# Repository holding the cursorrules prompt files; overridable via env var.
RULES_REPO = os.getenv("RULES_REPO", "stefanoallima/awesome-cursorrules")
# Default git reference (branch/tag/SHA) to read from.
DEFAULT_REF = os.getenv("DEFAULT_REF", "main")
def _hdr():
    """Build the request headers for authenticated GitHub API calls."""
    headers = {
        "Authorization": f"Bearer {TOKEN}",
        "Accept": "application/vnd.github+json",
    }
    return headers
| def _sha256(b): | |
| return hashlib.sha256(b).hexdigest() | |
def get_readme_content(ref: Optional[str] = None) -> str:
    """Fetch the repository README (decoded UTF-8 text) for context.

    Args:
        ref: Git reference (branch, tag, or SHA); defaults to DEFAULT_REF.

    Returns:
        The README text, or an "Error fetching README: ..." string on any
        failure — callers rely on this error-string convention, so no
        exception escapes.
    """
    ref = ref or DEFAULT_REF
    try:
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/contents/README.md?ref={ref}",
            headers=_hdr(),
            timeout=30,  # fail fast instead of hanging the UI on a stalled connection
        )
        r.raise_for_status()
        j = r.json()
        # The contents API normally base64-encodes file bodies.
        raw = base64.b64decode(j["content"]) if j.get("encoding") == "base64" else j["content"].encode()
        return raw.decode("utf-8", "replace")
    except Exception as e:
        return f"Error fetching README: {str(e)}"
def extract_available_technologies(ref: Optional[str] = None) -> List[str]:
    """Extract all available technologies from the rules directory.

    Args:
        ref: Git reference; defaults to DEFAULT_REF.

    Returns:
        A sorted list of human-readable technology names, or a one-element
        ``["Error: ..."]`` list on failure (error-string convention).
    """
    ref = ref or DEFAULT_REF
    try:
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/git/trees/{ref}?recursive=1",
            headers=_hdr(),
            timeout=30,  # fail fast instead of hanging on a stalled connection
        )
        r.raise_for_status()
        technologies = set()  # set gives O(1) dedup vs. repeated list scans
        for item in r.json().get("tree", []):
            if item.get("type") == "blob" and item["path"].startswith("rules/"):
                path_parts = item["path"].split("/")
                if len(path_parts) >= 2:
                    tech_dir = path_parts[1]
                    # Directory names look like "<tech>-cursorrules-prompt-file".
                    technologies.add(tech_dir.replace("-cursorrules-prompt-file", "").replace("-", " "))
        return sorted(technologies)
    except Exception as e:
        return [f"Error: {str(e)}"]
# Loop-invariant alias table mapping a requested tech to substrings of
# related technology names (hoisted out of the matching loops, where the
# original rebuilt this dict literal on every inner iteration).
_TECH_MAPPINGS = {
    'python': ['python', 'django', 'fastapi', 'flask'],
    'javascript': ['javascript', 'js', 'node', 'react', 'vue', 'angular'],
    'typescript': ['typescript', 'ts', 'react', 'angular', 'nextjs'],
    'react': ['react', 'nextjs', 'typescript'],
    'vue': ['vue', 'vuejs', 'nuxt'],
    'node': ['node', 'nodejs', 'javascript'],
    'postgres': ['postgres', 'postgresql', 'database'],
    'fastapi': ['fastapi', 'python', 'api'],
    'nextjs': ['nextjs', 'next', 'react', 'typescript']
}

def semantic_match_technologies(requested_techs: List[str], available_techs: List[str]) -> Dict[str, List[str]]:
    """Use simple semantic matching to find relevant technologies.

    Matching order per (requested, available) pair: exact (case-insensitive),
    then substring containment in either direction, then the alias table.

    Args:
        requested_techs: Technology names the caller asked about.
        available_techs: Technology names known to exist in the repo.

    Returns:
        Mapping of each requested name to the (possibly empty) list of
        matching available names, in available_techs order.
    """
    matches = {}
    for requested in requested_techs:
        requested_lower = requested.lower()
        matched_techs = []
        for available in available_techs:
            available_lower = available.lower()
            # Direct match
            if requested_lower == available_lower:
                matched_techs.append(available)
                continue
            # Partial match (contains, either direction)
            if requested_lower in available_lower or available_lower in requested_lower:
                matched_techs.append(available)
                continue
            # Alias-table match: any mapped substring present in the name.
            if requested_lower in _TECH_MAPPINGS:
                for mapped_tech in _TECH_MAPPINGS[requested_lower]:
                    if mapped_tech in available_lower:
                        matched_techs.append(available)
                        break
        matches[requested] = matched_techs
    return matches
def list_rules(tech_key: Optional[str] = None, ref: Optional[str] = None) -> List[Dict[str, Any]]:
    """List available coding rules with enhanced metadata.

    Args:
        tech_key: Optional case-insensitive substring filter on the
            technology name; None lists everything.
        ref: Git reference; defaults to DEFAULT_REF.

    Returns:
        A list of rule-metadata dicts (one per blob under ``rules/``), or
        ``[{"error": ...}]`` on failure.
    """
    ref = ref or DEFAULT_REF
    try:
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/git/trees/{ref}?recursive=1",
            headers=_hdr(),
            timeout=30,  # fail fast instead of hanging on a stalled connection
        )
        r.raise_for_status()
        rules = []
        for item in r.json().get("tree", []):
            if item.get("type") == "blob" and item["path"].startswith("rules/"):
                path_parts = item["path"].split("/")
                if len(path_parts) >= 2:
                    tech_dir = path_parts[1]
                    # Directory names look like "<tech>-cursorrules-prompt-file".
                    tech_name = tech_dir.replace("-cursorrules-prompt-file", "").replace("-", " ")
                    if not tech_key or tech_key.lower() in tech_name.lower():
                        rules.append({
                            "tech_key": tech_name,
                            "directory": tech_dir,
                            "path": item["path"],
                            "repo": RULES_REPO,
                            "commit_sha": ref,
                            "url": f"https://github.com/{RULES_REPO}/tree/{ref}/{item['path']}"
                        })
        return rules
    except Exception as e:
        return [{"error": str(e)}]
def fetch_rule_content(tech_directory: str, ref: Optional[str] = None) -> Dict[str, Any]:
    """Fetch the actual rule content from a technology directory.

    Args:
        tech_directory: Directory name under ``rules/`` (e.g.
            "python-cursorrules-prompt-file").
        ref: Git reference; defaults to DEFAULT_REF.

    Returns:
        A dict with the rule text plus provenance metadata (filename,
        repo, sha256, url), or ``{"error": ...}`` on any failure.
    """
    ref = ref or DEFAULT_REF
    try:
        # List the files inside the specific rule directory.
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/contents/rules/{tech_directory}?ref={ref}",
            headers=_hdr(),
            timeout=30,  # fail fast instead of hanging on a stalled connection
        )
        r.raise_for_status()
        files = r.json()
        if not isinstance(files, list):
            files = [files]  # a single-file path comes back as a bare object
        # Look for the first .cursorrules or .md file.
        rule_file = None
        for entry in files:
            if entry["name"].endswith(('.cursorrules', '.md')):
                rule_file = entry
                break
        if not rule_file:
            return {"error": f"No rule file found in {tech_directory}"}
        # Download the raw file body (download_url needs no auth headers).
        content_r = requests.get(rule_file["download_url"], timeout=30)
        content_r.raise_for_status()
        return {
            "tech_key": tech_directory.replace("-cursorrules-prompt-file", "").replace("-", " "),
            "filename": rule_file["name"],
            "content": content_r.text,
            "directory": tech_directory,
            "repo": RULES_REPO,
            "commit_sha": ref,
            "sha256": _sha256(content_r.content),
            "url": rule_file["html_url"]
        }
    except Exception as e:
        return {"error": str(e)}
def fetch_rule(tech_key: str, ref: str = None) -> Dict[str, Any]:
    """Fetch the rule for one technology, with semantic-matching fallback.

    Tries a direct listing match first; if nothing is found, matches the
    requested key against every available technology name.
    """
    ref = ref or DEFAULT_REF
    # Fast path: a direct listing hit.
    direct_hits = list_rules(tech_key=tech_key, ref=ref)
    if direct_hits and "error" not in direct_hits[0]:
        return fetch_rule_content(direct_hits[0]["directory"], ref)
    # Fallback: semantic matching over everything available at this ref.
    available = extract_available_technologies(ref)
    candidates = semantic_match_technologies([tech_key], available).get(tech_key)
    if candidates:
        # Take the first candidate and rebuild its directory name.
        directory = candidates[0].replace(" ", "-") + "-cursorrules-prompt-file"
        return fetch_rule_content(directory, ref)
    return {"error": f"No rule found for '{tech_key}' in {RULES_REPO}@{ref}"}
def get_guidelines_for_stack(tech_stack: List[str], ref: str = None) -> Dict[str, Any]:
    """Collect coding guidelines for every technology in a stack.

    Resolves each requested technology to up to three matching rule files
    and returns them together with the full match/availability metadata.
    """
    ref = ref or DEFAULT_REF
    available_techs = extract_available_technologies(ref)
    matches = semantic_match_technologies(tech_stack, available_techs)
    guidelines = {}
    for requested, candidates in matches.items():
        collected = []
        # Cap at the top three candidates per requested technology.
        for candidate in candidates[:3]:
            directory = candidate.replace(" ", "-") + "-cursorrules-prompt-file"
            content = fetch_rule_content(directory, ref)
            if "error" not in content:
                collected.append(content)
        guidelines[requested] = collected
    return {
        "tech_stack": tech_stack,
        "guidelines": guidelines,
        "available_technologies": available_techs,
        "matches": matches,
        "repo": RULES_REPO,
        "commit_sha": ref
    }
def analyze_project_stack(framework_list: str, ref: str = None) -> Dict[str, Any]:
    """Analyze a project's technology stack and return relevant guidelines.

    Accepts a comma- and/or newline-separated list of technology names,
    gathers guidelines for each, and adds README context plus a summary.
    """
    ref = ref or DEFAULT_REF
    # Normalize separators, then keep every non-blank entry.
    pieces = (part.strip() for part in framework_list.replace(",", "\n").split("\n"))
    techs = [piece for piece in pieces if piece]
    if not techs:
        return {"error": "No technologies found in the provided list"}
    # Get README for context.
    readme_content = get_readme_content(ref)
    # Truncate long READMEs so the response stays compact.
    readme_preview = readme_content[:1000] + "..." if len(readme_content) > 1000 else readme_content
    # Get guidelines for the entire stack.
    stack_guidelines = get_guidelines_for_stack(techs, ref)
    matched_count = len([g for g in stack_guidelines['guidelines'].values() if g])
    return {
        "project_analysis": {
            "detected_technologies": techs,
            "readme_context": readme_preview,
        },
        "guidelines": stack_guidelines,
        "summary": f"Found guidelines for {matched_count} out of {len(techs)} requested technologies"
    }
# Gradio Interface: three tabs over the lookup functions above, plus the
# same functions exposed as MCP/HTTP API endpoints via gr.api.
with gr.Blocks(title="Enhanced MCP Coding Guidelines Server") as demo:
    gr.Markdown("# 🚀 Enhanced MCP Coding Guidelines Server")
    gr.Markdown("Intelligent coding guideline retrieval with semantic matching")
    # Tab 1: fetch guidelines for a single technology.
    with gr.Tab("Single Technology"):
        with gr.Row():
            tech_input = gr.Textbox(label="Technology", placeholder="e.g., python, react, fastapi")
            ref_input = gr.Textbox(label="Git Reference", value="main", placeholder="main")
        fetch_btn = gr.Button("Fetch Guidelines")
        single_output = gr.JSON(label="Guidelines")
        fetch_btn.click(
            fn=fetch_rule,
            inputs=[tech_input, ref_input],
            outputs=single_output
        )
    # Tab 2: analyze a whole comma/newline-separated technology stack.
    with gr.Tab("Technology Stack"):
        stack_input = gr.Textbox(
            label="Technology Stack",
            placeholder="python, fastapi, postgres, react, typescript",
            lines=3
        )
        stack_ref_input = gr.Textbox(label="Git Reference", value="main")
        analyze_btn = gr.Button("Analyze Stack")
        stack_output = gr.JSON(label="Stack Analysis")
        analyze_btn.click(
            fn=analyze_project_stack,
            inputs=[stack_input, stack_ref_input],
            outputs=stack_output
        )
    # Tab 3: enumerate every technology that has rules in the repo.
    with gr.Tab("Available Technologies"):
        list_ref_input = gr.Textbox(label="Git Reference", value="main")
        list_btn = gr.Button("List Available Technologies")
        list_output = gr.JSON(label="Available Technologies")
        list_btn.click(
            fn=extract_available_technologies,
            inputs=[list_ref_input],
            outputs=list_output
        )
    # Register MCP API endpoints (exposed when launched with mcp_server=True).
    gr.api(fn=list_rules)
    gr.api(fn=fetch_rule)
    gr.api(fn=get_guidelines_for_stack)
    gr.api(fn=analyze_project_stack)
    gr.api(fn=extract_available_technologies)

if __name__ == "__main__":
    # mcp_server=True also serves the registered functions over MCP.
    demo.launch(mcp_server=True)