# Source: Hugging Face Space "claude2" (user sallima), commit fe8e34a (verified).
# NOTE(review): the original upload carried file-page residue here; converted
# to a comment so the module parses.
import os, hashlib, re, base64, requests, gradio as gr
from typing import List, Dict, Optional, Any
import json
# GitHub REST API base URL.
GH = "https://api.github.com"
# Personal access token; raises GitHub rate limits for authenticated requests.
TOKEN = os.getenv("GITHUB_TOKEN")
# "owner/name" of the repository that holds the .cursorrules files.
RULES_REPO = os.getenv("RULES_REPO", "stefanoallima/awesome-cursorrules")
# Git ref (branch/tag/SHA) used whenever a caller does not supply one.
DEFAULT_REF = os.getenv("DEFAULT_REF", "main")
def _hdr():
    """Build the standard GitHub API request headers (bearer auth + JSON accept)."""
    headers = {"Accept": "application/vnd.github+json"}
    headers["Authorization"] = f"Bearer {TOKEN}"
    return headers
def _sha256(b):
return hashlib.sha256(b).hexdigest()
def get_readme_content(ref: Optional[str] = None) -> str:
    """Fetch the repository README as text, for use as project context.

    Args:
        ref: Git ref (branch/tag/SHA); falls back to DEFAULT_REF.

    Returns:
        The decoded README content, or an ``"Error fetching README: ..."``
        string on any failure (callers display the result rather than raise).
    """
    ref = ref or DEFAULT_REF
    try:
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/contents/README.md?ref={ref}",
            headers=_hdr(),
            timeout=30,  # fail fast instead of hanging the UI on a stalled connection
        )
        r.raise_for_status()
        j = r.json()
        # The contents API normally base64-encodes file bodies; fall back to
        # treating "content" as plain text if the encoding is anything else.
        raw = base64.b64decode(j["content"]) if j.get("encoding") == "base64" else j["content"].encode()
        return raw.decode("utf-8", "replace")
    except Exception as e:
        return f"Error fetching README: {str(e)}"
def extract_available_technologies(ref: Optional[str] = None) -> List[str]:
    """Extract all available technologies from the rules directory.

    Args:
        ref: Git ref (branch/tag/SHA); falls back to DEFAULT_REF.

    Returns:
        Sorted list of technology names, or ``["Error: ..."]`` on failure
        (callers treat the list as display data either way).
    """
    ref = ref or DEFAULT_REF
    try:
        # Recursive git tree: one request lists every file in the repository.
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/git/trees/{ref}?recursive=1",
            headers=_hdr(),
            timeout=30,  # fail fast instead of hanging on a stalled connection
        )
        r.raise_for_status()
        technologies = set()  # set gives O(1) dedup vs. the old O(n) list scan
        for item in r.json().get("tree", []):
            if item.get("type") == "blob" and item["path"].startswith("rules/"):
                path_parts = item["path"].split("/")
                if len(path_parts) >= 2:
                    # Directories look like "<tech>-cursorrules-prompt-file";
                    # strip the suffix and de-hyphenate to get a display name.
                    tech_name = path_parts[1].replace("-cursorrules-prompt-file", "").replace("-", " ")
                    technologies.add(tech_name)
        return sorted(technologies)
    except Exception as e:
        return [f"Error: {str(e)}"]
# Alias table: a requested technology also matches rules whose name contains
# any of these related terms. Hoisted to module level — the original rebuilt
# this dict literal on every (requested x available) inner-loop iteration.
_TECH_MAPPINGS = {
    'python': ['python', 'django', 'fastapi', 'flask'],
    'javascript': ['javascript', 'js', 'node', 'react', 'vue', 'angular'],
    'typescript': ['typescript', 'ts', 'react', 'angular', 'nextjs'],
    'react': ['react', 'nextjs', 'typescript'],
    'vue': ['vue', 'vuejs', 'nuxt'],
    'node': ['node', 'nodejs', 'javascript'],
    'postgres': ['postgres', 'postgresql', 'database'],
    'fastapi': ['fastapi', 'python', 'api'],
    'nextjs': ['nextjs', 'next', 'react', 'typescript']
}

def semantic_match_technologies(requested_techs: List[str], available_techs: List[str]) -> Dict[str, List[str]]:
    """Use simple semantic matching to find relevant technologies.

    Args:
        requested_techs: Technology names the user asked for.
        available_techs: Technology names actually present in the rules repo.

    Returns:
        Mapping of each requested name to the (possibly empty) list of
        available names it matched, in availability order.
    """
    matches: Dict[str, List[str]] = {}
    for requested in requested_techs:
        requested_lower = requested.lower()
        matched: List[str] = []
        # Aliases for this request (empty when the table has no entry).
        aliases = _TECH_MAPPINGS.get(requested_lower, ())
        for available in available_techs:
            available_lower = available.lower()
            # Match priority: exact, then substring either way, then alias hit.
            if requested_lower == available_lower:
                matched.append(available)
            elif requested_lower in available_lower or available_lower in requested_lower:
                matched.append(available)
            elif any(alias in available_lower for alias in aliases):
                matched.append(available)
        matches[requested] = matched
    return matches
def list_rules(tech_key: Optional[str] = None, ref: Optional[str] = None) -> List[Dict[str, Any]]:
    """List available coding rules with enhanced metadata.

    Args:
        tech_key: Optional case-insensitive substring filter on the technology name.
        ref: Git ref (branch/tag/SHA); falls back to DEFAULT_REF.

    Returns:
        One metadata dict per matching file under ``rules/``, or the
        single-element list ``[{"error": ...}]`` on failure.
    """
    ref = ref or DEFAULT_REF
    try:
        # Recursive git tree: one request covers the whole repository.
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/git/trees/{ref}?recursive=1",
            headers=_hdr(),
            timeout=30,  # fail fast instead of hanging on a stalled connection
        )
        r.raise_for_status()
        rules = []
        for item in r.json().get("tree", []):
            if item.get("type") == "blob" and item["path"].startswith("rules/"):
                path_parts = item["path"].split("/")
                if len(path_parts) >= 2:
                    tech_dir = path_parts[1]
                    # Directories look like "<tech>-cursorrules-prompt-file".
                    tech_name = tech_dir.replace("-cursorrules-prompt-file", "").replace("-", " ")
                    if not tech_key or tech_key.lower() in tech_name.lower():
                        rules.append({
                            "tech_key": tech_name,
                            "directory": tech_dir,
                            "path": item["path"],
                            "repo": RULES_REPO,
                            "commit_sha": ref,
                            "url": f"https://github.com/{RULES_REPO}/tree/{ref}/{item['path']}"
                        })
        return rules
    except Exception as e:
        return [{"error": str(e)}]
def fetch_rule_content(tech_directory: str, ref: Optional[str] = None) -> Dict[str, Any]:
    """Fetch the actual rule content from a technology directory.

    Args:
        tech_directory: Directory name under ``rules/`` (e.g.
            ``"python-cursorrules-prompt-file"``).
        ref: Git ref (branch/tag/SHA); falls back to DEFAULT_REF.

    Returns:
        A dict with the rule text plus provenance metadata, or
        ``{"error": ...}`` when nothing usable is found or a request fails.
    """
    ref = ref or DEFAULT_REF
    try:
        # List the files inside the specific rule directory.
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/contents/rules/{tech_directory}?ref={ref}",
            headers=_hdr(),
            timeout=30,  # fail fast instead of hanging on a stalled connection
        )
        r.raise_for_status()
        files = r.json()
        # The contents API returns a dict for a single file, a list for a dir.
        if not isinstance(files, list):
            files = [files]
        # Take the first .cursorrules or .md file (matches ".cursorrules" itself too).
        rule_file = None
        for entry in files:  # renamed from "file" to avoid shadowing the builtin
            if entry["name"].endswith(('.cursorrules', '.md')):
                rule_file = entry
                break
        if not rule_file:
            return {"error": f"No rule file found in {tech_directory}"}
        # Fetch the raw file content via its download URL (no auth needed).
        content_r = requests.get(rule_file["download_url"], timeout=30)
        content_r.raise_for_status()
        return {
            "tech_key": tech_directory.replace("-cursorrules-prompt-file", "").replace("-", " "),
            "filename": rule_file["name"],
            "content": content_r.text,
            "directory": tech_directory,
            "repo": RULES_REPO,
            "commit_sha": ref,
            "sha256": _sha256(content_r.content),  # integrity fingerprint of raw bytes
            "url": rule_file["html_url"]
        }
    except Exception as e:
        return {"error": str(e)}
def fetch_rule(tech_key: str, ref: str = None) -> Dict[str, Any]:
    """Fetch the rule for *tech_key*, falling back to semantic matching."""
    ref = ref or DEFAULT_REF
    # Attempt an exact/substring directory match first.
    direct_hits = list_rules(tech_key=tech_key, ref=ref)
    if direct_hits and "error" not in direct_hits[0]:
        return fetch_rule_content(direct_hits[0]["directory"], ref)
    # No direct hit: fall back to alias-based matching against everything available.
    candidates = semantic_match_technologies([tech_key], extract_available_technologies(ref)).get(tech_key) or []
    if candidates:
        # Rebuild the directory name from the first (best) candidate.
        directory = candidates[0].replace(" ", "-") + "-cursorrules-prompt-file"
        return fetch_rule_content(directory, ref)
    return {"error": f"No rule found for '{tech_key}' in {RULES_REPO}@{ref}"}
def get_guidelines_for_stack(tech_stack: List[str], ref: str = None) -> Dict[str, Any]:
    """Collect coding guidelines for every technology in *tech_stack*."""
    ref = ref or DEFAULT_REF
    available_techs = extract_available_technologies(ref)
    matches = semantic_match_technologies(tech_stack, available_techs)
    guidelines: Dict[str, List[Dict[str, Any]]] = {}
    for requested, matched in matches.items():
        collected = []
        # Cap at the three best matches per requested technology.
        for tech in matched[:3]:
            directory = tech.replace(" ", "-") + "-cursorrules-prompt-file"
            content = fetch_rule_content(directory, ref)
            if "error" not in content:
                collected.append(content)
        guidelines[requested] = collected
    return {
        "tech_stack": tech_stack,
        "guidelines": guidelines,
        "available_technologies": available_techs,
        "matches": matches,
        "repo": RULES_REPO,
        "commit_sha": ref
    }
def analyze_project_stack(framework_list: str, ref: str = None) -> Dict[str, Any]:
    """Analyze a project's technology stack and return relevant guidelines."""
    ref = ref or DEFAULT_REF
    # Accept comma- and/or newline-separated technology names.
    techs = [part.strip() for part in framework_list.replace(",", "\n").split("\n") if part.strip()]
    if not techs:
        return {"error": "No technologies found in the provided list"}
    # README text gives the caller extra project context.
    readme_content = get_readme_content(ref)
    stack_guidelines = get_guidelines_for_stack(techs, ref)
    # Truncate long READMEs so the response payload stays small.
    if len(readme_content) > 1000:
        readme_excerpt = readme_content[:1000] + "..."
    else:
        readme_excerpt = readme_content
    found = sum(1 for g in stack_guidelines["guidelines"].values() if g)
    return {
        "project_analysis": {
            "detected_technologies": techs,
            "readme_context": readme_excerpt,
        },
        "guidelines": stack_guidelines,
        "summary": f"Found guidelines for {found} out of {len(techs)} requested technologies"
    }
# Gradio Interface
# Three tabs: single-technology lookup, whole-stack analysis, and a listing of
# every technology that has rules available. The same functions are also
# registered as API endpoints so they are reachable over MCP.
with gr.Blocks(title="Enhanced MCP Coding Guidelines Server") as demo:
    gr.Markdown("# 🚀 Enhanced MCP Coding Guidelines Server")
    gr.Markdown("Intelligent coding guideline retrieval with semantic matching")
    with gr.Tab("Single Technology"):
        with gr.Row():
            tech_input = gr.Textbox(label="Technology", placeholder="e.g., python, react, fastapi")
            ref_input = gr.Textbox(label="Git Reference", value="main", placeholder="main")
        fetch_btn = gr.Button("Fetch Guidelines")
        single_output = gr.JSON(label="Guidelines")
        # Button -> fetch_rule(tech, ref); result rendered as JSON.
        fetch_btn.click(
            fn=fetch_rule,
            inputs=[tech_input, ref_input],
            outputs=single_output
        )
    with gr.Tab("Technology Stack"):
        stack_input = gr.Textbox(
            label="Technology Stack",
            placeholder="python, fastapi, postgres, react, typescript",
            lines=3
        )
        stack_ref_input = gr.Textbox(label="Git Reference", value="main")
        analyze_btn = gr.Button("Analyze Stack")
        stack_output = gr.JSON(label="Stack Analysis")
        # Button -> analyze_project_stack(free-text list, ref).
        analyze_btn.click(
            fn=analyze_project_stack,
            inputs=[stack_input, stack_ref_input],
            outputs=stack_output
        )
    with gr.Tab("Available Technologies"):
        list_ref_input = gr.Textbox(label="Git Reference", value="main")
        list_btn = gr.Button("List Available Technologies")
        list_output = gr.JSON(label="Available Technologies")
        # Button -> extract_available_technologies(ref).
        list_btn.click(
            fn=extract_available_technologies,
            inputs=[list_ref_input],
            outputs=list_output
        )
    # Register MCP API endpoints
    # NOTE(review): gr.api registers these callables on the current Blocks;
    # assumes a Gradio version that provides gr.api — confirm against the
    # version pinned for this Space. Indentation reconstructed (the paste had
    # none); gr.api is documented to be used inside a Blocks context.
    gr.api(fn=list_rules)
    gr.api(fn=fetch_rule)
    gr.api(fn=get_guidelines_for_stack)
    gr.api(fn=analyze_project_stack)
    gr.api(fn=extract_available_technologies)
if __name__ == "__main__":
    # mcp_server=True additionally serves the app's functions over MCP.
    demo.launch(mcp_server=True)