# Source: Hugging Face Space "claude2" (user sallima), commit fe8e34a (verified).
# NOTE(review): the original upload carried file-page residue here; converted
# to a comment so the module parses.
import os, hashlib, re, base64, requests, gradio as gr
from typing import List, Dict, Optional, Any
import json
# GitHub REST API base URL.
GH = "https://api.github.com"
# Personal access token; raises GitHub rate limits for authenticated requests.
TOKEN = os.getenv("GITHUB_TOKEN")
# "owner/name" of the repository that holds the .cursorrules files.
RULES_REPO = os.getenv("RULES_REPO", "stefanoallima/awesome-cursorrules")
# Git ref (branch/tag/SHA) used whenever a caller does not supply one.
DEFAULT_REF = os.getenv("DEFAULT_REF", "main")
def _hdr():
    """Build the standard GitHub API request headers (bearer auth + JSON accept)."""
    headers = {"Accept": "application/vnd.github+json"}
    headers["Authorization"] = f"Bearer {TOKEN}"
    return headers
def _sha256(b):
return hashlib.sha256(b).hexdigest()
def get_readme_content(ref: Optional[str] = None) -> str:
    """Fetch the repository README as text, for use as project context.

    Args:
        ref: Git ref (branch/tag/SHA); falls back to DEFAULT_REF.

    Returns:
        The decoded README content, or an ``"Error fetching README: ..."``
        string on any failure (callers display the result rather than raise).
    """
    ref = ref or DEFAULT_REF
    try:
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/contents/README.md?ref={ref}",
            headers=_hdr(),
            timeout=30,  # fail fast instead of hanging the UI on a stalled connection
        )
        r.raise_for_status()
        j = r.json()
        # The contents API normally base64-encodes file bodies; fall back to
        # treating "content" as plain text if the encoding is anything else.
        raw = base64.b64decode(j["content"]) if j.get("encoding") == "base64" else j["content"].encode()
        return raw.decode("utf-8", "replace")
    except Exception as e:
        return f"Error fetching README: {str(e)}"
def extract_available_technologies(ref: Optional[str] = None) -> List[str]:
    """Extract all available technologies from the rules directory.

    Args:
        ref: Git ref (branch/tag/SHA); falls back to DEFAULT_REF.

    Returns:
        Sorted list of technology names, or ``["Error: ..."]`` on failure
        (callers treat the list as display data either way).
    """
    ref = ref or DEFAULT_REF
    try:
        # Recursive git tree: one request lists every file in the repository.
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/git/trees/{ref}?recursive=1",
            headers=_hdr(),
            timeout=30,  # fail fast instead of hanging on a stalled connection
        )
        r.raise_for_status()
        technologies = set()  # set gives O(1) dedup vs. the old O(n) list scan
        for item in r.json().get("tree", []):
            if item.get("type") == "blob" and item["path"].startswith("rules/"):
                path_parts = item["path"].split("/")
                if len(path_parts) >= 2:
                    # Directories look like "<tech>-cursorrules-prompt-file";
                    # strip the suffix and de-hyphenate to get a display name.
                    tech_name = path_parts[1].replace("-cursorrules-prompt-file", "").replace("-", " ")
                    technologies.add(tech_name)
        return sorted(technologies)
    except Exception as e:
        return [f"Error: {str(e)}"]
# Alias table: a requested technology also matches rules whose name contains
# any of these related terms. Hoisted to module level — the original rebuilt
# this dict literal on every (requested x available) inner-loop iteration.
_TECH_MAPPINGS = {
    'python': ['python', 'django', 'fastapi', 'flask'],
    'javascript': ['javascript', 'js', 'node', 'react', 'vue', 'angular'],
    'typescript': ['typescript', 'ts', 'react', 'angular', 'nextjs'],
    'react': ['react', 'nextjs', 'typescript'],
    'vue': ['vue', 'vuejs', 'nuxt'],
    'node': ['node', 'nodejs', 'javascript'],
    'postgres': ['postgres', 'postgresql', 'database'],
    'fastapi': ['fastapi', 'python', 'api'],
    'nextjs': ['nextjs', 'next', 'react', 'typescript']
}

def semantic_match_technologies(requested_techs: List[str], available_techs: List[str]) -> Dict[str, List[str]]:
    """Use simple semantic matching to find relevant technologies.

    Args:
        requested_techs: Technology names the user asked for.
        available_techs: Technology names actually present in the rules repo.

    Returns:
        Mapping of each requested name to the (possibly empty) list of
        available names it matched, in availability order.
    """
    matches: Dict[str, List[str]] = {}
    for requested in requested_techs:
        requested_lower = requested.lower()
        matched: List[str] = []
        # Aliases for this request (empty when the table has no entry).
        aliases = _TECH_MAPPINGS.get(requested_lower, ())
        for available in available_techs:
            available_lower = available.lower()
            # Match priority: exact, then substring either way, then alias hit.
            if requested_lower == available_lower:
                matched.append(available)
            elif requested_lower in available_lower or available_lower in requested_lower:
                matched.append(available)
            elif any(alias in available_lower for alias in aliases):
                matched.append(available)
        matches[requested] = matched
    return matches
def list_rules(tech_key: Optional[str] = None, ref: Optional[str] = None) -> List[Dict[str, Any]]:
    """List available coding rules with enhanced metadata.

    Args:
        tech_key: Optional case-insensitive substring filter on the technology name.
        ref: Git ref (branch/tag/SHA); falls back to DEFAULT_REF.

    Returns:
        One metadata dict per matching file under ``rules/``, or the
        single-element list ``[{"error": ...}]`` on failure.
    """
    ref = ref or DEFAULT_REF
    try:
        # Recursive git tree: one request covers the whole repository.
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/git/trees/{ref}?recursive=1",
            headers=_hdr(),
            timeout=30,  # fail fast instead of hanging on a stalled connection
        )
        r.raise_for_status()
        rules = []
        for item in r.json().get("tree", []):
            if item.get("type") == "blob" and item["path"].startswith("rules/"):
                path_parts = item["path"].split("/")
                if len(path_parts) >= 2:
                    tech_dir = path_parts[1]
                    # Directories look like "<tech>-cursorrules-prompt-file".
                    tech_name = tech_dir.replace("-cursorrules-prompt-file", "").replace("-", " ")
                    if not tech_key or tech_key.lower() in tech_name.lower():
                        rules.append({
                            "tech_key": tech_name,
                            "directory": tech_dir,
                            "path": item["path"],
                            "repo": RULES_REPO,
                            "commit_sha": ref,
                            "url": f"https://github.com/{RULES_REPO}/tree/{ref}/{item['path']}"
                        })
        return rules
    except Exception as e:
        return [{"error": str(e)}]
def fetch_rule_content(tech_directory: str, ref: Optional[str] = None) -> Dict[str, Any]:
    """Fetch the actual rule content from a technology directory.

    Args:
        tech_directory: Directory name under ``rules/`` (e.g.
            ``"python-cursorrules-prompt-file"``).
        ref: Git ref (branch/tag/SHA); falls back to DEFAULT_REF.

    Returns:
        A dict with the rule text plus provenance metadata, or
        ``{"error": ...}`` when nothing usable is found or a request fails.
    """
    ref = ref or DEFAULT_REF
    try:
        # List the files inside the specific rule directory.
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/contents/rules/{tech_directory}?ref={ref}",
            headers=_hdr(),
            timeout=30,  # fail fast instead of hanging on a stalled connection
        )
        r.raise_for_status()
        files = r.json()
        # The contents API returns a dict for a single file, a list for a dir.
        if not isinstance(files, list):
            files = [files]
        # Take the first .cursorrules or .md file (matches ".cursorrules" itself too).
        rule_file = None
        for entry in files:  # renamed from "file" to avoid shadowing the builtin
            if entry["name"].endswith(('.cursorrules', '.md')):
                rule_file = entry
                break
        if not rule_file:
            return {"error": f"No rule file found in {tech_directory}"}
        # Fetch the raw file content via its download URL (no auth needed).
        content_r = requests.get(rule_file["download_url"], timeout=30)
        content_r.raise_for_status()
        return {
            "tech_key": tech_directory.replace("-cursorrules-prompt-file", "").replace("-", " "),
            "filename": rule_file["name"],
            "content": content_r.text,
            "directory": tech_directory,
            "repo": RULES_REPO,
            "commit_sha": ref,
            "sha256": _sha256(content_r.content),  # integrity fingerprint of raw bytes
            "url": rule_file["html_url"]
        }
    except Exception as e:
        return {"error": str(e)}
def fetch_rule(tech_key: str, ref: str = None) -> Dict[str, Any]:
    """Fetch the rule for *tech_key*, falling back to semantic matching."""
    ref = ref or DEFAULT_REF
    # Attempt an exact/substring directory match first.
    direct_hits = list_rules(tech_key=tech_key, ref=ref)
    if direct_hits and "error" not in direct_hits[0]:
        return fetch_rule_content(direct_hits[0]["directory"], ref)
    # No direct hit: fall back to alias-based matching against everything available.
    candidates = semantic_match_technologies([tech_key], extract_available_technologies(ref)).get(tech_key) or []
    if candidates:
        # Rebuild the directory name from the first (best) candidate.
        directory = candidates[0].replace(" ", "-") + "-cursorrules-prompt-file"
        return fetch_rule_content(directory, ref)
    return {"error": f"No rule found for '{tech_key}' in {RULES_REPO}@{ref}"}
def get_guidelines_for_stack(tech_stack: List[str], ref: str = None) -> Dict[str, Any]:
    """Collect coding guidelines for every technology in *tech_stack*."""
    ref = ref or DEFAULT_REF
    available_techs = extract_available_technologies(ref)
    matches = semantic_match_technologies(tech_stack, available_techs)
    guidelines: Dict[str, List[Dict[str, Any]]] = {}
    for requested, matched in matches.items():
        collected = []
        # Cap at the three best matches per requested technology.
        for tech in matched[:3]:
            directory = tech.replace(" ", "-") + "-cursorrules-prompt-file"
            content = fetch_rule_content(directory, ref)
            if "error" not in content:
                collected.append(content)
        guidelines[requested] = collected
    return {
        "tech_stack": tech_stack,
        "guidelines": guidelines,
        "available_technologies": available_techs,
        "matches": matches,
        "repo": RULES_REPO,
        "commit_sha": ref
    }
def analyze_project_stack(framework_list: str, ref: str = None) -> Dict[str, Any]:
    """Analyze a project's technology stack and return relevant guidelines."""
    ref = ref or DEFAULT_REF
    # Accept comma- and/or newline-separated technology names.
    techs = [part.strip() for part in framework_list.replace(",", "\n").split("\n") if part.strip()]
    if not techs:
        return {"error": "No technologies found in the provided list"}
    # README text gives the caller extra project context.
    readme_content = get_readme_content(ref)
    stack_guidelines = get_guidelines_for_stack(techs, ref)
    # Truncate long READMEs so the response payload stays small.
    if len(readme_content) > 1000:
        readme_excerpt = readme_content[:1000] + "..."
    else:
        readme_excerpt = readme_content
    found = sum(1 for g in stack_guidelines["guidelines"].values() if g)
    return {
        "project_analysis": {
            "detected_technologies": techs,
            "readme_context": readme_excerpt,
        },
        "guidelines": stack_guidelines,
        "summary": f"Found guidelines for {found} out of {len(techs)} requested technologies"
    }
# Gradio Interface
# Three tabs: single-technology lookup, whole-stack analysis, and a listing of
# every technology that has rules available. The same functions are also
# registered as API endpoints so they are reachable over MCP.
with gr.Blocks(title="Enhanced MCP Coding Guidelines Server") as demo:
    gr.Markdown("# 🚀 Enhanced MCP Coding Guidelines Server")
    gr.Markdown("Intelligent coding guideline retrieval with semantic matching")
    with gr.Tab("Single Technology"):
        with gr.Row():
            tech_input = gr.Textbox(label="Technology", placeholder="e.g., python, react, fastapi")
            ref_input = gr.Textbox(label="Git Reference", value="main", placeholder="main")
        fetch_btn = gr.Button("Fetch Guidelines")
        single_output = gr.JSON(label="Guidelines")
        # Button -> fetch_rule(tech, ref); result rendered as JSON.
        fetch_btn.click(
            fn=fetch_rule,
            inputs=[tech_input, ref_input],
            outputs=single_output
        )
    with gr.Tab("Technology Stack"):
        stack_input = gr.Textbox(
            label="Technology Stack",
            placeholder="python, fastapi, postgres, react, typescript",
            lines=3
        )
        stack_ref_input = gr.Textbox(label="Git Reference", value="main")
        analyze_btn = gr.Button("Analyze Stack")
        stack_output = gr.JSON(label="Stack Analysis")
        # Button -> analyze_project_stack(free-text list, ref).
        analyze_btn.click(
            fn=analyze_project_stack,
            inputs=[stack_input, stack_ref_input],
            outputs=stack_output
        )
    with gr.Tab("Available Technologies"):
        list_ref_input = gr.Textbox(label="Git Reference", value="main")
        list_btn = gr.Button("List Available Technologies")
        list_output = gr.JSON(label="Available Technologies")
        # Button -> extract_available_technologies(ref).
        list_btn.click(
            fn=extract_available_technologies,
            inputs=[list_ref_input],
            outputs=list_output
        )
    # Register MCP API endpoints
    # NOTE(review): gr.api registers these callables on the current Blocks;
    # assumes a Gradio version that provides gr.api — confirm against the
    # version pinned for this Space. Indentation reconstructed (the paste had
    # none); gr.api is documented to be used inside a Blocks context.
    gr.api(fn=list_rules)
    gr.api(fn=fetch_rule)
    gr.api(fn=get_guidelines_for_stack)
    gr.api(fn=analyze_project_stack)
    gr.api(fn=extract_available_technologies)
if __name__ == "__main__":
    # mcp_server=True additionally serves the app's functions over MCP.
    demo.launch(mcp_server=True)