Spaces:

Samarthrr
/

revcode-ai-engine

Sleeping

App Files Files Community

NyenNolife committed 25 days ago

Commit

fe4f8a8

0 Parent(s):

Deploying 3-model AI stack

Browse files

Files changed (3) hide show

Dockerfile +21 -0
app.py +124 -0
requirements.txt +7 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,21 @@

+FROM python:3.10-slim
+# Hugging Face Docker Spaces run the container as UID 1000, so create a
+# matching user up front; it needs a writable home and a writable app dir.
+RUN useradd --create-home --uid 1000 user
+WORKDIR /app
+# Install system dependencies (skip "recommended" extras to keep the image small)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements and install first so this layer is reused when only code changes
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the application code, owned by the runtime user
+COPY --chown=user:user . .
+# /app itself must be writable: the app appends feedback_dataset.csv next to app.py
+RUN chown user:user /app
+USER user
+# Keep the Hugging Face download cache inside the user's writable home
+ENV HOME=/home/user \
+    HF_HOME=/home/user/.cache/huggingface
+# Expose the port FastAPI runs on
+EXPOSE 7860
+# Command to run the application (HF Spaces use port 7860 by default)
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,124 @@

+import ast
+import torch
+import torch.nn as nn
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from transformers import T5ForConditionalGeneration, RobertaTokenizer, DistilBertModel, DistilBertTokenizer
+import pandas as pd
+import os
+# ASGI application object; the Dockerfile's uvicorn command targets it as "app:app".
+app = FastAPI(title="Revcode AI Unified Orchestrator")
+# ---------------------------------------------------------
+# 1. SECURITY GUARDIAN (DistilBERT)
+# ---------------------------------------------------------
+class SecurityClassifier(nn.Module):
+    """DistilBERT encoder followed by a small two-output classification head.
+
+    The encoder is created from the "distilbert-base-uncased" checkpoint; the
+    head maps a 768-wide vector to 2 logits through one hidden layer of 256
+    units with ReLU and dropout (p=0.3).
+    """
+    def __init__(self):
+        super().__init__()
+        self.bert = DistilBertModel.from_pretrained("distilbert-base-uncased")
+        # Layers are listed in application order; nn.Sequential indexes them 0..3.
+        head_layers = [
+            nn.Linear(768, 256),
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(256, 2),
+        ]
+        self.classifier = nn.Sequential(*head_layers)
+    def forward(self, input_ids, attention_mask):
+        """Return the head's output for the position-0 hidden state of each sequence."""
+        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
+        # Only the first token position of the final hidden layer feeds the head.
+        first_position = encoded.last_hidden_state[:, 0, :]
+        return self.classifier(first_position)
+# ---------------------------------------------------------
+# 2. ARCHITECTURAL GUARDRAILS
+# ---------------------------------------------------------
+class Guardrails:
+    """Static checks applied to a piece of Python source text."""
+    @staticmethod
+    def validate(code: str) -> tuple[bool, str]:
+        """Parse ``code`` and verify that every function name is lowercase.
+
+        Both ``def`` and ``async def`` functions are inspected. A name fails
+        the check as soon as it contains any uppercase character, whether or
+        not it also contains an underscore (so ``Get_Data`` is rejected).
+
+        Returns:
+            ``(True, "Valid")`` when the source parses and all names pass;
+            otherwise ``(False, reason)`` where ``reason`` names the first
+            offending function or carries the parser's error message.
+        """
+        # Keep the try body to the one call that can raise, and catch only
+        # what ast.parse is expected to raise on bad or hostile input.
+        try:
+            tree = ast.parse(code)
+        except (SyntaxError, ValueError, TypeError, RecursionError, MemoryError) as e:
+            return False, f"Syntax analysis failed: {str(e)}"
+        for node in ast.walk(tree):
+            if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
+                continue
+            # Lower-casing leaves a compliant name unchanged.
+            if node.name != node.name.lower():
+                return False, f"Function '{node.name}' violates snake_case standards."
+        return True, "Valid"
+# ---------------------------------------------------------
+# 3. GLOBAL MODEL HANDLERS (Lazy Loading)
+# ---------------------------------------------------------
+# Hugging Face Hub identifiers handed to from_pretrained() by the loaders.
+FIXER_MODEL = "Salesforce/codet5p-220m"
+SECURITY_MODEL = "distilbert-base-uncased"
+# Process-wide cache. The two model slots hold None until their loader runs;
+# "tokenizers" maps the same names ("fixer" / "security") to tokenizer objects.
+models = dict(fixer=None, security=None, tokenizers={})
+def load_fixer():
+    """Return ``(model, tokenizer)`` for the CodeT5+ fixer, loading both on first use.
+
+    Results are kept in the module-level ``models`` cache, so later calls
+    return the same objects without touching the Hub again.
+    """
+    # Test for None explicitly so an already-loaded object can never be
+    # mistaken for "not loaded" because it evaluates as falsy.
+    if models["fixer"] is None:
+        print("Loading CodeT5+ Fixer...")
+        tokenizer = RobertaTokenizer.from_pretrained(FIXER_MODEL)
+        model = T5ForConditionalGeneration.from_pretrained(FIXER_MODEL)
+        # Publish to the cache only after both loads succeeded.
+        models["tokenizers"]["fixer"] = tokenizer
+        models["fixer"] = model
+    return models["fixer"], models["tokenizers"]["fixer"]
+def load_security():
+    """Return ``(model, tokenizer)`` for the security classifier, building both on first use.
+
+    Results are kept in the module-level ``models`` cache, so later calls
+    return the same objects.
+    """
+    # Test for None explicitly so an already-loaded object can never be
+    # mistaken for "not loaded" because it evaluates as falsy.
+    if models["security"] is None:
+        print("Loading DistilBERT Guardian...")
+        tokenizer = DistilBertTokenizer.from_pretrained(SECURITY_MODEL)
+        # In a real app, we'd load fine-tuned weights here.
+        # For the demo, we use the base model with the classifier head.
+        classifier = SecurityClassifier()
+        classifier.eval()
+        # Publish to the cache only after both objects were built.
+        models["tokenizers"]["security"] = tokenizer
+        models["security"] = classifier
+    return models["security"], models["tokenizers"]["security"]
+# ---------------------------------------------------------
+# 4. API ENDPOINTS
+# ---------------------------------------------------------
+# Request body accepted by the /analyze and /fix endpoints: one string field.
+class CodeInput(BaseModel):
+    code: str  # source text to classify or to repair
+# POST /analyze: run the security classifier on the submitted code and report
+# the probability of class index 1 as the "vulnerable" score.
+# (Documented with comments so the generated API description stays unchanged.)
+@app.post("/analyze")
+async def analyze_security(data: CodeInput):
+    classifier, tok = load_security()
+    encoded = tok(data.code, return_tensors="pt", truncation=True, padding=True)
+    # Inference only: no gradients are needed.
+    with torch.no_grad():
+        class_scores = classifier(encoded["input_ids"], encoded["attention_mask"])
+        class_probs = torch.softmax(class_scores, dim=1)
+        vulnerability_prob = class_probs[0][1].item()
+    flagged = vulnerability_prob > 0.5
+    if vulnerability_prob <= 0.5:
+        verdict = "SECURE"
+    else:
+        verdict = "VULNERABLE"
+    return {
+        "is_vulnerable": flagged,
+        # Percentage with two decimals.
+        "confidence": round(vulnerability_prob * 100, 2),
+        "verdict": verdict,
+    }
+# POST /fix: ask the fixer model for a rewrite of the submitted code, then run
+# the guardrail check on the generated text and report both.
+# (Documented with comments so the generated API description stays unchanged.)
+@app.post("/fix")
+async def fix_code(data: CodeInput):
+    fixer, tok = load_fixer()
+    prompt = f"Fix code: {data.code}"
+    encoded = tok(prompt, return_tensors="pt", truncation=True)
+    # Inference only: no gradients are needed.
+    with torch.no_grad():
+        generated = fixer.generate(**encoded, max_length=512)
+    suggestion = tok.decode(generated[0], skip_special_tokens=True)
+    # Run Guardrails on the model output, not on the user's input.
+    passed, guardrail_msg = Guardrails.validate(suggestion)
+    guardrail_status = "PASSED" if passed else "FAILED"
+    return {
+        "suggestion": suggestion,
+        "guardrail_status": guardrail_status,
+        "guardrail_msg": guardrail_msg,
+    }
+# POST /feedback: append one human-in-the-loop correction to the feedback CSV.
+# The body must contain "original_code" and "corrected_code"; any other keys
+# are ignored so every row matches the header written on first use.
+# Responds with HTTP 422 when either required field is missing.
+@app.post("/feedback")
+async def store_feedback(data: dict):
+    # Store feedback for HITL (Human-In-The-Loop)
+    feedback_file = "feedback_dataset.csv"
+    columns = ["original_code", "corrected_code"]
+    missing = [name for name in columns if name not in data]
+    if missing:
+        raise HTTPException(
+            status_code=422,
+            detail=f"Missing feedback field(s): {', '.join(missing)}",
+        )
+    # Project onto the fixed schema; appending arbitrary keys under a header
+    # that is only written once would misalign the columns of later rows.
+    row = pd.DataFrame([{name: data[name] for name in columns}], columns=columns)
+    # An existing but empty file still needs its header line.
+    needs_header = not os.path.exists(feedback_file) or os.path.getsize(feedback_file) == 0
+    row.to_csv(feedback_file, mode='a', header=needs_header, index=False)
+    return {"status": "Feedback stored for retraining"}
+# GET /: liveness probe. "models_loaded" names only the models that are
+# currently held in the cache (an empty list until a loader has run).
+@app.get("/")
+async def health():
+    # The "tokenizers" entry is a sub-dictionary, not a model slot, and a slot
+    # still holding None has not been loaded yet.
+    loaded = [
+        name
+        for name, model in models.items()
+        if name != "tokenizers" and model is not None
+    ]
+    return {"status": "Revcode AI Engine is alive", "models_loaded": loaded}

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+# Python packages installed by the Dockerfile's `pip install -r requirements.txt` step.
+# NOTE(review): no versions are pinned, so every image build resolves whatever is current.
+torch
+transformers
+fastapi
+uvicorn
+pydantic
+pandas
+accelerate