NyenNolife committed on
Commit
fe4f8a8
·
0 Parent(s):

Deploying 3-model AI stack

Browse files
Files changed (3) hide show
  1. Dockerfile +21 -0
  2. app.py +124 -0
  3. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

WORKDIR /app

# Install system dependencies.
# --no-install-recommends keeps the image small by skipping optional packages.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements and install (done before copying the code so this layer
# is reused from cache when only application files change).
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Hugging Face Docker Spaces run the container as UID 1000. Create that user,
# give it ownership of /app and a writable HOME so that the lazily downloaded
# model files (cached under $HOME/.cache) and feedback_dataset.csv (written
# to the working directory) do not fail with permission errors.
RUN useradd --create-home --uid 1000 user \
    && chown user:user /app
ENV HOME=/home/user
USER user

# Copy the application code
COPY --chown=user:user . .

# Expose the port FastAPI runs on
EXPOSE 7860

# Command to run the application (HF Spaces use port 7860 by default)
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+ import torch
3
+ import torch.nn as nn
4
+ from fastapi import FastAPI, HTTPException
5
+ from pydantic import BaseModel
6
+ from transformers import T5ForConditionalGeneration, RobertaTokenizer, DistilBertModel, DistilBertTokenizer
7
+ import pandas as pd
8
+ import os
9
+
10
+ app = FastAPI(title="Revcode AI Unified Orchestrator")
11
+
12
+ # ---------------------------------------------------------
13
+ # 1. SECURITY GUARDIAN (DistilBERT)
14
+ # ---------------------------------------------------------
15
class SecurityClassifier(nn.Module):
    """DistilBERT encoder followed by a small two-class classification head.

    Args:
        model_name: Hugging Face checkpoint passed to
            ``DistilBertModel.from_pretrained``. Defaults to
            ``"distilbert-base-uncased"``, the checkpoint this class
            previously hard-coded.
    """

    def __init__(self, model_name="distilbert-base-uncased"):
        super().__init__()
        self.bert = DistilBertModel.from_pretrained(model_name)
        # Read the encoder width from its config instead of hard-coding 768 so
        # the head still matches if a different DistilBERT checkpoint is used.
        hidden_size = self.bert.config.hidden_size
        self.classifier = nn.Sequential(
            nn.Linear(hidden_size, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 2),
        )

    def forward(self, input_ids, attention_mask):
        """Return the classifier output for each sequence in the batch.

        The head is applied to the encoder's last hidden state at sequence
        position 0, so the result has two values per input sequence.
        """
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        first_token_state = outputs.last_hidden_state[:, 0, :]
        return self.classifier(first_token_state)
26
+
27
+ # ---------------------------------------------------------
28
+ # 2. ARCHITECTURAL GUARDRAILS
29
+ # ---------------------------------------------------------
30
class Guardrails:
    """Static checks applied to a piece of Python source code."""

    @staticmethod
    def validate(code: str):
        """Check that *code* parses and that its functions use snake_case names.

        Args:
            code: Python source text to inspect.

        Returns:
            A ``(is_valid, message)`` tuple. ``message`` is ``"Valid"`` on
            success; otherwise it names the first offending function or
            carries the parser error.
        """
        try:
            tree = ast.parse(code)
        except Exception as e:
            # Deliberately broad: any failure to parse the input means the
            # code cannot be vetted, so it is reported rather than raised.
            return False, f"Syntax analysis failed: {str(e)}"

        for node in ast.walk(tree):
            if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue
            # A snake_case name contains no uppercase characters. Comparing
            # against the lowercased name rejects mixed forms such as
            # "Fix_Code" while still accepting "_", "_private" and "__init__".
            if node.name != node.name.lower():
                return False, f"Function '{node.name}' violates snake_case standards."
        return True, "Valid"
42
+
43
+ # ---------------------------------------------------------
44
+ # 3. GLOBAL MODEL HANDLERS (Lazy Loading)
45
+ # ---------------------------------------------------------
46
# Checkpoint identifiers handed to the ``from_pretrained`` loaders.
FIXER_MODEL = "Salesforce/codet5p-220m"
SECURITY_MODEL = "distilbert-base-uncased"

# Module-level cache filled on first use by the loader functions: each model
# slot starts empty, and tokenizers are stored by name under "tokenizers".
models = dict(fixer=None, security=None, tokenizers={})
54
+
55
def load_fixer():
    """Return the cached ``(model, tokenizer)`` pair for the code fixer.

    On the first call the tokenizer and model are loaded from ``FIXER_MODEL``
    and stored in the module-level ``models`` cache; later calls reuse them.
    """
    if models["fixer"]:
        return models["fixer"], models["tokenizers"]["fixer"]

    print("Loading CodeT5+ Fixer...")
    tokenizer_cache = models["tokenizers"]
    tokenizer_cache["fixer"] = RobertaTokenizer.from_pretrained(FIXER_MODEL)
    models["fixer"] = T5ForConditionalGeneration.from_pretrained(FIXER_MODEL)
    return models["fixer"], tokenizer_cache["fixer"]
61
+
62
def load_security():
    """Return the cached ``(model, tokenizer)`` pair for the security check.

    On the first call the tokenizer is loaded from ``SECURITY_MODEL`` and a
    ``SecurityClassifier`` is built and switched to eval mode; both are
    stored in the module-level ``models`` cache and reused afterwards.
    """
    if models["security"]:
        return models["security"], models["tokenizers"]["security"]

    print("Loading DistilBERT Guardian...")
    tokenizer_cache = models["tokenizers"]
    tokenizer_cache["security"] = DistilBertTokenizer.from_pretrained(SECURITY_MODEL)
    # Demo setup: no fine-tuned weights are loaded here, so the classifier
    # head on top of the base model is whatever SecurityClassifier() creates.
    # A real deployment would load fine-tuned weights at this point.
    guardian = SecurityClassifier()
    models["security"] = guardian
    guardian.eval()
    return models["security"], tokenizer_cache["security"]
71
+
72
+ # ---------------------------------------------------------
73
+ # 4. API ENDPOINTS
74
+ # ---------------------------------------------------------
75
class CodeInput(BaseModel):
    """Request body holding the source code to process."""

    code: str
77
+
78
@app.post("/analyze")
async def analyze_security(data: CodeInput):
    """Classify the submitted code and report the class-1 probability.

    Returns a dict with ``is_vulnerable`` (probability above 0.5),
    ``confidence`` (the class-1 probability as a percentage, two decimals)
    and ``verdict`` (``"SECURE"`` or ``"VULNERABLE"``).
    """
    classifier, tok = load_security()
    encoded = tok(data.code, return_tensors="pt", truncation=True, padding=True)

    with torch.no_grad():
        scores = classifier(encoded["input_ids"], encoded["attention_mask"])
        class_probs = torch.softmax(scores, dim=1)
        # Index 1 of the two outputs is treated as the "vulnerable" class.
        vulnerability_prob = class_probs[0][1].item()

    if vulnerability_prob <= 0.5:
        verdict = "SECURE"
    else:
        verdict = "VULNERABLE"

    # NOTE(review): "confidence" is the vulnerability probability itself, so
    # a confident SECURE verdict reports a low number - confirm this is the
    # meaning API consumers expect.
    return {
        "is_vulnerable": vulnerability_prob > 0.5,
        "confidence": round(vulnerability_prob * 100, 2),
        "verdict": verdict,
    }
92
+
93
@app.post("/fix")
async def fix_code(data: CodeInput):
    """Generate a suggested fix for the submitted code and vet it.

    The code is prefixed with a fixed prompt, passed through the fixer
    model, and the decoded output is checked by ``Guardrails.validate``.
    Returns the suggestion together with the guardrail status and message.
    """
    fixer, tok = load_fixer()
    prompt = "Fix code: " + data.code
    encoded = tok(prompt, return_tensors="pt", truncation=True)

    with torch.no_grad():
        generated = fixer.generate(**encoded, max_length=512)

    suggestion = tok.decode(generated[0], skip_special_tokens=True)

    # Run Guardrails on the model output, not on the user's input.
    is_valid, msg = Guardrails.validate(suggestion)
    if is_valid:
        status = "PASSED"
    else:
        status = "FAILED"

    return {
        "suggestion": suggestion,
        "guardrail_status": status,
        "guardrail_msg": msg,
    }
112
+
113
@app.post("/feedback")
async def store_feedback(data: dict):
    """Append one human-in-the-loop feedback record to the feedback CSV.

    Args:
        data: JSON object that must contain ``original_code`` and
            ``corrected_code``. Any other keys are ignored so that every
            row matches the header written when the file was created.

    Returns:
        A status dict confirming the record was stored.

    Raises:
        HTTPException: 422 if a required field is missing from the payload.
    """
    feedback_file = "feedback_dataset.csv"
    columns = ["original_code", "corrected_code"]

    missing = [name for name in columns if name not in data]
    if missing:
        raise HTTPException(
            status_code=422,
            detail=f"Missing feedback field(s): {', '.join(missing)}",
        )

    # Pin the column set and order: the header is only written once, so rows
    # built from arbitrary payload keys would otherwise drift out of
    # alignment with it.
    df = pd.DataFrame([data], columns=columns)

    # Write the header when the file is new or exists but is still empty.
    needs_header = (
        not os.path.exists(feedback_file) or os.path.getsize(feedback_file) == 0
    )
    df.to_csv(feedback_file, mode="a", header=needs_header, index=False)
    return {"status": "Feedback stored for retraining"}
121
+
122
@app.get("/")
async def health():
    """Report liveness and which models have actually been loaded.

    ``models_loaded`` lists only the entries of the ``models`` cache that
    currently hold a model; the ``"tokenizers"`` bucket is not a model and
    is excluded, and slots that are still ``None`` are omitted.
    """
    loaded = [
        name
        for name, instance in models.items()
        if name != "tokenizers" and instance is not None
    ]
    return {"status": "Revcode AI Engine is alive", "models_loaded": loaded}
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ fastapi
4
+ uvicorn
5
+ pydantic
6
+ pandas
7
+ accelerate