Sgridda committed
Commit 937b2c0 (parent: c0668e0)

Initial commit

Files changed (3):
  1. Dockerfile +20 -0
  2. main.py +195 -0
  3. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,20 @@
+
+ # Use the official Python 3.9 slim image
+ FROM python:3.9-slim
+
+ # Set the working directory inside the container
+ WORKDIR /code
+
+ # Copy the requirements file into the container
+ COPY ./requirements.txt /code/requirements.txt
+
+ # Install the Python dependencies
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ # Copy the main application file into the container
+ COPY ./main.py /code/main.py
+
+ # Command to run the FastAPI server with Uvicorn
+ # We use --host 0.0.0.0 to make it accessible from outside the container
+ # and --port 7860 as this is the standard port Hugging Face Spaces expects
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,195 @@
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel, ValidationError
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+ import re
+ import json
+
+ # ----------------------------
+ # 1. Configuration
+ # ----------------------------
+
+ # Define the model we want to use.
+ # It is loaded with 4-bit quantization (see load_model below) for efficiency.
+ MODEL_NAME = "deepseek-ai/deepseek-coder-6.7b-instruct"
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # ----------------------------
+ # 2. FastAPI App Initialization
+ # ----------------------------
+
+ app = FastAPI(
+     title="AI Code Review Service",
+     description="An API to get AI-powered code reviews for pull request diffs.",
+     version="1.0.0",
+ )
+
+ # ----------------------------
+ # 3. AI Model Loading
+ # ----------------------------
+
+ # Global variables hold the model and tokenizer.
+ # They are populated once at server startup (see startup_event below).
+ model = None
+ tokenizer = None
+
+ def load_model():
+     """Loads the model and tokenizer into memory."""
+     global model, tokenizer
+     if model is None:
+         print(f"Loading model: {MODEL_NAME} on device: {DEVICE}...")
+         tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
+
+         # Load the model with 4-bit quantization to save memory
+         model = AutoModelForCausalLM.from_pretrained(
+             MODEL_NAME,
+             trust_remote_code=True,
+             torch_dtype=torch.bfloat16,
+             load_in_4bit=True,  # 4-bit loading via bitsandbytes requires a CUDA GPU
+         )
+         print("Model loaded successfully.")
+
+ @app.on_event("startup")
+ async def startup_event():
+     """
+     On server startup, we trigger the model loading.
+     This makes the first API call after startup faster.
+     """
+     print("Server starting up...")
+     load_model()
+
+ # ----------------------------
+ # 4. API Request/Response Models
+ # ----------------------------
+
+ class ReviewRequest(BaseModel):
+     """The request body for the /review endpoint."""
+     diff: str
+
+ class ReviewComment(BaseModel):
+     """A single review comment."""
+     file_path: str
+     line_number: int
+     comment_text: str
+
+ class ReviewResponse(BaseModel):
+     """The response body for the /review endpoint."""
+     comments: list[ReviewComment]
+
+ # ----------------------------
+ # 5. The AI Review Logic
+ # ----------------------------
+
+ def run_ai_inference(diff: str) -> str:
+     """
+     Runs the AI model to get the review.
+     """
+     if model is None or tokenizer is None:
+         raise RuntimeError("Model is not loaded.")
+
+     # This is the prompt engineering part. We create a clear instruction
+     # for the model, telling it exactly what to do and what format to output.
+     messages = [
+         {
+             "role": "system",
+             "content": """
+ You are an expert code reviewer. Your task is to analyze a pull request diff and provide constructive feedback.
+ Analyze the provided diff and identify potential issues, suggest improvements, or point out good practices.
+ Your feedback should be in the form of review comments.
+
+ IMPORTANT: Respond with a JSON array of comment objects. Each object must have three fields: 'file_path', 'line_number', and 'comment_text'.
+ The 'file_path' should be the full path of the file being changed.
+ The 'line_number' must be an integer corresponding to the line number in the *new* version of the file where the comment applies.
+ The 'comment_text' should be your concise and clear review comment.
+
+ Example response format:
+ [
+   {
+     "file_path": "src/utils/helpers.py",
+     "line_number": 42,
+     "comment_text": "This function could be simplified by using a list comprehension."
+   },
+   {
+     "file_path": "README.md",
+     "line_number": 12,
+     "comment_text": "There is a typo in this sentence."
+   }
+ ]
+
+ Do not add any introductory text or explanations outside of the JSON array.
+ """
+         },
+         {
+             "role": "user",
+             "content": f"Here is the diff to review:\n\n```diff\n{diff}\n```"
+         }
+     ]
+
+     inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(DEVICE)
+
+     # Generate the response deterministically (greedy decoding)
+     outputs = model.generate(inputs, max_new_tokens=1024, do_sample=False, eos_token_id=tokenizer.eos_token_id)
+
+     # Decode only the newly generated tokens and clean them up
+     response_text = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
+     return response_text.strip()
+
+ def parse_ai_response(response_text: str) -> list[ReviewComment]:
+     """
+     Parses the raw text from the AI to extract the JSON array.
+     This function is robust against the AI adding extra text before or after the JSON.
+     """
+     print(f"Raw AI Response:\n---\n{response_text}\n---")
+
+     # Find the start and end of the JSON array
+     json_match = re.search(r'\[.*\]', response_text, re.DOTALL)
+     if not json_match:
+         print("Warning: Could not find a JSON array in the AI response.")
+         return []
+
+     json_string = json_match.group(0)
+
+     try:
+         comments_data = json.loads(json_string)
+         # Validate the structure of the parsed data
+         validated_comments = [ReviewComment(**item) for item in comments_data]
+         return validated_comments
+     except (json.JSONDecodeError, ValidationError, TypeError) as e:
+         print(f"Error parsing JSON from AI response: {e}")
+         print(f"Invalid JSON string: {json_string}")
+         return []
+
+ # ----------------------------
+ # 6. The API Endpoint
+ # ----------------------------
+
+ @app.post("/review", response_model=ReviewResponse)
+ async def get_code_review(request: ReviewRequest):
+     """
+     Receives a code diff, gets a review from the AI model,
+     and returns structured review comments.
+     """
+     if not request.diff:
+         raise HTTPException(status_code=400, detail="Diff content cannot be empty.")
+
+     try:
+         # 1. Run the AI model
+         ai_response_text = run_ai_inference(request.diff)
+
+         # 2. Parse the AI's response into structured objects
+         parsed_comments = parse_ai_response(ai_response_text)
+
+         return ReviewResponse(comments=parsed_comments)
+
+     except Exception as e:
+         print(f"An unexpected error occurred: {e}")
+         raise HTTPException(status_code=500, detail="An internal error occurred while processing the review.")
+
+ # ----------------------------
+ # 7. Health Check Endpoint
+ # ----------------------------
+
+ @app.get("/health")
+ async def health_check():
+     """A simple endpoint to confirm the server is running."""
+     return {"status": "ok", "model_loaded": model is not None}
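
Once the server is running, the /review endpoint can be exercised with a short client script. The sketch below uses only the Python standard library; the sample diff, host, and port (7860, matching the Dockerfile CMD) are illustrative:

    import json
    import urllib.request

    # A tiny example diff to send for review (illustrative content)
    sample_diff = """\
    --- a/app.py
    +++ b/app.py
    @@ -1,2 +1,3 @@
     def add(a, b):
    -    return a+b
    +    # NOTE: no input validation yet
    +    return a + b
    """

    # POST the diff as JSON to the /review endpoint
    payload = json.dumps({"diff": sample_diff}).encode("utf-8")
    req = urllib.request.Request(
        "http://localhost:7860/review",
        data=payload,
        headers={"Content-Type": "application/json"},
    )

    # Print each structured review comment returned by the service
    with urllib.request.urlopen(req) as response:
        for comment in json.load(response)["comments"]:
            print(f"{comment['file_path']}:{comment['line_number']}  {comment['comment_text']}")
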
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ fastapi
+ uvicorn
+ pydantic
+ torch
+ transformers
+ accelerate
+ bitsandbytes
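
Two practical notes on the dependencies: they are unpinned, so a later rebuild may pull incompatible releases; once a working combination is known, pinning exact versions (for example transformers==4.35.2, where the exact pin is illustrative) keeps builds reproducible. Also, bitsandbytes-based 4-bit loading needs a CUDA GPU, so the python:3.9-slim image must ultimately run on GPU hardware (e.g., a GPU-enabled Hugging Face Space).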