Raju2024 committed on
Commit
4819b01
·
verified ·
1 Parent(s): 2385b7a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -41
app.py CHANGED
@@ -14,7 +14,7 @@ app = FastAPI()
14
  GEMMA_API_KEY = os.getenv("GEMMA_API_KEY")
15
  APP_API_KEY = os.getenv("APP_API_KEY")
16
 
17
- GEMMA_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemma-3-27b-it:generateContent"
18
 
19
 
20
  # -------- Models --------
@@ -27,50 +27,166 @@ class ChatRequest(BaseModel):
27
  model: str
28
  messages: List[Message]
29
  stream: Optional[bool] = False
30
- plain: Optional[bool] = False # 👈 return plain text
31
 
32
 
33
  # -------- Helpers --------
34
  def extract_text(messages):
35
  text = ""
 
36
  for msg in messages:
37
- if isinstance(msg.content, list):
38
- for item in msg.content:
 
 
39
  if item.get("type") == "text":
40
  text += item.get("text", "") + "\n"
41
- else:
42
- text += msg.content + "\n"
 
 
 
 
43
  return text.strip()
44
 
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  # -------- Endpoint --------
47
  @app.post("/v1/chat/completions")
48
  def chat_completions(
49
  request: ChatRequest,
50
  authorization: Optional[str] = Header(None)
51
  ):
52
- # ---- Auth ----
53
  if not authorization:
54
  raise HTTPException(status_code=401, detail="Missing Authorization header")
55
 
56
- token = authorization.replace("Bearer ", "")
57
  if token != APP_API_KEY:
58
  raise HTTPException(status_code=403, detail="Invalid API key")
59
 
 
 
 
 
60
  prompt = extract_text(request.messages)
 
61
 
62
- payload = {
63
- "contents": [
64
- {
65
- "parts": [{"text": prompt}]
66
- }
67
- ]
68
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
- url = f"{GEMMA_URL}?key={GEMMA_API_KEY}"
71
 
 
72
  try:
73
- res = requests.post(url, json=payload)
 
 
 
 
 
 
74
  res.raise_for_status()
75
  data = res.json()
76
 
@@ -79,33 +195,11 @@ def chat_completions(
79
  except Exception as e:
80
  raise HTTPException(status_code=500, detail=str(e))
81
 
82
- # -------- PLAIN TEXT (FASTEST) --------
83
  if request.plain:
84
  return PlainTextResponse(output)
85
 
86
- # -------- STREAM (SIMULATED) --------
87
- if request.stream:
88
- def generate():
89
- words = output.split(" ")
90
- for word in words:
91
- chunk = {
92
- "id": "chatcmpl-gemma",
93
- "object": "chat.completion.chunk",
94
- "choices": [
95
- {
96
- "delta": {"content": word + " "},
97
- "index": 0,
98
- "finish_reason": None
99
- }
100
- ]
101
- }
102
- yield f"data: {json.dumps(chunk)}\n\n"
103
-
104
- yield "data: [DONE]\n\n"
105
-
106
- return StreamingResponse(generate(), media_type="text/event-stream")
107
-
108
- # -------- DEFAULT (OPENAI JSON) --------
109
  return JSONResponse({
110
  "id": "chatcmpl-gemma",
111
  "object": "chat.completion",
 
14
  GEMMA_API_KEY = os.getenv("GEMMA_API_KEY")
15
  APP_API_KEY = os.getenv("APP_API_KEY")
16
 
17
+ GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
18
 
19
 
20
  # -------- Models --------
 
27
  model: str
28
  messages: List[Message]
29
  stream: Optional[bool] = False
30
+ plain: Optional[bool] = False
31
 
32
 
33
  # -------- Helpers --------
def extract_text(messages):
    """Flatten chat messages into a single newline-separated prompt string.

    Each message's ``content`` may be a plain string or a list of typed
    parts. Only parts shaped like ``{"type": "text", "text": ...}``
    contribute; images, unknown part types, and content that is neither a
    string nor a list are ignored.

    Args:
        messages: Iterable of objects exposing a ``content`` attribute.

    Returns:
        The collected text joined with newlines, with leading and trailing
        whitespace stripped. Empty input yields ``""``.
    """
    pieces = []

    for msg in messages:
        content = msg.content

        if isinstance(content, str):
            pieces.append(content)
        elif isinstance(content, list):
            for item in content:
                # Skip non-dict entries instead of failing on ``.get``.
                if isinstance(item, dict) and item.get("type") == "text":
                    # ``text`` may be present but null; treat it as empty.
                    pieces.append(item.get("text") or "")

    return "\n".join(pieces).strip()
51
 
52
 
def build_payload(prompt: str):
    """Wrap *prompt* in a request body with one content entry and one text part."""
    text_part = {"text": prompt}
    content_entry = {"parts": [text_part]}
    return {"contents": [content_entry]}
61
+
62
+
def get_stream_url(model_name: str) -> str:
    """Build the ``streamGenerateContent`` (SSE) URL for *model_name*.

    The model name is percent-encoded so that characters such as ``/``,
    ``?``, ``#`` or ``:`` in a caller-supplied value cannot change the
    request path or query string.

    NOTE(review): the API key is carried in the query string, so it will
    appear anywhere this URL is logged or echoed -- confirm callers do not
    expose it.
    """
    from urllib.parse import quote

    safe_model = quote(model_name, safe="")
    return (
        f"{GEMINI_BASE_URL}/models/{safe_model}"
        f":streamGenerateContent?alt=sse&key={GEMMA_API_KEY}"
    )
65
+
66
+
def get_generate_url(model_name: str) -> str:
    """Build the non-streaming ``generateContent`` URL for *model_name*.

    The model name is percent-encoded so that characters such as ``/``,
    ``?``, ``#`` or ``:`` in a caller-supplied value cannot change the
    request path or query string.

    NOTE(review): the API key is carried in the query string, so it will
    appear anywhere this URL is logged or echoed -- confirm callers do not
    expose it.
    """
    from urllib.parse import quote

    safe_model = quote(model_name, safe="")
    return (
        f"{GEMINI_BASE_URL}/models/{safe_model}"
        f":generateContent?key={GEMMA_API_KEY}"
    )
69
+
70
+
def parse_gemini_text(chunk_json: dict) -> str:
    """Extract the text of the first candidate from a Gemini response chunk.

    The expected shape is ``candidates[0].content.parts[*].text``. A
    candidate's content may hold several parts, so the text of every part
    that has one is concatenated in order rather than reading only the
    first part.

    Args:
        chunk_json: Decoded JSON object for one response or stream chunk.

    Returns:
        The concatenated text, or ``""`` when the chunk has no candidates,
        no parts, no text, or an unexpected shape.
    """
    if not isinstance(chunk_json, dict):
        return ""

    candidates = chunk_json.get("candidates")
    if not isinstance(candidates, list) or not candidates:
        return ""

    first = candidates[0]
    if not isinstance(first, dict):
        return ""

    # Some chunks carry only metadata (e.g. a finish reason) and no content.
    content = first.get("content")
    if not isinstance(content, dict):
        return ""

    parts = content.get("parts")
    if not isinstance(parts, list):
        return ""

    return "".join(
        part["text"]
        for part in parts
        if isinstance(part, dict) and isinstance(part.get("text"), str)
    )
87
+
88
+
89
  # -------- Endpoint --------
90
  @app.post("/v1/chat/completions")
91
  def chat_completions(
92
  request: ChatRequest,
93
  authorization: Optional[str] = Header(None)
94
  ):
 
95
  if not authorization:
96
  raise HTTPException(status_code=401, detail="Missing Authorization header")
97
 
98
+ token = authorization.replace("Bearer ", "").strip()
99
  if token != APP_API_KEY:
100
  raise HTTPException(status_code=403, detail="Invalid API key")
101
 
102
+ if not GEMMA_API_KEY:
103
+ raise HTTPException(status_code=500, detail="GEMMA_API_KEY is not set")
104
+
105
+ model_name = request.model or "gemma-3-27b-it"
106
  prompt = extract_text(request.messages)
107
+ payload = build_payload(prompt)
108
 
109
+ # -------- STREAM MODE --------
110
+ if request.stream:
111
+ def generate():
112
+ try:
113
+ url = get_stream_url(model_name)
114
+
115
+ with requests.post(
116
+ url,
117
+ json=payload,
118
+ stream=True,
119
+ timeout=120,
120
+ headers={"Content-Type": "application/json"}
121
+ ) as res:
122
+ res.raise_for_status()
123
+
124
+ sent_role = False
125
+
126
+ for raw_line in res.iter_lines(decode_unicode=True):
127
+ if not raw_line:
128
+ continue
129
+
130
+ line = raw_line.strip()
131
+
132
+ if line.startswith("data:"):
133
+ line = line[5:].strip()
134
+
135
+ if not line:
136
+ continue
137
+
138
+ # Some SSE implementations may send end markers
139
+ if line == "[DONE]":
140
+ break
141
+
142
+ try:
143
+ chunk_json = json.loads(line)
144
+ except json.JSONDecodeError:
145
+ continue
146
+
147
+ text = parse_gemini_text(chunk_json)
148
+ if not text:
149
+ continue
150
+
151
+ delta = {"content": text}
152
+ if not sent_role:
153
+ delta["role"] = "assistant"
154
+ sent_role = True
155
+
156
+ openai_chunk = {
157
+ "id": "chatcmpl-gemma",
158
+ "object": "chat.completion.chunk",
159
+ "choices": [
160
+ {
161
+ "index": 0,
162
+ "delta": delta,
163
+ "finish_reason": None
164
+ }
165
+ ]
166
+ }
167
+
168
+ yield f"data: {json.dumps(openai_chunk, ensure_ascii=False)}\n\n"
169
+
170
+ yield "data: [DONE]\n\n"
171
+
172
+ except Exception as e:
173
+ error_chunk = {
174
+ "error": str(e)
175
+ }
176
+ yield f"data: {json.dumps(error_chunk, ensure_ascii=False)}\n\n"
177
+ yield "data: [DONE]\n\n"
178
 
179
+ return StreamingResponse(generate(), media_type="text/event-stream")
180
 
181
+ # -------- NON-STREAM --------
182
  try:
183
+ url = get_generate_url(model_name)
184
+ res = requests.post(
185
+ url,
186
+ json=payload,
187
+ timeout=120,
188
+ headers={"Content-Type": "application/json"}
189
+ )
190
  res.raise_for_status()
191
  data = res.json()
192
 
 
195
  except Exception as e:
196
  raise HTTPException(status_code=500, detail=str(e))
197
 
198
+ # -------- PLAIN TEXT --------
199
  if request.plain:
200
  return PlainTextResponse(output)
201
 
202
+ # -------- OPENAI JSON --------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  return JSONResponse({
204
  "id": "chatcmpl-gemma",
205
  "object": "chat.completion",