MatteoScript committed on
Commit
e5494cb
·
verified ·
1 Parent(s): 16662d1

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +130 -5
main.py CHANGED
@@ -3,7 +3,7 @@ import time
3
  import random
4
  import asyncio
5
  import json
6
- from fastapi import FastAPI, HTTPException, Depends
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from fastapi.security.api_key import APIKeyHeader
9
  from pydantic import BaseModel
@@ -12,7 +12,10 @@ from dotenv import load_dotenv
12
  from starlette.responses import StreamingResponse
13
  from openai import OpenAI
14
  from typing import List, Optional, Dict, Any
 
15
  import copy
 
 
16
 
17
  load_dotenv()
18
 
@@ -26,7 +29,17 @@ API_KEYS = [
26
  os.getenv("API_GEMINI_4"),
27
  os.getenv("API_GEMINI_5"),
28
  ]
29
-
 
 
 
 
 
 
 
 
 
 
30
  # Classi Pydantic di VALIDAZIONE Body
31
  class ChatCompletionRequest(BaseModel):
32
  model: str = "gemini-2.0-flash"
@@ -181,14 +194,14 @@ def call_api_sync(params: ChatCompletionRequest):
181
  if params.messages:
182
  params.messages = sanitize_messages(params.messages)
183
  params = convert_payload_for_gemini(params)
184
- print('------------------------------------- INPUT --------------------------------')
185
  print(params)
186
  response_format = getattr(params, 'response_format', None)
187
  if response_format and getattr(response_format, 'type', None) == 'json_schema':
188
  response = client.beta.chat.completions.parse(**params.model_dump())
189
  else:
190
  response = client.chat.completions.create(**params.model_dump())
191
- print('------------------------------------- OUTPUT -------------------------------')
192
  print(response)
193
  print("")
194
  return response
@@ -208,11 +221,21 @@ async def _resp_async_generator(params: ChatCompletionRequest):
208
  if params.messages:
209
  params.messages = sanitize_messages(params.messages)
210
  params = convert_payload_for_gemini(params)
 
 
 
211
  for chunk in response:
212
  chunk_data = chunk.to_dict() if hasattr(chunk, "to_dict") else chunk
 
 
 
 
 
213
  yield f"data: {json.dumps(chunk_data)}\n\n"
214
  await asyncio.sleep(0.01)
215
  yield "data: [DONE]\n\n"
 
 
216
  except Exception as e:
217
  if "429" in str(e):
218
  await asyncio.sleep(2)
@@ -222,6 +245,83 @@ async def _resp_async_generator(params: ChatCompletionRequest):
222
  error_data = {"error": str(e)}
223
  yield f"data: {json.dumps(error_data)}\n\n"
224
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  # ---------------------------------- Metodi API ---------------------------------------
226
  @app.get("/")
227
  def read_general():
@@ -243,6 +343,31 @@ async def chat_completions(req: ChatCompletionRequest):
243
  except Exception as e:
244
  raise HTTPException(status_code=500, detail=str(e))
245
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  if __name__ == "__main__":
247
  import uvicorn
248
- uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
 
3
  import random
4
  import asyncio
5
  import json
6
+ from fastapi import FastAPI, HTTPException, Depends, File, UploadFile, Form
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from fastapi.security.api_key import APIKeyHeader
9
  from pydantic import BaseModel
 
12
  from starlette.responses import StreamingResponse
13
  from openai import OpenAI
14
  from typing import List, Optional, Dict, Any
15
+ import io
16
  import copy
17
+ from pathlib import Path
18
+ from pydub import AudioSegment
19
 
20
  load_dotenv()
21
 
 
29
  os.getenv("API_GEMINI_4"),
30
  os.getenv("API_GEMINI_5"),
31
  ]
32
# Settings for the Whisper transcription backend, reached through the
# OpenAI client pointed at the Groq base URL.
GROQ_BASE_URL = "https://api.groq.com/openai/v1"
WHISPER_MODEL = "whisper-large-v3-turbo"
# Default length, in minutes, of each audio segment sent for transcription.
SEGMENT_MINUTES = 50
# Names of the environment variables holding Groq API keys. Only the first
# one is active; append "API_GROQ_2" ... "API_GROQ_5" to enable more keys.
_GROQ_KEY_ENV_NAMES = ("API_GROQ_1",)
GROQ_API_KEYS = [os.getenv(env_name) for env_name in _GROQ_KEY_ENV_NAMES]
42
+
43
  # Classi Pydantic di VALIDAZIONE Body
44
  class ChatCompletionRequest(BaseModel):
45
  model: str = "gemini-2.0-flash"
 
194
  if params.messages:
195
  params.messages = sanitize_messages(params.messages)
196
  params = convert_payload_for_gemini(params)
197
+ print('------------------------------------------------------- INPUT ---------------------------------------------------------------')
198
  print(params)
199
  response_format = getattr(params, 'response_format', None)
200
  if response_format and getattr(response_format, 'type', None) == 'json_schema':
201
  response = client.beta.chat.completions.parse(**params.model_dump())
202
  else:
203
  response = client.chat.completions.create(**params.model_dump())
204
+ print('------------------------------------------------------- OUTPUT ---------------------------------------------------------------')
205
  print(response)
206
  print("")
207
  return response
 
221
  if params.messages:
222
  params.messages = sanitize_messages(params.messages)
223
  params = convert_payload_for_gemini(params)
224
+ print('------------------------------------------------------- INPUT ---------------------------------------------------------------')
225
+ print(params.model_dump_json(indent=4))
226
+ final_response_content = ''
227
  for chunk in response:
228
  chunk_data = chunk.to_dict() if hasattr(chunk, "to_dict") else chunk
229
+ chunk_content = None
230
+ if chunk.choices and chunk.choices[0].delta:
231
+ chunk_content = chunk.choices[0].delta.content
232
+ if chunk_content:
233
+ final_response_content += chunk_content
234
  yield f"data: {json.dumps(chunk_data)}\n\n"
235
  await asyncio.sleep(0.01)
236
  yield "data: [DONE]\n\n"
237
+ print('------------------------------------------------------- OUTPUT ---------------------------------------------------------------')
238
+ print(final_response_content)
239
  except Exception as e:
240
  if "429" in str(e):
241
  await asyncio.sleep(2)
 
245
  error_data = {"error": str(e)}
246
  yield f"data: {json.dumps(error_data)}\n\n"
247
 
248
+
249
def get_openai_client():
    """Build an OpenAI client using one randomly selected API key.

    Picking a random key on every call spreads requests across the
    configured keys, which is how this service works around
    "Quota Exceeded" limits.

    Entries of API_KEYS that are unset (None or empty, e.g. a missing
    environment variable) are skipped so they can never be selected.
    If no key is configured at all, None is passed through and the
    OpenAI constructor decides how to handle the missing key.

    Returns:
        An OpenAI client bound to BASE_URL.
    """
    configured_keys = [key for key in API_KEYS if key]
    api_key = random.choice(configured_keys) if configured_keys else None
    return OpenAI(api_key=api_key, base_url=BASE_URL)
253
+
254
+ # API Whisper Audio:
255
+ FORMAT_ALIASES = {
256
+ "mpeg": "mp3",
257
+ "x-wav": "wav",
258
+ "vnd.wave": "wav",
259
+ "x-m4a": "m4a",
260
+ "x-aac": "aac",
261
+ }
262
+
263
+ def _detect_format(upload_file: UploadFile) -> str:
264
+ """Rileva il formato audio dal MIME-type o dall'estensione, con alias safe."""
265
+ if upload_file.content_type and upload_file.content_type.startswith("audio/"):
266
+ fmt = upload_file.content_type.split("/", 1)[1]
267
+ else:
268
+ fmt = Path(upload_file.filename).suffix.lstrip(".").lower()
269
+ return FORMAT_ALIASES.get(fmt, fmt)
270
+
271
+ def _split_audio_to_mp3_chunks(audio_bytes: bytes, input_format: str, minutes: int):
272
+ """ Converte (se serve) e splitta. Lascia che ffmpeg auto-rilevi il formato passando format=None: è più sicuro e ignora alias sbagliati. """
273
+ try:
274
+ audio = AudioSegment.from_file(io.BytesIO(audio_bytes))
275
+ except Exception:
276
+ audio = AudioSegment.from_file(io.BytesIO(audio_bytes), format=input_format)
277
+ chunk_len_ms = minutes * 60 * 1000
278
+ for start_ms in range(0, len(audio), chunk_len_ms):
279
+ chunk = audio[start_ms : start_ms + chunk_len_ms]
280
+ buf = io.BytesIO()
281
+ chunk.export(buf, format="mp3")
282
+ yield buf.getvalue()
283
+
284
def _transcribe_chunk(chunk_bytes: bytes,
                      model: str,
                      language: str,
                      response_format: str = "json") -> str:
    """Transcribe one MP3 segment and return its text.

    Args:
        chunk_bytes: MP3-encoded bytes of the segment.
        model: Transcription model name forwarded to call_whisper_api.
        language: Language code forwarded to call_whisper_api.
        response_format: Response format forwarded to call_whisper_api.

    Returns:
        The transcribed text. A plain-string response is returned as is;
        otherwise the ``text`` attribute or ``"text"`` key is used, and an
        empty string is returned when no text is available.
    """
    bio = io.BytesIO(chunk_bytes)
    # Give the in-memory stream a file name; presumably the client uses it
    # as the upload's filename/extension -- confirm against the SDK.
    bio.name = "chunk.mp3"
    resp = call_whisper_api(
        bio,
        model=model,
        language=language,
        response_format=response_format,
    )
    if isinstance(resp, str):
        return resp
    if isinstance(resp, dict):
        text = resp.get("text")
    else:
        # Response objects expose the text as an attribute; anything else
        # yields no text instead of raising AttributeError.
        text = getattr(resp, "text", None)
    # Never return None: the caller joins the results as strings.
    return text or ""
301
+
302
+
303
def get_whisper_client():
    """Build an OpenAI client for the Groq endpoint with a random API key.

    Entries of GROQ_API_KEYS that are unset (None or empty, e.g. a missing
    environment variable) are skipped so they can never be selected. If no
    key is configured at all, None is passed through and the OpenAI
    constructor decides how to handle the missing key.

    Returns:
        An OpenAI client bound to GROQ_BASE_URL.
    """
    configured_keys = [key for key in GROQ_API_KEYS if key]
    api_key = random.choice(configured_keys) if configured_keys else None
    return OpenAI(api_key=api_key, base_url=GROQ_BASE_URL)
306
+
307
def call_whisper_api(audio_file: io.BytesIO,
                     model: str = WHISPER_MODEL,
                     language: str = "it",
                     response_format: str = "json",
                     max_retries: int = 5):
    """Send an audio stream to the transcription API, retrying on HTTP 429.

    A new client (and therefore a newly chosen API key) is created for each
    attempt. When the error text contains "429" the call waits 2 seconds
    and tries again, up to ``max_retries`` additional attempts; the retry
    is a bounded loop rather than unbounded recursion.

    Args:
        audio_file: Seekable in-memory stream holding the audio to upload.
        model: Transcription model name.
        language: Language code of the audio.
        response_format: Response format requested from the API.
        max_retries: Maximum number of retries after a 429 error.

    Returns:
        Whatever ``client.audio.transcriptions.create`` returns.

    Raises:
        Exception: The original error, for non-429 failures or once the
            retry budget is exhausted.
    """
    start_pos = audio_file.tell()
    retries_done = 0
    while True:
        try:
            client = get_whisper_client()
            return client.audio.transcriptions.create(
                file=audio_file,
                model=model,
                language=language,
                response_format=response_format,
            )
        except Exception as e:
            if "429" not in str(e) or retries_done >= max_retries:
                raise
            retries_done += 1
            time.sleep(2)
            # Rewind defensively so the retry re-sends the whole payload
            # even if the failed attempt already consumed the stream.
            audio_file.seek(start_pos)
324
+
325
  # ---------------------------------- Metodi API ---------------------------------------
326
  @app.get("/")
327
  def read_general():
 
343
  except Exception as e:
344
  raise HTTPException(status_code=500, detail=str(e))
345
 
346
@app.post("/v1/audio/transcriptions", dependencies=[Depends(verify_api_key)])
async def audio_transcriptions_endpoint(
        file: UploadFile = File(...),
        model: str = Form(WHISPER_MODEL),
        language: str = Form("it"),
        response_format: str = Form("text"),
        segment_minutes: int = Form(SEGMENT_MINUTES)):
    """Transcribe an uploaded audio file, splitting it into segments.

    The upload is decoded, cut into MP3 segments of ``segment_minutes``
    minutes, each segment is transcribed in order, and the texts are
    joined with blank lines.

    Returns:
        A dict with the model, language, number of segments, segment
        length and the combined transcript under ``"text"``.

    Raises:
        HTTPException: Status 500 carrying the error message for any
            failure during decoding or transcription.
    """
    def _transcribe_all(raw_bytes: bytes, input_fmt: str) -> list:
        # Runs in a worker thread: decoding and the synchronous API calls
        # (including their retry sleeps) would otherwise block the event loop.
        # Segments are consumed lazily so only one is held in memory at a time.
        results = [
            _transcribe_chunk(segment, model, language, response_format)
            for segment in _split_audio_to_mp3_chunks(raw_bytes, input_fmt, segment_minutes)
        ]
        if not results:
            raise ValueError("Audio vuoto o formato non riconosciuto")
        return results

    try:
        raw_bytes = await file.read()
        input_fmt = _detect_format(file) or "mp3"
        loop = asyncio.get_running_loop()
        transcripts = await loop.run_in_executor(None, _transcribe_all, raw_bytes, input_fmt)
        final_text = "\n\n".join(transcripts)
        return {
            "model": model,
            "language": language,
            "segments": len(transcripts),
            "segment_minutes": segment_minutes,
            "text": final_text,
        }
    except HTTPException:
        # Already an HTTP error: propagate it without re-wrapping.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
370
+
371
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces,
    # port 8000, with auto-reload enabled.
    import uvicorn

    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
    )