VladGeekPro committed on
Commit
4b75cf2
·
2 Parent(s): 58d658a 392dfe4

Merge remote-tracking branch 'origin/main' into RevertedWhisperTurbuAndDeleteTmpDebug

Browse files
Files changed (2) hide show
  1. Dockerfile +2 -2
  2. app.py +21 -2
Dockerfile CHANGED
@@ -2,13 +2,13 @@ FROM python:3.11-slim
2
 
3
  ENV PYTHONUNBUFFERED=1 PIP_NO_CACHE_DIR=1 HOME=/home/user \
4
  PATH=/home/user/.local/bin:$PATH PORT=7860 \
5
- WHISPER_MODEL=deepdml/faster-whisper-large-v3-turbo-ct2 \
6
  OMP_NUM_THREADS=2 OPENBLAS_NUM_THREADS=2 \
7
  TOKENIZERS_PARALLELISM=false \
8
  WHISPER_CPU_THREADS=2 WHISPER_NUM_BEAMS=1 \
9
  WHISPER_VAD_FILTER=0 WHISPER_PRELOAD_ON_START=1 \
10
  WHISPER_BACKEND=auto WHISPER_REMOTE_PROVIDER=hf-inference \
11
- WHISPER_REMOTE_MODEL=openai/whisper-large-v3-turbo \
12
  WHISPER_REMOTE_TIMEOUT=15 WHISPER_PREPROCESS_AUDIO=1
13
 
14
  RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg \
 
2
 
3
  ENV PYTHONUNBUFFERED=1 PIP_NO_CACHE_DIR=1 HOME=/home/user \
4
  PATH=/home/user/.local/bin:$PATH PORT=7860 \
5
+ WHISPER_MODEL=deepdml/faster-whisper-large-v3-ct2 \
6
  OMP_NUM_THREADS=2 OPENBLAS_NUM_THREADS=2 \
7
  TOKENIZERS_PARALLELISM=false \
8
  WHISPER_CPU_THREADS=2 WHISPER_NUM_BEAMS=1 \
9
  WHISPER_VAD_FILTER=0 WHISPER_PRELOAD_ON_START=1 \
10
  WHISPER_BACKEND=auto WHISPER_REMOTE_PROVIDER=hf-inference \
11
+ WHISPER_REMOTE_MODEL=openai/whisper-large-v3 \
12
  WHISPER_REMOTE_TIMEOUT=15 WHISPER_PREPROCESS_AUDIO=1
13
 
14
  RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg \
app.py CHANGED
@@ -286,7 +286,7 @@ def transcribe_audio_remote(audio_path: str) -> tuple[str, float]:
286
  """Транскрибирует аудио через HF Inference."""
287
  started = time.time()
288
  client = get_hf_asr_client()
289
- model_id = os.getenv("WHISPER_REMOTE_MODEL", "openai/whisper-large-v3-turbo")
290
 
291
  result = client.automatic_speech_recognition(audio=audio_path, model=model_id)
292
  text = (getattr(result, "text", None) or "").strip()
@@ -328,7 +328,7 @@ def get_whisper_model() -> Any:
328
  if _WHISPER_MODEL is None:
329
  from faster_whisper import WhisperModel
330
 
331
- model_id = os.getenv("WHISPER_MODEL", "deepdml/faster-whisper-large-v3-turbo-ct2")
332
  cpu_threads = max(1, int(os.getenv("WHISPER_CPU_THREADS", "2")))
333
 
334
  _WHISPER_MODEL = WhisperModel(
@@ -374,6 +374,8 @@ class ExpenseTextExtractor:
374
  t0 = time.time()
375
  date_info = self.date_extractor.extract(text, reference_date=reference_date, debug=debug)
376
  timings["date_extractor"] = round(time.time() - t0, 3)
 
 
377
 
378
  t0 = time.time()
379
  supplier_info = self.supplier_extractor.extract(
@@ -382,6 +384,8 @@ class ExpenseTextExtractor:
382
  debug=debug,
383
  )
384
  timings["supplier_extractor"] = round(time.time() - t0, 3)
 
 
385
 
386
  t0 = time.time()
387
  user_info = self.user_extractor.extract(
@@ -391,6 +395,8 @@ class ExpenseTextExtractor:
391
  debug=debug,
392
  )
393
  timings["user_extractor"] = round(time.time() - t0, 3)
 
 
394
 
395
  t0 = time.time()
396
  amount_info = self.amount_extractor.extract(
@@ -400,6 +406,8 @@ class ExpenseTextExtractor:
400
  debug=debug,
401
  )
402
  timings["amount_extractor"] = round(time.time() - t0, 3)
 
 
403
 
404
  if debug:
405
  print(f"[TIMINGS] {timings}")
@@ -506,6 +514,13 @@ def process_voice_request(audio_path: str, mode: str, payload: dict[str, Any], d
506
  user_names = extract_names(context.get("users"))
507
 
508
  transcript, whisper_time = transcribe_audio_text(audio_path)
 
 
 
 
 
 
 
509
 
510
  if mode == "notes":
511
  notes = polish_notes_text(transcript)
@@ -531,6 +546,8 @@ def process_voice_request(audio_path: str, mode: str, payload: dict[str, Any], d
531
  print(f"[TIMINGS] pipeline_init: {pipeline_init_time}s")
532
 
533
  extracted = extractor.extract(transcript, reference_date=date.today().isoformat(), debug=debug)
 
 
534
 
535
  total_time = round(time.time() - total_start, 3)
536
  print(f"[TIMINGS] TOTAL: {total_time}s (whisper: {whisper_time}s)")
@@ -546,6 +563,8 @@ def process_voice_request(audio_path: str, mode: str, payload: dict[str, Any], d
546
  }
547
  if debug and extracted.get("debug"):
548
  payload["debug"] = extracted.get("debug")
 
 
549
  return payload
550
 
551
 
 
286
  """Транскрибирует аудио через HF Inference."""
287
  started = time.time()
288
  client = get_hf_asr_client()
289
+ model_id = os.getenv("WHISPER_REMOTE_MODEL", "openai/whisper-large-v3")
290
 
291
  result = client.automatic_speech_recognition(audio=audio_path, model=model_id)
292
  text = (getattr(result, "text", None) or "").strip()
 
328
  if _WHISPER_MODEL is None:
329
  from faster_whisper import WhisperModel
330
 
331
+ model_id = os.getenv("WHISPER_MODEL", "deepdml/faster-whisper-large-v3-ct2")
332
  cpu_threads = max(1, int(os.getenv("WHISPER_CPU_THREADS", "2")))
333
 
334
  _WHISPER_MODEL = WhisperModel(
 
374
  t0 = time.time()
375
  date_info = self.date_extractor.extract(text, reference_date=reference_date, debug=debug)
376
  timings["date_extractor"] = round(time.time() - t0, 3)
377
+ if debug:
378
+ print(f"[DEBUG][DATE] {date_info}")
379
 
380
  t0 = time.time()
381
  supplier_info = self.supplier_extractor.extract(
 
384
  debug=debug,
385
  )
386
  timings["supplier_extractor"] = round(time.time() - t0, 3)
387
+ if debug:
388
+ print(f"[DEBUG][SUPPLIER] {supplier_info}")
389
 
390
  t0 = time.time()
391
  user_info = self.user_extractor.extract(
 
395
  debug=debug,
396
  )
397
  timings["user_extractor"] = round(time.time() - t0, 3)
398
+ if debug:
399
+ print(f"[DEBUG][USER] {user_info}")
400
 
401
  t0 = time.time()
402
  amount_info = self.amount_extractor.extract(
 
406
  debug=debug,
407
  )
408
  timings["amount_extractor"] = round(time.time() - t0, 3)
409
+ if debug:
410
+ print(f"[DEBUG][AMOUNT] {amount_info}")
411
 
412
  if debug:
413
  print(f"[TIMINGS] {timings}")
 
514
  user_names = extract_names(context.get("users"))
515
 
516
  transcript, whisper_time = transcribe_audio_text(audio_path)
517
+ if debug:
518
+ print(f"[DEBUG][TRANSCRIPT] {transcript}")
519
+ print(
520
+ f"[DEBUG][CONTEXT] suppliers_count={len(supplier_names)}, users_count={len(user_names)}"
521
+ )
522
+ print(f"[DEBUG][SUPPLIERS] {supplier_names}")
523
+ print(f"[DEBUG][USERS] {user_names}")
524
 
525
  if mode == "notes":
526
  notes = polish_notes_text(transcript)
 
546
  print(f"[TIMINGS] pipeline_init: {pipeline_init_time}s")
547
 
548
  extracted = extractor.extract(transcript, reference_date=date.today().isoformat(), debug=debug)
549
+ if debug:
550
+ print(f"[DEBUG][EXTRACTED_RAW] {extracted}")
551
 
552
  total_time = round(time.time() - total_start, 3)
553
  print(f"[TIMINGS] TOTAL: {total_time}s (whisper: {whisper_time}s)")
 
563
  }
564
  if debug and extracted.get("debug"):
565
  payload["debug"] = extracted.get("debug")
566
+ if debug:
567
+ print(f"[DEBUG][RESPONSE_PAYLOAD] {payload}")
568
  return payload
569
 
570