VladGeekPro committed on
Commit
e446b1b
·
1 Parent(s): 0bd0146

TestingCodeExecution

Browse files
Files changed (1) hide show
  1. app.py +34 -41
app.py CHANGED
@@ -9,6 +9,7 @@ from __future__ import annotations
9
  import json
10
  import os
11
  import tempfile
 
12
  from datetime import date
13
  from pathlib import Path
14
  from typing import Any, Optional
@@ -61,7 +62,6 @@ def get_whisper_model() -> Any:
61
 
62
  return _WHISPER_MODEL
63
 
64
-
65
  class ExpenseTextExtractor:
66
  """
67
  Главный экстрактор данных о расходах.
@@ -96,22 +96,37 @@ class ExpenseTextExtractor:
96
  Returns:
97
  Словарь со всеми извлечёнными данными
98
  """
 
 
 
99
  date_info = self.date_extractor.extract(text, reference_date=reference_date)
 
 
 
100
  supplier_info = self.supplier_extractor.extract(
101
  text,
102
  date_phrase=date_info.get("matched_date_phrase"),
103
  debug=debug_supplier,
104
  )
 
 
 
105
  user_info = self.user_extractor.extract(
106
  text,
107
  supplier_phrase=supplier_info.get("matched_supplier_phrase"),
108
  date_phrase=date_info.get("matched_date_phrase"),
109
  )
 
 
 
110
  amount_info = self.amount_extractor.extract(
111
  text,
112
  matched_date_phrase=date_info["matched_date_phrase"],
113
  matched_supplier_phrase=supplier_info["matched_supplier_phrase"],
114
  )
 
 
 
115
 
116
  result = {
117
  "text": text,
@@ -164,18 +179,21 @@ def polish_notes_text(text: str) -> str:
164
  return normalized
165
 
166
 
167
- def transcribe_audio_text(audio_path: str) -> str:
168
- """Транскрибирует аудио в текст."""
169
  mock_text = os.getenv("EXPENSE_VOICE_MOCK_TEXT")
170
  if mock_text:
171
- return mock_text.strip()
172
 
173
  try:
 
174
  whisper_model = get_whisper_model()
175
  segments, _ = whisper_model.transcribe(audio_path, language="ru", vad_filter=True)
176
  text = " ".join(segment.text.strip() for segment in segments if segment.text and segment.text.strip())
 
 
177
  if text:
178
- return text
179
  except Exception:
180
  pass
181
 
@@ -184,11 +202,13 @@ def transcribe_audio_text(audio_path: str) -> str:
184
 
185
  def process_voice_request(audio_path: str, mode: str, payload: dict[str, Any]) -> dict[str, Any]:
186
  """Обрабатывает голосовой запрос."""
 
 
187
  context = payload.get("context", {}) if isinstance(payload, dict) else {}
188
  supplier_names = extract_names(context.get("suppliers"))
189
  user_names = extract_names(context.get("users"))
190
 
191
- transcript = transcribe_audio_text(audio_path)
192
 
193
  if mode == "notes":
194
  notes = polish_notes_text(transcript)
@@ -208,8 +228,15 @@ def process_voice_request(audio_path: str, mode: str, payload: dict[str, Any]) -
208
  if not user_names:
209
  raise RuntimeError("No users were provided by Laravel context.")
210
 
 
211
  extractor = build_default_pipeline(suppliers=supplier_names, users=user_names)
 
 
 
212
  extracted = extractor.extract(transcript, reference_date=date.today().isoformat())
 
 
 
213
 
214
  return {
215
  "status": "ok",
@@ -264,8 +291,7 @@ def index():
264
  "message": "Voice processing API is running",
265
  "endpoints": {
266
  "POST /process-audio": "Process audio file",
267
- "GET /health": "Health check",
268
- "GET /date-test": "Test date parsing"
269
  }
270
  })
271
 
@@ -276,39 +302,6 @@ def health():
276
  return jsonify({"status": "ok"})
277
 
278
 
279
- @app.get("/date-test")
280
- def date_test():
281
- """Тестирование парсера дат."""
282
- test_phrases = [
283
- "завтра",
284
- "через 2 дня",
285
- "на следующей неделе",
286
- "15 января 2025",
287
- "позавчера",
288
- "в прошлый понедельник",
289
- "оплата за март",
290
- "5 марта",
291
- "купил вчера",
292
- "в конце месяца"
293
- ]
294
-
295
- extractor = ExpenseDateExtractor()
296
- results = []
297
- for phrase in test_phrases:
298
- result = extractor.extract(phrase)
299
- results.append({
300
- "phrase": phrase,
301
- "date": result.get("date_iso"),
302
- "matched": result.get("matched_date_phrase")
303
- })
304
-
305
- return jsonify({
306
- "status": "ok",
307
- "reference_date": date.today().isoformat(),
308
- "results": results
309
- })
310
-
311
-
312
  @app.post("/process-audio")
313
  def process_audio():
314
  """Обработка аудио файла."""
 
9
  import json
10
  import os
11
  import tempfile
12
+ import time
13
  from datetime import date
14
  from pathlib import Path
15
  from typing import Any, Optional
 
62
 
63
  return _WHISPER_MODEL
64
 
 
65
  class ExpenseTextExtractor:
66
  """
67
  Главный экстрактор данных о расходах.
 
96
  Returns:
97
  Словарь со всеми извлечёнными данными
98
  """
99
+ timings = {}
100
+
101
+ t0 = time.time()
102
  date_info = self.date_extractor.extract(text, reference_date=reference_date)
103
+ timings["date_extractor"] = round(time.time() - t0, 3)
104
+
105
+ t0 = time.time()
106
  supplier_info = self.supplier_extractor.extract(
107
  text,
108
  date_phrase=date_info.get("matched_date_phrase"),
109
  debug=debug_supplier,
110
  )
111
+ timings["supplier_extractor"] = round(time.time() - t0, 3)
112
+
113
+ t0 = time.time()
114
  user_info = self.user_extractor.extract(
115
  text,
116
  supplier_phrase=supplier_info.get("matched_supplier_phrase"),
117
  date_phrase=date_info.get("matched_date_phrase"),
118
  )
119
+ timings["user_extractor"] = round(time.time() - t0, 3)
120
+
121
+ t0 = time.time()
122
  amount_info = self.amount_extractor.extract(
123
  text,
124
  matched_date_phrase=date_info["matched_date_phrase"],
125
  matched_supplier_phrase=supplier_info["matched_supplier_phrase"],
126
  )
127
+ timings["amount_extractor"] = round(time.time() - t0, 3)
128
+
129
+ print(f"[TIMINGS] {timings}")
130
 
131
  result = {
132
  "text": text,
 
179
  return normalized
180
 
181
 
182
+ def transcribe_audio_text(audio_path: str) -> tuple[str, float]:
183
+ """Транскрибирует аудио в текст. Возвращает (текст, время в секундах)."""
184
  mock_text = os.getenv("EXPENSE_VOICE_MOCK_TEXT")
185
  if mock_text:
186
+ return mock_text.strip(), 0.0
187
 
188
  try:
189
+ t0 = time.time()
190
  whisper_model = get_whisper_model()
191
  segments, _ = whisper_model.transcribe(audio_path, language="ru", vad_filter=True)
192
  text = " ".join(segment.text.strip() for segment in segments if segment.text and segment.text.strip())
193
+ elapsed = round(time.time() - t0, 3)
194
+ print(f"[TIMINGS] whisper_transcribe: {elapsed}s")
195
  if text:
196
+ return text, elapsed
197
  except Exception:
198
  pass
199
 
 
202
 
203
  def process_voice_request(audio_path: str, mode: str, payload: dict[str, Any]) -> dict[str, Any]:
204
  """Обрабатывает голосовой запрос."""
205
+ total_start = time.time()
206
+
207
  context = payload.get("context", {}) if isinstance(payload, dict) else {}
208
  supplier_names = extract_names(context.get("suppliers"))
209
  user_names = extract_names(context.get("users"))
210
 
211
+ transcript, whisper_time = transcribe_audio_text(audio_path)
212
 
213
  if mode == "notes":
214
  notes = polish_notes_text(transcript)
 
228
  if not user_names:
229
  raise RuntimeError("No users were provided by Laravel context.")
230
 
231
+ t0 = time.time()
232
  extractor = build_default_pipeline(suppliers=supplier_names, users=user_names)
233
+ pipeline_init_time = round(time.time() - t0, 3)
234
+ print(f"[TIMINGS] pipeline_init: {pipeline_init_time}s")
235
+
236
  extracted = extractor.extract(transcript, reference_date=date.today().isoformat())
237
+
238
+ total_time = round(time.time() - total_start, 3)
239
+ print(f"[TIMINGS] TOTAL: {total_time}s (whisper: {whisper_time}s)")
240
 
241
  return {
242
  "status": "ok",
 
291
  "message": "Voice processing API is running",
292
  "endpoints": {
293
  "POST /process-audio": "Process audio file",
294
+ "GET /health": "Health check"
 
295
  }
296
  })
297
 
 
302
  return jsonify({"status": "ok"})
303
 
304
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  @app.post("/process-audio")
306
  def process_audio():
307
  """Обработка аудио файла."""