Marti Umbert committed on
Commit
9d68c36
·
1 Parent(s): d2f2b3c

whisperlivekit/audio_processor.py: apply translation to the whole recognized chunk instead of on a per-token basis

Browse files
Files changed (1) hide show
  1. whisperlivekit/audio_processor.py +6 -2
whisperlivekit/audio_processor.py CHANGED
@@ -373,15 +373,19 @@ class AudioProcessor:
373
  "beg": format_time(token.start),
374
  "end": format_time(token.end),
375
  "diff": round(token.end - last_end_diarized, 2),
376
- "translation": await self.translate_text(text = token.text)
 
377
  })
378
  previous_speaker = speaker
379
  elif token.text: # Only append if text isn't empty
380
  lines[-1]["text"] += sep + token.text
381
  lines[-1]["end"] = format_time(token.end)
382
  lines[-1]["diff"] = round(token.end - last_end_diarized, 2)
383
- lines[-1]["translation"] += " " + await self.translate_text(text = token.text)
 
384
 
 
 
385
  # Handle undiarized text
386
  if undiarized_text:
387
  combined = sep.join(undiarized_text)
 
373
  "beg": format_time(token.start),
374
  "end": format_time(token.end),
375
  "diff": round(token.end - last_end_diarized, 2),
376
+ "translation": ""
377
+ #"translation": await self.translate_text(text = token.text)
378
  })
379
  previous_speaker = speaker
380
  elif token.text: # Only append if text isn't empty
381
  lines[-1]["text"] += sep + token.text
382
  lines[-1]["end"] = format_time(token.end)
383
  lines[-1]["diff"] = round(token.end - last_end_diarized, 2)
384
+ lines[-1]["translation"] = ""
385
+ #lines[-1]["translation"] += " " + await self.translate_text(text = token.text)
386
 
387
+ text_to_translate = lines[-1]["text"]
388
+ lines[-1]["translation"] = await self.translate_text(text=text_to_translate) if len(lines) > 0 else ""
389
  # Handle undiarized text
390
  if undiarized_text:
391
  combined = sep.join(undiarized_text)