EwoutLagendijk committed on
Commit
b9710dc
·
verified ·
1 Parent(s): a8d0349

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -11
app.py CHANGED
@@ -37,17 +37,36 @@ def format_timestamp(seconds: float, always_include_hours: bool = False, decimal
37
  return seconds
38
 
39
 
40
- def transcribe(file, task, return_timestamps):
41
- outputs = pipe(file, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=return_timestamps)
42
- text = outputs["text"]
43
- if return_timestamps:
44
- timestamps = outputs["chunks"]
45
- timestamps = [
46
- f"[{format_timestamp(chunk['timestamp'][0])} -> {format_timestamp(chunk['timestamp'][1])}] {chunk['text']}"
47
- for chunk in timestamps
48
- ]
49
- text = "\n".join(str(feature) for feature in timestamps)
50
- return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
 
53
  demo = gr.Blocks()
 
37
  return seconds
38
 
39
 
40
+ def transcribe_speech(filepath):
41
+ # Load the audio
42
+ audio, sampling_rate = librosa.load(filepath, sr=16000)
43
+
44
+ # Define chunk size (e.g., 30 seconds)
45
+ chunk_duration = 30 # in seconds
46
+ chunk_samples = chunk_duration * sampling_rate
47
+
48
+ # Process audio in chunks
49
+ transcription = []
50
+ for i in range(0, len(audio), chunk_samples):
51
+ chunk = audio[i:i + chunk_samples]
52
+
53
+ # Convert the chunk into input features
54
+ inputs = processor(audio=chunk, sampling_rate=16000, return_tensors="pt").input_features
55
+
56
+ # Generate transcription for the chunk
57
+ generated_ids = model.generate(
58
+ inputs,
59
+ max_new_tokens=444, # Max allowed by Whisper
60
+ forced_decoder_ids=processor.get_decoder_prompt_ids(language="id", task="transcribe")
61
+ )
62
+
63
+ # Decode and append the transcription
64
+ chunk_transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
65
+ transcription.append(chunk_transcription)
66
+
67
+ # Combine all chunk transcriptions into a single string
68
+ return " ".join(transcription)
69
+
70
 
71
 
72
  demo = gr.Blocks()