camparchimedes committed (verified)
Commit badcd8d · 1 Parent(s): 49351f8

Update app.py

Files changed (1):
  1. app.py +18 -4
app.py CHANGED
@@ -65,11 +65,20 @@ def convert_to_wav(audio_file):
     return wav_file
 
 
-# @spaces.GPU(duration=300, queue=False)
-# Set distinct pad and eos tokens
+# @spaces.GPU(duration=120, queue=False)
+
+
+
+# Configure tokenizer to have distinct pad_token_id and eos_token_id
 if processor.tokenizer.pad_token_id is None:
-    processor.tokenizer.pad_token_id = processor.tokenizer.eos_token_id + 1
+    # Setting pad_token_id explicitly to ensure distinction from eos_token_id
+    processor.tokenizer.pad_token_id = processor.tokenizer.eos_token_id + 1
 
+# Sanity check to confirm distinct pad and eos tokens
+assert processor.tokenizer.pad_token_id != processor.tokenizer.eos_token_id, \
+    "pad_token_id and eos_token_id must be distinct!"
+
+# Proceed with the transcription function
 def transcribe_audio(audio_file, batch_size=4):
     start_time = time.time()
     if audio_file.endswith(".m4a"):
@@ -84,18 +93,21 @@ def transcribe_audio(audio_file, batch_size=4):
         batch_chunks = chunks[i:i + batch_size]
         inputs = processor(batch_chunks, sampling_rate=16000, return_tensors="pt", padding=True)
         inputs = inputs.to(device)
+
         attention_mask = inputs.attention_mask.to(device) if 'attention_mask' in inputs else None
+
         with torch.no_grad():
             output = model.generate(
                 inputs.input_features,
                 max_length=2048,
                 num_beams=8,
-                # task="transcribe",
+                task="transcribe",
                 attention_mask=attention_mask,
                 language="no",
                 pad_token_id=processor.tokenizer.pad_token_id,
                 eos_token_id=processor.tokenizer.eos_token_id
             )
+
         transcription += " ".join(processor.batch_decode(output, skip_special_tokens=True)) + " "
 
     end_time = time.time()
@@ -107,6 +119,8 @@ def transcribe_audio(audio_file, batch_size=4):
     return transcription.strip(), result
 
 
+
+
 # Graph-based summarization|TextRank
 def summarize_text(text):
     sentences = sent_tokenize(text)
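
For context on the pad/eos handling in the first hunk: the guard only assigns a pad id when the tokenizer has none, while the new assert also fails when a checkpoint ships with pad_token_id already equal to eos_token_id. A minimal, self-contained sketch of the intended behaviour, assuming a Whisper-style checkpoint (openai/whisper-small is used here only as a stand-in; the model app.py actually loads is not visible in this diff) and with the guard widened to cover that equal-ids case:

from transformers import WhisperProcessor

# Stand-in checkpoint; the model that app.py actually loads is not shown in this diff.
processor = WhisperProcessor.from_pretrained("openai/whisper-small")

# Same idea as the commit, with a slightly wider guard: also reassign when a
# checkpoint ships with pad_token_id already equal to eos_token_id, which the
# `is None` check alone would let through and the assert would then reject.
tok = processor.tokenizer
if tok.pad_token_id is None or tok.pad_token_id == tok.eos_token_id:
    tok.pad_token_id = tok.eos_token_id + 1  # next id up; may coincide with another special token

assert tok.pad_token_id != tok.eos_token_id, \
    "pad_token_id and eos_token_id must be distinct!"

print("pad:", tok.pad_token_id, "eos:", tok.eos_token_id)

Whether the id one past eos is safe to use as padding depends on the checkpoint's vocabulary, so treat this as illustrating the commit's intent rather than a recommended configuration.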
 
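The batched generate() call in the second hunk can be exercised on its own roughly as follows. This is a sketch under assumptions: openai/whisper-small again stands in for the real checkpoint, the 30-second chunks that app.py slices from the converted WAV are faked with silent NumPy arrays, and max_length is lowered to Whisper's 448-token decoder limit rather than the 2048 used above.

import numpy as np
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor

device = "cuda" if torch.cuda.is_available() else "cpu"

processor = WhisperProcessor.from_pretrained("openai/whisper-small")   # stand-in checkpoint
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small").to(device)

# Stand-ins for the 30 s, 16 kHz chunks app.py produces upstream of this loop.
chunks = [np.zeros(16000 * 30, dtype=np.float32) for _ in range(2)]

batch_size = 2
transcription = ""
for i in range(0, len(chunks), batch_size):
    batch_chunks = chunks[i:i + batch_size]
    inputs = processor(batch_chunks, sampling_rate=16000, return_tensors="pt", padding=True).to(device)
    # The feature extractor only returns an attention mask when asked to, hence the guard.
    attention_mask = inputs.attention_mask.to(device) if "attention_mask" in inputs else None

    with torch.no_grad():
        output = model.generate(
            inputs.input_features,
            max_length=448,            # Whisper decoders are capped at 448 positions
            num_beams=8,
            task="transcribe",
            attention_mask=attention_mask,
            language="no",
            pad_token_id=processor.tokenizer.pad_token_id,
            eos_token_id=processor.tokenizer.eos_token_id,
        )

    transcription += " ".join(processor.batch_decode(output, skip_special_tokens=True)) + " "

print(transcription.strip())

With real Norwegian audio in place of the silent chunks, this mirrors the hunk above line for line; on silence the decoded text is not meaningful.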
 
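The last hunk only touches whitespace around summarize_text, whose comment labels it graph-based TextRank over sent_tokenize output; the function body itself is outside this diff. A self-contained sketch of that general approach (sentence-similarity graph scored with PageRank), with nltk, scikit-learn and networkx as assumed dependencies and num_sentences as an illustrative parameter:

import networkx as nx
import nltk
from nltk.tokenize import sent_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

nltk.download("punkt", quiet=True)

def summarize_text(text, num_sentences=3):
    # For Norwegian transcripts, sent_tokenize(text, language="norwegian") is also available.
    sentences = sent_tokenize(text)
    if len(sentences) <= num_sentences:
        return text

    # TF-IDF vector per sentence, then pairwise cosine similarity as edge weights.
    tfidf = TfidfVectorizer().fit_transform(sentences)
    similarity = cosine_similarity(tfidf)

    # TextRank: run PageRank over the sentence-similarity graph.
    graph = nx.from_numpy_array(similarity)
    scores = nx.pagerank(graph)

    # Keep the top-scoring sentences, re-emitted in their original order.
    top = sorted(scores, key=scores.get, reverse=True)[:num_sentences]
    return " ".join(sentences[i] for i in sorted(top))

Only the TextRank framing and the sent_tokenize call are visible in this commit; whether app.py's actual implementation uses TF-IDF or some other sentence representation is not shown here.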