zoghlami committed on
Commit 26d9c14
1 Parent(s): 333b302

debugging 2

Files changed (1)
  1. laughter-detection/transcribe.py +21 -28
laughter-detection/transcribe.py CHANGED
@@ -5,10 +5,15 @@ import torch
 from pydub import AudioSegment
 from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
 from tqdm import tqdm
+import logging
+
+# Setup logging
+logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
 
 def create_directory(path):
     if not os.path.exists(path):
         os.makedirs(path)
+        logging.debug(f"Directory created at: {path}")
 
 def transcribe_audio(file_path, pipe, transcripts_dir):
     try:
@@ -19,13 +24,13 @@ def transcribe_audio(file_path, pipe, transcripts_dir):
             txt_file_path = os.path.join(transcripts_dir, txt_file_name)
             with open(txt_file_path, 'w') as txt_file:
                 txt_file.write('\n'.join(str(chunk) for chunk in result["chunks"]))
-            print(f"Transcription saved to {txt_file_path}")
+            logging.info(f"Transcription saved to {txt_file_path}")
             return txt_file_path
         else:
-            print("No transcription was generated.")
+            logging.warning("No transcription was generated.")
             return None
     except ValueError as e:
-        print(f"Error processing file : {e}")
+        logging.error(f"Error processing file {file_path} : {e}")
         return None
 
 def check_timestamps_and_slice(audio_file_path, transcript_file_path, chunks_dir, jump_threshold, timestamp_records):
@@ -37,11 +42,11 @@ def check_timestamps_and_slice(audio_file_path, transcript_file_path, chunks_dir
     for i in range(len(rows) - 1):
         current_end_time = rows[i]['timestamp'][1] * 1000 if rows[i]['timestamp'][1] is not None else None
         next_start_time = rows[i + 1]['timestamp'][0] * 1000 if rows[i+1]['timestamp'][1] is not None else None
-        if current_end_time is not None and next_start_time is not None:
-            if next_start_time - current_end_time > jump_threshold:
-                timestamps.append((int(current_end_time), int(next_start_time)))
+        if current_end_time is not None and next_start_time is not None and (next_start_time - current_end_time > jump_threshold):
+            timestamps.append((int(current_end_time), int(next_start_time)))
+            logging.debug(f"Timestamp slice between {current_end_time} and {next_start_time} identified.")
         else:
-            print(f"Skipping segment due to missing timestamp: current_end_time={current_end_time}, next_start_time={next_start_time}")
+            logging.debug(f"Skipping segment due to missing or insufficient gap in timestamp: current_end_time={current_end_time}, next_start_time={next_start_time}")
 
     episode_name = os.path.splitext(os.path.basename(transcript_file_path))[0]
     episode_dir = chunks_dir
@@ -50,13 +55,13 @@ def check_timestamps_and_slice(audio_file_path, transcript_file_path, chunks_dir
     audio = AudioSegment.from_file(audio_file_path)
     for i, (start_ms, end_ms) in enumerate(timestamps):
         sliced_audio = audio[start_ms:end_ms]
-        print("sliced chunk")
         output_file_name = f'sliced_chunk_{i+1}.wav'
         output_file_path = os.path.join(episode_dir, output_file_name)
         sliced_audio.export(output_file_path, format="wav")
         timestamp_records.append(f"{output_file_name} {start_ms / 1000.0}")
+        logging.info(f"Sliced audio chunk saved as {output_file_name}")
 
-    print(f"Slicing complete for {episode_name}.")
+    logging.info(f"Slicing complete for {episode_name}.")
 
 def main():
     parser = argparse.ArgumentParser(description='Audio Processing with Whisper and PyDub')
@@ -73,43 +78,31 @@ def main():
     device = "cuda:0" if torch.cuda.is_available() else "cpu"
     torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
     model_id = "openai/whisper-base"
-    model = AutoModelForSpeechSeq2Seq.from_pretrained(
-        model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
-    )
+    model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True)
     model.to(device)
     processor = AutoProcessor.from_pretrained(model_id)
-    pipe = pipeline(
-        "automatic-speech-recognition",
-        model=model,
-        tokenizer=processor.tokenizer,
-        feature_extractor=processor.feature_extractor,
-        max_new_tokens=128,
-        chunk_length_s=30,
-        batch_size=16,
-        return_timestamps=True,
-        torch_dtype=torch_dtype,
-        device=device,
-    )
+    pipe = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, max_new_tokens=128, chunk_length_s=30, batch_size=16, return_timestamps=True, torch_dtype=torch_dtype, device=device)
 
     if os.path.isdir(args.path):
         audio_files = [f for f in os.listdir(args.path) if f.endswith(('.mp3', '.wav', '.m4a'))]
+        logging.info(f"Processing {len(audio_files)} audio files in directory {args.path}")
         for filename in tqdm(audio_files, desc="Processing audio files"):
             file_path = os.path.join(args.path, filename)
             transcript_file_path = transcribe_audio(file_path, pipe, transcripts_dir)
             if transcript_file_path:
                 check_timestamps_and_slice(file_path, transcript_file_path, chunks_dir, args.jump_threshold, timestamp_records)
     elif os.path.isfile(args.path):
+        logging.info(f"Processing single audio file at {args.path}")
        transcript_file_path = transcribe_audio(args.path, pipe, transcripts_dir)
        if transcript_file_path:
            check_timestamps_and_slice(args.path, transcript_file_path, chunks_dir, args.jump_threshold, timestamp_records)
     else:
-        print("The provided path does not exist.")
+        logging.error("The provided path does not exist.")
 
-    # Write timestamp records to a file
     with open("timestamps.txt", 'w') as f:
         for record in timestamp_records:
             f.write(record + '\n')
-    print("Timestamps recorded in 'timestamps.txt'.")
+    logging.info("Timestamps recorded in 'timestamps.txt'.")
 
 if __name__ == "__main__":
-    main()
+    main()
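
The substance of this commit is replacing the script's print() calls with the standard-library logging module, configured once at import time. A minimal, self-contained sketch of that pattern (stdlib behaviour only, not the repository's code):

import logging

# One-time configuration: DEBUG level plus a timestamped format,
# mirroring the logging.basicConfig call transcribe.py now makes.
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(levelname)s - %(message)s')

logging.debug("fine-grained detail, e.g. a directory being created")
logging.info("normal progress, e.g. a transcript or sliced chunk written")
logging.warning("recoverable oddity, e.g. no transcription generated")
logging.error("failure, e.g. an invalid input path")

Because the level is set to DEBUG, all four severities reach the console; raising it to INFO would silence the per-segment debug messages without touching the rest of the script.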