Rehman1603 committed on
Commit
53978c9
1 Parent(s): 91e6e4f

Create Audio_into_chunks.py

Browse files
Files changed (1) hide show
  1. Audio_into_chunks.py +56 -0
Audio_into_chunks.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pydub import AudioSegment
3
+ import whisper
4
+ from deep_translator import GoogleTranslator
5
+
6
+
7
+ # @title Audio into chunks
8
def audio_into_chunks_transcribe_translate(
    audio_file, lang, *, chunk_length_seconds=11, output_format="wav"
):
    """Split a FLAC file into chunks, transcribe each chunk, and translate it.

    The audio is cut into consecutive chunks of ``chunk_length_seconds``
    (the last chunk may be shorter). Each chunk is exported to a file named
    ``<input basename>_chunk_<n>.<output_format>`` relative to the current
    working directory, transcribed with the Whisper "medium" model, and the
    transcription is translated with ``GoogleTranslator``. Both the
    translation and the raw transcription of every chunk are printed.

    Args:
        audio_file: Path to the input audio file; it is decoded as FLAC.
        lang: Target language passed to ``GoogleTranslator(target=...)``.
        chunk_length_seconds: Length of each chunk in seconds (default 11).
        output_format: Format/extension used for exported chunks
            (default "wav").

    Returns:
        A list with the translated text of each chunk, in chunk order.

    Raises:
        ValueError: If ``audio_file`` does not exist, or if
            ``chunk_length_seconds`` is not a positive duration.
    """
    # Check the input first so a bad path fails before any model is loaded.
    if not os.path.exists(audio_file):
        raise ValueError(f"FLAC file not found: {audio_file}")

    chunk_duration_ms = int(chunk_length_seconds * 1000)
    if chunk_duration_ms <= 0:
        raise ValueError(
            f"chunk_length_seconds must be positive, got {chunk_length_seconds!r}"
        )

    # Load the FLAC audio.
    audio_segment = AudioSegment.from_file(audio_file, format="flac")
    # Load the transcription model.
    model = whisper.load_model("medium")
    # The translator does not depend on the chunk, so build it once.
    translator = GoogleTranslator(source='auto', target=lang)

    # len() is the segment length in whole milliseconds. Using it instead of
    # the float ``duration_seconds * 1000`` avoids a spurious empty final
    # chunk when the float is fractionally larger than the sliceable length.
    total_duration_ms = len(audio_segment)
    base_name = os.path.splitext(os.path.basename(audio_file))[0]

    translated_chunks = []
    chunk_starts = range(0, total_duration_ms, chunk_duration_ms)
    for chunk_num, start_time in enumerate(chunk_starts, start=1):
        end_time = min(start_time + chunk_duration_ms, total_duration_ms)
        chunk = audio_segment[start_time:end_time]

        # Export the chunk under a sequentially numbered filename.
        output_filename = f"{base_name}_chunk_{chunk_num}.{output_format}"
        chunk.export(output_filename, format=output_format)

        # Transcribe the exported chunk, then translate the transcription.
        result = model.transcribe(output_filename)
        data_trans = translator.translate(result['text'])
        translated_chunks.append(data_trans)
        print(data_trans)
        print(result['text'])

    # Originally this summary sat after ``return`` (unreachable) and referred
    # to an undefined name; report it before returning instead.
    print(
        f"FLAC file '{audio_file}' successfully split into "
        f"{len(translated_chunks)} chunks."
    )
    return translated_chunks
55
+
56
+