# Diarize call-center WAV recordings with pyannote.audio, split them into
# per-speaker segments, transcribe each segment, and summarize the transcript.
# Requires pyannote.audio, pydub, and torch (plus the local segment_wave_files,
# transcribe_files, and transcript_analysis modules).
import os
import time
from pprint import pprint

import torch
from pyannote.audio import Pipeline
from pydub import AudioSegment

from segment_wave_files import segment_wave_files
from transcribe_files import transcribe_segments
from transcript_analysis import transcript_analysis
# The gated pyannote models require a Hugging Face access token; it is read
# from the HUGGING_FACE environment variable (huggingface_hub.login() is an
# interactive alternative).
hugging_face = os.environ.get("HUGGING_FACE")
pipelineDiary = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=hugging_face)

# Move the diarization pipeline to the GPU when one is available.
if torch.cuda.is_available():
    print("Diarization pipeline: using CUDA")
    pipelineDiary.to(torch.device("cuda"))
else:
    print("Diarization pipeline: using CPU")

def diarize_wav_file(file_name):
    """Diarize a WAV file and merge consecutive turns by the same speaker.

    Returns a list of dicts such as
    {'speaker': 'SPEAKER_00', 'start': 0.0, 'end': 12.3}, times in seconds.
    """
    print("DIARIZING " + file_name)
    start = time.time()
    diarization = pipelineDiary(file_name, num_speakers=2)
    print("Elapsed " + str(time.time() - start))
    # The pipeline also accepts pre-loaded audio:
    # {"waveform": audio_tensor, "sample_rate": sample_rate}
    speakers = []
    cont_speaker = ""
    segment = None
    for turn, _, speaker in diarization.itertracks(yield_label=True):
        if cont_speaker != speaker:
            # A new speaker starts: flush the previous merged segment.
            if segment is not None:
                speakers.append(segment)
            segment = {'speaker': speaker, 'start': round(turn.start, 1),
                       'end': round(turn.end, 1)}
            cont_speaker = speaker
        else:
            # Same speaker continues: extend the current segment's end time.
            segment['end'] = round(turn.end, 1)

    # Flush the final segment, which the loop otherwise drops.
    if segment is not None:
        speakers.append(segment)
    return speakers
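
# A minimal sketch (not called by main) of the pre-loaded-audio variant hinted
# at in the comment above: torchaudio loads the file into a tensor, and the
# pipeline takes a {"waveform", "sample_rate"} dict instead of a path. The
# helper name diarize_loaded_audio is ours, not part of the original script.
def diarize_loaded_audio(file_name):
    import torchaudio  # local import; only this variant needs it
    waveform, sample_rate = torchaudio.load(file_name)
    return pipelineDiary({"waveform": waveform, "sample_rate": sample_rate},
                         num_speakers=2)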


def convert_mono_16khz(location, file):
    """Down-mix a WAV file to mono 16 kHz and write it alongside the
    original with a '16khz' name prefix."""
    sound = AudioSegment.from_file(location + file, format="wav")
    sound = sound.set_channels(1)
    sound = sound.set_frame_rate(16000)
    sound.export(location + "16khz" + file, format="wav")
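
# Hypothetical usage of the helper above (the file name is illustrative):
#   convert_mono_16khz(location, "call.wav")   # writes ./data/16khzcall.wav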

# All input audio is read from ./data/
location = os.path.join(".", "data") + os.sep

def get_included_files():
    """Return the input directory and the names of the files inside it."""
    files = os.listdir(location)
    return location, files

def main():
    dir_list = os.listdir(location)
    for file in dir_list:
        input_file = location + file

        # Diarize the recording, then cut it into per-speaker WAV segments.
        speakers = diarize_wav_file(input_file)
        speakers = segment_wave_files(speakers, input_file)

        # Transcribe each segment and print the speaker-labelled transcript.
        transcript = transcribe_segments(speakers)
        print("---------------------------------------------------------------------")
        pprint(transcript)
        print("---------------------------------------------------------------------")

        # Analyze/summarize the full transcript.
        summary = transcript_analysis(transcript)
        pprint(summary)
        print("\n\n\n\n\n\n\n")

def convertMp3ToWav(file):
    """Convert an MP3 file to a mono 16 kHz WAV and return the new path."""
    sound = AudioSegment.from_mp3(file)
    # frame_count() / duration_seconds would give the same value as frame_rate
    print(sound.frame_rate)
    sound = sound.set_frame_rate(16000)
    sound = sound.set_channels(1)
    outFile = os.path.splitext(file)[0] + ".wav"
    sound.export(outFile, format="wav")
    return outFile
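
# Example (hypothetical path): prepare an MP3 for the pipeline by converting
# it first, then diarize the resulting 16 kHz mono WAV:
#   wav_path = convertMp3ToWav("./data/sample_call.mp3")
#   speakers = diarize_wav_file(wav_path)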

if __name__ == "__main__":
    main()