# Spaces: Build error
import os
import shutil

import pandas as pd
from pyannote.audio import Pipeline
# Read the Hugging Face access token from the environment rather than embedding
# it in the source: a token committed to a shared file is readable by anyone
# who can see the file and should be revoked. When HF_TOKEN is unset, None is
# passed as the token.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization",
    use_auth_token=os.environ.get("HF_TOKEN"),
)
def diarization(audio_path="result.wav"):
    """Diarize a WAV file and transcribe every speaker turn.

    Runs the module-level ``pipeline`` on ``audio_path``, prints the time span
    of each speaker turn, transcribes each turn with ``generatetext``, writes
    one ``"<speaker>: <text>"`` line per turn to ``my_file.txt`` and prints
    the resulting transcript.

    Args:
        audio_path: Path of the audio file to process. Defaults to
            ``"result.wav"``, the file this function previously always read.

    Returns:
        The transcript written to ``my_file.txt`` as a single string, one
        line per speaker turn (empty when no turns were found).
    """
    # Named ``annotation`` so the result does not shadow this function's name.
    annotation = pipeline(audio_path)

    segments = []
    for turn, _, speaker in annotation.itertracks(yield_label=True):
        segments.append((turn.start, turn.end, speaker))
        print(f"start={turn.start:.1f}s stop={turn.end:.1f}s speaker_{speaker}")

    lines = []
    for start, end, speaker in segments:
        # generatetext returns the result's "text" entry via .get(), which can
        # be None; treat that as an empty utterance instead of crashing.
        text = generatetext(audio_path, start, end) or ""
        lines.append(f"{speaker}: {text}\n")
    transcript = "".join(lines)

    with open("my_file.txt", "w") as my_file:
        my_file.write(transcript)

    print(transcript)
    return transcript
def generatetext(filename, starttime, endtime):
    """Transcribe the part of a WAV file between two timestamps.

    Cuts ``filename`` from ``starttime`` to ``endtime`` (both in seconds),
    exports the clip to ``audio.wav`` and passes that file to ``whisper``.

    Returns:
        Whatever ``.get("text")`` yields on the ``whisper`` result.
    """
    # Segment slicing works in milliseconds, so convert from seconds first.
    start_ms = starttime * 1000
    end_ms = endtime * 1000

    clip = AudioSegment.from_wav(filename)[start_ms:end_ms]
    clip.export('audio.wav', format="wav")

    return whisper('audio.wav').get("text")
def _transcribe_upload(audio_path):
    """Gradio callback: diarize the submitted audio and return the transcript.

    ``diarization`` reads ``result.wav`` and writes ``my_file.txt``, so the
    submitted file is staged under that name and the transcript is read back
    from disk afterwards.

    Args:
        audio_path: File path supplied by the ``gr.Audio`` input, or a falsy
            value when no audio was provided.

    Returns:
        The contents of ``my_file.txt``, or ``""`` when there is no input.
    """
    if not audio_path:
        return ""
    # NOTE(review): generatetext loads audio with AudioSegment.from_wav, so the
    # submitted file presumably has to be WAV - confirm against real uploads.
    if os.path.abspath(audio_path) != os.path.abspath("result.wav"):
        shutil.copyfile(audio_path, "result.wav")
    diarization()
    with open("my_file.txt", "r") as transcript_file:
        return transcript_file.read()


block = gr.Blocks()
with block:
    with gr.Group():
        with gr.Box():
            with gr.Row().style():
                # mirror_webcam is not an Audio option, so it is not passed.
                inp_audio = gr.Audio(
                    label="Input Audio",
                    type="filepath",
                )
                outputdialogs = gr.Textbox()
                btn = gr.Button("Generate Text")
    # The callback name and the ``outputs`` keyword were both wrong before
    # (``diarisation`` / ``outputdialogs=[op]``), which failed at start-up.
    btn.click(
        _transcribe_upload,
        inputs=[inp_audio],
        outputs=[outputdialogs],
        api_name="view_api",
    )
block.launch(enable_queue=True, debug=True)