datasciencedojo committed
Commit a6370cb
1 Parent(s): c5239ac

Upload 3 files

Files changed (4)
  1. .gitattributes +1 -0
  2. TestAudio1.wav +3 -0
  3. app.py +44 -0
  4. requirements.txt +6 -0
.gitattributes CHANGED
@@ -29,3 +29,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ TestAudio1.wav filter=lfs diff=lfs merge=lfs -text
TestAudio1.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:63619394ce122f680985b597df2eb2721309811662f0e53a71c688ad3d073426
+ size 4611246
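What is committed for TestAudio1.wav is not the audio itself but its Git LFS pointer: a spec version line, the blob's sha256 oid, and its size in bytes. As a minimal sketch (the file paths here are hypothetical), checking a downloaded blob against a pointer like this one could look like:

import hashlib

def verify_lfs_pointer(pointer_path, blob_path):
    # The pointer is plain "key value" lines: version, oid, size.
    fields = dict(line.split(" ", 1) for line in open(pointer_path).read().splitlines())
    expected_oid = fields["oid"].split(":", 1)[1]  # drop the "sha256:" prefix
    expected_size = int(fields["size"])
    with open(blob_path, "rb") as f:
        data = f.read()
    return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid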
app.py ADDED
@@ -0,0 +1,44 @@
+ import gradio as gr
+ from pyannote.audio import Pipeline
+ from transformers import pipeline
+
+
+ asr = pipeline(
+     "automatic-speech-recognition",
+     model="facebook/wav2vec2-large-960h-lv60-self",
+     feature_extractor="facebook/wav2vec2-large-960h-lv60-self",
+
+ )
+ speaker_segmentation = Pipeline.from_pretrained("pyannote/speaker-segmentation")
+
+ def segmentation(audio):
+     speaker_output = speaker_segmentation(audio)
+     text_output = asr(audio, return_timestamps="word")
+
+     full_text = text_output['text'].lower()
+     chunks = text_output['chunks']
+
+     diarized_output = ""
+     i = 0
+     for turn, _, speaker in speaker_output.itertracks(yield_label=True):
+         diarized = ""
+         while i < len(chunks) and chunks[i]['timestamp'][1] <= turn.end:
+             diarized += chunks[i]['text'].lower() + ' '
+             i += 1
+
+         if diarized != "":
+             diarized_output += "{}: ''{}'' from {:.3f}-{:.3f}\n".format(speaker, diarized, turn.start, turn.end)
+
+     return diarized_output, full_text
+
+ inputs = gr.inputs.Audio(source="upload", type="filepath", label="Upload your audio file here:")
+ outputs = [gr.outputs.Textbox(type="auto", label="Diarized Output"),
+            gr.outputs.Textbox(type="auto", label="Full Text")]
+ examples = [["TestAudio1.wav"],]
+
+ app = gr.Interface(fn=segmentation,
+                    inputs=inputs,
+                    outputs=outputs,
+                    examples=examples,
+                    allow_flagging=False)
+ app.launch()
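The one subtle piece of app.py is the alignment loop: the word-level ASR chunks are walked once, in order, and every word whose end timestamp falls on or before the current pyannote turn's end is attributed to that turn's speaker. A minimal sketch of that logic on invented data (the chunk texts, timestamps, and speaker labels below are hypothetical, not real model output; the chunks follow the shape the transformers pipeline returns for return_timestamps="word", and wav2vec2's uppercase output is why the app lowercases):

chunks = [
    {"text": "HELLO", "timestamp": (0.2, 0.6)},
    {"text": "THERE", "timestamp": (0.7, 1.1)},
    {"text": "HI",    "timestamp": (1.8, 2.0)},
]

# Stand-ins for pyannote's itertracks(yield_label=True) turns.
turns = [(0.0, 1.5, "SPEAKER_00"), (1.5, 2.5, "SPEAKER_01")]

i = 0
for start, end, speaker in turns:
    words = []
    # i persists across turns, so each word is consumed exactly once.
    while i < len(chunks) and chunks[i]["timestamp"][1] <= end:
        words.append(chunks[i]["text"].lower())
        i += 1
    if words:
        print("{}: ''{}'' from {:.3f}-{:.3f}".format(speaker, " ".join(words), start, end))

# Prints:
# SPEAKER_00: ''hello there'' from 0.000-1.500
# SPEAKER_01: ''hi'' from 1.500-2.500

One consequence of this scheme is that a word spanning a turn boundary is attributed to the later speaker, since only its end timestamp is compared against the turn's end.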
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ pyannote.audio
+ https://github.com/pyannote/pyannote-audio/archive/develop.zip
+ speechbrain
+ gradio
+ Jinja2
+ transformers