Cahlil committed on
Commit
d5bc6cb
1 Parent(s): 8983ff3

input file handling edit

Files changed (1)
  1. app.py +10 -7
app.py CHANGED
@@ -10,8 +10,11 @@ asr = pipeline(
 )
 pipeline1 = Pipeline.from_pretrained("pyannote/speaker-segmentation")
 
-def diarization(file_input,microphone_input,selection):
-    audio = file_input if str(selection) == "Upload" else Path(microphone_input)
+def diarization(file_input,mic_input,selection):
+    mic_path = None if mic_input is None else mic_input.name
+    audio = file_input if selection == "Upload" else mic_path
+    if audio is None:
+        return "Please check your inputs!", ""
 
     speaker_output = pipeline1(audio)
     text_output = asr(audio,return_timestamps="word")
@@ -37,13 +40,13 @@ def diarization(file_input,microphone_input,selection):
 title = "Speech Recognition with Speaker Diarization"
 description = "Speaker Diarization is the act of attributing parts of the audio recording to different speakers. This space aims to distinguish the speakers and apply speech-to-text from a given input audio file. Pre-trained models from Pyannote[1] for the Speaker Diarization and [2]."
 article = "<p style='text-align: center'><a href='https://github.com/pyannote/pyannote-audio' target='_blank'>[1] Pyannote - Speaker Diarization model</a></p>"
-inputs = [gr.inputs.Audio(source="upload", type="filepath", label="Upload your audio file here:"),
-          gr.inputs.Audio(source="microphone", type="filepath",label="Or use your Microphone:"),
-          gr.inputs.Radio(["Upload","Microphone"],type="value",label="Select which input:")]
+inputs = [gr.inputs.Audio(source="upload", type="filepath", label="Upload your audio file here:", optional=True),
+          gr.inputs.Audio(source="microphone", type="file",label="Or use your Microphone:", optional=True),
+          gr.inputs.Radio(["Upload","Microphone"], type="value", label="Select which input:")]
 outputs = [gr.outputs.Textbox(type="auto", label="Diarized Output"),
            gr.outputs.Textbox(type="auto",label="Full ASR Text for comparison")]
-examples = [["test_audio1.wav","test_audio1.wav","Upload"],
-            ["test_audio2.wav","test_audio2.wav","Upload"]]
+examples = [["test_audio1.wav",None,"Upload"],
+            ["test_audio2.wav",None,"Upload"]]
 
 app = gr.Interface(fn=diarization,
                    inputs=inputs,
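
For readers following the change: the upload component keeps type="filepath" and so passes the callback a plain path string, while the microphone component now uses type="file" and passes a tempfile-like object whose .name attribute holds the recorded file's path; because both audio components are now optional, the callback must also handle None. Below is a minimal, self-contained sketch of that normalization step, written under those assumptions; resolve_audio_path and _FakeTempFile are illustrative names that do not exist in app.py.

```python
from typing import Optional


def resolve_audio_path(file_input, mic_input, selection) -> Optional[str]:
    """Illustrative mirror of the commit's input handling (not part of app.py).

    - "Upload" selection: file_input is already a filepath string (or None).
    - "Microphone" selection: mic_input is a tempfile-like object from the
      type="file" Audio component, so its path lives in the .name attribute.
    Returns None when the selected source was left empty.
    """
    mic_path = None if mic_input is None else mic_input.name
    return file_input if selection == "Upload" else mic_path


class _FakeTempFile:
    """Stand-in for the tempfile object a type="file" input would provide."""

    def __init__(self, name: str):
        self.name = name


if __name__ == "__main__":
    # Upload branch: the path string is passed through unchanged.
    print(resolve_audio_path("test_audio1.wav", None, "Upload"))
    # Microphone branch: the path is read from the tempfile's .name.
    print(resolve_audio_path(None, _FakeTempFile("/tmp/rec.wav"), "Microphone"))
    # Neither source provided: None signals the caller to return the error text.
    print(resolve_audio_path(None, None, "Microphone"))
```

Once the path is resolved (or found to be None), the real callback proceeds exactly as in the diff: it returns the "Please check your inputs!" message early, or feeds the single path to both the pyannote segmentation pipeline and the ASR pipeline.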