jpdiazpardo committed on
Commit
febac70
1 Parent(s): b30d2c3

Update app.py

Files changed (1)
  1. app.py +88 -41
app.py CHANGED
@@ -5,6 +5,7 @@ from dictionaries import calculate_average, transform_dict
 from icon import generate_icon
 from transformers import pipeline
 from timestamp import format_timestamp
+from youtube import get_youtube_video_id
 
 MODEL_NAME = "openai/whisper-medium"
 BATCH_SIZE = 8
@@ -18,16 +19,39 @@ pipe = pipeline(
     device=device,
 )
 
+#Formatting
+title = "Whisper Demo: Transcribe Audio"
+MODEL_NAME1 = "jpdiazpardo/whisper-tiny-metal"
+description = ("Transcribe long-form audio inputs with the click of a button! Demo uses the"
+               f" checkpoint [{MODEL_NAME1}](https://huggingface.co/{MODEL_NAME1}) and 🤗 Transformers to transcribe audio files"
+               " of arbitrary length. Check some of the 'cool' examples below")
+
+examples = [["https://www.youtube.com/watch?v=W72Lnz1n-jw&ab_channel=Whitechapel-Topic", None, None,
+             "When a Demon Defiles a Witch.wav", True, True],
+            ["https://www.youtube.com/watch?v=BnO3Io0KOl4&ab_channel=MotionlessInWhite-Topic", None, None,
+             "Immaculate Misconception.wav", True, True]]
+
+linkedin = generate_icon("linkedin")
+github = generate_icon("github")
+article = ("<div style='text-align: center; max-width:800px; margin:10px auto;'>"
+           f"<p>{linkedin} <a href='https://www.linkedin.com/in/juanpablodiazp/' target='_blank'>Juan Pablo Díaz Pardo</a><br>"
+           f"{github} <a href='https://github.com/jpdiazpardo' target='_blank'>jpdiazpardo</a></p>")
+
+title = "Scream: Fine-Tuned Whisper model for automatic guttural speech recognition 🤟🤟🤟"
+
+#-------------------------------------------------------------------------------------------------------------------------------
+
 #Define classifier for sentiment analysis
 classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k=None)
 
-def transcribe(file, task, return_timestamps):
-    outputs = pipe(file, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
+def transcribe(*args):  #file, return_timestamps, *kwargs
+    '''inputs: file, return_timestamps'''
+    outputs = pipe(args[3], batch_size=BATCH_SIZE, generate_kwargs={"task": 'transcribe'}, return_timestamps=True)
     text = outputs["text"]
     timestamps = outputs["chunks"]
 
     #If return timestamps is True, return html text with timestamps format
-    if return_timestamps==True:
+    if args[4]==True:
         spider_text = [f"{chunk['text']}" for chunk in timestamps] #Text for spider chart without timestamps
         timestamps = [f"[{format_timestamp(chunk['timestamp'][0])} -> {format_timestamp(chunk['timestamp'][1])}] {chunk['text']}" for chunk in timestamps]
 
@@ -36,47 +60,70 @@ def transcribe(file, task, return_timestamps):
         spider_text = timestamps
 
     text = "<br>".join(str(feature) for feature in timestamps)
-    text = f"<h4>Transcription</h4><div style='overflow-y: scroll; height: 250px;'>{text}</div>"
+    text = f"<h4>Transcription</h4><div style='overflow-y: scroll; height: 150px;'>{text}</div>"
 
     spider_text = "\n".join(str(feature) for feature in spider_text)
+    trans_dict = [transform_dict(classifier.predict(t)[0]) for t in spider_text.split("\n")]
+    av_dict = calculate_average(trans_dict)
+    fig = spider_chart(av_dict)
 
-    fig = spider_chart(calculate_average([transform_dict(classifier.predict(t)[0]) for t in spider_text.split("\n")]))
-
-    return file, text, fig
-
-
-inputs = [gr.Audio(source="upload", label="Audio file", type="filepath"),
-          gr.Radio(["transcribe"], label="Task", value="transcribe"),
-          gr.Checkbox(value=True, label="Return timestamps")]
-
-outputs = [gr.Audio(label="Processed Audio", type="filepath"),
-           gr.outputs.HTML("text"),
-           gr.Plot(label="fig")]
-
-title = "Whisper Demo: Transcribe Audio"
-
-MODEL_NAME1 = "jpdiazpardo/whisper-tiny-metal"
-
-description = ("Transcribe long-form audio inputs with the click of a button! Demo uses the"
-               f" checkpoint [{MODEL_NAME1}](https://huggingface.co/{MODEL_NAME1}) and 🤗 Transformers to transcribe audio files"
-               " of arbitrary length. Check some of the 'cool' examples below")
-
-examples = [["When a Demon Defiles a Witch.wav","transcribe",True],
-            ["Immaculate Misconception.wav","transcribe", True]]
-
-
-linkedin = generate_icon("linkedin")
-github = generate_icon("github")
-
-article = ("<div style='text-align: center; max-width:800px; margin:10px auto;'>"
-           f"<p>{linkedin} <a href='https://www.linkedin.com/in/juanpablodiazp/' target='_blank'>Juan Pablo Díaz Pardo</a><br>"
-           f"{github} <a href='https://github.com/jpdiazpardo' target='_blank'>jpdiazpardo</a></p>"
-           )
+    return args[3], text, fig, av_dict
 
+def filter(choice):
+    if choice=="YouTube":
+        return yt_link.update(interactive=True), audio_input.update(interactive=False)
+    elif choice == "Upload File":
+        return yt_link.update(value=None,interactive=False), audio_input.update(interactive=True)
+    else:
+        return yt_link.update(interactive=False), audio_input.update(interactive=False)
+
+
+embed_html = '<iframe src="https://www.youtube.com/embed/YOUTUBE_ID'\
+             'title="YouTube video player" frameborder="0" allow="accelerometer;'\
+             'autoplay; clipboard-write; encrypted-media; gyroscope;'\
+             'picture-in-picture" allowfullscreen></iframe>'
+
+def download(link):
+    subprocess.run(['python3', 'youtubetowav.py', link])
+    return thumbnail.update(value=embed_html.replace("YOUTUBE_ID",get_youtube_video_id(link)), visible=True)
+
+def hide_sa(value):
+    if value == True:
+        return sa_plot.update(visible=True), sa_frequency.update(visible=True)
+    else:
+        return sa_plot.update(visible=False), sa_frequency.update(visible=False)
+
+#Input components
+yt_link = gr.Textbox(value=None,label="YouTube link", info = "Optional: Copy and paste YouTube URL")
+audio_input = gr.Audio(source="upload", type="filepath", label="Upload audio file for transcription")
+download_button = gr.Button("Download")
+thumbnail = gr.HTML(value=embed_html, visible=False)
+sa_checkbox = gr.Checkbox(value=True, label="Sentiment analysis")
+
+inputs = [yt_link,                                            #0
+          download_button,                                    #1
+          thumbnail,                                          #2
+          audio_input,                                        #3
+          gr.Checkbox(value=True, label="Return timestamps"), #4
+          sa_checkbox]                                        #5
+
+#Output components
+audio_out = gr.Audio(label="Processed Audio", type="filepath", info = "Vocals only")
+sa_plot = gr.Plot(label="Sentiment Analysis")
+sa_frequency = gr.Label(label="Frequency")
+
+outputs = [audio_out, gr.outputs.HTML("text"), sa_plot, sa_frequency]
+
+with gr.Blocks() as demo:
+    download_button.click(download, inputs=[yt_link], outputs=[thumbnail])
+    sa_checkbox.change(hide_sa, inputs=[sa_checkbox], outputs=[sa_plot, sa_frequency])
+
+
+    with gr.Column():
+        gr.Interface(title = title, fn=transcribe, inputs = inputs, outputs = outputs,
+                     description=description, cache_examples=True, allow_flagging="never", article = article , examples=examples)
+
+demo.queue(concurrency_count=3)
+demo.launch(debug = True)
 
-title = "Scream: Fine-Tuned Whisper model for automatic gutural speech recognition 🤟🤟🤟"
-
-demo = gr.Interface(title = title, fn=transcribe, inputs = inputs, outputs = outputs, description=description, cache_examples=True, allow_flagging="never", article = article , examples=examples)
 
-demo.queue(concurrency_count=3)
-demo.launch(debug = True)
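Since gr.Interface passes one value per input component, the new transcribe(*args) receives the six inputs positionally: args[3] is the uploaded audio path and args[4] the "Return timestamps" checkbox, matching the #0 to #5 comments on the inputs list. A quick way to sanity-check that mapping outside the UI is sketched below; it is illustrative only and assumes the example WAV from the repository is available next to app.py.

# Illustrative direct call mirroring the Gradio input order:
# yt_link, download_button, thumbnail, audio_input, return-timestamps checkbox, sentiment checkbox.
audio_path, html_text, fig, av_dict = transcribe(
    None,                                # 0: yt_link (not used inside transcribe)
    None,                                # 1: download_button
    None,                                # 2: thumbnail
    "When a Demon Defiles a Witch.wav",  # 3: audio file path   -> args[3]
    True,                                # 4: return timestamps -> args[4]
    True,                                # 5: sentiment analysis checkbox
)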
 
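The new download() handler shells out to youtubetowav.py and calls get_youtube_video_id() from the new youtube module; neither file is part of this diff, and subprocess is not imported in any of the shown hunks, so it presumably comes from the unchanged lines at the top of app.py. A minimal sketch of what those two helpers could look like is given below, assuming pytube and ffmpeg are available; the names match the calls in app.py, but the bodies are guesses, not the actual modules.

# Hypothetical youtube.py -- not part of this commit; a sketch of the assumed helper.
from urllib.parse import urlparse, parse_qs

def get_youtube_video_id(link):
    """Return the video id from a watch URL, e.g. .../watch?v=W72Lnz1n-jw -> 'W72Lnz1n-jw'."""
    return parse_qs(urlparse(link).query)["v"][0]

# Hypothetical youtubetowav.py -- not part of this commit; assumes pytube and ffmpeg are installed.
import sys
import subprocess
from pytube import YouTube

if __name__ == "__main__":
    stream = YouTube(sys.argv[1]).streams.filter(only_audio=True).first()
    audio_file = stream.download(filename="audio.mp4")               # audio-only stream
    subprocess.run(["ffmpeg", "-y", "-i", audio_file, "audio.wav"])  # convert to WAV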
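transform_dict and calculate_average (imported from dictionaries) and spider_chart are also not shown in this commit. From the way they are called, each transcribed line's classifier output is reshaped into a {label: score} dict, the dicts are averaged across lines, and the average drives both the spider chart and the new gr.Label frequency output. A rough sketch of the two dictionary helpers, assuming the emotion classifier's usual list-of-dicts output per line; the real implementations may differ.

# Hypothetical dictionaries.py helpers -- not in this commit; shapes inferred from the calls in app.py.
def transform_dict(scores):
    """[{'label': 'anger', 'score': 0.71}, ...]  ->  {'anger': 0.71, ...}"""
    return {item["label"]: item["score"] for item in scores}

def calculate_average(dicts):
    """Average a list of {label: score} dicts, label by label."""
    labels = dicts[0].keys()
    return {label: sum(d[label] for d in dicts) / len(dicts) for label in labels}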