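"""Gradio app for the AssemblyAI Audio Intelligence Dashboard.

Uploads or records audio, submits it to the AssemblyAI API with the selected
transcription and audio intelligence options, and displays the results.
"""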
import json
import gradio as gr
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import requests
from helpers import make_header, upload_file, request_transcript, make_polling_endpoint, wait_for_completion, \
make_html_from_topics, make_paras_string, create_highlighted_list, make_summary, \
make_sentiment_output, make_entity_dict, make_entity_html, make_true_dict, make_final_json, make_content_safety_fig
from helpers import transcription_options_headers, audio_intelligence_headers, language_headers
def change_audio_source(radio, plot, file_data, mic_data):
"""When the audio source radio selector is changed, update the wave plot and change the audio selector accordingly"""
# Empty plot
plot.update_traces(go.Line(y=[]))
# Update plot with appropriate data and change visibility audio components
if radio == "Audio File":
sample_rate, audio_data = file_data
plot.update_traces(go.Line(y=audio_data, x=np.arange(len(audio_data)) / sample_rate))
return [gr.Audio.update(visible=True),
gr.Audio.update(visible=False),
plot,
plot]
elif radio == "Record Audio":
sample_rate, audio_data = mic_data
plot.update_traces(go.Line(y=audio_data, x=np.arange(len(audio_data)) / sample_rate))
return [gr.Audio.update(visible=False),
gr.Audio.update(visible=True),
plot,
plot]
def plot_data(audio_data, plot):
"""Updates plot and appropriate state variable when audio is uploaded/recorded or deleted"""
# If the current audio file is deleted
if audio_data is None:
# Replace the state variable for the audio source with placeholder values
sample_rate, audio_data = [0, np.array([])]
# Update the plot to be empty
plot.update_traces(go.Line(y=[]))
# If new audio is uploaded/recorded
else:
# Replace the current state variable with new
sample_rate, audio_data = audio_data
# Plot the new data
plot.update_traces(go.Line(y=audio_data, x=np.arange(len(audio_data)) / sample_rate))
# Update the plot component and data state variable
return [plot, [sample_rate, audio_data], plot]
def set_lang_vis(transcription_options):
"""Sets visibility of language selector/warning when automatic language detection is (de)selected"""
if 'Automatic Language Detection' in transcription_options:
        # `w` is the language-detection warning HTML string defined in the UI layout below (available at call time)
        text = w
return [gr.Dropdown.update(visible=False),
gr.Textbox.update(value=text, visible=True)]
else:
text = ""
return [gr.Dropdown.update(visible=True),
gr.Textbox.update(value=text, visible=False)]
def option_verif(language, selected_tran_opts, selected_audint_opts):
"""When the language is changed, this function automatically deselects options that are not allowed for that
language."""
not_available_tran, not_available_audint = get_unavailable_opts(language)
current_tran_opts = list(set(selected_tran_opts) - set(not_available_tran))
current_audint_opts = list(set(selected_audint_opts) - set(not_available_audint))
return [current_tran_opts,
current_audint_opts,
current_tran_opts,
current_audint_opts]
# Get tran/audint opts that are not available by language
def get_unavailable_opts(language):
"""Get transcription and audio intelligence options that are unavailable for a given language"""
if language in ['Spanish', 'French', 'German', 'Portuguese']:
not_available_tran = ['Speaker Labels']
not_available_audint = ['PII Redaction', 'Auto Highlights', 'Sentiment Analysis', 'Summarization',
'Entity Detection']
    elif language in ['Italian', 'Dutch', 'Hindi', 'Japanese']:
        not_available_tran = ['Speaker Labels']
        not_available_audint = ['PII Redaction', 'Auto Highlights', 'Content Moderation', 'Topic Detection',
                                'Sentiment Analysis', 'Summarization', 'Entity Detection']
else:
not_available_tran = []
not_available_audint = []
return not_available_tran, not_available_audint
# When selecting new tran option, checks to make sure allowed by language and
# then adds to selected_tran_opts and updates
def tran_selected(language, transcription_options):
"""When a transcription option is selected, """
unavailable, _ = get_unavailable_opts(language)
selected_tran_opts = list(set(transcription_options) - set(unavailable))
return [selected_tran_opts, selected_tran_opts]
# When selecting new audint option, checks to make sure allowed by language and
# then adds to selected_audint_opts and updates
def audint_selected(language, audio_intelligence_selector):
"""Deselected"""
_, unavailable = get_unavailable_opts(language)
selected_audint_opts = list(set(audio_intelligence_selector) - set(unavailable))
return [selected_audint_opts, selected_audint_opts]
def create_output(r, paras, language, transc_opts=None, audint_opts=None):
"""From a transcript response, return all outputs for audio intelligence"""
if transc_opts is None:
transc_opts = ['Automatic Language Detection', 'Speaker Labels', 'Filter Profanity']
if audint_opts is None:
audint_opts = ['Summarization', 'Auto Highlights', 'Topic Detection', 'Entity Detection',
'Sentiment Analysis', 'PII Redaction', 'Content Moderation']
# DIARIZATION
if "Speaker Labels" in transc_opts:
utts = '\n\n\n'.join([f"Speaker {utt['speaker']}:\n\n" + utt['text'] for utt in r['utterances']])
else:
utts = " NOT ANALYZED"
# HIGHLIGHTS
if 'Auto Highlights' in audint_opts:
highlight_dict = create_highlighted_list(paras, r['auto_highlights_result']['results'])
else:
        highlight_dict = [["NOT ANALYZED", 0]]
    # SUMMARIZATION
if 'Summarization' in audint_opts:
chapters = r['chapters']
summary_html = make_summary(chapters)
else:
summary_html = "<p>NOT ANALYZED</p>"
# TOPIC DETECTION
if "Topic Detection" in audint_opts:
topics = r['iab_categories_result']['summary']
topics_html = make_html_from_topics(topics)
else:
topics_html = "<p>NOT ANALYZED</p>"
# SENTIMENT
if "Sentiment Analysis" in audint_opts:
sent_results = r['sentiment_analysis_results']
sent = make_sentiment_output(sent_results)
else:
sent = "<p>NOT ANALYZED</p>"
# ENTITY
if "Entity Detection" in audint_opts:
entities = r['entities']
t = r['text']
d = make_entity_dict(entities, t)
entity_html = make_entity_html(d)
else:
entity_html = "<p>NOT ANALYZED</p>"
# CONTENT SAFETY
if "Content Moderation" in audint_opts:
cont = r['content_safety_labels']['summary']
content_fig = make_content_safety_fig(cont)
else:
content_fig = go.Figure()
return [language, paras, utts, highlight_dict, summary_html, topics_html, sent, entity_html, content_fig]
def submit_to_AAI(api_key,
transcription_options,
audio_intelligence_selector,
language,
radio,
audio_file,
mic_recording):
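    """Upload the selected audio to AssemblyAI, request a transcript with the chosen options, poll until
    processing is complete, and return the formatted outputs for each results tab"""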
# Make request header
header = make_header(api_key)
# Map transcription/audio intelligence options to AssemblyAI API request JSON dict
true_dict = make_true_dict(transcription_options, audio_intelligence_selector)
final_json, language = make_final_json(true_dict, language)
final_json = {**true_dict, **final_json}
# Select which audio to use
if radio == "Audio File":
audio_data = audio_file
elif radio == "Record Audio":
audio_data = mic_recording
# Upload the audio
upload_url = upload_file(audio_data, header, is_file=False)
# Request transcript
transcript_response = request_transcript(upload_url, header, **final_json)
# Wait for the transcription to complete
polling_endpoint = make_polling_endpoint(transcript_response)
wait_for_completion(polling_endpoint, header)
# Fetch results JSON
r = requests.get(polling_endpoint, headers=header, json=final_json).json()
# Fetch paragraphs of transcript
transc_id = r['id']
paras = make_paras_string(transc_id, header)
return create_output(r, paras, language, transcription_options, audio_intelligence_selector)
def example_output(language):
"""Displays example output"""
with open("example_data/paras.txt", 'r') as f:
paras = f.read()
with open('example_data/response.json', 'r') as f:
r = json.load(f)
return create_output(r, paras, language)
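# Read in custom CSS for styling the dashboard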
with open('app/styles.css', 'r') as f:
css = f.read()
with gr.Blocks(css=css) as demo:
# Load image
gr.HTML('<a href="https://www.assemblyai.com/"><img src="file/app/images/logo.png" class="logo"></a>')
# Load descriptions
# www.assemblyai.com/blog/how-to-build-an-audio-intelligence-dashboard-with-gradio/
gr.HTML("<h1 class='title'>Audio Intelligence Dashboard</h1>"
"<br>"
"<p>Check out the <a href=\"https://www.assemblyai.com/blog/getting-started-with-huggingfaces-gradio/\">Getting Started with Hugging Face's Gradio</a> blog to learn how to build this dashboard.</p>")
gr.HTML("<h1 class='title'>Directions</h1>"
"<p>To use this dashboard:</p>"
"<ul>"
"<li>1) Paste your AssemblyAI API Key into the box below - you can copy it from <a href=\"https://app.assemblyai.com/signup\">here</a> (or get one for free if you don't already have one)</li>"
"<li>2) Choose an audio source and upload or record audio</li>"
"<li>3) Select the types of analyses you would like to perform on the audio</li>"
"<li>4) Click <i>Submit</i></li>"
"<li>5) View the results at the bottom of the page</li>"
"<ul>"
"<br>"
"<p>You may also click <b>Show Example Output</b> below to see an example without having to enter an API key.")
gr.HTML('<div class="alert alert__warning"><span>'
'Note that this dashboard is not an official AssemblyAI product and is intended for educational purposes.'
'</span></div>')
# API Key title
with gr.Box():
gr.HTML("<p class=\"apikey\">API Key:</p>")
# API key textbox (password-style)
api_key = gr.Textbox(label="", elem_id="pw")
# Gradio states for - plotly Figure object, audio data for file source, and audio data for mic source
plot = gr.State(px.line(labels={'x': 'Time (s)', 'y': ''}))
file_data = gr.State([1, [0]]) # [sample rate, [data]]
    mic_data = gr.State([1, [0]])  # [sample rate, [data]]
# Options that the user wants
selected_tran_opts = gr.State(list(transcription_options_headers.keys()))
selected_audint_opts = gr.State(list(audio_intelligence_headers.keys()))
# Current options = selected options - unavailable options for specified language
current_tran_opts = gr.State([])
current_audint_opts = gr.State([])
# Selector for audio source
radio = gr.Radio(["Audio File", "Record Audio"], label="Audio Source", value="Audio File")
# Audio object for both file and microphone data
audio_file = gr.Audio()
mic_recording = gr.Audio(source="microphone", visible=False)
# Audio wave plot
audio_wave = gr.Plot(plot.value)
# Checkbox for transcription options
transcription_options = gr.CheckboxGroup(
choices=list(transcription_options_headers.keys()),
value=list(transcription_options_headers.keys()),
label="Transcription Options",
)
# Warning for using Automatic Language detection
w = "<div class='alert alert__warning'>" \
"<p>Automatic Language Detection not available for Hindi or Japanese. For best results on non-US " \
"English audio, specify the dialect instead of using Automatic Language Detection. " \
"<br>" \
"Some Audio Intelligence features are not available in some languages. See " \
"<a href='https://airtable.com/shr53TWU5reXkAmt2/tblf7O4cffFndmsCH?backgroundColor=green'>here</a> " \
"for more details.</p>" \
"</div>"
auto_lang_detect_warning = gr.HTML(w)
# Checkbox for Audio Intelligence options
audio_intelligence_selector = gr.CheckboxGroup(
choices=list(audio_intelligence_headers.keys()),
value=list(audio_intelligence_headers.keys()),
label='Audio Intelligence Options'
)
# Language selector for manual language selection
language = gr.Dropdown(
choices=list(language_headers.keys()),
value="US English",
label="Language Specification",
visible=False,
)
# Button to submit audio for processing with selected options
submit = gr.Button('Submit')
    # Button to display example output
example = gr.Button('Show Example Output')
# Results tab group
    phl = 10  # placeholder height (number of lines) for the empty results tabs
with gr.Tab('Transcript'):
trans_tab = gr.Textbox(placeholder="Your formatted transcript will appear here ...",
lines=phl,
max_lines=25,
show_label=False)
with gr.Tab('Speaker Labels'):
diarization_tab = gr.Textbox(placeholder="Your diarized transcript will appear here ...",
lines=phl,
max_lines=25,
show_label=False)
with gr.Tab('Auto Highlights'):
highlights_tab = gr.HighlightedText()
with gr.Tab('Summary'):
summary_tab = gr.HTML("<br>" * phl)
with gr.Tab("Detected Topics"):
topics_tab = gr.HTML("<br>" * phl)
with gr.Tab("Sentiment Analysis"):
sentiment_tab = gr.HTML("<br>" * phl)
with gr.Tab("Entity Detection"):
entity_tab = gr.HTML("<br>" * phl)
with gr.Tab("Content Safety"):
content_tab = gr.Plot()
####################################### Functionality ######################################################
# Changing audio source changes Audio input component
radio.change(fn=change_audio_source,
inputs=[
radio,
plot,
file_data,
mic_data],
outputs=[
audio_file,
mic_recording,
audio_wave,
plot])
# Inputting audio updates plot
audio_file.change(fn=plot_data,
inputs=[audio_file, plot],
outputs=[audio_wave, file_data, plot]
)
mic_recording.change(fn=plot_data,
inputs=[mic_recording, plot],
outputs=[audio_wave, mic_data, plot])
# Deselecting Automatic Language Detection shows Language Selector
transcription_options.change(
fn=set_lang_vis,
inputs=transcription_options,
outputs=[language, auto_lang_detect_warning])
# Changing language deselects certain Tran / Audio Intelligence options
language.change(
fn=option_verif,
inputs=[language,
selected_tran_opts,
selected_audint_opts],
outputs=[transcription_options, audio_intelligence_selector, current_tran_opts, current_audint_opts]
)
# Selecting Tran options adds it to selected if language allows it
transcription_options.change(
fn=tran_selected,
inputs=[language, transcription_options],
        outputs=[transcription_options, selected_tran_opts]
)
# Selecting audio intelligence options adds it to selected if language allows it
audio_intelligence_selector.change(
fn=audint_selected,
inputs=[language, audio_intelligence_selector],
outputs=[audio_intelligence_selector, selected_audint_opts]
)
# Clicking "submit" uploads selected audio to AssemblyAI, performs requested analyses, and displays results
submit.click(fn=submit_to_AAI,
inputs=[api_key,
transcription_options,
audio_intelligence_selector,
language,
radio,
audio_file,
mic_recording],
outputs=[language,
trans_tab,
diarization_tab,
highlights_tab,
summary_tab,
topics_tab,
sentiment_tab,
entity_tab,
content_tab])
# Clicking "Show Example Output" displays example results
example.click(fn=example_output,
inputs=language,
outputs=[language,
trans_tab,
diarization_tab,
highlights_tab,
summary_tab,
topics_tab,
sentiment_tab,
entity_tab,
content_tab])
# Launch the application
demo.launch() # share=True