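"""Audio Intelligence Dashboard: a Gradio app that transcribes and analyzes audio with the AssemblyAI API."""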
import json

import gradio as gr
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import requests

from helpers import make_header, upload_file, request_transcript, make_polling_endpoint, wait_for_completion, \
    make_html_from_topics, make_paras_string, create_highlighted_list, make_summary, \
    make_sentiment_output, make_entity_dict, make_entity_html, make_true_dict, make_final_json, make_content_safety_fig
from helpers import transcription_options_headers, audio_intelligence_headers, language_headers
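
# `helpers.py` ships alongside this app; judging by the names imported above, it wraps
# the AssemblyAI REST calls (upload, transcript request, polling) and the HTML/plot
# formatting used for the results tabs.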

def change_audio_source(radio, plot, file_data, mic_data):
    """When the audio source radio selector is changed, update the wave plot and change the audio selector accordingly"""
    # Empty the plot
    plot.update_traces(go.Line(y=[]))

    # Update the plot with the appropriate data and change the visibility of the audio components
    if radio == "Audio File":
        sample_rate, audio_data = file_data
        plot.update_traces(go.Line(y=audio_data, x=np.arange(len(audio_data)) / sample_rate))
        return [gr.Audio.update(visible=True),
                gr.Audio.update(visible=False),
                plot,
                plot]
    elif radio == "Record Audio":
        sample_rate, audio_data = mic_data
        plot.update_traces(go.Line(y=audio_data, x=np.arange(len(audio_data)) / sample_rate))
        return [gr.Audio.update(visible=False),
                gr.Audio.update(visible=True),
                plot,
                plot]

def plot_data(audio_data, plot):
    """Updates the plot and the appropriate state variable when audio is uploaded/recorded or deleted"""
    # If the current audio file was deleted
    if audio_data is None:
        # Replace the state variable for the audio source with placeholder values
        sample_rate, audio_data = [0, np.array([])]
        # Update the plot to be empty
        plot.update_traces(go.Line(y=[]))
    # If new audio was uploaded/recorded
    else:
        # Replace the current state variable with the new data
        sample_rate, audio_data = audio_data
        # Plot the new data
        plot.update_traces(go.Line(y=audio_data, x=np.arange(len(audio_data)) / sample_rate))

    # The figure is returned twice: once for the visible Plot component and once for the plot State variable
    return [plot, [sample_rate, audio_data], plot]

def set_lang_vis(transcription_options):
    """Sets visibility of the language selector/warning when automatic language detection is (de)selected"""
    if 'Automatic Language Detection' in transcription_options:
        # `w` is the module-level warning HTML defined in the UI section below
        text = w
        return [gr.Dropdown.update(visible=False),
                gr.Textbox.update(value=text, visible=True)]
    else:
        text = ""
        return [gr.Dropdown.update(visible=True),
                gr.Textbox.update(value=text, visible=False)]

def option_verif(language, selected_tran_opts, selected_audint_opts):
    """When the language is changed, automatically deselect options that are not allowed for that language"""
    not_available_tran, not_available_audint = get_unavailable_opts(language)

    current_tran_opts = list(set(selected_tran_opts) - set(not_available_tran))
    current_audint_opts = list(set(selected_audint_opts) - set(not_available_audint))

    # Each list is returned twice: once for the visible CheckboxGroup and once for the corresponding State
    return [current_tran_opts,
            current_audint_opts,
            current_tran_opts,
            current_audint_opts]

# Get transcription/audio intelligence options that are not available by language
def get_unavailable_opts(language):
    """Get transcription and audio intelligence options that are unavailable for a given language"""
    if language in ['Spanish', 'French', 'German', 'Portuguese']:
        not_available_tran = ['Speaker Labels']
        not_available_audint = ['PII Redaction', 'Auto Highlights', 'Sentiment Analysis', 'Summarization',
                                'Entity Detection']
    elif language in ['Italian', 'Dutch', 'Hindi', 'Japanese']:
        not_available_tran = ['Speaker Labels']
        not_available_audint = ['PII Redaction', 'Auto Highlights', 'Content Moderation', 'Topic Detection',
                                'Sentiment Analysis', 'Summarization', 'Entity Detection']
    else:
        not_available_tran = []
        not_available_audint = []

    return not_available_tran, not_available_audint
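
# Example (for reference):
#   get_unavailable_opts('Spanish')
#   -> (['Speaker Labels'],
#       ['PII Redaction', 'Auto Highlights', 'Sentiment Analysis', 'Summarization', 'Entity Detection'])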

# When a new transcription option is selected, check that it is allowed by the language
# before adding it to selected_tran_opts and updating
def tran_selected(language, transcription_options):
    """When a transcription option is selected, deselect any options unavailable for the current language"""
    unavailable, _ = get_unavailable_opts(language)
    selected_tran_opts = list(set(transcription_options) - set(unavailable))
    return [selected_tran_opts, selected_tran_opts]

# When a new audio intelligence option is selected, check that it is allowed by the language
# before adding it to selected_audint_opts and updating
def audint_selected(language, audio_intelligence_selector):
    """When an audio intelligence option is selected, deselect any options unavailable for the current language"""
    _, unavailable = get_unavailable_opts(language)
    selected_audint_opts = list(set(audio_intelligence_selector) - set(unavailable))
    return [selected_audint_opts, selected_audint_opts]

def create_output(r, paras, language, transc_opts=None, audint_opts=None):
    """From a transcript response, return all outputs for the audio intelligence tabs"""
    if transc_opts is None:
        transc_opts = ['Automatic Language Detection', 'Speaker Labels', 'Filter Profanity']

    if audint_opts is None:
        audint_opts = ['Summarization', 'Auto Highlights', 'Topic Detection', 'Entity Detection',
                       'Sentiment Analysis', 'PII Redaction', 'Content Moderation']

    # DIARIZATION
    if "Speaker Labels" in transc_opts:
        utts = '\n\n\n'.join([f"Speaker {utt['speaker']}:\n\n" + utt['text'] for utt in r['utterances']])
    else:
        utts = " NOT ANALYZED"

    # HIGHLIGHTS
    if 'Auto Highlights' in audint_opts:
        highlight_dict = create_highlighted_list(paras, r['auto_highlights_result']['results'])
    else:
        highlight_dict = [["NOT ANALYZED", 0]]

    # SUMMARIZATION
    if 'Summarization' in audint_opts:
        chapters = r['chapters']
        summary_html = make_summary(chapters)
    else:
        summary_html = "<p>NOT ANALYZED</p>"

    # TOPIC DETECTION
    if "Topic Detection" in audint_opts:
        topics = r['iab_categories_result']['summary']
        topics_html = make_html_from_topics(topics)
    else:
        topics_html = "<p>NOT ANALYZED</p>"

    # SENTIMENT
    if "Sentiment Analysis" in audint_opts:
        sent_results = r['sentiment_analysis_results']
        sent = make_sentiment_output(sent_results)
    else:
        sent = "<p>NOT ANALYZED</p>"

    # ENTITY
    if "Entity Detection" in audint_opts:
        entities = r['entities']
        t = r['text']
        d = make_entity_dict(entities, t)
        entity_html = make_entity_html(d)
    else:
        entity_html = "<p>NOT ANALYZED</p>"

    # CONTENT SAFETY
    if "Content Moderation" in audint_opts:
        cont = r['content_safety_labels']['summary']
        content_fig = make_content_safety_fig(cont)
    else:
        content_fig = go.Figure()

    return [language, paras, utts, highlight_dict, summary_html, topics_html, sent, entity_html, content_fig]
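
# Note: the order of the list returned by create_output maps one-to-one onto the
# `outputs` lists wired to submit.click and example.click at the bottom of this file.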

def submit_to_AAI(api_key,
                  transcription_options,
                  audio_intelligence_selector,
                  language,
                  radio,
                  audio_file,
                  mic_recording):
    """Upload the selected audio to AssemblyAI, run the requested analyses, and return the results"""
    # Make the request header
    header = make_header(api_key)

    # Map transcription/audio intelligence options to the AssemblyAI API request JSON dict
    true_dict = make_true_dict(transcription_options, audio_intelligence_selector)
    final_json, language = make_final_json(true_dict, language)
    final_json = {**true_dict, **final_json}

    # Select which audio to use
    if radio == "Audio File":
        audio_data = audio_file
    elif radio == "Record Audio":
        audio_data = mic_recording

    # Upload the audio
    upload_url = upload_file(audio_data, header, is_file=False)

    # Request a transcript
    transcript_response = request_transcript(upload_url, header, **final_json)

    # Wait for the transcription to complete
    polling_endpoint = make_polling_endpoint(transcript_response)
    wait_for_completion(polling_endpoint, header)

    # Fetch the results JSON (a plain GET; no request body is needed when polling)
    r = requests.get(polling_endpoint, headers=header).json()

    # Fetch the paragraphs of the transcript
    transc_id = r['id']
    paras = make_paras_string(transc_id, header)

    return create_output(r, paras, language, transcription_options, audio_intelligence_selector)

def example_output(language):
    """Displays example output"""
    with open("example_data/paras.txt", 'r') as f:
        paras = f.read()

    with open('example_data/response.json', 'r') as f:
        r = json.load(f)

    return create_output(r, paras, language)

with open('app/styles.css', 'r') as f:
    css = f.read()

with gr.Blocks(css=css) as demo:
    # Load the logo
    gr.HTML('<a href="https://www.assemblyai.com/"><img src="file/app/images/logo.png" class="logo"></a>')

    # Load the descriptions
    # www.assemblyai.com/blog/how-to-build-an-audio-intelligence-dashboard-with-gradio/
    gr.HTML("<h1 class='title'>Audio Intelligence Dashboard</h1>"
            "<br>"
            "<p>Check out the <a href=\"https://www.assemblyai.com/blog/getting-started-with-huggingfaces-gradio/\">Getting Started with Hugging Face's Gradio</a> blog to learn how to build this dashboard.</p>")

    gr.HTML("<h1 class='title'>Directions</h1>"
            "<p>To use this dashboard:</p>"
            "<ul>"
            "<li>1) Paste your AssemblyAI API Key into the box below - you can copy it from <a href=\"https://app.assemblyai.com/signup\">here</a> (or get one for free if you don't already have one)</li>"
            "<li>2) Choose an audio source and upload or record audio</li>"
            "<li>3) Select the types of analyses you would like to perform on the audio</li>"
            "<li>4) Click <i>Submit</i></li>"
            "<li>5) View the results at the bottom of the page</li>"
            "</ul>"
            "<br>"
            "<p>You may also click <b>Show Example Output</b> below to see an example without having to enter an API key.</p>")

    gr.HTML('<div class="alert alert__warning"><span>'
            'Note that this dashboard is not an official AssemblyAI product and is intended for educational purposes.'
            '</span></div>')
    # API Key title
    with gr.Box():
        gr.HTML("<p class=\"apikey\">API Key:</p>")
        # API key textbox (password-style)
        api_key = gr.Textbox(label="", elem_id="pw")

    # Gradio States for the plotly Figure object, the audio data for the file source, and the audio data for the mic source
    plot = gr.State(px.line(labels={'x': 'Time (s)', 'y': ''}))
    file_data = gr.State([1, [0]])  # [sample rate, [data]]
    mic_data = gr.State([1, [0]])  # [sample rate, [data]]

    # Options that the user has selected
    selected_tran_opts = gr.State(list(transcription_options_headers.keys()))
    selected_audint_opts = gr.State(list(audio_intelligence_headers.keys()))

    # Current options = selected options minus the options unavailable for the specified language
    current_tran_opts = gr.State([])
    current_audint_opts = gr.State([])

    # Selector for the audio source
    radio = gr.Radio(["Audio File", "Record Audio"], label="Audio Source", value="Audio File")

    # Audio components for file and microphone data
    audio_file = gr.Audio()
    mic_recording = gr.Audio(source="microphone", visible=False)

    # Audio wave plot
    audio_wave = gr.Plot(plot.value)
    # Checkbox group for the transcription options
    transcription_options = gr.CheckboxGroup(
        choices=list(transcription_options_headers.keys()),
        value=list(transcription_options_headers.keys()),
        label="Transcription Options",
    )

    # Warning shown when Automatic Language Detection is selected
    w = "<div class='alert alert__warning'>" \
        "<p>Automatic Language Detection is not available for Hindi or Japanese. For best results on non-US " \
        "English audio, specify the dialect instead of using Automatic Language Detection. " \
        "<br>" \
        "Some Audio Intelligence features are not available in some languages. See " \
        "<a href='https://airtable.com/shr53TWU5reXkAmt2/tblf7O4cffFndmsCH?backgroundColor=green'>here</a> " \
        "for more details.</p>" \
        "</div>"

    auto_lang_detect_warning = gr.HTML(w)

    # Checkbox group for the Audio Intelligence options
    audio_intelligence_selector = gr.CheckboxGroup(
        choices=list(audio_intelligence_headers.keys()),
        value=list(audio_intelligence_headers.keys()),
        label='Audio Intelligence Options'
    )
    # Language selector for manual language specification
    language = gr.Dropdown(
        choices=list(language_headers.keys()),
        value="US English",
        label="Language Specification",
        visible=False,
    )

    # Button to submit audio for processing with the selected options
    submit = gr.Button('Submit')

    # Button to display example output without submitting audio or entering an API key
    example = gr.Button('Show Example Output')

    # Results tab group
    phl = 10  # placeholder height, in lines, for the (initially empty) results tabs
    with gr.Tab('Transcript'):
        trans_tab = gr.Textbox(placeholder="Your formatted transcript will appear here ...",
                               lines=phl,
                               max_lines=25,
                               show_label=False)
    with gr.Tab('Speaker Labels'):
        diarization_tab = gr.Textbox(placeholder="Your diarized transcript will appear here ...",
                                     lines=phl,
                                     max_lines=25,
                                     show_label=False)
    with gr.Tab('Auto Highlights'):
        highlights_tab = gr.HighlightedText()
    with gr.Tab('Summary'):
        summary_tab = gr.HTML("<br>" * phl)
    with gr.Tab("Detected Topics"):
        topics_tab = gr.HTML("<br>" * phl)
    with gr.Tab("Sentiment Analysis"):
        sentiment_tab = gr.HTML("<br>" * phl)
    with gr.Tab("Entity Detection"):
        entity_tab = gr.HTML("<br>" * phl)
    with gr.Tab("Content Safety"):
        content_tab = gr.Plot()

    ####################################### Functionality ######################################################

    # Changing the audio source changes the visible Audio input component
    radio.change(fn=change_audio_source,
                 inputs=[
                     radio,
                     plot,
                     file_data,
                     mic_data],
                 outputs=[
                     audio_file,
                     mic_recording,
                     audio_wave,
                     plot])

    # Inputting audio updates the plot
    audio_file.change(fn=plot_data,
                      inputs=[audio_file, plot],
                      outputs=[audio_wave, file_data, plot]
                      )
    mic_recording.change(fn=plot_data,
                         inputs=[mic_recording, plot],
                         outputs=[audio_wave, mic_data, plot])

    # Deselecting Automatic Language Detection shows the language selector
    transcription_options.change(
        fn=set_lang_vis,
        inputs=transcription_options,
        outputs=[language, auto_lang_detect_warning])

    # Changing the language deselects certain transcription / Audio Intelligence options
    language.change(
        fn=option_verif,
        inputs=[language,
                selected_tran_opts,
                selected_audint_opts],
        outputs=[transcription_options, audio_intelligence_selector, current_tran_opts, current_audint_opts]
    )

    # Selecting a transcription option adds it to the selected options if the language allows it
    transcription_options.change(
        fn=tran_selected,
        inputs=[language, transcription_options],
        outputs=[transcription_options, selected_tran_opts]
    )

    # Selecting an audio intelligence option adds it to the selected options if the language allows it
    audio_intelligence_selector.change(
        fn=audint_selected,
        inputs=[language, audio_intelligence_selector],
        outputs=[audio_intelligence_selector, selected_audint_opts]
    )
# Clicking "submit" uploads selected audio to AssemblyAI, performs requested analyses, and displays results | |
submit.click(fn=submit_to_AAI, | |
inputs=[api_key, | |
transcription_options, | |
audio_intelligence_selector, | |
language, | |
radio, | |
audio_file, | |
mic_recording], | |
outputs=[language, | |
trans_tab, | |
diarization_tab, | |
highlights_tab, | |
summary_tab, | |
topics_tab, | |
sentiment_tab, | |
entity_tab, | |
content_tab]) | |
# Clicking "Show Example Output" displays example results | |
example.click(fn=example_output, | |
inputs=language, | |
outputs=[language, | |
trans_tab, | |
diarization_tab, | |
highlights_tab, | |
summary_tab, | |
topics_tab, | |
sentiment_tab, | |
entity_tab, | |
content_tab]) | |
# Launch the application | |
demo.launch() # share=True | |
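
# To run locally (a sketch; assumes the Space's full file layout, including helpers.py,
# app/styles.css, app/images/logo.png, and example_data/, plus any extra dependencies
# that helpers.py itself needs):
#   pip install gradio plotly numpy requests
#   python app.py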