import gradio as gr
import os
import requests
import time
import pandas as pd
import io
from scipy.io.wavfile import write

# AssemblyAI endpoints: one to upload audio, one to submit transcription jobs
transcript_endpoint = "https://api.assemblyai.com/v2/transcript"
upload_endpoint = "https://api.assemblyai.com/v2/upload"

headers = {
    "Authorization": os.environ["ASSEMBLYAI_KEY"],
    "Content-Type": "application/json"
}


# Helper generator to upload a file from disk in chunks
def _read_file(filename, chunk_size=5242880):
    with open(filename, "rb") as f:
        while True:
            data = f.read(chunk_size)
            if not data:
                break
            yield data


def _read_array(audio, chunk_size=5242880):
    """Like _read_file, but for an array - creates a temporary unsaved "file"
    from a (sample rate, audio np.array) pair and streams it in chunks."""
    sr, aud = audio
    # Create a temporary in-memory "file" and write the WAV data to it
    temp_file = io.BytesIO()
    write(temp_file, sr, aud)
    # Rewind to the start so the loop below reads the data just written
    temp_file.seek(0)
    while True:
        data = temp_file.read(chunk_size)
        if not data:
            break
        yield data


def get_audio_from_upload(audio):
    upload_response = requests.post(
        upload_endpoint,
        headers=headers,
        data=_read_array(audio))
    return upload_response.json()['upload_url']


def get_transcript_url(audio):
    url = get_audio_from_upload(audio)
    # Request body that tells the API which file to transcribe and which
    # Audio Intelligence features to enable
    request = {
        # URL of the audio file to process
        "audio_url": url,
        # Turn on speaker labels
        "speaker_labels": True,
        # Turn on custom vocabulary
        "word_boost": ["assembly ai"],
        # Turn on custom spelling
        "custom_spelling": [
            {"from": ["assembly AI"], "to": "AssemblyAI"},
            {"from": ["assembly AI's"], "to": "AssemblyAI's"}
        ],
        # Turn on PII Redaction and specify policies
        "redact_pii": True,
        "redact_pii_policies": ["drug", "injury", "person_name"],
        "redact_pii_audio": True,
        # Turn on Auto Highlights
        "auto_highlights": True,
        # Turn on Content Moderation
        "content_safety": True,
        # Turn on Topic Detection
        "iab_categories": True,
        # Turn on Sentiment Analysis
        "sentiment_analysis": True,
        # Turn on Summarization and specify configuration
        "summarization": True,
        "summary_model": "informative",
        "summary_type": "bullets",
        # Turn on Entity Detection
        "entity_detection": True,
    }
    response = requests.post(
        transcript_endpoint,
        json=request,
        headers=headers  # Authorization to link this transcription with your account
    )

    # Poll until the transcription job either completes or errors out
    polling_endpoint = f"https://api.assemblyai.com/v2/transcript/{response.json()['id']}"
    while True:
        transcription_result = requests.get(polling_endpoint, headers=headers).json()
        if transcription_result['status'] == 'completed':
            break
        elif transcription_result['status'] == 'error':
            raise RuntimeError(f"Transcription failed: {transcription_result['error']}")
        else:
            time.sleep(3)

    # Sentiment analysis: keep the sentence text, sentiment label, and confidence
    res = transcription_result['sentiment_analysis_results']
    df = pd.DataFrame(res)
    df = df.loc[:, ["text", "sentiment", "confidence"]]

    # Topic detection: flatten the topic-relevance summary into a dataframe
    topic = transcription_result['iab_categories_result']['summary']
    topics = []
    for k in topic:
        topic_dict = {}
        topic_dict["Topic"] = " > ".join(k.split(">"))
        topic_dict["Relevance"] = topic[k]
        topics.append(topic_dict)
    df_topic = pd.DataFrame(topics)

    return transcription_result['text'], transcription_result['summary'], df, df_topic.head()
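
# The request above also enables Entity Detection and Auto Highlights, but the
# UI below never surfaces them. A minimal sketch of how those results could be
# read out of a completed transcript, using the 'entities' and
# 'auto_highlights_result' fields documented for the AssemblyAI v2 API
# (this helper is illustrative only and is not wired into the demo):
def get_extra_intelligence(transcription_result):
    # Each entity carries the detected 'entity_type' (e.g. 'person_name') and its 'text'
    entities = [(e['entity_type'], e['text'])
                for e in transcription_result.get('entities', [])]
    # Highlights are key phrases ranked by relevance; keep the text and rank
    highlights = [(h['text'], h['rank'])
                  for h in transcription_result.get('auto_highlights_result', {}).get('results', [])]
    return entities, highlights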
# def get_transcript_file(filename):
#     upload_response = requests.post(
#         upload_endpoint,
#         headers=headers,
#         data=_read_file(filename))
#     # Request body that tells the API which file to transcribe
#     request = {
#         # URL of the audio file to process
#         "audio_url": upload_response.json()['upload_url'],
#         # Turn on speaker labels
#         "speaker_labels": True,
#         # Turn on custom vocabulary
#         "word_boost": ["assembly ai"],
#         # Turn on custom spelling
#         "custom_spelling": [
#             {"from": ["assembly AI"], "to": "AssemblyAI"},
#             {"from": ["assembly AI's"], "to": "AssemblyAI's"}
#         ],
#         # Turn on PII Redaction and specify policies
#         "redact_pii": True,
#         "redact_pii_policies": ["drug", "injury", "person_name"],
#         "redact_pii_audio": True,
#         # Turn on Auto Highlights
#         "auto_highlights": True,
#         # Turn on Content Moderation
#         "content_safety": True,
#         # Turn on Topic Detection
#         "iab_categories": True,
#         # Turn on Sentiment Analysis
#         "sentiment_analysis": True,
#         # Turn on Summarization and specify configuration
#         "summarization": True,
#         "summary_model": "informative",
#         "summary_type": "bullets",
#         # Turn on Entity Detection
#         "entity_detection": True,
#     }
#     response = requests.post(
#         transcript_endpoint,
#         json=request,
#         headers=headers  # Authorization to link this transcription with your account
#     )
#     polling_endpoint = f"https://api.assemblyai.com/v2/transcript/{response.json()['id']}"
#     while True:
#         transcription_result = requests.get(polling_endpoint, headers=headers).json()
#         if transcription_result['status'] == 'completed':
#             break
#         elif transcription_result['status'] == 'error':
#             raise RuntimeError(f"Transcription failed: {transcription_result['error']}")
#         else:
#             time.sleep(3)
#     return transcription_result['text']


audio_intelligence_list = [
    "Summarization",
    "Sentiment Analysis"
]
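
# Quick headless check of the pipeline above -- a minimal sketch, assuming a
# local WAV file named "audio.wav" exists (scipy.io.wavfile reads WAV only, so
# an MP3 example would need converting first). Uncomment to run the pipeline
# without launching the Gradio UI:
# from scipy.io.wavfile import read
# sample_rate, samples = read("audio.wav")
# text, summary, sentiments, topics = get_transcript_url((sample_rate, samples))
# print(text)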

title = """<h1 align="center">🔥Conformer-1 API</h1>"""

description = """
### In this demo, you can explore the outputs of a Conformer-1 Speech Recognition model from AssemblyAI.
"""

with gr.Blocks() as demo:
    gr.HTML(title)
    gr.Markdown(description)
    with gr.Column(elem_id="col_container"):
        # audio_intelligence_options = gr.CheckboxGroup(audio_intelligence_list, label="Audio Intelligence Options")
        inputs = gr.Audio(source="upload", label="Upload the input audio file")
        b1 = gr.Button('Process Audio')

        with gr.Tabs():
            with gr.TabItem('Transcript') as transcript_tab:
                transcript = gr.Textbox(label="Transcript Result")
            # The tabs below start hidden; see the visibility sketch after launch()
            with gr.TabItem('Summary', visible=False) as summary_tab:
                summary = gr.Textbox(label="Summary Result")
            with gr.TabItem('Sentiment Analysis', visible=False) as sentiment_tab:
                sentiment_analysis = gr.Dataframe(label="Sentiment Analysis Result")
            with gr.TabItem('Topic Detection', visible=False) as topic_detection_tab:
                topic_detection = gr.Dataframe(label="Topic Detection Result")

        b1.click(get_transcript_url, [inputs],
                 [transcript, summary, sentiment_analysis, topic_detection])
        examples = gr.Examples(examples=[["audio.mp3"]], inputs=inputs,
                               outputs=[transcript, summary, sentiment_analysis, topic_detection],
                               cache_examples=True, fn=get_transcript_url)

demo.queue().launch(debug=True)