Spaces:

izzidyaz
/

Deployment_Testing

Runtime error

App Files Files Community

Deployment_Testing / app.py

izzidyaz

first commit

b799161 over 1 year ago

raw history blame contribute delete

No virus

3.19 kB

	import streamlit as st
	import youtube_dl
	import requests
	import os

	# AssemblyAI API key. It is read from the ASSEMBLYAI_API_KEY environment
	# variable when that is set (e.g. a deployment secret); the literal is kept
	# only as a fallback so existing deployments keep working unchanged.
	# SECURITY: the fallback key is committed in plain text -- it should be
	# rotated and then removed from source control.
	auth_key = os.environ.get('ASSEMBLYAI_API_KEY', '9f704fc1faa44c7391fa81a59875404e')

	# Seed the transcription status the first time the script runs in a session,
	# so later code can always read st.session_state['status'].
	if 'status' not in st.session_state:
	    st.session_state['status'] = 'submitted'

	# Options passed to youtube_dl.YoutubeDL: pick the best audio stream and let
	# the FFmpegExtractAudio post-processor convert it to MP3.
	ydl_opts = {
	    'format': 'bestaudio/best',
	    'postprocessors': [{
	        'key': 'FFmpegExtractAudio',
	        'preferredcodec': 'mp3',
	        'preferredquality': '192',
	    }],
	    # Raw string: in a normal literal "\b" is a backspace character, which
	    # silently corrupted this Windows path.
	    # NOTE(review): youtube_dl documents this option as 'ffmpeg_location'
	    # (underscore); the hyphenated key is probably ignored. Confirm before
	    # renaming, because this path only exists on a Windows machine.
	    'ffmpeg-location': r'C:\FFmpeg\bin',
	    # Output file name template: <video id>.<extension> in the working dir.
	    'outtmpl': "./%(id)s.%(ext)s",
	}

	# AssemblyAI REST endpoints used by this app: one receives the audio upload,
	# the other creates (and, with an id appended, polls) transcription jobs.
	upload_endpoint = 'https://api.assemblyai.com/v2/upload'
	transcript_endpoint = "https://api.assemblyai.com/v2/transcript"

	# Two header sets: the bare auth header, and auth plus a JSON content type.
	headers_auth_only = {'authorization': auth_key}
	headers = dict(headers_auth_only)
	headers["content-type"] = "application/json"

	# Number of bytes read from disk per piece when the audio file is uploaded.
	CHUNK_SIZE = 5 * 1024 * 1024

	@st.cache
	def transcribe_from_link(link: str, categories: bool) -> str:
	    """Download a video's audio, upload it to AssemblyAI and start transcription.

	    The audio is saved locally as ``<video id>.mp3`` (see ``ydl_opts``),
	    streamed to the upload endpoint in ``CHUNK_SIZE`` pieces, and a
	    transcription job is then created for the uploaded file.

	    Args:
	        link: URL of the video; surrounding whitespace is ignored.
	        categories: Whether to request ``iab_categories`` for the transcript.

	    Returns:
	        The URL to poll for the status and result of the transcription job.

	    Raises:
	        requests.HTTPError: If the upload or the transcription request is
	            answered with an HTTP error status.
	    """
	    video_ref = link.strip()

	    # download the audio of the YouTube video locally
	    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
	        meta = ydl.extract_info(video_ref)
	    save_location = meta['id'] + ".mp3"

	    print('Saved mp3 to', save_location)

	    def read_file(filename):
	        """Yield the file's bytes in CHUNK_SIZE pieces so the upload is streamed."""
	        with open(filename, 'rb') as _file:
	            while True:
	                data = _file.read(CHUNK_SIZE)
	                if not data:
	                    break
	                yield data

	    # upload audio file to AssemblyAI
	    upload_response = requests.post(
	        upload_endpoint,
	        headers=headers_auth_only, data=read_file(save_location)
	    )
	    # Fail with the real HTTP error instead of a KeyError on the JSON below.
	    upload_response.raise_for_status()

	    audio_url = upload_response.json()['upload_url']
	    print('Uploaded to', audio_url)

	    # start the transcription of the audio file
	    transcript_request = {
	        'audio_url': audio_url,
	        # Sent as a JSON boolean; the strings 'True'/'False' are not booleans
	        # once serialized.
	        'iab_categories': bool(categories),
	    }

	    transcript_response = requests.post(transcript_endpoint, json=transcript_request, headers=headers)
	    transcript_response.raise_for_status()

	    # this is the id of the file that is being transcribed in the AssemblyAI servers
	    # we will use this id to access the completed transcription
	    transcript_id = transcript_response.json()['id']
	    polling_endpoint = transcript_endpoint + "/" + transcript_id

	    print("Transcribing at", polling_endpoint)

	    return polling_endpoint


	def get_status(polling_endpoint):
	    """Fetch the job's current status and store it in the session state.

	    Args:
	        polling_endpoint: URL of the transcription job to query.

	    Raises:
	        requests.HTTPError: If the request is answered with an HTTP error
	            status.
	    """
	    polling_response = requests.get(polling_endpoint, headers=headers)
	    # Surface API failures directly rather than as a KeyError on 'status'.
	    polling_response.raise_for_status()
	    st.session_state['status'] = polling_response.json()['status']

	def refresh_state():
	    """Reset the stored transcription status to 'submitted'."""
	    st.session_state['status'] = 'submitted'


	# ---- Page body: runs top to bottom on every Streamlit script execution ----
	st.title('Easily transcribe YouTube videos')

	# Editing the link resets the stored status via refresh_state. The value is
	# stripped so a whitespace-only entry is treated the same as an empty one.
	link = st.text_input('Enter your YouTube video link', on_change=refresh_state).strip()
	if not link:
	    # Nothing to transcribe yet: show short usage instructions.
	    st.markdown("---", unsafe_allow_html=True)
	    st.write("1. Go to [Youtube](https://www.youtube.com) to select a video for transcription.")
	    st.write("2. Copy the link of the selected video and paste it in the form above and press enter. ")
	else:
	    st.video(link)

	    st.text("The transcription is " + st.session_state['status'])

	    polling_endpoint = transcribe_from_link(link, False)

	    # The button's callback refreshes st.session_state['status'].
	    st.button('check_status', on_click=get_status, args=(polling_endpoint,))

	    transcript = ''
	    if st.session_state['status'] == 'completed':
	        polling_response = requests.get(polling_endpoint, headers=headers)
	        # Guard against a null text field so the widgets below get a string.
	        transcript = polling_response.json()['text'] or ''

	    # Display Transcription
	    st.markdown(transcript)

	    # Download Button
	    st.download_button('Download Transcription', transcript, 'transcript.txt')