izzidyaz's picture
first commit
b799161
import streamlit as st
import youtube_dl
import requests
import os

# AssemblyAI API key, sent as the "authorization" header on every request.
# It is read from the ASSEMBLYAI_API_KEY environment variable when that is
# set to a non-empty value, so the secret does not have to live in the source.
# NOTE(review): the literal is kept only as a backward-compatible fallback;
# because it is committed in plain text it should be rotated and then removed.
auth_key = os.environ.get('ASSEMBLYAI_API_KEY') or '9f704fc1faa44c7391fa81a59875404e'
# Seed the transcription status only when the session has none yet, so an
# existing value is never overwritten here.
if 'status' not in st.session_state:
    st.session_state['status'] = 'submitted'

# Options passed to youtube_dl.YoutubeDL: pick the best audio stream and have
# the FFmpegExtractAudio post-processor convert it to a 192 kbps mp3 named
# after the video id in the current directory.
ydl_opts = {
    'format': 'bestaudio/best',
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }],
    # Raw string on purpose: in a normal literal "\b" is a backspace character
    # and "\F" is an invalid escape sequence, which corrupted this path.
    # NOTE(review): youtube_dl documents this option as 'ffmpeg_location'
    # (underscore); the hyphenated key is presumably ignored. Confirm before
    # renaming, since this Windows path will not exist on other hosts.
    'ffmpeg-location': r'C:\FFmpeg\bin',
    'outtmpl': "./%(id)s.%(ext)s",
}

# AssemblyAI REST endpoints used by this script.
transcript_endpoint = "https://api.assemblyai.com/v2/transcript"
upload_endpoint = 'https://api.assemblyai.com/v2/upload'

# The upload request streams raw bytes, so it carries only the auth header;
# the JSON requests additionally declare their content type.
headers_auth_only = {'authorization': auth_key}
headers = {
    "authorization": auth_key,
    "content-type": "application/json"
}

# Number of bytes read from the audio file per uploaded chunk (5 MiB).
CHUNK_SIZE = 5242880
# NOTE(review): st.cache is deprecated in recent Streamlit releases in favour
# of st.cache_data; confirm the pinned Streamlit version before switching.
@st.cache
def transcribe_from_link(link, categories: bool):
    """Download a video's audio, upload it to AssemblyAI and start a transcript.

    Args:
        link: Video URL handed to youtube_dl; surrounding whitespace is
            stripped first.
        categories: Whether to request ``iab_categories`` for the transcript.

    Returns:
        The polling URL for the new transcript, i.e. the transcript endpoint
        followed by "/" and the transcript id.

    Raises:
        requests.HTTPError: If the upload or the transcript request comes back
            with an HTTP error status.
    """
    _id = link.strip()

    def get_vid(_id):
        # extract_info runs youtube_dl with ydl_opts and returns the metadata.
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            return ydl.extract_info(_id)

    # download the audio of the YouTube video locally
    meta = get_vid(_id)
    save_location = meta['id'] + ".mp3"
    print('Saved mp3 to', save_location)

    def read_file(filename):
        # Stream the file in CHUNK_SIZE pieces; iter() with a b'' sentinel
        # stops at the first empty read, i.e. at end of file.
        with open(filename, 'rb') as _file:
            for data in iter(lambda: _file.read(CHUNK_SIZE), b''):
                yield data

    # upload audio file to AssemblyAI
    upload_response = requests.post(
        upload_endpoint,
        headers=headers_auth_only,
        data=read_file(save_location),
    )
    # Fail with the HTTP error itself rather than a KeyError on the body.
    upload_response.raise_for_status()
    audio_url = upload_response.json()['upload_url']
    print('Uploaded to', audio_url)

    # start the transcription of the audio file
    transcript_request = {
        'audio_url': audio_url,
        # Send a real JSON boolean instead of the strings 'True' / 'False'.
        'iab_categories': bool(categories),
    }
    transcript_response = requests.post(
        transcript_endpoint,
        json=transcript_request,
        headers=headers,
    )
    transcript_response.raise_for_status()

    # this is the id of the file that is being transcribed in the AssemblyAI servers
    # we will use this id to access the completed transcription
    transcript_id = transcript_response.json()['id']
    polling_endpoint = transcript_endpoint + "/" + transcript_id
    print("Transcribing at", polling_endpoint)
    return polling_endpoint
def get_status(polling_endpoint):
    """Fetch the transcript's current status and store it in session state.

    Args:
        polling_endpoint: URL of the transcript to query.

    The ``status`` field of the JSON response is written to
    ``st.session_state['status']``; nothing is returned.
    """
    payload = requests.get(polling_endpoint, headers=headers).json()
    st.session_state['status'] = payload['status']
def refresh_state():
    """Reset the stored transcription status to 'submitted'."""
    session = st.session_state
    session['status'] = 'submitted'
# ---- Page layout -----------------------------------------------------------
st.title('Easily transcribe YouTube videos')

# Changing the link resets the stored status via refresh_state.
link = st.text_input('Enter your YouTube video link', on_change=refresh_state)

if link == "":
    # No link yet: show the usage instructions.
    st.markdown("---", unsafe_allow_html=True)
    st.write("1. Go to [Youtube](https://www.youtube.com) to select a video for transcription.")
    st.write("2. Copy the link of the selected video and paste it in the form above and press enter. ")
else:
    st.video(link)
    st.text("The transcription is " + st.session_state['status'])

    # Start the transcription and keep the URL used to poll its status.
    polling_endpoint = transcribe_from_link(link, False)
    st.button('check_status', on_click=get_status, args=(polling_endpoint,))

    # Stays empty until the transcript is reported as completed.
    transcript = ''
    if st.session_state['status'] == 'completed':
        polling_response = requests.get(polling_endpoint, headers=headers)
        # Fall back to '' if the service returns a null text, so the widgets
        # below always receive a string.
        transcript = polling_response.json()['text'] or ''

    # Display transcription
    st.markdown(transcript)
    # Download button
    st.download_button('Download Transcription', transcript, 'transcript.txt')