Spaces:
Runtime error
Runtime error
File size: 2,748 Bytes
d4b6fc6 fb8c0b6 667630b d4b6fc6 0ca3a16 d4b6fc6 0ca3a16 d4b6fc6 8407d4b 6f66c35 d4b6fc6 33186cb d4b6fc6 8508782 33186cb d4b6fc6 37e5baf a0e4077 d4b6fc6 667630b d4b6fc6 13aaabd d4b6fc6 c147b83 d4b6fc6 667630b c147b83 667630b 5f57e08 c147b83 667630b 5f57e08 667630b c147b83 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
import torch
import whisper
import pytube
import librosa
import streamlit as st
import numpy as np
from fpdf import FPDF
from reportlab.pdfgen.canvas import Canvas
import time
def predict(url=None, translation="No", tran_lang="en"):
    """Transcribe the audio track of a YouTube video with Whisper.

    The audio-only stream of ``url`` is downloaded once into a temporary
    directory, the spoken language is detected from the leading window kept
    by ``whisper.pad_or_trim``, and the whole track is transcribed.

    Args:
        url: Address of the YouTube video to process.
        translation: ``"Yes"`` to additionally produce a translated text;
            any other value skips the translation pass.
        tran_lang: Language code requested for the translated text.

    Returns:
        A ``(detected_lang, transcription, trans)`` tuple, where
        ``detected_lang`` is the most probable language code and ``trans``
        is ``None`` when no translation was requested.

    Raises:
        ValueError: If ``url`` is missing, not a string, or blank.
        RuntimeError: If the video exposes no audio-only stream.
    """
    import tempfile

    # Validate before loading the model so bad input fails fast and cheaply.
    if not isinstance(url, str) or not url.strip():
        raise ValueError("A YouTube URL is required.")

    model_m = whisper.load_model("tiny")

    # The download lives in a temporary directory so repeated runs do not
    # pile up media files in the working directory.
    with tempfile.TemporaryDirectory() as download_dir:
        stream = pytube.YouTube(url.strip()).streams.get_audio_only()
        if stream is None:
            raise RuntimeError("The video has no audio-only stream to download.")
        audio_file = stream.download(output_path=download_dir)

        # Detect the language of the *requested* video (not a fixed sample).
        audio = whisper.pad_or_trim(whisper.load_audio(audio_file))
        mel = whisper.log_mel_spectrogram(audio).to(model_m.device)
        _, probs = model_m.detect_language(mel)
        detected_lang = max(probs, key=probs.get)

        transcription = model_m.transcribe(audio_file, fp16=False)["text"]

        trans = None
        if translation == "Yes":
            if tran_lang == "en":
                # Whisper's supported translation path: any language -> English.
                trans = model_m.transcribe(
                    audio_file, task="translate", fp16=False
                )["text"]
            else:
                # NOTE(review): Whisper has no official non-English target;
                # forcing the decoding language is kept as a best effort.
                trans = model_m.transcribe(
                    audio_file, language=tran_lang, fp16=False
                )["text"]

    return detected_lang, transcription, trans
# Streamlit page: collect the inputs, run the speech-to-text pipeline and
# render the results.
st.image(
    image="https://www.respeecher.com/hubfs/What-is-Text-to-Speech-TTS%29-Initial-Speech-Synthesis-Explained-Respeecher-voice-cloning-software.jpeg",
    output_format="JPEG",
)
st.title("Speech to Text generator")
st.write("This app uses an open source neural net called Whisper(developed by OpenAI)")
url = st.text_input(label="Please enter the YouTube url: ")
tran_req = st.selectbox(
    label="Do you want to translate the transcript?", options=("Yes", "No")
)
if tran_req == "Yes":
    lang = st.selectbox(
        label="Please select the required language: ", options=("en", "fr", "ja")
    )
else:
    lang = "en"

if st.button("Generate"):
    if not url.strip():
        st.error("Please enter a YouTube url before generating.")
    else:
        result = None
        try:
            # st.spinner only shows while used as a context manager.
            with st.spinner("Fetching the video and running the speech to text engine..."):
                result = predict(url, translation=tran_req, tran_lang=lang)
        except Exception as exc:  # UI boundary: report instead of crashing the app
            st.error(f"Could not process the video: {exc}")

        if result is not None:
            # Tolerate a result tuple that lacks the translation element.
            lang_d, transcription, *extra = result
            trans = extra[0] if extra else None

            st.write("Detected language:", lang_d)
            # TODO: offer the transcript as a downloadable PDF.
            st.write(transcription)
            if tran_req == "Yes" and trans is not None:
                st.write("Translation: ")
                st.write(trans)
            st.success("Speech to text converted successfully!")