Spaces:
Runtime error
Runtime error
File size: 4,259 Bytes
df07f44 1718583 df07f44 b098f80 df07f44 b098f80 df07f44 b098f80 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import whisper
import os
import ffmpeg
import textwrap
from flask import Flask
from pytube import YouTube
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import SRTFormatter
from deep_translator import GoogleTranslator
'''
To run the API, paste "uvicorn milestone-2:app" into a terminal.
'''
def download_audio(url: str, download_path: str):
    """Download the first audio-only stream of a YouTube video.

    The file is stored in *download_path* and named after the video title
    with an ``.mp3`` suffix.

    Args:
        url: URL of the YouTube video.
        download_path: Directory the audio file is written to.

    Returns:
        The path of the downloaded file on success, otherwise a
        ``(400, message)`` tuple describing the failure.
    """
    try:
        yt = YouTube(url)
        audio = yt.streams.filter(only_audio=True).first()
        if audio is None:
            # No audio-only stream matched the filter; report it explicitly
            # instead of relying on an AttributeError further down.
            return 400, "Error: audio souce not avaliable or cannot be download"
        # Video titles can contain path separators or characters that are
        # not valid in file names (e.g. "AC/DC"); replace them so the file
        # is created directly inside download_path.
        safe_title = "".join(
            "_" if ch in '\\/:*?"<>|' else ch for ch in yt.title
        )
        # NOTE(review): the stream is saved as downloaded; the ".mp3" suffix
        # only names the file and does not transcode it - confirm consumers
        # do not rely on the extension.
        file_name = safe_title + '.mp3'
        audio.download(output_path=download_path, filename=file_name)
    except KeyError:
        return 400, "Error: audio souce not avaliable or cannot be download"
    except ValueError:
        return 400, "Error: invalide URL"
    except Exception as e:
        return 400, "Error downloading video: " + str(e)
    return os.path.join(download_path, file_name)
def download_captions(url: str, download_path: str):
    """Fetch the transcript of a YouTube video and save it as an SRT file.

    The file is stored in *download_path* and named after the video title
    with an ``.srt`` suffix.

    Args:
        url: URL of the YouTube video; the id is taken from its ``v=``
            query parameter.
        download_path: Directory the caption file is written to.

    Returns:
        ``None`` on success, otherwise a ``(400, message)`` tuple describing
        the failure.
    """
    formatter = SRTFormatter()
    try:
        yt = YouTube(url)
        # Keep only the id itself: watch URLs often carry further query
        # parameters after it (e.g. "&t=10s" or "&list=...").
        vid_id = url.split("v=")[1].split("&")[0]
        caption = YouTubeTranscriptApi.get_transcript(vid_id)
        srt_formatted = formatter.format_transcript(caption)
        # Video titles can contain path separators or characters that are
        # not valid in file names; replace them before building the path.
        safe_title = "".join(
            "_" if ch in '\\/:*?"<>|' else ch for ch in yt.title
        )
        file_name = safe_title + '.srt'
        file_path = os.path.join(download_path, file_name)
        # The context manager closes the file; no explicit close() needed.
        with open(file_path, 'w', encoding='utf-8') as srt_file:
            srt_file.write(srt_formatted)
    except KeyError:
        return 400, "Error: video not avaliable or cannot be download"
    except ValueError:
        return 400, "Error: invalide URL"
    except Exception as e:
        # Covers everything else, including a URL without a "v=" parameter.
        return 400, "Error extracting transcript from: " + str(e)
    return None
def sep_audio(video: str, output_path):
    """Separate the audio track from a video file and save it as ``.mp3``.

    Args:
        video: Path of the video file to read.
        output_path: Directory the audio file is written to.

    Returns:
        The path of the created audio file, or ``None`` when the video file
        does not exist or the conversion fails (a message is printed in
        both cases).
    """
    # Building the ffmpeg graph is lazy and never touches the file, so the
    # existence check has to happen explicitly.
    if not os.path.isfile(video):
        print("%s file couldn't be accessed" % video)
        return None

    # Strip the directory and only the final extension, so names that
    # contain dots are kept intact.
    base_name = os.path.splitext(os.path.basename(video))[0]
    file_path = os.path.join(output_path, base_name + '.mp3')
    try:
        source = ffmpeg.input(video)
        audio = source.audio.filter("anull")
        ffmpeg.output(audio, file_path).run()
    except Exception:
        # Best-effort: report and signal failure with None, but let
        # KeyboardInterrupt / SystemExit propagate.
        print("error creating audio file")
        return None
    return file_path
def transcribe_audio(input_file: str, output_path: str):
    """Transcribe an audio file with Whisper and save the text to a file.

    The transcript is wrapped at 100 columns and written to
    ``<output_path>/<input base name>.txt``.

    Args:
        input_file: Path of the audio file to transcribe.
        output_path: Directory the transcript is written to.

    Returns:
        The path of the written transcript, or ``None`` when the input file
        or the output directory cannot be found (a message is printed).
    """
    # TODO: eventually add a check for if file is mp3
    # Check the input before loading the model, which is the costly step.
    if not os.path.isfile(input_file):
        print("%s file was not found " % input_file)
        return None
    try:
        model = whisper.load_model("base")
        result = model.transcribe(input_file)
    except FileNotFoundError:
        # NOTE(review): with the input already verified, this presumably
        # means an external tool whisper relies on is missing - confirm.
        print("%s file was not found " % input_file)
        return None

    # Strip the directory and only the final extension, so names that
    # contain dots are kept intact.
    base_name = os.path.splitext(os.path.basename(input_file))[0]
    file_path = os.path.join(output_path, base_name) + ".txt"
    wrapped_text = textwrap.fill(result["text"], width=100)
    try:
        # The context manager closes the file; no explicit close() needed.
        with open(file_path, 'w', encoding='utf-8') as out_file:
            out_file.write(wrapped_text)
    except FileNotFoundError:
        print("%s this dir can't be accessed " % output_path)
        return None
    return file_path
def translate_text(input_file: str, output_path: str, lang: str):
    """Translate an English text file line by line into another language.

    The translation is written to
    ``<output_path>/<input base name> translation.txt``.

    Args:
        input_file: Path of the English text file to translate.
        output_path: Directory the translated file is written to.
        lang: Target language passed to ``GoogleTranslator``.

    Returns:
        ``None``. A message is printed when the input file or the output
        directory cannot be found, and on success.
    """
    # Read the source first so a missing file stops the function cleanly
    # instead of failing later on an unbound file handle.
    try:
        with open(input_file, 'r', encoding="utf8") as in_file:
            lines = in_file.readlines()
    except FileNotFoundError:
        print("%s file was not found " % input_file)
        return None

    translator = GoogleTranslator(source='english', target=lang)

    # Reuse the input's base name; strip the directory and only the final
    # extension so names that contain dots are kept intact.
    base_name = os.path.splitext(os.path.basename(input_file))[0]
    out_file_path = os.path.join(output_path, base_name + ' translation.txt')
    try:
        out_file = open(out_file_path, 'w', encoding='utf8')
    except FileNotFoundError:
        print("%s this dir can't be accessed " % output_path)
        return None

    # "with" guarantees the output is closed even if a translation fails.
    with out_file:
        for line in lines:
            if not line.strip():
                # Nothing to translate; keep the blank line as is.
                out_file.write('\n')
                continue
            translated_line = translator.translate(line)
            # Guard against a None result so the concatenation cannot fail.
            out_file.write((translated_line or '') + '\n')
    print('%s has be sucessfully translate' % input_file)
    return None
### FRONT END ###
import streamlit as st
from transformers import pipeline
# NOTE(review): 'video-translation' does not look like a built-in
# transformers pipeline task - confirm it is registered, otherwise this
# call fails when the app starts.
pipe = pipeline('video-translation')

# Ask for a video URL; the pipeline only runs once something was entered.
text = st.text_area('enter a video url!')
if text:
    out = pipe(text)
    # Show the raw pipeline output as JSON.
    st.json(out)