Spaces:

JosefBirman
/

YoutubeVideoTranslator

Runtime error

File size: 4,259 Bytes

import whisper
import os
import ffmpeg
import textwrap 
from flask import Flask
from pytube import YouTube
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import SRTFormatter
from deep_translator import GoogleTranslator


'''
    to run api paste " uvicorn milestone-2:app " in terminal
'''

def download_audio(url:str, download_path:str):

    try:
        yt = YouTube(url)
        audio = yt.streams.filter(only_audio=True).first()
        vid_title = yt.title
        file_name = vid_title + '.mp3'
        audio.download(output_path=download_path, filename=file_name)
    
    except KeyError:
        return 400, "Error: audio souce not avaliable or cannot be download"
    except ValueError:
        return 400, "Error: invalide URL"
    except Exception as e:
        return 400, "Error downloading video: " + str(e)
    
    return os.path.join(download_path, file_name)
    

def download_captions(url:str, download_path:str):

    formatter = SRTFormatter()

    try:
        yt = YouTube(url)
        vid_id = url.split("v=")[1]
        caption = YouTubeTranscriptApi.get_transcript(vid_id)
        srt_formatted = formatter.format_transcript(caption)
        file_name = yt.title + '.srt'
        file_path = os.path.join(download_path, file_name)
        with open(file_path, 'w', encoding='utf-8') as srt_file:
            srt_file.write(srt_formatted)
    except KeyError:
        return 400, "Error: video not avaliable or cannot be download"
    except ValueError:
        return 400, "Error: invalide URL"
    except Exception as e:
        400, "Error extracting transcript from: " + str(e)

    srt_file.close()

def sep_audio(video:str, output_path): #seperates audio from video file

    try:
        input = ffmpeg.input(video)
        audio = input.audio.filter("anull")
    except FileNotFoundError:
        print("%s file couldn't be accessed"%video)
    
    temp = video.split('/')[-1] #gets last element if a file path
    file_name = temp.split('.')[0] + '.mp3'
    file_path = os.path.join(output_path, file_name)

    try:
        output = ffmpeg.output(audio, file_path)
        output.run()
        return file_path
    except:
        print("error creating audio file")


def transcribe_audio(input_file:str, output_path:str): #eventually add a check for if file is mp3
    try:
        model = whisper.load_model("base")
        result = model.transcribe(input_file)
    except FileNotFoundError:
        print("%s file was not found " % input_file)

    try:
        file_name = input_file.split('/')[-1]
        file_name = file_name.split('.')[0]
        file_path = os.path.join(output_path, file_name) + ".txt"
        with open(file_path, 'w', encoding='utf-8') as out_file:
            wrapped_text = textwrap.fill(result["text"], width=100)
            out_file.write(wrapped_text)

    except FileNotFoundError:
        print("%s this dir can't be accessed " % output_path)
    
    out_file.close()
    return(file_path)

def translate_text(input_file:str, output_path:str, lang: str):

    translator = GoogleTranslator(source= 'english', target=lang)

    try: #try to open our caption file
        in_file = open(input_file, 'r', encoding="utf8") #opening file to read
    except FileNotFoundError:
        print("%s file was not found " % input_file)

    try: #try to create a new file to store translation
        out_file_name = (input_file.split('/')[-1]).split('.')[0] + ' translation.txt' # we do a split incase file is abs path then take old name
        out_file_path = os.path.join(output_path, out_file_name)
        out_file = open(out_file_path, 'w', encoding='utf8')
    except FileNotFoundError:
        print("%s this dir can't be accessed " % output_path)
        
    for i in in_file.readlines(): #reading all files in the 'captions' directory
        translated_line = translator.translate(i)
        out_file.write(translated_line+'\n')

    print('%s has be sucessfully translate' % input_file)
    in_file.close()
    out_file.close()


### FRONT END ###
import streamlit as st
from transformers import pipeline

pipe = pipeline('video-translation')
text = st.text_area('enter a video url!')

if text:
    out = pipe(text)
    st.json(out)