# whisper_wrapper/lib/media.py
# Madhuslista
# Feature: Add function to create a temp file to hold the returned text
# Commit 478e831
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import secrets
import subprocess
import tempfile
from pathlib import Path
from gradio.utils import NamedString
from huggingface_hub import hf_hub_url as hf_link
from .config import (
AUDIO_DIR,
TRANSCRIPTS_DIR,
AUDIO_EXT,
TEXT_EXT,
)
# -->> Tunables <<---------------------
# -->> Definitions <<------------------
# -->> API <<--------------------------
def extract_audio_from_video(video_file, audio_folder_path):
    """Extract the audio track of a video into a file using ``ffmpeg``.

    The audio is written inside ``audio_folder_path`` (created if missing)
    and is named after the video's stem plus ``AUDIO_EXT``. A previous output
    with the same name is removed before ``ffmpeg`` runs.

    Args:
        video_file: Path of the input video; passed to ``ffmpeg -i``.
        audio_folder_path: Directory that receives the extracted audio.

    Returns:
        A ``(path_str, path)`` tuple holding the output location as a ``str``
        and as a ``pathlib.Path``.

    Raises:
        RuntimeError: If ``ffmpeg`` exits with a non-zero status.
    """
    # Create the audio folder if it doesn't exist
    audio_dir = Path(audio_folder_path)
    audio_dir.mkdir(parents=True, exist_ok=True)

    # Output file: <audio folder>/<video stem><AUDIO_EXT>
    output_audio_path = audio_dir / (Path(video_file).stem + AUDIO_EXT)
    output_audio_pathstr = str(output_audio_path)

    # ffmpeg is invoked without `-y`, so a leftover file would make it ask
    # before overwriting; remove any stale output up front.
    output_audio_path.unlink(missing_ok=True)

    # Command to extract audio using ffmpeg
    command = [
        "ffmpeg",
        "-i",
        video_file,
        "-ab",
        "160k",
        "-ac",
        "2",
        "-ar",
        "44100",
        "-vn",
        output_audio_pathstr,
    ]

    # Run the command, capturing the output so a failure can be reported
    print("Processing audio...")
    result = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if result.returncode != 0:
        # Surface the failure instead of returning a path to a missing file.
        details = result.stderr.decode("utf-8", errors="replace").strip()
        raise RuntimeError(
            f"ffmpeg failed with exit code {result.returncode} "
            f"while processing {video_file!r}: {details}"
        )
    print("Audio extracted!")

    return output_audio_pathstr, output_audio_path
def save_file(audio_path, transcript_folder_path, text):
    """Write a transcript to disk and build a Hub URL for it.

    The transcript is stored inside ``transcript_folder_path`` (created if
    missing) under the audio file's stem plus ``TEXT_EXT``. An existing
    transcript with the same name is replaced.

    Args:
        audio_path: Path of the audio file the transcript belongs to; only
            its stem is used, to name the transcript.
        transcript_folder_path: Directory that receives the transcript.
        text: Transcript contents to write.

    Returns:
        A ``(path_str, link)`` tuple: the transcript path as a ``str`` and
        the URL built by ``hf_hub_url`` for that file.
    """
    # Create the transcript folder if it doesn't exist
    transcript_dir = Path(transcript_folder_path)
    transcript_dir.mkdir(parents=True, exist_ok=True)

    # Output file: <transcript folder>/<audio stem><TEXT_EXT>
    output_transcript_path = transcript_dir / (Path(audio_path).stem + TEXT_EXT)
    output_transcript_pathstr = str(output_transcript_path)

    # Drop any previous transcript, then write the new one. The encoding is
    # explicit so non-ASCII transcripts do not depend on the platform locale.
    output_transcript_path.unlink(missing_ok=True)
    with open(output_transcript_pathstr, "w", encoding="utf-8") as f:
        f.write(text)

    # hf_hub_url prefixes `filename` with `subfolder`, so pass only the bare
    # file name here; passing the full path would repeat the directory.
    link = hf_link(
        repo_id="Madhuslista/whisper_wrapper",
        filename=output_transcript_path.name,
        subfolder=output_transcript_path.parent.as_posix(),
    )
    return output_transcript_pathstr, link
def create_tmp_file(text, dir_path, file_name, ext):
    """Write ``text`` to ``<dir_path>/<random token>/<file_name><ext>``.

    A fresh sub-directory named with a random hex token is created under
    ``dir_path`` for each call. The text is first written to a temporary
    file in that directory and then moved to its final name.

    Args:
        text: String to store; it is written UTF-8 encoded.
        dir_path: Parent directory under which the random sub-directory
            is created.
        file_name: Final file name, without the extension.
        ext: Extension appended to ``file_name`` (also used as the
            temporary file's suffix).

    Returns:
        A ``NamedString`` built from the final file path.
    """
    # Create a per-call directory so equal file names never collide
    target_dir = str(Path(dir_path) / secrets.token_hex(10))
    Path(target_dir).mkdir(parents=True, exist_ok=True)

    # Write the text to a temporary file; the context manager guarantees the
    # handle is closed even if encoding or writing fails.
    with tempfile.NamedTemporaryFile(
        delete=False,
        dir=target_dir,
        suffix=ext,
    ) as tmp:
        tmp.write(text.encode("utf-8"))
        tmp_name = tmp.name

    # Move it to its final name; os.replace overwrites on every platform
    file_path = Path(target_dir) / (file_name + ext)
    os.replace(tmp_name, file_path)

    # Return the path to the file
    return NamedString(file_path)
# -->> Execute <<----------------------
# -->> Export <<-----------------------