|
import gradio as gr |
|
import requests |
|
from typing import Optional |
|
import json |
|
import subprocess |
|
import os |
|
import tempfile |
|
from pydub import AudioSegment |
|
|
|
|
|
# Base URL of the ASR backend; run_asr() posts audio to its "/asr" endpoint.
API_URL = "http://astarwiz.com:9998"

# RapidAPI key sent in the "x-rapidapi-key" header by download_youtube_audio().
# Read from the RAPID_API_KEY environment variable; None when it is not set.
rapid_key = os.getenv("RAPID_API_KEY")
|
|
|
def fetch_youtube_id(youtube_url: str) -> str:
    """Extract the video id from a YouTube URL.

    Supported forms are ``...watch?v=<id>`` (the ``v`` parameter may appear
    anywhere in the query string), ``.../shorts/<id>`` and ``youtu.be/<id>``.
    Only the id itself is returned: trailing query parameters, fragments or
    slashes (``&t=42s``, ``?feature=share``, ...) are dropped, because callers
    use the id to build file names and an embed URL.

    Args:
        youtube_url: The URL as pasted by the user.

    Returns:
        The video id, consisting only of letters, digits, ``_`` and ``-``.

    Raises:
        ValueError: If the URL matches none of the supported forms.
    """
    import re

    id_chars = r"([A-Za-z0-9_-]+)"
    # Tried in order; the first pattern that matches wins.
    patterns = (
        r"(?:^|[?&])v=" + id_chars,
        r"(?:^|/)shorts/" + id_chars,
        r"youtu\.be/" + id_chars,
    )
    for pattern in patterns:
        found = re.search(pattern, youtube_url)
        if found:
            return found.group(1)
    raise ValueError("Unsupported URL format")
|
|
|
def download_youtube_audio(youtube_url: str, output_dir: Optional[str] = None) -> Optional[str]:
    """Download a YouTube video's media and convert it to a 16 kHz MP3.

    The download links are obtained from the youtube86 RapidAPI endpoint.
    The first entry flagged ``isBundle`` that downloads successfully is
    converted with pydub and written to ``<output_dir>/<video_id>.mp3``.
    If that file already exists it is returned without any network access.

    Args:
        youtube_url: URL of the video; must be accepted by fetch_youtube_id().
        output_dir: Existing directory for the result. Defaults to the
            system temporary directory.

    Returns:
        Path of the MP3 file, or None when the link lookup fails, the
        response has an unexpected shape, or no flagged entry could be
        downloaded.

    Raises:
        Exception: Propagated from fetch_youtube_id() for unsupported URLs,
            and from requests / pydub on network or conversion failures.
    """
    video_id = fetch_youtube_id(youtube_url)

    if output_dir is None:
        output_dir = tempfile.gettempdir()

    output_filename = os.path.join(output_dir, f"{video_id}.mp3")

    # Reuse an earlier conversion of the same video if it is still on disk.
    if os.path.exists(output_filename):
        return output_filename

    links_endpoint = "https://youtube86.p.rapidapi.com/api/youtube/links"
    headers = {
        'Content-Type': 'application/json',
        'x-rapidapi-host': 'youtube86.p.rapidapi.com',
        'x-rapidapi-key': rapid_key,
    }
    payload = {"url": youtube_url}

    # Timeouts keep an unresponsive service from hanging the UI forever.
    response = requests.post(links_endpoint, headers=headers, json=payload, timeout=(10, 60))

    if response.status_code != 200:
        print("Error:", response.status_code, response.text)
        return None
    print('Fetched audio links')

    try:
        candidates = response.json()[0]['urls']
    except (IndexError, KeyError, TypeError, ValueError):
        print("Error: unexpected response from the link service:", response.text)
        return None

    for entry in candidates:
        # NOTE(review): 'isBundle' presumably marks combined audio+video
        # streams — confirm against the API documentation.
        if not entry.get('isBundle'):
            continue

        extension = entry['extension']
        audio_response = requests.get(entry['url'], timeout=(10, 120))
        if audio_response.status_code != 200:
            continue

        # Use a uniquely named scratch file: "<video_id>.<extension>" would
        # collide with output_filename when the extension is "mp3", and the
        # cleanup below would then delete the result itself.
        fd, temp_filename = tempfile.mkstemp(suffix=f".{extension}", dir=output_dir)
        try:
            with os.fdopen(fd, 'wb') as audio_file:
                audio_file.write(audio_response.content)

            audio = AudioSegment.from_file(temp_filename, format=extension)
            audio = audio.set_frame_rate(16000)
            audio.export(output_filename, format="mp3", parameters=["-ar", "16000"])
        finally:
            # Remove the raw download even when the conversion fails.
            if os.path.exists(temp_filename):
                os.remove(temp_filename)

        return output_filename

    return None
|
|
|
def run_asr(audio_file, youtube_url, with_timestamp, model_choice):
    """Transcribe an audio file or a YouTube video through the remote ASR API.

    A non-empty ``youtube_url`` takes precedence over ``audio_file``: the
    video's audio is downloaded first and the downloaded file is deleted
    again once the request has finished.

    Args:
        audio_file: Path of the uploaded/recorded audio, or a falsy value.
        youtube_url: YouTube URL, or a falsy value.
        with_timestamp: Forwarded to the API as the ``with_timestamp`` field.
        model_choice: ``"whisper_v3"`` selects the ``official-v3`` model; any
            other value selects ``whisper-large-v2-imda``.

    Returns:
        The ``text`` field of the API response on success; otherwise a
        message for the result box (a prompt for missing input, or a string
        starting with ``"Error:"``). Exceptions are never propagated because
        this function is wired directly to the UI button.
    """
    temp_file = None
    try:
        if youtube_url:
            audio_file = download_youtube_audio(youtube_url)
            if not audio_file:
                # The downloader signals failure with None; report it
                # instead of failing later inside open().
                return "Error: could not download audio from the YouTube URL."
            temp_file = audio_file
        elif not audio_file:
            return "Please provide either an audio file or a YouTube URL."

        model_name = "official-v3" if model_choice == "whisper_v3" else "whisper-large-v2-imda"
        data = {'language': 'en', 'model_name': model_name, 'with_timestamp': with_timestamp}

        # The context manager closes the upload handle before the cleanup in
        # `finally` runs (an open handle blocks os.remove on Windows).
        with open(audio_file, 'rb') as audio_handle:
            response = requests.post(
                f"{API_URL}/asr",
                data=data,
                files={'file': audio_handle},
                # Bound only the connect phase; transcription itself may
                # legitimately take a long time.
                timeout=(10, None),
            )

        if response.status_code == 200:
            return response.json().get("text", "")
        return f"Error: {response.status_code}"
    except Exception as e:
        return f"Error: {str(e)}"
    finally:
        # Only files downloaded here are removed, never user uploads.
        if temp_file and os.path.exists(temp_file):
            os.remove(temp_file)
|
|
|
def embed_youtube(youtube_url):
    """Build the UI updates that show (or hide) the embedded YouTube player.

    Args:
        youtube_url: Content of the YouTube URL textbox; may be empty.

    Returns:
        A 3-tuple ``(player_update, result_text, audio_value)``:
        an update for the HTML player (iframe markup and visible, or empty
        and hidden), the text for the result box (empty, or an
        "Invalid YouTube URL" message), and ``None`` for the audio input.
    """
    import html
    from urllib.parse import quote

    if not youtube_url:
        return gr.update(value="", visible=False), "", None

    try:
        video_id = fetch_youtube_id(youtube_url)
    except Exception as e:
        return gr.update(value="", visible=False), f"Invalid YouTube URL: {str(e)}", None

    # The id comes from user input: percent-encode it for the URL path and
    # escape it for the attribute so it cannot break out of the markup.
    safe_id = html.escape(quote(video_id, safe=""), quote=True)
    embed_html = f'<iframe width="560" height="315" src="https://www.youtube.com/embed/{safe_id}" frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>'
    return gr.update(value=embed_html, visible=True), "", None
|
|
|
def clear_on_audio_input(audio):
    """Return UI updates for a change of the audio input.

    The 4-tuple is ordered as (result box, video player, YouTube URL box,
    run button). With audio present, the first three are reset and the
    button is enabled; without audio they are left untouched and the
    button is disabled.
    """
    if audio is None:
        untouched = [gr.update() for _ in range(3)]
        return (*untouched, gr.update(interactive=False))

    hidden_player = gr.update(value="", visible=False)
    enabled_button = gr.update(interactive=True)
    return "", hidden_player, "", enabled_button
|
|
|
|
|
# Gradio UI: audio/YouTube inputs on the left, transcription on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # NOTE(review): the characters before "Audio" look like a mis-encoded
    # emoji — confirm the intended glyph and the file's encoding.
    gr.Markdown("# ποΈ Audio Transcription Service")
    gr.Markdown("Upload an audio file, record your voice, or paste a YouTube URL to get an English transcription.")

    with gr.Row():
        with gr.Column(scale=2):
            audio_input = gr.Audio(sources=['microphone', 'upload'], type="filepath", label="Audio Input")
            youtube_input = gr.Textbox(label="YouTube URL", placeholder="Or paste a YouTube URL here...")
            video_player = gr.HTML(visible=False)
            timestamp_toggle = gr.Checkbox(label="Include Timestamps", value=False)
            model_choice = gr.Radio(["local_whisper", "whisper_v3"], label="Model Selection", value="local_whisper")
        with gr.Column(scale=3):
            result = gr.Textbox(
                label="Transcription Result",
                placeholder="Your transcription will appear here...",
                lines=10,
            )

    # NOTE(review): the leading character of the label also looks like a
    # mis-encoded emoji — confirm.
    run_button = gr.Button("π Transcribe Audio", variant="primary", interactive=False)
    run_button.click(run_asr, inputs=[audio_input, youtube_input, timestamp_toggle, model_choice], outputs=[result])

    # `.change` also fires when a value is set by another handler, so the two
    # handlers below trigger each other. Each one therefore receives BOTH
    # widget values, so that a programmatic clear of one input neither wipes
    # the other input nor disables the button while an input is available.

    def _on_youtube_change(url, audio):
        """Sync player, result box, audio widget and button with the URL box."""
        player_update, message, audio_value = embed_youtube(url)
        if url:
            # A pasted URL replaces any audio input.
            return player_update, message, audio_value, gr.update(interactive=True)
        # The box was emptied (possibly because audio was just provided):
        # keep the audio widget as it is.
        return player_update, message, gr.update(), gr.update(interactive=audio is not None)

    def _on_audio_change(audio, url):
        """Sync result box, player, URL box and button with the audio widget."""
        if audio is None:
            # Audio was cleared (possibly because a URL was just pasted):
            # the button stays usable as long as a URL is present.
            return gr.update(), gr.update(), gr.update(), gr.update(interactive=bool(url))
        return clear_on_audio_input(audio)

    youtube_input.change(
        fn=_on_youtube_change,
        inputs=[youtube_input, audio_input],
        outputs=[video_player, result, audio_input, run_button],
    )

    audio_input.change(
        fn=_on_audio_change,
        inputs=[audio_input, youtube_input],
        outputs=[result, video_player, youtube_input, run_button],
    )

    gr.Markdown("### How to use:")
    gr.Markdown("1. Upload an audio file or record your voice using the microphone, OR paste a YouTube URL.")
    gr.Markdown("2. If you paste a YouTube URL, the video will be displayed for your reference, and any previous transcription or audio input will be cleared.")
    gr.Markdown("3. If you upload or record audio, any previous transcription, YouTube URL, and video will be cleared.")
    gr.Markdown("4. Click the 'Transcribe Audio' button to start the process.")
    gr.Markdown("5. Wait for a few seconds, and your transcription will appear in the result box.")

# Listen on all interfaces so the app is reachable from other hosts.
demo.launch(server_name='0.0.0.0')
|
|