Spaces:

ArtSpace
/

TransCree

Sleeping

App Files Files Community

TransCree / app.py

ArtSpace

Update app.py

3412e09 verified 3 months ago

raw

history blame contribute delete

6.07 kB

	"""
	MediaTranscriberPro - Hugging Face Space
	Final Fix for DNS/IPv6 Issues
	"""
	# ---------------------------------------------------------
	# LAYER 1: SYSTEM SOCKET PATCH (Must be at the very top)
	# ---------------------------------------------------------
	import socket
	import os

	# Force IPv4 for every lookup by filtering getaddrinfo results process-wide.
	# NOTE: a host that resolves only to IPv6 yields an empty result list.
	old_getaddrinfo = socket.getaddrinfo


	def new_getaddrinfo(*args, **kwargs):
	    """Resolve like ``socket.getaddrinfo`` but keep only IPv4 entries.

	    Positional and keyword arguments are forwarded unchanged to the
	    original resolver, so callers that pass ``family=``, ``type=``,
	    ``proto=`` or ``flags=`` by keyword keep working.

	    Returns:
	        The entries of the original result whose address family is
	        ``socket.AF_INET``.
	    """
	    responses = old_getaddrinfo(*args, **kwargs)
	    return [response for response in responses if response[0] == socket.AF_INET]


	socket.getaddrinfo = new_getaddrinfo
	# ---------------------------------------------------------

	import gradio as gr
	import logging
	import tempfile
	import shutil
	import subprocess
	import re
	import yt_dlp
	from pathlib import Path
	from dataclasses import dataclass
	from typing import Optional, Callable

	# Logging: INFO-level records with timestamp and level, plus a logger
	# named after this module.
	logging.basicConfig(
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    level=logging.INFO,
	)
	logger = logging.getLogger(__name__)

	# Audio and video file extensions (lowercase, with leading dot).
	# NOTE(review): not referenced anywhere else in the visible code.
	SUPPORTED_MEDIA = {
	    # audio
	    ".mp3", ".wav", ".m4a", ".aac", ".ogg", ".opus", ".flac",
	    # video
	    ".mp4", ".mkv", ".avi", ".mov", ".webm",
	}

	@dataclass
	class Result:
	    """Outcome of an operation: a success flag plus optional payload fields.

	    Fields are declared in the order ``success, data, file_path, error`` and
	    may be passed positionally in that order; every field except
	    ``success`` defaults to ``None``.
	    """

	    # True when the operation succeeded, False when it failed.
	    success: bool
	    # Optional text payload.
	    data: Optional[str] = None
	    # Optional path of a produced file, stored as a string.
	    file_path: Optional[str] = None
	    # Optional error description for failed operations.
	    error: Optional[str] = None

	class MediaDownloader:
	    """Download the audio of a media URL into a local directory with yt-dlp."""

	    def __init__(self, output_dir):
	        """Remember the target directory and create it if it does not exist.

	        Args:
	            output_dir: Directory (``str`` or ``Path``) that receives downloads.
	        """
	        # Accept plain strings as well as Path objects.
	        self.output_dir = Path(output_dir)
	        self.output_dir.mkdir(parents=True, exist_ok=True)

	    def _build_options(self):
	        """Return the yt-dlp option dictionary used for every download."""
	        # LAYER 2: YT-DLP SPECIFIC OPTIONS
	        return {
	            'format': 'bestaudio/best',
	            'outtmpl': str(self.output_dir / '%(title)s.%(ext)s'),
	            'noplaylist': True,
	            # Forces the library to use IPv4. The embedding API has no
	            # 'force_ipv4' key; binding to 0.0.0.0 is what the CLI flag
	            # --force-ipv4 does.
	            'source_address': '0.0.0.0',
	            # Bypasses SSL errors.
	            # SECURITY NOTE(review): this disables TLS certificate
	            # verification; kept because it was a deliberate choice here.
	            'nocheckcertificate': True,
	            # Increases the network wait time.
	            'socket_timeout': 30,
	            'quiet': True,
	            'no_warnings': True,
	            # LAYER 3: USER AGENT SPOOFING
	            # The embedding API takes request headers via 'http_headers';
	            # a top-level 'user_agent' key is not a recognised option.
	            'http_headers': {
	                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36',
	            },
	        }

	    def download(self, url, progress=None):
	        """Download ``url`` and report where the file was stored.

	        Args:
	            url: Media URL handed to yt-dlp.
	            progress: Optional callable invoked as
	                ``progress(fraction, message)``.

	        Returns:
	            ``Result(True, file_path=...)`` on success, otherwise
	            ``Result(False, error=...)``. Exceptions are logged and turned
	            into a failed ``Result`` instead of being propagated.
	        """
	        try:
	            # Compare against None instead of truth-testing: a progress
	            # object that defines __len__ may be falsy or raise when tested.
	            if progress is not None:
	                progress(0.1, "Initializing download...")

	            with yt_dlp.YoutubeDL(self._build_options()) as ydl:
	                info = ydl.extract_info(url, download=True)
	                file_path = Path(ydl.prepare_filename(info))

	            # Fallback check if filename differs: take the most recently
	            # modified regular file in the download directory.
	            if not file_path.exists():
	                candidates = [p for p in self.output_dir.glob("*") if p.is_file()]
	                if not candidates:
	                    return Result(False, error="Download finished but file not found.")
	                file_path = max(candidates, key=lambda p: p.stat().st_mtime)

	            return Result(True, file_path=str(file_path))

	        except Exception as e:
	            # Top-level boundary for the download step: log with traceback
	            # and hand the message back to the caller.
	            logger.exception("Download Error: %s", e)
	            return Result(False, error=str(e))

	class Processor:
	    """Acquire media (upload or URL), transcribe it, and write TXT/SRT files."""

	    def __init__(self):
	        """Create a temporary workspace and the downloader that writes into it."""
	        self.tmp = Path(tempfile.mkdtemp())
	        self.downloader = MediaDownloader(self.tmp / "download")

	        # Lazy load whisper to save startup time
	        self.model = None

	    def load_model(self):
	        """Load the faster-whisper model on first use; later calls do nothing."""
	        if self.model is None:
	            from faster_whisper import WhisperModel
	            self.model = WhisperModel("medium", device="cpu", compute_type="int8")

	    @staticmethod
	    def _srt_timestamp(seconds):
	        """Format an offset in seconds as an SRT ``HH:MM:SS,mmm`` timestamp."""
	        total_ms = max(0, int(round(seconds * 1000)))
	        hours, remainder = divmod(total_ms, 3600000)
	        minutes, remainder = divmod(remainder, 60000)
	        secs, millis = divmod(remainder, 1000)
	        return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"

	    def run(self, url, upload, lang, progress=gr.Progress()):
	        """Transcribe an uploaded file or a downloaded URL.

	        Args:
	            url: Media URL; used only when no upload is given.
	            upload: Uploaded file (path, or an object exposing ``.name``);
	                takes precedence over ``url``.
	            lang: Language tag; only the part before the first ``-`` is
	                passed to the model. An empty value lets the model detect
	                the language.
	            progress: Progress callback invoked as
	                ``progress(fraction, message)``.

	        Returns:
	            A ``(status message, txt path, srt path)`` tuple; both paths are
	            ``None`` when the run fails or no input was provided.
	        """
	        try:
	            # 1. Acquire Media
	            url = (url or "").strip()
	            if upload:
	                # NOTE(review): some Gradio versions appear to pass a
	                # temp-file wrapper with a ``.name`` path instead of a plain
	                # path string - confirm against the deployed version.
	                if isinstance(upload, (str, os.PathLike)):
	                    target_file = Path(upload)
	                else:
	                    target_file = Path(getattr(upload, "name", upload))
	            elif url:
	                res = self.downloader.download(url, progress)
	                if not res.success:
	                    return f"❌ Error: {res.error}", None, None
	                target_file = Path(res.file_path)
	            else:
	                return "Please provide URL or File", None, None

	            # 2. Transcribe
	            progress(0.3, "Loading Model...")
	            self.load_model()

	            progress(0.5, "Transcribing...")
	            lang_code = lang.split("-")[0] if lang else None
	            segments, _ = self.model.transcribe(str(target_file), language=lang_code, beam_size=5)

	            # Collect result
	            full_text = []
	            srt_content = []
	            for index, seg in enumerate(segments, 1):
	                # Use the stripped text for both outputs so the transcript
	                # does not pick up doubled spaces between segments.
	                text = seg.text.strip()
	                full_text.append(text)
	                # SRT cue with real millisecond precision.
	                start = self._srt_timestamp(seg.start)
	                end = self._srt_timestamp(seg.end)
	                srt_content.append(f"{index}\n{start} --> {end}\n{text}\n")

	            text_str = " ".join(full_text)
	            srt_str = "\n".join(srt_content)

	            # Save files
	            out_txt = self.tmp / "transcript.txt"
	            out_srt = self.tmp / "subs.srt"
	            out_txt.write_text(text_str, encoding="utf-8")
	            out_srt.write_text(srt_str, encoding="utf-8")

	            return f"✅ Done! ({len(text_str)} chars)", str(out_txt), str(out_srt)

	        except Exception as e:
	            # Top-level boundary for the UI callback: keep the traceback in
	            # the log and show the message to the user.
	            logger.exception("Transcription failed")
	            return f"❌ Critical Error: {str(e)}", None, None

	# UI Setup: one shared Processor instance backs every request.
	proc = Processor()

	with gr.Blocks(title="Transcriber Pro") as demo:
	    gr.Markdown("## 🎙️ Media Transcriber Pro (IPv4 Fix)")

	    # Inputs: a URL or an uploaded file, plus the transcription language.
	    with gr.Row():
	        url_in = gr.Textbox(label="YouTube URL")
	        file_in = gr.File(label="Upload File")

	    lang_in = gr.Dropdown(choices=["ar", "en"], value="ar", label="Language")
	    btn = gr.Button(value="Transcribe", variant="primary")

	    # Outputs: a status line and the two generated files.
	    status = gr.Textbox(label="Status")
	    with gr.Row():
	        f1 = gr.File(label="TXT")
	        f2 = gr.File(label="SRT")

	    btn.click(
	        fn=proc.run,
	        inputs=[url_in, file_in, lang_in],
	        outputs=[status, f1, f2],
	    )

	# Start the server on all interfaces, port 7860, when run as a script.
	if __name__ == "__main__":
	    demo.launch(server_name="0.0.0.0", server_port=7860)