# Spaces: Runtime error  (page-status text captured with this file; not part of the program)
import os
import re
import tempfile
from collections import defaultdict

import google.generativeai as genai
import gradio as gr
import yt_dlp
# Path of the text file holding the model's system instruction. It is anchored
# to this script's directory so that it resolves to the same file no matter
# which working directory the app is launched from.
prompt_filepath = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "system_instructions.txt"
)
def read_system_instruction_from_file(filepath):
    """Reads the model's system instruction from a text file.

    Args:
        filepath: Path to the text file; it is decoded as UTF-8.

    Returns:
        The file contents with leading/trailing whitespace removed. On failure
        no exception is raised; instead a string starting with "Error" is
        returned that describes what went wrong.
    """
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default locale encoding.
        with open(filepath, 'r', encoding='utf-8') as f:
            return f.read().strip()
    except FileNotFoundError:
        return "Error: System instruction file not found."
    except Exception as e:
        return f"Error reading system instruction file: {str(e)}"
def download_and_convert_subtitles(url):
    """Downloads a YouTube video's automatic VTT subtitles and summarizes them.

    The subtitles are written to a temporary directory by yt-dlp and the
    resulting VTT file is handed to ``extract_text_from_vtt``.

    Args:
        url: The URL of the YouTube video.

    Returns:
        The string returned by ``extract_text_from_vtt`` for the downloaded
        subtitles, or an error message starting with
        "Error downloading and converting subtitles" if anything fails.
    """
    if not url or not url.strip():
        return "Error downloading and converting subtitles: no URL provided."

    # The VTT file only exists while the temporary directory does, so the
    # file must be consumed before leaving this ``with`` block.
    with tempfile.TemporaryDirectory() as download_dir:
        ydl_opts = {
            'skip_download': True,
            'writeautomaticsub': True,
            'subtitlesformat': 'vtt',
            'outtmpl': f'{download_dir}/%(id)s.%(ext)s',
        }
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url.strip()])  # Writes subtitles only

            # yt-dlp inserts the subtitle language code into the file name,
            # and that code cannot be derived reliably from the video
            # metadata. Pick up whichever .vtt file was actually written.
            vtt_files = sorted(
                name for name in os.listdir(download_dir)
                if name.endswith('.vtt')
            )
            if not vtt_files:
                return ("Error downloading and converting subtitles: "
                        "no VTT subtitles were found for this video.")
            vtt_file_path = os.path.join(download_dir, vtt_files[0])
            return extract_text_from_vtt(vtt_file_path)
        except Exception as e:
            # UI boundary: report every failure as text instead of crashing.
            return f"Error downloading and converting subtitles: {str(e)}"
# Matches inline WebVTT markup such as <c>, </c> and <00:00:01.000> timestamps.
_VTT_TAG_RE = re.compile(r"<[^>]*>")


def _read_vtt_captions(vtt_file_path):
    """Returns the caption text of a VTT file as one newline-joined string."""
    text_lines = []
    seen_first_cue = False
    with open(vtt_file_path, 'r', encoding='utf-8') as f:
        for line in f:
            if '-->' in line:  # Cue timing line
                seen_first_cue = True
                continue
            if not seen_first_cue:
                # Everything before the first cue is file header
                # (e.g. "WEBVTT", "Kind:", "Language:"), not speech.
                continue
            # Remove inline tags but keep the words between them.
            text = _VTT_TAG_RE.sub("", line).strip()
            if not text:
                continue
            # Rolling captions repeat the previous line in the next cue;
            # keep only one copy of consecutive duplicates.
            if text_lines and text_lines[-1] == text:
                continue
            text_lines.append(text)
    return "\n".join(text_lines)


def extract_text_from_vtt(vtt_file_path):
    """Extracts the captions from a VTT file and sends them to Gemini.

    The caption text is used as the user prompt of a new chat with the
    "gemini-1.5-pro-latest" model, configured with the system instruction
    read from ``prompt_filepath`` and the API key taken from the
    ``GEMINI_API_KEY`` environment variable.

    Args:
        vtt_file_path: The path to the VTT file (in temporary directory).

    Returns:
        The text of the model's reply to the caption text.

    Raises:
        ValueError: If the VTT file contains no caption text.
    """
    user_prompt = _read_vtt_captions(vtt_file_path)
    if not user_prompt:
        # Fail before configuring or calling the API with an empty prompt.
        raise ValueError(f"No caption text found in VTT file: {vtt_file_path}")

    genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
    generation_config = {
        "temperature": 1,
        "top_p": 0.95,
        "top_k": 0,
        "max_output_tokens": 8192,
    }
    safety_settings = [
        {"category": category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
        for category in (
            "HARM_CATEGORY_HARASSMENT",
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
        )
    ]
    # NOTE(review): read_system_instruction_from_file returns an "Error..."
    # string instead of raising, so a missing file ends up being used as the
    # system instruction - confirm whether that is intended.
    system_instruction = read_system_instruction_from_file(prompt_filepath)
    model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest",
                                  generation_config=generation_config,
                                  system_instruction=system_instruction,
                                  safety_settings=safety_settings)
    convo = model.start_chat(history=[])
    convo.send_message(user_prompt)
    return convo.last.text
# Web UI: a single text box takes the video URL and the text returned by
# download_and_convert_subtitles is shown as the result.
interface = gr.Interface(
    title="Podcast Summary Generator",
    description="Enter a YouTube video URL of a podcast to get a short summary.",
    fn=download_and_convert_subtitles,
    inputs="text",
    outputs="text",
)

# Start serving the UI as soon as the module is executed.
interface.launch()