Spaces:

tomhagen1989
/

podcast_summary_trial

Runtime error

App Files Files Community

podcast_summary_trial / app.py

tomhagen1989

Update app.py

44698f3 2 months ago

raw

history blame contribute delete

No virus

3.55 kB

	import gradio as gr
	import yt_dlp
	from collections import defaultdict
	import tempfile
	import google.generativeai as genai
	import os

	# Path of the text file holding the system instruction; it is read with
	# read_system_instruction_from_file() and the result is passed to the
	# Gemini model as its system_instruction.
	prompt_filepath = "./system_instructions.txt"
	def read_system_instruction_from_file(filepath):
	    """Read the system instruction text from a file.

	    Args:
	        filepath: Path of the text file to read.

	    Returns:
	        The file contents with leading and trailing whitespace removed.
	        This function never raises: if the file does not exist, or any
	        other error occurs while reading it, a message starting with
	        "Error" is returned in place of the contents.
	    """
	    try:
	        # Decode explicitly as UTF-8 so the result does not depend on the
	        # platform's default locale encoding.
	        with open(filepath, 'r', encoding='utf-8') as f:
	            return f.read().strip()
	    except FileNotFoundError:
	        return "Error: System instruction file not found."
	    except Exception as e:
	        # Callers expect a string back, so report the failure as text.
	        return f"Error reading system instruction file: {str(e)}"


	def _find_downloaded_vtt(download_dir):
	    """Return the path of the first .vtt file in download_dir, or None."""
	    vtt_names = sorted(
	        name for name in os.listdir(download_dir) if name.endswith('.vtt')
	    )
	    if not vtt_names:
	        return None
	    return os.path.join(download_dir, vtt_names[0])


	def download_and_convert_subtitles(url):
	    """Download a YouTube video's automatic VTT subtitles and process them.

	    The subtitles are written to a temporary directory and the resulting
	    file is handed to extract_text_from_vtt(); whatever that function
	    returns is returned from here.

	    Args:
	        url: The URL of the YouTube video.

	    Returns:
	        The string returned by extract_text_from_vtt() for the downloaded
	        subtitles file, or an error message starting with "Error" when the
	        URL is empty, no subtitles file was produced, or any exception
	        occurred during download or processing.
	    """
	    if not url or not url.strip():
	        return "Error: Please enter a YouTube video URL."
	    url = url.strip()

	    # The subtitles only need to live until they have been processed, so a
	    # temporary directory is used and cleaned up automatically.
	    with tempfile.TemporaryDirectory() as download_dir:
	        ydl_opts = {
	            'skip_download': True,  # subtitles only, no media
	            'writeautomaticsub': True,
	            'subtitlesformat': 'vtt',
	            'outtmpl': f'{download_dir}/%(id)s.%(ext)s',
	        }

	        try:
	            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	                ydl.download([url])

	            # The subtitle file name contains the subtitle track's language
	            # code, which is not reliably available from the video metadata,
	            # so look for the file that was actually written instead of
	            # guessing its name.
	            vtt_file_path = _find_downloaded_vtt(download_dir)
	            if vtt_file_path is None:
	                return "Error: No automatic subtitles were found for this video."

	            # Must run while the temporary directory still exists.
	            return extract_text_from_vtt(vtt_file_path)

	        except Exception as e:
	            # UI boundary: surface the failure as text instead of crashing.
	            return f"Error downloading and converting subtitles: {str(e)}"


	def _read_vtt_caption_lines(vtt_file_path):
	    """Return the caption text lines of a VTT file, tags and repeats removed."""
	    import re  # local import: the file's import block does not provide it

	    tag_pattern = re.compile(r"<[^>]*>")
	    caption_lines = []
	    in_cue = False  # True between a timing line and the next empty line
	    with open(vtt_file_path, 'r', encoding='utf-8') as f:
	        for raw_line in f:
	            line = raw_line.rstrip("\r\n")
	            if '-->' in line:
	                # Timing line: the cue's text follows.
	                in_cue = True
	                continue
	            if not line:
	                # A truly empty line ends the cue. Whitespace-only lines do
	                # not, so text following them in the same cue is kept.
	                in_cue = False
	                continue
	            if not in_cue:
	                # Header and other non-cue lines (e.g. "WEBVTT") are not
	                # part of the transcript.
	                continue
	            # Remove inline tags but keep the words on both sides of them.
	            text = tag_pattern.sub("", line).strip()
	            # Skip lines that merely repeat the previous caption line.
	            if text and (not caption_lines or caption_lines[-1] != text):
	                caption_lines.append(text)
	    return caption_lines


	def extract_text_from_vtt(vtt_file_path):
	    """Extract the caption text of a VTT file and send it to a Gemini model.

	    Despite its name, this function does not return the raw caption text:
	    the text is sent as the user message of a new chat with the
	    "gemini-1.5-pro-latest" model, and the model's reply is returned.

	    Args:
	        vtt_file_path: The path to the VTT file (in temporary directory).

	    Returns:
	        The text of the model's reply, or an error message starting with
	        "Error" when the file contains no caption text.

	    Raises:
	        OSError: If the VTT file cannot be opened. Exceptions raised by the
	            model client are not caught here either.
	    """
	    caption_lines = _read_vtt_caption_lines(vtt_file_path)
	    if not caption_lines:
	        # There is nothing to send to the model.
	        return "Error: No caption text found in the subtitles file."
	    user_prompt = "\n".join(caption_lines)

	    genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
	    generation_config = {
	        "temperature": 1,
	        "top_p": 0.95,
	        "top_k": 0,
	        "max_output_tokens": 8192,
	    }
	    safety_settings = [
	        {
	            "category": "HARM_CATEGORY_HARASSMENT",
	            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
	        },
	        {
	            "category": "HARM_CATEGORY_HATE_SPEECH",
	            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
	        },
	        {
	            "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
	            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
	        },
	        {
	            "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
	            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
	        },
	    ]
	    # NOTE(review): if the instruction file cannot be read, the helper
	    # returns an "Error..." string, which is then used as the system
	    # instruction as-is.
	    system_instruction = read_system_instruction_from_file(prompt_filepath)

	    model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest",
	                                  generation_config=generation_config,
	                                  system_instruction=system_instruction,
	                                  safety_settings=safety_settings)
	    convo = model.start_chat(history=[])
	    convo.send_message(user_prompt)
	    return convo.last.text


	# Web UI: one text input (the video URL) and one text output, both wired to
	# download_and_convert_subtitles.
	interface = gr.Interface(
	    title="Podcast Summary Generator",
	    description="Enter a YouTube video URL of a podcast to get a short summary.",
	    fn=download_and_convert_subtitles,
	    inputs="text",
	    outputs="text",
	)

	# Start the Gradio app as soon as this script is executed.
	interface.launch()