File size: 2,909 Bytes
416840c ecb2850 416840c ecb2850 83b685c ecb2850 416840c ecb2850 416840c ecb2850 416840c ecb2850 416840c ecb2850 416840c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import gradio as gr
import PyPDF2
import docx
import requests
from bs4 import BeautifulSoup
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment
import os
import io
# Module-level Groq API client shared by generate_podcast_script().
# NOTE(review): constructed with no arguments, so credentials presumably come
# from the environment (e.g. GROQ_API_KEY) - confirm against the Groq SDK docs.
groq_client = Groq()
def extract_text(file_or_url):
    """Return the plain text of an uploaded document or of a web page.

    Args:
        file_or_url: One of

            * ``None`` - nothing was supplied;
            * a ``str`` - either the path of an existing local ``.pdf`` /
              ``.docx`` file, or otherwise a URL to download;
            * an upload object exposing ``.name`` and, optionally, an open
              binary stream as ``.file``.

    Returns:
        The extracted text, or ``""`` when the input is missing/blank or the
        file extension is neither ``.pdf`` nor ``.docx``.

    Raises:
        requests.RequestException: If the URL cannot be fetched, times out,
            or the server answers with an HTTP error status.
    """
    if file_or_url is None:
        return ""

    if isinstance(file_or_url, str):
        source = file_or_url.strip()
        if not source:
            # A blank textbox is "no input", not a URL worth requesting.
            return ""
        if not os.path.isfile(source):
            # Not a file on disk, so treat it as a URL. The timeout keeps a
            # stalled server from hanging the request forever, and the status
            # check stops an error page from being used as source material.
            response = requests.get(source, timeout=30)
            response.raise_for_status()
            return BeautifulSoup(response.text, 'html.parser').get_text()
        # Uploads may arrive as a plain path string; both parsers below
        # accept a path as well as a stream.
        name = stream = source
    else:
        name = file_or_url.name
        stream = getattr(file_or_url, "file", None)
        if stream is None:
            stream = name

    lowered = name.lower()  # accept ".PDF" / ".Docx" as well
    if lowered.endswith('.pdf'):
        reader = PyPDF2.PdfReader(stream)
        # "or ''" guards against a page yielding no text object at all.
        return ' '.join(page.extract_text() or "" for page in reader.pages)
    if lowered.endswith('.docx'):
        document = docx.Document(stream)
        return ' '.join(para.text for para in document.paragraphs)
    return ""
def generate_podcast_script(text):
    """Ask the LLM for a two-speaker podcast script discussing *text*.

    The prompt explicitly requests one spoken turn per line, prefixed with
    ``Man:`` or ``Woman:``, because that is the only line format the
    text-to-speech step of this app turns into audio.

    Args:
        text: Source material the two speakers should discuss.

    Returns:
        The script as returned in the first completion choice.
    """
    prompt = (
        "Generate a podcast script between a man and a woman discussing the following text:\n"
        f"{text}\n"
        "The podcast should be informative and engaging, with a natural conversation flow.\n"
        "Limit the script to approximately 750 words to fit within a 5-minute podcast.\n"
        'Write every spoken turn on its own single line that starts with exactly "Man:" or '
        '"Woman:" followed by what is said. Do not use markdown, stage directions, titles, '
        "or any other speaker labels."
    )
    response = groq_client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You are an AI assistant that generates podcast scripts based on given text."},
            {"role": "user", "content": prompt},
        ],
        model="llama-3.1-70b-versatile",  # Using LLaMa 3.1 70B model
        # NOTE(review): 1000 tokens is tight for ~750 words; the script may be
        # cut off mid-sentence - consider raising after checking model limits.
        max_tokens=1000,
        temperature=0.7,
    )
    return response.choices[0].message.content
def text_to_speech(script, max_duration_ms=300000):
    """Render a ``Man:`` / ``Woman:`` script to a single MP3 held in memory.

    Each line starting with ``Man:`` or ``Woman:`` (leading whitespace and
    markdown emphasis characters are tolerated) is synthesised with gTTS,
    using a different regional accent per speaker. Any other line is ignored.

    Args:
        script: The podcast script, one spoken turn per line.
        max_duration_ms: Upper bound for the resulting audio in milliseconds.
            Defaults to 300000 (5 minutes).

    Returns:
        An ``io.BytesIO`` positioned at offset 0 containing the MP3 data.

    Raises:
        ValueError: If the script contains no non-empty ``Man:`` / ``Woman:``
            line, i.e. there is nothing to synthesise.
    """
    audio_segments = []
    total_ms = 0
    for raw_line in script.split('\n'):
        # Models often indent or emphasise labels ("**Man:** ..."); strip that
        # decoration so such turns are voiced instead of silently dropped.
        line = raw_line.strip().lstrip("*_# ")
        if line.startswith("Man:"):
            spoken, accent = line[4:], 'co.uk'
        elif line.startswith("Woman:"):
            spoken, accent = line[6:], 'com.au'
        else:
            continue

        spoken = spoken.strip("*_ \t")
        if not spoken:
            # gTTS refuses empty text; an empty turn has nothing to say anyway.
            continue

        tts = gTTS(spoken, lang='en', tld=accent)
        mp3_buffer = io.BytesIO()
        tts.write_to_fp(mp3_buffer)
        mp3_buffer.seek(0)
        segment = AudioSegment.from_mp3(mp3_buffer)
        audio_segments.append(segment)

        # len() of a segment is its duration in ms. Anything beyond the cap
        # would be trimmed below, so skip the remaining synthesis requests.
        total_ms += len(segment)
        if total_ms >= max_duration_ms:
            break

    if not audio_segments:
        raise ValueError(
            "No lines starting with 'Man:' or 'Woman:' were found in the script."
        )

    # Concatenate with an explicit start value instead of the implicit 0.
    final_audio = sum(audio_segments[1:], audio_segments[0])
    final_audio = final_audio[:max_duration_ms]  # Trim to the maximum length

    output = io.BytesIO()
    final_audio.export(output, format="mp3")
    output.seek(0)
    return output
def generate_podcast(file_or_url, url=None):
    """Gradio callback: turn an uploaded document or a URL into a podcast.

    The interface wires two inputs (file upload and URL textbox) to this
    function, so it accepts both; the uploaded file wins when both are given.
    ``url`` is optional so existing single-argument calls keep working.

    Args:
        file_or_url: Uploaded file object/path, a URL string, or ``None``.
        url: URL typed into the textbox; used only when ``file_or_url`` is
            ``None``.

    Returns:
        A ``(audio_path, script)`` tuple. ``audio_path`` is the path of a
        temporary ``.mp3`` file, or ``None`` together with an error message
        when no text could be extracted.
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    source = file_or_url if file_or_url is not None else url
    if isinstance(source, str) and not source.strip():
        # An empty textbox arrives as "", which is not a fetchable URL.
        source = None

    text = extract_text(source)
    if not text or not text.strip():
        return None, "Failed to extract text. Please check your input."

    script = generate_podcast_script(text)
    audio_buffer = text_to_speech(script)

    # The audio output component is given a file path rather than the
    # in-memory buffer. The file must outlive this call so it can be served,
    # hence delete=False.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        tmp.write(audio_buffer.getvalue())
    return tmp.name, script
# --- Gradio UI -------------------------------------------------------------
# Inputs: a document upload and a URL box. Outputs: the rendered audio and
# the script it was generated from.
podcast_inputs = [
    gr.File(label="Upload PDF/DOC file"),
    gr.Textbox(label="Or enter URL"),
]
podcast_outputs = [
    gr.Audio(label="Generated Podcast"),
    gr.Textbox(label="Podcast Script"),
]

iface = gr.Interface(
    fn=generate_podcast,
    inputs=podcast_inputs,
    outputs=podcast_outputs,
    title="Custom NotebookLM-type Podcast Generator (LLaMa 3.1 70B)",
)

# Start the web server for the interface.
iface.launch()