File size: 2,909 Bytes
416840c
 
 
 
ecb2850
 
 
 
 
 
416840c
ecb2850
83b685c
ecb2850
 
 
 
 
 
 
 
 
 
 
 
 
416840c
 
ecb2850
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416840c
ecb2850
 
 
 
 
 
 
 
 
 
 
 
 
 
416840c
 
 
 
ecb2850
 
 
 
 
 
416840c
ecb2850
416840c
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import gradio as gr
import PyPDF2
import docx
import requests
from bs4 import BeautifulSoup
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment
import os
import io

# Module-level Groq client, created once at import time and used by
# generate_podcast_script() for every chat-completion request.
# No api_key argument is passed, so credentials must come from the SDK's
# default lookup (presumably the GROQ_API_KEY environment variable — confirm
# against the Groq SDK documentation).
groq_client = Groq()

def extract_text(file_or_url, timeout=30):
    """Return the plain text of a web page or an uploaded PDF/DOCX file.

    Args:
        file_or_url: Either a URL string, or an uploaded-file object exposing
            a ``name`` attribute (used for the extension check) and,
            optionally, a ``file`` attribute holding an open binary stream.
            ``None`` and blank strings are accepted and yield ``""``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The extracted text, or ``""`` when nothing could be extracted
        (no input, unreachable/erroring URL, or unsupported file extension).
    """
    if file_or_url is None:
        return ""

    if isinstance(file_or_url, str):  # URL
        url = file_or_url.strip()
        if not url:
            return ""
        try:
            response = requests.get(url, timeout=timeout)
            # Do not turn a 404/500 error page into podcast material.
            response.raise_for_status()
        except requests.RequestException:
            # Reported to the caller as "nothing extracted".
            return ""
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text()

    # Extension match is case-insensitive so "REPORT.PDF" is handled too.
    filename = file_or_url.name.lower()
    # Prefer the open stream; fall back to the path when the object has none.
    # Both PdfReader and docx.Document accept either form.
    stream = getattr(file_or_url, "file", None)
    if stream is None:
        stream = file_or_url.name

    if filename.endswith('.pdf'):
        reader = PyPDF2.PdfReader(stream)
        # A page without extractable text must not break the join below.
        return ' '.join(page.extract_text() or "" for page in reader.pages)
    if filename.endswith('.docx'):
        doc = docx.Document(stream)
        return ' '.join(para.text for para in doc.paragraphs)
    return ""

def generate_podcast_script(text):
    """Ask the Groq chat model for a two-speaker podcast script about ``text``.

    The prompt requests that every line of dialogue start with "Man:" or
    "Woman:" because text_to_speech() only voices lines carrying one of
    those two prefixes; anything else in the script is skipped.

    Args:
        text: Source material the two speakers should discuss.

    Returns:
        The generated script as a string (``""`` if the model returned no
        content).
    """
    prompt = f"""Generate a podcast script between a man and a woman discussing the following text:
    {text}
    The podcast should be informative and engaging, with a natural conversation flow.
    Write each speaker turn on its own line, and start every line with exactly "Man:" or "Woman:" in plain text (no markdown, no other speaker names, no stage directions).
    Limit the script to approximately 750 words to fit within a 5-minute podcast."""

    response = groq_client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You are an AI assistant that generates podcast scripts based on given text."},
            {"role": "user", "content": prompt}
        ],
        # NOTE(review): confirm this model id is still offered by Groq.
        model="llama-3.1-70b-versatile",  # Using LLaMa 3.1 70B model
        max_tokens=1000,
        temperature=0.7
    )
    # The message content is optional in the response; normalise to a string.
    return response.choices[0].message.content or ""

def text_to_speech(script, max_duration_ms=300_000):
    """Render a "Man:"/"Woman:" dialogue script into a single MP3.

    Each line that starts with "Man:" or "Woman:" is synthesised with gTTS
    using a different regional accent per speaker, and the clips are
    concatenated in script order. Lines without one of those labels are
    ignored. Leading whitespace and markdown asterisks (e.g. ``**Man:**``)
    are removed before matching so lightly formatted scripts still work.

    Args:
        script: The dialogue text, one speaker turn per line.
        max_duration_ms: Maximum length of the result in milliseconds;
            anything beyond it is cut off. Defaults to 5 minutes.

    Returns:
        An ``io.BytesIO`` positioned at 0 that contains the MP3 data.

    Raises:
        ValueError: If the script contains no speakable "Man:"/"Woman:" line.
    """
    # Speaker label -> gTTS top-level domain (selects the accent).
    speaker_tlds = {"Man:": 'co.uk', "Woman:": 'com.au'}

    audio_segments = []
    for raw_line in (script or "").split('\n'):
        line = raw_line.replace('*', '').strip()
        for label, tld in speaker_tlds.items():
            if line.startswith(label):
                spoken = line[len(label):].strip()
                break
        else:
            continue  # Not a dialogue line.
        if not spoken:
            continue  # gTTS refuses empty text.
        buffer = io.BytesIO()
        gTTS(spoken, lang='en', tld=tld).write_to_fp(buffer)
        buffer.seek(0)
        audio_segments.append(AudioSegment.from_mp3(buffer))

    if not audio_segments:
        # sum([]) would be the int 0, which cannot be sliced or exported.
        raise ValueError(
            'Script contains no lines starting with "Man:" or "Woman:"; nothing to synthesise.'
        )

    final_audio = sum(audio_segments)
    final_audio = final_audio[:max_duration_ms]  # Trim to the maximum length
    buffer = io.BytesIO()
    final_audio.export(buffer, format="mp3")
    buffer.seek(0)
    return buffer

def generate_podcast(file_or_url, url=None):
    """Gradio callback: turn an uploaded file or a URL into a podcast.

    The interface supplies two inputs (the file upload and the URL textbox),
    so the function accepts both. The uploaded file takes precedence; the URL
    is used only when no file was provided. Calling it with a single
    file-or-URL argument keeps working.

    Args:
        file_or_url: Uploaded file object, a URL string, or ``None``.
        url: URL typed into the textbox; used when ``file_or_url`` is empty.

    Returns:
        A ``(audio_path, script)`` tuple. ``audio_path`` is the path of a
        temporary MP3 file, since the audio output is given a file path
        rather than an in-memory buffer. On extraction failure the tuple is
        ``(None, <error message>)``.
    """
    import tempfile

    def _is_blank(value):
        return value is None or (isinstance(value, str) and not value.strip())

    source = file_or_url if not _is_blank(file_or_url) else url
    if _is_blank(source):
        return None, "Failed to extract text. Please check your input."

    text = extract_text(source)
    if not text or not text.strip():
        return None, "Failed to extract text. Please check your input."

    script = generate_podcast_script(text)
    audio_buffer = text_to_speech(script)

    # Persist the in-memory MP3 so the UI can serve it from a file path.
    # delete=False: the file must outlive this call; it is left in the
    # system temp directory.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        tmp.write(audio_buffer.read())
    return tmp.name, script

# Web UI: two inputs (file upload, URL textbox) are passed positionally to
# generate_podcast, whose two return values fill the two outputs in order.
iface = gr.Interface(
    fn=generate_podcast,
    inputs=[
        # Only .pdf and .docx are parsed by extract_text, so the label and
        # the picker filter advertise exactly those (legacy .doc is not read).
        gr.File(label="Upload PDF/DOCX file", file_types=[".pdf", ".docx"]),
        gr.Textbox(label="Or enter URL")
    ],
    outputs=[
        gr.Audio(label="Generated Podcast"),
        gr.Textbox(label="Podcast Script")
    ],
    title="Custom NotebookLM-type Podcast Generator (LLaMa 3.1 70B)"
)

# Start the Gradio server when this script is executed.
iface.launch()