# Podcastify — app.py (Hugging Face Space by eswardivi, revision 8bb652f)
import gradio as gr
import spaces
import os, torch, io
import json
os.system("python -m unidic download")
import httpx
# print("Make sure you've downloaded unidic (python -m unidic download) for this WebUI to work.")
from melo.api import TTS
import tempfile
import wave
from pydub import AudioSegment
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
BitsAndBytesConfig,
)
# --- Module-level model setup (runs once at import time) ---
# 4-bit quantization so the 8B-parameter model fits on the Space's GPU;
# activations are computed in fp16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16
)
# Hermes-2-Pro is tuned for structured (JSON) output, which `synthesize`
# relies on when parsing the generated conversation.
model = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Hermes-2-Pro-Llama-3-8B",
    quantization_config=quantization_config,
)
tok = AutoTokenizer.from_pretrained("NousResearch/Hermes-2-Pro-Llama-3-8B")
# Stop generation at either the model's EOS token or the Llama-3 chat
# end-of-turn marker.
terminators = [tok.eos_token_id, tok.convert_tokens_to_ids("<|eot_id|>")]
def fetch_text(url):
    """Fetch a web page as LLM-ready plain text via the Jina Reader proxy.

    Args:
        url: The article URL to fetch (scheme included).

    Returns:
        The page content as text, as rendered by https://r.jina.ai/.

    Raises:
        httpx.HTTPStatusError: if the proxy responds with a 4xx/5xx status,
            so an error page is never silently fed into the LLM prompt.
        httpx.TimeoutException: if the request exceeds 60 seconds.
    """
    print("Entered Webpage Extraction")
    # r.jina.ai prefixes any URL and returns a cleaned, readable text version.
    proxied_url = "https://r.jina.ai/" + url
    response = httpx.get(proxied_url, timeout=60.0)
    # Previously missing: a failed fetch returned the error body as "article".
    response.raise_for_status()
    return response.text
@spaces.GPU
def synthesize(article_url, progress=gr.Progress()):
    """Turn an article URL into a two-speaker podcast MP3.

    Pipeline: fetch the article text, ask the LLM to rewrite it as a
    JSON-formatted two-person conversation, then synthesize each turn
    with MeloTTS (alternating voices) and concatenate into one MP3.

    Args:
        article_url: URL of the article to convert.
        progress: Gradio progress tracker, forwarded to the TTS pbar.

    Returns:
        Path to the exported MP3 file ("final.mp3").

    Raises:
        json.JSONDecodeError: if the model output contains no valid JSON.
    """
    article_text = fetch_text(article_url)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    template = """
{
    "conversation": [
        {"speaker": "", "text": ""},
        {"speaker": "", "text": ""}
    ]
}
"""
    chat = [
        {
            "role": "user",
            "content": f"{article_text} \n Convert the text as Elaborate Conversation between two people as Podcast.\nfollowing this template \n {template}",
        }
    ]
    prompt = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    model_inputs = tok([prompt], return_tensors="pt").to(device)
    # BUG FIX: pass the input-ids tensor (plus attention mask), not the whole
    # BatchEncoding object, to generate().
    generated_ids = model.generate(
        model_inputs.input_ids,
        attention_mask=model_inputs.attention_mask,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.9,
        eos_token_id=terminators,
    )
    # BUG FIX: the original fed the raw token-id tensor to json.loads().
    # Strip the prompt tokens, decode to text, then cut out the JSON object
    # (the model may emit chatter around it).
    new_token_ids = generated_ids[0][model_inputs.input_ids.shape[-1]:]
    generated_text = tok.decode(new_token_ids, skip_special_tokens=True)
    json_start = generated_text.find("{")
    json_end = generated_text.rfind("}") + 1
    conversation = json.loads(generated_text[json_start:json_end])

    speed = 1.0
    tts_model = TTS(language="EN", device=device)
    # Alternate between two English voices so the dialogue is audible as
    # two distinct speakers.
    speakers = ["EN-Default", "EN-US"]
    combined_audio = AudioSegment.empty()
    for i, turn in enumerate(conversation["conversation"]):
        wav_buffer = io.BytesIO()
        speaker_id = tts_model.hps.data.spk2id[speakers[i % 2]]
        tts_model.tts_to_file(
            turn["text"], speaker_id, wav_buffer,
            speed=speed, pbar=progress.tqdm, format="wav",
        )
        wav_buffer.seek(0)
        combined_audio += AudioSegment.from_file(wav_buffer, format="wav")
    final_audio_path = "final.mp3"
    combined_audio.export(final_audio_path, format="mp3")
    return final_audio_path
# --- Gradio UI: one URL in, one podcast audio file out ---
with gr.Blocks() as demo:
    gr.Markdown("# Not Ready to USE")
    gr.Markdown("# Turn Any Article into Podcast")
    gr.Markdown("## Easily convert articles from URLs into listenable audio Podcast.")
    with gr.Group():
        article_link = gr.Textbox(label="Article Link")
        convert_button = gr.Button("Podcasitfy", variant="primary")
    podcast_audio = gr.Audio(interactive=False)
    # Clicking the button runs the full fetch -> LLM -> TTS pipeline.
    convert_button.click(synthesize, inputs=[article_link], outputs=[podcast_audio])

demo.queue(api_open=True, default_concurrency_limit=10).launch(show_api=True)