"""Gradio app that turns a web article into a short two-voice podcast.

Pipeline: fetch the article text through the Jina reader proxy, ask a
4-bit quantized Hermes-2-Pro-Llama-3-8B model to rewrite it as a JSON
conversation, then synthesize every turn with MeloTTS and return the
conversation text plus the path of the combined MP3.
"""

import io
import json
import os
import re
import subprocess
import sys
import tempfile
import wave
from threading import Thread

# NOTE(review): the original file imported `spaces` before `torch`; that
# order is kept here (presumably required by ZeroGPU - confirm before
# reordering these imports).
import gradio as gr
import httpx
import spaces
import torch
from gradio_client import Client
from pydub import AudioSegment
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TextIteratorStreamer,
)

# The unidic dictionary is downloaded before melo is imported, as in the
# original file. sys.executable guarantees the download lands in the
# interpreter that runs this app; a failure stays non-fatal (check=False),
# matching the previous os.system() behaviour.
subprocess.run([sys.executable, "-m", "unidic", "download"], check=False)

from melo.api import TTS  # noqa: E402

MODEL_ID = "NousResearch/Hermes-2-Pro-Llama-3-8B"
JINA_READER_PREFIX = "https://r.jina.ai/"

# JSON skeleton shown to the model as the required output format.
CONVERSATION_TEMPLATE = """
{
    "conversation": [
        {"speaker": "", "text": ""},
        {"speaker": "", "text": ""}
    ]
}
"""

PROMPT_INSTRUCTIONS = (
    "\n Convert the provided text into a short, informative podcast "
    "conversation between two experts. The tone should be professional and "
    "engaging. Please adhere to the following format and return only JSON:\n"
)

# MeloTTS voices; conversation turns alternate between them by index.
SPEAKERS = ["EN-Default", "EN-US"]

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16
)
# NOTE(review): the tokenizer below is pinned to a revision but the model is
# not - consider pinning both to the same revision for reproducibility.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, quantization_config=quantization_config
)
tok = AutoTokenizer.from_pretrained(
    MODEL_ID, revision="8ab73a6800796d84448bc936db9bac5ad9f984ae"
)
terminators = [tok.eos_token_id, tok.convert_tokens_to_ids("<|eot_id|>")]


def _request_text(url):
    """GET *url* and return ``(body, error)``; exactly one of them is None.

    Keeping the error separate from the body lets callers detect a failure
    without inspecting the page content.
    """
    try:
        response = httpx.get(url, timeout=60.0)
        response.raise_for_status()
        return response.text, None
    except httpx.RequestError as e:
        return None, f"An error occurred while requesting {url}: {str(e)}"
    except httpx.HTTPStatusError as e:
        return None, f"Error response {e.response.status_code} while requesting {url}"
    except Exception as e:  # boundary: report instead of crashing the UI
        return None, f"An unexpected error occurred: {str(e)}"


def validate_url(url):
    """Return the response body of *url*, or an error message on failure."""
    body, error = _request_text(url)
    return body if error is None else error


def _fetch_article(url):
    """Fetch *url* through the Jina reader and return ``(text, error)``."""
    print("Entered Webpage Extraction")
    full_url = JINA_READER_PREFIX + url
    print(full_url)
    result = _request_text(full_url)
    # Logged only after the request has actually completed.
    print("Exited Webpage Extraction")
    return result


def fetch_text(url):
    """Return the reader-extracted text of *url*, or an error message."""
    body, error = _fetch_article(url)
    return body if error is None else error


def _generate_conversation_text(article_text, device):
    """Run the LLM on *article_text* and return its raw decoded output."""
    chat = [
        {
            "role": "user",
            "content": article_text + PROMPT_INSTRUCTIONS + CONVERSATION_TEMPLATE,
        }
    ]
    messages = tok.apply_chat_template(
        chat, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tok([messages], return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(
        tok, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.9,
        eos_token_id=terminators,
    )
    print("Entered Generation")
    # generate() blocks, so it runs in a worker thread while this thread
    # drains the streamer.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()
    generated = "".join(streamer)
    worker.join()
    print("Exited Generation")
    return generated


def _extract_conversation(raw_text):
    """Find the first JSON object in *raw_text* holding a conversation.

    Returns ``(parsed_dict, json_substring)`` for the first object that has
    a non-empty ``"conversation"`` list, or ``None`` when there is none.
    A real JSON decoder is used instead of a brace-matching regex so that
    nesting and braces inside strings are handled correctly.
    """
    decoder = json.JSONDecoder()
    start = raw_text.find("{")
    while start != -1:
        try:
            candidate, end = decoder.raw_decode(raw_text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            turns = candidate.get("conversation")
            if isinstance(turns, list) and turns:
                return candidate, raw_text[start:end]
        start = raw_text.find("{", start + 1)
    return None


def _render_audio(turns, device, progress, speed=1.0):
    """Synthesize *turns* and return the combined audio, or None if silent.

    Turns that are not dicts or carry no non-blank ``"text"`` string are
    skipped. Voices alternate by the turn's position in *turns*.
    """
    tts_model = TTS(language="EN", device=device)
    spk2id = tts_model.hps.data.spk2id
    combined_audio = AudioSegment.empty()
    spoken_turns = 0
    for i, turn in enumerate(turns):
        line = turn.get("text") if isinstance(turn, dict) else None
        if not isinstance(line, str) or not line.strip():
            continue
        speaker_id = spk2id[SPEAKERS[i % len(SPEAKERS)]]
        bio = io.BytesIO()
        tts_model.tts_to_file(
            line, speaker_id, bio, speed=speed, pbar=progress.tqdm, format="wav"
        )
        bio.seek(0)
        combined_audio += AudioSegment.from_file(bio, format="wav")
        spoken_turns += 1
    return combined_audio if spoken_turns else None


@spaces.GPU(duration=100)
def synthesize(article_url, progress_audio=gr.Progress()):
    """Convert the article at *article_url* into a podcast.

    Args:
        article_url: Absolute ``http://`` or ``https://`` URL of the article.
        progress_audio: Gradio progress tracker used during speech synthesis.

    Returns:
        A ``(conversation, audio_path)`` tuple. On success ``conversation``
        is the JSON text produced by the model and ``audio_path`` the path
        of the generated MP3. On failure ``conversation`` holds a
        human-readable message and ``audio_path`` is ``None``.
    """
    article_url = (article_url or "").strip()
    if not article_url.startswith(("http://", "https://")):
        return "URL must start with 'http://' or 'https://'", None

    article_text, fetch_error = _fetch_article(article_url)
    if fetch_error is not None:
        return fetch_error, None
    if not article_text.strip():
        return "No text could be extracted from the provided URL.", None

    device = "cuda" if torch.cuda.is_available() else "cpu"

    raw_output = _generate_conversation_text(article_text, device)
    print(raw_output)
    extracted = _extract_conversation(raw_output)
    if extracted is None:
        return (
            "The model did not return a valid conversation. Please try again.",
            None,
        )
    conversation_dict, conversation = extracted
    print(conversation)

    combined_audio = _render_audio(
        conversation_dict["conversation"], device, progress_audio
    )
    if combined_audio is None:
        # Nothing to speak: still show the conversation, but no audio.
        return conversation, None

    # Unique file per request: the queue serves several users concurrently,
    # so a fixed file name would let requests overwrite each other's audio.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as out_file:
        final_audio_path = out_file.name
    combined_audio.export(final_audio_path, format="mp3")
    return conversation, final_audio_path


with gr.Blocks(theme='gstaff/sketch') as demo:
    gr.Markdown("# Turn Any Article into a Podcast")
    gr.Markdown("## Easily convert articles from URLs into listenable audio podcasts.")
    gr.Markdown("### Instructions")
    gr.Markdown(
        "- **Step 1:** Paste the URL of the article you want to convert into the textbox.\n"
        '- **Step 2:** Click on "Podcastify" to generate the podcast.\n'
        "- **Step 3:** Listen to the podcast or view the conversation.\n"
    )
    gr.Markdown(
        "- View the code at [GitHub - NarrateIt](https://github.com/EswarDivi/NarrateIt).\n"
    )
    with gr.Group():
        text = gr.Textbox(label="Article Link")
        btn = gr.Button("Podcastify", variant="primary")
    with gr.Row():
        conv_display = gr.Textbox(label="Conversation", interactive=False)
        aud = gr.Audio(interactive=False)
    btn.click(synthesize, inputs=[text], outputs=[conv_display, aud])
    gr.Markdown(
        "Special thanks to:\n"
        "\n"
        "- [gstaff/sketch](https://huggingface.co/spaces/gstaff/sketch) for the Sketch Theme.\n"
        "- [mrfakename/MeloTTS](https://huggingface.co/spaces/mrfakename/MeloTTS) and [GitHub](https://github.com/myshell-ai/MeloTTS) for MeloTTS.\n"
        "- [Hermes-2-Pro-Llama-3-8B](https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B) for Function Calling Support.\n"
        "- [Jina AI](https://jina.ai/reader/) for the web page parsing.\n"
    )

demo.queue(api_open=True, default_concurrency_limit=10).launch(show_api=True, share=True)