import os
import time

import librosa
import requests
import torch
import gradio as gr
from transformers import pipeline
from usellm import Message, Options, UseLLM
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import WhisperProcessor, WhisperForConditionalGeneration
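
# Pipeline overview: an audio question is translated to English with Whisper,
# the English prompt is answered independently by BioGPT and by ChatGPT (via
# useLLM), and both answers are voiced back with the ElevenLabs TTS API.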
def text_to_speech(text_input):
    """Convert text to speech with the ElevenLabs API and return the MP3 path."""
    CHUNK_SIZE = 1024
    url = "https://api.elevenlabs.io/v1/text-to-speech/TxGEqnHWrfWFTfGW9XjX"
    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        # Read the key from the environment instead of hardcoding a secret in
        # the source (ELEVEN_API_KEY is an arbitrary variable name; set it in
        # the Space's secrets).
        "xi-api-key": os.environ["ELEVEN_API_KEY"],
    }
    data = {
        "text": text_input,
        "model_id": "eleven_monolingual_v1"
    }
    audio_write_path = f"output_{int(time.time())}.mp3"
    response = requests.post(url, json=data, headers=headers)
    response.raise_for_status()  # surface API errors instead of writing an empty file
    with open(audio_write_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
            if chunk:
                f.write(chunk)
    return audio_write_path
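
# Minimal usage sketch (hypothetical text; requires the API key in the
# environment). The returned path plugs straight into a gradio Audio output:
#
#   mp3_path = text_to_speech("Meningitis is an inflammation of the meninges.")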
def whisper_inference(input_audio):
    """Translate an audio file to English text with Whisper large-v2."""
    # NOTE: the processor and model are reloaded on every call; hoist them to
    # module level if latency matters.
    processor1 = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
    model1 = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")
    forced_decoder_ids = processor1.get_decoder_prompt_ids(task="translate")
    # gr.Audio(type="filepath") hands us a path, so load and resample to the
    # 16 kHz mono waveform Whisper expects.
    speech, _ = librosa.load(input_audio, sr=16000)
    input_features = processor1(speech, sampling_rate=16000, return_tensors="pt").input_features
    predicted_ids = model1.generate(input_features, forced_decoder_ids=forced_decoder_ids)
    transcription = processor1.batch_decode(predicted_ids, skip_special_tokens=True)
    return transcription[0]  # batch_decode returns a list; unwrap the single string
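
# Because the decoder prompt uses task="translate", Whisper emits English text
# regardless of the spoken language. A hypothetical call against a local file:
#
#   english_prompt = whisper_inference("question_in_any_language.mp3")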
def biogpt_large_infer(input_text):
    """Continue the prompt with BioGPT-Large and strip its markup tokens."""
    tokenizer1 = AutoTokenizer.from_pretrained("microsoft/BioGPT-Large-PubMedQA", add_special_tokens=False)
    model = AutoModelForCausalLM.from_pretrained("microsoft/BioGPT-Large-PubMedQA")  # .to('cuda:0')
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer1)  # , device="cuda:0")
    output = generator(input_text, min_length=100, max_length=1024, num_beams=5,
                       early_stopping=True, num_return_sequences=1, do_sample=True)
    output = output[0]['generated_text']
    # BioGPT emits document-layout tokens (FREETEXT, TITLE, PARAGRAPH, ABSTRACT,
    # '▃', angle brackets); strip them before showing the text to the user.
    for token in ('▃', 'FREETEXT', 'TITLE', 'PARAGRAPH', 'ABSTRACT', '<', '>', '/'):
        output = output.replace(token, '')
    return output.strip()
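
# Sketch of a direct call (the example prompt mirrors the ones wired into the
# UI below):
#
#   answer = biogpt_large_infer("Meningitis is")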
def chatgpt_infer(input_text):
    """Query an OpenAI chat model through the useLLM proxy service."""
    # Initialize the service
    service = UseLLM(service_url="https://usellm.org/api/llm")
    # Prepare the conversation
    messages = [
        Message(role="system", content="You are a medical assistant, which answers the query based on factual medical information only."),
        Message(role="user", content=f"Give me a few points on the disease {input_text} and its treatment."),
    ]
    options = Options(messages=messages)
    # Interact with the service
    response = service.chat(options)
    return response.content
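
# Hypothetical direct call; the f-string embeds whatever text arrives, so a
# bare disease name reads most naturally:
#
#   points = chatgpt_infer("Meningitis")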
def audio_interface_demo(input_audio):
    """Full voice pipeline: transcribe, answer with both models, speak both answers."""
    en_prompt = whisper_inference(input_audio)
    biogpt_output = biogpt_large_infer(en_prompt)
    chatgpt_output = chatgpt_infer(en_prompt)
    bio_audio_output = text_to_speech(biogpt_output)
    chat_audio_output = text_to_speech(chatgpt_output)
    return biogpt_output, chatgpt_output, bio_audio_output, chat_audio_output
def text_interface_demo(input_text):
    """Run the typed prompt through both models (no speech synthesis)."""
    biogpt_output = biogpt_large_infer(input_text)
    chatgpt_output = chatgpt_infer(input_text)
    return biogpt_output, chatgpt_output
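
# text_interface_demo backs the "Text" tab's Predict button; audio_interface_demo
# backs the "Audio" tab's and additionally voices both answers with ElevenLabs.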
examples = [
    ["Meningitis is"],
    ["Brain Tumour is"],
]
app = gr.Blocks()
with app:
    gr.Markdown("<h4 align='center'>Voice-based Medical Informational Bot</h4>")
    with gr.Row():
        with gr.Column():
            with gr.Tab("Text"):
                input_text = gr.Textbox(lines=3, value="Brain Tumour is", label="Text")
                text_button = gr.Button(value="Predict")
            with gr.Tab("Audio"):
                input_audio = gr.Audio(value="input.mp3", source="upload", type="filepath", label="Audio")
                audio_button = gr.Button(value="Predict")
    with gr.Row():
        with gr.Column():
            with gr.Tab("Output Text"):
                biogpt_output = gr.Textbox(lines=3, label="BioGpt Output")
                chatgpt_output = gr.Textbox(lines=3, label="ChatGPT Output")
            with gr.Tab("Output Audio"):
                # Only the audio players live here; re-creating the textboxes in
                # this tab would rebind biogpt_output/chatgpt_output and leave
                # the "Output Text" tab permanently empty.
                audio_output1 = gr.Audio(value=None, label="ChatGPT Audio Output")
                audio_output2 = gr.Audio(value=None, label="BioGpt Audio Output")
    gr.Examples(examples, inputs=[input_text], outputs=[biogpt_output, chatgpt_output], fn=text_interface_demo, cache_examples=False)
    text_button.click(text_interface_demo, inputs=[input_text], outputs=[biogpt_output, chatgpt_output])
    audio_button.click(audio_interface_demo, inputs=[input_audio], outputs=[biogpt_output, chatgpt_output, audio_output2, audio_output1])
app.launch(debug=True)