fawadrashid's picture
Update app.py
4eb34af verified
raw
history blame contribute delete
No virus
1.11 kB
import gradio as gr
from transformers import pipeline
from transformers import BlipForQuestionAnswering
from transformers.utils import logging
logging.set_verbosity_error()
from transformers import AutoProcessor
# Hub checkpoint ids. The BLIP model and its processor are both loaded from
# the single VQA checkpoint id below.
_TTS_CHECKPOINT = "kakao-enterprise/vits-ljs"
_VQA_CHECKPOINT = "Salesforce/blip-vqa-base"

# Text-to-speech pipeline used to narrate the generated answer.
tts_pipe = pipeline("text-to-speech", model=_TTS_CHECKPOINT)

# Visual-question-answering model and the processor that prepares its inputs
# and decodes its outputs.
model = BlipForQuestionAnswering.from_pretrained(_VQA_CHECKPOINT)
processor = AutoProcessor.from_pretrained(_VQA_CHECKPOINT)
def get_pipeline_prediction(pil_image, question):
    """Answer a question about an image and narrate the answer as speech.

    The image and question are run through the BLIP processor and model to
    generate a text answer, which is then passed to the text-to-speech
    pipeline.

    Args:
        pil_image: Image handed to the processor. The UI is configured with
            ``type="pil"``; the value may be ``None`` when the form is
            submitted without an image.
        question: Question text about the image.

    Returns:
        A ``(sampling_rate, audio)`` tuple built from the text-to-speech
        pipeline output, as consumed by the ``gr.Audio(type="numpy")``
        output component.

    Raises:
        gr.Error: If no image was provided.
    """
    # Fail early with a readable message instead of letting the processor
    # raise an opaque error on a missing image.
    if pil_image is None:
        raise gr.Error("Please provide an image before asking a question.")

    inputs = processor(pil_image, question, return_tensors="pt")
    out = model.generate(**inputs)
    text = processor.decode(out[0], skip_special_tokens=True)

    narrated_text = tts_pipe(text)
    audio = narrated_text["audio"]
    # Strip the leading axis only when there is one: on an already 1-D
    # waveform, indexing [0] would return a single sample rather than the
    # audio. Objects without ``ndim`` keep the original [0] behaviour.
    if getattr(audio, "ndim", 2) > 1:
        audio = audio[0]
    return (narrated_text["sampling_rate"], audio)
# Input widgets: the image to inspect (delivered to the callback as a PIL
# image) and the question about it.
image_input = gr.Image(label="Input image", type="pil")
question_input = gr.Textbox(label="Ask your question")

# Output widget: audio player for the narrated answer, with autoplay enabled.
narration_output = gr.Audio(label="Narration", type="numpy", autoplay=True)

demo = gr.Interface(
    fn=get_pipeline_prediction,
    inputs=[image_input, question_input],
    outputs=narration_output,
)

# Serve on all network interfaces, port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)