|
import os |
|
from PIL import Image |
|
from gtts import gTTS |
|
import torch |
|
import gradio as gr |
|
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize |
|
from transformers import pipeline, GPT2LMHeadModel, GPT2Tokenizer |
|
|
|
_captioner_pipeline = None


def _get_captioner():
    """Return the shared BLIP captioning pipeline, building it on first use.

    Constructing the pipeline loads the model weights, so it is done once
    and reused instead of on every request. The cache is not lock-protected:
    two concurrent first calls could each build a pipeline, and the later
    assignment simply wins.
    """
    global _captioner_pipeline
    if _captioner_pipeline is None:
        _captioner_pipeline = pipeline(
            "image-to-text",
            model="Salesforce/blip-image-captioning-base",
        )
    return _captioner_pipeline


def describe_photo(image):
    """Produce a text caption for an image.

    Args:
        image: Pixel data exposing ``astype`` (presumably the NumPy array
            handed over by Gradio's ``"image"`` input -- confirm against the
            UI definition). It is cast to ``uint8`` and read as RGB.

    Returns:
        The ``generated_text`` of the first captioning result.

    Raises:
        ValueError: If ``image`` is ``None``.
    """
    if image is None:
        # Fail with a readable message instead of an AttributeError on
        # ``None.astype``.
        raise ValueError("No image was provided.")

    pil_image = Image.fromarray(image.astype('uint8'), 'RGB')
    results = _get_captioner()(pil_image)
    text = results[0]['generated_text']
    print(f"Image caption is: {text}")
    return text
|
|
|
_story_generator = None


def _get_story_generator():
    """Return the shared ``(model, tokenizer)`` pair for GPT-2.

    Both are loaded on first use and reused afterwards so the weights are
    not reloaded for every story. They are stored as a single tuple so the
    cache is never observed half-initialised.
    """
    global _story_generator
    if _story_generator is None:
        _story_generator = (
            GPT2LMHeadModel.from_pretrained("gpt2"),
            GPT2Tokenizer.from_pretrained("gpt2"),
        )
    return _story_generator


def generate_story(description):
    """Generate a short story seeded by an image description.

    The prompt is ``description`` followed by
    ``" [SEP] A funny and friendly story:"``. The whole generated sequence
    is decoded, so the returned text starts with that prompt.

    Args:
        description: Caption text used to seed the story.

    Returns:
        The decoded text of the single generated sequence, with special
        tokens removed.
    """
    model, tokenizer = _get_story_generator()

    prompt = description + " [SEP] A funny and friendly story:"
    encoded = tokenizer(prompt, return_tensors='pt')

    outputs = model.generate(
        input_ids=encoded["input_ids"],
        # Passed explicitly so generate() does not have to guess the mask.
        attention_mask=encoded["attention_mask"],
        # max_length counts the prompt tokens as well as the new ones.
        max_length=200,
        num_return_sequences=1,
        # temperature only takes effect in sampling mode; without
        # do_sample=True decoding is greedy and the value is ignored.
        do_sample=True,
        temperature=0.7,
        no_repeat_ngram_size=2,
        # GPT-2 defines no pad token; reuse EOS, which is what generate()
        # would otherwise fall back to with a warning.
        pad_token_id=tokenizer.eos_token_id,
    )

    story = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return story
|
|
|
def convert_to_audio(text, audio_file_path="audio.mp3"):
    """Synthesize ``text`` to speech with gTTS and save it to a file.

    Args:
        text: The text to speak.
        audio_file_path: Where to write the audio. Defaults to
            ``"audio.mp3"`` in the current working directory; an existing
            file at that path is overwritten, so callers that run
            concurrently should pass distinct paths.

    Returns:
        The path the audio was written to (``audio_file_path``).
    """
    tts = gTTS(text)
    tts.save(audio_file_path)
    return audio_file_path
|
|
|
_transcriber_pipeline = None


def _get_transcriber():
    """Return the shared Whisper speech-recognition pipeline.

    Built on first use and reused afterwards so the large checkpoint is
    not reloaded for every transcription.
    """
    global _transcriber_pipeline
    if _transcriber_pipeline is None:
        _transcriber_pipeline = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-large-v2",
            # Whisper works on 30-second windows; chunking lets the pipeline
            # transcribe recordings longer than a single window.
            chunk_length_s=30,
        )
    return _transcriber_pipeline


def audio_to_text(audio_file_path):
    """Transcribe an audio file to text.

    Args:
        audio_file_path: Path of the audio file to transcribe.

    Returns:
        The ``'text'`` entry of the pipeline result.
    """
    # Transcribe the file the caller asked for rather than a fixed
    # "audio.mp3".
    result = _get_transcriber()(audio_file_path)
    print(result)
    return result['text']
|
|
|
_sentiment_pipeline = None


def _get_sentiment_analyzer():
    """Return the shared sentiment-analysis pipeline, built on first use.

    The checkpoint is named explicitly so results do not change if the
    library's implicit default for this task changes. NOTE(review): this is
    believed to be the checkpoint transformers falls back to for
    "sentiment-analysis" -- confirm against the installed version.
    """
    global _sentiment_pipeline
    if _sentiment_pipeline is None:
        _sentiment_pipeline = pipeline(
            "sentiment-analysis",
            model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
        )
    return _sentiment_pipeline


def sentiment_analysis(text):
    """Classify the sentiment of ``text``.

    Args:
        text: The text to classify.

    Returns:
        The pipeline's raw result, unchanged (for a single string this is
        expected to be a one-element list holding a label/score dict).
    """
    # truncation=True keeps over-long input within the model's maximum
    # sequence length instead of failing inside the model.
    result = _get_sentiment_analyzer()(text, truncation=True)
    print(result)
    return result
|
|
|
def app(image):
    """Run the whole chain for one image.

    The steps are: caption the image, write a story from the caption,
    speak the story to an audio file, transcribe that audio back to text,
    and score the transcript's sentiment.

    Args:
        image: The image passed through to ``describe_photo``.

    Returns:
        A 4-tuple ``(caption, audio file path, transcript, sentiment)``.
        The generated story itself is not returned; it is only available
        as the audio file and its transcript.
    """
    caption = describe_photo(image)
    speech_path = convert_to_audio(generate_story(caption))
    transcript = audio_to_text(speech_path)
    return caption, speech_path, transcript, sentiment_analysis(transcript)
|
|
|
# Gradio front end: one image in, four outputs back. The output components
# are listed in the same order as the tuple returned by ``app``
# (caption, audio file, transcript, sentiment).
ui = gr.Interface(
    title="Diego's Story Telling Multimodel LLM Gen AI",
    fn=app,
    inputs="image",
    outputs=["text", "audio", "text", "text"],
)

# Start the web server as soon as this file is executed.
ui.launch()