# Hugging Face Space app. (Header lines "Spaces: / Runtime error" were
# non-code page text captured by the scraper and are preserved here as a comment.)
# Third-party imports: HF Transformers for segmentation, Gradio for the UI.
from PIL import Image
import gradio as gr
import torch
from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation
from transformers import pipeline

# Text-to-speech model, loaded once at module import (network call to the
# Hugging Face hub). Called like a function: fastspeech(text) -> audio.
fastspeech = gr.Interface.load("huggingface/facebook/fastspeech2-en-ljspeech")
#this function detects the objects in the room | |
def object_classify(img1, img2):
    """Detect which objects present in img1 are missing from img2.

    Runs semantic segmentation on both images, diffs the detected labels,
    and synthesizes a spoken summary of the missing items.

    Args:
        img1: PIL image of the original room.
        img2: PIL image of the same room with item(s) removed.

    Returns:
        A tuple of (missing_objects, audio) where missing_objects is a list of
        label strings found in img1 but not img2, and audio is the fastspeech
        TTS output announcing them.
    """
    # NOTE(review): the model is re-downloaded/instantiated on every call;
    # hoisting to module level would be faster but changes import-time behavior.
    feature_extractor = SegformerFeatureExtractor.from_pretrained(
        "nvidia/segformer-b2-finetuned-ade-512-512")
    model = SegformerForSemanticSegmentation.from_pretrained(
        "nvidia/segformer-b2-finetuned-ade-512-512")
    object_detector = pipeline(task="image-segmentation",
                               model=model, feature_extractor=feature_extractor)

    # The pipeline returns a list of dicts; pull out just the label strings.
    objects_1 = [seg['label'] for seg in object_detector(img1)]
    objects_2 = [seg['label'] for seg in object_detector(img2)]

    # Labels present in the first image but absent from the second.
    missing_objects = list(set(objects_1) - set(objects_2))

    # Build the sentence read aloud by fastspeech, with grammar adjusted
    # for zero / one / many missing items.
    tts_words = ['The missing items are']
    if not missing_objects:
        tts_words.append('None')
    elif len(missing_objects) == 1:
        tts_words[0] = 'The missing item is a'
        tts_words.extend(missing_objects)
    else:
        tts_words.extend(missing_objects)

    gonna_process = ' '.join(tts_words)
    return missing_objects, fastspeech(gonna_process)
TITLE = 'Missing Items using Nvidia Segformer'
DESCRIPTION = 'Input two indoor pictures. First image being the original and second is one with the missing item/s'
# Each example row must supply one value per input component; the interface
# has two image inputs, so a single example row carries both filenames.
EXAMPLES = [['Bedroom_1.jpg', 'Bedroom_2.jpg']]
INPUTS = [gr.inputs.Image(type='pil', label='Original Image'),
          gr.inputs.Image(type='pil', label='Second Image')]
OUTPUTS = [gr.outputs.Textbox(label='Missing Item/s is/are'),
           gr.outputs.Audio(type="auto", label="Missing Items Audio")]
interface = gr.Interface(object_classify,
                         INPUTS,
                         OUTPUTS,
                         examples=EXAMPLES,
                         title=TITLE,
                         description=DESCRIPTION, allow_flagging="never")
interface.launch()