from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation, pipeline
import gradio as gr

# Text-to-speech model, loaded from the Hugging Face Hub; used to read the
# result aloud at the end of object_classify
fastspeech = gr.Interface.load("huggingface/facebook/fastspeech2-en-ljspeech")
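# Calling the loaded interface directly, e.g. fastspeech('The missing item is a chair'),
# sends the text to the hosted model and should return the synthesized audio
# (a file path in this Gradio version) for the Audio output to play.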
# Segmentation model, loaded once at startup rather than on every request
feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/segformer-b2-finetuned-ade-512-512")
model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b2-finetuned-ade-512-512")
object_detector = pipeline(task="image-segmentation", model=model, feature_extractor=feature_extractor)

# this function detects the objects in each image and reports which ones are
# missing from the second image
def object_classify(img1, img2):
    # each call returns a list of dictionaries, one per segmented region
    dict_obj1 = object_detector(img1)
    dict_obj2 = object_detector(img2)
    # object labels present in each image
    objects_1 = [segment['label'] for segment in dict_obj1]
    objects_2 = [segment['label'] for segment in dict_obj2]
    # labels found in the first image but not in the second are the missing items
    missing_objects = list(set(objects_1) - set(objects_2))
    # sentence that will be read by FastSpeech
    tts_words = ['The missing items are']
    if len(missing_objects) == 0:
        tts_words.append('None')
    elif len(missing_objects) == 1:
        tts_words[0] = 'The missing item is a'
        tts_words.extend(missing_objects)
    else:
        tts_words.extend(missing_objects)
    gonna_process = ' '.join(tts_words)
    return missing_objects, fastspeech(gonna_process)
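# For reference: each object_detector(img) call above returns a list of dicts,
# one per segmented region. The keys shown here are assumed from the
# transformers image-segmentation pipeline docs, not captured from a live run:
#   [{'score': None, 'label': 'wall', 'mask': <PIL.Image.Image>}, ...]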
TITLE = 'Missing Items using Nvidia Segformer'
DESCRIPTION = 'Input two pictures: the first is the original scene and the second is the same scene with one or more items missing. The output is a list of the missing items and an audio version of it.'
# each example row must supply a value for both image inputs
EXAMPLES = [['Bedroom_1.jpg', 'Bedroom_2.jpg']]
INPUTS = [gr.inputs.Image(type='pil', label='Original Image'), gr.inputs.Image(type='pil', label='Second Image')]
OUTPUTS = [gr.outputs.Textbox(label='Missing Item/s'), gr.outputs.Audio(type="auto", label="Missing Items Audio")]
interface = gr.Interface(object_classify,
                         INPUTS,
                         OUTPUTS,
                         examples=EXAMPLES,
                         title=TITLE,
                         description=DESCRIPTION,
                         allow_flagging="never")
interface.launch()