coco-gelamay committed on
Commit
79ffc5d
1 Parent(s): 3011b0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -5
app.py CHANGED
@@ -1,8 +1,13 @@
1
  from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation
2
  from transformers import pipeline
3
- from PIL import Image, ImageDraw, ImageFont
4
  import gradio as gr
 
5
 
 
 
 
 
6
  def object_classify(img1,img2):
7
 
8
  feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/segformer-b2-finetuned-ade-512-512")
@@ -18,6 +23,9 @@ def object_classify(img1,img2):
18
  objects_1=[]
19
  objects_2=[]
20
 
 
 
 
21
  #gets the label from each dictionary
22
  for i in dict_obj1:
23
  objects_1.append(i['label'])
@@ -27,16 +35,28 @@ def object_classify(img1,img2):
27
 
28
  #gets the uncommon elements from the 2 lists
29
  missing_objects= list(set(objects_1)-set(objects_2))
30
- #missing_objects=*missing_objects, sep = ", "
31
- return missing_objects
 
 
 
 
 
 
 
 
 
 
 
32
 
33
 
34
- TITLE = 'Missing Items'
35
  DESCRIPTION = 'Input two indoor pictures. First image being the original and second is one with the missing item/s'
36
  EXAMPLES = [['Bedroom_1.jpg'],['Bedroom_2.jpg']]
37
 
38
  INPUTS=[gr.inputs.Image(type = 'pil', label='Original Image'),gr.inputs.Image(type = 'pil', label='Second Image')]
39
- OUTPUTS=gr.outputs.Textbox(label="Missing Item/s is/are: ")
 
40
 
41
  interface=gr.Interface(object_classify,
42
  INPUTS,
 
1
  from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation
2
  from transformers import pipeline
3
+ from PIL import Image
4
  import gradio as gr
5
+ import torch
6
 
7
+ #this converts text to speech
8
+ fastspeech = gr.Interface.load("huggingface/facebook/fastspeech2-en-ljspeech")
9
+
10
+ #this function detects objects in both images and reports those missing from the second
11
  def object_classify(img1,img2):
12
 
13
  feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/segformer-b2-finetuned-ade-512-512")
 
23
  objects_1=[]
24
  objects_2=[]
25
 
26
+ #this will be read aloud by the fastspeech model
27
+ tts_words=['The missing items are']
28
+
29
  #gets the label from each dictionary
30
  for i in dict_obj1:
31
  objects_1.append(i['label'])
 
35
 
36
  #gets the uncommon elements from the 2 lists
37
  missing_objects= list(set(objects_1)-set(objects_2))
38
+
39
+ if len(missing_objects)==0:
40
+ tts_words.append('None')
41
+ elif len(missing_objects)==1:
42
+ tts_words[0]='The missing item is'
43
+ tts_words.extend(missing_objects)
44
+ else:
45
+ tts_words.extend(missing_objects)
46
+
47
+ gonna_process=' '.join(tts_words)
48
+
49
+
50
+ return missing_objects, fastspeech(gonna_process)
51
 
52
 
53
+ TITLE = 'Missing Items using Nvidia Segformer'
54
  DESCRIPTION = 'Input two indoor pictures. First image being the original and second is one with the missing item/s'
55
  EXAMPLES = [['Bedroom_1.jpg'],['Bedroom_2.jpg']]
56
 
57
  INPUTS=[gr.inputs.Image(type = 'pil', label='Original Image'),gr.inputs.Image(type = 'pil', label='Second Image')]
58
+ OUTPUTS=[gr.outputs.Textbox(label='Missing Item/s is/are'),gr.outputs.Audio(type="auto", label="Missing Items Audio")]
59
+
60
 
61
  interface=gr.Interface(object_classify,
62
  INPUTS,