Upyaya committed on
Commit
fd469c3
·
1 Parent(s): d173f19

Upload file not generating caption

Browse files

Add a progress bar to help understand why an uploaded image fails to generate a caption

Files changed (1) hide show
  1. app.py +60 -27
app.py CHANGED
@@ -53,50 +53,83 @@ def main():
53
  st.caption("Accurate and enchanting descriptions of clothes on shopping websites can help customers without fashion knowledge to better understand the features (attributes, style, functionality, etc.) of the items and increase online sales by enticing more customers.")
54
  st.caption("Also, most of the time when any customer visits shopping websites, they are looking for a certain style or type of clothes that wish to purchase, they search for the item by providing a description of the item and the system finds the relevant items that match the search query by computing the similarity score between the query and the item caption.")
55
  st.caption("Given the clothes image provide a short caption that describes the item. In general, in image captioning datasets (e.g., COCO, Fliker), the descriptions of fashion items have three unique features, which makes the automatic generation of captions a challenging task. First, fashion captioning needs to describe the attributes of an item, while image captioning generally narrates the objects and their relations in the image.")
56
- st.caption("Solution: Used Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models (BLIP-2)")
57
- st.write("Github: [link](https://github.com/SmithaUpadhyaya/fashion_image_caption)")
58
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  processor, model = init_model()
61
 
62
  #Select few sample images for the catagory of cloths
63
- st.text("Select image:")
64
  option = st.selectbox('From sample', ('None', 'dress', 'earrings', 'sweater', 'sunglasses', 'shoe', 'hat', 'heels', 'socks', 'tee', 'bracelet'), index = 0)
65
  st.text("Or")
66
  file_name = st.file_uploader(label = "Upload an image", accept_multiple_files = False)
67
-
68
- image = None
69
- if file_name is not None:
 
 
 
 
 
 
70
 
71
- image = Image.open(file_name)
72
 
73
- elif option is not 'None':
 
74
 
75
- file_name = os.path.join(sample_img_path, map_sampleid_name[option])
76
- image = Image.open(file_name)
77
 
78
- if image is not None:
79
 
80
- image_col, caption_text = st.columns(2)
81
- image_col.header("Image")
82
- image_col.image(image, use_column_width = True)
83
 
84
- #Preprocess the image
85
- #Inferance on GPU. When used this on GPU will get errors like: "slow_conv2d_cpu" not implemented for 'Half'" , " Input type (float) and bias type (struct c10::Half)"
86
- #inputs = processor(images = image, return_tensors = "pt").to('cuda', torch.float16)
87
 
88
- #Inferance on CPU
89
- inputs = processor(images = image, return_tensors = "pt")
90
 
91
- pixel_values = inputs.pixel_values
92
 
93
- #Predict the caption for the imahe
94
- generated_ids = model.generate(pixel_values = pixel_values, max_length = 25)
95
- generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
96
 
97
- #Output the predict text
98
- caption_text.header("Generated Caption")
99
- caption_text.text(generated_caption)
100
 
101
 
102
  if __name__ == "__main__":
 
53
  st.caption("Accurate and enchanting descriptions of clothes on shopping websites can help customers without fashion knowledge to better understand the features (attributes, style, functionality, etc.) of the items and increase online sales by enticing more customers.")
54
  st.caption("Also, most of the time when any customer visits shopping websites, they are looking for a certain style or type of clothes that wish to purchase, they search for the item by providing a description of the item and the system finds the relevant items that match the search query by computing the similarity score between the query and the item caption.")
55
  st.caption("Given the clothes image provide a short caption that describes the item. In general, in image captioning datasets (e.g., COCO, Fliker), the descriptions of fashion items have three unique features, which makes the automatic generation of captions a challenging task. First, fashion captioning needs to describe the attributes of an item, while image captioning generally narrates the objects and their relations in the image.")
56
+ st.caption("Solution: Used Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models [(BLIP-2)](https://huggingface.co/Salesforce/blip2-opt-6.7b) by Salesforce")
57
+ st.write("For more detail: [Github link](https://github.com/SmithaUpadhyaya/fashion_image_caption)")
58
+ footer = """<style>
59
+ a:link , a:visited{
60
+ color: blue;
61
+ background-color: transparent;
62
+ text-decoration: underline;
63
+ }
64
+
65
+ a:hover, a:active {
66
+ color: red;
67
+ background-color: transparent;
68
+ text-decoration: underline;
69
+ }
70
+
71
+ .footer {
72
+ position: fixed;
73
+ left: 0;
74
+ bottom: 0;
75
+ width: 100%;
76
+ background-color: white;
77
+ color: black;
78
+ text-align: center;
79
+ }
80
+ </style>
81
+ <div class="footer">
82
+ <p>Application deployed on CPU with 16GB RAM</p>
83
+ </div>
84
+ """
85
+ st.markdown(footer,unsafe_allow_html=True)
86
 
87
  processor, model = init_model()
88
 
89
  #Select few sample images for the catagory of cloths
90
+ st.caption("Select image:")
91
  option = st.selectbox('From sample', ('None', 'dress', 'earrings', 'sweater', 'sunglasses', 'shoe', 'hat', 'heels', 'socks', 'tee', 'bracelet'), index = 0)
92
  st.text("Or")
93
  file_name = st.file_uploader(label = "Upload an image", accept_multiple_files = False)
94
+
95
+ btn_click = st.button('Generate')
96
+
97
+ if btn_click:
98
+
99
+ image = None
100
+ if file_name is not None:
101
+
102
+ image = Image.open(file_name)
103
 
104
+ elif option is not 'None':
105
 
106
+ file_name = os.path.join(sample_img_path, map_sampleid_name[option])
107
+ image = Image.open(file_name)
108
 
109
+ if image is not None:
 
110
 
111
+ with st.spinner('Generating Caption...'):
112
 
113
+ image_col, caption_text = st.columns(2)
114
+ image_col.header("Image")
115
+ image_col.image(image, use_column_width = True)
116
 
117
+ #Preprocess the image
118
+ #Inferance on GPU. When used this on GPU will get errors like: "slow_conv2d_cpu" not implemented for 'Half'" , " Input type (float) and bias type (struct c10::Half)"
119
+ #inputs = processor(images = image, return_tensors = "pt").to('cuda', torch.float16)
120
 
121
+ #Inferance on CPU
122
+ inputs = processor(images = image, return_tensors = "pt")
123
 
124
+ pixel_values = inputs.pixel_values
125
 
126
+ #Predict the caption for the imahe
127
+ generated_ids = model.generate(pixel_values = pixel_values, max_length = 25)
128
+ generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
129
 
130
+ #Output the predict text
131
+ caption_text.header("Generated Caption")
132
+ caption_text.text(generated_caption)
133
 
134
 
135
  if __name__ == "__main__":