Spaces:

zanemotiwala
/

image-recognition-caption

Sleeping

zanemotiwala committed on Apr 14

Commit

17b4c6b

•

1 Parent(s): c04f31b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,8 +11,9 @@ feature_extractor = ViTImageProcessor.from_pretrained(encoder_checkpoint)
 tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
 caption_model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint).to(device)
-# Initialize the image generation model (e.g., Stable Diffusion)
-image_gen_model = pipeline("text-to-image", model="CompVis/stable-diffusion-v1-4")
 def predict(image):
     # Generate a caption from the image
@@ -22,7 +23,7 @@ def predict(image):
     caption_text = tokenizer.decode(caption_ids, skip_special_tokens=True)
     # Generate an image from the caption
-    generated_images = image_gen_model(caption_text, num_images=1)
     return caption_text, generated_images[0]

 tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
 caption_model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint).to(device)
+# Load the Stable Diffusion model
+diffusion_model = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16)
+diffusion_model = diffusion_model.to(device)
 def predict(image):
     # Generate a caption from the image
     caption_text = tokenizer.decode(caption_ids, skip_special_tokens=True)
     # Generate an image from the caption
+    generated_image = diffusion_model(caption_text)["sample"][0]
     return caption_text, generated_images[0]