mrm8488 multimodalart HF staff committed on
Commit
78cea7d
1 Parent(s): aeceb12

Suggestion to simplify (#2)

Browse files

- Suggestion to simplify (ea98132386519a165e842c4c4ffedefdb3aa37dc)
- Upload 5 files (a2f01e5b572df78727ff7d3f4198cad5f327ec67)
- Update app.py (d413d63c2fa94c25e73076432da36ed66280e375)


Co-authored-by: Apolinário from multimodal AI art <multimodalart@users.noreply.huggingface.co>

.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ 0_6as5rHi0sgG4W2Tq.png filter=lfs diff=lfs merge=lfs -text
37
+ 1_sTXgMwDUW0pk-1yK4iHYFw.png filter=lfs diff=lfs merge=lfs -text
0_6as5rHi0sgG4W2Tq.png ADDED

Git LFS Details

  • SHA256: 7429f34bb2a061c58f983697c8a06318e0d9a772e301ae0fee11b2fbd1b67de4
  • Pointer size: 132 Bytes
  • Size of remote file: 1.64 MB
1_sTXgMwDUW0pk-1yK4iHYFw.png ADDED

Git LFS Details

  • SHA256: e652ca3bead8f10de24efe201d3c60dc4d3a7a0d5196eee7091000e4818a9053
  • Pointer size: 132 Bytes
  • Size of remote file: 1.16 MB
app.py CHANGED
@@ -14,39 +14,35 @@ processor = AutoProcessor.from_pretrained(config.base_model_name_or_path)
14
  model = model.to(device)
15
  model.eval()
16
 
17
- def predict(prompt, image_url, image_pil=None, max_length=64):
18
- if image_pil is not None:
19
- image = image_pil
20
- else:
21
- image = processor.image_processor.fetch_images(image_url)
 
 
22
  prompts = [[image, prompt]]
23
  inputs = processor(prompts[0], return_tensors="pt").to(device)
24
  generated_ids = model.generate(**inputs, max_length=max_length)
25
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
26
- return image, generated_text
27
-
28
-
29
 
30
  title = "Midjourney-like Image Captioning with IDEFICS"
31
  description = "Gradio Demo for generating *Midjourney* like captions (describe functionality) with **IDEFICS**"
32
 
33
  examples = [
34
- ["Describe the following image:", "https://miro.medium.com/v2/resize:fit:0/1*sTXgMwDUW0pk-1yK4iHYFw.png", None, 64],
35
- ["Describe the following image:", "https://miro.medium.com/v2/resize:fit:1400/0*6as5rHi0sgG4W2Tq.png", None, 64],
36
- ["Describe the following image:", "https://cdn.arstechnica.net/wp-content/uploads/2023/06/zoomout_2-1440x807.jpg", None, 64],
37
- ["Describe the following image:", "https://framerusercontent.com/images/inZdRVn7eafZNvaVre2iW1a538.png", None, 64],
38
- ["Describe the following image:", "https://hips.hearstapps.com/hmg-prod/images/cute-photos-of-cats-in-grass-1593184777.jpg", None, 64]
39
-
40
  ]
41
  io = gr.Interface(fn=predict,
42
  inputs=[
43
- gr.Textbox(label="Prompt", value="Describe the following image:", interactive=False),
44
- gr.Textbox(label="image URL", placeholder="Insert the URL of the image to be described"),
45
- gr.Image(label="or upload an image", type="pil"),
46
- gr.Slider(label="Max tokens", value=64, max=128, min=16, step=8)
47
  ],
48
  outputs=[
49
- gr.Image(type='pil', label="Image"),
50
  gr.Textbox(label="IDEFICS Description")
51
  ],
52
  title=title, description=description, examples=examples,
 
14
  model = model.to(device)
15
  model.eval()
16
 
17
+ #Pre-determined best prompt for this fine-tune
18
+ prompt="Describe the following image:"
19
+
20
+ #Max generated tokens for your prompt
21
+ max_length=64
22
+
23
+ def predict(image):
24
  prompts = [[image, prompt]]
25
  inputs = processor(prompts[0], return_tensors="pt").to(device)
26
  generated_ids = model.generate(**inputs, max_length=max_length)
27
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
28
+ generated_text = generated_text.replace(prompt,"")
29
+ return generated_text
 
30
 
31
  title = "Midjourney-like Image Captioning with IDEFICS"
32
  description = "Gradio Demo for generating *Midjourney* like captions (describe functionality) with **IDEFICS**"
33
 
34
  examples = [
35
+ ["1_sTXgMwDUW0pk-1yK4iHYFw.png"],
36
+ ["0_6as5rHi0sgG4W2Tq.png"],
37
+ ["zoomout_2-1440x807.jpg"],
38
+ ["inZdRVn7eafZNvaVre2iW1a538.webp"],
39
+ ["cute-photos-of-cats-in-grass-1593184777.jpg"]
 
40
  ]
41
  io = gr.Interface(fn=predict,
42
  inputs=[
43
+ gr.Image(label="Upload an image", type="pil"),
 
 
 
44
  ],
45
  outputs=[
 
46
  gr.Textbox(label="IDEFICS Description")
47
  ],
48
  title=title, description=description, examples=examples,
cute-photos-of-cats-in-grass-1593184777.jpg ADDED
inZdRVn7eafZNvaVre2iW1a538.webp ADDED
zoomout_2-1440x807.jpg ADDED