arjunanand13 committed on
Commit
7b0a54f
1 Parent(s): 70acf1a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -43
app.py CHANGED
@@ -14,56 +14,57 @@ processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b")
14
  model = Idefics2ForConditionalGeneration.from_pretrained(
15
  "HuggingFaceM4/idefics2-8b",
16
  torch_dtype=torch.bfloat16,
17
- #_attn_implementation="flash_attention_2",
18
- trust_remote_code=True).to("cuda")
 
19
 
20
- # import gradio as gr
21
- # from huggingface_hub import InferenceApi
22
- # import base64
23
- # from PIL import Image
24
- # import io
25
 
26
- # client = InferenceApi("HuggingFaceM4/idefics2-8b")
27
 
28
- # def image_to_base64(image):
29
- # buffered = io.BytesIO()
30
- # image.save(buffered, format="JPEG")
31
- # img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
32
- # return img_str
33
 
34
- # def model_inference(image, text):
35
- # image_base64 = image_to_base64(image)
36
- # inputs = {
37
- # "inputs": {
38
- # "text": text,
39
- # "image": image
40
- # }
41
- # }
42
 
43
- # result = client(inputs)
44
 
45
- # generated_text = result['generated_text']
46
- # return generated_text
47
 
48
- # with gr.Blocks(css=".input_image {max-width: 100%; border: 1px solid #ccc; box-shadow: 0 0 10px #ccc; margin-bottom: 10px;} .output_textbox {min-height: 100px;}") as demo:
49
- # gr.Markdown("## Enhanced IDEFICS2 Demo")
50
- # with gr.Row():
51
- # with gr.Column(scale=1):
52
- # image_input = gr.Image(label="Upload Image", type="pil", height=240, width=320)
53
- # query_input = gr.Textbox(label="Enter Prompt", placeholder="Type your prompt here...")
54
- # with gr.Column(scale=1):
55
- # output = gr.Textbox(label="Model Output", interactive=True, placeholder="Output will be displayed here...")
56
 
57
- # submit_btn = gr.Button("Generate")
58
- # submit_btn.click(model_inference, inputs=[image_input, query_input], outputs=output)
59
 
60
- # examples = [
61
- # ["american_football.png", "Explain in detail what is depicted in the picture"],
62
- # ["bike.png", "Explore the image closely and describe in detail what you discover."],
63
- # ["finance.png", "Provide a detailed description of everything you see in the image."],
64
- # ["science.png", "Please perform optical character recognition (OCR) on the uploaded image. Extract all text visible in the image accurately. Ensure to capture the text in its entirety and maintain the formatting as closely as possible to how it appears in the image. After extracting the text, display it in a clear and readable format, making sure that any special characters or symbols are also accurately represented. Provide the extracted text as output."],
65
- # ["spirituality.png", "Please perform optical character recognition (OCR) on the uploaded image. Extract all text visible in the image accurately. Ensure to capture the text in its entirety and maintain the formatting as closely as possible to how it appears in the image. After extracting the text, display it in a clear and readable format, making sure that any special characters or symbols are also accurately represented. Provide the extracted text as output."]
66
- # ]
67
- # gr.Examples(examples=examples, inputs=[image_input, query_input], outputs=output)
68
 
69
- # demo.launch(debug=True)
 
14
  # Load the "HuggingFaceM4/idefics2-8b" checkpoint in bfloat16, request the
  # "flash_attention_2" attention implementation, and move the model to CUDA.
  # NOTE(review): `model` is not referenced again in the visible part of this
  # hunk (inference below goes through `client`) — confirm the local load is
  # still needed.
  model = Idefics2ForConditionalGeneration.from_pretrained(
15
  "HuggingFaceM4/idefics2-8b",
16
  torch_dtype=torch.bfloat16,
17
+ _attn_implementation="flash_attention_2",
18
+ # trust_remote_code=True
19
+ ).to("cuda")
20
 
21
+ import gradio as gr
22
+ from huggingface_hub import InferenceApi
23
+ import base64
24
+ from PIL import Image
25
+ import io
26
 
27
+ client = InferenceApi("HuggingFaceM4/idefics2-8b")
28
 
29
def image_to_base64(image):
    """Encode an image as a base64 JPEG string.

    Args:
        image: A PIL-style image exposing ``mode``, ``convert(mode)`` and
            ``save(fp, format=...)``.

    Returns:
        str: The JPEG-encoded bytes of ``image``, base64-encoded and decoded
        as UTF-8 text.
    """
    # JPEG cannot store an alpha channel or a palette; Pillow raises OSError
    # for e.g. RGBA / LA / P images, which is what PNG uploads usually are.
    # Modes the JPEG writer accepts are passed through untouched.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if getattr(image, "mode", "RGB") not in jpeg_modes:
        image = image.convert("RGB")

    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
34
 
35
def model_inference(image, text):
    """Send an image and a prompt to the hosted endpoint and return its text.

    Args:
        image: The image from the Gradio image input (configured with
            ``type="pil"``).
        text: The prompt string from the Gradio textbox.

    Returns:
        The value stored under ``'generated_text'`` in the endpoint response.

    Raises:
        KeyError: If the response carries no ``'generated_text'`` entry.
    """
    # The request body has to be JSON-serialisable, so ship the base64 string.
    # Previously the encoded string was computed but the raw image object was
    # placed in the payload instead.
    image_base64 = image_to_base64(image)
    inputs = {
        "inputs": {
            "text": text,
            "image": image_base64,
        }
    }
    # NOTE(review): `client` may itself wrap its argument under an "inputs"
    # key, which would nest this payload twice — confirm against the
    # endpoint's expected schema.
    result = client(inputs)

    # Some hosted text-generation endpoints wrap the answer in a one-element
    # list; accept both shapes (a plain dict behaves exactly as before).
    if isinstance(result, list) and result:
        result = result[0]

    return result['generated_text']
48
 
49
# Build the Gradio UI: an image upload and a prompt textbox as inputs, and a
# textbox that receives the model output.
# NOTE(review): the original indentation is lost in this rendering, so which
# statements sit inside each `with` block cannot be confirmed from here.
+ with gr.Blocks(css=".input_image {max-width: 100%; border: 1px solid #ccc; box-shadow: 0 0 10px #ccc; margin-bottom: 10px;} .output_textbox {min-height: 100px;}") as demo:
50
+ gr.Markdown("## Enhanced IDEFICS2 Demo")
51
+ with gr.Row():
52
+ with gr.Column(scale=1):
53
+ image_input = gr.Image(label="Upload Image", type="pil", height=240, width=320)
54
+ query_input = gr.Textbox(label="Enter Prompt", placeholder="Type your prompt here...")
55
+ with gr.Column(scale=1):
56
+ output = gr.Textbox(label="Model Output", interactive=True, placeholder="Output will be displayed here...")
57
 
58
+ submit_btn = gr.Button("Generate")
59
# Clicking "Generate" calls model_inference with the image and the prompt and
# routes its return value to the output textbox.
+ submit_btn.click(model_inference, inputs=[image_input, query_input], outputs=output)
60
 
61
# Each example row pairs an image file name with a prompt; the last two rows
# share the same OCR prompt.
+ examples = [
62
+ ["american_football.png", "Explain in detail what is depicted in the picture"],
63
+ ["bike.png", "Explore the image closely and describe in detail what you discover."],
64
+ ["finance.png", "Provide a detailed description of everything you see in the image."],
65
+ ["science.png", "Please perform optical character recognition (OCR) on the uploaded image. Extract all text visible in the image accurately. Ensure to capture the text in its entirety and maintain the formatting as closely as possible to how it appears in the image. After extracting the text, display it in a clear and readable format, making sure that any special characters or symbols are also accurately represented. Provide the extracted text as output."],
66
+ ["spirituality.png", "Please perform optical character recognition (OCR) on the uploaded image. Extract all text visible in the image accurately. Ensure to capture the text in its entirety and maintain the formatting as closely as possible to how it appears in the image. After extracting the text, display it in a clear and readable format, making sure that any special characters or symbols are also accurately represented. Provide the extracted text as output."]
67
+ ]
68
+ gr.Examples(examples=examples, inputs=[image_input, query_input], outputs=output)
69
 
70
# Start the app with debug=True.
+ demo.launch(debug=True)