Spaces:
Runtime error
Runtime error
arjunanand13
committed on
Commit
•
70acf1a
1
Parent(s):
8c71278
Update app.py
Browse files
app.py
CHANGED
@@ -1,50 +1,69 @@
|
|
1 |
import gradio as gr
|
2 |
-
from
|
3 |
-
import
|
|
|
4 |
from PIL import Image
|
5 |
-
import
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
-
|
25 |
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
with gr.Blocks(css=".input_image {max-width: 100%; border: 1px solid #ccc; box-shadow: 0 0 10px #ccc; margin-bottom: 10px;} .output_textbox {min-height: 100px;}") as demo:
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
demo.launch(debug=True)
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import AutoProcessor, Idefics2ForConditionalGeneration
|
3 |
+
import re
|
4 |
+
import time
|
5 |
from PIL import Image
|
6 |
+
import torch
|
7 |
+
import spaces
|
8 |
+
import subprocess
|
9 |
+
# Install flash-attn at startup without compiling its CUDA kernels.
# `env=` replaces the child's entire environment instead of extending it, so
# the skip-build flag is merged into the inherited environment; otherwise pip
# would run without PATH, HOME, proxy settings, or virtualenv variables.
import os
subprocess.run('pip install flash-attn --no-build-isolation', env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
10 |
+
|
11 |
+
|
12 |
+
processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b")
|
13 |
+
|
14 |
+
model = Idefics2ForConditionalGeneration.from_pretrained(
|
15 |
+
"HuggingFaceM4/idefics2-8b",
|
16 |
+
torch_dtype=torch.bfloat16,
|
17 |
+
#_attn_implementation="flash_attention_2",
|
18 |
+
trust_remote_code=True).to("cuda")
|
19 |
+
|
20 |
+
# import gradio as gr
|
21 |
+
# from huggingface_hub import InferenceApi
|
22 |
+
# import base64
|
23 |
+
# from PIL import Image
|
24 |
+
# import io
|
25 |
+
|
26 |
+
# client = InferenceApi("HuggingFaceM4/idefics2-8b")
|
27 |
+
|
28 |
+
# def image_to_base64(image):
|
29 |
+
# buffered = io.BytesIO()
|
30 |
+
# image.save(buffered, format="JPEG")
|
31 |
+
# img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
|
32 |
+
# return img_str
|
33 |
+
|
34 |
+
# def model_inference(image, text):
|
35 |
+
# image_base64 = image_to_base64(image)
|
36 |
+
# inputs = {
|
37 |
+
# "inputs": {
|
38 |
+
# "text": text,
|
39 |
+
# "image": image
|
40 |
+
# }
|
41 |
+
# }
|
42 |
|
43 |
+
# result = client(inputs)
|
44 |
|
45 |
+
# generated_text = result['generated_text']
|
46 |
+
# return generated_text
|
47 |
+
|
48 |
+
# with gr.Blocks(css=".input_image {max-width: 100%; border: 1px solid #ccc; box-shadow: 0 0 10px #ccc; margin-bottom: 10px;} .output_textbox {min-height: 100px;}") as demo:
|
49 |
+
# gr.Markdown("## Enhanced IDEFICS2 Demo")
|
50 |
+
# with gr.Row():
|
51 |
+
# with gr.Column(scale=1):
|
52 |
+
# image_input = gr.Image(label="Upload Image", type="pil", height=240, width=320)
|
53 |
+
# query_input = gr.Textbox(label="Enter Prompt", placeholder="Type your prompt here...")
|
54 |
+
# with gr.Column(scale=1):
|
55 |
+
# output = gr.Textbox(label="Model Output", interactive=True, placeholder="Output will be displayed here...")
|
56 |
+
|
57 |
+
# submit_btn = gr.Button("Generate")
|
58 |
+
# submit_btn.click(model_inference, inputs=[image_input, query_input], outputs=output)
|
59 |
+
|
60 |
+
# examples = [
|
61 |
+
# ["american_football.png", "Explain in detail what is depicted in the picture"],
|
62 |
+
# ["bike.png", "Explore the image closely and describe in detail what you discover."],
|
63 |
+
# ["finance.png", "Provide a detailed description of everything you see in the image."],
|
64 |
+
# ["science.png", "Please perform optical character recognition (OCR) on the uploaded image. Extract all text visible in the image accurately. Ensure to capture the text in its entirety and maintain the formatting as closely as possible to how it appears in the image. After extracting the text, display it in a clear and readable format, making sure that any special characters or symbols are also accurately represented. Provide the extracted text as output."],
|
65 |
+
# ["spirituality.png", "Please perform optical character recognition (OCR) on the uploaded image. Extract all text visible in the image accurately. Ensure to capture the text in its entirety and maintain the formatting as closely as possible to how it appears in the image. After extracting the text, display it in a clear and readable format, making sure that any special characters or symbols are also accurately represented. Provide the extracted text as output."]
|
66 |
+
# ]
|
67 |
+
# gr.Examples(examples=examples, inputs=[image_input, query_input], outputs=output)
|
68 |
+
|
69 |
+
# demo.launch(debug=True)
|