alex buz committed
Commit • e1cddb8
1 Parent(s): 767736b
test
Browse files
- _app.py +60 -0
- _requirements.txt +6 -0
- app.py +12 -56
- requirements.txt +1 -5
_app.py ADDED
@@ -0,0 +1,60 @@
+from transformers import AutoProcessor, AutoModelForCausalLM
+from PIL import Image
+import gradio as gr
+
+model_id = 'microsoft/Florence-2-large'
+model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True,
+    torch_dtype="auto",
+    #device_map="auto",
+    cache_dir="./cache",
+    #attn_implementation="flash_attention_2",
+).eval()
+processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True,
+    torch_dtype="auto",
+    #device_map="auto",
+    cache_dir="./cache",
+    #attn_implementation="flash_attention_2",
+)
+
+def run_example(task_prompt, image, text_input=None):
+    if text_input is None:
+        prompt = task_prompt
+    else:
+        prompt = task_prompt + text_input
+
+    inputs = processor(text=prompt, images=image, return_tensors="pt")
+    generated_ids = model.generate(
+        input_ids=inputs["input_ids"],
+        pixel_values=inputs["pixel_values"],
+        max_new_tokens=1024,
+        early_stopping=False,
+        do_sample=False,
+        num_beams=3,
+    )
+    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+    parsed_answer = processor.post_process_generation(
+        generated_text,
+        task=task_prompt,
+        image_size=(image.width, image.height),
+        #stream=True
+    )
+
+    return parsed_answer
+
+def inference(image, task_prompt, text_input):
+    return run_example(task_prompt, image, text_input)
+
+interface = gr.Interface(
+    fn=inference,
+    inputs=[
+        gr.Image(type="pil"),
+        gr.Textbox(label="Task Prompt", placeholder="Enter task prompt here"),
+        gr.Textbox(label="Additional Text Input", placeholder="Enter additional text input here (optional)", optional=True)
+    ],
+    outputs="text",
+    title="Hugging Face Model Inference",
+    description="Generate text based on an image and a prompt using a Hugging Face model"
+)
+
+if __name__ == "__main__":
+    interface.launch()
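Two notes on the archived file: optional=True is not a documented gr.Textbox argument (recent Gradio releases reject unknown keyword arguments; older ones warn and ignore them), and the processor's tensors are never moved to the model's device, so the script only works with model and inputs both on CPU. Since interface.launch() sits under the __main__ guard, the inference path can be exercised by import. A minimal sketch, assuming a local test image and <CAPTION>, one of Florence-2's documented task tokens:

    from PIL import Image
    from _app import run_example  # import is safe: launch() is under __main__

    img = Image.open("test.jpg").convert("RGB")  # assumption: any local RGB image
    print(run_example("<CAPTION>", img))         # e.g. {'<CAPTION>': 'a photo of ...'}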
_requirements.txt ADDED
@@ -0,0 +1,6 @@
+transformers
+pillow
+gradio
+#flash_attn
+#timm
+#einops
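_requirements.txt pins nothing and mirrors the archived app's imports. Worth noting: Florence-2's trust_remote_code path is commonly reported to import timm and einops, so the commented-out extras may need uncommenting when running off Spaces. A sketch for a local run:

    pip install -r _requirements.txt
    python _app.py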
app.py CHANGED
@@ -1,60 +1,16 @@
-from transformers import AutoProcessor, AutoModelForCausalLM
-from PIL import Image
-import gradio as gr
-
-model_id = 'microsoft/Florence-2-large'
-model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True,
-    torch_dtype="auto",
-    #device_map="auto",
-    cache_dir="./cache",
-    #attn_implementation="flash_attention_2",
-).eval()
-processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True,
-    torch_dtype="auto",
-    #device_map="auto",
-    cache_dir="./cache",
-    #attn_implementation="flash_attention_2",
-)
-
-def run_example(task_prompt, image, text_input=None):
-    if text_input is None:
-        prompt = task_prompt
-    else:
-        prompt = task_prompt + text_input
 
-    inputs = processor(text=prompt, images=image, return_tensors="pt")
-    generated_ids = model.generate(
-        input_ids=inputs["input_ids"],
-        pixel_values=inputs["pixel_values"],
-        max_new_tokens=1024,
-        early_stopping=False,
-        do_sample=False,
-        num_beams=3,
-    )
-    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
-    parsed_answer = processor.post_process_generation(
-        generated_text,
-        task=task_prompt,
-        image_size=(image.width, image.height),
-        #stream=True
-    )
-
-    return parsed_answer
+import gradio as gr
+from transformers import pipeline
 
-def inference(image, task_prompt, text_input):
-    return run_example(task_prompt, image, text_input)
+pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
 
-interface = gr.Interface(
-    fn=inference,
-    inputs=[
-        gr.Image(type="pil"),
-        gr.Textbox(label="Task Prompt", placeholder="Enter task prompt here"),
-        gr.Textbox(label="Additional Text Input", placeholder="Enter additional text input here (optional)", optional=True)
-    ],
-    outputs="text",
-    title="Hugging Face Model Inference",
-    description="Generate text based on an image and a prompt using a Hugging Face model"
-)
+def predict(image):
+    predictions = pipeline(image)
+    return {p["label"]: p["score"] for p in predictions}
 
-if __name__ == "__main__":
-    interface.launch()
+gr.Interface(
+    predict,
+    inputs=gr.Image(label="Upload hot dog candidate", type="filepath"),
+    outputs=gr.Label(num_top_classes=2),
+    title="Hot Dog? Or Not?",
+).launch()
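The replacement app is essentially the stock hotdog-not-hotdog Gradio demo. One wrinkle: pipeline = pipeline(...) rebinds the imported factory function to the constructed pipeline object; it runs, but a second pipeline(...) call would then fail. A standalone sketch of what predict() does per request, assuming a local image file (gr.Image(type="filepath") hands the function a path string, which the pipeline accepts directly):

    from transformers import pipeline

    # build the classifier once, keeping the factory name intact
    clf = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")

    preds = clf("sample.jpg")  # assumption: any local image path
    # the pipeline returns [{'label': ..., 'score': ...}, ...]; fold it into
    # the {label: score} mapping gr.Label expects
    print({p["label"]: p["score"] for p in preds})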
requirements.txt CHANGED
@@ -1,6 +1,2 @@
 transformers
-pillow
-gradio
-#flash_attn
-#timm
-#einops
+torch
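Note that gradio drops out of requirements.txt even though the new app.py imports it. On a Gradio Space that is fine (the Space SDK installs its own pinned gradio), but running app.py anywhere else needs it back, e.g.:

    pip install transformers torch gradio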