alex buz committed
Commit e1cddb8
1 Parent(s): 767736b
Files changed (4)
  1. _app.py +60 -0
  2. _requirements.txt +6 -0
  3. app.py +12 -56
  4. requirements.txt +1 -5
_app.py ADDED
@@ -0,0 +1,60 @@
+ from transformers import AutoProcessor, AutoModelForCausalLM
+ from PIL import Image
+ import gradio as gr
+
+ model_id = 'microsoft/Florence-2-large'
+ model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True,
+                                              torch_dtype="auto",
+                                              #device_map="auto",
+                                              cache_dir="./cache",
+                                              #attn_implementation="flash_attention_2",
+                                              ).eval()
+ processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True,
+                                           torch_dtype="auto",
+                                           #device_map="auto",
+                                           cache_dir="./cache",
+                                           #attn_implementation="flash_attention_2",
+                                           )
+
+ def run_example(task_prompt, image, text_input=None):
+     if text_input is None:
+         prompt = task_prompt
+     else:
+         prompt = task_prompt + text_input
+
+     inputs = processor(text=prompt, images=image, return_tensors="pt")
+     generated_ids = model.generate(
+         input_ids=inputs["input_ids"],
+         pixel_values=inputs["pixel_values"],
+         max_new_tokens=1024,
+         early_stopping=False,
+         do_sample=False,
+         num_beams=3,
+     )
+     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+     parsed_answer = processor.post_process_generation(
+         generated_text,
+         task=task_prompt,
+         image_size=(image.width, image.height),
+         #stream=True
+     )
+
+     return parsed_answer
+
+ def inference(image, task_prompt, text_input):
+     return run_example(task_prompt, image, text_input)
+
+ interface = gr.Interface(
+     fn=inference,
+     inputs=[
+         gr.Image(type="pil"),
+         gr.Textbox(label="Task Prompt", placeholder="Enter task prompt here"),
+         gr.Textbox(label="Additional Text Input", placeholder="Enter additional text input here (optional)")
+     ],
+     outputs="text",
+     title="Hugging Face Model Inference",
+     description="Generate text based on an image and a prompt using a Hugging Face model"
+ )
+
+ if __name__ == "__main__":
+     interface.launch()
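
A minimal smoke test for run_example outside Gradio (a sketch, not part of the commit; the image path is a placeholder). Florence-2 is steered by task tokens such as "<CAPTION>" and "<OD>" from the model card, and "<CAPTION_TO_PHRASE_GROUNDING>" additionally consumes the free-text input:

    from PIL import Image

    image = Image.open("sample.jpg").convert("RGB")  # placeholder image path
    print(run_example("<CAPTION>", image))           # plain captioning
    print(run_example("<OD>", image))                # object detection boxes
    print(run_example("<CAPTION_TO_PHRASE_GROUNDING>", image,
                      text_input="a photo of a dog"))  # grounding needs text_input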
_requirements.txt ADDED
@@ -0,0 +1,6 @@
+ transformers
+ pillow
+ gradio
+ #flash_attn
+ #timm
+ #einops
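
The three commented-out packages mirror the commented-out kwargs in _app.py: flash_attn backs attn_implementation="flash_attention_2", while timm and einops are pulled in by Florence-2's remote code. One way to gate the fast-attention path on what is actually installed (a sketch, not part of the commit):

    try:
        import flash_attn  # noqa: F401  -- optional extra from _requirements.txt
        attn_implementation = "flash_attention_2"
    except ImportError:
        attn_implementation = None  # None keeps the default attention kernels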
app.py CHANGED
@@ -1,60 +1,16 @@
- from transformers import AutoProcessor, AutoModelForCausalLM
- from PIL import Image
- import gradio as gr
-
- model_id = 'microsoft/Florence-2-large'
- model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True,
-                                              torch_dtype="auto",
-                                              #device_map="auto",
-                                              cache_dir="./cache",
-                                              #attn_implementation="flash_attention_2",
-                                              ).eval()
- processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True,
-                                           torch_dtype="auto",
-                                           #device_map="auto",
-                                           cache_dir="./cache",
-                                           #attn_implementation="flash_attention_2",
-                                           )
-
- def run_example(task_prompt, image, text_input=None):
-     if text_input is None:
-         prompt = task_prompt
-     else:
-         prompt = task_prompt + text_input

-     inputs = processor(text=prompt, images=image, return_tensors="pt")
-     generated_ids = model.generate(
-         input_ids=inputs["input_ids"],
-         pixel_values=inputs["pixel_values"],
-         max_new_tokens=1024,
-         early_stopping=False,
-         do_sample=False,
-         num_beams=3,
-     )
-     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
-     parsed_answer = processor.post_process_generation(
-         generated_text,
-         task=task_prompt,
-         image_size=(image.width, image.height),
-         #stream=True
-     )
-
-     return parsed_answer
+ import gradio as gr
+ from transformers import pipeline

- def inference(image, task_prompt, text_input):
-     return run_example(task_prompt, image, text_input)
+ pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")

- interface = gr.Interface(
-     fn=inference,
-     inputs=[
-         gr.Image(type="pil"),
-         gr.Textbox(label="Task Prompt", placeholder="Enter task prompt here"),
-         gr.Textbox(label="Additional Text Input", placeholder="Enter additional text input here (optional)", optional=True)
-     ],
-     outputs="text",
-     title="Hugging Face Model Inference",
-     description="Generate text based on an image and a prompt using a Hugging Face model"
- )
+ def predict(image):
+     predictions = pipeline(image)
+     return {p["label"]: p["score"] for p in predictions}

- if __name__ == "__main__":
-     interface.launch()
+ gr.Interface(
+     predict,
+     inputs=gr.Image(label="Upload hot dog candidate", type="filepath"),
+     outputs=gr.Label(num_top_classes=2),
+     title="Hot Dog? Or Not?",
+ ).launch()
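
A quick local check of the replacement app (a sketch, not part of the commit; "hotdog.jpg" is a placeholder path). Because the input is gr.Image(type="filepath"), predict receives a path string, which the image-classification pipeline accepts directly:

    scores = predict("hotdog.jpg")  # placeholder image path
    print(max(scores, key=scores.get), scores)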
requirements.txt CHANGED
@@ -1,6 +1,2 @@
  transformers
- pillow
- gradio
- #flash_attn
- #timm
- #einops
+ torch