taufiqdp committed on
Commit
ffd7d69
1 Parent(s): 1882cb5

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +54 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import spaces
4
+ import subprocess
5
+ import gradio as gr
6
+ from huggingface_hub import login
7
+ from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
8
+
9
# flash-attn is installed at startup; it is not listed in requirements.txt.
# The current environment is merged into `env` so that pip still sees PATH,
# HOME, proxy settings, etc. -- passing a bare dict as `env` replaces the whole
# environment of the child process instead of extending it.
# An argument list is used (no shell) so nothing is re-parsed by /bin/sh.
subprocess.run(
    ["pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
)

# Authenticate against the Hugging Face Hub with the token from HF_TOKEN.
# NOTE(review): os.environ.get returns None when HF_TOKEN is unset -- confirm
# the deployment always provides it.
login(os.environ.get("HF_TOKEN"))

model_id = "google/paligemma-3b-mix-448"

# Load the whole model onto device 0 in bfloat16, using the flash-attn kernels
# installed above.
model = PaliGemmaForConditionalGeneration.from_pretrained(
    model_id,
    device_map={"": 0},
    attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
)
processor = AutoProcessor.from_pretrained(model_id)

# Inference only: switch the module to evaluation mode.
model.eval()
20
+
21
+
22
@spaces.GPU()
def answer_question(image, prompt):
    """Answer a text prompt about an image with the module-level PaliGemma model.

    Args:
        image: The image to condition on, as delivered by the Gradio image
            component (configured with ``type="pil"``). ``None`` when the user
            has not uploaded anything.
        prompt: The text prompt / question.

    Returns:
        The decoded text of the newly generated tokens only (the prompt tokens
        are stripped), with special tokens removed.

    Raises:
        gr.Error: If no image was provided, so the UI shows a readable message
            instead of an opaque processor failure.
    """
    # Both the button and the textbox submit can fire before an image exists.
    if image is None:
        raise gr.Error("Please upload an image first.")

    model_inputs = processor(text=prompt, images=image, return_tensors="pt").to("cuda")

    # Remember the prompt length so the echoed prompt tokens can be dropped
    # from the generated sequence below.
    input_len = model_inputs["input_ids"].shape[-1]

    with torch.inference_mode():
        # Greedy decoding (do_sample=False), capped at 100 new tokens.
        generation = model.generate(**model_inputs, max_new_tokens=100, do_sample=False)
        new_tokens = generation[0][input_len:]
        decoded = processor.decode(new_tokens, skip_special_tokens=True)

    return decoded
33
+
34
+
35
# Gradio UI: a header, a prompt row (textbox + button) and an image/response row.
with gr.Blocks() as demo:
    gr.Markdown(
        """
    # PaliGemma
    Lightweight open vision-language model (VLM). [Model card](https://huggingface.co/google/paligemma-3b-mix-448)
    """
    )

    # Prompt entry and the submit button share one row; the textbox is wider.
    with gr.Row():
        prompt = gr.Textbox(label="Input", value="Describe this image.", scale=4)
        submit = gr.Button("Submit")

    # Image upload next to the model's response.
    with gr.Row():
        image = gr.Image(type="pil", label="Upload an Image")
        output = gr.TextArea(label="Response")

    # Clicking the button and pressing Enter in the textbox run the same handler.
    for register in (submit.click, prompt.submit):
        register(fn=answer_question, inputs=[image, prompt], outputs=output)

# Enable request queuing, then start the app.
demo.queue().launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ git+https://github.com/huggingface/transformers.git
2
+ accelerate
3
+ torch