Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,30 +3,35 @@ from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
|
|
| 3 |
from PIL import Image
|
| 4 |
import requests
|
| 5 |
from io import BytesIO
|
|
|
|
| 6 |
|
| 7 |
# Load the model and processor
|
| 8 |
repo_name = "cyan2k/molmo-7B-O-bnb-4bit"
|
| 9 |
arguments = {
|
| 10 |
-
"device_map": "auto", # Let the library pick device placement automatically
|
| 11 |
-
"torch_dtype": "auto", # Use the dtype stored with the checkpoint
|
| 12 |
-
"trust_remote_code": True # Allow loading custom model code from the repo
|
| 13 |
}
|
| 14 |
|
| 15 |
-
# Load the processor
|
| 16 |
processor = AutoProcessor.from_pretrained(repo_name, **arguments)
|
| 17 |
-
model = AutoModelForCausalLM.from_pretrained(repo_name, **arguments)
|
| 18 |
|
|
|
|
|
|
|
| 19 |
def describe_image(image):
|
|
|
|
|
|
|
|
|
|
| 20 |
# Process the uploaded image
|
| 21 |
inputs = processor.process(
|
| 22 |
images=[image],
|
| 23 |
text="Describe this image in great detail without missing any piece of information"
|
| 24 |
)
|
| 25 |
|
| 26 |
-
# Move inputs to model device
|
| 27 |
-
inputs = {k: v.to(
|
| 28 |
|
| 29 |
-
# Generate output
|
| 30 |
output = model.generate_from_batch(
|
| 31 |
inputs,
|
| 32 |
GenerationConfig(max_new_tokens=1024, stop_strings="<|endoftext|>"),
|
|
@@ -39,7 +44,7 @@ def describe_image(image):
|
|
| 39 |
|
| 40 |
return generated_text
|
| 41 |
|
| 42 |
-
|
| 43 |
def gradio_app():
|
| 44 |
# Define Gradio interface
|
| 45 |
image_input = gr.Image(type="pil", label="Upload Image")
|
|
@@ -58,4 +63,4 @@ def gradio_app():
|
|
| 58 |
interface.launch()
|
| 59 |
|
| 60 |
# Launch the Gradio app
|
| 61 |
-
gradio_app()
|
|
|
|
| 3 |
from PIL import Image
|
| 4 |
import requests
|
| 5 |
from io import BytesIO
|
| 6 |
+
import spaces # Import spaces for ZeroGPU support
|
| 7 |
|
| 8 |
# Load the model and processor
|
| 9 |
repo_name = "cyan2k/molmo-7B-O-bnb-4bit"
|
| 10 |
arguments = {
|
| 11 |
+
"device_map": "auto", # Device will be set automatically
|
| 12 |
+
"torch_dtype": "auto", # Use appropriate precision
|
| 13 |
+
"trust_remote_code": True # Allow loading remote code
|
| 14 |
}
|
| 15 |
|
| 16 |
+
# Load the processor (this part doesn't need GPU yet)
|
| 17 |
processor = AutoProcessor.from_pretrained(repo_name, **arguments)
|
|
|
|
| 18 |
|
| 19 |
+
# Define the function for image description
|
| 20 |
+
@spaces.GPU # This ensures the function gets GPU access when needed
|
| 21 |
def describe_image(image):
|
| 22 |
+
# Load the model inside the function and move it to GPU
|
| 23 |
+
model = AutoModelForCausalLM.from_pretrained(repo_name, **arguments).to('cuda')
|
| 24 |
+
|
| 25 |
# Process the uploaded image
|
| 26 |
inputs = processor.process(
|
| 27 |
images=[image],
|
| 28 |
text="Describe this image in great detail without missing any piece of information"
|
| 29 |
)
|
| 30 |
|
| 31 |
+
# Move inputs to model device (GPU)
|
| 32 |
+
inputs = {k: v.to('cuda').unsqueeze(0) for k, v in inputs.items()}
|
| 33 |
|
| 34 |
+
# Generate output using the model on GPU
|
| 35 |
output = model.generate_from_batch(
|
| 36 |
inputs,
|
| 37 |
GenerationConfig(max_new_tokens=1024, stop_strings="<|endoftext|>"),
|
|
|
|
| 44 |
|
| 45 |
return generated_text
|
| 46 |
|
| 47 |
+
# Gradio interface
|
| 48 |
def gradio_app():
|
| 49 |
# Define Gradio interface
|
| 50 |
image_input = gr.Image(type="pil", label="Upload Image")
|
|
|
|
| 63 |
interface.launch()
|
| 64 |
|
| 65 |
# Launch the Gradio app
|
| 66 |
+
gradio_app()
|