"""Untitled4.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1LWcAEqZ865KCYhK-crQ3RKcFJEcH4xhD
"""
|
|
|
|
|
from huggingface_hub import login

# Authenticate with the Hugging Face Hub. "hf_xxxx" is a placeholder;
# substitute your own access token, and avoid committing real tokens.
login(token="hf_xxxx")
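# A safer alternative (a sketch, not in the original notebook): read the token
# from an environment variable, e.g. one set via Colab's "Secrets" panel, so it
# never appears in the notebook itself. HF_TOKEN is an assumed variable name.
import os

hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)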
|
|
|
|
|
import torch
from transformers import Blip2Processor, Blip2ForConditionalGeneration
from PIL import Image
import gradio as gr
from io import BytesIO
import requests

# Select the GPU when available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# BLIP-2 with a FLAN-T5-XL language model, fine-tuned on COCO.
model_name = "Salesforce/blip2-flan-t5-xl-coco"
processor = Blip2Processor.from_pretrained(model_name)

# Load the weights in float16 on GPU so they match the float16 inputs built in
# analyze_eo_image below; keep float32 on CPU.
model = Blip2ForConditionalGeneration.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
)
model.to(device)
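# Optional memory-saving path (a sketch, not in the original notebook): load
# the model 8-bit quantized instead of float16. Assumes the extra
# `bitsandbytes` and `accelerate` packages are installed and a CUDA GPU is
# present; flip the flag to True to use it.
USE_8BIT = False
if USE_8BIT:
    from transformers import BitsAndBytesConfig

    model = Blip2ForConditionalGeneration.from_pretrained(
        model_name,
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        device_map="auto",  # accelerate handles placement; no .to() needed
    )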
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def analyze_eo_image(image, question):
    """Answer a natural-language question about an Earth Observation (EO) image."""
    try:
        if image is None or question.strip() == "":
            return "Please upload an EO image and ask a question."

        # BLIP-2 expects RGB input.
        if image.mode != "RGB":
            image = image.convert("RGB")

        # Build model inputs; floating-point tensors are cast to match the model dtype.
        inputs = processor(image, question, return_tensors="pt").to(
            device, torch.float16 if device == "cuda" else torch.float32
        )

        # Sample a short answer (up to 80 new tokens).
        out = model.generate(
            **inputs,
            max_new_tokens=80,
            do_sample=True,
            temperature=0.7,
        )

        return processor.decode(out[0], skip_special_tokens=True)
    except Exception as e:
        return f"Error: {e}"
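# Minimal smoke test (a sketch): exercise the pipeline on a synthetic image so
# it can run without downloading data. A real EO scene will of course produce
# more meaningful answers.
_dummy = Image.new("RGB", (224, 224), color=(34, 139, 34))  # flat green tile
print(analyze_eo_image(_dummy, "What land cover types are visible?"))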
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def analyze_eo_url(url, question):
    """Fetch an image from a URL, then reuse the same question-answering pipeline."""
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # surface HTTP errors instead of decoding an error page
        image = Image.open(BytesIO(response.content)).convert("RGB")
        return analyze_eo_image(image, question)
    except Exception as e:
        return f"Error loading image: {e}"
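# Example call (the URL is a placeholder; substitute any publicly reachable image):
# print(analyze_eo_url("https://example.com/eo_scene.jpg", "Where is the river?"))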
|
|
|
|
|
|
|
|
|
|
|
|
|
|
interface = gr.Interface(
    fn=analyze_eo_image,
    inputs=[
        gr.Image(type="pil", label="Upload EO Image"),
        gr.Textbox(label="Ask a Question about the EO Image"),
    ],
    outputs="text",
    title="🌍 EO Image Analysis with BLIP-2",
    description="""
    Ask questions about Earth Observation (EO) images.
    Powered by BLIP-2 with FLAN-T5-XL (fits on a single Colab GPU in float16).
    Examples: "Identify land cover types", "Where is the river?",
    "Has the urban area expanded?".
    """,
    allow_flagging="never",  # recent Gradio releases rename this to flagging_mode
)

# share=True exposes a temporary public URL, which is handy in Colab.
interface.launch(share=True)
|
|
|
|
|
# ----------------------------------------------------------------------
# Alternative UI: the same model and helper functions defined above,
# exposed through gr.Blocks with tabs for image upload and image URLs.
# ----------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
with gr.Blocks(title="🌍 EO Image Analysis") as demo:

    gr.Markdown(
        """
        # 🌍 Earth Observation Image Analysis
        Ask questions about EO images using a multimodal AI model.
        Powered by BLIP-2 with FLAN-T5-XL.
        **Examples:** "Identify land cover types", "Where is the river?", "Has the urban area expanded?"
        """
    )

    with gr.Tabs():
        with gr.Tab("Upload Image"):
            with gr.Row():
                with gr.Column(scale=1):
                    img_input = gr.Image(type="pil", label="Upload EO Image")
                    question_input = gr.Textbox(
                        label="Ask a question about the image",
                        placeholder="E.g. Where is the river?",
                    )
                    submit_btn = gr.Button("Analyze 🌍")
                with gr.Column(scale=1):
                    output_text = gr.Textbox(label="AI Answer", interactive=False)

            submit_btn.click(analyze_eo_image, inputs=[img_input, question_input], outputs=output_text)

        with gr.Tab("Use Image URL"):
            with gr.Row():
                with gr.Column(scale=1):
                    url_input = gr.Textbox(label="Enter Image URL")
                    url_question = gr.Textbox(label="Ask a question about the image")
                    url_btn = gr.Button("Analyze 🌍")
                with gr.Column(scale=1):
                    url_output = gr.Textbox(label="AI Answer", interactive=False)

            url_btn.click(analyze_eo_url, inputs=[url_input, url_question], outputs=url_output)

    gr.Markdown(
        "💡 Tip: Use clear, simple questions for best results. Supports natural language queries about EO images."
    )

demo.launch(share=True)
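# Optional (a sketch): when several users may hit the shared link at once,
# enable Gradio's request queue before launching so long generations don't
# block other visitors.
# demo.queue().launch(share=True)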
|
|
|
|
|