# -*- coding: utf-8 -*-
"""Untitled4.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1LWcAEqZ865KCYhK-crQ3RKcFJEcH4xhD
"""

from huggingface_hub import login

login(token="hf_xxxx")  # placeholder -- replace with your own Hugging Face token

import torch
from transformers import Blip2Processor, Blip2ForConditionalGeneration
from PIL import Image
import gradio as gr
from io import BytesIO
import requests

# -----------------------------
# 1️⃣ Device and model setup
# -----------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Use a smaller model for memory efficiency
model_name = "Salesforce/blip2-flan-t5-xl-coco"  # updated model with preprocessor_config.json

# Load the weights in fp16 on GPU so they match the fp16 inputs cast below
# (loading in fp32 and then feeding fp16 pixel values causes a dtype mismatch).
dtype = torch.float16 if device == "cuda" else torch.float32

processor = Blip2Processor.from_pretrained(model_name)
model = Blip2ForConditionalGeneration.from_pretrained(model_name, torch_dtype=dtype)
model.to(device)

# -----------------------------
# 2️⃣ Inference function
# -----------------------------
def analyze_eo_image(image, question):
    """
    Analyze an Earth Observation (EO) image with a natural-language question.
    """
    try:
        if image is None or question.strip() == "":
            return "Please upload an EO image and ask a question."

        # Convert image to RGB if needed
        if image.mode != "RGB":
            image = image.convert("RGB")

        # Prepare inputs (floating tensors are cast to the model's dtype;
        # token ids stay integer)
        inputs = processor(images=image, text=question, return_tensors="pt").to(device, dtype)

        # Generate answer
        out = model.generate(
            **inputs,
            max_new_tokens=80,  # allow slightly longer explanations
            do_sample=True,
            temperature=0.7,
        )

        # Decode and return
        answer = processor.decode(out[0], skip_special_tokens=True)
        return answer

    except Exception as e:
        return f"Error: {e}"

# -----------------------------
# 3️⃣ Optional: Analyze image from URL
# -----------------------------
def analyze_eo_url(url, question):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content)).convert("RGB")
        return analyze_eo_image(image, question)
    except Exception as e:
        return f"Error loading image: {e}"

# -----------------------------
# 4️⃣ Gradio Interface
# -----------------------------
interface = gr.Interface(
    fn=analyze_eo_image,
    inputs=[
        gr.Image(type="pil", label="Upload EO Image"),
        gr.Textbox(label="Ask a Question about the EO Image"),
    ],
    outputs="text",
    title="🌍 EO Image Analysis with BLIP-2 + FLAN-T5",
    description="""
    Ask questions about Earth Observation (EO) images.
    Powered by BLIP-2 + FLAN-T5 (small, memory-efficient).
    Examples: "Identify land cover types", "Where is the river?", "Has urban area expanded?".
    """,
    allow_flagging="never",
)

# Launch the simple interface (shareable link). In Colab each cell launches
# independently; as a single script, leave this commented out so the richer
# Blocks layout below is the one that runs.
# interface.launch(share=True)
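# -----------------------------
# Optional: deterministic decoding (sketch)
# -----------------------------
# Sampling with temperature=0.7 gives varied answers on repeated runs. For
# reproducible output (e.g. when comparing prompts), beam search without
# sampling is an alternative. This helper is our own illustrative addition,
# not part of the original notebook; it assumes the `processor`, `model`,
# `device`, and `dtype` defined above.
def analyze_eo_image_deterministic(image, question):
    """Answer a question about an EO image with deterministic beam search."""
    if image.mode != "RGB":
        image = image.convert("RGB")
    inputs = processor(images=image, text=question, return_tensors="pt").to(device, dtype)
    out = model.generate(**inputs, max_new_tokens=80, num_beams=3, do_sample=False)
    return processor.decode(out[0], skip_special_tokens=True)

# Example: calling this twice with the same image and question should now
# return identical answers.
# answer = analyze_eo_image_deterministic(img, "Identify land cover types")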
# -----------------------------
# Beautiful Gradio Layout
# -----------------------------
# This richer Blocks UI reuses the model and the analyze_eo_image /
# analyze_eo_url functions defined above.
with gr.Blocks(title="🌍 EO Image Analysis") as demo:
    gr.Markdown(
        """
        # 🌍 Earth Observation Image Analysis
        Ask questions about EO images using a multimodal AI model.
        Powered by BLIP-2 + FLAN-T5.

        **Examples:** "Identify land cover types", "Where is the river?", "Has urban area expanded?"
        """
    )

    with gr.Tabs():
        with gr.Tab("Upload Image"):
            with gr.Row():
                with gr.Column(scale=1):
                    img_input = gr.Image(type="pil", label="Upload EO Image")
                    question_input = gr.Textbox(
                        label="Ask a question about the image",
                        placeholder="E.g. Where is the river?",
                    )
                    submit_btn = gr.Button("Analyze 🌟")
                with gr.Column(scale=1):
                    output_text = gr.Textbox(label="AI Answer", interactive=False)
            submit_btn.click(
                analyze_eo_image,
                inputs=[img_input, question_input],
                outputs=output_text,
            )

        with gr.Tab("Use Image URL"):
            with gr.Row():
                with gr.Column(scale=1):
                    url_input = gr.Textbox(label="Enter Image URL")
                    url_question = gr.Textbox(label="Ask a question about the image")
                    url_btn = gr.Button("Analyze 🌟")
                with gr.Column(scale=1):
                    url_output = gr.Textbox(label="AI Answer", interactive=False)
            url_btn.click(
                analyze_eo_url,
                inputs=[url_input, url_question],
                outputs=url_output,
            )

    gr.Markdown(
        "💡 Tip: Use clear, simple questions for best results. "
        "Supports natural language queries about EO images."
    )

# Launch the interface (shareable link)
demo.launch(share=True)
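# -----------------------------
# Optional: pre-shrinking large EO scenes (sketch)
# -----------------------------
# Satellite scenes are often far larger than the small crops BLIP-2's vision
# encoder sees after preprocessing. The processor resizes internally anyway,
# but downsampling first keeps host memory low for very large uploads. This
# helper is our own illustrative addition (not from the original notebook);
# in a notebook it can live in its own cell, and to take effect it should be
# called on the image at the top of analyze_eo_image, before launch.
def prepare_eo_image(image, max_side=1024):
    """Downsample an oversized PIL image so its longest side is <= max_side."""
    w, h = image.size
    if max(w, h) > max_side:
        scale = max_side / max(w, h)
        image = image.resize((int(w * scale), int(h * scale)), Image.LANCZOS)
    return image.convert("RGB")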