Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from PIL import Image | |
| from transformers import ( | |
| BlipProcessor, | |
| BlipForConditionalGeneration, | |
| AutoTokenizer, | |
| AutoModelForSeq2SeqLM | |
| ) | |
| import torch | |
# -------- Load Models -------- #
# Each checkpoint id is named once so a processor/tokenizer can never drift
# apart from the model it is paired with.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
FLAN_T5_CHECKPOINT = "google/flan-t5-base"

# Captioning pair: turns the uploaded image into a short base caption.
blip_processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
blip_model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

# Text-to-text pair: rewrites the base caption when SEO mode is enabled.
seo_tokenizer = AutoTokenizer.from_pretrained(FLAN_T5_CHECKPOINT)
seo_model = AutoModelForSeq2SeqLM.from_pretrained(FLAN_T5_CHECKPOINT)
# -------- Core Function -------- #
def _caption_image(image):
    """Return the stripped BLIP caption for *image* (at most 30 new tokens)."""
    inputs = blip_processor(image, return_tensors="pt")
    with torch.no_grad():
        output = blip_model.generate(**inputs, max_new_tokens=30)
    return blip_processor.decode(output[0], skip_special_tokens=True).strip()


def _as_sentence(caption):
    """Capitalize *caption* and terminate it with a period."""
    return caption.capitalize() + "."


def _build_seo_prompt(base_caption, keywords):
    """Build the FLAN-T5 instruction prompt around *base_caption*.

    ``keywords`` may be ``None``; ``None`` and whitespace-only values simply
    omit the keyword instruction from the prompt.
    """
    keywords = (keywords or "").strip()
    keyword_instruction = (
        f"Include the following keywords naturally: {keywords}. "
        if keywords
        else ""
    )
    return (
        "You are an SEO expert. "
        "Write a detailed, descriptive, and natural alt text for a website image. "
        "The alt text should be longer than the original caption and written in a single sentence. "
        f"{keyword_instruction}"
        f"Image description: {base_caption}."
    )


def generate_alt_text(image, seo_mode, keywords):
    """Generate alt text for an image, optionally rewritten for SEO.

    Args:
        image: Image handed directly to ``blip_processor``; ``None`` means
            no image was supplied.
        seo_mode: When falsy, the BLIP caption is returned as a capitalized
            sentence. When truthy, the caption is embedded in a prompt and
            rewritten by ``seo_model``.
        keywords: Optional free-text keywords to mention in SEO mode.
            ``None`` or a blank string means "no keywords". Ignored when
            ``seo_mode`` is falsy.

    Returns:
        The alt text as a string. An empty string is returned only when
        ``image`` is ``None``; if the SEO rewrite decodes to an empty
        string, the formatted base caption is returned instead.
    """
    if image is None:
        return ""

    # ---- Step 1: Base Caption ---- #
    base_caption = _caption_image(image)

    # ---- Step 2: Normal Mode ---- #
    if not seo_mode:
        return _as_sentence(base_caption)

    # ---- Step 3: SEO Prompt ---- #
    prompt = _build_seo_prompt(base_caption, keywords)
    seo_inputs = seo_tokenizer(prompt, return_tensors="pt", truncation=True)

    # ---- Step 4: SEO Rewrite ---- #
    # Nucleus sampling (top_p=0.95) makes the output vary between calls;
    # max_new_tokens is only an upper bound, not a guaranteed length.
    with torch.no_grad():
        seo_output = seo_model.generate(
            **seo_inputs,
            max_new_tokens=120,
            do_sample=True,
            top_p=0.95,
            temperature=1.0,
        )
    seo_alt_text = seo_tokenizer.decode(
        seo_output[0],
        skip_special_tokens=True,
    ).strip()

    # An empty rewrite would be indistinguishable from the "no image" result,
    # so fall back to the plain caption.
    return seo_alt_text or _as_sentence(base_caption)
# -------- Gradio UI -------- #
# Single-column layout: image upload, SEO options, output box, and a button
# that sends the three inputs to generate_alt_text.
with gr.Blocks(title="Alt Text Generator") as demo:
    gr.Markdown("""
    # 🖼️ Alt Text Generator
    AI-powered alt text for accessibility and SEO.
    """)
    image_input = gr.Image(
        type="pil",
        label="Upload Image",
    )
    seo_toggle = gr.Checkbox(
        label="SEO Mode (expanded, keyword-aware alt text)",
        value=False,
    )
    keyword_input = gr.Textbox(
        label="Keywords (optional)",
        placeholder="e.g. science experiment for kids, STEM education",
    )
    alt_text_output = gr.Textbox(
        label="Generated Alt Text",
        lines=6,
    )
    # NOTE(review): the trailing glyph in the button label is mojibake from a
    # mis-encoded emoji; the original code point cannot be recovered from this
    # file, so the literal is left as-is. Replace it with the intended emoji.
    generate_btn = gr.Button("Generate Alt Text ๐")
    # Input order must match the generate_alt_text(image, seo_mode, keywords)
    # parameter order.
    generate_btn.click(
        fn=generate_alt_text,
        inputs=[image_input, seo_toggle, keyword_input],
        outputs=alt_text_output,
    )

# Start the server only when run as a script, so importing this module does
# not block on a running web server.
if __name__ == "__main__":
    demo.launch()